diff options
author | Daniel Baumann <daniel.baumann@progress-linux.org> | 2024-04-11 08:27:49 +0000 |
---|---|---|
committer | Daniel Baumann <daniel.baumann@progress-linux.org> | 2024-04-11 08:27:49 +0000 |
commit | ace9429bb58fd418f0c81d4c2835699bddf6bde6 (patch) | |
tree | b2d64bc10158fdd5497876388cd68142ca374ed3 /Documentation/driver-api | |
parent | Initial commit. (diff) | |
download | linux-ace9429bb58fd418f0c81d4c2835699bddf6bde6.tar.xz linux-ace9429bb58fd418f0c81d4c2835699bddf6bde6.zip |
Adding upstream version 6.6.15.upstream/6.6.15
Signed-off-by: Daniel Baumann <daniel.baumann@progress-linux.org>
Diffstat (limited to 'Documentation/driver-api')
306 files changed, 59432 insertions, 0 deletions
diff --git a/Documentation/driver-api/80211/cfg80211.rst b/Documentation/driver-api/80211/cfg80211.rst new file mode 100644 index 0000000000..836f609c3f --- /dev/null +++ b/Documentation/driver-api/80211/cfg80211.rst @@ -0,0 +1,178 @@ +================== +cfg80211 subsystem +================== + +.. kernel-doc:: include/net/cfg80211.h + :doc: Introduction + +Device registration +=================== + +.. kernel-doc:: include/net/cfg80211.h + :doc: Device registration + +.. kernel-doc:: include/net/cfg80211.h + :functions: + ieee80211_channel_flags + ieee80211_channel + ieee80211_rate_flags + ieee80211_rate + ieee80211_sta_ht_cap + ieee80211_supported_band + cfg80211_signal_type + wiphy_params_flags + wiphy_flags + wiphy + wireless_dev + wiphy_new + wiphy_read_of_freq_limits + wiphy_register + wiphy_unregister + wiphy_free + wiphy_name + wiphy_dev + wiphy_priv + priv_to_wiphy + set_wiphy_dev + wdev_priv + ieee80211_iface_limit + ieee80211_iface_combination + cfg80211_check_combinations + +Actions and configuration +========================= + +.. kernel-doc:: include/net/cfg80211.h + :doc: Actions and configuration + +.. kernel-doc:: include/net/cfg80211.h + :functions: + cfg80211_ops + vif_params + key_params + survey_info_flags + survey_info + cfg80211_beacon_data + cfg80211_ap_settings + station_parameters + rate_info_flags + rate_info + station_info + monitor_flags + mpath_info_flags + mpath_info + bss_parameters + ieee80211_txq_params + cfg80211_crypto_settings + cfg80211_auth_request + cfg80211_assoc_request + cfg80211_deauth_request + cfg80211_disassoc_request + cfg80211_ibss_params + cfg80211_connect_params + cfg80211_pmksa + cfg80211_rx_mlme_mgmt + cfg80211_auth_timeout + cfg80211_rx_assoc_resp + cfg80211_assoc_timeout + cfg80211_tx_mlme_mgmt + cfg80211_ibss_joined + cfg80211_connect_resp_params + cfg80211_connect_done + cfg80211_connect_result + cfg80211_connect_bss + cfg80211_connect_timeout + cfg80211_roamed + cfg80211_disconnected + cfg80211_ready_on_channel + cfg80211_remain_on_channel_expired + cfg80211_new_sta + cfg80211_rx_mgmt + cfg80211_mgmt_tx_status + cfg80211_cqm_rssi_notify + cfg80211_cqm_pktloss_notify + cfg80211_michael_mic_failure + +Scanning and BSS list handling +============================== + +.. kernel-doc:: include/net/cfg80211.h + :doc: Scanning and BSS list handling + +.. kernel-doc:: include/net/cfg80211.h + :functions: + cfg80211_ssid + cfg80211_scan_request + cfg80211_scan_done + cfg80211_bss + cfg80211_inform_bss + cfg80211_inform_bss_frame_data + cfg80211_inform_bss_data + cfg80211_unlink_bss + cfg80211_find_ie + ieee80211_bss_get_ie + +Utility functions +================= + +.. kernel-doc:: include/net/cfg80211.h + :doc: Utility functions + +.. kernel-doc:: include/net/cfg80211.h + :functions: + ieee80211_channel_to_frequency + ieee80211_frequency_to_channel + ieee80211_get_channel + ieee80211_get_response_rate + ieee80211_hdrlen + ieee80211_get_hdrlen_from_skb + ieee80211_radiotap_iterator + +Data path helpers +================= + +.. kernel-doc:: include/net/cfg80211.h + :doc: Data path helpers + +.. kernel-doc:: include/net/cfg80211.h + :functions: + ieee80211_data_to_8023 + ieee80211_amsdu_to_8023s + cfg80211_classify8021d + +Regulatory enforcement infrastructure +===================================== + +.. kernel-doc:: include/net/cfg80211.h + :doc: Regulatory enforcement infrastructure + +.. kernel-doc:: include/net/cfg80211.h + :functions: + regulatory_hint + wiphy_apply_custom_regulatory + freq_reg_info + +RFkill integration +================== + +.. kernel-doc:: include/net/cfg80211.h + :doc: RFkill integration + +.. kernel-doc:: include/net/cfg80211.h + :functions: + wiphy_rfkill_set_hw_state + wiphy_rfkill_start_polling + wiphy_rfkill_stop_polling + +Test mode +========= + +.. kernel-doc:: include/net/cfg80211.h + :doc: Test mode + +.. kernel-doc:: include/net/cfg80211.h + :functions: + cfg80211_testmode_alloc_reply_skb + cfg80211_testmode_reply + cfg80211_testmode_alloc_event_skb + cfg80211_testmode_event diff --git a/Documentation/driver-api/80211/index.rst b/Documentation/driver-api/80211/index.rst new file mode 100644 index 0000000000..af210859d3 --- /dev/null +++ b/Documentation/driver-api/80211/index.rst @@ -0,0 +1,17 @@ +===================================== +Linux 802.11 Driver Developer's Guide +===================================== + +.. toctree:: + + introduction + cfg80211 + mac80211 + mac80211-advanced + +.. only:: subproject and html + + Indices + ======= + + * :ref:`genindex` diff --git a/Documentation/driver-api/80211/introduction.rst b/Documentation/driver-api/80211/introduction.rst new file mode 100644 index 0000000000..4938fa8769 --- /dev/null +++ b/Documentation/driver-api/80211/introduction.rst @@ -0,0 +1,17 @@ +============ +Introduction +============ + +Explaining wireless 802.11 networking in the Linux kernel + +Copyright 2007-2009 Johannes Berg + +These books attempt to give a description of the various subsystems +that play a role in 802.11 wireless networking in Linux. Since these +books are for kernel developers they attempts to document the +structures and functions used in the kernel as well as giving a +higher-level overview. + +The reader is expected to be familiar with the 802.11 standard as +published by the IEEE in 802.11-2007 (or possibly later versions). +References to this standard will be given as "802.11-2007 8.1.5". diff --git a/Documentation/driver-api/80211/mac80211-advanced.rst b/Documentation/driver-api/80211/mac80211-advanced.rst new file mode 100644 index 0000000000..f8df7b3af8 --- /dev/null +++ b/Documentation/driver-api/80211/mac80211-advanced.rst @@ -0,0 +1,239 @@ +============================= +mac80211 subsystem (advanced) +============================= + +Information contained within this part of the book is of interest only +for advanced interaction of mac80211 with drivers to exploit more +hardware capabilities and improve performance. + +LED support +=========== + +Mac80211 supports various ways of blinking LEDs. Wherever possible, +device LEDs should be exposed as LED class devices and hooked up to the +appropriate trigger, which will then be triggered appropriately by +mac80211. + +.. kernel-doc:: include/net/mac80211.h + :functions: + ieee80211_get_tx_led_name + ieee80211_get_rx_led_name + ieee80211_get_assoc_led_name + ieee80211_get_radio_led_name + ieee80211_tpt_blink + ieee80211_tpt_led_trigger_flags + ieee80211_create_tpt_led_trigger + +Hardware crypto acceleration +============================ + +.. kernel-doc:: include/net/mac80211.h + :doc: Hardware crypto acceleration + +.. kernel-doc:: include/net/mac80211.h + :functions: + set_key_cmd + ieee80211_key_conf + ieee80211_key_flags + ieee80211_get_tkip_p1k + ieee80211_get_tkip_p1k_iv + ieee80211_get_tkip_p2k + +Powersave support +================= + +.. kernel-doc:: include/net/mac80211.h + :doc: Powersave support + +Beacon filter support +===================== + +.. kernel-doc:: include/net/mac80211.h + :doc: Beacon filter support + +.. kernel-doc:: include/net/mac80211.h + :functions: ieee80211_beacon_loss + +Multiple queues and QoS support +=============================== + +TBD + +.. kernel-doc:: include/net/mac80211.h + :functions: ieee80211_tx_queue_params + +Access point mode support +========================= + +TBD + +Some parts of the if_conf should be discussed here instead + +Insert notes about VLAN interfaces with hw crypto here or in the hw +crypto chapter. + +support for powersaving clients +------------------------------- + +.. kernel-doc:: include/net/mac80211.h + :doc: AP support for powersaving clients + +.. kernel-doc:: include/net/mac80211.h + :functions: + ieee80211_get_buffered_bc + ieee80211_beacon_get + ieee80211_sta_eosp + ieee80211_frame_release_type + ieee80211_sta_ps_transition + ieee80211_sta_ps_transition_ni + ieee80211_sta_set_buffered + ieee80211_sta_block_awake + +Supporting multiple virtual interfaces +====================================== + +TBD + +Note: WDS with identical MAC address should almost always be OK + +Insert notes about having multiple virtual interfaces with different MAC +addresses here, note which configurations are supported by mac80211, add +notes about supporting hw crypto with it. + +.. kernel-doc:: include/net/mac80211.h + :functions: + ieee80211_iterate_active_interfaces + ieee80211_iterate_active_interfaces_atomic + +Station handling +================ + +TODO + +.. kernel-doc:: include/net/mac80211.h + :functions: + ieee80211_sta + sta_notify_cmd + ieee80211_find_sta + ieee80211_find_sta_by_ifaddr + +Hardware scan offload +===================== + +TBD + +.. kernel-doc:: include/net/mac80211.h + :functions: ieee80211_scan_completed + +Aggregation +=========== + +TX A-MPDU aggregation +--------------------- + +.. kernel-doc:: net/mac80211/agg-tx.c + :doc: TX A-MPDU aggregation + +.. WARNING: DOCPROC directive not supported: !Cnet/mac80211/agg-tx.c + +RX A-MPDU aggregation +--------------------- + +.. kernel-doc:: net/mac80211/agg-rx.c + :doc: RX A-MPDU aggregation + +.. WARNING: DOCPROC directive not supported: !Cnet/mac80211/agg-rx.c + +.. kernel-doc:: include/net/mac80211.h + :functions: ieee80211_ampdu_mlme_action + +Spatial Multiplexing Powersave (SMPS) +===================================== + +.. kernel-doc:: include/net/mac80211.h + :doc: Spatial multiplexing power save + +.. kernel-doc:: include/net/mac80211.h + :functions: + ieee80211_request_smps + ieee80211_smps_mode + +TBD + +This part of the book describes the rate control algorithm interface and +how it relates to mac80211 and drivers. + +Rate Control API +================ + +TBD + +.. kernel-doc:: include/net/mac80211.h + :functions: + ieee80211_start_tx_ba_session + ieee80211_start_tx_ba_cb_irqsafe + ieee80211_stop_tx_ba_session + ieee80211_stop_tx_ba_cb_irqsafe + ieee80211_rate_control_changed + ieee80211_tx_rate_control + +TBD + +This part of the book describes mac80211 internals. + +Key handling +============ + +Key handling basics +------------------- + +.. kernel-doc:: net/mac80211/key.c + :doc: Key handling basics + +MORE TBD +-------- + +TBD + +Receive processing +================== + +TBD + +Transmit processing +=================== + +TBD + +Station info handling +===================== + +Programming information +----------------------- + +.. kernel-doc:: net/mac80211/sta_info.h + :functions: + sta_info + ieee80211_sta_info_flags + +STA information lifetime rules +------------------------------ + +.. kernel-doc:: net/mac80211/sta_info.c + :doc: STA information lifetime rules + +Aggregation Functions +===================== + +.. kernel-doc:: net/mac80211/sta_info.h + :functions: + sta_ampdu_mlme + tid_ampdu_tx + tid_ampdu_rx + +Synchronisation Functions +========================= + +TBD + +Locking, lots of RCU diff --git a/Documentation/driver-api/80211/mac80211.rst b/Documentation/driver-api/80211/mac80211.rst new file mode 100644 index 0000000000..67d2e58b45 --- /dev/null +++ b/Documentation/driver-api/80211/mac80211.rst @@ -0,0 +1,155 @@ +=========================== +mac80211 subsystem (basics) +=========================== + +You should read and understand the information contained within this +part of the book while implementing a mac80211 driver. In some chapters, +advanced usage is noted, those may be skipped if this isn't needed. + +This part of the book only covers station and monitor mode +functionality, additional information required to implement the other +modes is covered in the second part of the book. + +Basic hardware handling +======================= + +TBD + +This chapter shall contain information on getting a hw struct allocated +and registered with mac80211. + +Since it is required to allocate rates/modes before registering a hw +struct, this chapter shall also contain information on setting up the +rate/mode structs. + +Additionally, some discussion about the callbacks and the general +programming model should be in here, including the definition of +ieee80211_ops which will be referred to a lot. + +Finally, a discussion of hardware capabilities should be done with +references to other parts of the book. + +.. kernel-doc:: include/net/mac80211.h + :functions: + ieee80211_hw + ieee80211_hw_flags + SET_IEEE80211_DEV + SET_IEEE80211_PERM_ADDR + ieee80211_ops + ieee80211_alloc_hw + ieee80211_register_hw + ieee80211_unregister_hw + ieee80211_free_hw + +PHY configuration +================= + +TBD + +This chapter should describe PHY handling including start/stop callbacks +and the various structures used. + +.. kernel-doc:: include/net/mac80211.h + :functions: + ieee80211_conf + ieee80211_conf_flags + +Virtual interfaces +================== + +TBD + +This chapter should describe virtual interface basics that are relevant +to the driver (VLANs, MGMT etc are not.) It should explain the use of +the add_iface/remove_iface callbacks as well as the interface +configuration callbacks. + +Things related to AP mode should be discussed there. + +Things related to supporting multiple interfaces should be in the +appropriate chapter, a BIG FAT note should be here about this though and +the recommendation to allow only a single interface in STA mode at +first! + +.. kernel-doc:: include/net/mac80211.h + :functions: ieee80211_vif + +Receive and transmit processing +=============================== + +what should be here +------------------- + +TBD + +This should describe the receive and transmit paths in mac80211/the +drivers as well as transmit status handling. + +Frame format +------------ + +.. kernel-doc:: include/net/mac80211.h + :doc: Frame format + +Packet alignment +---------------- + +.. kernel-doc:: net/mac80211/rx.c + :doc: Packet alignment + +Calling into mac80211 from interrupts +------------------------------------- + +.. kernel-doc:: include/net/mac80211.h + :doc: Calling mac80211 from interrupts + +functions/definitions +--------------------- + +.. kernel-doc:: include/net/mac80211.h + :functions: + ieee80211_rx_status + mac80211_rx_encoding_flags + mac80211_rx_flags + mac80211_tx_info_flags + mac80211_tx_control_flags + mac80211_rate_control_flags + ieee80211_tx_rate + ieee80211_tx_info + ieee80211_tx_info_clear_status + ieee80211_rx + ieee80211_rx_ni + ieee80211_rx_irqsafe + ieee80211_tx_status + ieee80211_tx_status_ni + ieee80211_tx_status_irqsafe + ieee80211_rts_get + ieee80211_rts_duration + ieee80211_ctstoself_get + ieee80211_ctstoself_duration + ieee80211_generic_frame_duration + ieee80211_wake_queue + ieee80211_stop_queue + ieee80211_wake_queues + ieee80211_stop_queues + ieee80211_queue_stopped + +Frame filtering +=============== + +.. kernel-doc:: include/net/mac80211.h + :doc: Frame filtering + +.. kernel-doc:: include/net/mac80211.h + :functions: ieee80211_filter_flags + +The mac80211 workqueue +====================== + +.. kernel-doc:: include/net/mac80211.h + :doc: mac80211 workqueue + +.. kernel-doc:: include/net/mac80211.h + :functions: + ieee80211_queue_work + ieee80211_queue_delayed_work diff --git a/Documentation/driver-api/acpi/index.rst b/Documentation/driver-api/acpi/index.rst new file mode 100644 index 0000000000..ace0008e54 --- /dev/null +++ b/Documentation/driver-api/acpi/index.rst @@ -0,0 +1,9 @@ +============ +ACPI Support +============ + +.. toctree:: + :maxdepth: 2 + + linuxized-acpica + scan_handlers diff --git a/Documentation/driver-api/acpi/linuxized-acpica.rst b/Documentation/driver-api/acpi/linuxized-acpica.rst new file mode 100644 index 0000000000..cc234353d2 --- /dev/null +++ b/Documentation/driver-api/acpi/linuxized-acpica.rst @@ -0,0 +1,279 @@ +.. SPDX-License-Identifier: GPL-2.0 +.. include:: <isonum.txt> + +============================================================ +Linuxized ACPICA - Introduction to ACPICA Release Automation +============================================================ + +:Copyright: |copy| 2013-2016, Intel Corporation + +:Author: Lv Zheng <lv.zheng@intel.com> + + +Abstract +======== +This document describes the ACPICA project and the relationship between +ACPICA and Linux. It also describes how ACPICA code in drivers/acpi/acpica, +include/acpi and tools/power/acpi is automatically updated to follow the +upstream. + +ACPICA Project +============== + +The ACPI Component Architecture (ACPICA) project provides an operating +system (OS)-independent reference implementation of the Advanced +Configuration and Power Interface Specification (ACPI). It has been +adapted by various host OSes. By directly integrating ACPICA, Linux can +also benefit from the application experiences of ACPICA from other host +OSes. + +The homepage of ACPICA project is: www.acpica.org, it is maintained and +supported by Intel Corporation. + +The following figure depicts the Linux ACPI subsystem where the ACPICA +adaptation is included:: + + +---------------------------------------------------------+ + | | + | +---------------------------------------------------+ | + | | +------------------+ | | + | | | Table Management | | | + | | +------------------+ | | + | | +----------------------+ | | + | | | Namespace Management | | | + | | +----------------------+ | | + | | +------------------+ ACPICA Components | | + | | | Event Management | | | + | | +------------------+ | | + | | +---------------------+ | | + | | | Resource Management | | | + | | +---------------------+ | | + | | +---------------------+ | | + | | | Hardware Management | | | + | | +---------------------+ | | + | +---------------------------------------------------+ | | + | | | +------------------+ | | | + | | | | OS Service Layer | | | | + | | | +------------------+ | | | + | | +-------------------------------------------------|-+ | + | | +--------------------+ | | + | | | Device Enumeration | | | + | | +--------------------+ | | + | | +------------------+ | | + | | | Power Management | | | + | | +------------------+ Linux/ACPI Components | | + | | +--------------------+ | | + | | | Thermal Management | | | + | | +--------------------+ | | + | | +--------------------------+ | | + | | | Drivers for ACPI Devices | | | + | | +--------------------------+ | | + | | +--------+ | | + | | | ...... | | | + | | +--------+ | | + | +---------------------------------------------------+ | + | | + +---------------------------------------------------------+ + + Figure 1. Linux ACPI Software Components + +.. note:: + A. OS Service Layer - Provided by Linux to offer OS dependent + implementation of the predefined ACPICA interfaces (acpi_os_*). + :: + + include/acpi/acpiosxf.h + drivers/acpi/osl.c + include/acpi/platform + include/asm/acenv.h + B. ACPICA Functionality - Released from ACPICA code base to offer + OS independent implementation of the ACPICA interfaces (acpi_*). + :: + + drivers/acpi/acpica + include/acpi/ac*.h + tools/power/acpi + C. Linux/ACPI Functionality - Providing Linux specific ACPI + functionality to the other Linux kernel subsystems and user space + programs. + :: + + drivers/acpi + include/linux/acpi.h + include/linux/acpi*.h + include/acpi + tools/power/acpi + D. Architecture Specific ACPICA/ACPI Functionalities - Provided by the + ACPI subsystem to offer architecture specific implementation of the + ACPI interfaces. They are Linux specific components and are out of + the scope of this document. + :: + + include/asm/acpi.h + include/asm/acpi*.h + arch/*/acpi + +ACPICA Release +============== + +The ACPICA project maintains its code base at the following repository URL: +https://github.com/acpica/acpica.git. As a rule, a release is made every +month. + +As the coding style adopted by the ACPICA project is not acceptable by +Linux, there is a release process to convert the ACPICA git commits into +Linux patches. The patches generated by this process are referred to as +"linuxized ACPICA patches". The release process is carried out on a local +copy the ACPICA git repository. Each commit in the monthly release is +converted into a linuxized ACPICA patch. Together, they form the monthly +ACPICA release patchset for the Linux ACPI community. This process is +illustrated in the following figure:: + + +-----------------------------+ + | acpica / master (-) commits | + +-----------------------------+ + /|\ | + | \|/ + | /---------------------\ +----------------------+ + | < Linuxize repo Utility >-->| old linuxized acpica |--+ + | \---------------------/ +----------------------+ | + | | + /---------\ | + < git reset > \ + \---------/ \ + /|\ /+-+ + | / | + +-----------------------------+ | | + | acpica / master (+) commits | | | + +-----------------------------+ | | + | | | + \|/ | | + /-----------------------\ +----------------------+ | | + < Linuxize repo Utilities >-->| new linuxized acpica |--+ | + \-----------------------/ +----------------------+ | + \|/ + +--------------------------+ /----------------------\ + | Linuxized ACPICA Patches |<----------------< Linuxize patch Utility > + +--------------------------+ \----------------------/ + | + \|/ + /---------------------------\ + < Linux ACPI Community Review > + \---------------------------/ + | + \|/ + +-----------------------+ /------------------\ +----------------+ + | linux-pm / linux-next |-->< Linux Merge Window >-->| linux / master | + +-----------------------+ \------------------/ +----------------+ + + Figure 2. ACPICA -> Linux Upstream Process + +.. note:: + A. Linuxize Utilities - Provided by the ACPICA repository, including a + utility located in source/tools/acpisrc folder and a number of + scripts located in generate/linux folder. + B. acpica / master - "master" branch of the git repository at + <https://github.com/acpica/acpica.git>. + C. linux-pm / linux-next - "linux-next" branch of the git repository at + <https://git.kernel.org/pub/scm/linux/kernel/git/rafael/linux-pm.git>. + D. linux / master - "master" branch of the git repository at + <https://git.kernel.org/pub/scm/linux/kernel/git/torvalds/linux.git>. + + Before the linuxized ACPICA patches are sent to the Linux ACPI community + for review, there is a quality assurance build test process to reduce + porting issues. Currently this build process only takes care of the + following kernel configuration options: + CONFIG_ACPI/CONFIG_ACPI_DEBUG/CONFIG_ACPI_DEBUGGER + +ACPICA Divergences +================== + +Ideally, all of the ACPICA commits should be converted into Linux patches +automatically without manual modifications, the "linux / master" tree should +contain the ACPICA code that exactly corresponds to the ACPICA code +contained in "new linuxized acpica" tree and it should be possible to run +the release process fully automatically. + +As a matter of fact, however, there are source code differences between +the ACPICA code in Linux and the upstream ACPICA code, referred to as +"ACPICA Divergences". + +The various sources of ACPICA divergences include: + 1. Legacy divergences - Before the current ACPICA release process was + established, there already had been divergences between Linux and + ACPICA. Over the past several years those divergences have been greatly + reduced, but there still are several ones and it takes time to figure + out the underlying reasons for their existence. + 2. Manual modifications - Any manual modification (eg. coding style fixes) + made directly in the Linux sources obviously hurts the ACPICA release + automation. Thus it is recommended to fix such issues in the ACPICA + upstream source code and generate the linuxized fix using the ACPICA + release utilities (please refer to Section 4 below for the details). + 3. Linux specific features - Sometimes it's impossible to use the + current ACPICA APIs to implement features required by the Linux kernel, + so Linux developers occasionally have to change ACPICA code directly. + Those changes may not be acceptable by ACPICA upstream and in such cases + they are left as committed ACPICA divergences unless the ACPICA side can + implement new mechanisms as replacements for them. + 4. ACPICA release fixups - ACPICA only tests commits using a set of the + user space simulation utilities, thus the linuxized ACPICA patches may + break the Linux kernel, leaving us build/boot failures. In order to + avoid breaking Linux bisection, fixes are applied directly to the + linuxized ACPICA patches during the release process. When the release + fixups are backported to the upstream ACPICA sources, they must follow + the upstream ACPICA rules and so further modifications may appear. + That may result in the appearance of new divergences. + 5. Fast tracking of ACPICA commits - Some ACPICA commits are regression + fixes or stable-candidate material, so they are applied in advance with + respect to the ACPICA release process. If such commits are reverted or + rebased on the ACPICA side in order to offer better solutions, new ACPICA + divergences are generated. + +ACPICA Development +================== + +This paragraph guides Linux developers to use the ACPICA upstream release +utilities to obtain Linux patches corresponding to upstream ACPICA commits +before they become available from the ACPICA release process. + + 1. Cherry-pick an ACPICA commit + + First you need to git clone the ACPICA repository and the ACPICA change + you want to cherry pick must be committed into the local repository. + + Then the gen-patch.sh command can help to cherry-pick an ACPICA commit + from the ACPICA local repository:: + + $ git clone https://github.com/acpica/acpica + $ cd acpica + $ generate/linux/gen-patch.sh -u [commit ID] + + Here the commit ID is the ACPICA local repository commit ID you want to + cherry pick. It can be omitted if the commit is "HEAD". + + 2. Cherry-pick recent ACPICA commits + + Sometimes you need to rebase your code on top of the most recent ACPICA + changes that haven't been applied to Linux yet. + + You can generate the ACPICA release series yourself and rebase your code on + top of the generated ACPICA release patches:: + + $ git clone https://github.com/acpica/acpica + $ cd acpica + $ generate/linux/make-patches.sh -u [commit ID] + + The commit ID should be the last ACPICA commit accepted by Linux. Usually, + it is the commit modifying ACPI_CA_VERSION. It can be found by executing + "git blame source/include/acpixf.h" and referencing the line that contains + "ACPI_CA_VERSION". + + 3. Inspect the current divergences + + If you have local copies of both Linux and upstream ACPICA, you can generate + a diff file indicating the state of the current divergences:: + + # git clone https://github.com/acpica/acpica + # git clone https://git.kernel.org/pub/scm/linux/kernel/git/torvalds/linux.git + # cd acpica + # generate/linux/divergence.sh -s ../linux diff --git a/Documentation/driver-api/acpi/scan_handlers.rst b/Documentation/driver-api/acpi/scan_handlers.rst new file mode 100644 index 0000000000..7a197b3a33 --- /dev/null +++ b/Documentation/driver-api/acpi/scan_handlers.rst @@ -0,0 +1,83 @@ +.. SPDX-License-Identifier: GPL-2.0 +.. include:: <isonum.txt> + +================== +ACPI Scan Handlers +================== + +:Copyright: |copy| 2012, Intel Corporation + +:Author: Rafael J. Wysocki <rafael.j.wysocki@intel.com> + +During system initialization and ACPI-based device hot-add, the ACPI namespace +is scanned in search of device objects that generally represent various pieces +of hardware. This causes a struct acpi_device object to be created and +registered with the driver core for every device object in the ACPI namespace +and the hierarchy of those struct acpi_device objects reflects the namespace +layout (i.e. parent device objects in the namespace are represented by parent +struct acpi_device objects and analogously for their children). Those struct +acpi_device objects are referred to as "device nodes" in what follows, but they +should not be confused with struct device_node objects used by the Device Trees +parsing code (although their role is analogous to the role of those objects). + +During ACPI-based device hot-remove device nodes representing pieces of hardware +being removed are unregistered and deleted. + +The core ACPI namespace scanning code in drivers/acpi/scan.c carries out basic +initialization of device nodes, such as retrieving common configuration +information from the device objects represented by them and populating them with +appropriate data, but some of them require additional handling after they have +been registered. For example, if the given device node represents a PCI host +bridge, its registration should cause the PCI bus under that bridge to be +enumerated and PCI devices on that bus to be registered with the driver core. +Similarly, if the device node represents a PCI interrupt link, it is necessary +to configure that link so that the kernel can use it. + +Those additional configuration tasks usually depend on the type of the hardware +component represented by the given device node which can be determined on the +basis of the device node's hardware ID (HID). They are performed by objects +called ACPI scan handlers represented by the following structure:: + + struct acpi_scan_handler { + const struct acpi_device_id *ids; + struct list_head list_node; + int (*attach)(struct acpi_device *dev, const struct acpi_device_id *id); + void (*detach)(struct acpi_device *dev); + }; + +where ids is the list of IDs of device nodes the given handler is supposed to +take care of, list_node is the hook to the global list of ACPI scan handlers +maintained by the ACPI core and the .attach() and .detach() callbacks are +executed, respectively, after registration of new device nodes and before +unregistration of device nodes the handler attached to previously. + +The namespace scanning function, acpi_bus_scan(), first registers all of the +device nodes in the given namespace scope with the driver core. Then, it tries +to match a scan handler against each of them using the ids arrays of the +available scan handlers. If a matching scan handler is found, its .attach() +callback is executed for the given device node. If that callback returns 1, +that means that the handler has claimed the device node and is now responsible +for carrying out any additional configuration tasks related to it. It also will +be responsible for preparing the device node for unregistration in that case. +The device node's handler field is then populated with the address of the scan +handler that has claimed it. + +If the .attach() callback returns 0, it means that the device node is not +interesting to the given scan handler and may be matched against the next scan +handler in the list. If it returns a (negative) error code, that means that +the namespace scan should be terminated due to a serious error. The error code +returned should then reflect the type of the error. + +The namespace trimming function, acpi_bus_trim(), first executes .detach() +callbacks from the scan handlers of all device nodes in the given namespace +scope (if they have scan handlers). Next, it unregisters all of the device +nodes in that scope. + +ACPI scan handlers can be added to the list maintained by the ACPI core with the +help of the acpi_scan_add_handler() function taking a pointer to the new scan +handler as an argument. The order in which scan handlers are added to the list +is the order in which they are matched against device nodes during namespace +scans. + +All scan handles must be added to the list before acpi_bus_scan() is run for the +first time and they cannot be removed from it. diff --git a/Documentation/driver-api/aperture.rst b/Documentation/driver-api/aperture.rst new file mode 100644 index 0000000000..d173f4e7a7 --- /dev/null +++ b/Documentation/driver-api/aperture.rst @@ -0,0 +1,13 @@ +.. SPDX-License-Identifier: GPL-2.0 + +Managing Ownership of the Framebuffer Aperture +============================================== + +.. kernel-doc:: drivers/video/aperture.c + :doc: overview + +.. kernel-doc:: include/linux/aperture.h + :internal: + +.. kernel-doc:: drivers/video/aperture.c + :export: diff --git a/Documentation/driver-api/auxiliary_bus.rst b/Documentation/driver-api/auxiliary_bus.rst new file mode 100644 index 0000000000..cec84908fb --- /dev/null +++ b/Documentation/driver-api/auxiliary_bus.rst @@ -0,0 +1,50 @@ +.. SPDX-License-Identifier: GPL-2.0-only + +.. _auxiliary_bus: + +============= +Auxiliary Bus +============= + +.. kernel-doc:: drivers/base/auxiliary.c + :doc: PURPOSE + +When Should the Auxiliary Bus Be Used +===================================== + +.. kernel-doc:: drivers/base/auxiliary.c + :doc: USAGE + + +Auxiliary Device Creation +========================= + +.. kernel-doc:: include/linux/auxiliary_bus.h + :identifiers: auxiliary_device + +.. kernel-doc:: drivers/base/auxiliary.c + :identifiers: auxiliary_device_init __auxiliary_device_add + auxiliary_find_device + +Auxiliary Device Memory Model and Lifespan +------------------------------------------ + +.. kernel-doc:: include/linux/auxiliary_bus.h + :doc: DEVICE_LIFESPAN + + +Auxiliary Drivers +================= + +.. kernel-doc:: include/linux/auxiliary_bus.h + :identifiers: auxiliary_driver module_auxiliary_driver + +.. kernel-doc:: drivers/base/auxiliary.c + :identifiers: __auxiliary_driver_register auxiliary_driver_unregister + +Example Usage +============= + +.. kernel-doc:: drivers/base/auxiliary.c + :doc: EXAMPLE + diff --git a/Documentation/driver-api/backlight/lp855x-driver.rst b/Documentation/driver-api/backlight/lp855x-driver.rst new file mode 100644 index 0000000000..1e0b224fc3 --- /dev/null +++ b/Documentation/driver-api/backlight/lp855x-driver.rst @@ -0,0 +1,81 @@ +==================== +Kernel driver lp855x +==================== + +Backlight driver for LP855x ICs + +Supported chips: + + Texas Instruments LP8550, LP8551, LP8552, LP8553, LP8555, LP8556 and + LP8557 + +Author: Milo(Woogyom) Kim <milo.kim@ti.com> + +Description +----------- + +* Brightness control + + Brightness can be controlled by the pwm input or the i2c command. + The lp855x driver supports both cases. + +* Device attributes + + 1) bl_ctl_mode + + Backlight control mode. + + Value: pwm based or register based + + 2) chip_id + + The lp855x chip id. + + Value: lp8550/lp8551/lp8552/lp8553/lp8555/lp8556/lp8557 + +Platform data for lp855x +------------------------ + +For supporting platform specific data, the lp855x platform data can be used. + +* name: + Backlight driver name. If it is not defined, default name is set. +* device_control: + Value of DEVICE CONTROL register. +* initial_brightness: + Initial value of backlight brightness. +* period_ns: + Platform specific PWM period value. unit is nano. + Only valid when brightness is pwm input mode. +* size_program: + Total size of lp855x_rom_data. +* rom_data: + List of new eeprom/eprom registers. + +Examples +======== + +1) lp8552 platform data: i2c register mode with new eeprom data:: + + #define EEPROM_A5_ADDR 0xA5 + #define EEPROM_A5_VAL 0x4f /* EN_VSYNC=0 */ + + static struct lp855x_rom_data lp8552_eeprom_arr[] = { + {EEPROM_A5_ADDR, EEPROM_A5_VAL}, + }; + + static struct lp855x_platform_data lp8552_pdata = { + .name = "lcd-bl", + .device_control = I2C_CONFIG(LP8552), + .initial_brightness = INITIAL_BRT, + .size_program = ARRAY_SIZE(lp8552_eeprom_arr), + .rom_data = lp8552_eeprom_arr, + }; + +2) lp8556 platform data: pwm input mode with default rom data:: + + static struct lp855x_platform_data lp8556_pdata = { + .device_control = PWM_CONFIG(LP8556), + .initial_brightness = INITIAL_BRT, + .period_ns = 1000000, + }; diff --git a/Documentation/driver-api/basics.rst b/Documentation/driver-api/basics.rst new file mode 100644 index 0000000000..d78b7c328f --- /dev/null +++ b/Documentation/driver-api/basics.rst @@ -0,0 +1,130 @@ +Driver Basics +============= + +Driver Entry and Exit points +---------------------------- + +.. kernel-doc:: include/linux/module.h + :internal: + +Driver device table +------------------- + +.. kernel-doc:: include/linux/mod_devicetable.h + :internal: + :no-identifiers: pci_device_id + + +Delaying and scheduling routines +-------------------------------- + +.. kernel-doc:: include/linux/sched.h + :internal: + +.. kernel-doc:: kernel/sched/core.c + :export: + +.. kernel-doc:: kernel/sched/cpupri.c + :internal: + +.. kernel-doc:: kernel/sched/fair.c + :internal: + +.. kernel-doc:: include/linux/completion.h + :internal: + +Time and timer routines +----------------------- + +.. kernel-doc:: include/linux/jiffies.h + :internal: + +.. kernel-doc:: kernel/time/time.c + :export: + +.. kernel-doc:: kernel/time/timer.c + :export: + +High-resolution timers +---------------------- + +.. kernel-doc:: include/linux/ktime.h + :internal: + +.. kernel-doc:: include/linux/hrtimer.h + :internal: + +.. kernel-doc:: kernel/time/hrtimer.c + :export: + +Wait queues and Wake events +--------------------------- + +.. kernel-doc:: include/linux/wait.h + :internal: + +.. kernel-doc:: kernel/sched/wait.c + :export: + +Internal Functions +------------------ + +.. kernel-doc:: kernel/exit.c + :internal: + +.. kernel-doc:: kernel/signal.c + :internal: + +.. kernel-doc:: include/linux/kthread.h + :internal: + +.. kernel-doc:: kernel/kthread.c + :export: + +Reference counting +------------------ + +.. kernel-doc:: include/linux/refcount.h + :internal: + +.. kernel-doc:: lib/refcount.c + :export: + +Atomics +------- + +.. kernel-doc:: include/linux/atomic/atomic-instrumented.h + :internal: + +.. kernel-doc:: include/linux/atomic/atomic-arch-fallback.h + :internal: + +.. kernel-doc:: include/linux/atomic/atomic-long.h + :internal: + +Kernel objects manipulation +--------------------------- + +.. kernel-doc:: lib/kobject.c + :export: + +Kernel utility functions +------------------------ + +.. kernel-doc:: include/linux/kernel.h + :internal: + :no-identifiers: kstrtol kstrtoul + +.. kernel-doc:: kernel/printk/printk.c + :export: + :no-identifiers: printk + +.. kernel-doc:: kernel/panic.c + :export: + +Device Resource Management +-------------------------- + +.. kernel-doc:: drivers/base/devres.c + :export: + diff --git a/Documentation/driver-api/clk.rst b/Documentation/driver-api/clk.rst new file mode 100644 index 0000000000..93bab5336d --- /dev/null +++ b/Documentation/driver-api/clk.rst @@ -0,0 +1,312 @@ +======================== +The Common Clk Framework +======================== + +:Author: Mike Turquette <mturquette@ti.com> + +This document endeavours to explain the common clk framework details, +and how to port a platform over to this framework. It is not yet a +detailed explanation of the clock api in include/linux/clk.h, but +perhaps someday it will include that information. + +Introduction and interface split +================================ + +The common clk framework is an interface to control the clock nodes +available on various devices today. This may come in the form of clock +gating, rate adjustment, muxing or other operations. This framework is +enabled with the CONFIG_COMMON_CLK option. + +The interface itself is divided into two halves, each shielded from the +details of its counterpart. First is the common definition of struct +clk which unifies the framework-level accounting and infrastructure that +has traditionally been duplicated across a variety of platforms. Second +is a common implementation of the clk.h api, defined in +drivers/clk/clk.c. Finally there is struct clk_ops, whose operations +are invoked by the clk api implementation. + +The second half of the interface is comprised of the hardware-specific +callbacks registered with struct clk_ops and the corresponding +hardware-specific structures needed to model a particular clock. For +the remainder of this document any reference to a callback in struct +clk_ops, such as .enable or .set_rate, implies the hardware-specific +implementation of that code. Likewise, references to struct clk_foo +serve as a convenient shorthand for the implementation of the +hardware-specific bits for the hypothetical "foo" hardware. + +Tying the two halves of this interface together is struct clk_hw, which +is defined in struct clk_foo and pointed to within struct clk_core. This +allows for easy navigation between the two discrete halves of the common +clock interface. + +Common data structures and api +============================== + +Below is the common struct clk_core definition from +drivers/clk/clk.c, modified for brevity:: + + struct clk_core { + const char *name; + const struct clk_ops *ops; + struct clk_hw *hw; + struct module *owner; + struct clk_core *parent; + const char **parent_names; + struct clk_core **parents; + u8 num_parents; + u8 new_parent_index; + ... + }; + +The members above make up the core of the clk tree topology. The clk +api itself defines several driver-facing functions which operate on +struct clk. That api is documented in include/linux/clk.h. + +Platforms and devices utilizing the common struct clk_core use the struct +clk_ops pointer in struct clk_core to perform the hardware-specific parts of +the operations defined in clk-provider.h:: + + struct clk_ops { + int (*prepare)(struct clk_hw *hw); + void (*unprepare)(struct clk_hw *hw); + int (*is_prepared)(struct clk_hw *hw); + void (*unprepare_unused)(struct clk_hw *hw); + int (*enable)(struct clk_hw *hw); + void (*disable)(struct clk_hw *hw); + int (*is_enabled)(struct clk_hw *hw); + void (*disable_unused)(struct clk_hw *hw); + unsigned long (*recalc_rate)(struct clk_hw *hw, + unsigned long parent_rate); + long (*round_rate)(struct clk_hw *hw, + unsigned long rate, + unsigned long *parent_rate); + int (*determine_rate)(struct clk_hw *hw, + struct clk_rate_request *req); + int (*set_parent)(struct clk_hw *hw, u8 index); + u8 (*get_parent)(struct clk_hw *hw); + int (*set_rate)(struct clk_hw *hw, + unsigned long rate, + unsigned long parent_rate); + int (*set_rate_and_parent)(struct clk_hw *hw, + unsigned long rate, + unsigned long parent_rate, + u8 index); + unsigned long (*recalc_accuracy)(struct clk_hw *hw, + unsigned long parent_accuracy); + int (*get_phase)(struct clk_hw *hw); + int (*set_phase)(struct clk_hw *hw, int degrees); + void (*init)(struct clk_hw *hw); + void (*debug_init)(struct clk_hw *hw, + struct dentry *dentry); + }; + +Hardware clk implementations +============================ + +The strength of the common struct clk_core comes from its .ops and .hw pointers +which abstract the details of struct clk from the hardware-specific bits, and +vice versa. To illustrate consider the simple gateable clk implementation in +drivers/clk/clk-gate.c:: + + struct clk_gate { + struct clk_hw hw; + void __iomem *reg; + u8 bit_idx; + ... + }; + +struct clk_gate contains struct clk_hw hw as well as hardware-specific +knowledge about which register and bit controls this clk's gating. +Nothing about clock topology or accounting, such as enable_count or +notifier_count, is needed here. That is all handled by the common +framework code and struct clk_core. + +Let's walk through enabling this clk from driver code:: + + struct clk *clk; + clk = clk_get(NULL, "my_gateable_clk"); + + clk_prepare(clk); + clk_enable(clk); + +The call graph for clk_enable is very simple:: + + clk_enable(clk); + clk->ops->enable(clk->hw); + [resolves to...] + clk_gate_enable(hw); + [resolves struct clk gate with to_clk_gate(hw)] + clk_gate_set_bit(gate); + +And the definition of clk_gate_set_bit:: + + static void clk_gate_set_bit(struct clk_gate *gate) + { + u32 reg; + + reg = __raw_readl(gate->reg); + reg |= BIT(gate->bit_idx); + writel(reg, gate->reg); + } + +Note that to_clk_gate is defined as:: + + #define to_clk_gate(_hw) container_of(_hw, struct clk_gate, hw) + +This pattern of abstraction is used for every clock hardware +representation. + +Supporting your own clk hardware +================================ + +When implementing support for a new type of clock it is only necessary to +include the following header:: + + #include <linux/clk-provider.h> + +To construct a clk hardware structure for your platform you must define +the following:: + + struct clk_foo { + struct clk_hw hw; + ... hardware specific data goes here ... + }; + +To take advantage of your data you'll need to support valid operations +for your clk:: + + struct clk_ops clk_foo_ops = { + .enable = &clk_foo_enable, + .disable = &clk_foo_disable, + }; + +Implement the above functions using container_of:: + + #define to_clk_foo(_hw) container_of(_hw, struct clk_foo, hw) + + int clk_foo_enable(struct clk_hw *hw) + { + struct clk_foo *foo; + + foo = to_clk_foo(hw); + + ... perform magic on foo ... + + return 0; + }; + +Below is a matrix detailing which clk_ops are mandatory based upon the +hardware capabilities of that clock. A cell marked as "y" means +mandatory, a cell marked as "n" implies that either including that +callback is invalid or otherwise unnecessary. Empty cells are either +optional or must be evaluated on a case-by-case basis. + +.. table:: clock hardware characteristics + + +----------------+------+-------------+---------------+-------------+------+ + | | gate | change rate | single parent | multiplexer | root | + +================+======+=============+===============+=============+======+ + |.prepare | | | | | | + +----------------+------+-------------+---------------+-------------+------+ + |.unprepare | | | | | | + +----------------+------+-------------+---------------+-------------+------+ + +----------------+------+-------------+---------------+-------------+------+ + |.enable | y | | | | | + +----------------+------+-------------+---------------+-------------+------+ + |.disable | y | | | | | + +----------------+------+-------------+---------------+-------------+------+ + |.is_enabled | y | | | | | + +----------------+------+-------------+---------------+-------------+------+ + +----------------+------+-------------+---------------+-------------+------+ + |.recalc_rate | | y | | | | + +----------------+------+-------------+---------------+-------------+------+ + |.round_rate | | y [1]_ | | | | + +----------------+------+-------------+---------------+-------------+------+ + |.determine_rate | | y [1]_ | | | | + +----------------+------+-------------+---------------+-------------+------+ + |.set_rate | | y | | | | + +----------------+------+-------------+---------------+-------------+------+ + +----------------+------+-------------+---------------+-------------+------+ + |.set_parent | | | n | y | n | + +----------------+------+-------------+---------------+-------------+------+ + |.get_parent | | | n | y | n | + +----------------+------+-------------+---------------+-------------+------+ + +----------------+------+-------------+---------------+-------------+------+ + |.recalc_accuracy| | | | | | + +----------------+------+-------------+---------------+-------------+------+ + +----------------+------+-------------+---------------+-------------+------+ + |.init | | | | | | + +----------------+------+-------------+---------------+-------------+------+ + +.. [1] either one of round_rate or determine_rate is required. + +Finally, register your clock at run-time with a hardware-specific +registration function. This function simply populates struct clk_foo's +data and then passes the common struct clk parameters to the framework +with a call to:: + + clk_register(...) + +See the basic clock types in ``drivers/clk/clk-*.c`` for examples. + +Disabling clock gating of unused clocks +======================================= + +Sometimes during development it can be useful to be able to bypass the +default disabling of unused clocks. For example, if drivers aren't enabling +clocks properly but rely on them being on from the bootloader, bypassing +the disabling means that the driver will remain functional while the issues +are sorted out. + +You can see which clocks have been disabled by booting your kernel with these +parameters:: + + tp_printk trace_event=clk:clk_disable + +To bypass this disabling, include "clk_ignore_unused" in the bootargs to the +kernel. + +Locking +======= + +The common clock framework uses two global locks, the prepare lock and the +enable lock. + +The enable lock is a spinlock and is held across calls to the .enable, +.disable operations. Those operations are thus not allowed to sleep, +and calls to the clk_enable(), clk_disable() API functions are allowed in +atomic context. + +For clk_is_enabled() API, it is also designed to be allowed to be used in +atomic context. However, it doesn't really make any sense to hold the enable +lock in core, unless you want to do something else with the information of +the enable state with that lock held. Otherwise, seeing if a clk is enabled is +a one-shot read of the enabled state, which could just as easily change after +the function returns because the lock is released. Thus the user of this API +needs to handle synchronizing the read of the state with whatever they're +using it for to make sure that the enable state doesn't change during that +time. + +The prepare lock is a mutex and is held across calls to all other operations. +All those operations are allowed to sleep, and calls to the corresponding API +functions are not allowed in atomic context. + +This effectively divides operations in two groups from a locking perspective. + +Drivers don't need to manually protect resources shared between the operations +of one group, regardless of whether those resources are shared by multiple +clocks or not. However, access to resources that are shared between operations +of the two groups needs to be protected by the drivers. An example of such a +resource would be a register that controls both the clock rate and the clock +enable/disable state. + +The clock framework is reentrant, in that a driver is allowed to call clock +framework functions from within its implementation of clock operations. This +can for instance cause a .set_rate operation of one clock being called from +within the .set_rate operation of another clock. This case must be considered +in the driver implementations, but the code flow is usually controlled by the +driver in that case. + +Note that locking must also be considered when code outside of the common +clock framework needs to access resources used by the clock operations. This +is considered out of scope of this document. diff --git a/Documentation/driver-api/component.rst b/Documentation/driver-api/component.rst new file mode 100644 index 0000000000..57e3759073 --- /dev/null +++ b/Documentation/driver-api/component.rst @@ -0,0 +1,19 @@ +.. _component: + +====================================== +Component Helper for Aggregate Drivers +====================================== + +.. kernel-doc:: drivers/base/component.c + :doc: overview + + +API +=== + +.. kernel-doc:: include/linux/component.h + :internal: + +.. kernel-doc:: drivers/base/component.c + :export: + diff --git a/Documentation/driver-api/connector.rst b/Documentation/driver-api/connector.rst new file mode 100644 index 0000000000..631b84a48a --- /dev/null +++ b/Documentation/driver-api/connector.rst @@ -0,0 +1,157 @@ +.. SPDX-License-Identifier: GPL-2.0 + +================ +Kernel Connector +================ + +Kernel connector - new netlink based userspace <-> kernel space easy +to use communication module. + +The Connector driver makes it easy to connect various agents using a +netlink based network. One must register a callback and an identifier. +When the driver receives a special netlink message with the appropriate +identifier, the appropriate callback will be called. + +From the userspace point of view it's quite straightforward: + + - socket(); + - bind(); + - send(); + - recv(); + +But if kernelspace wants to use the full power of such connections, the +driver writer must create special sockets, must know about struct sk_buff +handling, etc... The Connector driver allows any kernelspace agents to use +netlink based networking for inter-process communication in a significantly +easier way:: + + int cn_add_callback(const struct cb_id *id, char *name, void (*callback) (struct cn_msg *, struct netlink_skb_parms *)); + void cn_netlink_send_mult(struct cn_msg *msg, u16 len, u32 portid, u32 __group, int gfp_mask); + void cn_netlink_send(struct cn_msg *msg, u32 portid, u32 __group, int gfp_mask); + + struct cb_id + { + __u32 idx; + __u32 val; + }; + +idx and val are unique identifiers which must be registered in the +connector.h header for in-kernel usage. `void (*callback) (void *)` is a +callback function which will be called when a message with above idx.val +is received by the connector core. The argument for that function must +be dereferenced to `struct cn_msg *`:: + + struct cn_msg + { + struct cb_id id; + + __u32 seq; + __u32 ack; + + __u16 len; /* Length of the following data */ + __u16 flags; + __u8 data[0]; + }; + +Connector interfaces +==================== + + .. kernel-doc:: include/linux/connector.h + + Note: + When registering new callback user, connector core assigns + netlink group to the user which is equal to its id.idx. + +Protocol description +==================== + +The current framework offers a transport layer with fixed headers. The +recommended protocol which uses such a header is as following: + +msg->seq and msg->ack are used to determine message genealogy. When +someone sends a message, they use a locally unique sequence and random +acknowledge number. The sequence number may be copied into +nlmsghdr->nlmsg_seq too. + +The sequence number is incremented with each message sent. + +If you expect a reply to the message, then the sequence number in the +received message MUST be the same as in the original message, and the +acknowledge number MUST be the same + 1. + +If we receive a message and its sequence number is not equal to one we +are expecting, then it is a new message. If we receive a message and +its sequence number is the same as one we are expecting, but its +acknowledge is not equal to the sequence number in the original +message + 1, then it is a new message. + +Obviously, the protocol header contains the above id. + +The connector allows event notification in the following form: kernel +driver or userspace process can ask connector to notify it when +selected ids will be turned on or off (registered or unregistered its +callback). It is done by sending a special command to the connector +driver (it also registers itself with id={-1, -1}). + +As example of this usage can be found in the cn_test.c module which +uses the connector to request notification and to send messages. + +Reliability +=========== + +Netlink itself is not a reliable protocol. That means that messages can +be lost due to memory pressure or process' receiving queue overflowed, +so caller is warned that it must be prepared. That is why the struct +cn_msg [main connector's message header] contains u32 seq and u32 ack +fields. + +Userspace usage +=============== + +2.6.14 has a new netlink socket implementation, which by default does not +allow people to send data to netlink groups other than 1. +So, if you wish to use a netlink socket (for example using connector) +with a different group number, the userspace application must subscribe to +that group first. It can be achieved by the following pseudocode:: + + s = socket(PF_NETLINK, SOCK_DGRAM, NETLINK_CONNECTOR); + + l_local.nl_family = AF_NETLINK; + l_local.nl_groups = 12345; + l_local.nl_pid = 0; + + if (bind(s, (struct sockaddr *)&l_local, sizeof(struct sockaddr_nl)) == -1) { + perror("bind"); + close(s); + return -1; + } + + { + int on = l_local.nl_groups; + setsockopt(s, 270, 1, &on, sizeof(on)); + } + +Where 270 above is SOL_NETLINK, and 1 is a NETLINK_ADD_MEMBERSHIP socket +option. To drop a multicast subscription, one should call the above socket +option with the NETLINK_DROP_MEMBERSHIP parameter which is defined as 0. + +2.6.14 netlink code only allows to select a group which is less or equal to +the maximum group number, which is used at netlink_kernel_create() time. +In case of connector it is CN_NETLINK_USERS + 0xf, so if you want to use +group number 12345, you must increment CN_NETLINK_USERS to that number. +Additional 0xf numbers are allocated to be used by non-in-kernel users. + +Due to this limitation, group 0xffffffff does not work now, so one can +not use add/remove connector's group notifications, but as far as I know, +only cn_test.c test module used it. + +Some work in netlink area is still being done, so things can be changed in +2.6.15 timeframe, if it will happen, documentation will be updated for that +kernel. + +Code samples +============ + +Sample code for a connector test module and user space can be found +in samples/connector/. To build this code, enable CONFIG_CONNECTOR +and CONFIG_SAMPLES. diff --git a/Documentation/driver-api/console.rst b/Documentation/driver-api/console.rst new file mode 100644 index 0000000000..8394ad7747 --- /dev/null +++ b/Documentation/driver-api/console.rst @@ -0,0 +1,152 @@ +.. SPDX-License-Identifier: GPL-2.0 + +=============== +Console Drivers +=============== + +The Linux kernel has 2 general types of console drivers. The first type is +assigned by the kernel to all the virtual consoles during the boot process. +This type will be called 'system driver', and only one system driver is allowed +to exist. The system driver is persistent and it can never be unloaded, though +it may become inactive. + +The second type has to be explicitly loaded and unloaded. This will be called +'modular driver' by this document. Multiple modular drivers can coexist at +any time with each driver sharing the console with other drivers including +the system driver. However, modular drivers cannot take over the console +that is currently occupied by another modular driver. (Exception: Drivers that +call do_take_over_console() will succeed in the takeover regardless of the type +of driver occupying the consoles.) They can only take over the console that is +occupied by the system driver. In the same token, if the modular driver is +released by the console, the system driver will take over. + +Modular drivers, from the programmer's point of view, have to call:: + + do_take_over_console() - load and bind driver to console layer + give_up_console() - unload driver; it will only work if driver + is fully unbound + +In newer kernels, the following are also available:: + + do_register_con_driver() + do_unregister_con_driver() + +If sysfs is enabled, the contents of /sys/class/vtconsole can be +examined. This shows the console backends currently registered by the +system which are named vtcon<n> where <n> is an integer from 0 to 15. +Thus:: + + ls /sys/class/vtconsole + . .. vtcon0 vtcon1 + +Each directory in /sys/class/vtconsole has 3 files:: + + ls /sys/class/vtconsole/vtcon0 + . .. bind name uevent + +What do these files signify? + + 1. bind - this is a read/write file. It shows the status of the driver if + read, or acts to bind or unbind the driver to the virtual consoles + when written to. The possible values are: + + 0 + - means the driver is not bound and if echo'ed, commands the driver + to unbind + + 1 + - means the driver is bound and if echo'ed, commands the driver to + bind + + 2. name - read-only file. Shows the name of the driver in this format:: + + cat /sys/class/vtconsole/vtcon0/name + (S) VGA+ + + '(S)' stands for a (S)ystem driver, i.e., it cannot be directly + commanded to bind or unbind + + 'VGA+' is the name of the driver + + cat /sys/class/vtconsole/vtcon1/name + (M) frame buffer device + + In this case, '(M)' stands for a (M)odular driver, one that can be + directly commanded to bind or unbind. + + 3. uevent - ignore this file + +When unbinding, the modular driver is detached first, and then the system +driver takes over the consoles vacated by the driver. Binding, on the other +hand, will bind the driver to the consoles that are currently occupied by a +system driver. + +NOTE1: + Binding and unbinding must be selected in Kconfig. It's under:: + + Device Drivers -> + Character devices -> + Support for binding and unbinding console drivers + +NOTE2: + If any of the virtual consoles are in KD_GRAPHICS mode, then binding or + unbinding will not succeed. An example of an application that sets the + console to KD_GRAPHICS is X. + +How useful is this feature? This is very useful for console driver +developers. By unbinding the driver from the console layer, one can unload the +driver, make changes, recompile, reload and rebind the driver without any need +for rebooting the kernel. For regular users who may want to switch from +framebuffer console to VGA console and vice versa, this feature also makes +this possible. (NOTE NOTE NOTE: Please read fbcon.txt under Documentation/fb +for more details.) + +Notes for developers +==================== + +do_take_over_console() is now broken up into:: + + do_register_con_driver() + do_bind_con_driver() - private function + +give_up_console() is a wrapper to do_unregister_con_driver(), and a driver must +be fully unbound for this call to succeed. con_is_bound() will check if the +driver is bound or not. + +Guidelines for console driver writers +===================================== + +In order for binding to and unbinding from the console to properly work, +console drivers must follow these guidelines: + +1. All drivers, except system drivers, must call either do_register_con_driver() + or do_take_over_console(). do_register_con_driver() will just add the driver + to the console's internal list. It won't take over the + console. do_take_over_console(), as it name implies, will also take over (or + bind to) the console. + +2. All resources allocated during con->con_init() must be released in + con->con_deinit(). + +3. All resources allocated in con->con_startup() must be released when the + driver, which was previously bound, becomes unbound. The console layer + does not have a complementary call to con->con_startup() so it's up to the + driver to check when it's legal to release these resources. Calling + con_is_bound() in con->con_deinit() will help. If the call returned + false(), then it's safe to release the resources. This balance has to be + ensured because con->con_startup() can be called again when a request to + rebind the driver to the console arrives. + +4. Upon exit of the driver, ensure that the driver is totally unbound. If the + condition is satisfied, then the driver must call do_unregister_con_driver() + or give_up_console(). + +5. do_unregister_con_driver() can also be called on conditions which make it + impossible for the driver to service console requests. This can happen + with the framebuffer console that suddenly lost all of its drivers. + +The current crop of console drivers should still work correctly, but binding +and unbinding them may cause problems. With minimal fixes, these drivers can +be made to work correctly. + +Antonino Daplas <adaplas@pol.net> diff --git a/Documentation/driver-api/cxl/index.rst b/Documentation/driver-api/cxl/index.rst new file mode 100644 index 0000000000..036e495535 --- /dev/null +++ b/Documentation/driver-api/cxl/index.rst @@ -0,0 +1,12 @@ +.. SPDX-License-Identifier: GPL-2.0 + +==================== +Compute Express Link +==================== + +.. toctree:: + :maxdepth: 1 + + memory-devices + +.. only:: subproject and html diff --git a/Documentation/driver-api/cxl/memory-devices.rst b/Documentation/driver-api/cxl/memory-devices.rst new file mode 100644 index 0000000000..5149ecdc53 --- /dev/null +++ b/Documentation/driver-api/cxl/memory-devices.rst @@ -0,0 +1,383 @@ +.. SPDX-License-Identifier: GPL-2.0 +.. include:: <isonum.txt> + +=================================== +Compute Express Link Memory Devices +=================================== + +A Compute Express Link Memory Device is a CXL component that implements the +CXL.mem protocol. It contains some amount of volatile memory, persistent memory, +or both. It is enumerated as a PCI device for configuration and passing +messages over an MMIO mailbox. Its contribution to the System Physical +Address space is handled via HDM (Host Managed Device Memory) decoders +that optionally define a device's contribution to an interleaved address +range across multiple devices underneath a host-bridge or interleaved +across host-bridges. + +CXL Bus: Theory of Operation +============================ +Similar to how a RAID driver takes disk objects and assembles them into a new +logical device, the CXL subsystem is tasked to take PCIe and ACPI objects and +assemble them into a CXL.mem decode topology. The need for runtime configuration +of the CXL.mem topology is also similar to RAID in that different environments +with the same hardware configuration may decide to assemble the topology in +contrasting ways. One may choose performance (RAID0) striping memory across +multiple Host Bridges and endpoints while another may opt for fault tolerance +and disable any striping in the CXL.mem topology. + +Platform firmware enumerates a menu of interleave options at the "CXL root port" +(Linux term for the top of the CXL decode topology). From there, PCIe topology +dictates which endpoints can participate in which Host Bridge decode regimes. +Each PCIe Switch in the path between the root and an endpoint introduces a point +at which the interleave can be split. For example platform firmware may say at a +given range only decodes to 1 one Host Bridge, but that Host Bridge may in turn +interleave cycles across multiple Root Ports. An intervening Switch between a +port and an endpoint may interleave cycles across multiple Downstream Switch +Ports, etc. + +Here is a sample listing of a CXL topology defined by 'cxl_test'. The 'cxl_test' +module generates an emulated CXL topology of 2 Host Bridges each with 2 Root +Ports. Each of those Root Ports are connected to 2-way switches with endpoints +connected to those downstream ports for a total of 8 endpoints:: + + # cxl list -BEMPu -b cxl_test + { + "bus":"root3", + "provider":"cxl_test", + "ports:root3":[ + { + "port":"port5", + "host":"cxl_host_bridge.1", + "ports:port5":[ + { + "port":"port8", + "host":"cxl_switch_uport.1", + "endpoints:port8":[ + { + "endpoint":"endpoint9", + "host":"mem2", + "memdev":{ + "memdev":"mem2", + "pmem_size":"256.00 MiB (268.44 MB)", + "ram_size":"256.00 MiB (268.44 MB)", + "serial":"0x1", + "numa_node":1, + "host":"cxl_mem.1" + } + }, + { + "endpoint":"endpoint15", + "host":"mem6", + "memdev":{ + "memdev":"mem6", + "pmem_size":"256.00 MiB (268.44 MB)", + "ram_size":"256.00 MiB (268.44 MB)", + "serial":"0x5", + "numa_node":1, + "host":"cxl_mem.5" + } + } + ] + }, + { + "port":"port12", + "host":"cxl_switch_uport.3", + "endpoints:port12":[ + { + "endpoint":"endpoint17", + "host":"mem8", + "memdev":{ + "memdev":"mem8", + "pmem_size":"256.00 MiB (268.44 MB)", + "ram_size":"256.00 MiB (268.44 MB)", + "serial":"0x7", + "numa_node":1, + "host":"cxl_mem.7" + } + }, + { + "endpoint":"endpoint13", + "host":"mem4", + "memdev":{ + "memdev":"mem4", + "pmem_size":"256.00 MiB (268.44 MB)", + "ram_size":"256.00 MiB (268.44 MB)", + "serial":"0x3", + "numa_node":1, + "host":"cxl_mem.3" + } + } + ] + } + ] + }, + { + "port":"port4", + "host":"cxl_host_bridge.0", + "ports:port4":[ + { + "port":"port6", + "host":"cxl_switch_uport.0", + "endpoints:port6":[ + { + "endpoint":"endpoint7", + "host":"mem1", + "memdev":{ + "memdev":"mem1", + "pmem_size":"256.00 MiB (268.44 MB)", + "ram_size":"256.00 MiB (268.44 MB)", + "serial":"0", + "numa_node":0, + "host":"cxl_mem.0" + } + }, + { + "endpoint":"endpoint14", + "host":"mem5", + "memdev":{ + "memdev":"mem5", + "pmem_size":"256.00 MiB (268.44 MB)", + "ram_size":"256.00 MiB (268.44 MB)", + "serial":"0x4", + "numa_node":0, + "host":"cxl_mem.4" + } + } + ] + }, + { + "port":"port10", + "host":"cxl_switch_uport.2", + "endpoints:port10":[ + { + "endpoint":"endpoint16", + "host":"mem7", + "memdev":{ + "memdev":"mem7", + "pmem_size":"256.00 MiB (268.44 MB)", + "ram_size":"256.00 MiB (268.44 MB)", + "serial":"0x6", + "numa_node":0, + "host":"cxl_mem.6" + } + }, + { + "endpoint":"endpoint11", + "host":"mem3", + "memdev":{ + "memdev":"mem3", + "pmem_size":"256.00 MiB (268.44 MB)", + "ram_size":"256.00 MiB (268.44 MB)", + "serial":"0x2", + "numa_node":0, + "host":"cxl_mem.2" + } + } + ] + } + ] + } + ] + } + +In that listing each "root", "port", and "endpoint" object correspond a kernel +'struct cxl_port' object. A 'cxl_port' is a device that can decode CXL.mem to +its descendants. So "root" claims non-PCIe enumerable platform decode ranges and +decodes them to "ports", "ports" decode to "endpoints", and "endpoints" +represent the decode from SPA (System Physical Address) to DPA (Device Physical +Address). + +Continuing the RAID analogy, disks have both topology metadata and on device +metadata that determine RAID set assembly. CXL Port topology and CXL Port link +status is metadata for CXL.mem set assembly. The CXL Port topology is enumerated +by the arrival of a CXL.mem device. I.e. unless and until the PCIe core attaches +the cxl_pci driver to a CXL Memory Expander there is no role for CXL Port +objects. Conversely for hot-unplug / removal scenarios, there is no need for +the Linux PCI core to tear down switch-level CXL resources because the endpoint +->remove() event cleans up the port data that was established to support that +Memory Expander. + +The port metadata and potential decode schemes that a give memory device may +participate can be determined via a command like:: + + # cxl list -BDMu -d root -m mem3 + { + "bus":"root3", + "provider":"cxl_test", + "decoders:root3":[ + { + "decoder":"decoder3.1", + "resource":"0x8030000000", + "size":"512.00 MiB (536.87 MB)", + "volatile_capable":true, + "nr_targets":2 + }, + { + "decoder":"decoder3.3", + "resource":"0x8060000000", + "size":"512.00 MiB (536.87 MB)", + "pmem_capable":true, + "nr_targets":2 + }, + { + "decoder":"decoder3.0", + "resource":"0x8020000000", + "size":"256.00 MiB (268.44 MB)", + "volatile_capable":true, + "nr_targets":1 + }, + { + "decoder":"decoder3.2", + "resource":"0x8050000000", + "size":"256.00 MiB (268.44 MB)", + "pmem_capable":true, + "nr_targets":1 + } + ], + "memdevs:root3":[ + { + "memdev":"mem3", + "pmem_size":"256.00 MiB (268.44 MB)", + "ram_size":"256.00 MiB (268.44 MB)", + "serial":"0x2", + "numa_node":0, + "host":"cxl_mem.2" + } + ] + } + +...which queries the CXL topology to ask "given CXL Memory Expander with a kernel +device name of 'mem3' which platform level decode ranges may this device +participate". A given expander can participate in multiple CXL.mem interleave +sets simultaneously depending on how many decoder resource it has. In this +example mem3 can participate in one or more of a PMEM interleave that spans to +Host Bridges, a PMEM interleave that targets a single Host Bridge, a Volatile +memory interleave that spans 2 Host Bridges, and a Volatile memory interleave +that only targets a single Host Bridge. + +Conversely the memory devices that can participate in a given platform level +decode scheme can be determined via a command like the following:: + + # cxl list -MDu -d 3.2 + [ + { + "memdevs":[ + { + "memdev":"mem1", + "pmem_size":"256.00 MiB (268.44 MB)", + "ram_size":"256.00 MiB (268.44 MB)", + "serial":"0", + "numa_node":0, + "host":"cxl_mem.0" + }, + { + "memdev":"mem5", + "pmem_size":"256.00 MiB (268.44 MB)", + "ram_size":"256.00 MiB (268.44 MB)", + "serial":"0x4", + "numa_node":0, + "host":"cxl_mem.4" + }, + { + "memdev":"mem7", + "pmem_size":"256.00 MiB (268.44 MB)", + "ram_size":"256.00 MiB (268.44 MB)", + "serial":"0x6", + "numa_node":0, + "host":"cxl_mem.6" + }, + { + "memdev":"mem3", + "pmem_size":"256.00 MiB (268.44 MB)", + "ram_size":"256.00 MiB (268.44 MB)", + "serial":"0x2", + "numa_node":0, + "host":"cxl_mem.2" + } + ] + }, + { + "root decoders":[ + { + "decoder":"decoder3.2", + "resource":"0x8050000000", + "size":"256.00 MiB (268.44 MB)", + "pmem_capable":true, + "nr_targets":1 + } + ] + } + ] + +...where the naming scheme for decoders is "decoder<port_id>.<instance_id>". + +Driver Infrastructure +===================== + +This section covers the driver infrastructure for a CXL memory device. + +CXL Memory Device +----------------- + +.. kernel-doc:: drivers/cxl/pci.c + :doc: cxl pci + +.. kernel-doc:: drivers/cxl/pci.c + :internal: + +.. kernel-doc:: drivers/cxl/mem.c + :doc: cxl mem + +CXL Port +-------- +.. kernel-doc:: drivers/cxl/port.c + :doc: cxl port + +CXL Core +-------- +.. kernel-doc:: drivers/cxl/cxl.h + :doc: cxl objects + +.. kernel-doc:: drivers/cxl/cxl.h + :internal: + +.. kernel-doc:: drivers/cxl/core/port.c + :doc: cxl core + +.. kernel-doc:: drivers/cxl/core/port.c + :identifiers: + +.. kernel-doc:: drivers/cxl/core/pci.c + :doc: cxl core pci + +.. kernel-doc:: drivers/cxl/core/pci.c + :identifiers: + +.. kernel-doc:: drivers/cxl/core/pmem.c + :doc: cxl pmem + +.. kernel-doc:: drivers/cxl/core/regs.c + :doc: cxl registers + +.. kernel-doc:: drivers/cxl/core/mbox.c + :doc: cxl mbox + +CXL Regions +----------- +.. kernel-doc:: drivers/cxl/core/region.c + :doc: cxl core region + +.. kernel-doc:: drivers/cxl/core/region.c + :identifiers: + +External Interfaces +=================== + +CXL IOCTL Interface +------------------- + +.. kernel-doc:: include/uapi/linux/cxl_mem.h + :doc: UAPI + +.. kernel-doc:: include/uapi/linux/cxl_mem.h + :internal: diff --git a/Documentation/driver-api/dcdbas.rst b/Documentation/driver-api/dcdbas.rst new file mode 100644 index 0000000000..309cc57a7c --- /dev/null +++ b/Documentation/driver-api/dcdbas.rst @@ -0,0 +1,99 @@ +=================================== +Dell Systems Management Base Driver +=================================== + +Overview +======== + +The Dell Systems Management Base Driver provides a sysfs interface for +systems management software such as Dell OpenManage to perform system +management interrupts and host control actions (system power cycle or +power off after OS shutdown) on certain Dell systems. + +Dell OpenManage requires this driver on the following Dell PowerEdge systems: +300, 1300, 1400, 400SC, 500SC, 1500SC, 1550, 600SC, 1600SC, 650, 1655MC, +700, and 750. Other Dell software such as the open source libsmbios project +is expected to make use of this driver, and it may include the use of this +driver on other Dell systems. + +The Dell libsmbios project aims towards providing access to as much BIOS +information as possible. See http://linux.dell.com/libsmbios/main/ for +more information about the libsmbios project. + + +System Management Interrupt +=========================== + +On some Dell systems, systems management software must access certain +management information via a system management interrupt (SMI). The SMI data +buffer must reside in 32-bit address space, and the physical address of the +buffer is required for the SMI. The driver maintains the memory required for +the SMI and provides a way for the application to generate the SMI. +The driver creates the following sysfs entries for systems management +software to perform these system management interrupts:: + + /sys/devices/platform/dcdbas/smi_data + /sys/devices/platform/dcdbas/smi_data_buf_phys_addr + /sys/devices/platform/dcdbas/smi_data_buf_size + /sys/devices/platform/dcdbas/smi_request + +Systems management software must perform the following steps to execute +a SMI using this driver: + +1) Lock smi_data. +2) Write system management command to smi_data. +3) Write "1" to smi_request to generate a calling interface SMI or + "2" to generate a raw SMI. +4) Read system management command response from smi_data. +5) Unlock smi_data. + + +Host Control Action +=================== + +Dell OpenManage supports a host control feature that allows the administrator +to perform a power cycle or power off of the system after the OS has finished +shutting down. On some Dell systems, this host control feature requires that +a driver perform a SMI after the OS has finished shutting down. + +The driver creates the following sysfs entries for systems management software +to schedule the driver to perform a power cycle or power off host control +action after the system has finished shutting down: + +/sys/devices/platform/dcdbas/host_control_action +/sys/devices/platform/dcdbas/host_control_smi_type +/sys/devices/platform/dcdbas/host_control_on_shutdown + +Dell OpenManage performs the following steps to execute a power cycle or +power off host control action using this driver: + +1) Write host control action to be performed to host_control_action. +2) Write type of SMI that driver needs to perform to host_control_smi_type. +3) Write "1" to host_control_on_shutdown to enable host control action. +4) Initiate OS shutdown. + (Driver will perform host control SMI when it is notified that the OS + has finished shutting down.) + + +Host Control SMI Type +===================== + +The following table shows the value to write to host_control_smi_type to +perform a power cycle or power off host control action: + +=================== ===================== +PowerEdge System Host Control SMI Type +=================== ===================== + 300 HC_SMITYPE_TYPE1 + 1300 HC_SMITYPE_TYPE1 + 1400 HC_SMITYPE_TYPE2 + 500SC HC_SMITYPE_TYPE2 + 1500SC HC_SMITYPE_TYPE2 + 1550 HC_SMITYPE_TYPE2 + 600SC HC_SMITYPE_TYPE2 + 1600SC HC_SMITYPE_TYPE2 + 650 HC_SMITYPE_TYPE2 + 1655MC HC_SMITYPE_TYPE2 + 700 HC_SMITYPE_TYPE3 + 750 HC_SMITYPE_TYPE3 +=================== ===================== diff --git a/Documentation/driver-api/devfreq.rst b/Documentation/driver-api/devfreq.rst new file mode 100644 index 0000000000..4a0bf87a3b --- /dev/null +++ b/Documentation/driver-api/devfreq.rst @@ -0,0 +1,30 @@ +.. SPDX-License-Identifier: GPL-2.0 + +======================== +Device Frequency Scaling +======================== + +Introduction +------------ + +This framework provides a standard kernel interface for Dynamic Voltage and +Frequency Switching on arbitrary devices. + +It exposes controls for adjusting frequency through sysfs files which are +similar to the cpufreq subsystem. + +Devices for which current usage can be measured can have their frequency +automatically adjusted by governors. + +API +--- + +Device drivers need to initialize a :c:type:`devfreq_profile` and call the +:c:func:`devfreq_add_device` function to create a :c:type:`devfreq` instance. + +.. kernel-doc:: include/linux/devfreq.h +.. kernel-doc:: include/linux/devfreq-event.h +.. kernel-doc:: drivers/devfreq/devfreq.c + :export: +.. kernel-doc:: drivers/devfreq/devfreq-event.c + :export: diff --git a/Documentation/driver-api/device-io.rst b/Documentation/driver-api/device-io.rst new file mode 100644 index 0000000000..2c7abd234f --- /dev/null +++ b/Documentation/driver-api/device-io.rst @@ -0,0 +1,521 @@ +.. Copyright 2001 Matthew Wilcox +.. +.. This documentation is free software; you can redistribute +.. it and/or modify it under the terms of the GNU General Public +.. License as published by the Free Software Foundation; either +.. version 2 of the License, or (at your option) any later +.. version. + +=============================== +Bus-Independent Device Accesses +=============================== + +:Author: Matthew Wilcox +:Author: Alan Cox + +Introduction +============ + +Linux provides an API which abstracts performing IO across all busses +and devices, allowing device drivers to be written independently of bus +type. + +Memory Mapped IO +================ + +Getting Access to the Device +---------------------------- + +The most widely supported form of IO is memory mapped IO. That is, a +part of the CPU's address space is interpreted not as accesses to +memory, but as accesses to a device. Some architectures define devices +to be at a fixed address, but most have some method of discovering +devices. The PCI bus walk is a good example of such a scheme. This +document does not cover how to receive such an address, but assumes you +are starting with one. Physical addresses are of type unsigned long. + +This address should not be used directly. Instead, to get an address +suitable for passing to the accessor functions described below, you +should call ioremap(). An address suitable for accessing +the device will be returned to you. + +After you've finished using the device (say, in your module's exit +routine), call iounmap() in order to return the address +space to the kernel. Most architectures allocate new address space each +time you call ioremap(), and they can run out unless you +call iounmap(). + +Accessing the device +-------------------- + +The part of the interface most used by drivers is reading and writing +memory-mapped registers on the device. Linux provides interfaces to read +and write 8-bit, 16-bit, 32-bit and 64-bit quantities. Due to a +historical accident, these are named byte, word, long and quad accesses. +Both read and write accesses are supported; there is no prefetch support +at this time. + +The functions are named readb(), readw(), readl(), readq(), +readb_relaxed(), readw_relaxed(), readl_relaxed(), readq_relaxed(), +writeb(), writew(), writel() and writeq(). + +Some devices (such as framebuffers) would like to use larger transfers than +8 bytes at a time. For these devices, the memcpy_toio(), +memcpy_fromio() and memset_io() functions are +provided. Do not use memset or memcpy on IO addresses; they are not +guaranteed to copy data in order. + +The read and write functions are defined to be ordered. That is the +compiler is not permitted to reorder the I/O sequence. When the ordering +can be compiler optimised, you can use __readb() and friends to +indicate the relaxed ordering. Use this with care. + +While the basic functions are defined to be synchronous with respect to +each other and ordered with respect to each other the busses the devices +sit on may themselves have asynchronicity. In particular many authors +are burned by the fact that PCI bus writes are posted asynchronously. A +driver author must issue a read from the same device to ensure that +writes have occurred in the specific cases the author cares. This kind +of property cannot be hidden from driver writers in the API. In some +cases, the read used to flush the device may be expected to fail (if the +card is resetting, for example). In that case, the read should be done +from config space, which is guaranteed to soft-fail if the card doesn't +respond. + +The following is an example of flushing a write to a device when the +driver would like to ensure the write's effects are visible prior to +continuing execution:: + + static inline void + qla1280_disable_intrs(struct scsi_qla_host *ha) + { + struct device_reg *reg; + + reg = ha->iobase; + /* disable risc and host interrupts */ + WRT_REG_WORD(®->ictrl, 0); + /* + * The following read will ensure that the above write + * has been received by the device before we return from this + * function. + */ + RD_REG_WORD(®->ictrl); + ha->flags.ints_enabled = 0; + } + +PCI ordering rules also guarantee that PIO read responses arrive after any +outstanding DMA writes from that bus, since for some devices the result of +a readb() call may signal to the driver that a DMA transaction is +complete. In many cases, however, the driver may want to indicate that the +next readb() call has no relation to any previous DMA writes +performed by the device. The driver can use readb_relaxed() for +these cases, although only some platforms will honor the relaxed +semantics. Using the relaxed read functions will provide significant +performance benefits on platforms that support it. The qla2xxx driver +provides examples of how to use readX_relaxed(). In many cases, a majority +of the driver's readX() calls can safely be converted to readX_relaxed() +calls, since only a few will indicate or depend on DMA completion. + +Port Space Accesses +=================== + +Port Space Explained +-------------------- + +Another form of IO commonly supported is Port Space. This is a range of +addresses separate to the normal memory address space. Access to these +addresses is generally not as fast as accesses to the memory mapped +addresses, and it also has a potentially smaller address space. + +Unlike memory mapped IO, no preparation is required to access port +space. + +Accessing Port Space +-------------------- + +Accesses to this space are provided through a set of functions which +allow 8-bit, 16-bit and 32-bit accesses; also known as byte, word and +long. These functions are inb(), inw(), +inl(), outb(), outw() and +outl(). + +Some variants are provided for these functions. Some devices require +that accesses to their ports are slowed down. This functionality is +provided by appending a ``_p`` to the end of the function. +There are also equivalents to memcpy. The ins() and +outs() functions copy bytes, words or longs to the given +port. + +__iomem pointer tokens +====================== + +The data type for an MMIO address is an ``__iomem`` qualified pointer, such as +``void __iomem *reg``. On most architectures it is a regular pointer that +points to a virtual memory address and can be offset or dereferenced, but in +portable code, it must only be passed from and to functions that explicitly +operated on an ``__iomem`` token, in particular the ioremap() and +readl()/writel() functions. The 'sparse' semantic code checker can be used to +verify that this is done correctly. + +While on most architectures, ioremap() creates a page table entry for an +uncached virtual address pointing to the physical MMIO address, some +architectures require special instructions for MMIO, and the ``__iomem`` pointer +just encodes the physical address or an offsettable cookie that is interpreted +by readl()/writel(). + +Differences between I/O access functions +======================================== + +readq(), readl(), readw(), readb(), writeq(), writel(), writew(), writeb() + + These are the most generic accessors, providing serialization against other + MMIO accesses and DMA accesses as well as fixed endianness for accessing + little-endian PCI devices and on-chip peripherals. Portable device drivers + should generally use these for any access to ``__iomem`` pointers. + + Note that posted writes are not strictly ordered against a spinlock, see + Documentation/driver-api/io_ordering.rst. + +readq_relaxed(), readl_relaxed(), readw_relaxed(), readb_relaxed(), +writeq_relaxed(), writel_relaxed(), writew_relaxed(), writeb_relaxed() + + On architectures that require an expensive barrier for serializing against + DMA, these "relaxed" versions of the MMIO accessors only serialize against + each other, but contain a less expensive barrier operation. A device driver + might use these in a particularly performance sensitive fast path, with a + comment that explains why the usage in a specific location is safe without + the extra barriers. + + See memory-barriers.txt for a more detailed discussion on the precise ordering + guarantees of the non-relaxed and relaxed versions. + +ioread64(), ioread32(), ioread16(), ioread8(), +iowrite64(), iowrite32(), iowrite16(), iowrite8() + + These are an alternative to the normal readl()/writel() functions, with almost + identical behavior, but they can also operate on ``__iomem`` tokens returned + for mapping PCI I/O space with pci_iomap() or ioport_map(). On architectures + that require special instructions for I/O port access, this adds a small + overhead for an indirect function call implemented in lib/iomap.c, while on + other architectures, these are simply aliases. + +ioread64be(), ioread32be(), ioread16be() +iowrite64be(), iowrite32be(), iowrite16be() + + These behave in the same way as the ioread32()/iowrite32() family, but with + reversed byte order, for accessing devices with big-endian MMIO registers. + Device drivers that can operate on either big-endian or little-endian + registers may have to implement a custom wrapper function that picks one or + the other depending on which device was found. + + Note: On some architectures, the normal readl()/writel() functions + traditionally assume that devices are the same endianness as the CPU, while + using a hardware byte-reverse on the PCI bus when running a big-endian kernel. + Drivers that use readl()/writel() this way are generally not portable, but + tend to be limited to a particular SoC. + +hi_lo_readq(), lo_hi_readq(), hi_lo_readq_relaxed(), lo_hi_readq_relaxed(), +ioread64_lo_hi(), ioread64_hi_lo(), ioread64be_lo_hi(), ioread64be_hi_lo(), +hi_lo_writeq(), lo_hi_writeq(), hi_lo_writeq_relaxed(), lo_hi_writeq_relaxed(), +iowrite64_lo_hi(), iowrite64_hi_lo(), iowrite64be_lo_hi(), iowrite64be_hi_lo() + + Some device drivers have 64-bit registers that cannot be accessed atomically + on 32-bit architectures but allow two consecutive 32-bit accesses instead. + Since it depends on the particular device which of the two halves has to be + accessed first, a helper is provided for each combination of 64-bit accessors + with either low/high or high/low word ordering. A device driver must include + either <linux/io-64-nonatomic-lo-hi.h> or <linux/io-64-nonatomic-hi-lo.h> to + get the function definitions along with helpers that redirect the normal + readq()/writeq() to them on architectures that do not provide 64-bit access + natively. + +__raw_readq(), __raw_readl(), __raw_readw(), __raw_readb(), +__raw_writeq(), __raw_writel(), __raw_writew(), __raw_writeb() + + These are low-level MMIO accessors without barriers or byteorder changes and + architecture specific behavior. Accesses are usually atomic in the sense that + a four-byte __raw_readl() does not get split into individual byte loads, but + multiple consecutive accesses can be combined on the bus. In portable code, it + is only safe to use these to access memory behind a device bus but not MMIO + registers, as there are no ordering guarantees with regard to other MMIO + accesses or even spinlocks. The byte order is generally the same as for normal + memory, so unlike the other functions, these can be used to copy data between + kernel memory and device memory. + +inl(), inw(), inb(), outl(), outw(), outb() + + PCI I/O port resources traditionally require separate helpers as they are + implemented using special instructions on the x86 architecture. On most other + architectures, these are mapped to readl()/writel() style accessors + internally, usually pointing to a fixed area in virtual memory. Instead of an + ``__iomem`` pointer, the address is a 32-bit integer token to identify a port + number. PCI requires I/O port access to be non-posted, meaning that an outb() + must complete before the following code executes, while a normal writeb() may + still be in progress. On architectures that correctly implement this, I/O port + access is therefore ordered against spinlocks. Many non-x86 PCI host bridge + implementations and CPU architectures however fail to implement non-posted I/O + space on PCI, so they can end up being posted on such hardware. + + In some architectures, the I/O port number space has a 1:1 mapping to + ``__iomem`` pointers, but this is not recommended and device drivers should + not rely on that for portability. Similarly, an I/O port number as described + in a PCI base address register may not correspond to the port number as seen + by a device driver. Portable drivers need to read the port number for the + resource provided by the kernel. + + There are no direct 64-bit I/O port accessors, but pci_iomap() in combination + with ioread64/iowrite64 can be used instead. + +inl_p(), inw_p(), inb_p(), outl_p(), outw_p(), outb_p() + + On ISA devices that require specific timing, the _p versions of the I/O + accessors add a small delay. On architectures that do not have ISA buses, + these are aliases to the normal inb/outb helpers. + +readsq, readsl, readsw, readsb +writesq, writesl, writesw, writesb +ioread64_rep, ioread32_rep, ioread16_rep, ioread8_rep +iowrite64_rep, iowrite32_rep, iowrite16_rep, iowrite8_rep +insl, insw, insb, outsl, outsw, outsb + + These are helpers that access the same address multiple times, usually to copy + data between kernel memory byte stream and a FIFO buffer. Unlike the normal + MMIO accessors, these do not perform a byteswap on big-endian kernels, so the + first byte in the FIFO register corresponds to the first byte in the memory + buffer regardless of the architecture. + +Device memory mapping modes +=========================== + +Some architectures support multiple modes for mapping device memory. +ioremap_*() variants provide a common abstraction around these +architecture-specific modes, with a shared set of semantics. + +ioremap() is the most common mapping type, and is applicable to typical device +memory (e.g. I/O registers). Other modes can offer weaker or stronger +guarantees, if supported by the architecture. From most to least common, they +are as follows: + +ioremap() +--------- + +The default mode, suitable for most memory-mapped devices, e.g. control +registers. Memory mapped using ioremap() has the following characteristics: + +* Uncached - CPU-side caches are bypassed, and all reads and writes are handled + directly by the device +* No speculative operations - the CPU may not issue a read or write to this + memory, unless the instruction that does so has been reached in committed + program flow. +* No reordering - The CPU may not reorder accesses to this memory mapping with + respect to each other. On some architectures, this relies on barriers in + readl_relaxed()/writel_relaxed(). +* No repetition - The CPU may not issue multiple reads or writes for a single + program instruction. +* No write-combining - Each I/O operation results in one discrete read or write + being issued to the device, and multiple writes are not combined into larger + writes. This may or may not be enforced when using __raw I/O accessors or + pointer dereferences. +* Non-executable - The CPU is not allowed to speculate instruction execution + from this memory (it probably goes without saying, but you're also not + allowed to jump into device memory). + +On many platforms and buses (e.g. PCI), writes issued through ioremap() +mappings are posted, which means that the CPU does not wait for the write to +actually reach the target device before retiring the write instruction. + +On many platforms, I/O accesses must be aligned with respect to the access +size; failure to do so will result in an exception or unpredictable results. + +ioremap_wc() +------------ + +Maps I/O memory as normal memory with write combining. Unlike ioremap(), + +* The CPU may speculatively issue reads from the device that the program + didn't actually execute, and may choose to basically read whatever it wants. +* The CPU may reorder operations as long as the result is consistent from the + program's point of view. +* The CPU may write to the same location multiple times, even when the program + issued a single write. +* The CPU may combine several writes into a single larger write. + +This mode is typically used for video framebuffers, where it can increase +performance of writes. It can also be used for other blocks of memory in +devices (e.g. buffers or shared memory), but care must be taken as accesses are +not guaranteed to be ordered with respect to normal ioremap() MMIO register +accesses without explicit barriers. + +On a PCI bus, it is usually safe to use ioremap_wc() on MMIO areas marked as +``IORESOURCE_PREFETCH``, but it may not be used on those without the flag. +For on-chip devices, there is no corresponding flag, but a driver can use +ioremap_wc() on a device that is known to be safe. + +ioremap_wt() +------------ + +Maps I/O memory as normal memory with write-through caching. Like ioremap_wc(), +but also, + +* The CPU may cache writes issued to and reads from the device, and serve reads + from that cache. + +This mode is sometimes used for video framebuffers, where drivers still expect +writes to reach the device in a timely manner (and not be stuck in the CPU +cache), but reads may be served from the cache for efficiency. However, it is +rarely useful these days, as framebuffer drivers usually perform writes only, +for which ioremap_wc() is more efficient (as it doesn't needlessly trash the +cache). Most drivers should not use this. + +ioremap_np() +------------ + +Like ioremap(), but explicitly requests non-posted write semantics. On some +architectures and buses, ioremap() mappings have posted write semantics, which +means that writes can appear to "complete" from the point of view of the +CPU before the written data actually arrives at the target device. Writes are +still ordered with respect to other writes and reads from the same device, but +due to the posted write semantics, this is not the case with respect to other +devices. ioremap_np() explicitly requests non-posted semantics, which means +that the write instruction will not appear to complete until the device has +received (and to some platform-specific extent acknowledged) the written data. + +This mapping mode primarily exists to cater for platforms with bus fabrics that +require this particular mapping mode to work correctly. These platforms set the +``IORESOURCE_MEM_NONPOSTED`` flag for a resource that requires ioremap_np() +semantics and portable drivers should use an abstraction that automatically +selects it where appropriate (see the `Higher-level ioremap abstractions`_ +section below). + +The bare ioremap_np() is only available on some architectures; on others, it +always returns NULL. Drivers should not normally use it, unless they are +platform-specific or they derive benefit from non-posted writes where +supported, and can fall back to ioremap() otherwise. The normal approach to +ensure posted write completion is to do a dummy read after a write as +explained in `Accessing the device`_, which works with ioremap() on all +platforms. + +ioremap_np() should never be used for PCI drivers. PCI memory space writes are +always posted, even on architectures that otherwise implement ioremap_np(). +Using ioremap_np() for PCI BARs will at best result in posted write semantics, +and at worst result in complete breakage. + +Note that non-posted write semantics are orthogonal to CPU-side ordering +guarantees. A CPU may still choose to issue other reads or writes before a +non-posted write instruction retires. See the previous section on MMIO access +functions for details on the CPU side of things. + +ioremap_uc() +------------ + +ioremap_uc() behaves like ioremap() except that on the x86 architecture without +'PAT' mode, it marks memory as uncached even when the MTRR has designated +it as cacheable, see Documentation/arch/x86/pat.rst. + +Portable drivers should avoid the use of ioremap_uc(). + +ioremap_cache() +--------------- + +ioremap_cache() effectively maps I/O memory as normal RAM. CPU write-back +caches can be used, and the CPU is free to treat the device as if it were a +block of RAM. This should never be used for device memory which has side +effects of any kind, or which does not return the data previously written on +read. + +It should also not be used for actual RAM, as the returned pointer is an +``__iomem`` token. memremap() can be used for mapping normal RAM that is outside +of the linear kernel memory area to a regular pointer. + +Portable drivers should avoid the use of ioremap_cache(). + +Architecture example +-------------------- + +Here is how the above modes map to memory attribute settings on the ARM64 +architecture: + ++------------------------+--------------------------------------------+ +| API | Memory region type and cacheability | ++------------------------+--------------------------------------------+ +| ioremap_np() | Device-nGnRnE | ++------------------------+--------------------------------------------+ +| ioremap() | Device-nGnRE | ++------------------------+--------------------------------------------+ +| ioremap_uc() | (not implemented) | ++------------------------+--------------------------------------------+ +| ioremap_wc() | Normal-Non Cacheable | ++------------------------+--------------------------------------------+ +| ioremap_wt() | (not implemented; fallback to ioremap) | ++------------------------+--------------------------------------------+ +| ioremap_cache() | Normal-Write-Back Cacheable | ++------------------------+--------------------------------------------+ + +Higher-level ioremap abstractions +================================= + +Instead of using the above raw ioremap() modes, drivers are encouraged to use +higher-level APIs. These APIs may implement platform-specific logic to +automatically choose an appropriate ioremap mode on any given bus, allowing for +a platform-agnostic driver to work on those platforms without any special +cases. At the time of this writing, the following ioremap() wrappers have such +logic: + +devm_ioremap_resource() + + Can automatically select ioremap_np() over ioremap() according to platform + requirements, if the ``IORESOURCE_MEM_NONPOSTED`` flag is set on the struct + resource. Uses devres to automatically unmap the resource when the driver + probe() function fails or a device in unbound from its driver. + + Documented in Documentation/driver-api/driver-model/devres.rst. + +of_address_to_resource() + + Automatically sets the ``IORESOURCE_MEM_NONPOSTED`` flag for platforms that + require non-posted writes for certain buses (see the nonposted-mmio and + posted-mmio device tree properties). + +of_iomap() + + Maps the resource described in a ``reg`` property in the device tree, doing + all required translations. Automatically selects ioremap_np() according to + platform requirements, as above. + +pci_ioremap_bar(), pci_ioremap_wc_bar() + + Maps the resource described in a PCI base address without having to extract + the physical address first. + +pci_iomap(), pci_iomap_wc() + + Like pci_ioremap_bar()/pci_ioremap_bar(), but also works on I/O space when + used together with ioread32()/iowrite32() and similar accessors + +pcim_iomap() + + Like pci_iomap(), but uses devres to automatically unmap the resource when + the driver probe() function fails or a device in unbound from its driver + + Documented in Documentation/driver-api/driver-model/devres.rst. + +Not using these wrappers may make drivers unusable on certain platforms with +stricter rules for mapping I/O memory. + +Generalizing Access to System and I/O Memory +============================================ + +.. kernel-doc:: include/linux/iosys-map.h + :doc: overview + +.. kernel-doc:: include/linux/iosys-map.h + :internal: + +Public Functions Provided +========================= + +.. kernel-doc:: arch/x86/include/asm/io.h + :internal: + +.. kernel-doc:: lib/pci_iomap.c + :export: diff --git a/Documentation/driver-api/device_link.rst b/Documentation/driver-api/device_link.rst new file mode 100644 index 0000000000..ee913ae163 --- /dev/null +++ b/Documentation/driver-api/device_link.rst @@ -0,0 +1,320 @@ +.. _device_link: + +============ +Device links +============ + +By default, the driver core only enforces dependencies between devices +that are borne out of a parent/child relationship within the device +hierarchy: When suspending, resuming or shutting down the system, devices +are ordered based on this relationship, i.e. children are always suspended +before their parent, and the parent is always resumed before its children. + +Sometimes there is a need to represent device dependencies beyond the +mere parent/child relationship, e.g. between siblings, and have the +driver core automatically take care of them. + +Secondly, the driver core by default does not enforce any driver presence +dependencies, i.e. that one device must be bound to a driver before +another one can probe or function correctly. + +Often these two dependency types come together, so a device depends on +another one both with regards to driver presence *and* with regards to +suspend/resume and shutdown ordering. + +Device links allow representation of such dependencies in the driver core. + +In its standard or *managed* form, a device link combines *both* dependency +types: It guarantees correct suspend/resume and shutdown ordering between a +"supplier" device and its "consumer" devices, and it guarantees driver +presence on the supplier. The consumer devices are not probed before the +supplier is bound to a driver, and they're unbound before the supplier +is unbound. + +When driver presence on the supplier is irrelevant and only correct +suspend/resume and shutdown ordering is needed, the device link may +simply be set up with the ``DL_FLAG_STATELESS`` flag. In other words, +enforcing driver presence on the supplier is optional. + +Another optional feature is runtime PM integration: By setting the +``DL_FLAG_PM_RUNTIME`` flag on addition of the device link, the PM core +is instructed to runtime resume the supplier and keep it active +whenever and for as long as the consumer is runtime resumed. + +Usage +===== + +The earliest point in time when device links can be added is after +:c:func:`device_add()` has been called for the supplier and +:c:func:`device_initialize()` has been called for the consumer. + +It is legal to add them later, but care must be taken that the system +remains in a consistent state: E.g. a device link cannot be added in +the midst of a suspend/resume transition, so either commencement of +such a transition needs to be prevented with :c:func:`lock_system_sleep()`, +or the device link needs to be added from a function which is guaranteed +not to run in parallel to a suspend/resume transition, such as from a +device ``->probe`` callback or a boot-time PCI quirk. + +Another example for an inconsistent state would be a device link that +represents a driver presence dependency, yet is added from the consumer's +``->probe`` callback while the supplier hasn't started to probe yet: Had the +driver core known about the device link earlier, it wouldn't have probed the +consumer in the first place. The onus is thus on the consumer to check +presence of the supplier after adding the link, and defer probing on +non-presence. [Note that it is valid to create a link from the consumer's +``->probe`` callback while the supplier is still probing, but the consumer must +know that the supplier is functional already at the link creation time (that is +the case, for instance, if the consumer has just acquired some resources that +would not have been available had the supplier not been functional then).] + +If a device link with ``DL_FLAG_STATELESS`` set (i.e. a stateless device link) +is added in the ``->probe`` callback of the supplier or consumer driver, it is +typically deleted in its ``->remove`` callback for symmetry. That way, if the +driver is compiled as a module, the device link is added on module load and +orderly deleted on unload. The same restrictions that apply to device link +addition (e.g. exclusion of a parallel suspend/resume transition) apply equally +to deletion. Device links managed by the driver core are deleted automatically +by it. + +Several flags may be specified on device link addition, two of which +have already been mentioned above: ``DL_FLAG_STATELESS`` to express that no +driver presence dependency is needed (but only correct suspend/resume and +shutdown ordering) and ``DL_FLAG_PM_RUNTIME`` to express that runtime PM +integration is desired. + +Two other flags are specifically targeted at use cases where the device +link is added from the consumer's ``->probe`` callback: ``DL_FLAG_RPM_ACTIVE`` +can be specified to runtime resume the supplier and prevent it from suspending +before the consumer is runtime suspended. ``DL_FLAG_AUTOREMOVE_CONSUMER`` +causes the device link to be automatically purged when the consumer fails to +probe or later unbinds. + +Similarly, when the device link is added from supplier's ``->probe`` callback, +``DL_FLAG_AUTOREMOVE_SUPPLIER`` causes the device link to be automatically +purged when the supplier fails to probe or later unbinds. + +If neither ``DL_FLAG_AUTOREMOVE_CONSUMER`` nor ``DL_FLAG_AUTOREMOVE_SUPPLIER`` +is set, ``DL_FLAG_AUTOPROBE_CONSUMER`` can be used to request the driver core +to probe for a driver for the consumer driver on the link automatically after +a driver has been bound to the supplier device. + +Note, however, that any combinations of ``DL_FLAG_AUTOREMOVE_CONSUMER``, +``DL_FLAG_AUTOREMOVE_SUPPLIER`` or ``DL_FLAG_AUTOPROBE_CONSUMER`` with +``DL_FLAG_STATELESS`` are invalid and cannot be used. + +Limitations +=========== + +Driver authors should be aware that a driver presence dependency for managed +device links (i.e. when ``DL_FLAG_STATELESS`` is not specified on link addition) +may cause probing of the consumer to be deferred indefinitely. This can become +a problem if the consumer is required to probe before a certain initcall level +is reached. Worse, if the supplier driver is blacklisted or missing, the +consumer will never be probed. + +Moreover, managed device links cannot be deleted directly. They are deleted +by the driver core when they are not necessary any more in accordance with the +``DL_FLAG_AUTOREMOVE_CONSUMER`` and ``DL_FLAG_AUTOREMOVE_SUPPLIER`` flags. +However, stateless device links (i.e. device links with ``DL_FLAG_STATELESS`` +set) are expected to be removed by whoever called :c:func:`device_link_add()` +to add them with the help of either :c:func:`device_link_del()` or +:c:func:`device_link_remove()`. + +Passing ``DL_FLAG_RPM_ACTIVE`` along with ``DL_FLAG_STATELESS`` to +:c:func:`device_link_add()` may cause the PM-runtime usage counter of the +supplier device to remain nonzero after a subsequent invocation of either +:c:func:`device_link_del()` or :c:func:`device_link_remove()` to remove the +device link returned by it. This happens if :c:func:`device_link_add()` is +called twice in a row for the same consumer-supplier pair without removing the +link between these calls, in which case allowing the PM-runtime usage counter +of the supplier to drop on an attempt to remove the link may cause it to be +suspended while the consumer is still PM-runtime-active and that has to be +avoided. [To work around this limitation it is sufficient to let the consumer +runtime suspend at least once, or call :c:func:`pm_runtime_set_suspended()` for +it with PM-runtime disabled, between the :c:func:`device_link_add()` and +:c:func:`device_link_del()` or :c:func:`device_link_remove()` calls.] + +Sometimes drivers depend on optional resources. They are able to operate +in a degraded mode (reduced feature set or performance) when those resources +are not present. An example is an SPI controller that can use a DMA engine +or work in PIO mode. The controller can determine presence of the optional +resources at probe time but on non-presence there is no way to know whether +they will become available in the near future (due to a supplier driver +probing) or never. Consequently it cannot be determined whether to defer +probing or not. It would be possible to notify drivers when optional +resources become available after probing, but it would come at a high cost +for drivers as switching between modes of operation at runtime based on the +availability of such resources would be much more complex than a mechanism +based on probe deferral. In any case optional resources are beyond the +scope of device links. + +Examples +======== + +* An MMU device exists alongside a busmaster device, both are in the same + power domain. The MMU implements DMA address translation for the busmaster + device and shall be runtime resumed and kept active whenever and as long + as the busmaster device is active. The busmaster device's driver shall + not bind before the MMU is bound. To achieve this, a device link with + runtime PM integration is added from the busmaster device (consumer) + to the MMU device (supplier). The effect with regards to runtime PM + is the same as if the MMU was the parent of the master device. + + The fact that both devices share the same power domain would normally + suggest usage of a struct dev_pm_domain or struct generic_pm_domain, + however these are not independent devices that happen to share a power + switch, but rather the MMU device serves the busmaster device and is + useless without it. A device link creates a synthetic hierarchical + relationship between the devices and is thus more apt. + +* A Thunderbolt host controller comprises a number of PCIe hotplug ports + and an NHI device to manage the PCIe switch. On resume from system sleep, + the NHI device needs to re-establish PCI tunnels to attached devices + before the hotplug ports can resume. If the hotplug ports were children + of the NHI, this resume order would automatically be enforced by the + PM core, but unfortunately they're aunts. The solution is to add + device links from the hotplug ports (consumers) to the NHI device + (supplier). A driver presence dependency is not necessary for this + use case. + +* Discrete GPUs in hybrid graphics laptops often feature an HDA controller + for HDMI/DP audio. In the device hierarchy the HDA controller is a sibling + of the VGA device, yet both share the same power domain and the HDA + controller is only ever needed when an HDMI/DP display is attached to the + VGA device. A device link from the HDA controller (consumer) to the + VGA device (supplier) aptly represents this relationship. + +* ACPI allows definition of a device start order by way of _DEP objects. + A classical example is when ACPI power management methods on one device + are implemented in terms of I\ :sup:`2`\ C accesses and require a specific + I\ :sup:`2`\ C controller to be present and functional for the power + management of the device in question to work. + +* In some SoCs a functional dependency exists from display, video codec and + video processing IP cores on transparent memory access IP cores that handle + burst access and compression/decompression. + +Alternatives +============ + +* A struct dev_pm_domain can be used to override the bus, + class or device type callbacks. It is intended for devices sharing + a single on/off switch, however it does not guarantee a specific + suspend/resume ordering, this needs to be implemented separately. + It also does not by itself track the runtime PM status of the involved + devices and turn off the power switch only when all of them are runtime + suspended. Furthermore it cannot be used to enforce a specific shutdown + ordering or a driver presence dependency. + +* A struct generic_pm_domain is a lot more heavyweight than a + device link and does not allow for shutdown ordering or driver presence + dependencies. It also cannot be used on ACPI systems. + +Implementation +============== + +The device hierarchy, which -- as the name implies -- is a tree, +becomes a directed acyclic graph once device links are added. + +Ordering of these devices during suspend/resume is determined by the +dpm_list. During shutdown it is determined by the devices_kset. With +no device links present, the two lists are a flattened, one-dimensional +representations of the device tree such that a device is placed behind +all its ancestors. That is achieved by traversing the ACPI namespace +or OpenFirmware device tree top-down and appending devices to the lists +as they are discovered. + +Once device links are added, the lists need to satisfy the additional +constraint that a device is placed behind all its suppliers, recursively. +To ensure this, upon addition of the device link the consumer and the +entire sub-graph below it (all children and consumers of the consumer) +are moved to the end of the list. (Call to :c:func:`device_reorder_to_tail()` +from :c:func:`device_link_add()`.) + +To prevent introduction of dependency loops into the graph, it is +verified upon device link addition that the supplier is not dependent +on the consumer or any children or consumers of the consumer. +(Call to :c:func:`device_is_dependent()` from :c:func:`device_link_add()`.) +If that constraint is violated, :c:func:`device_link_add()` will return +``NULL`` and a ``WARNING`` will be logged. + +Notably this also prevents the addition of a device link from a parent +device to a child. However the converse is allowed, i.e. a device link +from a child to a parent. Since the driver core already guarantees +correct suspend/resume and shutdown ordering between parent and child, +such a device link only makes sense if a driver presence dependency is +needed on top of that. In this case driver authors should weigh +carefully if a device link is at all the right tool for the purpose. +A more suitable approach might be to simply use deferred probing or +add a device flag causing the parent driver to be probed before the +child one. + +State machine +============= + +.. kernel-doc:: include/linux/device.h + :functions: device_link_state + +:: + + .=============================. + | | + v | + DORMANT <=> AVAILABLE <=> CONSUMER_PROBE => ACTIVE + ^ | + | | + '============ SUPPLIER_UNBIND <============' + +* The initial state of a device link is automatically determined by + :c:func:`device_link_add()` based on the driver presence on the supplier + and consumer. If the link is created before any devices are probed, it + is set to ``DL_STATE_DORMANT``. + +* When a supplier device is bound to a driver, links to its consumers + progress to ``DL_STATE_AVAILABLE``. + (Call to :c:func:`device_links_driver_bound()` from + :c:func:`driver_bound()`.) + +* Before a consumer device is probed, presence of supplier drivers is + verified by checking the consumer device is not in the wait_for_suppliers + list and by checking that links to suppliers are in ``DL_STATE_AVAILABLE`` + state. The state of the links is updated to ``DL_STATE_CONSUMER_PROBE``. + (Call to :c:func:`device_links_check_suppliers()` from + :c:func:`really_probe()`.) + This prevents the supplier from unbinding. + (Call to :c:func:`wait_for_device_probe()` from + :c:func:`device_links_unbind_consumers()`.) + +* If the probe fails, links to suppliers revert back to ``DL_STATE_AVAILABLE``. + (Call to :c:func:`device_links_no_driver()` from :c:func:`really_probe()`.) + +* If the probe succeeds, links to suppliers progress to ``DL_STATE_ACTIVE``. + (Call to :c:func:`device_links_driver_bound()` from :c:func:`driver_bound()`.) + +* When the consumer's driver is later on removed, links to suppliers revert + back to ``DL_STATE_AVAILABLE``. + (Call to :c:func:`__device_links_no_driver()` from + :c:func:`device_links_driver_cleanup()`, which in turn is called from + :c:func:`__device_release_driver()`.) + +* Before a supplier's driver is removed, links to consumers that are not + bound to a driver are updated to ``DL_STATE_SUPPLIER_UNBIND``. + (Call to :c:func:`device_links_busy()` from + :c:func:`__device_release_driver()`.) + This prevents the consumers from binding. + (Call to :c:func:`device_links_check_suppliers()` from + :c:func:`really_probe()`.) + Consumers that are bound are freed from their driver; consumers that are + probing are waited for until they are done. + (Call to :c:func:`device_links_unbind_consumers()` from + :c:func:`__device_release_driver()`.) + Once all links to consumers are in ``DL_STATE_SUPPLIER_UNBIND`` state, + the supplier driver is released and the links revert to ``DL_STATE_DORMANT``. + (Call to :c:func:`device_links_driver_cleanup()` from + :c:func:`__device_release_driver()`.) + +API +=== + +See device_link_add(), device_link_del() and device_link_remove(). diff --git a/Documentation/driver-api/dma-buf.rst b/Documentation/driver-api/dma-buf.rst new file mode 100644 index 0000000000..f92a32d095 --- /dev/null +++ b/Documentation/driver-api/dma-buf.rst @@ -0,0 +1,366 @@ +Buffer Sharing and Synchronization (dma-buf) +============================================ + +The dma-buf subsystem provides the framework for sharing buffers for +hardware (DMA) access across multiple device drivers and subsystems, and +for synchronizing asynchronous hardware access. + +This is used, for example, by drm "prime" multi-GPU support, but is of +course not limited to GPU use cases. + +The three main components of this are: (1) dma-buf, representing a +sg_table and exposed to userspace as a file descriptor to allow passing +between devices, (2) fence, which provides a mechanism to signal when +one device has finished access, and (3) reservation, which manages the +shared or exclusive fence(s) associated with the buffer. + +Shared DMA Buffers +------------------ + +This document serves as a guide to device-driver writers on what is the dma-buf +buffer sharing API, how to use it for exporting and using shared buffers. + +Any device driver which wishes to be a part of DMA buffer sharing, can do so as +either the 'exporter' of buffers, or the 'user' or 'importer' of buffers. + +Say a driver A wants to use buffers created by driver B, then we call B as the +exporter, and A as buffer-user/importer. + +The exporter + + - implements and manages operations in :c:type:`struct dma_buf_ops + <dma_buf_ops>` for the buffer, + - allows other users to share the buffer by using dma_buf sharing APIs, + - manages the details of buffer allocation, wrapped in a :c:type:`struct + dma_buf <dma_buf>`, + - decides about the actual backing storage where this allocation happens, + - and takes care of any migration of scatterlist - for all (shared) users of + this buffer. + +The buffer-user + + - is one of (many) sharing users of the buffer. + - doesn't need to worry about how the buffer is allocated, or where. + - and needs a mechanism to get access to the scatterlist that makes up this + buffer in memory, mapped into its own address space, so it can access the + same area of memory. This interface is provided by :c:type:`struct + dma_buf_attachment <dma_buf_attachment>`. + +Any exporters or users of the dma-buf buffer sharing framework must have a +'select DMA_SHARED_BUFFER' in their respective Kconfigs. + +Userspace Interface Notes +~~~~~~~~~~~~~~~~~~~~~~~~~ + +Mostly a DMA buffer file descriptor is simply an opaque object for userspace, +and hence the generic interface exposed is very minimal. There's a few things to +consider though: + +- Since kernel 3.12 the dma-buf FD supports the llseek system call, but only + with offset=0 and whence=SEEK_END|SEEK_SET. SEEK_SET is supported to allow + the usual size discover pattern size = SEEK_END(0); SEEK_SET(0). Every other + llseek operation will report -EINVAL. + + If llseek on dma-buf FDs isn't support the kernel will report -ESPIPE for all + cases. Userspace can use this to detect support for discovering the dma-buf + size using llseek. + +- In order to avoid fd leaks on exec, the FD_CLOEXEC flag must be set + on the file descriptor. This is not just a resource leak, but a + potential security hole. It could give the newly exec'd application + access to buffers, via the leaked fd, to which it should otherwise + not be permitted access. + + The problem with doing this via a separate fcntl() call, versus doing it + atomically when the fd is created, is that this is inherently racy in a + multi-threaded app[3]. The issue is made worse when it is library code + opening/creating the file descriptor, as the application may not even be + aware of the fd's. + + To avoid this problem, userspace must have a way to request O_CLOEXEC + flag be set when the dma-buf fd is created. So any API provided by + the exporting driver to create a dmabuf fd must provide a way to let + userspace control setting of O_CLOEXEC flag passed in to dma_buf_fd(). + +- Memory mapping the contents of the DMA buffer is also supported. See the + discussion below on `CPU Access to DMA Buffer Objects`_ for the full details. + +- The DMA buffer FD is also pollable, see `Implicit Fence Poll Support`_ below for + details. + +- The DMA buffer FD also supports a few dma-buf-specific ioctls, see + `DMA Buffer ioctls`_ below for details. + +Basic Operation and Device DMA Access +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ + +.. kernel-doc:: drivers/dma-buf/dma-buf.c + :doc: dma buf device access + +CPU Access to DMA Buffer Objects +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ + +.. kernel-doc:: drivers/dma-buf/dma-buf.c + :doc: cpu access + +Implicit Fence Poll Support +~~~~~~~~~~~~~~~~~~~~~~~~~~~ + +.. kernel-doc:: drivers/dma-buf/dma-buf.c + :doc: implicit fence polling + +DMA-BUF statistics +~~~~~~~~~~~~~~~~~~ +.. kernel-doc:: drivers/dma-buf/dma-buf-sysfs-stats.c + :doc: overview + +DMA Buffer ioctls +~~~~~~~~~~~~~~~~~ + +.. kernel-doc:: include/uapi/linux/dma-buf.h + +DMA-BUF locking convention +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ + +.. kernel-doc:: drivers/dma-buf/dma-buf.c + :doc: locking convention + +Kernel Functions and Structures Reference +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ + +.. kernel-doc:: drivers/dma-buf/dma-buf.c + :export: + +.. kernel-doc:: include/linux/dma-buf.h + :internal: + +Reservation Objects +------------------- + +.. kernel-doc:: drivers/dma-buf/dma-resv.c + :doc: Reservation Object Overview + +.. kernel-doc:: drivers/dma-buf/dma-resv.c + :export: + +.. kernel-doc:: include/linux/dma-resv.h + :internal: + +DMA Fences +---------- + +.. kernel-doc:: drivers/dma-buf/dma-fence.c + :doc: DMA fences overview + +DMA Fence Cross-Driver Contract +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ + +.. kernel-doc:: drivers/dma-buf/dma-fence.c + :doc: fence cross-driver contract + +DMA Fence Signalling Annotations +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ + +.. kernel-doc:: drivers/dma-buf/dma-fence.c + :doc: fence signalling annotation + +DMA Fence Deadline Hints +~~~~~~~~~~~~~~~~~~~~~~~~ + +.. kernel-doc:: drivers/dma-buf/dma-fence.c + :doc: deadline hints + +DMA Fences Functions Reference +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ + +.. kernel-doc:: drivers/dma-buf/dma-fence.c + :export: + +.. kernel-doc:: include/linux/dma-fence.h + :internal: + +DMA Fence Array +~~~~~~~~~~~~~~~ + +.. kernel-doc:: drivers/dma-buf/dma-fence-array.c + :export: + +.. kernel-doc:: include/linux/dma-fence-array.h + :internal: + +DMA Fence Chain +~~~~~~~~~~~~~~~ + +.. kernel-doc:: drivers/dma-buf/dma-fence-chain.c + :export: + +.. kernel-doc:: include/linux/dma-fence-chain.h + :internal: + +DMA Fence unwrap +~~~~~~~~~~~~~~~~ + +.. kernel-doc:: include/linux/dma-fence-unwrap.h + :internal: + +DMA Fence Sync File +~~~~~~~~~~~~~~~~~~~ + +.. kernel-doc:: drivers/dma-buf/sync_file.c + :export: + +.. kernel-doc:: include/linux/sync_file.h + :internal: + +DMA Fence Sync File uABI +~~~~~~~~~~~~~~~~~~~~~~~~ + +.. kernel-doc:: include/uapi/linux/sync_file.h + :internal: + +Indefinite DMA Fences +~~~~~~~~~~~~~~~~~~~~~ + +At various times struct dma_fence with an indefinite time until dma_fence_wait() +finishes have been proposed. Examples include: + +* Future fences, used in HWC1 to signal when a buffer isn't used by the display + any longer, and created with the screen update that makes the buffer visible. + The time this fence completes is entirely under userspace's control. + +* Proxy fences, proposed to handle &drm_syncobj for which the fence has not yet + been set. Used to asynchronously delay command submission. + +* Userspace fences or gpu futexes, fine-grained locking within a command buffer + that userspace uses for synchronization across engines or with the CPU, which + are then imported as a DMA fence for integration into existing winsys + protocols. + +* Long-running compute command buffers, while still using traditional end of + batch DMA fences for memory management instead of context preemption DMA + fences which get reattached when the compute job is rescheduled. + +Common to all these schemes is that userspace controls the dependencies of these +fences and controls when they fire. Mixing indefinite fences with normal +in-kernel DMA fences does not work, even when a fallback timeout is included to +protect against malicious userspace: + +* Only the kernel knows about all DMA fence dependencies, userspace is not aware + of dependencies injected due to memory management or scheduler decisions. + +* Only userspace knows about all dependencies in indefinite fences and when + exactly they will complete, the kernel has no visibility. + +Furthermore the kernel has to be able to hold up userspace command submission +for memory management needs, which means we must support indefinite fences being +dependent upon DMA fences. If the kernel also support indefinite fences in the +kernel like a DMA fence, like any of the above proposal would, there is the +potential for deadlocks. + +.. kernel-render:: DOT + :alt: Indefinite Fencing Dependency Cycle + :caption: Indefinite Fencing Dependency Cycle + + digraph "Fencing Cycle" { + node [shape=box bgcolor=grey style=filled] + kernel [label="Kernel DMA Fences"] + userspace [label="userspace controlled fences"] + kernel -> userspace [label="memory management"] + userspace -> kernel [label="Future fence, fence proxy, ..."] + + { rank=same; kernel userspace } + } + +This means that the kernel might accidentally create deadlocks +through memory management dependencies which userspace is unaware of, which +randomly hangs workloads until the timeout kicks in. Workloads, which from +userspace's perspective, do not contain a deadlock. In such a mixed fencing +architecture there is no single entity with knowledge of all dependencies. +Therefore preventing such deadlocks from within the kernel is not possible. + +The only solution to avoid dependencies loops is by not allowing indefinite +fences in the kernel. This means: + +* No future fences, proxy fences or userspace fences imported as DMA fences, + with or without a timeout. + +* No DMA fences that signal end of batchbuffer for command submission where + userspace is allowed to use userspace fencing or long running compute + workloads. This also means no implicit fencing for shared buffers in these + cases. + +Recoverable Hardware Page Faults Implications +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ + +Modern hardware supports recoverable page faults, which has a lot of +implications for DMA fences. + +First, a pending page fault obviously holds up the work that's running on the +accelerator and a memory allocation is usually required to resolve the fault. +But memory allocations are not allowed to gate completion of DMA fences, which +means any workload using recoverable page faults cannot use DMA fences for +synchronization. Synchronization fences controlled by userspace must be used +instead. + +On GPUs this poses a problem, because current desktop compositor protocols on +Linux rely on DMA fences, which means without an entirely new userspace stack +built on top of userspace fences, they cannot benefit from recoverable page +faults. Specifically this means implicit synchronization will not be possible. +The exception is when page faults are only used as migration hints and never to +on-demand fill a memory request. For now this means recoverable page +faults on GPUs are limited to pure compute workloads. + +Furthermore GPUs usually have shared resources between the 3D rendering and +compute side, like compute units or command submission engines. If both a 3D +job with a DMA fence and a compute workload using recoverable page faults are +pending they could deadlock: + +- The 3D workload might need to wait for the compute job to finish and release + hardware resources first. + +- The compute workload might be stuck in a page fault, because the memory + allocation is waiting for the DMA fence of the 3D workload to complete. + +There are a few options to prevent this problem, one of which drivers need to +ensure: + +- Compute workloads can always be preempted, even when a page fault is pending + and not yet repaired. Not all hardware supports this. + +- DMA fence workloads and workloads which need page fault handling have + independent hardware resources to guarantee forward progress. This could be + achieved through e.g. through dedicated engines and minimal compute unit + reservations for DMA fence workloads. + +- The reservation approach could be further refined by only reserving the + hardware resources for DMA fence workloads when they are in-flight. This must + cover the time from when the DMA fence is visible to other threads up to + moment when fence is completed through dma_fence_signal(). + +- As a last resort, if the hardware provides no useful reservation mechanics, + all workloads must be flushed from the GPU when switching between jobs + requiring DMA fences or jobs requiring page fault handling: This means all DMA + fences must complete before a compute job with page fault handling can be + inserted into the scheduler queue. And vice versa, before a DMA fence can be + made visible anywhere in the system, all compute workloads must be preempted + to guarantee all pending GPU page faults are flushed. + +- Only a fairly theoretical option would be to untangle these dependencies when + allocating memory to repair hardware page faults, either through separate + memory blocks or runtime tracking of the full dependency graph of all DMA + fences. This results very wide impact on the kernel, since resolving the page + on the CPU side can itself involve a page fault. It is much more feasible and + robust to limit the impact of handling hardware page faults to the specific + driver. + +Note that workloads that run on independent hardware like copy engines or other +GPUs do not have any impact. This allows us to keep using DMA fences internally +in the kernel even for resolving hardware page faults, e.g. by using copy +engines to clear or copy memory needed to resolve the page fault. + +In some ways this page fault problem is a special case of the `Infinite DMA +Fences` discussions: Infinite fences from compute workloads are allowed to +depend on DMA fences, but not the other way around. And not even the page fault +problem is new, because some other CPU thread in userspace might +hit a page fault which holds up a userspace fence - supporting page faults on +GPUs doesn't anything fundamentally new. diff --git a/Documentation/driver-api/dmaengine/client.rst b/Documentation/driver-api/dmaengine/client.rst new file mode 100644 index 0000000000..ecf139f73d --- /dev/null +++ b/Documentation/driver-api/dmaengine/client.rst @@ -0,0 +1,379 @@ +==================== +DMA Engine API Guide +==================== + +Vinod Koul <vinod dot koul at intel.com> + +.. note:: For DMA Engine usage in async_tx please see: + ``Documentation/crypto/async-tx-api.rst`` + + +Below is a guide to device driver writers on how to use the Slave-DMA API of the +DMA Engine. This is applicable only for slave DMA usage only. + +DMA usage +========= + +The slave DMA usage consists of following steps: + +- Allocate a DMA slave channel + +- Set slave and controller specific parameters + +- Get a descriptor for transaction + +- Submit the transaction + +- Issue pending requests and wait for callback notification + +The details of these operations are: + +1. Allocate a DMA slave channel + + Channel allocation is slightly different in the slave DMA context, + client drivers typically need a channel from a particular DMA + controller only and even in some cases a specific channel is desired. + To request a channel dma_request_chan() API is used. + + Interface: + + .. code-block:: c + + struct dma_chan *dma_request_chan(struct device *dev, const char *name); + + Which will find and return the ``name`` DMA channel associated with the 'dev' + device. The association is done via DT, ACPI or board file based + dma_slave_map matching table. + + A channel allocated via this interface is exclusive to the caller, + until dma_release_channel() is called. + +2. Set slave and controller specific parameters + + Next step is always to pass some specific information to the DMA + driver. Most of the generic information which a slave DMA can use + is in struct dma_slave_config. This allows the clients to specify + DMA direction, DMA addresses, bus widths, DMA burst lengths etc + for the peripheral. + + If some DMA controllers have more parameters to be sent then they + should try to embed struct dma_slave_config in their controller + specific structure. That gives flexibility to client to pass more + parameters, if required. + + Interface: + + .. code-block:: c + + int dmaengine_slave_config(struct dma_chan *chan, + struct dma_slave_config *config) + + Please see the dma_slave_config structure definition in dmaengine.h + for a detailed explanation of the struct members. Please note + that the 'direction' member will be going away as it duplicates the + direction given in the prepare call. + +3. Get a descriptor for transaction + + For slave usage the various modes of slave transfers supported by the + DMA-engine are: + + - slave_sg: DMA a list of scatter gather buffers from/to a peripheral + + - dma_cyclic: Perform a cyclic DMA operation from/to a peripheral till the + operation is explicitly stopped. + + - interleaved_dma: This is common to Slave as well as M2M clients. For slave + address of devices' fifo could be already known to the driver. + Various types of operations could be expressed by setting + appropriate values to the 'dma_interleaved_template' members. Cyclic + interleaved DMA transfers are also possible if supported by the channel by + setting the DMA_PREP_REPEAT transfer flag. + + A non-NULL return of this transfer API represents a "descriptor" for + the given transaction. + + Interface: + + .. code-block:: c + + struct dma_async_tx_descriptor *dmaengine_prep_slave_sg( + struct dma_chan *chan, struct scatterlist *sgl, + unsigned int sg_len, enum dma_data_direction direction, + unsigned long flags); + + struct dma_async_tx_descriptor *dmaengine_prep_dma_cyclic( + struct dma_chan *chan, dma_addr_t buf_addr, size_t buf_len, + size_t period_len, enum dma_data_direction direction); + + struct dma_async_tx_descriptor *dmaengine_prep_interleaved_dma( + struct dma_chan *chan, struct dma_interleaved_template *xt, + unsigned long flags); + + The peripheral driver is expected to have mapped the scatterlist for + the DMA operation prior to calling dmaengine_prep_slave_sg(), and must + keep the scatterlist mapped until the DMA operation has completed. + The scatterlist must be mapped using the DMA struct device. + If a mapping needs to be synchronized later, dma_sync_*_for_*() must be + called using the DMA struct device, too. + So, normal setup should look like this: + + .. code-block:: c + + struct device *dma_dev = dmaengine_get_dma_device(chan); + + nr_sg = dma_map_sg(dma_dev, sgl, sg_len); + if (nr_sg == 0) + /* error */ + + desc = dmaengine_prep_slave_sg(chan, sgl, nr_sg, direction, flags); + + Once a descriptor has been obtained, the callback information can be + added and the descriptor must then be submitted. Some DMA engine + drivers may hold a spinlock between a successful preparation and + submission so it is important that these two operations are closely + paired. + + .. note:: + + Although the async_tx API specifies that completion callback + routines cannot submit any new operations, this is not the + case for slave/cyclic DMA. + + For slave DMA, the subsequent transaction may not be available + for submission prior to callback function being invoked, so + slave DMA callbacks are permitted to prepare and submit a new + transaction. + + For cyclic DMA, a callback function may wish to terminate the + DMA via dmaengine_terminate_async(). + + Therefore, it is important that DMA engine drivers drop any + locks before calling the callback function which may cause a + deadlock. + + Note that callbacks will always be invoked from the DMA + engines tasklet, never from interrupt context. + + **Optional: per descriptor metadata** + + DMAengine provides two ways for metadata support. + + DESC_METADATA_CLIENT + + The metadata buffer is allocated/provided by the client driver and it is + attached to the descriptor. + + .. code-block:: c + + int dmaengine_desc_attach_metadata(struct dma_async_tx_descriptor *desc, + void *data, size_t len); + + DESC_METADATA_ENGINE + + The metadata buffer is allocated/managed by the DMA driver. The client + driver can ask for the pointer, maximum size and the currently used size of + the metadata and can directly update or read it. + + Because the DMA driver manages the memory area containing the metadata, + clients must make sure that they do not try to access or get the pointer + after their transfer completion callback has run for the descriptor. + If no completion callback has been defined for the transfer, then the + metadata must not be accessed after issue_pending. + In other words: if the aim is to read back metadata after the transfer is + completed, then the client must use completion callback. + + .. code-block:: c + + void *dmaengine_desc_get_metadata_ptr(struct dma_async_tx_descriptor *desc, + size_t *payload_len, size_t *max_len); + + int dmaengine_desc_set_metadata_len(struct dma_async_tx_descriptor *desc, + size_t payload_len); + + Client drivers can query if a given mode is supported with: + + .. code-block:: c + + bool dmaengine_is_metadata_mode_supported(struct dma_chan *chan, + enum dma_desc_metadata_mode mode); + + Depending on the used mode client drivers must follow different flow. + + DESC_METADATA_CLIENT + + - DMA_MEM_TO_DEV / DEV_MEM_TO_MEM: + + 1. prepare the descriptor (dmaengine_prep_*) + construct the metadata in the client's buffer + 2. use dmaengine_desc_attach_metadata() to attach the buffer to the + descriptor + 3. submit the transfer + + - DMA_DEV_TO_MEM: + + 1. prepare the descriptor (dmaengine_prep_*) + 2. use dmaengine_desc_attach_metadata() to attach the buffer to the + descriptor + 3. submit the transfer + 4. when the transfer is completed, the metadata should be available in the + attached buffer + + DESC_METADATA_ENGINE + + - DMA_MEM_TO_DEV / DEV_MEM_TO_MEM: + + 1. prepare the descriptor (dmaengine_prep_*) + 2. use dmaengine_desc_get_metadata_ptr() to get the pointer to the + engine's metadata area + 3. update the metadata at the pointer + 4. use dmaengine_desc_set_metadata_len() to tell the DMA engine the + amount of data the client has placed into the metadata buffer + 5. submit the transfer + + - DMA_DEV_TO_MEM: + + 1. prepare the descriptor (dmaengine_prep_*) + 2. submit the transfer + 3. on transfer completion, use dmaengine_desc_get_metadata_ptr() to get + the pointer to the engine's metadata area + 4. read out the metadata from the pointer + + .. note:: + + When DESC_METADATA_ENGINE mode is used the metadata area for the descriptor + is no longer valid after the transfer has been completed (valid up to the + point when the completion callback returns if used). + + Mixed use of DESC_METADATA_CLIENT / DESC_METADATA_ENGINE is not allowed, + client drivers must use either of the modes per descriptor. + +4. Submit the transaction + + Once the descriptor has been prepared and the callback information + added, it must be placed on the DMA engine drivers pending queue. + + Interface: + + .. code-block:: c + + dma_cookie_t dmaengine_submit(struct dma_async_tx_descriptor *desc) + + This returns a cookie can be used to check the progress of DMA engine + activity via other DMA engine calls not covered in this document. + + dmaengine_submit() will not start the DMA operation, it merely adds + it to the pending queue. For this, see step 5, dma_async_issue_pending. + + .. note:: + + After calling ``dmaengine_submit()`` the submitted transfer descriptor + (``struct dma_async_tx_descriptor``) belongs to the DMA engine. + Consequently, the client must consider invalid the pointer to that + descriptor. + +5. Issue pending DMA requests and wait for callback notification + + The transactions in the pending queue can be activated by calling the + issue_pending API. If channel is idle then the first transaction in + queue is started and subsequent ones queued up. + + On completion of each DMA operation, the next in queue is started and + a tasklet triggered. The tasklet will then call the client driver + completion callback routine for notification, if set. + + Interface: + + .. code-block:: c + + void dma_async_issue_pending(struct dma_chan *chan); + +Further APIs +------------ + +1. Terminate APIs + + .. code-block:: c + + int dmaengine_terminate_sync(struct dma_chan *chan) + int dmaengine_terminate_async(struct dma_chan *chan) + int dmaengine_terminate_all(struct dma_chan *chan) /* DEPRECATED */ + + This causes all activity for the DMA channel to be stopped, and may + discard data in the DMA FIFO which hasn't been fully transferred. + No callback functions will be called for any incomplete transfers. + + Two variants of this function are available. + + dmaengine_terminate_async() might not wait until the DMA has been fully + stopped or until any running complete callbacks have finished. But it is + possible to call dmaengine_terminate_async() from atomic context or from + within a complete callback. dmaengine_synchronize() must be called before it + is safe to free the memory accessed by the DMA transfer or free resources + accessed from within the complete callback. + + dmaengine_terminate_sync() will wait for the transfer and any running + complete callbacks to finish before it returns. But the function must not be + called from atomic context or from within a complete callback. + + dmaengine_terminate_all() is deprecated and should not be used in new code. + +2. Pause API + + .. code-block:: c + + int dmaengine_pause(struct dma_chan *chan) + + This pauses activity on the DMA channel without data loss. + +3. Resume API + + .. code-block:: c + + int dmaengine_resume(struct dma_chan *chan) + + Resume a previously paused DMA channel. It is invalid to resume a + channel which is not currently paused. + +4. Check Txn complete + + .. code-block:: c + + enum dma_status dma_async_is_tx_complete(struct dma_chan *chan, + dma_cookie_t cookie, dma_cookie_t *last, dma_cookie_t *used) + + This can be used to check the status of the channel. Please see + the documentation in include/linux/dmaengine.h for a more complete + description of this API. + + This can be used in conjunction with dma_async_is_complete() and + the cookie returned from dmaengine_submit() to check for + completion of a specific DMA transaction. + + .. note:: + + Not all DMA engine drivers can return reliable information for + a running DMA channel. It is recommended that DMA engine users + pause or stop (via dmaengine_terminate_all()) the channel before + using this API. + +5. Synchronize termination API + + .. code-block:: c + + void dmaengine_synchronize(struct dma_chan *chan) + + Synchronize the termination of the DMA channel to the current context. + + This function should be used after dmaengine_terminate_async() to synchronize + the termination of the DMA channel to the current context. The function will + wait for the transfer and any running complete callbacks to finish before it + returns. + + If dmaengine_terminate_async() is used to stop the DMA channel this function + must be called before it is safe to free memory accessed by previously + submitted descriptors or to free any resources accessed within the complete + callback of previously submitted descriptors. + + The behavior of this function is undefined if dma_async_issue_pending() has + been called between dmaengine_terminate_async() and this function. diff --git a/Documentation/driver-api/dmaengine/dmatest.rst b/Documentation/driver-api/dmaengine/dmatest.rst new file mode 100644 index 0000000000..e2a63cefd7 --- /dev/null +++ b/Documentation/driver-api/dmaengine/dmatest.rst @@ -0,0 +1,232 @@ +============== +DMA Test Guide +============== + +Andy Shevchenko <andriy.shevchenko@linux.intel.com> + +This small document introduces how to test DMA drivers using dmatest module. + +The dmatest module tests DMA memcpy, memset, XOR and RAID6 P+Q operations using +various lengths and various offsets into the source and destination buffers. It +will initialize both buffers with a repeatable pattern and verify that the DMA +engine copies the requested region and nothing more. It will also verify that +the bytes aren't swapped around, and that the source buffer isn't modified. + +The dmatest module can be configured to test a specific channel. It can also +test multiple channels at the same time, and it can start multiple threads +competing for the same channel. + +.. note:: + The test suite works only on the channels that have at least one + capability of the following: DMA_MEMCPY (memory-to-memory), DMA_MEMSET + (const-to-memory or memory-to-memory, when emulated), DMA_XOR, DMA_PQ. + +.. note:: + In case of any related questions use the official mailing list + dmaengine@vger.kernel.org. + +Part 1 - How to build the test module +===================================== + +The menuconfig contains an option that could be found by following path: + + Device Drivers -> DMA Engine support -> DMA Test client + +In the configuration file the option called CONFIG_DMATEST. The dmatest could +be built as module or inside kernel. Let's consider those cases. + +Part 2 - When dmatest is built as a module +========================================== + +Example of usage:: + + % modprobe dmatest timeout=2000 iterations=1 channel=dma0chan0 run=1 + +...or:: + + % modprobe dmatest + % echo 2000 > /sys/module/dmatest/parameters/timeout + % echo 1 > /sys/module/dmatest/parameters/iterations + % echo dma0chan0 > /sys/module/dmatest/parameters/channel + % echo 1 > /sys/module/dmatest/parameters/run + +...or on the kernel command line:: + + dmatest.timeout=2000 dmatest.iterations=1 dmatest.channel=dma0chan0 dmatest.run=1 + +Example of multi-channel test usage (new in the 5.0 kernel):: + + % modprobe dmatest + % echo 2000 > /sys/module/dmatest/parameters/timeout + % echo 1 > /sys/module/dmatest/parameters/iterations + % echo dma0chan0 > /sys/module/dmatest/parameters/channel + % echo dma0chan1 > /sys/module/dmatest/parameters/channel + % echo dma0chan2 > /sys/module/dmatest/parameters/channel + % echo 1 > /sys/module/dmatest/parameters/run + +.. note:: + For all tests, starting in the 5.0 kernel, either single- or multi-channel, + the channel parameter(s) must be set after all other parameters. It is at + that time that the existing parameter values are acquired for use by the + thread(s). All other parameters are shared. Therefore, if changes are made + to any of the other parameters, and an additional channel specified, the + (shared) parameters used for all threads will use the new values. + After the channels are specified, each thread is set as pending. All threads + begin execution when the run parameter is set to 1. + +.. hint:: + A list of available channels can be found by running the following command:: + + % ls -1 /sys/class/dma/ + +Once started a message like " dmatest: Added 1 threads using dma0chan0" is +emitted. A thread for that specific channel is created and is now pending, the +pending thread is started once run is to 1. + +Note that running a new test will not stop any in progress test. + +The following command returns the state of the test. :: + + % cat /sys/module/dmatest/parameters/run + +To wait for test completion userspace can poll 'run' until it is false, or use +the wait parameter. Specifying 'wait=1' when loading the module causes module +initialization to pause until a test run has completed, while reading +/sys/module/dmatest/parameters/wait waits for any running test to complete +before returning. For example, the following scripts wait for 42 tests +to complete before exiting. Note that if 'iterations' is set to 'infinite' then +waiting is disabled. + +Example:: + + % modprobe dmatest run=1 iterations=42 wait=1 + % modprobe -r dmatest + +...or:: + + % modprobe dmatest run=1 iterations=42 + % cat /sys/module/dmatest/parameters/wait + % modprobe -r dmatest + +Part 3 - When built-in in the kernel +==================================== + +The module parameters that is supplied to the kernel command line will be used +for the first performed test. After user gets a control, the test could be +re-run with the same or different parameters. For the details see the above +section `Part 2 - When dmatest is built as a module`_. + +In both cases the module parameters are used as the actual values for the test +case. You always could check them at run-time by running :: + + % grep -H . /sys/module/dmatest/parameters/* + +Part 4 - Gathering the test results +=================================== + +Test results are printed to the kernel log buffer with the format:: + + "dmatest: result <channel>: <test id>: '<error msg>' with src_off=<val> dst_off=<val> len=<val> (<err code>)" + +Example of output:: + + % dmesg | tail -n 1 + dmatest: result dma0chan0-copy0: #1: No errors with src_off=0x7bf dst_off=0x8ad len=0x3fea (0) + +The message format is unified across the different types of errors. A +number in the parentheses represents additional information, e.g. error +code, error counter, or status. A test thread also emits a summary line at +completion listing the number of tests executed, number that failed, and a +result code. + +Example:: + + % dmesg | tail -n 1 + dmatest: dma0chan0-copy0: summary 1 test, 0 failures 1000 iops 100000 KB/s (0) + +The details of a data miscompare error are also emitted, but do not follow the +above format. + +Part 5 - Handling channel allocation +==================================== + +Allocating Channels +------------------- + +Channels do not need to be configured prior to starting a test run. Attempting +to run the test without configuring the channels will result in testing any +channels that are available. + +Example:: + + % echo 1 > /sys/module/dmatest/parameters/run + dmatest: No channels configured, continue with any + +Channels are registered using the "channel" parameter. Channels can be requested by their +name, once requested, the channel is registered and a pending thread is added to the test list. + +Example:: + + % echo dma0chan2 > /sys/module/dmatest/parameters/channel + dmatest: Added 1 threads using dma0chan2 + +More channels can be added by repeating the example above. +Reading back the channel parameter will return the name of last channel that was added successfully. + +Example:: + + % echo dma0chan1 > /sys/module/dmatest/parameters/channel + dmatest: Added 1 threads using dma0chan1 + % echo dma0chan2 > /sys/module/dmatest/parameters/channel + dmatest: Added 1 threads using dma0chan2 + % cat /sys/module/dmatest/parameters/channel + dma0chan2 + +Another method of requesting channels is to request a channel with an empty string, Doing so +will request all channels available to be tested: + +Example:: + + % echo "" > /sys/module/dmatest/parameters/channel + dmatest: Added 1 threads using dma0chan0 + dmatest: Added 1 threads using dma0chan3 + dmatest: Added 1 threads using dma0chan4 + dmatest: Added 1 threads using dma0chan5 + dmatest: Added 1 threads using dma0chan6 + dmatest: Added 1 threads using dma0chan7 + dmatest: Added 1 threads using dma0chan8 + +At any point during the test configuration, reading the "test_list" parameter will +print the list of currently pending tests. + +Example:: + + % cat /sys/module/dmatest/parameters/test_list + dmatest: 1 threads using dma0chan0 + dmatest: 1 threads using dma0chan3 + dmatest: 1 threads using dma0chan4 + dmatest: 1 threads using dma0chan5 + dmatest: 1 threads using dma0chan6 + dmatest: 1 threads using dma0chan7 + dmatest: 1 threads using dma0chan8 + +Note: Channels will have to be configured for each test run as channel configurations do not +carry across to the next test run. + +Releasing Channels +------------------- + +Channels can be freed by setting run to 0. + +Example:: + + % echo dma0chan1 > /sys/module/dmatest/parameters/channel + dmatest: Added 1 threads using dma0chan1 + % cat /sys/class/dma/dma0chan1/in_use + 1 + % echo 0 > /sys/module/dmatest/parameters/run + % cat /sys/class/dma/dma0chan1/in_use + 0 + +Channels allocated by previous test runs are automatically freed when a new +channel is requested after completing a successful test run. diff --git a/Documentation/driver-api/dmaengine/index.rst b/Documentation/driver-api/dmaengine/index.rst new file mode 100644 index 0000000000..bdc45d8b4c --- /dev/null +++ b/Documentation/driver-api/dmaengine/index.rst @@ -0,0 +1,55 @@ +======================= +DMAEngine documentation +======================= + +DMAEngine documentation provides documents for various aspects of DMAEngine +framework. + +DMAEngine development documentation +----------------------------------- + +This book helps with DMAengine internal APIs and guide for DMAEngine device +driver writers. + +.. toctree:: + :maxdepth: 1 + + provider + +DMAEngine client documentation +------------------------------ + +This book is a guide to device driver writers on how to use the Slave-DMA +API of the DMAEngine. This is applicable only for slave DMA usage only. + +.. toctree:: + :maxdepth: 1 + + client + +DMA Test documentation +---------------------- + +This book introduces how to test DMA drivers using dmatest module. + +.. toctree:: + :maxdepth: 1 + + dmatest + +PXA DMA documentation +---------------------- + +This book adds some notes about PXA DMA + +.. toctree:: + :maxdepth: 1 + + pxa_dma + +.. only:: subproject and html + + Indices + ======= + + * :ref:`genindex` diff --git a/Documentation/driver-api/dmaengine/provider.rst b/Documentation/driver-api/dmaengine/provider.rst new file mode 100644 index 0000000000..ceac2a300e --- /dev/null +++ b/Documentation/driver-api/dmaengine/provider.rst @@ -0,0 +1,647 @@ +================================== +DMAengine controller documentation +================================== + +Hardware Introduction +===================== + +Most of the Slave DMA controllers have the same general principles of +operations. + +They have a given number of channels to use for the DMA transfers, and +a given number of requests lines. + +Requests and channels are pretty much orthogonal. Channels can be used +to serve several to any requests. To simplify, channels are the +entities that will be doing the copy, and requests what endpoints are +involved. + +The request lines actually correspond to physical lines going from the +DMA-eligible devices to the controller itself. Whenever the device +will want to start a transfer, it will assert a DMA request (DRQ) by +asserting that request line. + +A very simple DMA controller would only take into account a single +parameter: the transfer size. At each clock cycle, it would transfer a +byte of data from one buffer to another, until the transfer size has +been reached. + +That wouldn't work well in the real world, since slave devices might +require a specific number of bits to be transferred in a single +cycle. For example, we may want to transfer as much data as the +physical bus allows to maximize performances when doing a simple +memory copy operation, but our audio device could have a narrower FIFO +that requires data to be written exactly 16 or 24 bits at a time. This +is why most if not all of the DMA controllers can adjust this, using a +parameter called the transfer width. + +Moreover, some DMA controllers, whenever the RAM is used as a source +or destination, can group the reads or writes in memory into a buffer, +so instead of having a lot of small memory accesses, which is not +really efficient, you'll get several bigger transfers. This is done +using a parameter called the burst size, that defines how many single +reads/writes it's allowed to do without the controller splitting the +transfer into smaller sub-transfers. + +Our theoretical DMA controller would then only be able to do transfers +that involve a single contiguous block of data. However, some of the +transfers we usually have are not, and want to copy data from +non-contiguous buffers to a contiguous buffer, which is called +scatter-gather. + +DMAEngine, at least for mem2dev transfers, require support for +scatter-gather. So we're left with two cases here: either we have a +quite simple DMA controller that doesn't support it, and we'll have to +implement it in software, or we have a more advanced DMA controller, +that implements in hardware scatter-gather. + +The latter are usually programmed using a collection of chunks to +transfer, and whenever the transfer is started, the controller will go +over that collection, doing whatever we programmed there. + +This collection is usually either a table or a linked list. You will +then push either the address of the table and its number of elements, +or the first item of the list to one channel of the DMA controller, +and whenever a DRQ will be asserted, it will go through the collection +to know where to fetch the data from. + +Either way, the format of this collection is completely dependent on +your hardware. Each DMA controller will require a different structure, +but all of them will require, for every chunk, at least the source and +destination addresses, whether it should increment these addresses or +not and the three parameters we saw earlier: the burst size, the +transfer width and the transfer size. + +The one last thing is that usually, slave devices won't issue DRQ by +default, and you have to enable this in your slave device driver first +whenever you're willing to use DMA. + +These were just the general memory-to-memory (also called mem2mem) or +memory-to-device (mem2dev) kind of transfers. Most devices often +support other kind of transfers or memory operations that dmaengine +support and will be detailed later in this document. + +DMA Support in Linux +==================== + +Historically, DMA controller drivers have been implemented using the +async TX API, to offload operations such as memory copy, XOR, +cryptography, etc., basically any memory to memory operation. + +Over time, the need for memory to device transfers arose, and +dmaengine was extended. Nowadays, the async TX API is written as a +layer on top of dmaengine, and acts as a client. Still, dmaengine +accommodates that API in some cases, and made some design choices to +ensure that it stayed compatible. + +For more information on the Async TX API, please look the relevant +documentation file in Documentation/crypto/async-tx-api.rst. + +DMAEngine APIs +============== + +``struct dma_device`` Initialization +------------------------------------ + +Just like any other kernel framework, the whole DMAEngine registration +relies on the driver filling a structure and registering against the +framework. In our case, that structure is dma_device. + +The first thing you need to do in your driver is to allocate this +structure. Any of the usual memory allocators will do, but you'll also +need to initialize a few fields in there: + +- ``channels``: should be initialized as a list using the + INIT_LIST_HEAD macro for example + +- ``src_addr_widths``: + should contain a bitmask of the supported source transfer width + +- ``dst_addr_widths``: + should contain a bitmask of the supported destination transfer width + +- ``directions``: + should contain a bitmask of the supported slave directions + (i.e. excluding mem2mem transfers) + +- ``residue_granularity``: + granularity of the transfer residue reported to dma_set_residue. + This can be either: + + - Descriptor: + your device doesn't support any kind of residue + reporting. The framework will only know that a particular + transaction descriptor is done. + + - Segment: + your device is able to report which chunks have been transferred + + - Burst: + your device is able to report which burst have been transferred + +- ``dev``: should hold the pointer to the ``struct device`` associated + to your current driver instance. + +Supported transaction types +--------------------------- + +The next thing you need is to set which transaction types your device +(and driver) supports. + +Our ``dma_device structure`` has a field called cap_mask that holds the +various types of transaction supported, and you need to modify this +mask using the dma_cap_set function, with various flags depending on +transaction types you support as an argument. + +All those capabilities are defined in the ``dma_transaction_type enum``, +in ``include/linux/dmaengine.h`` + +Currently, the types available are: + +- DMA_MEMCPY + + - The device is able to do memory to memory copies + + - No matter what the overall size of the combined chunks for source and + destination is, only as many bytes as the smallest of the two will be + transmitted. That means the number and size of the scatter-gather buffers in + both lists need not be the same, and that the operation functionally is + equivalent to a ``strncpy`` where the ``count`` argument equals the smallest + total size of the two scatter-gather list buffers. + + - It's usually used for copying pixel data between host memory and + memory-mapped GPU device memory, such as found on modern PCI video graphics + cards. The most immediate example is the OpenGL API function + ``glReadPielx()``, which might require a verbatim copy of a huge framebuffer + from local device memory onto host memory. + +- DMA_XOR + + - The device is able to perform XOR operations on memory areas + + - Used to accelerate XOR intensive tasks, such as RAID5 + +- DMA_XOR_VAL + + - The device is able to perform parity check using the XOR + algorithm against a memory buffer. + +- DMA_PQ + + - The device is able to perform RAID6 P+Q computations, P being a + simple XOR, and Q being a Reed-Solomon algorithm. + +- DMA_PQ_VAL + + - The device is able to perform parity check using RAID6 P+Q + algorithm against a memory buffer. + +- DMA_MEMSET + + - The device is able to fill memory with the provided pattern + + - The pattern is treated as a single byte signed value. + +- DMA_INTERRUPT + + - The device is able to trigger a dummy transfer that will + generate periodic interrupts + + - Used by the client drivers to register a callback that will be + called on a regular basis through the DMA controller interrupt + +- DMA_PRIVATE + + - The devices only supports slave transfers, and as such isn't + available for async transfers. + +- DMA_ASYNC_TX + + - Must not be set by the device, and will be set by the framework + if needed + + - TODO: What is it about? + +- DMA_SLAVE + + - The device can handle device to memory transfers, including + scatter-gather transfers. + + - While in the mem2mem case we were having two distinct types to + deal with a single chunk to copy or a collection of them, here, + we just have a single transaction type that is supposed to + handle both. + + - If you want to transfer a single contiguous memory buffer, + simply build a scatter list with only one item. + +- DMA_CYCLIC + + - The device can handle cyclic transfers. + + - A cyclic transfer is a transfer where the chunk collection will + loop over itself, with the last item pointing to the first. + + - It's usually used for audio transfers, where you want to operate + on a single ring buffer that you will fill with your audio data. + +- DMA_INTERLEAVE + + - The device supports interleaved transfer. + + - These transfers can transfer data from a non-contiguous buffer + to a non-contiguous buffer, opposed to DMA_SLAVE that can + transfer data from a non-contiguous data set to a continuous + destination buffer. + + - It's usually used for 2d content transfers, in which case you + want to transfer a portion of uncompressed data directly to the + display to print it + +- DMA_COMPLETION_NO_ORDER + + - The device does not support in order completion. + + - The driver should return DMA_OUT_OF_ORDER for device_tx_status if + the device is setting this capability. + + - All cookie tracking and checking API should be treated as invalid if + the device exports this capability. + + - At this point, this is incompatible with polling option for dmatest. + + - If this cap is set, the user is recommended to provide an unique + identifier for each descriptor sent to the DMA device in order to + properly track the completion. + +- DMA_REPEAT + + - The device supports repeated transfers. A repeated transfer, indicated by + the DMA_PREP_REPEAT transfer flag, is similar to a cyclic transfer in that + it gets automatically repeated when it ends, but can additionally be + replaced by the client. + + - This feature is limited to interleaved transfers, this flag should thus not + be set if the DMA_INTERLEAVE flag isn't set. This limitation is based on + the current needs of DMA clients, support for additional transfer types + should be added in the future if and when the need arises. + +- DMA_LOAD_EOT + + - The device supports replacing repeated transfers at end of transfer (EOT) + by queuing a new transfer with the DMA_PREP_LOAD_EOT flag set. + + - Support for replacing a currently running transfer at another point (such + as end of burst instead of end of transfer) will be added in the future + based on DMA clients needs, if and when the need arises. + +These various types will also affect how the source and destination +addresses change over time. + +Addresses pointing to RAM are typically incremented (or decremented) +after each transfer. In case of a ring buffer, they may loop +(DMA_CYCLIC). Addresses pointing to a device's register (e.g. a FIFO) +are typically fixed. + +Per descriptor metadata support +------------------------------- +Some data movement architecture (DMA controller and peripherals) uses metadata +associated with a transaction. The DMA controller role is to transfer the +payload and the metadata alongside. +The metadata itself is not used by the DMA engine itself, but it contains +parameters, keys, vectors, etc for peripheral or from the peripheral. + +The DMAengine framework provides a generic ways to facilitate the metadata for +descriptors. Depending on the architecture the DMA driver can implement either +or both of the methods and it is up to the client driver to choose which one +to use. + +- DESC_METADATA_CLIENT + + The metadata buffer is allocated/provided by the client driver and it is + attached (via the dmaengine_desc_attach_metadata() helper to the descriptor. + + From the DMA driver the following is expected for this mode: + + - DMA_MEM_TO_DEV / DEV_MEM_TO_MEM + + The data from the provided metadata buffer should be prepared for the DMA + controller to be sent alongside of the payload data. Either by copying to a + hardware descriptor, or highly coupled packet. + + - DMA_DEV_TO_MEM + + On transfer completion the DMA driver must copy the metadata to the client + provided metadata buffer before notifying the client about the completion. + After the transfer completion, DMA drivers must not touch the metadata + buffer provided by the client. + +- DESC_METADATA_ENGINE + + The metadata buffer is allocated/managed by the DMA driver. The client driver + can ask for the pointer, maximum size and the currently used size of the + metadata and can directly update or read it. dmaengine_desc_get_metadata_ptr() + and dmaengine_desc_set_metadata_len() is provided as helper functions. + + From the DMA driver the following is expected for this mode: + + - get_metadata_ptr() + + Should return a pointer for the metadata buffer, the maximum size of the + metadata buffer and the currently used / valid (if any) bytes in the buffer. + + - set_metadata_len() + + It is called by the clients after it have placed the metadata to the buffer + to let the DMA driver know the number of valid bytes provided. + + Note: since the client will ask for the metadata pointer in the completion + callback (in DMA_DEV_TO_MEM case) the DMA driver must ensure that the + descriptor is not freed up prior the callback is called. + +Device operations +----------------- + +Our dma_device structure also requires a few function pointers in +order to implement the actual logic, now that we described what +operations we were able to perform. + +The functions that we have to fill in there, and hence have to +implement, obviously depend on the transaction types you reported as +supported. + +- ``device_alloc_chan_resources`` + +- ``device_free_chan_resources`` + + - These functions will be called whenever a driver will call + ``dma_request_channel`` or ``dma_release_channel`` for the first/last + time on the channel associated to that driver. + + - They are in charge of allocating/freeing all the needed + resources in order for that channel to be useful for your driver. + + - These functions can sleep. + +- ``device_prep_dma_*`` + + - These functions are matching the capabilities you registered + previously. + + - These functions all take the buffer or the scatterlist relevant + for the transfer being prepared, and should create a hardware + descriptor or a list of hardware descriptors from it + + - These functions can be called from an interrupt context + + - Any allocation you might do should be using the GFP_NOWAIT + flag, in order not to potentially sleep, but without depleting + the emergency pool either. + + - Drivers should try to pre-allocate any memory they might need + during the transfer setup at probe time to avoid putting to + much pressure on the nowait allocator. + + - It should return a unique instance of the + ``dma_async_tx_descriptor structure``, that further represents this + particular transfer. + + - This structure can be initialized using the function + ``dma_async_tx_descriptor_init``. + + - You'll also need to set two fields in this structure: + + - flags: + TODO: Can it be modified by the driver itself, or + should it be always the flags passed in the arguments + + - tx_submit: A pointer to a function you have to implement, + that is supposed to push the current transaction descriptor to a + pending queue, waiting for issue_pending to be called. + + - In this structure the function pointer callback_result can be + initialized in order for the submitter to be notified that a + transaction has completed. In the earlier code the function pointer + callback has been used. However it does not provide any status to the + transaction and will be deprecated. The result structure defined as + ``dmaengine_result`` that is passed in to callback_result + has two fields: + + - result: This provides the transfer result defined by + ``dmaengine_tx_result``. Either success or some error condition. + + - residue: Provides the residue bytes of the transfer for those that + support residue. + +- ``device_issue_pending`` + + - Takes the first transaction descriptor in the pending queue, + and starts the transfer. Whenever that transfer is done, it + should move to the next transaction in the list. + + - This function can be called in an interrupt context + +- ``device_tx_status`` + + - Should report the bytes left to go over on the given channel + + - Should only care about the transaction descriptor passed as + argument, not the currently active one on a given channel + + - The tx_state argument might be NULL + + - Should use dma_set_residue to report it + + - In the case of a cyclic transfer, it should only take into + account the total size of the cyclic buffer. + + - Should return DMA_OUT_OF_ORDER if the device does not support in order + completion and is completing the operation out of order. + + - This function can be called in an interrupt context. + +- device_config + + - Reconfigures the channel with the configuration given as argument + + - This command should NOT perform synchronously, or on any + currently queued transfers, but only on subsequent ones + + - In this case, the function will receive a ``dma_slave_config`` + structure pointer as an argument, that will detail which + configuration to use. + + - Even though that structure contains a direction field, this + field is deprecated in favor of the direction argument given to + the prep_* functions + + - This call is mandatory for slave operations only. This should NOT be + set or expected to be set for memcpy operations. + If a driver support both, it should use this call for slave + operations only and not for memcpy ones. + +- device_pause + + - Pauses a transfer on the channel + + - This command should operate synchronously on the channel, + pausing right away the work of the given channel + +- device_resume + + - Resumes a transfer on the channel + + - This command should operate synchronously on the channel, + resuming right away the work of the given channel + +- device_terminate_all + + - Aborts all the pending and ongoing transfers on the channel + + - For aborted transfers the complete callback should not be called + + - Can be called from atomic context or from within a complete + callback of a descriptor. Must not sleep. Drivers must be able + to handle this correctly. + + - Termination may be asynchronous. The driver does not have to + wait until the currently active transfer has completely stopped. + See device_synchronize. + +- device_synchronize + + - Must synchronize the termination of a channel to the current + context. + + - Must make sure that memory for previously submitted + descriptors is no longer accessed by the DMA controller. + + - Must make sure that all complete callbacks for previously + submitted descriptors have finished running and none are + scheduled to run. + + - May sleep. + + +Misc notes +========== + +(stuff that should be documented, but don't really know +where to put them) + +``dma_run_dependencies`` + +- Should be called at the end of an async TX transfer, and can be + ignored in the slave transfers case. + +- Makes sure that dependent operations are run before marking it + as complete. + +dma_cookie_t + +- it's a DMA transaction ID that will increment over time. + +- Not really relevant any more since the introduction of ``virt-dma`` + that abstracts it away. + +DMA_CTRL_ACK + +- If clear, the descriptor cannot be reused by provider until the + client acknowledges receipt, i.e. has a chance to establish any + dependency chains + +- This can be acked by invoking async_tx_ack() + +- If set, does not mean descriptor can be reused + +DMA_CTRL_REUSE + +- If set, the descriptor can be reused after being completed. It should + not be freed by provider if this flag is set. + +- The descriptor should be prepared for reuse by invoking + ``dmaengine_desc_set_reuse()`` which will set DMA_CTRL_REUSE. + +- ``dmaengine_desc_set_reuse()`` will succeed only when channel support + reusable descriptor as exhibited by capabilities + +- As a consequence, if a device driver wants to skip the + ``dma_map_sg()`` and ``dma_unmap_sg()`` in between 2 transfers, + because the DMA'd data wasn't used, it can resubmit the transfer right after + its completion. + +- Descriptor can be freed in few ways + + - Clearing DMA_CTRL_REUSE by invoking + ``dmaengine_desc_clear_reuse()`` and submitting for last txn + + - Explicitly invoking ``dmaengine_desc_free()``, this can succeed only + when DMA_CTRL_REUSE is already set + + - Terminating the channel + +- DMA_PREP_CMD + + - If set, the client driver tells DMA controller that passed data in DMA + API is command data. + + - Interpretation of command data is DMA controller specific. It can be + used for issuing commands to other peripherals/register reads/register + writes for which the descriptor should be in different format from + normal data descriptors. + +- DMA_PREP_REPEAT + + - If set, the transfer will be automatically repeated when it ends until a + new transfer is queued on the same channel with the DMA_PREP_LOAD_EOT flag. + If the next transfer to be queued on the channel does not have the + DMA_PREP_LOAD_EOT flag set, the current transfer will be repeated until the + client terminates all transfers. + + - This flag is only supported if the channel reports the DMA_REPEAT + capability. + +- DMA_PREP_LOAD_EOT + + - If set, the transfer will replace the transfer currently being executed at + the end of the transfer. + + - This is the default behaviour for non-repeated transfers, specifying + DMA_PREP_LOAD_EOT for non-repeated transfers will thus make no difference. + + - When using repeated transfers, DMA clients will usually need to set the + DMA_PREP_LOAD_EOT flag on all transfers, otherwise the channel will keep + repeating the last repeated transfer and ignore the new transfers being + queued. Failure to set DMA_PREP_LOAD_EOT will appear as if the channel was + stuck on the previous transfer. + + - This flag is only supported if the channel reports the DMA_LOAD_EOT + capability. + +General Design Notes +==================== + +Most of the DMAEngine drivers you'll see are based on a similar design +that handles the end of transfer interrupts in the handler, but defer +most work to a tasklet, including the start of a new transfer whenever +the previous transfer ended. + +This is a rather inefficient design though, because the inter-transfer +latency will be not only the interrupt latency, but also the +scheduling latency of the tasklet, which will leave the channel idle +in between, which will slow down the global transfer rate. + +You should avoid this kind of practice, and instead of electing a new +transfer in your tasklet, move that part to the interrupt handler in +order to have a shorter idle window (that we can't really avoid +anyway). + +Glossary +======== + +- Burst: A number of consecutive read or write operations that + can be queued to buffers before being flushed to memory. + +- Chunk: A contiguous collection of bursts + +- Transfer: A collection of chunks (be it contiguous or not) diff --git a/Documentation/driver-api/dmaengine/pxa_dma.rst b/Documentation/driver-api/dmaengine/pxa_dma.rst new file mode 100644 index 0000000000..442ee691a1 --- /dev/null +++ b/Documentation/driver-api/dmaengine/pxa_dma.rst @@ -0,0 +1,190 @@ +============================== +PXA/MMP - DMA Slave controller +============================== + +Constraints +=========== + +a) Transfers hot queuing +A driver submitting a transfer and issuing it should be granted the transfer +is queued even on a running DMA channel. +This implies that the queuing doesn't wait for the previous transfer end, +and that the descriptor chaining is not only done in the irq/tasklet code +triggered by the end of the transfer. +A transfer which is submitted and issued on a phy doesn't wait for a phy to +stop and restart, but is submitted on a "running channel". The other +drivers, especially mmp_pdma waited for the phy to stop before relaunching +a new transfer. + +b) All transfers having asked for confirmation should be signaled +Any issued transfer with DMA_PREP_INTERRUPT should trigger a callback call. +This implies that even if an irq/tasklet is triggered by end of tx1, but +at the time of irq/dma tx2 is already finished, tx1->complete() and +tx2->complete() should be called. + +c) Channel running state +A driver should be able to query if a channel is running or not. For the +multimedia case, such as video capture, if a transfer is submitted and then +a check of the DMA channel reports a "stopped channel", the transfer should +not be issued until the next "start of frame interrupt", hence the need to +know if a channel is in running or stopped state. + +d) Bandwidth guarantee +The PXA architecture has 4 levels of DMAs priorities : high, normal, low. +The high priorities get twice as much bandwidth as the normal, which get twice +as much as the low priorities. +A driver should be able to request a priority, especially the real-time +ones such as pxa_camera with (big) throughputs. + +Design +====== +a) Virtual channels +Same concept as in sa11x0 driver, ie. a driver was assigned a "virtual +channel" linked to the requestor line, and the physical DMA channel is +assigned on the fly when the transfer is issued. + +b) Transfer anatomy for a scatter-gather transfer + +:: + + +------------+-----+---------------+----------------+-----------------+ + | desc-sg[0] | ... | desc-sg[last] | status updater | finisher/linker | + +------------+-----+---------------+----------------+-----------------+ + +This structure is pointed by dma->sg_cpu. +The descriptors are used as follows : + + - desc-sg[i]: i-th descriptor, transferring the i-th sg + element to the video buffer scatter gather + + - status updater + Transfers a single u32 to a well known dma coherent memory to leave + a trace that this transfer is done. The "well known" is unique per + physical channel, meaning that a read of this value will tell which + is the last finished transfer at that point in time. + + - finisher: has ddadr=DADDR_STOP, dcmd=ENDIRQEN + + - linker: has ddadr= desc-sg[0] of next transfer, dcmd=0 + +c) Transfers hot-chaining +Suppose the running chain is: + +:: + + Buffer 1 Buffer 2 + +---------+----+---+ +----+----+----+---+ + | d0 | .. | dN | l | | d0 | .. | dN | f | + +---------+----+-|-+ ^----+----+----+---+ + | | + +----+ + +After a call to dmaengine_submit(b3), the chain will look like: + +:: + + Buffer 1 Buffer 2 Buffer 3 + +---------+----+---+ +----+----+----+---+ +----+----+----+---+ + | d0 | .. | dN | l | | d0 | .. | dN | l | | d0 | .. | dN | f | + +---------+----+-|-+ ^----+----+----+-|-+ ^----+----+----+---+ + | | | | + +----+ +----+ + new_link + +If while new_link was created the DMA channel stopped, it is _not_ +restarted. Hot-chaining doesn't break the assumption that +dma_async_issue_pending() is to be used to ensure the transfer is actually started. + +One exception to this rule : + +- if Buffer1 and Buffer2 had all their addresses 8 bytes aligned + +- and if Buffer3 has at least one address not 4 bytes aligned + +- then hot-chaining cannot happen, as the channel must be stopped, the + "align bit" must be set, and the channel restarted As a consequence, + such a transfer tx_submit() will be queued on the submitted queue, and + this specific case if the DMA is already running in aligned mode. + +d) Transfers completion updater +Each time a transfer is completed on a channel, an interrupt might be +generated or not, up to the client's request. But in each case, the last +descriptor of a transfer, the "status updater", will write the latest +transfer being completed into the physical channel's completion mark. + +This will speed up residue calculation, for large transfers such as video +buffers which hold around 6k descriptors or more. This also allows without +any lock to find out what is the latest completed transfer in a running +DMA chain. + +e) Transfers completion, irq and tasklet +When a transfer flagged as "DMA_PREP_INTERRUPT" is finished, the dma irq +is raised. Upon this interrupt, a tasklet is scheduled for the physical +channel. + +The tasklet is responsible for : + +- reading the physical channel last updater mark + +- calling all the transfer callbacks of finished transfers, based on + that mark, and each transfer flags. + +If a transfer is completed while this handling is done, a dma irq will +be raised, and the tasklet will be scheduled once again, having a new +updater mark. + +f) Residue +Residue granularity will be descriptor based. The issued but not completed +transfers will be scanned for all of their descriptors against the +currently running descriptor. + +g) Most complicated case of driver's tx queues +The most tricky situation is when : + + - there are not "acked" transfers (tx0) + + - a driver submitted an aligned tx1, not chained + + - a driver submitted an aligned tx2 => tx2 is cold chained to tx1 + + - a driver issued tx1+tx2 => channel is running in aligned mode + + - a driver submitted an aligned tx3 => tx3 is hot-chained + + - a driver submitted an unaligned tx4 => tx4 is put in submitted queue, + not chained + + - a driver issued tx4 => tx4 is put in issued queue, not chained + + - a driver submitted an aligned tx5 => tx5 is put in submitted queue, not + chained + + - a driver submitted an aligned tx6 => tx6 is put in submitted queue, + cold chained to tx5 + + This translates into (after tx4 is issued) : + + - issued queue + + :: + + +-----+ +-----+ +-----+ +-----+ + | tx1 | | tx2 | | tx3 | | tx4 | + +---|-+ ^---|-+ ^-----+ +-----+ + | | | | + +---+ +---+ + - submitted queue + +-----+ +-----+ + | tx5 | | tx6 | + +---|-+ ^-----+ + | | + +---+ + +- completed queue : empty + +- allocated queue : tx0 + +It should be noted that after tx3 is completed, the channel is stopped, and +restarted in "unaligned mode" to handle tx4. + +Author: Robert Jarzmik <robert.jarzmik@free.fr> diff --git a/Documentation/driver-api/driver-model/binding.rst b/Documentation/driver-api/driver-model/binding.rst new file mode 100644 index 0000000000..7ea1d7a41e --- /dev/null +++ b/Documentation/driver-api/driver-model/binding.rst @@ -0,0 +1,98 @@ +============== +Driver Binding +============== + +Driver binding is the process of associating a device with a device +driver that can control it. Bus drivers have typically handled this +because there have been bus-specific structures to represent the +devices and the drivers. With generic device and device driver +structures, most of the binding can take place using common code. + + +Bus +~~~ + +The bus type structure contains a list of all devices that are on that bus +type in the system. When device_register is called for a device, it is +inserted into the end of this list. The bus object also contains a +list of all drivers of that bus type. When driver_register is called +for a driver, it is inserted at the end of this list. These are the +two events which trigger driver binding. + + +device_register +~~~~~~~~~~~~~~~ + +When a new device is added, the bus's list of drivers is iterated over +to find one that supports it. In order to determine that, the device +ID of the device must match one of the device IDs that the driver +supports. The format and semantics for comparing IDs is bus-specific. +Instead of trying to derive a complex state machine and matching +algorithm, it is up to the bus driver to provide a callback to compare +a device against the IDs of a driver. The bus returns 1 if a match was +found; 0 otherwise. + +int match(struct device * dev, struct device_driver * drv); + +If a match is found, the device's driver field is set to the driver +and the driver's probe callback is called. This gives the driver a +chance to verify that it really does support the hardware, and that +it's in a working state. + +Device Class +~~~~~~~~~~~~ + +Upon the successful completion of probe, the device is registered with +the class to which it belongs. Device drivers belong to one and only one +class, and that is set in the driver's devclass field. +devclass_add_device is called to enumerate the device within the class +and actually register it with the class, which happens with the +class's register_dev callback. + + +Driver +~~~~~~ + +When a driver is attached to a device, the device is inserted into the +driver's list of devices. + + +sysfs +~~~~~ + +A symlink is created in the bus's 'devices' directory that points to +the device's directory in the physical hierarchy. + +A symlink is created in the driver's 'devices' directory that points +to the device's directory in the physical hierarchy. + +A directory for the device is created in the class's directory. A +symlink is created in that directory that points to the device's +physical location in the sysfs tree. + +A symlink can be created (though this isn't done yet) in the device's +physical directory to either its class directory, or the class's +top-level directory. One can also be created to point to its driver's +directory also. + + +driver_register +~~~~~~~~~~~~~~~ + +The process is almost identical for when a new driver is added. +The bus's list of devices is iterated over to find a match. Devices +that already have a driver are skipped. All the devices are iterated +over, to bind as many devices as possible to the driver. + + +Removal +~~~~~~~ + +When a device is removed, the reference count for it will eventually +go to 0. When it does, the remove callback of the driver is called. It +is removed from the driver's list of devices and the reference count +of the driver is decremented. All symlinks between the two are removed. + +When a driver is removed, the list of devices that it supports is +iterated over, and the driver's remove callback is called for each +one. The device is removed from that list and the symlinks removed. diff --git a/Documentation/driver-api/driver-model/bus.rst b/Documentation/driver-api/driver-model/bus.rst new file mode 100644 index 0000000000..9709ab62a4 --- /dev/null +++ b/Documentation/driver-api/driver-model/bus.rst @@ -0,0 +1,146 @@ +========= +Bus Types +========= + +Definition +~~~~~~~~~~ +See the kerneldoc for the struct bus_type. + +int bus_register(struct bus_type * bus); + + +Declaration +~~~~~~~~~~~ + +Each bus type in the kernel (PCI, USB, etc) should declare one static +object of this type. They must initialize the name field, and may +optionally initialize the match callback:: + + struct bus_type pci_bus_type = { + .name = "pci", + .match = pci_bus_match, + }; + +The structure should be exported to drivers in a header file: + +extern struct bus_type pci_bus_type; + + +Registration +~~~~~~~~~~~~ + +When a bus driver is initialized, it calls bus_register. This +initializes the rest of the fields in the bus object and inserts it +into a global list of bus types. Once the bus object is registered, +the fields in it are usable by the bus driver. + + +Callbacks +~~~~~~~~~ + +match(): Attaching Drivers to Devices +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ + +The format of device ID structures and the semantics for comparing +them are inherently bus-specific. Drivers typically declare an array +of device IDs of devices they support that reside in a bus-specific +driver structure. + +The purpose of the match callback is to give the bus an opportunity to +determine if a particular driver supports a particular device by +comparing the device IDs the driver supports with the device ID of a +particular device, without sacrificing bus-specific functionality or +type-safety. + +When a driver is registered with the bus, the bus's list of devices is +iterated over, and the match callback is called for each device that +does not have a driver associated with it. + + + +Device and Driver Lists +~~~~~~~~~~~~~~~~~~~~~~~ + +The lists of devices and drivers are intended to replace the local +lists that many buses keep. They are lists of struct devices and +struct device_drivers, respectively. Bus drivers are free to use the +lists as they please, but conversion to the bus-specific type may be +necessary. + +The LDM core provides helper functions for iterating over each list:: + + int bus_for_each_dev(struct bus_type * bus, struct device * start, + void * data, + int (*fn)(struct device *, void *)); + + int bus_for_each_drv(struct bus_type * bus, struct device_driver * start, + void * data, int (*fn)(struct device_driver *, void *)); + +These helpers iterate over the respective list, and call the callback +for each device or driver in the list. All list accesses are +synchronized by taking the bus's lock (read currently). The reference +count on each object in the list is incremented before the callback is +called; it is decremented after the next object has been obtained. The +lock is not held when calling the callback. + + +sysfs +~~~~~~~~ +There is a top-level directory named 'bus'. + +Each bus gets a directory in the bus directory, along with two default +directories:: + + /sys/bus/pci/ + |-- devices + `-- drivers + +Drivers registered with the bus get a directory in the bus's drivers +directory:: + + /sys/bus/pci/ + |-- devices + `-- drivers + |-- Intel ICH + |-- Intel ICH Joystick + |-- agpgart + `-- e100 + +Each device that is discovered on a bus of that type gets a symlink in +the bus's devices directory to the device's directory in the physical +hierarchy:: + + /sys/bus/pci/ + |-- devices + | |-- 00:00.0 -> ../../../root/pci0/00:00.0 + | |-- 00:01.0 -> ../../../root/pci0/00:01.0 + | `-- 00:02.0 -> ../../../root/pci0/00:02.0 + `-- drivers + + +Exporting Attributes +~~~~~~~~~~~~~~~~~~~~ + +:: + + struct bus_attribute { + struct attribute attr; + ssize_t (*show)(const struct bus_type *, char * buf); + ssize_t (*store)(const struct bus_type *, const char * buf, size_t count); + }; + +Bus drivers can export attributes using the BUS_ATTR_RW macro that works +similarly to the DEVICE_ATTR_RW macro for devices. For example, a +definition like this:: + + static BUS_ATTR_RW(debug); + +is equivalent to declaring:: + + static bus_attribute bus_attr_debug; + +This can then be used to add and remove the attribute from the bus's +sysfs directory using:: + + int bus_create_file(struct bus_type *, struct bus_attribute *); + void bus_remove_file(struct bus_type *, struct bus_attribute *); diff --git a/Documentation/driver-api/driver-model/design-patterns.rst b/Documentation/driver-api/driver-model/design-patterns.rst new file mode 100644 index 0000000000..41eb8f41f7 --- /dev/null +++ b/Documentation/driver-api/driver-model/design-patterns.rst @@ -0,0 +1,116 @@ +============================= +Device Driver Design Patterns +============================= + +This document describes a few common design patterns found in device drivers. +It is likely that subsystem maintainers will ask driver developers to +conform to these design patterns. + +1. State Container +2. container_of() + + +1. State Container +~~~~~~~~~~~~~~~~~~ + +While the kernel contains a few device drivers that assume that they will +only be probed() once on a certain system (singletons), it is custom to assume +that the device the driver binds to will appear in several instances. This +means that the probe() function and all callbacks need to be reentrant. + +The most common way to achieve this is to use the state container design +pattern. It usually has this form:: + + struct foo { + spinlock_t lock; /* Example member */ + (...) + }; + + static int foo_probe(...) + { + struct foo *foo; + + foo = devm_kzalloc(dev, sizeof(*foo), GFP_KERNEL); + if (!foo) + return -ENOMEM; + spin_lock_init(&foo->lock); + (...) + } + +This will create an instance of struct foo in memory every time probe() is +called. This is our state container for this instance of the device driver. +Of course it is then necessary to always pass this instance of the +state around to all functions that need access to the state and its members. + +For example, if the driver is registering an interrupt handler, you would +pass around a pointer to struct foo like this:: + + static irqreturn_t foo_handler(int irq, void *arg) + { + struct foo *foo = arg; + (...) + } + + static int foo_probe(...) + { + struct foo *foo; + + (...) + ret = request_irq(irq, foo_handler, 0, "foo", foo); + } + +This way you always get a pointer back to the correct instance of foo in +your interrupt handler. + + +2. container_of() +~~~~~~~~~~~~~~~~~ + +Continuing on the above example we add an offloaded work:: + + struct foo { + spinlock_t lock; + struct workqueue_struct *wq; + struct work_struct offload; + (...) + }; + + static void foo_work(struct work_struct *work) + { + struct foo *foo = container_of(work, struct foo, offload); + + (...) + } + + static irqreturn_t foo_handler(int irq, void *arg) + { + struct foo *foo = arg; + + queue_work(foo->wq, &foo->offload); + (...) + } + + static int foo_probe(...) + { + struct foo *foo; + + foo->wq = create_singlethread_workqueue("foo-wq"); + INIT_WORK(&foo->offload, foo_work); + (...) + } + +The design pattern is the same for an hrtimer or something similar that will +return a single argument which is a pointer to a struct member in the +callback. + +container_of() is a macro defined in <linux/kernel.h> + +What container_of() does is to obtain a pointer to the containing struct from +a pointer to a member by a simple subtraction using the offsetof() macro from +standard C, which allows something similar to object oriented behaviours. +Notice that the contained member must not be a pointer, but an actual member +for this to work. + +We can see here that we avoid having global pointers to our struct foo * +instance this way, while still keeping the number of parameters passed to the +work function to a single pointer. diff --git a/Documentation/driver-api/driver-model/device.rst b/Documentation/driver-api/driver-model/device.rst new file mode 100644 index 0000000000..0833be568b --- /dev/null +++ b/Documentation/driver-api/driver-model/device.rst @@ -0,0 +1,120 @@ +========================== +The Basic Device Structure +========================== + +See the kerneldoc for the struct device. + + +Programming Interface +~~~~~~~~~~~~~~~~~~~~~ +The bus driver that discovers the device uses this to register the +device with the core:: + + int device_register(struct device * dev); + +The bus should initialize the following fields: + + - parent + - name + - bus_id + - bus + +A device is removed from the core when its reference count goes to +0. The reference count can be adjusted using:: + + struct device * get_device(struct device * dev); + void put_device(struct device * dev); + +get_device() will return a pointer to the struct device passed to it +if the reference is not already 0 (if it's in the process of being +removed already). + +A driver can access the lock in the device structure using:: + + void lock_device(struct device * dev); + void unlock_device(struct device * dev); + + +Attributes +~~~~~~~~~~ + +:: + + struct device_attribute { + struct attribute attr; + ssize_t (*show)(struct device *dev, struct device_attribute *attr, + char *buf); + ssize_t (*store)(struct device *dev, struct device_attribute *attr, + const char *buf, size_t count); + }; + +Attributes of devices can be exported by a device driver through sysfs. + +Please see Documentation/filesystems/sysfs.rst for more information +on how sysfs works. + +As explained in Documentation/core-api/kobject.rst, device attributes must be +created before the KOBJ_ADD uevent is generated. The only way to realize +that is by defining an attribute group. + +Attributes are declared using a macro called DEVICE_ATTR:: + + #define DEVICE_ATTR(name,mode,show,store) + +Example::: + + static DEVICE_ATTR(type, 0444, type_show, NULL); + static DEVICE_ATTR(power, 0644, power_show, power_store); + +Helper macros are available for common values of mode, so the above examples +can be simplified to::: + + static DEVICE_ATTR_RO(type); + static DEVICE_ATTR_RW(power); + +This declares two structures of type struct device_attribute with respective +names 'dev_attr_type' and 'dev_attr_power'. These two attributes can be +organized as follows into a group:: + + static struct attribute *dev_attrs[] = { + &dev_attr_type.attr, + &dev_attr_power.attr, + NULL, + }; + + static struct attribute_group dev_group = { + .attrs = dev_attrs, + }; + + static const struct attribute_group *dev_groups[] = { + &dev_group, + NULL, + }; + +A helper macro is available for the common case of a single group, so the +above two structures can be declared using::: + + ATTRIBUTE_GROUPS(dev); + +This array of groups can then be associated with a device by setting the +group pointer in struct device before device_register() is invoked:: + + dev->groups = dev_groups; + device_register(dev); + +The device_register() function will use the 'groups' pointer to create the +device attributes and the device_unregister() function will use this pointer +to remove the device attributes. + +Word of warning: While the kernel allows device_create_file() and +device_remove_file() to be called on a device at any time, userspace has +strict expectations on when attributes get created. When a new device is +registered in the kernel, a uevent is generated to notify userspace (like +udev) that a new device is available. If attributes are added after the +device is registered, then userspace won't get notified and userspace will +not know about the new attributes. + +This is important for device driver that need to publish additional +attributes for a device at driver probe time. If the device driver simply +calls device_create_file() on the device structure passed to it, then +userspace will never be notified of the new attributes. diff --git a/Documentation/driver-api/driver-model/devres.rst b/Documentation/driver-api/driver-model/devres.rst new file mode 100644 index 0000000000..8be086b3f8 --- /dev/null +++ b/Documentation/driver-api/driver-model/devres.rst @@ -0,0 +1,462 @@ +================================ +Devres - Managed Device Resource +================================ + +Tejun Heo <teheo@suse.de> + +First draft 10 January 2007 + +.. contents + + 1. Intro : Huh? Devres? + 2. Devres : Devres in a nutshell + 3. Devres Group : Group devres'es and release them together + 4. Details : Life time rules, calling context, ... + 5. Overhead : How much do we have to pay for this? + 6. List of managed interfaces: Currently implemented managed interfaces + + +1. Intro +-------- + +devres came up while trying to convert libata to use iomap. Each +iomapped address should be kept and unmapped on driver detach. For +example, a plain SFF ATA controller (that is, good old PCI IDE) in +native mode makes use of 5 PCI BARs and all of them should be +maintained. + +As with many other device drivers, libata low level drivers have +sufficient bugs in ->remove and ->probe failure path. Well, yes, +that's probably because libata low level driver developers are lazy +bunch, but aren't all low level driver developers? After spending a +day fiddling with braindamaged hardware with no document or +braindamaged document, if it's finally working, well, it's working. + +For one reason or another, low level drivers don't receive as much +attention or testing as core code, and bugs on driver detach or +initialization failure don't happen often enough to be noticeable. +Init failure path is worse because it's much less travelled while +needs to handle multiple entry points. + +So, many low level drivers end up leaking resources on driver detach +and having half broken failure path implementation in ->probe() which +would leak resources or even cause oops when failure occurs. iomap +adds more to this mix. So do msi and msix. + + +2. Devres +--------- + +devres is basically linked list of arbitrarily sized memory areas +associated with a struct device. Each devres entry is associated with +a release function. A devres can be released in several ways. No +matter what, all devres entries are released on driver detach. On +release, the associated release function is invoked and then the +devres entry is freed. + +Managed interface is created for resources commonly used by device +drivers using devres. For example, coherent DMA memory is acquired +using dma_alloc_coherent(). The managed version is called +dmam_alloc_coherent(). It is identical to dma_alloc_coherent() except +for the DMA memory allocated using it is managed and will be +automatically released on driver detach. Implementation looks like +the following:: + + struct dma_devres { + size_t size; + void *vaddr; + dma_addr_t dma_handle; + }; + + static void dmam_coherent_release(struct device *dev, void *res) + { + struct dma_devres *this = res; + + dma_free_coherent(dev, this->size, this->vaddr, this->dma_handle); + } + + dmam_alloc_coherent(dev, size, dma_handle, gfp) + { + struct dma_devres *dr; + void *vaddr; + + dr = devres_alloc(dmam_coherent_release, sizeof(*dr), gfp); + ... + + /* alloc DMA memory as usual */ + vaddr = dma_alloc_coherent(...); + ... + + /* record size, vaddr, dma_handle in dr */ + dr->vaddr = vaddr; + ... + + devres_add(dev, dr); + + return vaddr; + } + +If a driver uses dmam_alloc_coherent(), the area is guaranteed to be +freed whether initialization fails half-way or the device gets +detached. If most resources are acquired using managed interface, a +driver can have much simpler init and exit code. Init path basically +looks like the following:: + + my_init_one() + { + struct mydev *d; + + d = devm_kzalloc(dev, sizeof(*d), GFP_KERNEL); + if (!d) + return -ENOMEM; + + d->ring = dmam_alloc_coherent(...); + if (!d->ring) + return -ENOMEM; + + if (check something) + return -EINVAL; + ... + + return register_to_upper_layer(d); + } + +And exit path:: + + my_remove_one() + { + unregister_from_upper_layer(d); + shutdown_my_hardware(); + } + +As shown above, low level drivers can be simplified a lot by using +devres. Complexity is shifted from less maintained low level drivers +to better maintained higher layer. Also, as init failure path is +shared with exit path, both can get more testing. + +Note though that when converting current calls or assignments to +managed devm_* versions it is up to you to check if internal operations +like allocating memory, have failed. Managed resources pertains to the +freeing of these resources *only* - all other checks needed are still +on you. In some cases this may mean introducing checks that were not +necessary before moving to the managed devm_* calls. + + +3. Devres group +--------------- + +Devres entries can be grouped using devres group. When a group is +released, all contained normal devres entries and properly nested +groups are released. One usage is to rollback series of acquired +resources on failure. For example:: + + if (!devres_open_group(dev, NULL, GFP_KERNEL)) + return -ENOMEM; + + acquire A; + if (failed) + goto err; + + acquire B; + if (failed) + goto err; + ... + + devres_remove_group(dev, NULL); + return 0; + + err: + devres_release_group(dev, NULL); + return err_code; + +As resource acquisition failure usually means probe failure, constructs +like above are usually useful in midlayer driver (e.g. libata core +layer) where interface function shouldn't have side effect on failure. +For LLDs, just returning error code suffices in most cases. + +Each group is identified by `void *id`. It can either be explicitly +specified by @id argument to devres_open_group() or automatically +created by passing NULL as @id as in the above example. In both +cases, devres_open_group() returns the group's id. The returned id +can be passed to other devres functions to select the target group. +If NULL is given to those functions, the latest open group is +selected. + +For example, you can do something like the following:: + + int my_midlayer_create_something() + { + if (!devres_open_group(dev, my_midlayer_create_something, GFP_KERNEL)) + return -ENOMEM; + + ... + + devres_close_group(dev, my_midlayer_create_something); + return 0; + } + + void my_midlayer_destroy_something() + { + devres_release_group(dev, my_midlayer_create_something); + } + + +4. Details +---------- + +Lifetime of a devres entry begins on devres allocation and finishes +when it is released or destroyed (removed and freed) - no reference +counting. + +devres core guarantees atomicity to all basic devres operations and +has support for single-instance devres types (atomic +lookup-and-add-if-not-found). Other than that, synchronizing +concurrent accesses to allocated devres data is caller's +responsibility. This is usually non-issue because bus ops and +resource allocations already do the job. + +For an example of single-instance devres type, read pcim_iomap_table() +in lib/devres.c. + +All devres interface functions can be called without context if the +right gfp mask is given. + + +5. Overhead +----------- + +Each devres bookkeeping info is allocated together with requested data +area. With debug option turned off, bookkeeping info occupies 16 +bytes on 32bit machines and 24 bytes on 64bit (three pointers rounded +up to ull alignment). If singly linked list is used, it can be +reduced to two pointers (8 bytes on 32bit, 16 bytes on 64bit). + +Each devres group occupies 8 pointers. It can be reduced to 6 if +singly linked list is used. + +Memory space overhead on ahci controller with two ports is between 300 +and 400 bytes on 32bit machine after naive conversion (we can +certainly invest a bit more effort into libata core layer). + + +6. List of managed interfaces +----------------------------- + +CLOCK + devm_clk_get() + devm_clk_get_optional() + devm_clk_put() + devm_clk_bulk_get() + devm_clk_bulk_get_all() + devm_clk_bulk_get_optional() + devm_get_clk_from_child() + devm_clk_hw_register() + devm_of_clk_add_hw_provider() + devm_clk_hw_register_clkdev() + +DMA + dmaenginem_async_device_register() + dmam_alloc_coherent() + dmam_alloc_attrs() + dmam_free_coherent() + dmam_pool_create() + dmam_pool_destroy() + +DRM + devm_drm_dev_alloc() + +GPIO + devm_gpiod_get() + devm_gpiod_get_array() + devm_gpiod_get_array_optional() + devm_gpiod_get_index() + devm_gpiod_get_index_optional() + devm_gpiod_get_optional() + devm_gpiod_put() + devm_gpiod_unhinge() + devm_gpiochip_add_data() + devm_gpio_request() + devm_gpio_request_one() + +I2C + devm_i2c_add_adapter() + devm_i2c_new_dummy_device() + +IIO + devm_iio_device_alloc() + devm_iio_device_register() + devm_iio_dmaengine_buffer_setup() + devm_iio_kfifo_buffer_setup() + devm_iio_kfifo_buffer_setup_ext() + devm_iio_map_array_register() + devm_iio_triggered_buffer_setup() + devm_iio_triggered_buffer_setup_ext() + devm_iio_trigger_alloc() + devm_iio_trigger_register() + devm_iio_channel_get() + devm_iio_channel_get_all() + devm_iio_hw_consumer_alloc() + devm_fwnode_iio_channel_get_by_name() + +INPUT + devm_input_allocate_device() + +IO region + devm_release_mem_region() + devm_release_region() + devm_release_resource() + devm_request_mem_region() + devm_request_free_mem_region() + devm_request_region() + devm_request_resource() + +IOMAP + devm_ioport_map() + devm_ioport_unmap() + devm_ioremap() + devm_ioremap_uc() + devm_ioremap_wc() + devm_ioremap_resource() : checks resource, requests memory region, ioremaps + devm_ioremap_resource_wc() + devm_platform_ioremap_resource() : calls devm_ioremap_resource() for platform device + devm_platform_ioremap_resource_byname() + devm_platform_get_and_ioremap_resource() + devm_iounmap() + pcim_iomap() + pcim_iomap_regions() : do request_region() and iomap() on multiple BARs + pcim_iomap_table() : array of mapped addresses indexed by BAR + pcim_iounmap() + +IRQ + devm_free_irq() + devm_request_any_context_irq() + devm_request_irq() + devm_request_threaded_irq() + devm_irq_alloc_descs() + devm_irq_alloc_desc() + devm_irq_alloc_desc_at() + devm_irq_alloc_desc_from() + devm_irq_alloc_descs_from() + devm_irq_alloc_generic_chip() + devm_irq_setup_generic_chip() + devm_irq_domain_create_sim() + +LED + devm_led_classdev_register() + devm_led_classdev_register_ext() + devm_led_classdev_unregister() + devm_led_trigger_register() + devm_of_led_get() + +MDIO + devm_mdiobus_alloc() + devm_mdiobus_alloc_size() + devm_mdiobus_register() + devm_of_mdiobus_register() + +MEM + devm_free_pages() + devm_get_free_pages() + devm_kasprintf() + devm_kcalloc() + devm_kfree() + devm_kmalloc() + devm_kmalloc_array() + devm_kmemdup() + devm_krealloc() + devm_krealloc_array() + devm_kstrdup() + devm_kstrdup_const() + devm_kvasprintf() + devm_kzalloc() + +MFD + devm_mfd_add_devices() + +MUX + devm_mux_chip_alloc() + devm_mux_chip_register() + devm_mux_control_get() + devm_mux_state_get() + +NET + devm_alloc_etherdev() + devm_alloc_etherdev_mqs() + devm_register_netdev() + +PER-CPU MEM + devm_alloc_percpu() + devm_free_percpu() + +PCI + devm_pci_alloc_host_bridge() : managed PCI host bridge allocation + devm_pci_remap_cfgspace() : ioremap PCI configuration space + devm_pci_remap_cfg_resource() : ioremap PCI configuration space resource + pcim_enable_device() : after success, all PCI ops become managed + pcim_pin_device() : keep PCI device enabled after release + +PHY + devm_usb_get_phy() + devm_usb_get_phy_by_node() + devm_usb_get_phy_by_phandle() + devm_usb_put_phy() + +PINCTRL + devm_pinctrl_get() + devm_pinctrl_put() + devm_pinctrl_get_select() + devm_pinctrl_register() + devm_pinctrl_register_and_init() + devm_pinctrl_unregister() + +POWER + devm_reboot_mode_register() + devm_reboot_mode_unregister() + +PWM + devm_pwmchip_add() + devm_pwm_get() + devm_fwnode_pwm_get() + +REGULATOR + devm_regulator_bulk_register_supply_alias() + devm_regulator_bulk_get() + devm_regulator_bulk_get_const() + devm_regulator_bulk_get_enable() + devm_regulator_bulk_put() + devm_regulator_get() + devm_regulator_get_enable() + devm_regulator_get_enable_optional() + devm_regulator_get_exclusive() + devm_regulator_get_optional() + devm_regulator_irq_helper() + devm_regulator_put() + devm_regulator_register() + devm_regulator_register_notifier() + devm_regulator_register_supply_alias() + devm_regulator_unregister_notifier() + +RESET + devm_reset_control_get() + devm_reset_controller_register() + +RTC + devm_rtc_device_register() + devm_rtc_allocate_device() + devm_rtc_register_device() + devm_rtc_nvmem_register() + +SERDEV + devm_serdev_device_open() + +SLAVE DMA ENGINE + devm_acpi_dma_controller_register() + devm_acpi_dma_controller_free() + +SPI + devm_spi_alloc_master() + devm_spi_alloc_slave() + devm_spi_register_master() + +WATCHDOG + devm_watchdog_register_device() diff --git a/Documentation/driver-api/driver-model/driver.rst b/Documentation/driver-api/driver-model/driver.rst new file mode 100644 index 0000000000..06f818b1d6 --- /dev/null +++ b/Documentation/driver-api/driver-model/driver.rst @@ -0,0 +1,286 @@ +============== +Device Drivers +============== + +See the kerneldoc for the struct device_driver. + +Allocation +~~~~~~~~~~ + +Device drivers are statically allocated structures. Though there may +be multiple devices in a system that a driver supports, struct +device_driver represents the driver as a whole (not a particular +device instance). + +Initialization +~~~~~~~~~~~~~~ + +The driver must initialize at least the name and bus fields. It should +also initialize the devclass field (when it arrives), so it may obtain +the proper linkage internally. It should also initialize as many of +the callbacks as possible, though each is optional. + +Declaration +~~~~~~~~~~~ + +As stated above, struct device_driver objects are statically +allocated. Below is an example declaration of the eepro100 +driver. This declaration is hypothetical only; it relies on the driver +being converted completely to the new model:: + + static struct device_driver eepro100_driver = { + .name = "eepro100", + .bus = &pci_bus_type, + + .probe = eepro100_probe, + .remove = eepro100_remove, + .suspend = eepro100_suspend, + .resume = eepro100_resume, + }; + +Most drivers will not be able to be converted completely to the new +model because the bus they belong to has a bus-specific structure with +bus-specific fields that cannot be generalized. + +The most common example of this are device ID structures. A driver +typically defines an array of device IDs that it supports. The format +of these structures and the semantics for comparing device IDs are +completely bus-specific. Defining them as bus-specific entities would +sacrifice type-safety, so we keep bus-specific structures around. + +Bus-specific drivers should include a generic struct device_driver in +the definition of the bus-specific driver. Like this:: + + struct pci_driver { + const struct pci_device_id *id_table; + struct device_driver driver; + }; + +A definition that included bus-specific fields would look like +(using the eepro100 driver again):: + + static struct pci_driver eepro100_driver = { + .id_table = eepro100_pci_tbl, + .driver = { + .name = "eepro100", + .bus = &pci_bus_type, + .probe = eepro100_probe, + .remove = eepro100_remove, + .suspend = eepro100_suspend, + .resume = eepro100_resume, + }, + }; + +Some may find the syntax of embedded struct initialization awkward or +even a bit ugly. So far, it's the best way we've found to do what we want... + +Registration +~~~~~~~~~~~~ + +:: + + int driver_register(struct device_driver *drv); + +The driver registers the structure on startup. For drivers that have +no bus-specific fields (i.e. don't have a bus-specific driver +structure), they would use driver_register and pass a pointer to their +struct device_driver object. + +Most drivers, however, will have a bus-specific structure and will +need to register with the bus using something like pci_driver_register. + +It is important that drivers register their driver structure as early as +possible. Registration with the core initializes several fields in the +struct device_driver object, including the reference count and the +lock. These fields are assumed to be valid at all times and may be +used by the device model core or the bus driver. + + +Transition Bus Drivers +~~~~~~~~~~~~~~~~~~~~~~ + +By defining wrapper functions, the transition to the new model can be +made easier. Drivers can ignore the generic structure altogether and +let the bus wrapper fill in the fields. For the callbacks, the bus can +define generic callbacks that forward the call to the bus-specific +callbacks of the drivers. + +This solution is intended to be only temporary. In order to get class +information in the driver, the drivers must be modified anyway. Since +converting drivers to the new model should reduce some infrastructural +complexity and code size, it is recommended that they are converted as +class information is added. + +Access +~~~~~~ + +Once the object has been registered, it may access the common fields of +the object, like the lock and the list of devices:: + + int driver_for_each_dev(struct device_driver *drv, void *data, + int (*callback)(struct device *dev, void *data)); + +The devices field is a list of all the devices that have been bound to +the driver. The LDM core provides a helper function to operate on all +the devices a driver controls. This helper locks the driver on each +node access, and does proper reference counting on each device as it +accesses it. + + +sysfs +~~~~~ + +When a driver is registered, a sysfs directory is created in its +bus's directory. In this directory, the driver can export an interface +to userspace to control operation of the driver on a global basis; +e.g. toggling debugging output in the driver. + +A future feature of this directory will be a 'devices' directory. This +directory will contain symlinks to the directories of devices it +supports. + + + +Callbacks +~~~~~~~~~ + +:: + + int (*probe) (struct device *dev); + +The probe() entry is called in task context, with the bus's rwsem locked +and the driver partially bound to the device. Drivers commonly use +container_of() to convert "dev" to a bus-specific type, both in probe() +and other routines. That type often provides device resource data, such +as pci_dev.resource[] or platform_device.resources, which is used in +addition to dev->platform_data to initialize the driver. + +This callback holds the driver-specific logic to bind the driver to a +given device. That includes verifying that the device is present, that +it's a version the driver can handle, that driver data structures can +be allocated and initialized, and that any hardware can be initialized. +Drivers often store a pointer to their state with dev_set_drvdata(). +When the driver has successfully bound itself to that device, then probe() +returns zero and the driver model code will finish its part of binding +the driver to that device. + +A driver's probe() may return a negative errno value to indicate that +the driver did not bind to this device, in which case it should have +released all resources it allocated. + +Optionally, probe() may return -EPROBE_DEFER if the driver depends on +resources that are not yet available (e.g., supplied by a driver that +hasn't initialized yet). The driver core will put the device onto the +deferred probe list and will try to call it again later. If a driver +must defer, it should return -EPROBE_DEFER as early as possible to +reduce the amount of time spent on setup work that will need to be +unwound and reexecuted at a later time. + +.. warning:: + -EPROBE_DEFER must not be returned if probe() has already created + child devices, even if those child devices are removed again + in a cleanup path. If -EPROBE_DEFER is returned after a child + device has been registered, it may result in an infinite loop of + .probe() calls to the same driver. + +:: + + void (*sync_state) (struct device *dev); + +sync_state is called only once for a device. It's called when all the consumer +devices of the device have successfully probed. The list of consumers of the +device is obtained by looking at the device links connecting that device to its +consumer devices. + +The first attempt to call sync_state() is made during late_initcall_sync() to +give firmware and drivers time to link devices to each other. During the first +attempt at calling sync_state(), if all the consumers of the device at that +point in time have already probed successfully, sync_state() is called right +away. If there are no consumers of the device during the first attempt, that +too is considered as "all consumers of the device have probed" and sync_state() +is called right away. + +If during the first attempt at calling sync_state() for a device, there are +still consumers that haven't probed successfully, the sync_state() call is +postponed and reattempted in the future only when one or more consumers of the +device probe successfully. If during the reattempt, the driver core finds that +there are one or more consumers of the device that haven't probed yet, then +sync_state() call is postponed again. + +A typical use case for sync_state() is to have the kernel cleanly take over +management of devices from the bootloader. For example, if a device is left on +and at a particular hardware configuration by the bootloader, the device's +driver might need to keep the device in the boot configuration until all the +consumers of the device have probed. Once all the consumers of the device have +probed, the device's driver can synchronize the hardware state of the device to +match the aggregated software state requested by all the consumers. Hence the +name sync_state(). + +While obvious examples of resources that can benefit from sync_state() include +resources such as regulator, sync_state() can also be useful for complex +resources like IOMMUs. For example, IOMMUs with multiple consumers (devices +whose addresses are remapped by the IOMMU) might need to keep their mappings +fixed at (or additive to) the boot configuration until all its consumers have +probed. + +While the typical use case for sync_state() is to have the kernel cleanly take +over management of devices from the bootloader, the usage of sync_state() is +not restricted to that. Use it whenever it makes sense to take an action after +all the consumers of a device have probed:: + + int (*remove) (struct device *dev); + +remove is called to unbind a driver from a device. This may be +called if a device is physically removed from the system, if the +driver module is being unloaded, during a reboot sequence, or +in other cases. + +It is up to the driver to determine if the device is present or +not. It should free any resources allocated specifically for the +device; i.e. anything in the device's driver_data field. + +If the device is still present, it should quiesce the device and place +it into a supported low-power state. + +:: + + int (*suspend) (struct device *dev, pm_message_t state); + +suspend is called to put the device in a low power state. + +:: + + int (*resume) (struct device *dev); + +Resume is used to bring a device back from a low power state. + + +Attributes +~~~~~~~~~~ + +:: + + struct driver_attribute { + struct attribute attr; + ssize_t (*show)(struct device_driver *driver, char *buf); + ssize_t (*store)(struct device_driver *, const char *buf, size_t count); + }; + +Device drivers can export attributes via their sysfs directories. +Drivers can declare attributes using a DRIVER_ATTR_RW and DRIVER_ATTR_RO +macro that works identically to the DEVICE_ATTR_RW and DEVICE_ATTR_RO +macros. + +Example:: + + DRIVER_ATTR_RW(debug); + +This is equivalent to declaring:: + + struct driver_attribute driver_attr_debug; + +This can then be used to add and remove the attribute from the +driver's directory using:: + + int driver_create_file(struct device_driver *, const struct driver_attribute *); + void driver_remove_file(struct device_driver *, const struct driver_attribute *); diff --git a/Documentation/driver-api/driver-model/index.rst b/Documentation/driver-api/driver-model/index.rst new file mode 100644 index 0000000000..4831bdd92e --- /dev/null +++ b/Documentation/driver-api/driver-model/index.rst @@ -0,0 +1,23 @@ +============ +Driver Model +============ + +.. toctree:: + :maxdepth: 1 + + binding + bus + design-patterns + device + devres + driver + overview + platform + porting + +.. only:: subproject and html + + Indices + ======= + + * :ref:`genindex` diff --git a/Documentation/driver-api/driver-model/overview.rst b/Documentation/driver-api/driver-model/overview.rst new file mode 100644 index 0000000000..e98d0ab4a9 --- /dev/null +++ b/Documentation/driver-api/driver-model/overview.rst @@ -0,0 +1,124 @@ +============================= +The Linux Kernel Device Model +============================= + +Patrick Mochel <mochel@digitalimplant.org> + +Drafted 26 August 2002 +Updated 31 January 2006 + + +Overview +~~~~~~~~ + +The Linux Kernel Driver Model is a unification of all the disparate driver +models that were previously used in the kernel. It is intended to augment the +bus-specific drivers for bridges and devices by consolidating a set of data +and operations into globally accessible data structures. + +Traditional driver models implemented some sort of tree-like structure +(sometimes just a list) for the devices they control. There wasn't any +uniformity across the different bus types. + +The current driver model provides a common, uniform data model for describing +a bus and the devices that can appear under the bus. The unified bus +model includes a set of common attributes which all busses carry, and a set +of common callbacks, such as device discovery during bus probing, bus +shutdown, bus power management, etc. + +The common device and bridge interface reflects the goals of the modern +computer: namely the ability to do seamless device "plug and play", power +management, and hot plug. In particular, the model dictated by Intel and +Microsoft (namely ACPI) ensures that almost every device on almost any bus +on an x86-compatible system can work within this paradigm. Of course, +not every bus is able to support all such operations, although most +buses support most of those operations. + + +Downstream Access +~~~~~~~~~~~~~~~~~ + +Common data fields have been moved out of individual bus layers into a common +data structure. These fields must still be accessed by the bus layers, +and sometimes by the device-specific drivers. + +Other bus layers are encouraged to do what has been done for the PCI layer. +struct pci_dev now looks like this:: + + struct pci_dev { + ... + + struct device dev; /* Generic device interface */ + ... + }; + +Note first that the struct device dev within the struct pci_dev is +statically allocated. This means only one allocation on device discovery. + +Note also that that struct device dev is not necessarily defined at the +front of the pci_dev structure. This is to make people think about what +they're doing when switching between the bus driver and the global driver, +and to discourage meaningless and incorrect casts between the two. + +The PCI bus layer freely accesses the fields of struct device. It knows about +the structure of struct pci_dev, and it should know the structure of struct +device. Individual PCI device drivers that have been converted to the current +driver model generally do not and should not touch the fields of struct device, +unless there is a compelling reason to do so. + +The above abstraction prevents unnecessary pain during transitional phases. +If it were not done this way, then when a field was renamed or removed, every +downstream driver would break. On the other hand, if only the bus layer +(and not the device layer) accesses the struct device, it is only the bus +layer that needs to change. + + +User Interface +~~~~~~~~~~~~~~ + +By virtue of having a complete hierarchical view of all the devices in the +system, exporting a complete hierarchical view to userspace becomes relatively +easy. This has been accomplished by implementing a special purpose virtual +file system named sysfs. + +Almost all mainstream Linux distros mount this filesystem automatically; you +can see some variation of the following in the output of the "mount" command:: + + $ mount + ... + none on /sys type sysfs (rw,noexec,nosuid,nodev) + ... + $ + +The auto-mounting of sysfs is typically accomplished by an entry similar to +the following in the /etc/fstab file:: + + none /sys sysfs defaults 0 0 + +or something similar in the /lib/init/fstab file on Debian-based systems:: + + none /sys sysfs nodev,noexec,nosuid 0 0 + +If sysfs is not automatically mounted, you can always do it manually with:: + + # mount -t sysfs sysfs /sys + +Whenever a device is inserted into the tree, a directory is created for it. +This directory may be populated at each layer of discovery - the global layer, +the bus layer, or the device layer. + +The global layer currently creates two files - 'name' and 'power'. The +former only reports the name of the device. The latter reports the +current power state of the device. It will also be used to set the current +power state. + +The bus layer may also create files for the devices it finds while probing the +bus. For example, the PCI layer currently creates 'irq' and 'resource' files +for each PCI device. + +A device-specific driver may also export files in its directory to expose +device-specific data or tunable interfaces. + +More information about the sysfs directory layout can be found in +the other documents in this directory and in the file +Documentation/filesystems/sysfs.rst. diff --git a/Documentation/driver-api/driver-model/platform.rst b/Documentation/driver-api/driver-model/platform.rst new file mode 100644 index 0000000000..1fe5c6c619 --- /dev/null +++ b/Documentation/driver-api/driver-model/platform.rst @@ -0,0 +1,246 @@ +============================ +Platform Devices and Drivers +============================ + +See <linux/platform_device.h> for the driver model interface to the +platform bus: platform_device, and platform_driver. This pseudo-bus +is used to connect devices on busses with minimal infrastructure, +like those used to integrate peripherals on many system-on-chip +processors, or some "legacy" PC interconnects; as opposed to large +formally specified ones like PCI or USB. + + +Platform devices +~~~~~~~~~~~~~~~~ +Platform devices are devices that typically appear as autonomous +entities in the system. This includes legacy port-based devices and +host bridges to peripheral buses, and most controllers integrated +into system-on-chip platforms. What they usually have in common +is direct addressing from a CPU bus. Rarely, a platform_device will +be connected through a segment of some other kind of bus; but its +registers will still be directly addressable. + +Platform devices are given a name, used in driver binding, and a +list of resources such as addresses and IRQs:: + + struct platform_device { + const char *name; + u32 id; + struct device dev; + u32 num_resources; + struct resource *resource; + }; + + +Platform drivers +~~~~~~~~~~~~~~~~ +Platform drivers follow the standard driver model convention, where +discovery/enumeration is handled outside the drivers, and drivers +provide probe() and remove() methods. They support power management +and shutdown notifications using the standard conventions:: + + struct platform_driver { + int (*probe)(struct platform_device *); + int (*remove)(struct platform_device *); + void (*shutdown)(struct platform_device *); + int (*suspend)(struct platform_device *, pm_message_t state); + int (*suspend_late)(struct platform_device *, pm_message_t state); + int (*resume_early)(struct platform_device *); + int (*resume)(struct platform_device *); + struct device_driver driver; + }; + +Note that probe() should in general verify that the specified device hardware +actually exists; sometimes platform setup code can't be sure. The probing +can use device resources, including clocks, and device platform_data. + +Platform drivers register themselves the normal way:: + + int platform_driver_register(struct platform_driver *drv); + +Or, in common situations where the device is known not to be hot-pluggable, +the probe() routine can live in an init section to reduce the driver's +runtime memory footprint:: + + int platform_driver_probe(struct platform_driver *drv, + int (*probe)(struct platform_device *)) + +Kernel modules can be composed of several platform drivers. The platform core +provides helpers to register and unregister an array of drivers:: + + int __platform_register_drivers(struct platform_driver * const *drivers, + unsigned int count, struct module *owner); + void platform_unregister_drivers(struct platform_driver * const *drivers, + unsigned int count); + +If one of the drivers fails to register, all drivers registered up to that +point will be unregistered in reverse order. Note that there is a convenience +macro that passes THIS_MODULE as owner parameter:: + + #define platform_register_drivers(drivers, count) + + +Device Enumeration +~~~~~~~~~~~~~~~~~~ +As a rule, platform specific (and often board-specific) setup code will +register platform devices:: + + int platform_device_register(struct platform_device *pdev); + + int platform_add_devices(struct platform_device **pdevs, int ndev); + +The general rule is to register only those devices that actually exist, +but in some cases extra devices might be registered. For example, a kernel +might be configured to work with an external network adapter that might not +be populated on all boards, or likewise to work with an integrated controller +that some boards might not hook up to any peripherals. + +In some cases, boot firmware will export tables describing the devices +that are populated on a given board. Without such tables, often the +only way for system setup code to set up the correct devices is to build +a kernel for a specific target board. Such board-specific kernels are +common with embedded and custom systems development. + +In many cases, the memory and IRQ resources associated with the platform +device are not enough to let the device's driver work. Board setup code +will often provide additional information using the device's platform_data +field to hold additional information. + +Embedded systems frequently need one or more clocks for platform devices, +which are normally kept off until they're actively needed (to save power). +System setup also associates those clocks with the device, so that +calls to clk_get(&pdev->dev, clock_name) return them as needed. + + +Legacy Drivers: Device Probing +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ +Some drivers are not fully converted to the driver model, because they take +on a non-driver role: the driver registers its platform device, rather than +leaving that for system infrastructure. Such drivers can't be hotplugged +or coldplugged, since those mechanisms require device creation to be in a +different system component than the driver. + +The only "good" reason for this is to handle older system designs which, like +original IBM PCs, rely on error-prone "probe-the-hardware" models for hardware +configuration. Newer systems have largely abandoned that model, in favor of +bus-level support for dynamic configuration (PCI, USB), or device tables +provided by the boot firmware (e.g. PNPACPI on x86). There are too many +conflicting options about what might be where, and even educated guesses by +an operating system will be wrong often enough to make trouble. + +This style of driver is discouraged. If you're updating such a driver, +please try to move the device enumeration to a more appropriate location, +outside the driver. This will usually be cleanup, since such drivers +tend to already have "normal" modes, such as ones using device nodes that +were created by PNP or by platform device setup. + +None the less, there are some APIs to support such legacy drivers. Avoid +using these calls except with such hotplug-deficient drivers:: + + struct platform_device *platform_device_alloc( + const char *name, int id); + +You can use platform_device_alloc() to dynamically allocate a device, which +you will then initialize with resources and platform_device_register(). +A better solution is usually:: + + struct platform_device *platform_device_register_simple( + const char *name, int id, + struct resource *res, unsigned int nres); + +You can use platform_device_register_simple() as a one-step call to allocate +and register a device. + + +Device Naming and Driver Binding +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ +The platform_device.dev.bus_id is the canonical name for the devices. +It's built from two components: + + * platform_device.name ... which is also used to for driver matching. + + * platform_device.id ... the device instance number, or else "-1" + to indicate there's only one. + +These are concatenated, so name/id "serial"/0 indicates bus_id "serial.0", and +"serial/3" indicates bus_id "serial.3"; both would use the platform_driver +named "serial". While "my_rtc"/-1 would be bus_id "my_rtc" (no instance id) +and use the platform_driver called "my_rtc". + +Driver binding is performed automatically by the driver core, invoking +driver probe() after finding a match between device and driver. If the +probe() succeeds, the driver and device are bound as usual. There are +three different ways to find such a match: + + - Whenever a device is registered, the drivers for that bus are + checked for matches. Platform devices should be registered very + early during system boot. + + - When a driver is registered using platform_driver_register(), all + unbound devices on that bus are checked for matches. Drivers + usually register later during booting, or by module loading. + + - Registering a driver using platform_driver_probe() works just like + using platform_driver_register(), except that the driver won't + be probed later if another device registers. (Which is OK, since + this interface is only for use with non-hotpluggable devices.) + + +Early Platform Devices and Drivers +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ +The early platform interfaces provide platform data to platform device +drivers early on during the system boot. The code is built on top of the +early_param() command line parsing and can be executed very early on. + +Example: "earlyprintk" class early serial console in 6 steps + +1. Registering early platform device data +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ +The architecture code registers platform device data using the function +early_platform_add_devices(). In the case of early serial console this +should be hardware configuration for the serial port. Devices registered +at this point will later on be matched against early platform drivers. + +2. Parsing kernel command line +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ +The architecture code calls parse_early_param() to parse the kernel +command line. This will execute all matching early_param() callbacks. +User specified early platform devices will be registered at this point. +For the early serial console case the user can specify port on the +kernel command line as "earlyprintk=serial.0" where "earlyprintk" is +the class string, "serial" is the name of the platform driver and +0 is the platform device id. If the id is -1 then the dot and the +id can be omitted. + +3. Installing early platform drivers belonging to a certain class +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ +The architecture code may optionally force registration of all early +platform drivers belonging to a certain class using the function +early_platform_driver_register_all(). User specified devices from +step 2 have priority over these. This step is omitted by the serial +driver example since the early serial driver code should be disabled +unless the user has specified port on the kernel command line. + +4. Early platform driver registration +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ +Compiled-in platform drivers making use of early_platform_init() are +automatically registered during step 2 or 3. The serial driver example +should use early_platform_init("earlyprintk", &platform_driver). + +5. Probing of early platform drivers belonging to a certain class +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ +The architecture code calls early_platform_driver_probe() to match +registered early platform devices associated with a certain class with +registered early platform drivers. Matched devices will get probed(). +This step can be executed at any point during the early boot. As soon +as possible may be good for the serial port case. + +6. Inside the early platform driver probe() +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ +The driver code needs to take special care during early boot, especially +when it comes to memory allocation and interrupt registration. The code +in the probe() function can use is_early_platform_device() to check if +it is called at early platform device or at the regular platform device +time. The early serial driver performs register_console() at this point. + +For further information, see <linux/platform_device.h>. diff --git a/Documentation/driver-api/driver-model/porting.rst b/Documentation/driver-api/driver-model/porting.rst new file mode 100644 index 0000000000..931ea879af --- /dev/null +++ b/Documentation/driver-api/driver-model/porting.rst @@ -0,0 +1,448 @@ +======================================= +Porting Drivers to the New Driver Model +======================================= + +Patrick Mochel + +7 January 2003 + + +Overview + +Please refer to `Documentation/driver-api/driver-model/*.rst` for definitions of +various driver types and concepts. + +Most of the work of porting devices drivers to the new model happens +at the bus driver layer. This was intentional, to minimize the +negative effect on kernel drivers, and to allow a gradual transition +of bus drivers. + +In a nutshell, the driver model consists of a set of objects that can +be embedded in larger, bus-specific objects. Fields in these generic +objects can replace fields in the bus-specific objects. + +The generic objects must be registered with the driver model core. By +doing so, they will exported via the sysfs filesystem. sysfs can be +mounted by doing:: + + # mount -t sysfs sysfs /sys + + + +The Process + +Step 0: Read include/linux/device.h for object and function definitions. + +Step 1: Registering the bus driver. + + +- Define a struct bus_type for the bus driver:: + + struct bus_type pci_bus_type = { + .name = "pci", + }; + + +- Register the bus type. + + This should be done in the initialization function for the bus type, + which is usually the module_init(), or equivalent, function:: + + static int __init pci_driver_init(void) + { + return bus_register(&pci_bus_type); + } + + subsys_initcall(pci_driver_init); + + + The bus type may be unregistered (if the bus driver may be compiled + as a module) by doing:: + + bus_unregister(&pci_bus_type); + + +- Export the bus type for others to use. + + Other code may wish to reference the bus type, so declare it in a + shared header file and export the symbol. + +From include/linux/pci.h:: + + extern struct bus_type pci_bus_type; + + +From file the above code appears in:: + + EXPORT_SYMBOL(pci_bus_type); + + + +- This will cause the bus to show up in /sys/bus/pci/ with two + subdirectories: 'devices' and 'drivers':: + + # tree -d /sys/bus/pci/ + /sys/bus/pci/ + |-- devices + `-- drivers + + + +Step 2: Registering Devices. + +struct device represents a single device. It mainly contains metadata +describing the relationship the device has to other entities. + + +- Embed a struct device in the bus-specific device type:: + + + struct pci_dev { + ... + struct device dev; /* Generic device interface */ + ... + }; + + It is recommended that the generic device not be the first item in + the struct to discourage programmers from doing mindless casts + between the object types. Instead macros, or inline functions, + should be created to convert from the generic object type:: + + + #define to_pci_dev(n) container_of(n, struct pci_dev, dev) + + or + + static inline struct pci_dev * to_pci_dev(struct kobject * kobj) + { + return container_of(n, struct pci_dev, dev); + } + + This allows the compiler to verify type-safety of the operations + that are performed (which is Good). + + +- Initialize the device on registration. + + When devices are discovered or registered with the bus type, the + bus driver should initialize the generic device. The most important + things to initialize are the bus_id, parent, and bus fields. + + The bus_id is an ASCII string that contains the device's address on + the bus. The format of this string is bus-specific. This is + necessary for representing devices in sysfs. + + parent is the physical parent of the device. It is important that + the bus driver sets this field correctly. + + The driver model maintains an ordered list of devices that it uses + for power management. This list must be in order to guarantee that + devices are shutdown before their physical parents, and vice versa. + The order of this list is determined by the parent of registered + devices. + + Also, the location of the device's sysfs directory depends on a + device's parent. sysfs exports a directory structure that mirrors + the device hierarchy. Accurately setting the parent guarantees that + sysfs will accurately represent the hierarchy. + + The device's bus field is a pointer to the bus type the device + belongs to. This should be set to the bus_type that was declared + and initialized before. + + Optionally, the bus driver may set the device's name and release + fields. + + The name field is an ASCII string describing the device, like + + "ATI Technologies Inc Radeon QD" + + The release field is a callback that the driver model core calls + when the device has been removed, and all references to it have + been released. More on this in a moment. + + +- Register the device. + + Once the generic device has been initialized, it can be registered + with the driver model core by doing:: + + device_register(&dev->dev); + + It can later be unregistered by doing:: + + device_unregister(&dev->dev); + + This should happen on buses that support hotpluggable devices. + If a bus driver unregisters a device, it should not immediately free + it. It should instead wait for the driver model core to call the + device's release method, then free the bus-specific object. + (There may be other code that is currently referencing the device + structure, and it would be rude to free the device while that is + happening). + + + When the device is registered, a directory in sysfs is created. + The PCI tree in sysfs looks like:: + + /sys/devices/pci0/ + |-- 00:00.0 + |-- 00:01.0 + | `-- 01:00.0 + |-- 00:02.0 + | `-- 02:1f.0 + | `-- 03:00.0 + |-- 00:1e.0 + | `-- 04:04.0 + |-- 00:1f.0 + |-- 00:1f.1 + | |-- ide0 + | | |-- 0.0 + | | `-- 0.1 + | `-- ide1 + | `-- 1.0 + |-- 00:1f.2 + |-- 00:1f.3 + `-- 00:1f.5 + + Also, symlinks are created in the bus's 'devices' directory + that point to the device's directory in the physical hierarchy:: + + /sys/bus/pci/devices/ + |-- 00:00.0 -> ../../../devices/pci0/00:00.0 + |-- 00:01.0 -> ../../../devices/pci0/00:01.0 + |-- 00:02.0 -> ../../../devices/pci0/00:02.0 + |-- 00:1e.0 -> ../../../devices/pci0/00:1e.0 + |-- 00:1f.0 -> ../../../devices/pci0/00:1f.0 + |-- 00:1f.1 -> ../../../devices/pci0/00:1f.1 + |-- 00:1f.2 -> ../../../devices/pci0/00:1f.2 + |-- 00:1f.3 -> ../../../devices/pci0/00:1f.3 + |-- 00:1f.5 -> ../../../devices/pci0/00:1f.5 + |-- 01:00.0 -> ../../../devices/pci0/00:01.0/01:00.0 + |-- 02:1f.0 -> ../../../devices/pci0/00:02.0/02:1f.0 + |-- 03:00.0 -> ../../../devices/pci0/00:02.0/02:1f.0/03:00.0 + `-- 04:04.0 -> ../../../devices/pci0/00:1e.0/04:04.0 + + + +Step 3: Registering Drivers. + +struct device_driver is a simple driver structure that contains a set +of operations that the driver model core may call. + + +- Embed a struct device_driver in the bus-specific driver. + + Just like with devices, do something like:: + + struct pci_driver { + ... + struct device_driver driver; + }; + + +- Initialize the generic driver structure. + + When the driver registers with the bus (e.g. doing pci_register_driver()), + initialize the necessary fields of the driver: the name and bus + fields. + + +- Register the driver. + + After the generic driver has been initialized, call:: + + driver_register(&drv->driver); + + to register the driver with the core. + + When the driver is unregistered from the bus, unregister it from the + core by doing:: + + driver_unregister(&drv->driver); + + Note that this will block until all references to the driver have + gone away. Normally, there will not be any. + + +- Sysfs representation. + + Drivers are exported via sysfs in their bus's 'driver's directory. + For example:: + + /sys/bus/pci/drivers/ + |-- 3c59x + |-- Ensoniq AudioPCI + |-- agpgart-amdk7 + |-- e100 + `-- serial + + +Step 4: Define Generic Methods for Drivers. + +struct device_driver defines a set of operations that the driver model +core calls. Most of these operations are probably similar to +operations the bus already defines for drivers, but taking different +parameters. + +It would be difficult and tedious to force every driver on a bus to +simultaneously convert their drivers to generic format. Instead, the +bus driver should define single instances of the generic methods that +forward call to the bus-specific drivers. For instance:: + + + static int pci_device_remove(struct device * dev) + { + struct pci_dev * pci_dev = to_pci_dev(dev); + struct pci_driver * drv = pci_dev->driver; + + if (drv) { + if (drv->remove) + drv->remove(pci_dev); + pci_dev->driver = NULL; + } + return 0; + } + + +The generic driver should be initialized with these methods before it +is registered:: + + /* initialize common driver fields */ + drv->driver.name = drv->name; + drv->driver.bus = &pci_bus_type; + drv->driver.probe = pci_device_probe; + drv->driver.resume = pci_device_resume; + drv->driver.suspend = pci_device_suspend; + drv->driver.remove = pci_device_remove; + + /* register with core */ + driver_register(&drv->driver); + + +Ideally, the bus should only initialize the fields if they are not +already set. This allows the drivers to implement their own generic +methods. + + +Step 5: Support generic driver binding. + +The model assumes that a device or driver can be dynamically +registered with the bus at any time. When registration happens, +devices must be bound to a driver, or drivers must be bound to all +devices that it supports. + +A driver typically contains a list of device IDs that it supports. The +bus driver compares these IDs to the IDs of devices registered with it. +The format of the device IDs, and the semantics for comparing them are +bus-specific, so the generic model does attempt to generalize them. + +Instead, a bus may supply a method in struct bus_type that does the +comparison:: + + int (*match)(struct device * dev, struct device_driver * drv); + +match should return positive value if the driver supports the device, +and zero otherwise. It may also return error code (for example +-EPROBE_DEFER) if determining that given driver supports the device is +not possible. + +When a device is registered, the bus's list of drivers is iterated +over. bus->match() is called for each one until a match is found. + +When a driver is registered, the bus's list of devices is iterated +over. bus->match() is called for each device that is not already +claimed by a driver. + +When a device is successfully bound to a driver, device->driver is +set, the device is added to a per-driver list of devices, and a +symlink is created in the driver's sysfs directory that points to the +device's physical directory:: + + /sys/bus/pci/drivers/ + |-- 3c59x + | `-- 00:0b.0 -> ../../../../devices/pci0/00:0b.0 + |-- Ensoniq AudioPCI + |-- agpgart-amdk7 + | `-- 00:00.0 -> ../../../../devices/pci0/00:00.0 + |-- e100 + | `-- 00:0c.0 -> ../../../../devices/pci0/00:0c.0 + `-- serial + + +This driver binding should replace the existing driver binding +mechanism the bus currently uses. + + +Step 6: Supply a hotplug callback. + +Whenever a device is registered with the driver model core, the +userspace program /sbin/hotplug is called to notify userspace. +Users can define actions to perform when a device is inserted or +removed. + +The driver model core passes several arguments to userspace via +environment variables, including + +- ACTION: set to 'add' or 'remove' +- DEVPATH: set to the device's physical path in sysfs. + +A bus driver may also supply additional parameters for userspace to +consume. To do this, a bus must implement the 'hotplug' method in +struct bus_type:: + + int (*hotplug) (struct device *dev, char **envp, + int num_envp, char *buffer, int buffer_size); + +This is called immediately before /sbin/hotplug is executed. + + +Step 7: Cleaning up the bus driver. + +The generic bus, device, and driver structures provide several fields +that can replace those defined privately to the bus driver. + +- Device list. + +struct bus_type contains a list of all devices registered with the bus +type. This includes all devices on all instances of that bus type. +An internal list that the bus uses may be removed, in favor of using +this one. + +The core provides an iterator to access these devices:: + + int bus_for_each_dev(struct bus_type * bus, struct device * start, + void * data, int (*fn)(struct device *, void *)); + + +- Driver list. + +struct bus_type also contains a list of all drivers registered with +it. An internal list of drivers that the bus driver maintains may +be removed in favor of using the generic one. + +The drivers may be iterated over, like devices:: + + int bus_for_each_drv(struct bus_type * bus, struct device_driver * start, + void * data, int (*fn)(struct device_driver *, void *)); + + +Please see drivers/base/bus.c for more information. + + +- rwsem + +struct bus_type contains an rwsem that protects all core accesses to +the device and driver lists. This can be used by the bus driver +internally, and should be used when accessing the device or driver +lists the bus maintains. + + +- Device and driver fields. + +Some of the fields in struct device and struct device_driver duplicate +fields in the bus-specific representations of these objects. Feel free +to remove the bus-specific ones and favor the generic ones. Note +though, that this will likely mean fixing up all the drivers that +reference the bus-specific fields (though those should all be 1-line +changes). diff --git a/Documentation/driver-api/early-userspace/buffer-format.rst b/Documentation/driver-api/early-userspace/buffer-format.rst new file mode 100644 index 0000000000..7f74e301fd --- /dev/null +++ b/Documentation/driver-api/early-userspace/buffer-format.rst @@ -0,0 +1,119 @@ +======================= +initramfs buffer format +======================= + +Al Viro, H. Peter Anvin + +Last revision: 2002-01-13 + +Starting with kernel 2.5.x, the old "initial ramdisk" protocol is +getting {replaced/complemented} with the new "initial ramfs" +(initramfs) protocol. The initramfs contents is passed using the same +memory buffer protocol used by the initrd protocol, but the contents +is different. The initramfs buffer contains an archive which is +expanded into a ramfs filesystem; this document details the format of +the initramfs buffer format. + +The initramfs buffer format is based around the "newc" or "crc" CPIO +formats, and can be created with the cpio(1) utility. The cpio +archive can be compressed using gzip(1). One valid version of an +initramfs buffer is thus a single .cpio.gz file. + +The full format of the initramfs buffer is defined by the following +grammar, where:: + + * is used to indicate "0 or more occurrences of" + (|) indicates alternatives + + indicates concatenation + GZIP() indicates the gzip(1) of the operand + ALGN(n) means padding with null bytes to an n-byte boundary + + initramfs := ("\0" | cpio_archive | cpio_gzip_archive)* + + cpio_gzip_archive := GZIP(cpio_archive) + + cpio_archive := cpio_file* + (<nothing> | cpio_trailer) + + cpio_file := ALGN(4) + cpio_header + filename + "\0" + ALGN(4) + data + + cpio_trailer := ALGN(4) + cpio_header + "TRAILER!!!\0" + ALGN(4) + + +In human terms, the initramfs buffer contains a collection of +compressed and/or uncompressed cpio archives (in the "newc" or "crc" +formats); arbitrary amounts zero bytes (for padding) can be added +between members. + +The cpio "TRAILER!!!" entry (cpio end-of-archive) is optional, but is +not ignored; see "handling of hard links" below. + +The structure of the cpio_header is as follows (all fields contain +hexadecimal ASCII numbers fully padded with '0' on the left to the +full width of the field, for example, the integer 4780 is represented +by the ASCII string "000012ac"): + +============= ================== ============================================== +Field name Field size Meaning +============= ================== ============================================== +c_magic 6 bytes The string "070701" or "070702" +c_ino 8 bytes File inode number +c_mode 8 bytes File mode and permissions +c_uid 8 bytes File uid +c_gid 8 bytes File gid +c_nlink 8 bytes Number of links +c_mtime 8 bytes Modification time +c_filesize 8 bytes Size of data field +c_maj 8 bytes Major part of file device number +c_min 8 bytes Minor part of file device number +c_rmaj 8 bytes Major part of device node reference +c_rmin 8 bytes Minor part of device node reference +c_namesize 8 bytes Length of filename, including final \0 +c_chksum 8 bytes Checksum of data field if c_magic is 070702; + otherwise zero +============= ================== ============================================== + +The c_mode field matches the contents of st_mode returned by stat(2) +on Linux, and encodes the file type and file permissions. + +The c_filesize should be zero for any file which is not a regular file +or symlink. + +The c_chksum field contains a simple 32-bit unsigned sum of all the +bytes in the data field. cpio(1) refers to this as "crc", which is +clearly incorrect (a cyclic redundancy check is a different and +significantly stronger integrity check), however, this is the +algorithm used. + +If the filename is "TRAILER!!!" this is actually an end-of-archive +marker; the c_filesize for an end-of-archive marker must be zero. + + +Handling of hard links +====================== + +When a nondirectory with c_nlink > 1 is seen, the (c_maj,c_min,c_ino) +tuple is looked up in a tuple buffer. If not found, it is entered in +the tuple buffer and the entry is created as usual; if found, a hard +link rather than a second copy of the file is created. It is not +necessary (but permitted) to include a second copy of the file +contents; if the file contents is not included, the c_filesize field +should be set to zero to indicate no data section follows. If data is +present, the previous instance of the file is overwritten; this allows +the data-carrying instance of a file to occur anywhere in the sequence +(GNU cpio is reported to attach the data to the last instance of a +file only.) + +c_filesize must not be zero for a symlink. + +When a "TRAILER!!!" end-of-archive marker is seen, the tuple buffer is +reset. This permits archives which are generated independently to be +concatenated. + +To combine file data from different sources (without having to +regenerate the (c_maj,c_min,c_ino) fields), therefore, either one of +the following techniques can be used: + +a) Separate the different file data sources with a "TRAILER!!!" + end-of-archive marker, or + +b) Make sure c_nlink == 1 for all nondirectory entries. diff --git a/Documentation/driver-api/early-userspace/early_userspace_support.rst b/Documentation/driver-api/early-userspace/early_userspace_support.rst new file mode 100644 index 0000000000..61bdeac1ba --- /dev/null +++ b/Documentation/driver-api/early-userspace/early_userspace_support.rst @@ -0,0 +1,154 @@ +======================= +Early userspace support +======================= + +Last update: 2004-12-20 tlh + + +"Early userspace" is a set of libraries and programs that provide +various pieces of functionality that are important enough to be +available while a Linux kernel is coming up, but that don't need to be +run inside the kernel itself. + +It consists of several major infrastructure components: + +- gen_init_cpio, a program that builds a cpio-format archive + containing a root filesystem image. This archive is compressed, and + the compressed image is linked into the kernel image. +- initramfs, a chunk of code that unpacks the compressed cpio image + midway through the kernel boot process. +- klibc, a userspace C library, currently packaged separately, that is + optimized for correctness and small size. + +The cpio file format used by initramfs is the "newc" (aka "cpio -H newc") +format, and is documented in the file "buffer-format.txt". There are +two ways to add an early userspace image: specify an existing cpio +archive to be used as the image or have the kernel build process build +the image from specifications. + +CPIO ARCHIVE method +------------------- + +You can create a cpio archive that contains the early userspace image. +Your cpio archive should be specified in CONFIG_INITRAMFS_SOURCE and it +will be used directly. Only a single cpio file may be specified in +CONFIG_INITRAMFS_SOURCE and directory and file names are not allowed in +combination with a cpio archive. + +IMAGE BUILDING method +--------------------- + +The kernel build process can also build an early userspace image from +source parts rather than supplying a cpio archive. This method provides +a way to create images with root-owned files even though the image was +built by an unprivileged user. + +The image is specified as one or more sources in +CONFIG_INITRAMFS_SOURCE. Sources can be either directories or files - +cpio archives are *not* allowed when building from sources. + +A source directory will have it and all of its contents packaged. The +specified directory name will be mapped to '/'. When packaging a +directory, limited user and group ID translation can be performed. +INITRAMFS_ROOT_UID can be set to a user ID that needs to be mapped to +user root (0). INITRAMFS_ROOT_GID can be set to a group ID that needs +to be mapped to group root (0). + +A source file must be directives in the format required by the +usr/gen_init_cpio utility (run 'usr/gen_init_cpio -h' to get the +file format). The directives in the file will be passed directly to +usr/gen_init_cpio. + +When a combination of directories and files are specified then the +initramfs image will be an aggregate of all of them. In this way a user +can create a 'root-image' directory and install all files into it. +Because device-special files cannot be created by a unprivileged user, +special files can be listed in a 'root-files' file. Both 'root-image' +and 'root-files' can be listed in CONFIG_INITRAMFS_SOURCE and a complete +early userspace image can be built by an unprivileged user. + +As a technical note, when directories and files are specified, the +entire CONFIG_INITRAMFS_SOURCE is passed to +usr/gen_initramfs.sh. This means that CONFIG_INITRAMFS_SOURCE +can really be interpreted as any legal argument to +gen_initramfs.sh. If a directory is specified as an argument then +the contents are scanned, uid/gid translation is performed, and +usr/gen_init_cpio file directives are output. If a directory is +specified as an argument to usr/gen_initramfs.sh then the +contents of the file are simply copied to the output. All of the output +directives from directory scanning and file contents copying are +processed by usr/gen_init_cpio. + +See also 'usr/gen_initramfs.sh -h'. + +Where's this all leading? +========================= + +The klibc distribution contains some of the necessary software to make +early userspace useful. The klibc distribution is currently +maintained separately from the kernel. + +You can obtain somewhat infrequent snapshots of klibc from +https://www.kernel.org/pub/linux/libs/klibc/ + +For active users, you are better off using the klibc git +repository, at https://git.kernel.org/?p=libs/klibc/klibc.git + +The standalone klibc distribution currently provides three components, +in addition to the klibc library: + +- ipconfig, a program that configures network interfaces. It can + configure them statically, or use DHCP to obtain information + dynamically (aka "IP autoconfiguration"). +- nfsmount, a program that can mount an NFS filesystem. +- kinit, the "glue" that uses ipconfig and nfsmount to replace the old + support for IP autoconfig, mount a filesystem over NFS, and continue + system boot using that filesystem as root. + +kinit is built as a single statically linked binary to save space. + +Eventually, several more chunks of kernel functionality will hopefully +move to early userspace: + +- Almost all of init/do_mounts* (the beginning of this is already in + place) +- ACPI table parsing +- Insert unwieldy subsystem that doesn't really need to be in kernel + space here + +If kinit doesn't meet your current needs and you've got bytes to burn, +the klibc distribution includes a small Bourne-compatible shell (ash) +and a number of other utilities, so you can replace kinit and build +custom initramfs images that meet your needs exactly. + +For questions and help, you can sign up for the early userspace +mailing list at https://www.zytor.com/mailman/listinfo/klibc + +How does it work? +================= + +The kernel has currently 3 ways to mount the root filesystem: + +a) all required device and filesystem drivers compiled into the kernel, no + initrd. init/main.c:init() will call prepare_namespace() to mount the + final root filesystem, based on the root= option and optional init= to run + some other init binary than listed at the end of init/main.c:init(). + +b) some device and filesystem drivers built as modules and stored in an + initrd. The initrd must contain a binary '/linuxrc' which is supposed to + load these driver modules. It is also possible to mount the final root + filesystem via linuxrc and use the pivot_root syscall. The initrd is + mounted and executed via prepare_namespace(). + +c) using initramfs. The call to prepare_namespace() must be skipped. + This means that a binary must do all the work. Said binary can be stored + into initramfs either via modifying usr/gen_init_cpio.c or via the new + initrd format, an cpio archive. It must be called "/init". This binary + is responsible to do all the things prepare_namespace() would do. + + To maintain backwards compatibility, the /init binary will only run if it + comes via an initramfs cpio archive. If this is not the case, + init/main.c:init() will run prepare_namespace() to mount the final root + and exec one of the predefined init binaries. + +Bryan O'Sullivan <bos@serpentine.com> diff --git a/Documentation/driver-api/early-userspace/index.rst b/Documentation/driver-api/early-userspace/index.rst new file mode 100644 index 0000000000..149c1822f0 --- /dev/null +++ b/Documentation/driver-api/early-userspace/index.rst @@ -0,0 +1,18 @@ +.. SPDX-License-Identifier: GPL-2.0 + +=============== +Early Userspace +=============== + +.. toctree:: + :maxdepth: 1 + + early_userspace_support + buffer-format + +.. only:: subproject and html + + Indices + ======= + + * :ref:`genindex` diff --git a/Documentation/driver-api/edac.rst b/Documentation/driver-api/edac.rst new file mode 100644 index 0000000000..f4f044b95c --- /dev/null +++ b/Documentation/driver-api/edac.rst @@ -0,0 +1,298 @@ +Error Detection And Correction (EDAC) Devices +============================================= + +Main Concepts used at the EDAC subsystem +---------------------------------------- + +There are several things to be aware of that aren't at all obvious, like +*sockets, *socket sets*, *banks*, *rows*, *chip-select rows*, *channels*, +etc... + +These are some of the many terms that are thrown about that don't always +mean what people think they mean (Inconceivable!). In the interest of +creating a common ground for discussion, terms and their definitions +will be established. + +* Memory devices + +The individual DRAM chips on a memory stick. These devices commonly +output 4 and 8 bits each (x4, x8). Grouping several of these in parallel +provides the number of bits that the memory controller expects: +typically 72 bits, in order to provide 64 bits + 8 bits of ECC data. + +* Memory Stick + +A printed circuit board that aggregates multiple memory devices in +parallel. In general, this is the Field Replaceable Unit (FRU) which +gets replaced, in the case of excessive errors. Most often it is also +called DIMM (Dual Inline Memory Module). + +* Memory Socket + +A physical connector on the motherboard that accepts a single memory +stick. Also called as "slot" on several datasheets. + +* Channel + +A memory controller channel, responsible to communicate with a group of +DIMMs. Each channel has its own independent control (command) and data +bus, and can be used independently or grouped with other channels. + +* Branch + +It is typically the highest hierarchy on a Fully-Buffered DIMM memory +controller. Typically, it contains two channels. Two channels at the +same branch can be used in single mode or in lockstep mode. When +lockstep is enabled, the cacheline is doubled, but it generally brings +some performance penalty. Also, it is generally not possible to point to +just one memory stick when an error occurs, as the error correction code +is calculated using two DIMMs instead of one. Due to that, it is capable +of correcting more errors than on single mode. + +* Single-channel + +The data accessed by the memory controller is contained into one dimm +only. E. g. if the data is 64 bits-wide, the data flows to the CPU using +one 64 bits parallel access. Typically used with SDR, DDR, DDR2 and DDR3 +memories. FB-DIMM and RAMBUS use a different concept for channel, so +this concept doesn't apply there. + +* Double-channel + +The data size accessed by the memory controller is interlaced into two +dimms, accessed at the same time. E. g. if the DIMM is 64 bits-wide (72 +bits with ECC), the data flows to the CPU using a 128 bits parallel +access. + +* Chip-select row + +This is the name of the DRAM signal used to select the DRAM ranks to be +accessed. Common chip-select rows for single channel are 64 bits, for +dual channel 128 bits. It may not be visible by the memory controller, +as some DIMM types have a memory buffer that can hide direct access to +it from the Memory Controller. + +* Single-Ranked stick + +A Single-ranked stick has 1 chip-select row of memory. Motherboards +commonly drive two chip-select pins to a memory stick. A single-ranked +stick, will occupy only one of those rows. The other will be unused. + +.. _doubleranked: + +* Double-Ranked stick + +A double-ranked stick has two chip-select rows which access different +sets of memory devices. The two rows cannot be accessed concurrently. + +* Double-sided stick + +**DEPRECATED TERM**, see :ref:`Double-Ranked stick <doubleranked>`. + +A double-sided stick has two chip-select rows which access different sets +of memory devices. The two rows cannot be accessed concurrently. +"Double-sided" is irrespective of the memory devices being mounted on +both sides of the memory stick. + +* Socket set + +All of the memory sticks that are required for a single memory access or +all of the memory sticks spanned by a chip-select row. A single socket +set has two chip-select rows and if double-sided sticks are used these +will occupy those chip-select rows. + +* Bank + +This term is avoided because it is unclear when needing to distinguish +between chip-select rows and socket sets. + +* High Bandwidth Memory (HBM) + +HBM is a new memory type with low power consumption and ultra-wide +communication lanes. It uses vertically stacked memory chips (DRAM dies) +interconnected by microscopic wires called "through-silicon vias," or +TSVs. + +Several stacks of HBM chips connect to the CPU or GPU through an ultra-fast +interconnect called the "interposer". Therefore, HBM's characteristics +are nearly indistinguishable from on-chip integrated RAM. + +Memory Controllers +------------------ + +Most of the EDAC core is focused on doing Memory Controller error detection. +The :c:func:`edac_mc_alloc`. It uses internally the struct ``mem_ctl_info`` +to describe the memory controllers, with is an opaque struct for the EDAC +drivers. Only the EDAC core is allowed to touch it. + +.. kernel-doc:: include/linux/edac.h + +.. kernel-doc:: drivers/edac/edac_mc.h + +PCI Controllers +--------------- + +The EDAC subsystem provides a mechanism to handle PCI controllers by calling +the :c:func:`edac_pci_alloc_ctl_info`. It will use the struct +:c:type:`edac_pci_ctl_info` to describe the PCI controllers. + +.. kernel-doc:: drivers/edac/edac_pci.h + +EDAC Blocks +----------- + +The EDAC subsystem also provides a generic mechanism to report errors on +other parts of the hardware via :c:func:`edac_device_alloc_ctl_info` function. + +The structures :c:type:`edac_dev_sysfs_block_attribute`, +:c:type:`edac_device_block`, :c:type:`edac_device_instance` and +:c:type:`edac_device_ctl_info` provide a generic or abstract 'edac_device' +representation at sysfs. + +This set of structures and the code that implements the APIs for the same, provide for registering EDAC type devices which are NOT standard memory or +PCI, like: + +- CPU caches (L1 and L2) +- DMA engines +- Core CPU switches +- Fabric switch units +- PCIe interface controllers +- other EDAC/ECC type devices that can be monitored for + errors, etc. + +It allows for a 2 level set of hierarchy. + +For example, a cache could be composed of L1, L2 and L3 levels of cache. +Each CPU core would have its own L1 cache, while sharing L2 and maybe L3 +caches. On such case, those can be represented via the following sysfs +nodes:: + + /sys/devices/system/edac/.. + + pci/ <existing pci directory (if available)> + mc/ <existing memory device directory> + cpu/cpu0/.. <L1 and L2 block directory> + /L1-cache/ce_count + /ue_count + /L2-cache/ce_count + /ue_count + cpu/cpu1/.. <L1 and L2 block directory> + /L1-cache/ce_count + /ue_count + /L2-cache/ce_count + /ue_count + ... + + the L1 and L2 directories would be "edac_device_block's" + +.. kernel-doc:: drivers/edac/edac_device.h + + +Heterogeneous system support +---------------------------- + +An AMD heterogeneous system is built by connecting the data fabrics of +both CPUs and GPUs via custom xGMI links. Thus, the data fabric on the +GPU nodes can be accessed the same way as the data fabric on CPU nodes. + +The MI200 accelerators are data center GPUs. They have 2 data fabrics, +and each GPU data fabric contains four Unified Memory Controllers (UMC). +Each UMC contains eight channels. Each UMC channel controls one 128-bit +HBM2e (2GB) channel (equivalent to 8 X 2GB ranks). This creates a total +of 4096-bits of DRAM data bus. + +While the UMC is interfacing a 16GB (8high X 2GB DRAM) HBM stack, each UMC +channel is interfacing 2GB of DRAM (represented as rank). + +Memory controllers on AMD GPU nodes can be represented in EDAC thusly: + + GPU DF / GPU Node -> EDAC MC + GPU UMC -> EDAC CSROW + GPU UMC channel -> EDAC CHANNEL + +For example: a heterogeneous system with 1 AMD CPU is connected to +4 MI200 (Aldebaran) GPUs using xGMI. + +Some more heterogeneous hardware details: + +- The CPU UMC (Unified Memory Controller) is mostly the same as the GPU UMC. + They have chip selects (csrows) and channels. However, the layouts are different + for performance, physical layout, or other reasons. +- CPU UMCs use 1 channel, In this case UMC = EDAC channel. This follows the + marketing speak. CPU has X memory channels, etc. +- CPU UMCs use up to 4 chip selects, So UMC chip select = EDAC CSROW. +- GPU UMCs use 1 chip select, So UMC = EDAC CSROW. +- GPU UMCs use 8 channels, So UMC channel = EDAC channel. + +The EDAC subsystem provides a mechanism to handle AMD heterogeneous +systems by calling system specific ops for both CPUs and GPUs. + +AMD GPU nodes are enumerated in sequential order based on the PCI +hierarchy, and the first GPU node is assumed to have a Node ID value +following those of the CPU nodes after latter are fully populated:: + + $ ls /sys/devices/system/edac/mc/ + mc0 - CPU MC node 0 + mc1 | + mc2 |- GPU card[0] => node 0(mc1), node 1(mc2) + mc3 | + mc4 |- GPU card[1] => node 0(mc3), node 1(mc4) + mc5 | + mc6 |- GPU card[2] => node 0(mc5), node 1(mc6) + mc7 | + mc8 |- GPU card[3] => node 0(mc7), node 1(mc8) + +For example, a heterogeneous system with one AMD CPU is connected to +four MI200 (Aldebaran) GPUs using xGMI. This topology can be represented +via the following sysfs entries:: + + /sys/devices/system/edac/mc/.. + + CPU # CPU node + ├── mc 0 + + GPU Nodes are enumerated sequentially after CPU nodes have been populated + GPU card 1 # Each MI200 GPU has 2 nodes/mcs + ├── mc 1 # GPU node 0 == mc1, Each MC node has 4 UMCs/CSROWs + │ ├── csrow 0 # UMC 0 + │ │ ├── channel 0 # Each UMC has 8 channels + │ │ ├── channel 1 # size of each channel is 2 GB, so each UMC has 16 GB + │ │ ├── channel 2 + │ │ ├── channel 3 + │ │ ├── channel 4 + │ │ ├── channel 5 + │ │ ├── channel 6 + │ │ ├── channel 7 + │ ├── csrow 1 # UMC 1 + │ │ ├── channel 0 + │ │ ├── .. + │ │ ├── channel 7 + │ ├── .. .. + │ ├── csrow 3 # UMC 3 + │ │ ├── channel 0 + │ │ ├── .. + │ │ ├── channel 7 + │ ├── rank 0 + │ ├── .. .. + │ ├── rank 31 # total 32 ranks/dimms from 4 UMCs + ├ + ├── mc 2 # GPU node 1 == mc2 + │ ├── .. # each GPU has total 64 GB + + GPU card 2 + ├── mc 3 + │ ├── .. + ├── mc 4 + │ ├── .. + + GPU card 3 + ├── mc 5 + │ ├── .. + ├── mc 6 + │ ├── .. + + GPU card 4 + ├── mc 7 + │ ├── .. + ├── mc 8 + │ ├── .. diff --git a/Documentation/driver-api/eisa.rst b/Documentation/driver-api/eisa.rst new file mode 100644 index 0000000000..3eac11b7eb --- /dev/null +++ b/Documentation/driver-api/eisa.rst @@ -0,0 +1,230 @@ +================ +EISA bus support +================ + +:Author: Marc Zyngier <maz@wild-wind.fr.eu.org> + +This document groups random notes about porting EISA drivers to the +new EISA/sysfs API. + +Starting from version 2.5.59, the EISA bus is almost given the same +status as other much more mainstream busses such as PCI or USB. This +has been possible through sysfs, which defines a nice enough set of +abstractions to manage busses, devices and drivers. + +Although the new API is quite simple to use, converting existing +drivers to the new infrastructure is not an easy task (mostly because +detection code is generally also used to probe ISA cards). Moreover, +most EISA drivers are among the oldest Linux drivers so, as you can +imagine, some dust has settled here over the years. + +The EISA infrastructure is made up of three parts: + + - The bus code implements most of the generic code. It is shared + among all the architectures that the EISA code runs on. It + implements bus probing (detecting EISA cards available on the bus), + allocates I/O resources, allows fancy naming through sysfs, and + offers interfaces for driver to register. + + - The bus root driver implements the glue between the bus hardware + and the generic bus code. It is responsible for discovering the + device implementing the bus, and setting it up to be latter probed + by the bus code. This can go from something as simple as reserving + an I/O region on x86, to the rather more complex, like the hppa + EISA code. This is the part to implement in order to have EISA + running on an "new" platform. + + - The driver offers the bus a list of devices that it manages, and + implements the necessary callbacks to probe and release devices + whenever told to. + +Every function/structure below lives in <linux/eisa.h>, which depends +heavily on <linux/device.h>. + +Bus root driver +=============== + +:: + + int eisa_root_register (struct eisa_root_device *root); + +The eisa_root_register function is used to declare a device as the +root of an EISA bus. The eisa_root_device structure holds a reference +to this device, as well as some parameters for probing purposes:: + + struct eisa_root_device { + struct device *dev; /* Pointer to bridge device */ + struct resource *res; + unsigned long bus_base_addr; + int slots; /* Max slot number */ + int force_probe; /* Probe even when no slot 0 */ + u64 dma_mask; /* from bridge device */ + int bus_nr; /* Set by eisa_root_register */ + struct resource eisa_root_res; /* ditto */ + }; + +============= ====================================================== +node used for eisa_root_register internal purpose +dev pointer to the root device +res root device I/O resource +bus_base_addr slot 0 address on this bus +slots max slot number to probe +force_probe Probe even when slot 0 is empty (no EISA mainboard) +dma_mask Default DMA mask. Usually the bridge device dma_mask. +bus_nr unique bus id, set by eisa_root_register +============= ====================================================== + +Driver +====== + +:: + + int eisa_driver_register (struct eisa_driver *edrv); + void eisa_driver_unregister (struct eisa_driver *edrv); + +Clear enough ? + +:: + + struct eisa_device_id { + char sig[EISA_SIG_LEN]; + unsigned long driver_data; + }; + + struct eisa_driver { + const struct eisa_device_id *id_table; + struct device_driver driver; + }; + +=============== ==================================================== +id_table an array of NULL terminated EISA id strings, + followed by an empty string. Each string can + optionally be paired with a driver-dependent value + (driver_data). + +driver a generic driver, such as described in + Documentation/driver-api/driver-model/driver.rst. Only .name, + .probe and .remove members are mandatory. +=============== ==================================================== + +An example is the 3c59x driver:: + + static struct eisa_device_id vortex_eisa_ids[] = { + { "TCM5920", EISA_3C592_OFFSET }, + { "TCM5970", EISA_3C597_OFFSET }, + { "" } + }; + + static struct eisa_driver vortex_eisa_driver = { + .id_table = vortex_eisa_ids, + .driver = { + .name = "3c59x", + .probe = vortex_eisa_probe, + .remove = vortex_eisa_remove + } + }; + +Device +====== + +The sysfs framework calls .probe and .remove functions upon device +discovery and removal (note that the .remove function is only called +when driver is built as a module). + +Both functions are passed a pointer to a 'struct device', which is +encapsulated in a 'struct eisa_device' described as follows:: + + struct eisa_device { + struct eisa_device_id id; + int slot; + int state; + unsigned long base_addr; + struct resource res[EISA_MAX_RESOURCES]; + u64 dma_mask; + struct device dev; /* generic device */ + }; + +======== ============================================================ +id EISA id, as read from device. id.driver_data is set from the + matching driver EISA id. +slot slot number which the device was detected on +state set of flags indicating the state of the device. Current + flags are EISA_CONFIG_ENABLED and EISA_CONFIG_FORCED. +res set of four 256 bytes I/O regions allocated to this device +dma_mask DMA mask set from the parent device. +dev generic device (see Documentation/driver-api/driver-model/device.rst) +======== ============================================================ + +You can get the 'struct eisa_device' from 'struct device' using the +'to_eisa_device' macro. + +Misc stuff +========== + +:: + + void eisa_set_drvdata (struct eisa_device *edev, void *data); + +Stores data into the device's driver_data area. + +:: + + void *eisa_get_drvdata (struct eisa_device *edev): + +Gets the pointer previously stored into the device's driver_data area. + +:: + + int eisa_get_region_index (void *addr); + +Returns the region number (0 <= x < EISA_MAX_RESOURCES) of a given +address. + +Kernel parameters +================= + +eisa_bus.enable_dev + A comma-separated list of slots to be enabled, even if the firmware + set the card as disabled. The driver must be able to properly + initialize the device in such conditions. + +eisa_bus.disable_dev + A comma-separated list of slots to be disabled, even if the firmware + set the card as enabled. The driver won't be called to handle this + device. + +virtual_root.force_probe + Force the probing code to probe EISA slots even when it cannot find an + EISA compliant mainboard (nothing appears on slot 0). Defaults to 0 + (don't force), and set to 1 (force probing) when either + CONFIG_ALPHA_JENSEN or CONFIG_EISA_VLB_PRIMING are set. + +Random notes +============ + +Converting an EISA driver to the new API mostly involves *deleting* +code (since probing is now in the core EISA code). Unfortunately, most +drivers share their probing routine between ISA, and EISA. Special +care must be taken when ripping out the EISA code, so other busses +won't suffer from these surgical strikes... + +You *must not* expect any EISA device to be detected when returning +from eisa_driver_register, since the chances are that the bus has not +yet been probed. In fact, that's what happens most of the time (the +bus root driver usually kicks in rather late in the boot process). +Unfortunately, most drivers are doing the probing by themselves, and +expect to have explored the whole machine when they exit their probe +routine. + +For example, switching your favorite EISA SCSI card to the "hotplug" +model is "the right thing"(tm). + +Thanks +====== + +I'd like to thank the following people for their help: + +- Xavier Benigni for lending me a wonderful Alpha Jensen, +- James Bottomley, Jeff Garzik for getting this stuff into the kernel, +- Andries Brouwer for contributing numerous EISA ids, +- Catrin Jones for coping with far too many machines at home. diff --git a/Documentation/driver-api/firewire.rst b/Documentation/driver-api/firewire.rst new file mode 100644 index 0000000000..d3cfa73cbb --- /dev/null +++ b/Documentation/driver-api/firewire.rst @@ -0,0 +1,48 @@ +=========================================== +Firewire (IEEE 1394) driver Interface Guide +=========================================== + +Introduction and Overview +========================= + +The Linux FireWire subsystem adds some interfaces into the Linux system to + use/maintain+any resource on IEEE 1394 bus. + +The main purpose of these interfaces is to access address space on each node +on IEEE 1394 bus by ISO/IEC 13213 (IEEE 1212) procedure, and to control +isochronous resources on the bus by IEEE 1394 procedure. + +Two types of interfaces are added, according to consumers of the interface. A +set of userspace interfaces is available via `firewire character devices`. A set +of kernel interfaces is available via exported symbols in `firewire-core` module. + +Firewire char device data structures +==================================== + +.. include:: ../ABI/stable/firewire-cdev + :literal: + +.. kernel-doc:: include/uapi/linux/firewire-cdev.h + :internal: + +Firewire device probing and sysfs interfaces +============================================ + +.. include:: ../ABI/stable/sysfs-bus-firewire + :literal: + +.. kernel-doc:: drivers/firewire/core-device.c + :export: + +Firewire core transaction interfaces +==================================== + +.. kernel-doc:: drivers/firewire/core-transaction.c + :export: + +Firewire Isochronous I/O interfaces +=================================== + +.. kernel-doc:: drivers/firewire/core-iso.c + :export: + diff --git a/Documentation/driver-api/firmware/built-in-fw.rst b/Documentation/driver-api/firmware/built-in-fw.rst new file mode 100644 index 0000000000..bc1c961bac --- /dev/null +++ b/Documentation/driver-api/firmware/built-in-fw.rst @@ -0,0 +1,33 @@ +================= +Built-in firmware +================= + +Firmware can be built-in to the kernel, this means building the firmware +into vmlinux directly, to enable avoiding having to look for firmware from +the filesystem. Instead, firmware can be looked for inside the kernel +directly. You can enable built-in firmware using the kernel configuration +options: + + * CONFIG_EXTRA_FIRMWARE + * CONFIG_EXTRA_FIRMWARE_DIR + +There are a few reasons why you might want to consider building your firmware +into the kernel with CONFIG_EXTRA_FIRMWARE: + +* Speed +* Firmware is needed for accessing the boot device, and the user doesn't + want to stuff the firmware into the boot initramfs. + +Even if you have these needs there are a few reasons why you may not be +able to make use of built-in firmware: + +* Legalese - firmware is non-GPL compatible +* Some firmware may be optional +* Firmware upgrades are possible, therefore a new firmware would implicate + a complete kernel rebuild. +* Some firmware files may be really large in size. The remote-proc subsystem + is an example subsystem which deals with these sorts of firmware +* The firmware may need to be scraped out from some device specific location + dynamically, an example is calibration data for some WiFi chipsets. This + calibration data can be unique per sold device. + diff --git a/Documentation/driver-api/firmware/core.rst b/Documentation/driver-api/firmware/core.rst new file mode 100644 index 0000000000..803cd574bb --- /dev/null +++ b/Documentation/driver-api/firmware/core.rst @@ -0,0 +1,17 @@ +========================== +Firmware API core features +========================== + +The firmware API has a rich set of core features available. This section +documents these features. + +.. toctree:: + + fw_search_path + built-in-fw + firmware_cache + direct-fs-lookup + fallback-mechanisms + lookup-order + firmware-usage-guidelines + diff --git a/Documentation/driver-api/firmware/direct-fs-lookup.rst b/Documentation/driver-api/firmware/direct-fs-lookup.rst new file mode 100644 index 0000000000..e04353d1b0 --- /dev/null +++ b/Documentation/driver-api/firmware/direct-fs-lookup.rst @@ -0,0 +1,30 @@ +======================== +Direct filesystem lookup +======================== + +Direct filesystem lookup is the most common form of firmware lookup performed +by the kernel. The kernel looks for the firmware directly on the root +filesystem in the paths documented in the section 'Firmware search paths'. +The filesystem lookup is implemented in fw_get_filesystem_firmware(), it +uses common core kernel file loader facility kernel_read_file_from_path(). +The max path allowed is PATH_MAX -- currently this is 4096 characters. + +It is recommended you keep /lib/firmware paths on your root filesystem, +avoid having a separate partition for them in order to avoid possible +races with lookups and avoid uses of the custom fallback mechanisms +documented below. + +Firmware and initramfs +---------------------- + +Drivers which are built-in to the kernel should have the firmware integrated +also as part of the initramfs used to boot the kernel given that otherwise +a race is possible with loading the driver and the real rootfs not yet being +available. Stuffing the firmware into initramfs resolves this race issue, +however note that using initrd does not suffice to address the same race. + +There are circumstances that justify not wanting to include firmware into +initramfs, such as dealing with large firmware files for the +remote-proc subsystem. For such cases using a userspace fallback mechanism +is currently the only viable solution as only userspace can know for sure +when the real rootfs is ready and mounted. diff --git a/Documentation/driver-api/firmware/efi/index.rst b/Documentation/driver-api/firmware/efi/index.rst new file mode 100644 index 0000000000..4fe8abba9f --- /dev/null +++ b/Documentation/driver-api/firmware/efi/index.rst @@ -0,0 +1,11 @@ +.. SPDX-License-Identifier: GPL-2.0 + +============ +UEFI Support +============ + +UEFI stub library functions +=========================== + +.. kernel-doc:: drivers/firmware/efi/libstub/mem.c + :internal: diff --git a/Documentation/driver-api/firmware/fallback-mechanisms.rst b/Documentation/driver-api/firmware/fallback-mechanisms.rst new file mode 100644 index 0000000000..5f04c3bcdf --- /dev/null +++ b/Documentation/driver-api/firmware/fallback-mechanisms.rst @@ -0,0 +1,308 @@ +=================== +Fallback mechanisms +=================== + +A fallback mechanism is supported to allow to overcome failures to do a direct +filesystem lookup on the root filesystem or when the firmware simply cannot be +installed for practical reasons on the root filesystem. The kernel +configuration options related to supporting the firmware fallback mechanism are: + + * CONFIG_FW_LOADER_USER_HELPER: enables building the firmware fallback + mechanism. Most distributions enable this option today. If enabled but + CONFIG_FW_LOADER_USER_HELPER_FALLBACK is disabled, only the custom fallback + mechanism is available and for the request_firmware_nowait() call. + * CONFIG_FW_LOADER_USER_HELPER_FALLBACK: force enables each request to + enable the kobject uevent fallback mechanism on all firmware API calls + except request_firmware_direct(). Most distributions disable this option + today. The call request_firmware_nowait() allows for one alternative + fallback mechanism: if this kconfig option is enabled and your second + argument to request_firmware_nowait(), uevent, is set to false you are + informing the kernel that you have a custom fallback mechanism and it will + manually load the firmware. Read below for more details. + +Note that this means when having this configuration: + +CONFIG_FW_LOADER_USER_HELPER=y +CONFIG_FW_LOADER_USER_HELPER_FALLBACK=n + +the kobject uevent fallback mechanism will never take effect even +for request_firmware_nowait() when uevent is set to true. + +Justifying the firmware fallback mechanism +========================================== + +Direct filesystem lookups may fail for a variety of reasons. Known reasons for +this are worth itemizing and documenting as it justifies the need for the +fallback mechanism: + +* Race against access with the root filesystem upon bootup. + +* Races upon resume from suspend. This is resolved by the firmware cache, but + the firmware cache is only supported if you use uevents, and its not + supported for request_firmware_into_buf(). + +* Firmware is not accessible through typical means: + + * It cannot be installed into the root filesystem + * The firmware provides very unique device specific data tailored for + the unit gathered with local information. An example is calibration + data for WiFi chipsets for mobile devices. This calibration data is + not common to all units, but tailored per unit. Such information may + be installed on a separate flash partition other than where the root + filesystem is provided. + +Types of fallback mechanisms +============================ + +There are really two fallback mechanisms available using one shared sysfs +interface as a loading facility: + +* Kobject uevent fallback mechanism +* Custom fallback mechanism + +First lets document the shared sysfs loading facility. + +Firmware sysfs loading facility +=============================== + +In order to help device drivers upload firmware using a fallback mechanism +the firmware infrastructure creates a sysfs interface to enable userspace +to load and indicate when firmware is ready. The sysfs directory is created +via fw_create_instance(). This call creates a new struct device named after +the firmware requested, and establishes it in the device hierarchy by +associating the device used to make the request as the device's parent. +The sysfs directory's file attributes are defined and controlled through +the new device's class (firmware_class) and group (fw_dev_attr_groups). +This is actually where the original firmware_class module name came from, +given that originally the only firmware loading mechanism available was the +mechanism we now use as a fallback mechanism, which registers a struct class +firmware_class. Because the attributes exposed are part of the module name, the +module name firmware_class cannot be renamed in the future, to ensure backward +compatibility with old userspace. + +To load firmware using the sysfs interface we expose a loading indicator, +and a file upload firmware into: + + * /sys/$DEVPATH/loading + * /sys/$DEVPATH/data + +To upload firmware you will echo 1 onto the loading file to indicate +you are loading firmware. You then write the firmware into the data file, +and you notify the kernel the firmware is ready by echo'ing 0 onto +the loading file. + +The firmware device used to help load firmware using sysfs is only created if +direct firmware loading fails and if the fallback mechanism is enabled for your +firmware request, this is set up with :c:func:`firmware_fallback_sysfs`. It is +important to re-iterate that no device is created if a direct filesystem lookup +succeeded. + +Using:: + + echo 1 > /sys/$DEVPATH/loading + +Will clean any previous partial load at once and make the firmware API +return an error. When loading firmware the firmware_class grows a buffer +for the firmware in PAGE_SIZE increments to hold the image as it comes in. + +firmware_data_read() and firmware_loading_show() are just provided for the +test_firmware driver for testing, they are not called in normal use or +expected to be used regularly by userspace. + +firmware_fallback_sysfs +----------------------- +.. kernel-doc:: drivers/base/firmware_loader/fallback.c + :functions: firmware_fallback_sysfs + +Firmware kobject uevent fallback mechanism +========================================== + +Since a device is created for the sysfs interface to help load firmware as a +fallback mechanism userspace can be informed of the addition of the device by +relying on kobject uevents. The addition of the device into the device +hierarchy means the fallback mechanism for firmware loading has been initiated. +For details of implementation refer to fw_load_sysfs_fallback(), in particular +on the use of dev_set_uevent_suppress() and kobject_uevent(). + +The kernel's kobject uevent mechanism is implemented in lib/kobject_uevent.c, +it issues uevents to userspace. As a supplement to kobject uevents Linux +distributions could also enable CONFIG_UEVENT_HELPER_PATH, which makes use of +core kernel's usermode helper (UMH) functionality to call out to a userspace +helper for kobject uevents. In practice though no standard distribution has +ever used the CONFIG_UEVENT_HELPER_PATH. If CONFIG_UEVENT_HELPER_PATH is +enabled this binary would be called each time kobject_uevent_env() gets called +in the kernel for each kobject uevent triggered. + +Different implementations have been supported in userspace to take advantage of +this fallback mechanism. When firmware loading was only possible using the +sysfs mechanism the userspace component "hotplug" provided the functionality of +monitoring for kobject events. Historically this was superseded be systemd's +udev, however firmware loading support was removed from udev as of systemd +commit be2ea723b1d0 ("udev: remove userspace firmware loading support") +as of v217 on August, 2014. This means most Linux distributions today are +not using or taking advantage of the firmware fallback mechanism provided +by kobject uevents. This is specially exacerbated due to the fact that most +distributions today disable CONFIG_FW_LOADER_USER_HELPER_FALLBACK. + +Refer to do_firmware_uevent() for details of the kobject event variables +setup. The variables currently passed to userspace with a "kobject add" +event are: + +* FIRMWARE=firmware name +* TIMEOUT=timeout value +* ASYNC=whether or not the API request was asynchronous + +By default DEVPATH is set by the internal kernel kobject infrastructure. +Below is an example simple kobject uevent script:: + + # Both $DEVPATH and $FIRMWARE are already provided in the environment. + MY_FW_DIR=/lib/firmware/ + echo 1 > /sys/$DEVPATH/loading + cat $MY_FW_DIR/$FIRMWARE > /sys/$DEVPATH/data + echo 0 > /sys/$DEVPATH/loading + +Firmware custom fallback mechanism +================================== + +Users of the request_firmware_nowait() call have yet another option available +at their disposal: rely on the sysfs fallback mechanism but request that no +kobject uevents be issued to userspace. The original logic behind this +was that utilities other than udev might be required to lookup firmware +in non-traditional paths -- paths outside of the listing documented in the +section 'Direct filesystem lookup'. This option is not available to any of +the other API calls as uevents are always forced for them. + +Since uevents are only meaningful if the fallback mechanism is enabled +in your kernel it would seem odd to enable uevents with kernels that do not +have the fallback mechanism enabled in their kernels. Unfortunately we also +rely on the uevent flag which can be disabled by request_firmware_nowait() to +also setup the firmware cache for firmware requests. As documented above, +the firmware cache is only set up if uevent is enabled for an API call. +Although this can disable the firmware cache for request_firmware_nowait() +calls, users of this API should not use it for the purposes of disabling +the cache as that was not the original purpose of the flag. Not setting +the uevent flag means you want to opt-in for the firmware fallback mechanism +but you want to suppress kobject uevents, as you have a custom solution which +will monitor for your device addition into the device hierarchy somehow and +load firmware for you through a custom path. + +Firmware fallback timeout +========================= + +The firmware fallback mechanism has a timeout. If firmware is not loaded +onto the sysfs interface by the timeout value an error is sent to the +driver. By default the timeout is set to 60 seconds if uevents are +desirable, otherwise MAX_JIFFY_OFFSET is used (max timeout possible). +The logic behind using MAX_JIFFY_OFFSET for non-uevents is that a custom +solution will have as much time as it needs to load firmware. + +You can customize the firmware timeout by echo'ing your desired timeout into +the following file: + +* /sys/class/firmware/timeout + +If you echo 0 into it means MAX_JIFFY_OFFSET will be used. The data type +for the timeout is an int. + +EFI embedded firmware fallback mechanism +======================================== + +On some devices the system's EFI code / ROM may contain an embedded copy +of firmware for some of the system's integrated peripheral devices and +the peripheral's Linux device-driver needs to access this firmware. + +Device drivers which need such firmware can use the +firmware_request_platform() function for this, note that this is a +separate fallback mechanism from the other fallback mechanisms and +this does not use the sysfs interface. + +A device driver which needs this can describe the firmware it needs +using an efi_embedded_fw_desc struct: + +.. kernel-doc:: include/linux/efi_embedded_fw.h + :functions: efi_embedded_fw_desc + +The EFI embedded-fw code works by scanning all EFI_BOOT_SERVICES_CODE memory +segments for an eight byte sequence matching prefix; if the prefix is found it +then does a sha256 over length bytes and if that matches makes a copy of length +bytes and adds that to its list with found firmwares. + +To avoid doing this somewhat expensive scan on all systems, dmi matching is +used. Drivers are expected to export a dmi_system_id array, with each entries' +driver_data pointing to an efi_embedded_fw_desc. + +To register this array with the efi-embedded-fw code, a driver needs to: + +1. Always be builtin to the kernel or store the dmi_system_id array in a + separate object file which always gets builtin. + +2. Add an extern declaration for the dmi_system_id array to + include/linux/efi_embedded_fw.h. + +3. Add the dmi_system_id array to the embedded_fw_table in + drivers/firmware/efi/embedded-firmware.c wrapped in a #ifdef testing that + the driver is being builtin. + +4. Add "select EFI_EMBEDDED_FIRMWARE if EFI_STUB" to its Kconfig entry. + +The firmware_request_platform() function will always first try to load firmware +with the specified name directly from the disk, so the EFI embedded-fw can +always be overridden by placing a file under /lib/firmware. + +Note that: + +1. The code scanning for EFI embedded-firmware runs near the end + of start_kernel(), just before calling rest_init(). For normal drivers and + subsystems using subsys_initcall() to register themselves this does not + matter. This means that code running earlier cannot use EFI + embedded-firmware. + +2. At the moment the EFI embedded-fw code assumes that firmwares always start at + an offset which is a multiple of 8 bytes, if this is not true for your case + send in a patch to fix this. + +3. At the moment the EFI embedded-fw code only works on x86 because other archs + free EFI_BOOT_SERVICES_CODE before the EFI embedded-fw code gets a chance to + scan it. + +4. The current brute-force scanning of EFI_BOOT_SERVICES_CODE is an ad-hoc + brute-force solution. There has been discussion to use the UEFI Platform + Initialization (PI) spec's Firmware Volume protocol. This has been rejected + because the FV Protocol relies on *internal* interfaces of the PI spec, and: + 1. The PI spec does not define peripheral firmware at all + 2. The internal interfaces of the PI spec do not guarantee any backward + compatibility. Any implementation details in FV may be subject to change, + and may vary system to system. Supporting the FV Protocol would be + difficult as it is purposely ambiguous. + +Example how to check for and extract embedded firmware +------------------------------------------------------ + +To check for, for example Silead touchscreen controller embedded firmware, +do the following: + +1. Boot the system with efi=debug on the kernel commandline + +2. cp /sys/kernel/debug/efi/boot_services_code? to your home dir + +3. Open the boot_services_code? files in a hex-editor, search for the + magic prefix for Silead firmware: F0 00 00 00 02 00 00 00, this gives you + the beginning address of the firmware inside the boot_services_code? file. + +4. The firmware has a specific pattern, it starts with a 8 byte page-address, + typically F0 00 00 00 02 00 00 00 for the first page followed by 32-bit + word-address + 32-bit value pairs. With the word-address incrementing 4 + bytes (1 word) for each pair until a page is complete. A complete page is + followed by a new page-address, followed by more word + value pairs. This + leads to a very distinct pattern. Scroll down until this pattern stops, + this gives you the end of the firmware inside the boot_services_code? file. + +5. "dd if=boot_services_code? of=firmware bs=1 skip=<begin-addr> count=<len>" + will extract the firmware for you. Inspect the firmware file in a + hexeditor to make sure you got the dd parameters correct. + +6. Copy it to /lib/firmware under the expected name to test it. + +7. If the extracted firmware works, you can use the found info to fill an + efi_embedded_fw_desc struct to describe it, run "sha256sum firmware" + to get the sha256sum to put in the sha256 field. diff --git a/Documentation/driver-api/firmware/firmware-usage-guidelines.rst b/Documentation/driver-api/firmware/firmware-usage-guidelines.rst new file mode 100644 index 0000000000..fdcfce42c6 --- /dev/null +++ b/Documentation/driver-api/firmware/firmware-usage-guidelines.rst @@ -0,0 +1,44 @@ +=================== +Firmware Guidelines +=================== + +Users switching to a newer kernel should *not* have to install newer +firmware files to keep their hardware working. At the same time updated +firmware files must not cause any regressions for users of older kernel +releases. + +Drivers that use firmware from linux-firmware should follow the rules in +this guide. (Where there is limited control of the firmware, +i.e. company doesn't support Linux, firmwares sourced from misc places, +then of course these rules will not apply strictly.) + +* Firmware files shall be designed in a way that it allows checking for + firmware ABI version changes. It is recommended that firmware files be + versioned with at least a major/minor version. It is suggested that + the firmware files in linux-firmware be named with some device + specific name, and just the major version. The firmware version should + be stored in the firmware header, or as an exception, as part of the + firmware file name, in order to let the driver detact any non-ABI + fixes/changes. The firmware files in linux-firmware should be + overwritten with the newest compatible major version. Newer major + version firmware shall remain compatible with all kernels that load + that major number. + +* If the kernel support for the hardware is normally inactive, or the + hardware isn't available for public consumption, this can + be ignored, until the first kernel release that enables that hardware. + This means no major version bumps without the kernel retaining + backwards compatibility for the older major versions. Minor version + bumps should not introduce new features that newer kernels depend on + non-optionally. + +* If a security fix needs lockstep firmware and kernel fixes in order to + be successful, then all supported major versions in the linux-firmware + repo that are required by currently supported stable/LTS kernels, + should be updated with the security fix. The kernel patches should + detect if the firmware is new enough to declare if the security issue + is fixed. All communications around security fixes should point at + both the firmware and kernel fixes. If a security fix requires + deprecating old major versions, then this should only be done as a + last option, and be stated clearly in all communications. + diff --git a/Documentation/driver-api/firmware/firmware_cache.rst b/Documentation/driver-api/firmware/firmware_cache.rst new file mode 100644 index 0000000000..417b9e8347 --- /dev/null +++ b/Documentation/driver-api/firmware/firmware_cache.rst @@ -0,0 +1,51 @@ +============== +Firmware cache +============== + +When Linux resumes from suspend some device drivers require firmware lookups to +re-initialize devices. During resume there may be a period of time during which +firmware lookups are not possible, during this short period of time firmware +requests will fail. Time is of essence though, and delaying drivers to wait for +the root filesystem for firmware delays user experience with device +functionality. In order to support these requirements the firmware +infrastructure implements a firmware cache for device drivers for most API +calls, automatically behind the scenes. + +The firmware cache makes using certain firmware API calls safe during a device +driver's suspend and resume callback. Users of these API calls needn't cache +the firmware by themselves for dealing with firmware loss during system resume. + +The firmware cache works by requesting for firmware prior to suspend and +caching it in memory. Upon resume device drivers using the firmware API will +have access to the firmware immediately, without having to wait for the root +filesystem to mount or dealing with possible race issues with lookups as the +root filesystem mounts. + +Some implementation details about the firmware cache setup: + +* The firmware cache is setup by adding a devres entry for each device that + uses all synchronous call except :c:func:`request_firmware_into_buf`. + +* If an asynchronous call is used the firmware cache is only set up for a + device if the second argument (uevent) to request_firmware_nowait() is + true. When uevent is true it requests that a kobject uevent be sent to + userspace for the firmware request through the sysfs fallback mechanism + if the firmware file is not found. + +* If the firmware cache is determined to be needed as per the above two + criteria the firmware cache is setup by adding a devres entry for the + device making the firmware request. + +* The firmware devres entry is maintained throughout the lifetime of the + device. This means that even if you release_firmware() the firmware cache + will still be used on resume from suspend. + +* The timeout for the fallback mechanism is temporarily reduced to 10 seconds + as the firmware cache is set up during suspend, the timeout is set back to + the old value you had configured after the cache is set up. + +* Upon suspend any pending non-uevent firmware requests are killed to avoid + stalling the kernel, this is done with kill_requests_without_uevent(). Kernel + calls requiring the non-uevent therefore need to implement their own firmware + cache mechanism but must not use the firmware API on suspend. + diff --git a/Documentation/driver-api/firmware/fw_search_path.rst b/Documentation/driver-api/firmware/fw_search_path.rst new file mode 100644 index 0000000000..d7cb1e8f00 --- /dev/null +++ b/Documentation/driver-api/firmware/fw_search_path.rst @@ -0,0 +1,31 @@ +===================== +Firmware search paths +===================== + +The following search paths are used to look for firmware on your +root filesystem. + +* fw_path_para - module parameter - default is empty so this is ignored +* /lib/firmware/updates/UTS_RELEASE/ +* /lib/firmware/updates/ +* /lib/firmware/UTS_RELEASE/ +* /lib/firmware/ + +The module parameter ''path'' can be passed to the firmware_class module +to activate the first optional custom fw_path_para. The custom path can +only be up to 256 characters long. The kernel parameter passed would be: + +* 'firmware_class.path=$CUSTOMIZED_PATH' + +There is an alternative to customize the path at run time after bootup, you +can use the file: + +* /sys/module/firmware_class/parameters/path + +You would echo into it your custom path and firmware requested will be searched +for there first. Be aware that newline characters will be taken into account +and may not produce the intended effects. For instance you might want to use: + +echo -n /path/to/script > /sys/module/firmware_class/parameters/path + +to ensure that your script is being used. diff --git a/Documentation/driver-api/firmware/fw_upload.rst b/Documentation/driver-api/firmware/fw_upload.rst new file mode 100644 index 0000000000..edf1d0c5e7 --- /dev/null +++ b/Documentation/driver-api/firmware/fw_upload.rst @@ -0,0 +1,127 @@ +.. SPDX-License-Identifier: GPL-2.0 + +=================== +Firmware Upload API +=================== + +A device driver that registers with the firmware loader will expose +persistent sysfs nodes to enable users to initiate firmware updates for +that device. It is the responsibility of the device driver and/or the +device itself to perform any validation on the data received. Firmware +upload uses the same *loading* and *data* sysfs files described in the +documentation for firmware fallback. It also adds additional sysfs files +to provide status on the transfer of the firmware image to the device. + +Register for firmware upload +============================ + +A device driver registers for firmware upload by calling +firmware_upload_register(). Among the parameter list is a name to +identify the device under /sys/class/firmware. A user may initiate a +firmware upload by echoing a 1 to the *loading* sysfs file for the target +device. Next, the user writes the firmware image to the *data* sysfs +file. After writing the firmware data, the user echos 0 to the *loading* +sysfs file to signal completion. Echoing 0 to *loading* also triggers the +transfer of the firmware to the lower-lever device driver in the context +of a kernel worker thread. + +To use the firmware upload API, write a driver that implements a set of +ops. The probe function calls firmware_upload_register() and the remove +function calls firmware_upload_unregister() such as:: + + static const struct fw_upload_ops m10bmc_ops = { + .prepare = m10bmc_sec_prepare, + .write = m10bmc_sec_write, + .poll_complete = m10bmc_sec_poll_complete, + .cancel = m10bmc_sec_cancel, + .cleanup = m10bmc_sec_cleanup, + }; + + static int m10bmc_sec_probe(struct platform_device *pdev) + { + const char *fw_name, *truncate; + struct m10bmc_sec *sec; + struct fw_upload *fwl; + unsigned int len; + + sec = devm_kzalloc(&pdev->dev, sizeof(*sec), GFP_KERNEL); + if (!sec) + return -ENOMEM; + + sec->dev = &pdev->dev; + sec->m10bmc = dev_get_drvdata(pdev->dev.parent); + dev_set_drvdata(&pdev->dev, sec); + + fw_name = dev_name(sec->dev); + truncate = strstr(fw_name, ".auto"); + len = (truncate) ? truncate - fw_name : strlen(fw_name); + sec->fw_name = kmemdup_nul(fw_name, len, GFP_KERNEL); + + fwl = firmware_upload_register(THIS_MODULE, sec->dev, sec->fw_name, + &m10bmc_ops, sec); + if (IS_ERR(fwl)) { + dev_err(sec->dev, "Firmware Upload driver failed to start\n"); + kfree(sec->fw_name); + return PTR_ERR(fwl); + } + + sec->fwl = fwl; + return 0; + } + + static int m10bmc_sec_remove(struct platform_device *pdev) + { + struct m10bmc_sec *sec = dev_get_drvdata(&pdev->dev); + + firmware_upload_unregister(sec->fwl); + kfree(sec->fw_name); + return 0; + } + +firmware_upload_register +------------------------ +.. kernel-doc:: drivers/base/firmware_loader/sysfs_upload.c + :identifiers: firmware_upload_register + +firmware_upload_unregister +-------------------------- +.. kernel-doc:: drivers/base/firmware_loader/sysfs_upload.c + :identifiers: firmware_upload_unregister + +Firmware Upload Ops +------------------- +.. kernel-doc:: include/linux/firmware.h + :identifiers: fw_upload_ops + +Firmware Upload Progress Codes +------------------------------ +The following progress codes are used internally by the firmware loader. +Corresponding strings are reported through the status sysfs node that +is described below and are documented in the ABI documentation. + +.. kernel-doc:: drivers/base/firmware_loader/sysfs_upload.h + :identifiers: fw_upload_prog + +Firmware Upload Error Codes +--------------------------- +The following error codes may be returned by the driver ops in case of +failure: + +.. kernel-doc:: include/linux/firmware.h + :identifiers: fw_upload_err + +Sysfs Attributes +================ + +In addition to the *loading* and *data* sysfs files, there are additional +sysfs files to monitor the status of the data transfer to the target +device and to determine the final pass/fail status of the transfer. +Depending on the device and the size of the firmware image, a firmware +update could take milliseconds or minutes. + +The additional sysfs files are: + +* status - provides an indication of the progress of a firmware update +* error - provides error information for a failed firmware update +* remaining_size - tracks the data transfer portion of an update +* cancel - echo 1 to this file to cancel the update diff --git a/Documentation/driver-api/firmware/index.rst b/Documentation/driver-api/firmware/index.rst new file mode 100644 index 0000000000..9d2c19dc8e --- /dev/null +++ b/Documentation/driver-api/firmware/index.rst @@ -0,0 +1,19 @@ +================== +Linux Firmware API +================== + +.. toctree:: + + introduction + core + efi/index + request_firmware + fw_upload + other_interfaces + +.. only:: subproject and html + + Indices + ======= + + * :ref:`genindex` diff --git a/Documentation/driver-api/firmware/introduction.rst b/Documentation/driver-api/firmware/introduction.rst new file mode 100644 index 0000000000..211cb44eb9 --- /dev/null +++ b/Documentation/driver-api/firmware/introduction.rst @@ -0,0 +1,27 @@ +============ +Introduction +============ + +The firmware API enables kernel code to request files required +for functionality from userspace, the uses vary: + +* Microcode for CPU errata +* Device driver firmware, required to be loaded onto device + microcontrollers +* Device driver information data (calibration data, EEPROM overrides), + some of which can be completely optional. + +Types of firmware requests +========================== + +There are two types of calls: + +* Synchronous +* Asynchronous + +Which one you use vary depending on your requirements, the rule of thumb +however is you should strive to use the asynchronous APIs unless you also +are already using asynchronous initialization mechanisms which will not +stall or delay boot. Even if loading firmware does not take a lot of time +processing firmware might, and this can still delay boot or initialization, +as such mechanisms such as asynchronous probe can help supplement drivers. diff --git a/Documentation/driver-api/firmware/lookup-order.rst b/Documentation/driver-api/firmware/lookup-order.rst new file mode 100644 index 0000000000..6064672a78 --- /dev/null +++ b/Documentation/driver-api/firmware/lookup-order.rst @@ -0,0 +1,20 @@ +===================== +Firmware lookup order +===================== + +Different functionality is available to enable firmware to be found. +Below is chronological order of how firmware will be looked for once +a driver issues a firmware API call. + +* The ''Built-in firmware'' is checked first, if the firmware is present we + return it immediately +* The ''Firmware cache'' is looked at next. If the firmware is found we + return it immediately +* The ''Direct filesystem lookup'' is performed next, if found we + return it immediately +* The ''Platform firmware fallback'' is performed next, but only when + firmware_request_platform() is used, if found we return it immediately +* If no firmware has been found and the fallback mechanism was enabled + the sysfs interface is created. After this either a kobject uevent + is issued or the custom firmware loading is relied upon for firmware + loading up to the timeout value. diff --git a/Documentation/driver-api/firmware/other_interfaces.rst b/Documentation/driver-api/firmware/other_interfaces.rst new file mode 100644 index 0000000000..06ac89adaa --- /dev/null +++ b/Documentation/driver-api/firmware/other_interfaces.rst @@ -0,0 +1,51 @@ +Other Firmware Interfaces +========================= + +DMI Interfaces +-------------- + +.. kernel-doc:: drivers/firmware/dmi_scan.c + :export: + +EDD Interfaces +-------------- + +.. kernel-doc:: drivers/firmware/edd.c + :internal: + +Generic System Framebuffers Interface +------------------------------------- + +.. kernel-doc:: drivers/firmware/sysfb.c + :export: + +Intel Stratix10 SoC Service Layer +--------------------------------- +Some features of the Intel Stratix10 SoC require a level of privilege +higher than the kernel is granted. Such secure features include +FPGA programming. In terms of the ARMv8 architecture, the kernel runs +at Exception Level 1 (EL1), access to the features requires +Exception Level 3 (EL3). + +The Intel Stratix10 SoC service layer provides an in kernel API for +drivers to request access to the secure features. The requests are queued +and processed one by one. ARM’s SMCCC is used to pass the execution +of the requests on to a secure monitor (EL3). + +.. kernel-doc:: include/linux/firmware/intel/stratix10-svc-client.h + :functions: stratix10_svc_command_code + +.. kernel-doc:: include/linux/firmware/intel/stratix10-svc-client.h + :functions: stratix10_svc_client_msg + +.. kernel-doc:: include/linux/firmware/intel/stratix10-svc-client.h + :functions: stratix10_svc_command_config_type + +.. kernel-doc:: include/linux/firmware/intel/stratix10-svc-client.h + :functions: stratix10_svc_cb_data + +.. kernel-doc:: include/linux/firmware/intel/stratix10-svc-client.h + :functions: stratix10_svc_client + +.. kernel-doc:: drivers/firmware/stratix10-svc.c + :export: diff --git a/Documentation/driver-api/firmware/request_firmware.rst b/Documentation/driver-api/firmware/request_firmware.rst new file mode 100644 index 0000000000..0d6ea03299 --- /dev/null +++ b/Documentation/driver-api/firmware/request_firmware.rst @@ -0,0 +1,80 @@ +==================== +request_firmware API +==================== + +You would typically load firmware and then load it into your device somehow. +The typical firmware work flow is reflected below:: + + if(request_firmware(&fw_entry, $FIRMWARE, device) == 0) + copy_fw_to_device(fw_entry->data, fw_entry->size); + release_firmware(fw_entry); + +Synchronous firmware requests +============================= + +Synchronous firmware requests will wait until the firmware is found or until +an error is returned. + +request_firmware +---------------- +.. kernel-doc:: drivers/base/firmware_loader/main.c + :functions: request_firmware + +firmware_request_nowarn +----------------------- +.. kernel-doc:: drivers/base/firmware_loader/main.c + :functions: firmware_request_nowarn + +firmware_request_platform +------------------------- +.. kernel-doc:: drivers/base/firmware_loader/main.c + :functions: firmware_request_platform + +request_firmware_direct +----------------------- +.. kernel-doc:: drivers/base/firmware_loader/main.c + :functions: request_firmware_direct + +request_firmware_into_buf +------------------------- +.. kernel-doc:: drivers/base/firmware_loader/main.c + :functions: request_firmware_into_buf + +Asynchronous firmware requests +============================== + +Asynchronous firmware requests allow driver code to not have to wait +until the firmware or an error is returned. Function callbacks are +provided so that when the firmware or an error is found the driver is +informed through the callback. request_firmware_nowait() cannot be called +in atomic contexts. + +request_firmware_nowait +----------------------- +.. kernel-doc:: drivers/base/firmware_loader/main.c + :functions: request_firmware_nowait + +Special optimizations on reboot +=============================== + +Some devices have an optimization in place to enable the firmware to be +retained during system reboot. When such optimizations are used the driver +author must ensure the firmware is still available on resume from suspend, +this can be done with firmware_request_cache() instead of requesting for the +firmware to be loaded. + +firmware_request_cache() +------------------------ +.. kernel-doc:: drivers/base/firmware_loader/main.c + :functions: firmware_request_cache + +request firmware API expected driver use +======================================== + +Once an API call returns you process the firmware and then release the +firmware. For example if you used request_firmware() and it returns, +the driver has the firmware image accessible in fw_entry->{data,size}. +If something went wrong request_firmware() returns non-zero and fw_entry +is set to NULL. Once your driver is done with processing the firmware it +can call release_firmware(fw_entry) to release the firmware image +and any related resource. diff --git a/Documentation/driver-api/fpga/fpga-bridge.rst b/Documentation/driver-api/fpga/fpga-bridge.rst new file mode 100644 index 0000000000..6042085340 --- /dev/null +++ b/Documentation/driver-api/fpga/fpga-bridge.rst @@ -0,0 +1,22 @@ +FPGA Bridge +=========== + +API to implement a new FPGA bridge +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ + +* struct fpga_bridge - The FPGA Bridge structure +* struct fpga_bridge_ops - Low level Bridge driver ops +* fpga_bridge_register() - Create and register a bridge +* fpga_bridge_unregister() - Unregister a bridge + +.. kernel-doc:: include/linux/fpga/fpga-bridge.h + :functions: fpga_bridge + +.. kernel-doc:: include/linux/fpga/fpga-bridge.h + :functions: fpga_bridge_ops + +.. kernel-doc:: drivers/fpga/fpga-bridge.c + :functions: fpga_bridge_register + +.. kernel-doc:: drivers/fpga/fpga-bridge.c + :functions: fpga_bridge_unregister diff --git a/Documentation/driver-api/fpga/fpga-mgr.rst b/Documentation/driver-api/fpga/fpga-mgr.rst new file mode 100644 index 0000000000..49c0a95126 --- /dev/null +++ b/Documentation/driver-api/fpga/fpga-mgr.rst @@ -0,0 +1,162 @@ +FPGA Manager +============ + +Overview +-------- + +The FPGA manager core exports a set of functions for programming an FPGA with +an image. The API is manufacturer agnostic. All manufacturer specifics are +hidden away in a low level driver which registers a set of ops with the core. +The FPGA image data itself is very manufacturer specific, but for our purposes +it's just binary data. The FPGA manager core won't parse it. + +The FPGA image to be programmed can be in a scatter gather list, a single +contiguous buffer, or a firmware file. Because allocating contiguous kernel +memory for the buffer should be avoided, users are encouraged to use a scatter +gather list instead if possible. + +The particulars for programming the image are presented in a structure (struct +fpga_image_info). This struct contains parameters such as pointers to the +FPGA image as well as image-specific particulars such as whether the image was +built for full or partial reconfiguration. + +How to support a new FPGA device +-------------------------------- + +To add another FPGA manager, write a driver that implements a set of ops. The +probe function calls fpga_mgr_register() or fpga_mgr_register_full(), such as:: + + static const struct fpga_manager_ops socfpga_fpga_ops = { + .write_init = socfpga_fpga_ops_configure_init, + .write = socfpga_fpga_ops_configure_write, + .write_complete = socfpga_fpga_ops_configure_complete, + .state = socfpga_fpga_ops_state, + }; + + static int socfpga_fpga_probe(struct platform_device *pdev) + { + struct device *dev = &pdev->dev; + struct socfpga_fpga_priv *priv; + struct fpga_manager *mgr; + int ret; + + priv = devm_kzalloc(dev, sizeof(*priv), GFP_KERNEL); + if (!priv) + return -ENOMEM; + + /* + * do ioremaps, get interrupts, etc. and save + * them in priv + */ + + mgr = fpga_mgr_register(dev, "Altera SOCFPGA FPGA Manager", + &socfpga_fpga_ops, priv); + if (IS_ERR(mgr)) + return PTR_ERR(mgr); + + platform_set_drvdata(pdev, mgr); + + return 0; + } + + static int socfpga_fpga_remove(struct platform_device *pdev) + { + struct fpga_manager *mgr = platform_get_drvdata(pdev); + + fpga_mgr_unregister(mgr); + + return 0; + } + +Alternatively, the probe function could call one of the resource managed +register functions, devm_fpga_mgr_register() or devm_fpga_mgr_register_full(). +When these functions are used, the parameter syntax is the same, but the call +to fpga_mgr_unregister() should be removed. In the above example, the +socfpga_fpga_remove() function would not be required. + +The ops will implement whatever device specific register writes are needed to +do the programming sequence for this particular FPGA. These ops return 0 for +success or negative error codes otherwise. + +The programming sequence is:: + 1. .parse_header (optional, may be called once or multiple times) + 2. .write_init + 3. .write or .write_sg (may be called once or multiple times) + 4. .write_complete + +The .parse_header function will set header_size and data_size to +struct fpga_image_info. Before parse_header call, header_size is initialized +with initial_header_size. If flag skip_header of fpga_manager_ops is true, +.write function will get image buffer starting at header_size offset from the +beginning. If data_size is set, .write function will get data_size bytes of +the image buffer, otherwise .write will get data up to the end of image buffer. +This will not affect .write_sg, .write_sg will still get whole image in +sg_table form. If FPGA image is already mapped as a single contiguous buffer, +whole buffer will be passed into .parse_header. If image is in scatter-gather +form, core code will buffer up at least .initial_header_size before the first +call of .parse_header, if it is not enough, .parse_header should set desired +size into info->header_size and return -EAGAIN, then it will be called again +with greater part of image buffer on the input. + +The .write_init function will prepare the FPGA to receive the image data. The +buffer passed into .write_init will be at least info->header_size bytes long; +if the whole bitstream is not immediately available then the core code will +buffer up at least this much before starting. + +The .write function writes a buffer to the FPGA. The buffer may be contain the +whole FPGA image or may be a smaller chunk of an FPGA image. In the latter +case, this function is called multiple times for successive chunks. This interface +is suitable for drivers which use PIO. + +The .write_sg version behaves the same as .write except the input is a sg_table +scatter list. This interface is suitable for drivers which use DMA. + +The .write_complete function is called after all the image has been written +to put the FPGA into operating mode. + +The ops include a .state function which will determine the state the FPGA is in +and return a code of type enum fpga_mgr_states. It doesn't result in a change +in state. + +API for implementing a new FPGA Manager driver +---------------------------------------------- + +* ``fpga_mgr_states`` - Values for :c:expr:`fpga_manager->state`. +* struct fpga_manager - the FPGA manager struct +* struct fpga_manager_ops - Low level FPGA manager driver ops +* struct fpga_manager_info - Parameter structure for fpga_mgr_register_full() +* fpga_mgr_register_full() - Create and register an FPGA manager using the + fpga_mgr_info structure to provide the full flexibility of options +* fpga_mgr_register() - Create and register an FPGA manager using standard + arguments +* devm_fpga_mgr_register_full() - Resource managed version of + fpga_mgr_register_full() +* devm_fpga_mgr_register() - Resource managed version of fpga_mgr_register() +* fpga_mgr_unregister() - Unregister an FPGA manager + +.. kernel-doc:: include/linux/fpga/fpga-mgr.h + :functions: fpga_mgr_states + +.. kernel-doc:: include/linux/fpga/fpga-mgr.h + :functions: fpga_manager + +.. kernel-doc:: include/linux/fpga/fpga-mgr.h + :functions: fpga_manager_ops + +.. kernel-doc:: include/linux/fpga/fpga-mgr.h + :functions: fpga_manager_info + +.. kernel-doc:: drivers/fpga/fpga-mgr.c + :functions: fpga_mgr_register_full + +.. kernel-doc:: drivers/fpga/fpga-mgr.c + :functions: fpga_mgr_register + +.. kernel-doc:: drivers/fpga/fpga-mgr.c + :functions: devm_fpga_mgr_register_full + +.. kernel-doc:: drivers/fpga/fpga-mgr.c + :functions: devm_fpga_mgr_register + +.. kernel-doc:: drivers/fpga/fpga-mgr.c + :functions: fpga_mgr_unregister diff --git a/Documentation/driver-api/fpga/fpga-programming.rst b/Documentation/driver-api/fpga/fpga-programming.rst new file mode 100644 index 0000000000..fb4da4240e --- /dev/null +++ b/Documentation/driver-api/fpga/fpga-programming.rst @@ -0,0 +1,107 @@ +In-kernel API for FPGA Programming +================================== + +Overview +-------- + +The in-kernel API for FPGA programming is a combination of APIs from +FPGA manager, bridge, and regions. The actual function used to +trigger FPGA programming is fpga_region_program_fpga(). + +fpga_region_program_fpga() uses functionality supplied by +the FPGA manager and bridges. It will: + + * lock the region's mutex + * lock the mutex of the region's FPGA manager + * build a list of FPGA bridges if a method has been specified to do so + * disable the bridges + * program the FPGA using info passed in :c:expr:`fpga_region->info`. + * re-enable the bridges + * release the locks + +The struct fpga_image_info specifies what FPGA image to program. It is +allocated/freed by fpga_image_info_alloc() and freed with +fpga_image_info_free() + +How to program an FPGA using a region +------------------------------------- + +When the FPGA region driver probed, it was given a pointer to an FPGA manager +driver so it knows which manager to use. The region also either has a list of +bridges to control during programming or it has a pointer to a function that +will generate that list. Here's some sample code of what to do next:: + + #include <linux/fpga/fpga-mgr.h> + #include <linux/fpga/fpga-region.h> + + struct fpga_image_info *info; + int ret; + + /* + * First, alloc the struct with information about the FPGA image to + * program. + */ + info = fpga_image_info_alloc(dev); + if (!info) + return -ENOMEM; + + /* Set flags as needed, such as: */ + info->flags = FPGA_MGR_PARTIAL_RECONFIG; + + /* + * Indicate where the FPGA image is. This is pseudo-code; you're + * going to use one of these three. + */ + if (image is in a scatter gather table) { + + info->sgt = [your scatter gather table] + + } else if (image is in a buffer) { + + info->buf = [your image buffer] + info->count = [image buffer size] + + } else if (image is in a firmware file) { + + info->firmware_name = devm_kstrdup(dev, firmware_name, + GFP_KERNEL); + + } + + /* Add info to region and do the programming */ + region->info = info; + ret = fpga_region_program_fpga(region); + + /* Deallocate the image info if you're done with it */ + region->info = NULL; + fpga_image_info_free(info); + + if (ret) + return ret; + + /* Now enumerate whatever hardware has appeared in the FPGA. */ + +API for programming an FPGA +--------------------------- + +* fpga_region_program_fpga() - Program an FPGA +* fpga_image_info() - Specifies what FPGA image to program +* fpga_image_info_alloc() - Allocate an FPGA image info struct +* fpga_image_info_free() - Free an FPGA image info struct + +.. kernel-doc:: drivers/fpga/fpga-region.c + :functions: fpga_region_program_fpga + +FPGA Manager flags + +.. kernel-doc:: include/linux/fpga/fpga-mgr.h + :doc: FPGA Manager flags + +.. kernel-doc:: include/linux/fpga/fpga-mgr.h + :functions: fpga_image_info + +.. kernel-doc:: drivers/fpga/fpga-mgr.c + :functions: fpga_image_info_alloc + +.. kernel-doc:: drivers/fpga/fpga-mgr.c + :functions: fpga_image_info_free diff --git a/Documentation/driver-api/fpga/fpga-region.rst b/Documentation/driver-api/fpga/fpga-region.rst new file mode 100644 index 0000000000..dc55d60a0b --- /dev/null +++ b/Documentation/driver-api/fpga/fpga-region.rst @@ -0,0 +1,109 @@ +FPGA Region +=========== + +Overview +-------- + +This document is meant to be a brief overview of the FPGA region API usage. A +more conceptual look at regions can be found in the Device Tree binding +document [#f1]_. + +For the purposes of this API document, let's just say that a region associates +an FPGA Manager and a bridge (or bridges) with a reprogrammable region of an +FPGA or the whole FPGA. The API provides a way to register a region and to +program a region. + +Currently the only layer above fpga-region.c in the kernel is the Device Tree +support (of-fpga-region.c) described in [#f1]_. The DT support layer uses regions +to program the FPGA and then DT to handle enumeration. The common region code +is intended to be used by other schemes that have other ways of accomplishing +enumeration after programming. + +An fpga-region can be set up to know the following things: + + * which FPGA manager to use to do the programming + + * which bridges to disable before programming and enable afterwards. + +Additional info needed to program the FPGA image is passed in the struct +fpga_image_info including: + + * pointers to the image as either a scatter-gather buffer, a contiguous + buffer, or the name of firmware file + + * flags indicating specifics such as whether the image is for partial + reconfiguration. + +How to add a new FPGA region +---------------------------- + +An example of usage can be seen in the probe function of [#f2]_. + +.. [#f1] ../devicetree/bindings/fpga/fpga-region.txt +.. [#f2] ../../drivers/fpga/of-fpga-region.c + +API to add a new FPGA region +---------------------------- + +* struct fpga_region - The FPGA region struct +* struct fpga_region_info - Parameter structure for fpga_region_register_full() +* fpga_region_register_full() - Create and register an FPGA region using the + fpga_region_info structure to provide the full flexibility of options +* fpga_region_register() - Create and register an FPGA region using standard + arguments +* fpga_region_unregister() - Unregister an FPGA region + +The FPGA region's probe function will need to get a reference to the FPGA +Manager it will be using to do the programming. This usually would happen +during the region's probe function. + +* fpga_mgr_get() - Get a reference to an FPGA manager, raise ref count +* of_fpga_mgr_get() - Get a reference to an FPGA manager, raise ref count, + given a device node. +* fpga_mgr_put() - Put an FPGA manager + +The FPGA region will need to specify which bridges to control while programming +the FPGA. The region driver can build a list of bridges during probe time +(:c:expr:`fpga_region->bridge_list`) or it can have a function that creates +the list of bridges to program just before programming +(:c:expr:`fpga_region->get_bridges`). The FPGA bridge framework supplies the +following APIs to handle building or tearing down that list. + +* fpga_bridge_get_to_list() - Get a ref of an FPGA bridge, add it to a + list +* of_fpga_bridge_get_to_list() - Get a ref of an FPGA bridge, add it to a + list, given a device node +* fpga_bridges_put() - Given a list of bridges, put them + +.. kernel-doc:: include/linux/fpga/fpga-region.h + :functions: fpga_region + +.. kernel-doc:: include/linux/fpga/fpga-region.h + :functions: fpga_region_info + +.. kernel-doc:: drivers/fpga/fpga-region.c + :functions: fpga_region_register_full + +.. kernel-doc:: drivers/fpga/fpga-region.c + :functions: fpga_region_register + +.. kernel-doc:: drivers/fpga/fpga-region.c + :functions: fpga_region_unregister + +.. kernel-doc:: drivers/fpga/fpga-mgr.c + :functions: fpga_mgr_get + +.. kernel-doc:: drivers/fpga/fpga-mgr.c + :functions: of_fpga_mgr_get + +.. kernel-doc:: drivers/fpga/fpga-mgr.c + :functions: fpga_mgr_put + +.. kernel-doc:: drivers/fpga/fpga-bridge.c + :functions: fpga_bridge_get_to_list + +.. kernel-doc:: drivers/fpga/fpga-bridge.c + :functions: of_fpga_bridge_get_to_list + +.. kernel-doc:: drivers/fpga/fpga-bridge.c + :functions: fpga_bridges_put diff --git a/Documentation/driver-api/fpga/index.rst b/Documentation/driver-api/fpga/index.rst new file mode 100644 index 0000000000..31a4773bd2 --- /dev/null +++ b/Documentation/driver-api/fpga/index.rst @@ -0,0 +1,15 @@ +============== +FPGA Subsystem +============== + +:Author: Alan Tull + +.. toctree:: + :maxdepth: 2 + + intro + fpga-mgr + fpga-bridge + fpga-region + fpga-programming + diff --git a/Documentation/driver-api/fpga/intro.rst b/Documentation/driver-api/fpga/intro.rst new file mode 100644 index 0000000000..f54c7dabcc --- /dev/null +++ b/Documentation/driver-api/fpga/intro.rst @@ -0,0 +1,54 @@ +Introduction +============ + +The FPGA subsystem supports reprogramming FPGAs dynamically under +Linux. Some of the core intentions of the FPGA subsystems are: + +* The FPGA subsystem is vendor agnostic. + +* The FPGA subsystem separates upper layers (userspace interfaces and + enumeration) from lower layers that know how to program a specific + FPGA. + +* Code should not be shared between upper and lower layers. This + should go without saying. If that seems necessary, there's probably + framework functionality that can be added that will benefit + other users. Write the linux-fpga mailing list and maintainers and + seek out a solution that expands the framework for broad reuse. + +* Generally, when adding code, think of the future. Plan for reuse. + +The framework in the kernel is divided into: + +FPGA Manager +------------ + +If you are adding a new FPGA or a new method of programming an FPGA, +this is the subsystem for you. Low level FPGA manager drivers contain +the knowledge of how to program a specific device. This subsystem +includes the framework in fpga-mgr.c and the low level drivers that +are registered with it. + +FPGA Bridge +----------- + +FPGA Bridges prevent spurious signals from going out of an FPGA or a +region of an FPGA during programming. They are disabled before +programming begins and re-enabled afterwards. An FPGA bridge may be +actual hard hardware that gates a bus to a CPU or a soft ("freeze") +bridge in FPGA fabric that surrounds a partial reconfiguration region +of an FPGA. This subsystem includes fpga-bridge.c and the low level +drivers that are registered with it. + +FPGA Region +----------- + +If you are adding a new interface to the FPGA framework, add it on top +of an FPGA region. + +The FPGA Region framework (fpga-region.c) associates managers and +bridges as reconfigurable regions. A region may refer to the whole +FPGA in full reconfiguration or to a partial reconfiguration region. + +The Device Tree FPGA Region support (of-fpga-region.c) handles +reprogramming FPGAs when device tree overlays are applied. diff --git a/Documentation/driver-api/frame-buffer.rst b/Documentation/driver-api/frame-buffer.rst new file mode 100644 index 0000000000..9dd3060f02 --- /dev/null +++ b/Documentation/driver-api/frame-buffer.rst @@ -0,0 +1,62 @@ +Frame Buffer Library +==================== + +The frame buffer drivers depend heavily on four data structures. These +structures are declared in include/linux/fb.h. They are fb_info, +fb_var_screeninfo, fb_fix_screeninfo and fb_monospecs. The last +three can be made available to and from userland. + +fb_info defines the current state of a particular video card. Inside +fb_info, there exists a fb_ops structure which is a collection of +needed functions to make fbdev and fbcon work. fb_info is only visible +to the kernel. + +fb_var_screeninfo is used to describe the features of a video card +that are user defined. With fb_var_screeninfo, things such as depth +and the resolution may be defined. + +The next structure is fb_fix_screeninfo. This defines the properties +of a card that are created when a mode is set and can't be changed +otherwise. A good example of this is the start of the frame buffer +memory. This "locks" the address of the frame buffer memory, so that it +cannot be changed or moved. + +The last structure is fb_monospecs. In the old API, there was little +importance for fb_monospecs. This allowed for forbidden things such as +setting a mode of 800x600 on a fix frequency monitor. With the new API, +fb_monospecs prevents such things, and if used correctly, can prevent a +monitor from being cooked. fb_monospecs will not be useful until +kernels 2.5.x. + +Frame Buffer Memory +------------------- + +.. kernel-doc:: drivers/video/fbdev/core/fbmem.c + :export: + +Frame Buffer Colormap +--------------------- + +.. kernel-doc:: drivers/video/fbdev/core/fbcmap.c + :export: + +Frame Buffer Video Mode Database +-------------------------------- + +.. kernel-doc:: drivers/video/fbdev/core/modedb.c + :internal: + +.. kernel-doc:: drivers/video/fbdev/core/modedb.c + :export: + +Frame Buffer Macintosh Video Mode Database +------------------------------------------ + +.. kernel-doc:: drivers/video/fbdev/macmodes.c + :export: + +Frame Buffer Fonts +------------------ + +Refer to the file lib/fonts/fonts.c for more information. + diff --git a/Documentation/driver-api/generic-counter.rst b/Documentation/driver-api/generic-counter.rst new file mode 100644 index 0000000000..71ccc30e58 --- /dev/null +++ b/Documentation/driver-api/generic-counter.rst @@ -0,0 +1,573 @@ +.. SPDX-License-Identifier: GPL-2.0 + +========================= +Generic Counter Interface +========================= + +Introduction +============ + +Counter devices are prevalent among a diverse spectrum of industries. +The ubiquitous presence of these devices necessitates a common interface +and standard of interaction and exposure. This driver API attempts to +resolve the issue of duplicate code found among existing counter device +drivers by introducing a generic counter interface for consumption. The +Generic Counter interface enables drivers to support and expose a common +set of components and functionality present in counter devices. + +Theory +====== + +Counter devices can vary greatly in design, but regardless of whether +some devices are quadrature encoder counters or tally counters, all +counter devices consist of a core set of components. This core set of +components, shared by all counter devices, is what forms the essence of +the Generic Counter interface. + +There are three core components to a counter: + +* Signal: + Stream of data to be evaluated by the counter. + +* Synapse: + Association of a Signal, and evaluation trigger, with a Count. + +* Count: + Accumulation of the effects of connected Synapses. + +SIGNAL +------ +A Signal represents a stream of data. This is the input data that is +evaluated by the counter to determine the count data; e.g. a quadrature +signal output line of a rotary encoder. Not all counter devices provide +user access to the Signal data, so exposure is optional for drivers. + +When the Signal data is available for user access, the Generic Counter +interface provides the following available signal values: + +* SIGNAL_LOW: + Signal line is in a low state. + +* SIGNAL_HIGH: + Signal line is in a high state. + +A Signal may be associated with one or more Counts. + +SYNAPSE +------- +A Synapse represents the association of a Signal with a Count. Signal +data affects respective Count data, and the Synapse represents this +relationship. + +The Synapse action mode specifies the Signal data condition that +triggers the respective Count's count function evaluation to update the +count data. The Generic Counter interface provides the following +available action modes: + +* None: + Signal does not trigger the count function. In Pulse-Direction count + function mode, this Signal is evaluated as Direction. + +* Rising Edge: + Low state transitions to high state. + +* Falling Edge: + High state transitions to low state. + +* Both Edges: + Any state transition. + +A counter is defined as a set of input signals associated with count +data that are generated by the evaluation of the state of the associated +input signals as defined by the respective count functions. Within the +context of the Generic Counter interface, a counter consists of Counts +each associated with a set of Signals, whose respective Synapse +instances represent the count function update conditions for the +associated Counts. + +A Synapse associates one Signal with one Count. + +COUNT +----- +A Count represents the accumulation of the effects of connected +Synapses; i.e. the count data for a set of Signals. The Generic +Counter interface represents the count data as a natural number. + +A Count has a count function mode which represents the update behavior +for the count data. The Generic Counter interface provides the following +available count function modes: + +* Increase: + Accumulated count is incremented. + +* Decrease: + Accumulated count is decremented. + +* Pulse-Direction: + Rising edges on signal A updates the respective count. The input level + of signal B determines direction. + +* Quadrature: + A pair of quadrature encoding signals are evaluated to determine + position and direction. The following Quadrature modes are available: + + - x1 A: + If direction is forward, rising edges on quadrature pair signal A + updates the respective count; if the direction is backward, falling + edges on quadrature pair signal A updates the respective count. + Quadrature encoding determines the direction. + + - x1 B: + If direction is forward, rising edges on quadrature pair signal B + updates the respective count; if the direction is backward, falling + edges on quadrature pair signal B updates the respective count. + Quadrature encoding determines the direction. + + - x2 A: + Any state transition on quadrature pair signal A updates the + respective count. Quadrature encoding determines the direction. + + - x2 B: + Any state transition on quadrature pair signal B updates the + respective count. Quadrature encoding determines the direction. + + - x4: + Any state transition on either quadrature pair signals updates the + respective count. Quadrature encoding determines the direction. + +A Count has a set of one or more associated Synapses. + +Paradigm +======== + +The most basic counter device may be expressed as a single Count +associated with a single Signal via a single Synapse. Take for example +a counter device which simply accumulates a count of rising edges on a +source input line:: + + Count Synapse Signal + ----- ------- ------ + +---------------------+ + | Data: Count | Rising Edge ________ + | Function: Increase | <------------- / Source \ + | | ____________ + +---------------------+ + +In this example, the Signal is a source input line with a pulsing +voltage, while the Count is a persistent count value which is repeatedly +incremented. The Signal is associated with the respective Count via a +Synapse. The increase function is triggered by the Signal data condition +specified by the Synapse -- in this case a rising edge condition on the +voltage input line. In summary, the counter device existence and +behavior is aptly represented by respective Count, Signal, and Synapse +components: a rising edge condition triggers an increase function on an +accumulating count datum. + +A counter device is not limited to a single Signal; in fact, in theory +many Signals may be associated with even a single Count. For example, a +quadrature encoder counter device can keep track of position based on +the states of two input lines:: + + Count Synapse Signal + ----- ------- ------ + +-------------------------+ + | Data: Position | Both Edges ___ + | Function: Quadrature x4 | <------------ / A \ + | | _______ + | | + | | Both Edges ___ + | | <------------ / B \ + | | _______ + +-------------------------+ + +In this example, two Signals (quadrature encoder lines A and B) are +associated with a single Count: a rising or falling edge on either A or +B triggers the "Quadrature x4" function which determines the direction +of movement and updates the respective position data. The "Quadrature +x4" function is likely implemented in the hardware of the quadrature +encoder counter device; the Count, Signals, and Synapses simply +represent this hardware behavior and functionality. + +Signals associated with the same Count can have differing Synapse action +mode conditions. For example, a quadrature encoder counter device +operating in a non-quadrature Pulse-Direction mode could have one input +line dedicated for movement and a second input line dedicated for +direction:: + + Count Synapse Signal + ----- ------- ------ + +---------------------------+ + | Data: Position | Rising Edge ___ + | Function: Pulse-Direction | <------------- / A \ (Movement) + | | _______ + | | + | | None ___ + | | <------------- / B \ (Direction) + | | _______ + +---------------------------+ + +Only Signal A triggers the "Pulse-Direction" update function, but the +instantaneous state of Signal B is still required in order to know the +direction so that the position data may be properly updated. Ultimately, +both Signals are associated with the same Count via two respective +Synapses, but only one Synapse has an active action mode condition which +triggers the respective count function while the other is left with a +"None" condition action mode to indicate its respective Signal's +availability for state evaluation despite its non-triggering mode. + +Keep in mind that the Signal, Synapse, and Count are abstract +representations which do not need to be closely married to their +respective physical sources. This allows the user of a counter to +divorce themselves from the nuances of physical components (such as +whether an input line is differential or single-ended) and instead focus +on the core idea of what the data and process represent (e.g. position +as interpreted from quadrature encoding data). + +Driver API +========== + +Driver authors may utilize the Generic Counter interface in their code +by including the include/linux/counter.h header file. This header file +provides several core data structures, function prototypes, and macros +for defining a counter device. + +.. kernel-doc:: include/linux/counter.h + :internal: + +.. kernel-doc:: drivers/counter/counter-core.c + :export: + +.. kernel-doc:: drivers/counter/counter-chrdev.c + :export: + +Driver Implementation +===================== + +To support a counter device, a driver must first allocate the available +Counter Signals via counter_signal structures. These Signals should +be stored as an array and set to the signals array member of an +allocated counter_device structure before the Counter is registered to +the system. + +Counter Counts may be allocated via counter_count structures, and +respective Counter Signal associations (Synapses) made via +counter_synapse structures. Associated counter_synapse structures are +stored as an array and set to the synapses array member of the +respective counter_count structure. These counter_count structures are +set to the counts array member of an allocated counter_device structure +before the Counter is registered to the system. + +Driver callbacks must be provided to the counter_device structure in +order to communicate with the device: to read and write various Signals +and Counts, and to set and get the "action mode" and "function mode" for +various Synapses and Counts respectively. + +A counter_device structure is allocated using counter_alloc() and then +registered to the system by passing it to the counter_add() function, and +unregistered by passing it to the counter_unregister function. There are +device managed variants of these functions: devm_counter_alloc() and +devm_counter_add(). + +The struct counter_comp structure is used to define counter extensions +for Signals, Synapses, and Counts. + +The "type" member specifies the type of high-level data (e.g. BOOL, +COUNT_DIRECTION, etc.) handled by this extension. The "``*_read``" and +"``*_write``" members can then be set by the counter device driver with +callbacks to handle that data using native C data types (i.e. u8, u64, +etc.). + +Convenience macros such as ``COUNTER_COMP_COUNT_U64`` are provided for +use by driver authors. In particular, driver authors are expected to use +the provided macros for standard Counter subsystem attributes in order +to maintain a consistent interface for userspace. For example, a counter +device driver may define several standard attributes like so:: + + struct counter_comp count_ext[] = { + COUNTER_COMP_DIRECTION(count_direction_read), + COUNTER_COMP_ENABLE(count_enable_read, count_enable_write), + COUNTER_COMP_CEILING(count_ceiling_read, count_ceiling_write), + }; + +This makes it simple to see, add, and modify the attributes that are +supported by this driver ("direction", "enable", and "ceiling") and to +maintain this code without getting lost in a web of struct braces. + +Callbacks must match the function type expected for the respective +component or extension. These function types are defined in the struct +counter_comp structure as the "``*_read``" and "``*_write``" union +members. + +The corresponding callback prototypes for the extensions mentioned in +the previous example above would be:: + + int count_direction_read(struct counter_device *counter, + struct counter_count *count, + enum counter_count_direction *direction); + int count_enable_read(struct counter_device *counter, + struct counter_count *count, u8 *enable); + int count_enable_write(struct counter_device *counter, + struct counter_count *count, u8 enable); + int count_ceiling_read(struct counter_device *counter, + struct counter_count *count, u64 *ceiling); + int count_ceiling_write(struct counter_device *counter, + struct counter_count *count, u64 ceiling); + +Determining the type of extension to create is a matter of scope. + +* Signal extensions are attributes that expose information/control + specific to a Signal. These types of attributes will exist under a + Signal's directory in sysfs. + + For example, if you have an invert feature for a Signal, you can have + a Signal extension called "invert" that toggles that feature: + /sys/bus/counter/devices/counterX/signalY/invert + +* Count extensions are attributes that expose information/control + specific to a Count. These type of attributes will exist under a + Count's directory in sysfs. + + For example, if you want to pause/unpause a Count from updating, you + can have a Count extension called "enable" that toggles such: + /sys/bus/counter/devices/counterX/countY/enable + +* Device extensions are attributes that expose information/control + non-specific to a particular Count or Signal. This is where you would + put your global features or other miscellaneous functionality. + + For example, if your device has an overtemp sensor, you can report the + chip overheated via a device extension called "error_overtemp": + /sys/bus/counter/devices/counterX/error_overtemp + +Subsystem Architecture +====================== + +Counter drivers pass and take data natively (i.e. ``u8``, ``u64``, etc.) +and the shared counter module handles the translation between the sysfs +interface. This guarantees a standard userspace interface for all +counter drivers, and enables a Generic Counter chrdev interface via a +generalized device driver ABI. + +A high-level view of how a count value is passed down from a counter +driver is exemplified by the following. The driver callbacks are first +registered to the Counter core component for use by the Counter +userspace interface components:: + + Driver callbacks registration: + ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ + +----------------------------+ + | Counter device driver | + +----------------------------+ + | Processes data from device | + +----------------------------+ + | + ------------------- + / driver callbacks / + ------------------- + | + V + +----------------------+ + | Counter core | + +----------------------+ + | Routes device driver | + | callbacks to the | + | userspace interfaces | + +----------------------+ + | + ------------------- + / driver callbacks / + ------------------- + | + +---------------+---------------+ + | | + V V + +--------------------+ +---------------------+ + | Counter sysfs | | Counter chrdev | + +--------------------+ +---------------------+ + | Translates to the | | Translates to the | + | standard Counter | | standard Counter | + | sysfs output | | character device | + +--------------------+ +---------------------+ + +Thereafter, data can be transferred directly between the Counter device +driver and Counter userspace interface:: + + Count data request: + ~~~~~~~~~~~~~~~~~~~ + ---------------------- + / Counter device \ + +----------------------+ + | Count register: 0x28 | + +----------------------+ + | + ----------------- + / raw count data / + ----------------- + | + V + +----------------------------+ + | Counter device driver | + +----------------------------+ + | Processes data from device | + |----------------------------| + | Type: u64 | + | Value: 42 | + +----------------------------+ + | + ---------- + / u64 / + ---------- + | + +---------------+---------------+ + | | + V V + +--------------------+ +---------------------+ + | Counter sysfs | | Counter chrdev | + +--------------------+ +---------------------+ + | Translates to the | | Translates to the | + | standard Counter | | standard Counter | + | sysfs output | | character device | + |--------------------| |---------------------| + | Type: const char * | | Type: u64 | + | Value: "42" | | Value: 42 | + +--------------------+ +---------------------+ + | | + --------------- ----------------------- + / const char * / / struct counter_event / + --------------- ----------------------- + | | + | V + | +-----------+ + | | read | + | +-----------+ + | \ Count: 42 / + | ----------- + | + V + +--------------------------------------------------+ + | `/sys/bus/counter/devices/counterX/countY/count` | + +--------------------------------------------------+ + \ Count: "42" / + -------------------------------------------------- + +There are four primary components involved: + +Counter device driver +--------------------- +Communicates with the hardware device to read/write data; e.g. counter +drivers for quadrature encoders, timers, etc. + +Counter core +------------ +Registers the counter device driver to the system so that the respective +callbacks are called during userspace interaction. + +Counter sysfs +------------- +Translates counter data to the standard Counter sysfs interface format +and vice versa. + +Please refer to the ``Documentation/ABI/testing/sysfs-bus-counter`` file +for a detailed breakdown of the available Generic Counter interface +sysfs attributes. + +Counter chrdev +-------------- +Translates Counter events to the standard Counter character device; data +is transferred via standard character device read calls, while Counter +events are configured via ioctl calls. + +Sysfs Interface +=============== + +Several sysfs attributes are generated by the Generic Counter interface, +and reside under the ``/sys/bus/counter/devices/counterX`` directory, +where ``X`` is to the respective counter device id. Please see +``Documentation/ABI/testing/sysfs-bus-counter`` for detailed information +on each Generic Counter interface sysfs attribute. + +Through these sysfs attributes, programs and scripts may interact with +the Generic Counter paradigm Counts, Signals, and Synapses of respective +counter devices. + +Counter Character Device +======================== + +Counter character device nodes are created under the ``/dev`` directory +as ``counterX``, where ``X`` is the respective counter device id. +Defines for the standard Counter data types are exposed via the +userspace ``include/uapi/linux/counter.h`` file. + +Counter events +-------------- +Counter device drivers can support Counter events by utilizing the +``counter_push_event`` function:: + + void counter_push_event(struct counter_device *const counter, const u8 event, + const u8 channel); + +The event id is specified by the ``event`` parameter; the event channel +id is specified by the ``channel`` parameter. When this function is +called, the Counter data associated with the respective event is +gathered, and a ``struct counter_event`` is generated for each datum and +pushed to userspace. + +Counter events can be configured by users to report various Counter +data of interest. This can be conceptualized as a list of Counter +component read calls to perform. For example: + + +------------------------+------------------------+ + | COUNTER_EVENT_OVERFLOW | COUNTER_EVENT_INDEX | + +========================+========================+ + | Channel 0 | Channel 0 | + +------------------------+------------------------+ + | * Count 0 | * Signal 0 | + | * Count 1 | * Signal 0 Extension 0 | + | * Signal 3 | * Extension 4 | + | * Count 4 Extension 2 +------------------------+ + | * Signal 5 Extension 0 | Channel 1 | + | +------------------------+ + | | * Signal 4 | + | | * Signal 4 Extension 0 | + | | * Count 7 | + +------------------------+------------------------+ + +When ``counter_push_event(counter, COUNTER_EVENT_INDEX, 1)`` is called +for example, it will go down the list for the ``COUNTER_EVENT_INDEX`` +event channel 1 and execute the read callbacks for Signal 4, Signal 4 +Extension 0, and Count 7 -- the data returned for each is pushed to a +kfifo as a ``struct counter_event``, which userspace can retrieve via a +standard read operation on the respective character device node. + +Userspace +--------- +Userspace applications can configure Counter events via ioctl operations +on the Counter character device node. There following ioctl codes are +supported and provided by the ``linux/counter.h`` userspace header file: + +* :c:macro:`COUNTER_ADD_WATCH_IOCTL` + +* :c:macro:`COUNTER_ENABLE_EVENTS_IOCTL` + +* :c:macro:`COUNTER_DISABLE_EVENTS_IOCTL` + +To configure events to gather Counter data, users first populate a +``struct counter_watch`` with the relevant event id, event channel id, +and the information for the desired Counter component from which to +read, and then pass it via the ``COUNTER_ADD_WATCH_IOCTL`` ioctl +command. + +Note that an event can be watched without gathering Counter data by +setting the ``component.type`` member equal to +``COUNTER_COMPONENT_NONE``. With this configuration the Counter +character device will simply populate the event timestamps for those +respective ``struct counter_event`` elements and ignore the component +value. + +The ``COUNTER_ADD_WATCH_IOCTL`` command will buffer these Counter +watches. When ready, the ``COUNTER_ENABLE_EVENTS_IOCTL`` ioctl command +may be used to activate these Counter watches. + +Userspace applications can then execute a ``read`` operation (optionally +calling ``poll`` first) on the Counter character device node to retrieve +``struct counter_event`` elements with the desired data. diff --git a/Documentation/driver-api/gpio/board.rst b/Documentation/driver-api/gpio/board.rst new file mode 100644 index 0000000000..b33aa04f21 --- /dev/null +++ b/Documentation/driver-api/gpio/board.rst @@ -0,0 +1,222 @@ +============= +GPIO Mappings +============= + +This document explains how GPIOs can be assigned to given devices and functions. + +Note that it only applies to the new descriptor-based interface. For a +description of the deprecated integer-based GPIO interface please refer to +legacy.rst (actually, there is no real mapping possible with the old +interface; you just fetch an integer from somewhere and request the +corresponding GPIO). + +All platforms can enable the GPIO library, but if the platform strictly +requires GPIO functionality to be present, it needs to select GPIOLIB from its +Kconfig. Then, how GPIOs are mapped depends on what the platform uses to +describe its hardware layout. Currently, mappings can be defined through device +tree, ACPI, and platform data. + +Device Tree +----------- +GPIOs can easily be mapped to devices and functions in the device tree. The +exact way to do it depends on the GPIO controller providing the GPIOs, see the +device tree bindings for your controller. + +GPIOs mappings are defined in the consumer device's node, in a property named +<function>-gpios, where <function> is the function the driver will request +through gpiod_get(). For example:: + + foo_device { + compatible = "acme,foo"; + ... + led-gpios = <&gpio 15 GPIO_ACTIVE_HIGH>, /* red */ + <&gpio 16 GPIO_ACTIVE_HIGH>, /* green */ + <&gpio 17 GPIO_ACTIVE_HIGH>; /* blue */ + + power-gpios = <&gpio 1 GPIO_ACTIVE_LOW>; + }; + +Properties named <function>-gpio are also considered valid and old bindings use +it but are only supported for compatibility reasons and should not be used for +newer bindings since it has been deprecated. + +This property will make GPIOs 15, 16 and 17 available to the driver under the +"led" function, and GPIO 1 as the "power" GPIO:: + + struct gpio_desc *red, *green, *blue, *power; + + red = gpiod_get_index(dev, "led", 0, GPIOD_OUT_HIGH); + green = gpiod_get_index(dev, "led", 1, GPIOD_OUT_HIGH); + blue = gpiod_get_index(dev, "led", 2, GPIOD_OUT_HIGH); + + power = gpiod_get(dev, "power", GPIOD_OUT_HIGH); + +The led GPIOs will be active high, while the power GPIO will be active low (i.e. +gpiod_is_active_low(power) will be true). + +The second parameter of the gpiod_get() functions, the con_id string, has to be +the <function>-prefix of the GPIO suffixes ("gpios" or "gpio", automatically +looked up by the gpiod functions internally) used in the device tree. With above +"led-gpios" example, use the prefix without the "-" as con_id parameter: "led". + +Internally, the GPIO subsystem prefixes the GPIO suffix ("gpios" or "gpio") +with the string passed in con_id to get the resulting string +(``snprintf(... "%s-%s", con_id, gpio_suffixes[]``). + +ACPI +---- +ACPI also supports function names for GPIOs in a similar fashion to DT. +The above DT example can be converted to an equivalent ACPI description +with the help of _DSD (Device Specific Data), introduced in ACPI 5.1:: + + Device (FOO) { + Name (_CRS, ResourceTemplate () { + GpioIo (Exclusive, PullUp, 0, 0, IoRestrictionOutputOnly, + "\\_SB.GPI0", 0, ResourceConsumer) { 15 } // red + GpioIo (Exclusive, PullUp, 0, 0, IoRestrictionOutputOnly, + "\\_SB.GPI0", 0, ResourceConsumer) { 16 } // green + GpioIo (Exclusive, PullUp, 0, 0, IoRestrictionOutputOnly, + "\\_SB.GPI0", 0, ResourceConsumer) { 17 } // blue + GpioIo (Exclusive, PullNone, 0, 0, IoRestrictionOutputOnly, + "\\_SB.GPI0", 0, ResourceConsumer) { 1 } // power + }) + + Name (_DSD, Package () { + ToUUID("daffd814-6eba-4d8c-8a91-bc9bbf4aa301"), + Package () { + Package () { + "led-gpios", + Package () { + ^FOO, 0, 0, 1, + ^FOO, 1, 0, 1, + ^FOO, 2, 0, 1, + } + }, + Package () { "power-gpios", Package () { ^FOO, 3, 0, 0 } }, + } + }) + } + +For more information about the ACPI GPIO bindings see +Documentation/firmware-guide/acpi/gpio-properties.rst. + +Platform Data +------------- +Finally, GPIOs can be bound to devices and functions using platform data. Board +files that desire to do so need to include the following header:: + + #include <linux/gpio/machine.h> + +GPIOs are mapped by the means of tables of lookups, containing instances of the +gpiod_lookup structure. Two macros are defined to help declaring such mappings:: + + GPIO_LOOKUP(key, chip_hwnum, con_id, flags) + GPIO_LOOKUP_IDX(key, chip_hwnum, con_id, idx, flags) + +where + + - key is either the label of the gpiod_chip instance providing the GPIO, or + the GPIO line name + - chip_hwnum is the hardware number of the GPIO within the chip, or U16_MAX + to indicate that key is a GPIO line name + - con_id is the name of the GPIO function from the device point of view. It + can be NULL, in which case it will match any function. + - idx is the index of the GPIO within the function. + - flags is defined to specify the following properties: + * GPIO_ACTIVE_HIGH - GPIO line is active high + * GPIO_ACTIVE_LOW - GPIO line is active low + * GPIO_OPEN_DRAIN - GPIO line is set up as open drain + * GPIO_OPEN_SOURCE - GPIO line is set up as open source + * GPIO_PERSISTENT - GPIO line is persistent during + suspend/resume and maintains its value + * GPIO_TRANSITORY - GPIO line is transitory and may loose its + electrical state during suspend/resume + +In the future, these flags might be extended to support more properties. + +Note that: + 1. GPIO line names are not guaranteed to be globally unique, so the first + match found will be used. + 2. GPIO_LOOKUP() is just a shortcut to GPIO_LOOKUP_IDX() where idx = 0. + +A lookup table can then be defined as follows, with an empty entry defining its +end. The 'dev_id' field of the table is the identifier of the device that will +make use of these GPIOs. It can be NULL, in which case it will be matched for +calls to gpiod_get() with a NULL device. + +.. code-block:: c + + struct gpiod_lookup_table gpios_table = { + .dev_id = "foo.0", + .table = { + GPIO_LOOKUP_IDX("gpio.0", 15, "led", 0, GPIO_ACTIVE_HIGH), + GPIO_LOOKUP_IDX("gpio.0", 16, "led", 1, GPIO_ACTIVE_HIGH), + GPIO_LOOKUP_IDX("gpio.0", 17, "led", 2, GPIO_ACTIVE_HIGH), + GPIO_LOOKUP("gpio.0", 1, "power", GPIO_ACTIVE_LOW), + { }, + }, + }; + +And the table can be added by the board code as follows:: + + gpiod_add_lookup_table(&gpios_table); + +The driver controlling "foo.0" will then be able to obtain its GPIOs as follows:: + + struct gpio_desc *red, *green, *blue, *power; + + red = gpiod_get_index(dev, "led", 0, GPIOD_OUT_HIGH); + green = gpiod_get_index(dev, "led", 1, GPIOD_OUT_HIGH); + blue = gpiod_get_index(dev, "led", 2, GPIOD_OUT_HIGH); + + power = gpiod_get(dev, "power", GPIOD_OUT_HIGH); + +Since the "led" GPIOs are mapped as active-high, this example will switch their +signals to 1, i.e. enabling the LEDs. And for the "power" GPIO, which is mapped +as active-low, its actual signal will be 0 after this code. Contrary to the +legacy integer GPIO interface, the active-low property is handled during +mapping and is thus transparent to GPIO consumers. + +A set of functions such as gpiod_set_value() is available to work with +the new descriptor-oriented interface. + +Boards using platform data can also hog GPIO lines by defining GPIO hog tables. + +.. code-block:: c + + struct gpiod_hog gpio_hog_table[] = { + GPIO_HOG("gpio.0", 10, "foo", GPIO_ACTIVE_LOW, GPIOD_OUT_HIGH), + { } + }; + +And the table can be added to the board code as follows:: + + gpiod_add_hogs(gpio_hog_table); + +The line will be hogged as soon as the gpiochip is created or - in case the +chip was created earlier - when the hog table is registered. + +Arrays of pins +-------------- +In addition to requesting pins belonging to a function one by one, a device may +also request an array of pins assigned to the function. The way those pins are +mapped to the device determines if the array qualifies for fast bitmap +processing. If yes, a bitmap is passed over get/set array functions directly +between a caller and a respective .get/set_multiple() callback of a GPIO chip. + +In order to qualify for fast bitmap processing, the array must meet the +following requirements: + +- pin hardware number of array member 0 must also be 0, +- pin hardware numbers of consecutive array members which belong to the same + chip as member 0 does must also match their array indexes. + +Otherwise fast bitmap processing path is not used in order to avoid consecutive +pins which belong to the same chip but are not in hardware order being processed +separately. + +If the array applies for fast bitmap processing path, pins which belong to +different chips than member 0 does, as well as those with indexes different from +their hardware pin numbers, are excluded from the fast path, both input and +output. Moreover, open drain and open source pins are excluded from fast bitmap +output processing. diff --git a/Documentation/driver-api/gpio/bt8xxgpio.rst b/Documentation/driver-api/gpio/bt8xxgpio.rst new file mode 100644 index 0000000000..d7e75f1234 --- /dev/null +++ b/Documentation/driver-api/gpio/bt8xxgpio.rst @@ -0,0 +1,62 @@ +=================================================================== +A driver for a selfmade cheap BT8xx based PCI GPIO-card (bt8xxgpio) +=================================================================== + +For advanced documentation, see https://bues.ch/cms/unmaintained/btgpio.html + +A generic digital 24-port PCI GPIO card can be built out of an ordinary +Brooktree bt848, bt849, bt878 or bt879 based analog TV tuner card. The +Brooktree chip is used in old analog Hauppauge WinTV PCI cards. You can easily +find them used for low prices on the net. + +The bt8xx chip does have 24 digital GPIO ports. +These ports are accessible via 24 pins on the SMD chip package. + + +How to physically access the GPIO pins +====================================== + +The are several ways to access these pins. One might unsolder the whole chip +and put it on a custom PCI board, or one might only unsolder each individual +GPIO pin and solder that to some tiny wire. As the chip package really is tiny +there are some advanced soldering skills needed in any case. + +The physical pinouts are drawn in the following ASCII art. +The GPIO pins are marked with G00-G23:: + + G G G G G G G G G G G G G G G G G G + 0 0 0 0 0 0 0 0 0 0 1 1 1 1 1 1 1 1 + 0 1 2 3 4 5 6 7 8 9 0 1 2 3 4 5 6 7 + | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | + --------------------------------------------------------------------------- + --| ^ ^ |-- + --| pin 86 pin 67 |-- + --| |-- + --| pin 61 > |-- G18 + --| |-- G19 + --| |-- G20 + --| |-- G21 + --| |-- G22 + --| pin 56 > |-- G23 + --| |-- + --| Brooktree 878/879 |-- + --| |-- + --| |-- + --| |-- + --| |-- + --| |-- + --| |-- + --| |-- + --| |-- + --| |-- + --| |-- + --| |-- + --| |-- + --| |-- + --| O |-- + --| |-- + --------------------------------------------------------------------------- + | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | + ^ + This is pin 1 + diff --git a/Documentation/driver-api/gpio/consumer.rst b/Documentation/driver-api/gpio/consumer.rst new file mode 100644 index 0000000000..de6fc79ad6 --- /dev/null +++ b/Documentation/driver-api/gpio/consumer.rst @@ -0,0 +1,468 @@ +================================== +GPIO Descriptor Consumer Interface +================================== + +This document describes the consumer interface of the GPIO framework. Note that +it describes the new descriptor-based interface. For a description of the +deprecated integer-based GPIO interface please refer to legacy.rst. + + +Guidelines for GPIOs consumers +============================== + +Drivers that can't work without standard GPIO calls should have Kconfig entries +that depend on GPIOLIB or select GPIOLIB. The functions that allow a driver to +obtain and use GPIOs are available by including the following file:: + + #include <linux/gpio/consumer.h> + +There are static inline stubs for all functions in the header file in the case +where GPIOLIB is disabled. When these stubs are called they will emit +warnings. These stubs are used for two use cases: + +- Simple compile coverage with e.g. COMPILE_TEST - it does not matter that + the current platform does not enable or select GPIOLIB because we are not + going to execute the system anyway. + +- Truly optional GPIOLIB support - where the driver does not really make use + of the GPIOs on certain compile-time configurations for certain systems, but + will use it under other compile-time configurations. In this case the + consumer must make sure not to call into these functions, or the user will + be met with console warnings that may be perceived as intimidating. + +All the functions that work with the descriptor-based GPIO interface are +prefixed with ``gpiod_``. The ``gpio_`` prefix is used for the legacy +interface. No other function in the kernel should use these prefixes. The use +of the legacy functions is strongly discouraged, new code should use +<linux/gpio/consumer.h> and descriptors exclusively. + + +Obtaining and Disposing GPIOs +============================= + +With the descriptor-based interface, GPIOs are identified with an opaque, +non-forgeable handler that must be obtained through a call to one of the +gpiod_get() functions. Like many other kernel subsystems, gpiod_get() takes the +device that will use the GPIO and the function the requested GPIO is supposed to +fulfill:: + + struct gpio_desc *gpiod_get(struct device *dev, const char *con_id, + enum gpiod_flags flags) + +If a function is implemented by using several GPIOs together (e.g. a simple LED +device that displays digits), an additional index argument can be specified:: + + struct gpio_desc *gpiod_get_index(struct device *dev, + const char *con_id, unsigned int idx, + enum gpiod_flags flags) + +For a more detailed description of the con_id parameter in the DeviceTree case +see Documentation/driver-api/gpio/board.rst + +The flags parameter is used to optionally specify a direction and initial value +for the GPIO. Values can be: + +* GPIOD_ASIS or 0 to not initialize the GPIO at all. The direction must be set + later with one of the dedicated functions. +* GPIOD_IN to initialize the GPIO as input. +* GPIOD_OUT_LOW to initialize the GPIO as output with a value of 0. +* GPIOD_OUT_HIGH to initialize the GPIO as output with a value of 1. +* GPIOD_OUT_LOW_OPEN_DRAIN same as GPIOD_OUT_LOW but also enforce the line + to be electrically used with open drain. +* GPIOD_OUT_HIGH_OPEN_DRAIN same as GPIOD_OUT_HIGH but also enforce the line + to be electrically used with open drain. + +Note that the initial value is *logical* and the physical line level depends on +whether the line is configured active high or active low (see +:ref:`active_low_semantics`). + +The two last flags are used for use cases where open drain is mandatory, such +as I2C: if the line is not already configured as open drain in the mappings +(see board.rst), then open drain will be enforced anyway and a warning will be +printed that the board configuration needs to be updated to match the use case. + +Both functions return either a valid GPIO descriptor, or an error code checkable +with IS_ERR() (they will never return a NULL pointer). -ENOENT will be returned +if and only if no GPIO has been assigned to the device/function/index triplet, +other error codes are used for cases where a GPIO has been assigned but an error +occurred while trying to acquire it. This is useful to discriminate between mere +errors and an absence of GPIO for optional GPIO parameters. For the common +pattern where a GPIO is optional, the gpiod_get_optional() and +gpiod_get_index_optional() functions can be used. These functions return NULL +instead of -ENOENT if no GPIO has been assigned to the requested function:: + + struct gpio_desc *gpiod_get_optional(struct device *dev, + const char *con_id, + enum gpiod_flags flags) + + struct gpio_desc *gpiod_get_index_optional(struct device *dev, + const char *con_id, + unsigned int index, + enum gpiod_flags flags) + +Note that gpio_get*_optional() functions (and their managed variants), unlike +the rest of gpiolib API, also return NULL when gpiolib support is disabled. +This is helpful to driver authors, since they do not need to special case +-ENOSYS return codes. System integrators should however be careful to enable +gpiolib on systems that need it. + +For a function using multiple GPIOs all of those can be obtained with one call:: + + struct gpio_descs *gpiod_get_array(struct device *dev, + const char *con_id, + enum gpiod_flags flags) + +This function returns a struct gpio_descs which contains an array of +descriptors. It also contains a pointer to a gpiolib private structure which, +if passed back to get/set array functions, may speed up I/O processing:: + + struct gpio_descs { + struct gpio_array *info; + unsigned int ndescs; + struct gpio_desc *desc[]; + } + +The following function returns NULL instead of -ENOENT if no GPIOs have been +assigned to the requested function:: + + struct gpio_descs *gpiod_get_array_optional(struct device *dev, + const char *con_id, + enum gpiod_flags flags) + +Device-managed variants of these functions are also defined:: + + struct gpio_desc *devm_gpiod_get(struct device *dev, const char *con_id, + enum gpiod_flags flags) + + struct gpio_desc *devm_gpiod_get_index(struct device *dev, + const char *con_id, + unsigned int idx, + enum gpiod_flags flags) + + struct gpio_desc *devm_gpiod_get_optional(struct device *dev, + const char *con_id, + enum gpiod_flags flags) + + struct gpio_desc *devm_gpiod_get_index_optional(struct device *dev, + const char *con_id, + unsigned int index, + enum gpiod_flags flags) + + struct gpio_descs *devm_gpiod_get_array(struct device *dev, + const char *con_id, + enum gpiod_flags flags) + + struct gpio_descs *devm_gpiod_get_array_optional(struct device *dev, + const char *con_id, + enum gpiod_flags flags) + +A GPIO descriptor can be disposed of using the gpiod_put() function:: + + void gpiod_put(struct gpio_desc *desc) + +For an array of GPIOs this function can be used:: + + void gpiod_put_array(struct gpio_descs *descs) + +It is strictly forbidden to use a descriptor after calling these functions. +It is also not allowed to individually release descriptors (using gpiod_put()) +from an array acquired with gpiod_get_array(). + +The device-managed variants are, unsurprisingly:: + + void devm_gpiod_put(struct device *dev, struct gpio_desc *desc) + + void devm_gpiod_put_array(struct device *dev, struct gpio_descs *descs) + + +Using GPIOs +=========== + +Setting Direction +----------------- +The first thing a driver must do with a GPIO is setting its direction. If no +direction-setting flags have been given to gpiod_get*(), this is done by +invoking one of the gpiod_direction_*() functions:: + + int gpiod_direction_input(struct gpio_desc *desc) + int gpiod_direction_output(struct gpio_desc *desc, int value) + +The return value is zero for success, else a negative errno. It should be +checked, since the get/set calls don't return errors and since misconfiguration +is possible. You should normally issue these calls from a task context. However, +for spinlock-safe GPIOs it is OK to use them before tasking is enabled, as part +of early board setup. + +For output GPIOs, the value provided becomes the initial output value. This +helps avoid signal glitching during system startup. + +A driver can also query the current direction of a GPIO:: + + int gpiod_get_direction(const struct gpio_desc *desc) + +This function returns 0 for output, 1 for input, or an error code in case of error. + +Be aware that there is no default direction for GPIOs. Therefore, **using a GPIO +without setting its direction first is illegal and will result in undefined +behavior!** + + +Spinlock-Safe GPIO Access +------------------------- +Most GPIO controllers can be accessed with memory read/write instructions. Those +don't need to sleep, and can safely be done from inside hard (non-threaded) IRQ +handlers and similar contexts. + +Use the following calls to access GPIOs from an atomic context:: + + int gpiod_get_value(const struct gpio_desc *desc); + void gpiod_set_value(struct gpio_desc *desc, int value); + +The values are boolean, zero for low, nonzero for high. When reading the value +of an output pin, the value returned should be what's seen on the pin. That +won't always match the specified output value, because of issues including +open-drain signaling and output latencies. + +The get/set calls do not return errors because "invalid GPIO" should have been +reported earlier from gpiod_direction_*(). However, note that not all platforms +can read the value of output pins; those that can't should always return zero. +Also, using these calls for GPIOs that can't safely be accessed without sleeping +(see below) is an error. + + +GPIO Access That May Sleep +-------------------------- +Some GPIO controllers must be accessed using message based buses like I2C or +SPI. Commands to read or write those GPIO values require waiting to get to the +head of a queue to transmit a command and get its response. This requires +sleeping, which can't be done from inside IRQ handlers. + +Platforms that support this type of GPIO distinguish them from other GPIOs by +returning nonzero from this call:: + + int gpiod_cansleep(const struct gpio_desc *desc) + +To access such GPIOs, a different set of accessors is defined:: + + int gpiod_get_value_cansleep(const struct gpio_desc *desc) + void gpiod_set_value_cansleep(struct gpio_desc *desc, int value) + +Accessing such GPIOs requires a context which may sleep, for example a threaded +IRQ handler, and those accessors must be used instead of spinlock-safe +accessors without the cansleep() name suffix. + +Other than the fact that these accessors might sleep, and will work on GPIOs +that can't be accessed from hardIRQ handlers, these calls act the same as the +spinlock-safe calls. + + +.. _active_low_semantics: + +The active low and open drain semantics +--------------------------------------- +As a consumer should not have to care about the physical line level, all of the +gpiod_set_value_xxx() or gpiod_set_array_value_xxx() functions operate with +the *logical* value. With this they take the active low property into account. +This means that they check whether the GPIO is configured to be active low, +and if so, they manipulate the passed value before the physical line level is +driven. + +The same is applicable for open drain or open source output lines: those do not +actively drive their output high (open drain) or low (open source), they just +switch their output to a high impedance value. The consumer should not need to +care. (For details read about open drain in driver.rst.) + +With this, all the gpiod_set_(array)_value_xxx() functions interpret the +parameter "value" as "asserted" ("1") or "de-asserted" ("0"). The physical line +level will be driven accordingly. + +As an example, if the active low property for a dedicated GPIO is set, and the +gpiod_set_(array)_value_xxx() passes "asserted" ("1"), the physical line level +will be driven low. + +To summarize:: + + Function (example) line property physical line + gpiod_set_raw_value(desc, 0); don't care low + gpiod_set_raw_value(desc, 1); don't care high + gpiod_set_value(desc, 0); default (active high) low + gpiod_set_value(desc, 1); default (active high) high + gpiod_set_value(desc, 0); active low high + gpiod_set_value(desc, 1); active low low + gpiod_set_value(desc, 0); open drain low + gpiod_set_value(desc, 1); open drain high impedance + gpiod_set_value(desc, 0); open source high impedance + gpiod_set_value(desc, 1); open source high + +It is possible to override these semantics using the set_raw/get_raw functions +but it should be avoided as much as possible, especially by system-agnostic drivers +which should not need to care about the actual physical line level and worry about +the logical value instead. + + +Accessing raw GPIO values +------------------------- +Consumers exist that need to manage the logical state of a GPIO line, i.e. the value +their device will actually receive, no matter what lies between it and the GPIO +line. + +The following set of calls ignore the active-low or open drain property of a GPIO and +work on the raw line value:: + + int gpiod_get_raw_value(const struct gpio_desc *desc) + void gpiod_set_raw_value(struct gpio_desc *desc, int value) + int gpiod_get_raw_value_cansleep(const struct gpio_desc *desc) + void gpiod_set_raw_value_cansleep(struct gpio_desc *desc, int value) + int gpiod_direction_output_raw(struct gpio_desc *desc, int value) + +The active low state of a GPIO can also be queried and toggled using the +following calls:: + + int gpiod_is_active_low(const struct gpio_desc *desc) + void gpiod_toggle_active_low(struct gpio_desc *desc) + +Note that these functions should only be used with great moderation; a driver +should not have to care about the physical line level or open drain semantics. + + +Access multiple GPIOs with a single function call +------------------------------------------------- +The following functions get or set the values of an array of GPIOs:: + + int gpiod_get_array_value(unsigned int array_size, + struct gpio_desc **desc_array, + struct gpio_array *array_info, + unsigned long *value_bitmap); + int gpiod_get_raw_array_value(unsigned int array_size, + struct gpio_desc **desc_array, + struct gpio_array *array_info, + unsigned long *value_bitmap); + int gpiod_get_array_value_cansleep(unsigned int array_size, + struct gpio_desc **desc_array, + struct gpio_array *array_info, + unsigned long *value_bitmap); + int gpiod_get_raw_array_value_cansleep(unsigned int array_size, + struct gpio_desc **desc_array, + struct gpio_array *array_info, + unsigned long *value_bitmap); + + int gpiod_set_array_value(unsigned int array_size, + struct gpio_desc **desc_array, + struct gpio_array *array_info, + unsigned long *value_bitmap) + int gpiod_set_raw_array_value(unsigned int array_size, + struct gpio_desc **desc_array, + struct gpio_array *array_info, + unsigned long *value_bitmap) + int gpiod_set_array_value_cansleep(unsigned int array_size, + struct gpio_desc **desc_array, + struct gpio_array *array_info, + unsigned long *value_bitmap) + int gpiod_set_raw_array_value_cansleep(unsigned int array_size, + struct gpio_desc **desc_array, + struct gpio_array *array_info, + unsigned long *value_bitmap) + +The array can be an arbitrary set of GPIOs. The functions will try to access +GPIOs belonging to the same bank or chip simultaneously if supported by the +corresponding chip driver. In that case a significantly improved performance +can be expected. If simultaneous access is not possible the GPIOs will be +accessed sequentially. + +The functions take four arguments: + + * array_size - the number of array elements + * desc_array - an array of GPIO descriptors + * array_info - optional information obtained from gpiod_get_array() + * value_bitmap - a bitmap to store the GPIOs' values (get) or + a bitmap of values to assign to the GPIOs (set) + +The descriptor array can be obtained using the gpiod_get_array() function +or one of its variants. If the group of descriptors returned by that function +matches the desired group of GPIOs, those GPIOs can be accessed by simply using +the struct gpio_descs returned by gpiod_get_array():: + + struct gpio_descs *my_gpio_descs = gpiod_get_array(...); + gpiod_set_array_value(my_gpio_descs->ndescs, my_gpio_descs->desc, + my_gpio_descs->info, my_gpio_value_bitmap); + +It is also possible to access a completely arbitrary array of descriptors. The +descriptors may be obtained using any combination of gpiod_get() and +gpiod_get_array(). Afterwards the array of descriptors has to be setup +manually before it can be passed to one of the above functions. In that case, +array_info should be set to NULL. + +Note that for optimal performance GPIOs belonging to the same chip should be +contiguous within the array of descriptors. + +Still better performance may be achieved if array indexes of the descriptors +match hardware pin numbers of a single chip. If an array passed to a get/set +array function matches the one obtained from gpiod_get_array() and array_info +associated with the array is also passed, the function may take a fast bitmap +processing path, passing the value_bitmap argument directly to the respective +.get/set_multiple() callback of the chip. That allows for utilization of GPIO +banks as data I/O ports without much loss of performance. + +The return value of gpiod_get_array_value() and its variants is 0 on success +or negative on error. Note the difference to gpiod_get_value(), which returns +0 or 1 on success to convey the GPIO value. With the array functions, the GPIO +values are stored in value_array rather than passed back as return value. + + +GPIOs mapped to IRQs +-------------------- +GPIO lines can quite often be used as IRQs. You can get the IRQ number +corresponding to a given GPIO using the following call:: + + int gpiod_to_irq(const struct gpio_desc *desc) + +It will return an IRQ number, or a negative errno code if the mapping can't be +done (most likely because that particular GPIO cannot be used as IRQ). It is an +unchecked error to use a GPIO that wasn't set up as an input using +gpiod_direction_input(), or to use an IRQ number that didn't originally come +from gpiod_to_irq(). gpiod_to_irq() is not allowed to sleep. + +Non-error values returned from gpiod_to_irq() can be passed to request_irq() or +free_irq(). They will often be stored into IRQ resources for platform devices, +by the board-specific initialization code. Note that IRQ trigger options are +part of the IRQ interface, e.g. IRQF_TRIGGER_FALLING, as are system wakeup +capabilities. + + +GPIOs and ACPI +============== + +On ACPI systems, GPIOs are described by GpioIo()/GpioInt() resources listed by +the _CRS configuration objects of devices. Those resources do not provide +connection IDs (names) for GPIOs, so it is necessary to use an additional +mechanism for this purpose. + +Systems compliant with ACPI 5.1 or newer may provide a _DSD configuration object +which, among other things, may be used to provide connection IDs for specific +GPIOs described by the GpioIo()/GpioInt() resources in _CRS. If that is the +case, it will be handled by the GPIO subsystem automatically. However, if the +_DSD is not present, the mappings between GpioIo()/GpioInt() resources and GPIO +connection IDs need to be provided by device drivers. + +For details refer to Documentation/firmware-guide/acpi/gpio-properties.rst + + +Interacting With the Legacy GPIO Subsystem +========================================== +Many kernel subsystems and drivers still handle GPIOs using the legacy +integer-based interface. It is strongly recommended to update these to the new +gpiod interface. For cases where both interfaces need to be used, the following +two functions allow to convert a GPIO descriptor into the GPIO integer namespace +and vice-versa:: + + int desc_to_gpio(const struct gpio_desc *desc) + struct gpio_desc *gpio_to_desc(unsigned gpio) + +The GPIO number returned by desc_to_gpio() can safely be used as a parameter of +the gpio\_*() functions for as long as the GPIO descriptor `desc` is not freed. +All the same, a GPIO number passed to gpio_to_desc() must first be properly +acquired using e.g. gpio_request_one(), and the returned GPIO descriptor is only +considered valid until that GPIO number is released using gpio_free(). + +Freeing a GPIO obtained by one API with the other API is forbidden and an +unchecked error. diff --git a/Documentation/driver-api/gpio/driver.rst b/Documentation/driver-api/gpio/driver.rst new file mode 100644 index 0000000000..bf6319cc53 --- /dev/null +++ b/Documentation/driver-api/gpio/driver.rst @@ -0,0 +1,778 @@ +===================== +GPIO Driver Interface +===================== + +This document serves as a guide for writers of GPIO chip drivers. + +Each GPIO controller driver needs to include the following header, which defines +the structures used to define a GPIO driver:: + + #include <linux/gpio/driver.h> + + +Internal Representation of GPIOs +================================ + +A GPIO chip handles one or more GPIO lines. To be considered a GPIO chip, the +lines must conform to the definition: General Purpose Input/Output. If the +line is not general purpose, it is not GPIO and should not be handled by a +GPIO chip. The use case is the indicative: certain lines in a system may be +called GPIO but serve a very particular purpose thus not meeting the criteria +of a general purpose I/O. On the other hand a LED driver line may be used as a +GPIO and should therefore still be handled by a GPIO chip driver. + +Inside a GPIO driver, individual GPIO lines are identified by their hardware +number, sometime also referred to as ``offset``, which is a unique number +between 0 and n-1, n being the number of GPIOs managed by the chip. + +The hardware GPIO number should be something intuitive to the hardware, for +example if a system uses a memory-mapped set of I/O-registers where 32 GPIO +lines are handled by one bit per line in a 32-bit register, it makes sense to +use hardware offsets 0..31 for these, corresponding to bits 0..31 in the +register. + +This number is purely internal: the hardware number of a particular GPIO +line is never made visible outside of the driver. + +On top of this internal number, each GPIO line also needs to have a global +number in the integer GPIO namespace so that it can be used with the legacy GPIO +interface. Each chip must thus have a "base" number (which can be automatically +assigned), and for each GPIO line the global number will be (base + hardware +number). Although the integer representation is considered deprecated, it still +has many users and thus needs to be maintained. + +So for example one platform could use global numbers 32-159 for GPIOs, with a +controller defining 128 GPIOs at a "base" of 32 ; while another platform uses +global numbers 0..63 with one set of GPIO controllers, 64-79 with another type +of GPIO controller, and on one particular board 80-95 with an FPGA. The legacy +numbers need not be contiguous; either of those platforms could also use numbers +2000-2063 to identify GPIO lines in a bank of I2C GPIO expanders. + + +Controller Drivers: gpio_chip +============================= + +In the gpiolib framework each GPIO controller is packaged as a "struct +gpio_chip" (see <linux/gpio/driver.h> for its complete definition) with members +common to each controller of that type, these should be assigned by the +driver code: + + - methods to establish GPIO line direction + - methods used to access GPIO line values + - method to set electrical configuration for a given GPIO line + - method to return the IRQ number associated to a given GPIO line + - flag saying whether calls to its methods may sleep + - optional line names array to identify lines + - optional debugfs dump method (showing extra state information) + - optional base number (will be automatically assigned if omitted) + - optional label for diagnostics and GPIO chip mapping using platform data + +The code implementing a gpio_chip should support multiple instances of the +controller, preferably using the driver model. That code will configure each +gpio_chip and issue gpiochip_add(), gpiochip_add_data(), or +devm_gpiochip_add_data(). Removing a GPIO controller should be rare; use +gpiochip_remove() when it is unavoidable. + +Often a gpio_chip is part of an instance-specific structure with states not +exposed by the GPIO interfaces, such as addressing, power management, and more. +Chips such as audio codecs will have complex non-GPIO states. + +Any debugfs dump method should normally ignore lines which haven't been +requested. They can use gpiochip_is_requested(), which returns either +NULL or the label associated with that GPIO line when it was requested. + +Realtime considerations: the GPIO driver should not use spinlock_t or any +sleepable APIs (like PM runtime) in its gpio_chip implementation (.get/.set +and direction control callbacks) if it is expected to call GPIO APIs from +atomic context on realtime kernels (inside hard IRQ handlers and similar +contexts). Normally this should not be required. + + +GPIO electrical configuration +----------------------------- + +GPIO lines can be configured for several electrical modes of operation by using +the .set_config() callback. Currently this API supports setting: + +- Debouncing +- Single-ended modes (open drain/open source) +- Pull up and pull down resistor enablement + +These settings are described below. + +The .set_config() callback uses the same enumerators and configuration +semantics as the generic pin control drivers. This is not a coincidence: it is +possible to assign the .set_config() to the function gpiochip_generic_config() +which will result in pinctrl_gpio_set_config() being called and eventually +ending up in the pin control back-end "behind" the GPIO controller, usually +closer to the actual pins. This way the pin controller can manage the below +listed GPIO configurations. + +If a pin controller back-end is used, the GPIO controller or hardware +description needs to provide "GPIO ranges" mapping the GPIO line offsets to pin +numbers on the pin controller so they can properly cross-reference each other. + + +GPIO lines with debounce support +-------------------------------- + +Debouncing is a configuration set to a pin indicating that it is connected to +a mechanical switch or button, or similar that may bounce. Bouncing means the +line is pulled high/low quickly at very short intervals for mechanical +reasons. This can result in the value being unstable or irqs firing repeatedly +unless the line is debounced. + +Debouncing in practice involves setting up a timer when something happens on +the line, wait a little while and then sample the line again, so see if it +still has the same value (low or high). This could also be repeated by a clever +state machine, waiting for a line to become stable. In either case, it sets +a certain number of milliseconds for debouncing, or just "on/off" if that time +is not configurable. + + +GPIO lines with open drain/source support +----------------------------------------- + +Open drain (CMOS) or open collector (TTL) means the line is not actively driven +high: instead you provide the drain/collector as output, so when the transistor +is not open, it will present a high-impedance (tristate) to the external rail:: + + + CMOS CONFIGURATION TTL CONFIGURATION + + ||--- out +--- out + in ----|| |/ + ||--+ in ----| + | |\ + GND GND + +This configuration is normally used as a way to achieve one of two things: + +- Level-shifting: to reach a logical level higher than that of the silicon + where the output resides. + +- Inverse wire-OR on an I/O line, for example a GPIO line, making it possible + for any driving stage on the line to drive it low even if any other output + to the same line is simultaneously driving it high. A special case of this + is driving the SCL and SDA lines of an I2C bus, which is by definition a + wire-OR bus. + +Both use cases require that the line be equipped with a pull-up resistor. This +resistor will make the line tend to high level unless one of the transistors on +the rail actively pulls it down. + +The level on the line will go as high as the VDD on the pull-up resistor, which +may be higher than the level supported by the transistor, achieving a +level-shift to the higher VDD. + +Integrated electronics often have an output driver stage in the form of a CMOS +"totem-pole" with one N-MOS and one P-MOS transistor where one of them drives +the line high and one of them drives the line low. This is called a push-pull +output. The "totem-pole" looks like so:: + + VDD + | + OD ||--+ + +--/ ---o|| P-MOS-FET + | ||--+ + IN --+ +----- out + | ||--+ + +--/ ----|| N-MOS-FET + OS ||--+ + | + GND + +The desired output signal (e.g. coming directly from some GPIO output register) +arrives at IN. The switches named "OD" and "OS" are normally closed, creating +a push-pull circuit. + +Consider the little "switches" named "OD" and "OS" that enable/disable the +P-MOS or N-MOS transistor right after the split of the input. As you can see, +either transistor will go totally numb if this switch is open. The totem-pole +is then halved and give high impedance instead of actively driving the line +high or low respectively. That is usually how software-controlled open +drain/source works. + +Some GPIO hardware come in open drain / open source configuration. Some are +hard-wired lines that will only support open drain or open source no matter +what: there is only one transistor there. Some are software-configurable: +by flipping a bit in a register the output can be configured as open drain +or open source, in practice by flicking open the switches labeled "OD" and "OS" +in the drawing above. + +By disabling the P-MOS transistor, the output can be driven between GND and +high impedance (open drain), and by disabling the N-MOS transistor, the output +can be driven between VDD and high impedance (open source). In the first case, +a pull-up resistor is needed on the outgoing rail to complete the circuit, and +in the second case, a pull-down resistor is needed on the rail. + +Hardware that supports open drain or open source or both, can implement a +special callback in the gpio_chip: .set_config() that takes a generic +pinconf packed value telling whether to configure the line as open drain, +open source or push-pull. This will happen in response to the +GPIO_OPEN_DRAIN or GPIO_OPEN_SOURCE flag set in the machine file, or coming +from other hardware descriptions. + +If this state can not be configured in hardware, i.e. if the GPIO hardware does +not support open drain/open source in hardware, the GPIO library will instead +use a trick: when a line is set as output, if the line is flagged as open +drain, and the IN output value is low, it will be driven low as usual. But +if the IN output value is set to high, it will instead *NOT* be driven high, +instead it will be switched to input, as input mode is an equivalent to +high impedance, thus achieving an "open drain emulation" of sorts: electrically +the behaviour will be identical, with the exception of possible hardware glitches +when switching the mode of the line. + +For open source configuration the same principle is used, just that instead +of actively driving the line low, it is set to input. + + +GPIO lines with pull up/down resistor support +--------------------------------------------- + +A GPIO line can support pull-up/down using the .set_config() callback. This +means that a pull up or pull-down resistor is available on the output of the +GPIO line, and this resistor is software controlled. + +In discrete designs, a pull-up or pull-down resistor is simply soldered on +the circuit board. This is not something we deal with or model in software. The +most you will think about these lines is that they will very likely be +configured as open drain or open source (see the section above). + +The .set_config() callback can only turn pull up or down on and off, and will +no have any semantic knowledge about the resistance used. It will only say +switch a bit in a register enabling or disabling pull-up or pull-down. + +If the GPIO line supports shunting in different resistance values for the +pull-up or pull-down resistor, the GPIO chip callback .set_config() will not +suffice. For these complex use cases, a combined GPIO chip and pin controller +need to be implemented, as the pin config interface of a pin controller +supports more versatile control over electrical properties and can handle +different pull-up or pull-down resistance values. + + +GPIO drivers providing IRQs +=========================== + +It is custom that GPIO drivers (GPIO chips) are also providing interrupts, +most often cascaded off a parent interrupt controller, and in some special +cases the GPIO logic is melded with a SoC's primary interrupt controller. + +The IRQ portions of the GPIO block are implemented using an irq_chip, using +the header <linux/irq.h>. So this combined driver is utilizing two sub- +systems simultaneously: gpio and irq. + +It is legal for any IRQ consumer to request an IRQ from any irqchip even if it +is a combined GPIO+IRQ driver. The basic premise is that gpio_chip and +irq_chip are orthogonal, and offering their services independent of each +other. + +gpiod_to_irq() is just a convenience function to figure out the IRQ for a +certain GPIO line and should not be relied upon to have been called before +the IRQ is used. + +Always prepare the hardware and make it ready for action in respective +callbacks from the GPIO and irq_chip APIs. Do not rely on gpiod_to_irq() having +been called first. + +We can divide GPIO irqchips in two broad categories: + +- CASCADED INTERRUPT CHIPS: this means that the GPIO chip has one common + interrupt output line, which is triggered by any enabled GPIO line on that + chip. The interrupt output line will then be routed to an parent interrupt + controller one level up, in the most simple case the systems primary + interrupt controller. This is modeled by an irqchip that will inspect bits + inside the GPIO controller to figure out which line fired it. The irqchip + part of the driver needs to inspect registers to figure this out and it + will likely also need to acknowledge that it is handling the interrupt + by clearing some bit (sometime implicitly, by just reading a status + register) and it will often need to set up the configuration such as + edge sensitivity (rising or falling edge, or high/low level interrupt for + example). + +- HIERARCHICAL INTERRUPT CHIPS: this means that each GPIO line has a dedicated + irq line to a parent interrupt controller one level up. There is no need + to inquire the GPIO hardware to figure out which line has fired, but it + may still be necessary to acknowledge the interrupt and set up configuration + such as edge sensitivity. + +Realtime considerations: a realtime compliant GPIO driver should not use +spinlock_t or any sleepable APIs (like PM runtime) as part of its irqchip +implementation. + +- spinlock_t should be replaced with raw_spinlock_t.[1] +- If sleepable APIs have to be used, these can be done from the .irq_bus_lock() + and .irq_bus_unlock() callbacks, as these are the only slowpath callbacks + on an irqchip. Create the callbacks if needed.[2] + + +Cascaded GPIO irqchips +---------------------- + +Cascaded GPIO irqchips usually fall in one of three categories: + +- CHAINED CASCADED GPIO IRQCHIPS: these are usually the type that is embedded on + an SoC. This means that there is a fast IRQ flow handler for the GPIOs that + gets called in a chain from the parent IRQ handler, most typically the + system interrupt controller. This means that the GPIO irqchip handler will + be called immediately from the parent irqchip, while holding the IRQs + disabled. The GPIO irqchip will then end up calling something like this + sequence in its interrupt handler:: + + static irqreturn_t foo_gpio_irq(int irq, void *data) + chained_irq_enter(...); + generic_handle_irq(...); + chained_irq_exit(...); + + Chained GPIO irqchips typically can NOT set the .can_sleep flag on + struct gpio_chip, as everything happens directly in the callbacks: no + slow bus traffic like I2C can be used. + + Realtime considerations: Note that chained IRQ handlers will not be forced + threaded on -RT. As a result, spinlock_t or any sleepable APIs (like PM + runtime) can't be used in a chained IRQ handler. + + If required (and if it can't be converted to the nested threaded GPIO irqchip, + see below) a chained IRQ handler can be converted to generic irq handler and + this way it will become a threaded IRQ handler on -RT and a hard IRQ handler + on non-RT (for example, see [3]). + + The generic_handle_irq() is expected to be called with IRQ disabled, + so the IRQ core will complain if it is called from an IRQ handler which is + forced to a thread. The "fake?" raw lock can be used to work around this + problem:: + + raw_spinlock_t wa_lock; + static irqreturn_t omap_gpio_irq_handler(int irq, void *gpiobank) + unsigned long wa_lock_flags; + raw_spin_lock_irqsave(&bank->wa_lock, wa_lock_flags); + generic_handle_irq(irq_find_mapping(bank->chip.irq.domain, bit)); + raw_spin_unlock_irqrestore(&bank->wa_lock, wa_lock_flags); + +- GENERIC CHAINED GPIO IRQCHIPS: these are the same as "CHAINED GPIO irqchips", + but chained IRQ handlers are not used. Instead GPIO IRQs dispatching is + performed by generic IRQ handler which is configured using request_irq(). + The GPIO irqchip will then end up calling something like this sequence in + its interrupt handler:: + + static irqreturn_t gpio_rcar_irq_handler(int irq, void *dev_id) + for each detected GPIO IRQ + generic_handle_irq(...); + + Realtime considerations: this kind of handlers will be forced threaded on -RT, + and as result the IRQ core will complain that generic_handle_irq() is called + with IRQ enabled and the same work-around as for "CHAINED GPIO irqchips" can + be applied. + +- NESTED THREADED GPIO IRQCHIPS: these are off-chip GPIO expanders and any + other GPIO irqchip residing on the other side of a sleeping bus such as I2C + or SPI. + + Of course such drivers that need slow bus traffic to read out IRQ status and + similar, traffic which may in turn incur other IRQs to happen, cannot be + handled in a quick IRQ handler with IRQs disabled. Instead they need to spawn + a thread and then mask the parent IRQ line until the interrupt is handled + by the driver. The hallmark of this driver is to call something like + this in its interrupt handler:: + + static irqreturn_t foo_gpio_irq(int irq, void *data) + ... + handle_nested_irq(irq); + + The hallmark of threaded GPIO irqchips is that they set the .can_sleep + flag on struct gpio_chip to true, indicating that this chip may sleep + when accessing the GPIOs. + + These kinds of irqchips are inherently realtime tolerant as they are + already set up to handle sleeping contexts. + + +Infrastructure helpers for GPIO irqchips +---------------------------------------- + +To help out in handling the set-up and management of GPIO irqchips and the +associated irqdomain and resource allocation callbacks. These are activated +by selecting the Kconfig symbol GPIOLIB_IRQCHIP. If the symbol +IRQ_DOMAIN_HIERARCHY is also selected, hierarchical helpers will also be +provided. A big portion of overhead code will be managed by gpiolib, +under the assumption that your interrupts are 1-to-1-mapped to the +GPIO line index: + +.. csv-table:: + :header: GPIO line offset, Hardware IRQ + + 0,0 + 1,1 + 2,2 + ...,... + ngpio-1, ngpio-1 + + +If some GPIO lines do not have corresponding IRQs, the bitmask valid_mask +and the flag need_valid_mask in gpio_irq_chip can be used to mask off some +lines as invalid for associating with IRQs. + +The preferred way to set up the helpers is to fill in the +struct gpio_irq_chip inside struct gpio_chip before adding the gpio_chip. +If you do this, the additional irq_chip will be set up by gpiolib at the +same time as setting up the rest of the GPIO functionality. The following +is a typical example of a chained cascaded interrupt handler using +the gpio_irq_chip. Note how the mask/unmask (or disable/enable) functions +call into the core gpiolib code: + +.. code-block:: c + + /* Typical state container */ + struct my_gpio { + struct gpio_chip gc; + }; + + static void my_gpio_mask_irq(struct irq_data *d) + { + struct gpio_chip *gc = irq_data_get_irq_chip_data(d); + irq_hw_number_t hwirq = irqd_to_hwirq(d); + + /* + * Perform any necessary action to mask the interrupt, + * and then call into the core code to synchronise the + * state. + */ + + gpiochip_disable_irq(gc, hwirq); + } + + static void my_gpio_unmask_irq(struct irq_data *d) + { + struct gpio_chip *gc = irq_data_get_irq_chip_data(d); + irq_hw_number_t hwirq = irqd_to_hwirq(d); + + gpiochip_enable_irq(gc, hwirq); + + /* + * Perform any necessary action to unmask the interrupt, + * after having called into the core code to synchronise + * the state. + */ + } + + /* + * Statically populate the irqchip. Note that it is made const + * (further indicated by the IRQCHIP_IMMUTABLE flag), and that + * the GPIOCHIP_IRQ_RESOURCE_HELPER macro adds some extra + * callbacks to the structure. + */ + static const struct irq_chip my_gpio_irq_chip = { + .name = "my_gpio_irq", + .irq_ack = my_gpio_ack_irq, + .irq_mask = my_gpio_mask_irq, + .irq_unmask = my_gpio_unmask_irq, + .irq_set_type = my_gpio_set_irq_type, + .flags = IRQCHIP_IMMUTABLE, + /* Provide the gpio resource callbacks */ + GPIOCHIP_IRQ_RESOURCE_HELPERS, + }; + + int irq; /* from platform etc */ + struct my_gpio *g; + struct gpio_irq_chip *girq; + + /* Get a pointer to the gpio_irq_chip */ + girq = &g->gc.irq; + gpio_irq_chip_set_chip(girq, &my_gpio_irq_chip); + girq->parent_handler = ftgpio_gpio_irq_handler; + girq->num_parents = 1; + girq->parents = devm_kcalloc(dev, 1, sizeof(*girq->parents), + GFP_KERNEL); + if (!girq->parents) + return -ENOMEM; + girq->default_type = IRQ_TYPE_NONE; + girq->handler = handle_bad_irq; + girq->parents[0] = irq; + + return devm_gpiochip_add_data(dev, &g->gc, g); + +The helper supports using threaded interrupts as well. Then you just request +the interrupt separately and go with it: + +.. code-block:: c + + /* Typical state container */ + struct my_gpio { + struct gpio_chip gc; + }; + + static void my_gpio_mask_irq(struct irq_data *d) + { + struct gpio_chip *gc = irq_data_get_irq_chip_data(d); + irq_hw_number_t hwirq = irqd_to_hwirq(d); + + /* + * Perform any necessary action to mask the interrupt, + * and then call into the core code to synchronise the + * state. + */ + + gpiochip_disable_irq(gc, hwirq); + } + + static void my_gpio_unmask_irq(struct irq_data *d) + { + struct gpio_chip *gc = irq_data_get_irq_chip_data(d); + irq_hw_number_t hwirq = irqd_to_hwirq(d); + + gpiochip_enable_irq(gc, hwirq); + + /* + * Perform any necessary action to unmask the interrupt, + * after having called into the core code to synchronise + * the state. + */ + } + + /* + * Statically populate the irqchip. Note that it is made const + * (further indicated by the IRQCHIP_IMMUTABLE flag), and that + * the GPIOCHIP_IRQ_RESOURCE_HELPER macro adds some extra + * callbacks to the structure. + */ + static const struct irq_chip my_gpio_irq_chip = { + .name = "my_gpio_irq", + .irq_ack = my_gpio_ack_irq, + .irq_mask = my_gpio_mask_irq, + .irq_unmask = my_gpio_unmask_irq, + .irq_set_type = my_gpio_set_irq_type, + .flags = IRQCHIP_IMMUTABLE, + /* Provide the gpio resource callbacks */ + GPIOCHIP_IRQ_RESOURCE_HELPERS, + }; + + int irq; /* from platform etc */ + struct my_gpio *g; + struct gpio_irq_chip *girq; + + ret = devm_request_threaded_irq(dev, irq, NULL, + irq_thread_fn, IRQF_ONESHOT, "my-chip", g); + if (ret < 0) + return ret; + + /* Get a pointer to the gpio_irq_chip */ + girq = &g->gc.irq; + gpio_irq_chip_set_chip(girq, &my_gpio_irq_chip); + /* This will let us handle the parent IRQ in the driver */ + girq->parent_handler = NULL; + girq->num_parents = 0; + girq->parents = NULL; + girq->default_type = IRQ_TYPE_NONE; + girq->handler = handle_bad_irq; + + return devm_gpiochip_add_data(dev, &g->gc, g); + +The helper supports using hierarchical interrupt controllers as well. +In this case the typical set-up will look like this: + +.. code-block:: c + + /* Typical state container with dynamic irqchip */ + struct my_gpio { + struct gpio_chip gc; + struct fwnode_handle *fwnode; + }; + + static void my_gpio_mask_irq(struct irq_data *d) + { + struct gpio_chip *gc = irq_data_get_irq_chip_data(d); + irq_hw_number_t hwirq = irqd_to_hwirq(d); + + /* + * Perform any necessary action to mask the interrupt, + * and then call into the core code to synchronise the + * state. + */ + + gpiochip_disable_irq(gc, hwirq); + irq_mask_mask_parent(d); + } + + static void my_gpio_unmask_irq(struct irq_data *d) + { + struct gpio_chip *gc = irq_data_get_irq_chip_data(d); + irq_hw_number_t hwirq = irqd_to_hwirq(d); + + gpiochip_enable_irq(gc, hwirq); + + /* + * Perform any necessary action to unmask the interrupt, + * after having called into the core code to synchronise + * the state. + */ + + irq_mask_unmask_parent(d); + } + + /* + * Statically populate the irqchip. Note that it is made const + * (further indicated by the IRQCHIP_IMMUTABLE flag), and that + * the GPIOCHIP_IRQ_RESOURCE_HELPER macro adds some extra + * callbacks to the structure. + */ + static const struct irq_chip my_gpio_irq_chip = { + .name = "my_gpio_irq", + .irq_ack = my_gpio_ack_irq, + .irq_mask = my_gpio_mask_irq, + .irq_unmask = my_gpio_unmask_irq, + .irq_set_type = my_gpio_set_irq_type, + .flags = IRQCHIP_IMMUTABLE, + /* Provide the gpio resource callbacks */ + GPIOCHIP_IRQ_RESOURCE_HELPERS, + }; + + struct my_gpio *g; + struct gpio_irq_chip *girq; + + /* Get a pointer to the gpio_irq_chip */ + girq = &g->gc.irq; + gpio_irq_chip_set_chip(girq, &my_gpio_irq_chip); + girq->default_type = IRQ_TYPE_NONE; + girq->handler = handle_bad_irq; + girq->fwnode = g->fwnode; + girq->parent_domain = parent; + girq->child_to_parent_hwirq = my_gpio_child_to_parent_hwirq; + + return devm_gpiochip_add_data(dev, &g->gc, g); + +As you can see pretty similar, but you do not supply a parent handler for +the IRQ, instead a parent irqdomain, an fwnode for the hardware and +a function .child_to_parent_hwirq() that has the purpose of looking up +the parent hardware irq from a child (i.e. this gpio chip) hardware irq. +As always it is good to look at examples in the kernel tree for advice +on how to find the required pieces. + +If there is a need to exclude certain GPIO lines from the IRQ domain handled by +these helpers, we can set .irq.need_valid_mask of the gpiochip before +devm_gpiochip_add_data() or gpiochip_add_data() is called. This allocates an +.irq.valid_mask with as many bits set as there are GPIO lines in the chip, each +bit representing line 0..n-1. Drivers can exclude GPIO lines by clearing bits +from this mask. The mask can be filled in the init_valid_mask() callback +that is part of the struct gpio_irq_chip. + +To use the helpers please keep the following in mind: + +- Make sure to assign all relevant members of the struct gpio_chip so that + the irqchip can initialize. E.g. .dev and .can_sleep shall be set up + properly. + +- Nominally set gpio_irq_chip.handler to handle_bad_irq. Then, if your irqchip + is cascaded, set the handler to handle_level_irq() and/or handle_edge_irq() + in the irqchip .set_type() callback depending on what your controller + supports and what is requested by the consumer. + + +Locking IRQ usage +----------------- + +Since GPIO and irq_chip are orthogonal, we can get conflicts between different +use cases. For example a GPIO line used for IRQs should be an input line, +it does not make sense to fire interrupts on an output GPIO. + +If there is competition inside the subsystem which side is using the +resource (a certain GPIO line and register for example) it needs to deny +certain operations and keep track of usage inside of the gpiolib subsystem. + +Input GPIOs can be used as IRQ signals. When this happens, a driver is requested +to mark the GPIO as being used as an IRQ:: + + int gpiochip_lock_as_irq(struct gpio_chip *chip, unsigned int offset) + +This will prevent the use of non-irq related GPIO APIs until the GPIO IRQ lock +is released:: + + void gpiochip_unlock_as_irq(struct gpio_chip *chip, unsigned int offset) + +When implementing an irqchip inside a GPIO driver, these two functions should +typically be called in the .startup() and .shutdown() callbacks from the +irqchip. + +When using the gpiolib irqchip helpers, these callbacks are automatically +assigned. + + +Disabling and enabling IRQs +--------------------------- + +In some (fringe) use cases, a driver may be using a GPIO line as input for IRQs, +but occasionally switch that line over to drive output and then back to being +an input with interrupts again. This happens on things like CEC (Consumer +Electronics Control). + +When a GPIO is used as an IRQ signal, then gpiolib also needs to know if +the IRQ is enabled or disabled. In order to inform gpiolib about this, +the irqchip driver should call:: + + void gpiochip_disable_irq(struct gpio_chip *chip, unsigned int offset) + +This allows drivers to drive the GPIO as an output while the IRQ is +disabled. When the IRQ is enabled again, a driver should call:: + + void gpiochip_enable_irq(struct gpio_chip *chip, unsigned int offset) + +When implementing an irqchip inside a GPIO driver, these two functions should +typically be called in the .irq_disable() and .irq_enable() callbacks from the +irqchip. + +When IRQCHIP_IMMUTABLE is not advertised by the irqchip, these callbacks +are automatically assigned. This behaviour is deprecated and on its way +to be removed from the kernel. + + +Real-Time compliance for GPIO IRQ chips +--------------------------------------- + +Any provider of irqchips needs to be carefully tailored to support Real-Time +preemption. It is desirable that all irqchips in the GPIO subsystem keep this +in mind and do the proper testing to assure they are real time-enabled. + +So, pay attention on above realtime considerations in the documentation. + +The following is a checklist to follow when preparing a driver for real-time +compliance: + +- ensure spinlock_t is not used as part irq_chip implementation +- ensure that sleepable APIs are not used as part irq_chip implementation + If sleepable APIs have to be used, these can be done from the .irq_bus_lock() + and .irq_bus_unlock() callbacks +- Chained GPIO irqchips: ensure spinlock_t or any sleepable APIs are not used + from the chained IRQ handler +- Generic chained GPIO irqchips: take care about generic_handle_irq() calls and + apply corresponding work-around +- Chained GPIO irqchips: get rid of the chained IRQ handler and use generic irq + handler if possible +- regmap_mmio: it is possible to disable internal locking in regmap by setting + .disable_locking and handling the locking in the GPIO driver +- Test your driver with the appropriate in-kernel real-time test cases for both + level and edge IRQs + +* [1] http://www.spinics.net/lists/linux-omap/msg120425.html +* [2] https://lore.kernel.org/r/1443209283-20781-2-git-send-email-grygorii.strashko@ti.com +* [3] https://lore.kernel.org/r/1443209283-20781-3-git-send-email-grygorii.strashko@ti.com + + +Requesting self-owned GPIO pins +=============================== + +Sometimes it is useful to allow a GPIO chip driver to request its own GPIO +descriptors through the gpiolib API. A GPIO driver can use the following +functions to request and free descriptors:: + + struct gpio_desc *gpiochip_request_own_desc(struct gpio_desc *desc, + u16 hwnum, + const char *label, + enum gpiod_flags flags) + + void gpiochip_free_own_desc(struct gpio_desc *desc) + +Descriptors requested with gpiochip_request_own_desc() must be released with +gpiochip_free_own_desc(). + +These functions must be used with care since they do not affect module use +count. Do not use the functions to request gpio descriptors not owned by the +calling driver. diff --git a/Documentation/driver-api/gpio/drivers-on-gpio.rst b/Documentation/driver-api/gpio/drivers-on-gpio.rst new file mode 100644 index 0000000000..af632d764a --- /dev/null +++ b/Documentation/driver-api/gpio/drivers-on-gpio.rst @@ -0,0 +1,114 @@ +============================ +Subsystem drivers using GPIO +============================ + +Note that standard kernel drivers exist for common GPIO tasks and will provide +the right in-kernel and userspace APIs/ABIs for the job, and that these +drivers can quite easily interconnect with other kernel subsystems using +hardware descriptions such as device tree or ACPI: + +- leds-gpio: drivers/leds/leds-gpio.c will handle LEDs connected to GPIO + lines, giving you the LED sysfs interface + +- ledtrig-gpio: drivers/leds/trigger/ledtrig-gpio.c will provide a LED trigger, + i.e. a LED will turn on/off in response to a GPIO line going high or low + (and that LED may in turn use the leds-gpio as per above). + +- gpio-keys: drivers/input/keyboard/gpio_keys.c is used when your GPIO line + can generate interrupts in response to a key press. Also supports debounce. + +- gpio-keys-polled: drivers/input/keyboard/gpio_keys_polled.c is used when your + GPIO line cannot generate interrupts, so it needs to be periodically polled + by a timer. + +- gpio_mouse: drivers/input/mouse/gpio_mouse.c is used to provide a mouse with + up to three buttons by simply using GPIOs and no mouse port. You can cut the + mouse cable and connect the wires to GPIO lines or solder a mouse connector + to the lines for a more permanent solution of this type. + +- gpio-beeper: drivers/input/misc/gpio-beeper.c is used to provide a beep from + an external speaker connected to a GPIO line. + +- extcon-gpio: drivers/extcon/extcon-gpio.c is used when you need to read an + external connector status, such as a headset line for an audio driver or an + HDMI connector. It will provide a better userspace sysfs interface than GPIO. + +- restart-gpio: drivers/power/reset/gpio-restart.c is used to restart/reboot + the system by pulling a GPIO line and will register a restart handler so + userspace can issue the right system call to restart the system. + +- poweroff-gpio: drivers/power/reset/gpio-poweroff.c is used to power the + system down by pulling a GPIO line and will register a pm_power_off() + callback so that userspace can issue the right system call to power down the + system. + +- gpio-gate-clock: drivers/clk/clk-gpio.c is used to control a gated clock + (off/on) that uses a GPIO, and integrated with the clock subsystem. + +- i2c-gpio: drivers/i2c/busses/i2c-gpio.c is used to drive an I2C bus + (two wires, SDA and SCL lines) by hammering (bitbang) two GPIO lines. It will + appear as any other I2C bus to the system and makes it possible to connect + drivers for the I2C devices on the bus like any other I2C bus driver. + +- spi_gpio: drivers/spi/spi-gpio.c is used to drive an SPI bus (variable number + of wires, at least SCK and optionally MISO, MOSI and chip select lines) using + GPIO hammering (bitbang). It will appear as any other SPI bus on the system + and makes it possible to connect drivers for SPI devices on the bus like + any other SPI bus driver. For example any MMC/SD card can then be connected + to this SPI by using the mmc_spi host from the MMC/SD card subsystem. + +- w1-gpio: drivers/w1/masters/w1-gpio.c is used to drive a one-wire bus using + a GPIO line, integrating with the W1 subsystem and handling devices on + the bus like any other W1 device. + +- gpio-fan: drivers/hwmon/gpio-fan.c is used to control a fan for cooling the + system, connected to a GPIO line (and optionally a GPIO alarm line), + presenting all the right in-kernel and sysfs interfaces to make your system + not overheat. + +- gpio-regulator: drivers/regulator/gpio-regulator.c is used to control a + regulator providing a certain voltage by pulling a GPIO line, integrating + with the regulator subsystem and giving you all the right interfaces. + +- gpio-wdt: drivers/watchdog/gpio_wdt.c is used to provide a watchdog timer + that will periodically "ping" a hardware connected to a GPIO line by toggling + it from 1-to-0-to-1. If that hardware does not receive its "ping" + periodically, it will reset the system. + +- gpio-nand: drivers/mtd/nand/raw/gpio.c is used to connect a NAND flash chip + to a set of simple GPIO lines: RDY, NCE, ALE, CLE, NWP. It interacts with the + NAND flash MTD subsystem and provides chip access and partition parsing like + any other NAND driving hardware. + +- ps2-gpio: drivers/input/serio/ps2-gpio.c is used to drive a PS/2 (IBM) serio + bus, data and clock line, by bit banging two GPIO lines. It will appear as + any other serio bus to the system and makes it possible to connect drivers + for e.g. keyboards and other PS/2 protocol based devices. + +- cec-gpio: drivers/media/platform/cec-gpio/ is used to interact with a CEC + Consumer Electronics Control bus using only GPIO. It is used to communicate + with devices on the HDMI bus. + +- gpio-charger: drivers/power/supply/gpio-charger.c is used if you need to do + battery charging and all you have to go by to check the presence of the + AC charger or more complex tasks such as indicating charging status using + nothing but GPIO lines, this driver provides that and also a clearly defined + way to pass the charging parameters from hardware descriptions such as the + device tree. + +- gpio-mux: drivers/mux/gpio.c is used for controlling a multiplexer using + n GPIO lines such that you can mux in 2^n different devices by activating + different GPIO lines. Often the GPIOs are on a SoC and the devices are + some SoC-external entities, such as different components on a PCB that + can be selectively enabled. + +Apart from this there are special GPIO drivers in subsystems like MMC/SD to +read card detect and write protect GPIO lines, and in the TTY serial subsystem +to emulate MCTRL (modem control) signals CTS/RTS by using two GPIO lines. The +MTD NOR flash has add-ons for extra GPIO lines too, though the address bus is +usually connected directly to the flash. + +Use those instead of talking directly to the GPIOs from userspace; they +integrate with kernel frameworks better than your userspace code could. +Needless to say, just using the appropriate kernel drivers will simplify and +speed up your embedded hacking in particular by providing ready-made components. diff --git a/Documentation/driver-api/gpio/index.rst b/Documentation/driver-api/gpio/index.rst new file mode 100644 index 0000000000..1d48fe248f --- /dev/null +++ b/Documentation/driver-api/gpio/index.rst @@ -0,0 +1,50 @@ +=================================== +General Purpose Input/Output (GPIO) +=================================== + +Contents: + +.. toctree:: + :maxdepth: 2 + + intro + using-gpio + driver + consumer + board + drivers-on-gpio + legacy + bt8xxgpio + +Core +==== + +.. kernel-doc:: include/linux/gpio/driver.h + :internal: + +.. kernel-doc:: drivers/gpio/gpiolib.c + :export: + +ACPI support +============ + +.. kernel-doc:: drivers/gpio/gpiolib-acpi.c + :export: + +Device tree support +=================== + +.. kernel-doc:: drivers/gpio/gpiolib-of.c + :export: + +Device-managed API +================== + +.. kernel-doc:: drivers/gpio/gpiolib-devres.c + :export: + +sysfs helpers +============= + +.. kernel-doc:: drivers/gpio/gpiolib-sysfs.c + :export: diff --git a/Documentation/driver-api/gpio/intro.rst b/Documentation/driver-api/gpio/intro.rst new file mode 100644 index 0000000000..c9c19243b9 --- /dev/null +++ b/Documentation/driver-api/gpio/intro.rst @@ -0,0 +1,124 @@ +============ +Introduction +============ + + +GPIO Interfaces +=============== + +The documents in this directory give detailed instructions on how to access +GPIOs in drivers, and how to write a driver for a device that provides GPIOs +itself. + +Due to the history of GPIO interfaces in the kernel, there are two different +ways to obtain and use GPIOs: + + - The descriptor-based interface is the preferred way to manipulate GPIOs, + and is described by all the files in this directory excepted legacy.rst. + - The legacy integer-based interface which is considered deprecated (but still + usable for compatibility reasons) is documented in legacy.rst. + +The remainder of this document applies to the new descriptor-based interface. +legacy.rst contains the same information applied to the legacy +integer-based interface. + + +What is a GPIO? +=============== + +A "General Purpose Input/Output" (GPIO) is a flexible software-controlled +digital signal. They are provided from many kinds of chips, and are familiar +to Linux developers working with embedded and custom hardware. Each GPIO +represents a bit connected to a particular pin, or "ball" on Ball Grid Array +(BGA) packages. Board schematics show which external hardware connects to +which GPIOs. Drivers can be written generically, so that board setup code +passes such pin configuration data to drivers. + +System-on-Chip (SOC) processors heavily rely on GPIOs. In some cases, every +non-dedicated pin can be configured as a GPIO; and most chips have at least +several dozen of them. Programmable logic devices (like FPGAs) can easily +provide GPIOs; multifunction chips like power managers, and audio codecs +often have a few such pins to help with pin scarcity on SOCs; and there are +also "GPIO Expander" chips that connect using the I2C or SPI serial buses. +Most PC southbridges have a few dozen GPIO-capable pins (with only the BIOS +firmware knowing how they're used). + +The exact capabilities of GPIOs vary between systems. Common options: + + - Output values are writable (high=1, low=0). Some chips also have + options about how that value is driven, so that for example only one + value might be driven, supporting "wire-OR" and similar schemes for the + other value (notably, "open drain" signaling). + + - Input values are likewise readable (1, 0). Some chips support readback + of pins configured as "output", which is very useful in such "wire-OR" + cases (to support bidirectional signaling). GPIO controllers may have + input de-glitch/debounce logic, sometimes with software controls. + + - Inputs can often be used as IRQ signals, often edge triggered but + sometimes level triggered. Such IRQs may be configurable as system + wakeup events, to wake the system from a low power state. + + - Usually a GPIO will be configurable as either input or output, as needed + by different product boards; single direction ones exist too. + + - Most GPIOs can be accessed while holding spinlocks, but those accessed + through a serial bus normally can't. Some systems support both types. + +On a given board each GPIO is used for one specific purpose like monitoring +MMC/SD card insertion/removal, detecting card write-protect status, driving +a LED, configuring a transceiver, bit-banging a serial bus, poking a hardware +watchdog, sensing a switch, and so on. + + +Common GPIO Properties +====================== + +These properties are met through all the other documents of the GPIO interface +and it is useful to understand them, especially if you need to define GPIO +mappings. + +Active-High and Active-Low +-------------------------- +It is natural to assume that a GPIO is "active" when its output signal is 1 +("high"), and inactive when it is 0 ("low"). However in practice the signal of a +GPIO may be inverted before is reaches its destination, or a device could decide +to have different conventions about what "active" means. Such decisions should +be transparent to device drivers, therefore it is possible to define a GPIO as +being either active-high ("1" means "active", the default) or active-low ("0" +means "active") so that drivers only need to worry about the logical signal and +not about what happens at the line level. + +Open Drain and Open Source +-------------------------- +Sometimes shared signals need to use "open drain" (where only the low signal +level is actually driven), or "open source" (where only the high signal level is +driven) signaling. That term applies to CMOS transistors; "open collector" is +used for TTL. A pullup or pulldown resistor causes the high or low signal level. +This is sometimes called a "wire-AND"; or more practically, from the negative +logic (low=true) perspective this is a "wire-OR". + +One common example of an open drain signal is a shared active-low IRQ line. +Also, bidirectional data bus signals sometimes use open drain signals. + +Some GPIO controllers directly support open drain and open source outputs; many +don't. When you need open drain signaling but your hardware doesn't directly +support it, there's a common idiom you can use to emulate it with any GPIO pin +that can be used as either an input or an output: + + **LOW**: ``gpiod_direction_output(gpio, 0)`` ... this drives the signal and + overrides the pullup. + + **HIGH**: ``gpiod_direction_input(gpio)`` ... this turns off the output, so + the pullup (or some other device) controls the signal. + +The same logic can be applied to emulate open source signaling, by driving the +high signal and configuring the GPIO as input for low. This open drain/open +source emulation can be handled transparently by the GPIO framework. + +If you are "driving" the signal high but gpiod_get_value(gpio) reports a low +value (after the appropriate rise time passes), you know some other component is +driving the shared signal low. That's not necessarily an error. As one common +example, that's how I2C clocks are stretched: a slave that needs a slower clock +delays the rising edge of SCK, and the I2C master adjusts its signaling rate +accordingly. diff --git a/Documentation/driver-api/gpio/legacy.rst b/Documentation/driver-api/gpio/legacy.rst new file mode 100644 index 0000000000..b650591479 --- /dev/null +++ b/Documentation/driver-api/gpio/legacy.rst @@ -0,0 +1,695 @@ +====================== +Legacy GPIO Interfaces +====================== + +This provides an overview of GPIO access conventions on Linux. + +These calls use the gpio_* naming prefix. No other calls should use that +prefix, or the related __gpio_* prefix. + + +What is a GPIO? +=============== +A "General Purpose Input/Output" (GPIO) is a flexible software-controlled +digital signal. They are provided from many kinds of chip, and are familiar +to Linux developers working with embedded and custom hardware. Each GPIO +represents a bit connected to a particular pin, or "ball" on Ball Grid Array +(BGA) packages. Board schematics show which external hardware connects to +which GPIOs. Drivers can be written generically, so that board setup code +passes such pin configuration data to drivers. + +System-on-Chip (SOC) processors heavily rely on GPIOs. In some cases, every +non-dedicated pin can be configured as a GPIO; and most chips have at least +several dozen of them. Programmable logic devices (like FPGAs) can easily +provide GPIOs; multifunction chips like power managers, and audio codecs +often have a few such pins to help with pin scarcity on SOCs; and there are +also "GPIO Expander" chips that connect using the I2C or SPI serial busses. +Most PC southbridges have a few dozen GPIO-capable pins (with only the BIOS +firmware knowing how they're used). + +The exact capabilities of GPIOs vary between systems. Common options: + + - Output values are writable (high=1, low=0). Some chips also have + options about how that value is driven, so that for example only one + value might be driven ... supporting "wire-OR" and similar schemes + for the other value (notably, "open drain" signaling). + + - Input values are likewise readable (1, 0). Some chips support readback + of pins configured as "output", which is very useful in such "wire-OR" + cases (to support bidirectional signaling). GPIO controllers may have + input de-glitch/debounce logic, sometimes with software controls. + + - Inputs can often be used as IRQ signals, often edge triggered but + sometimes level triggered. Such IRQs may be configurable as system + wakeup events, to wake the system from a low power state. + + - Usually a GPIO will be configurable as either input or output, as needed + by different product boards; single direction ones exist too. + + - Most GPIOs can be accessed while holding spinlocks, but those accessed + through a serial bus normally can't. Some systems support both types. + +On a given board each GPIO is used for one specific purpose like monitoring +MMC/SD card insertion/removal, detecting card writeprotect status, driving +a LED, configuring a transceiver, bitbanging a serial bus, poking a hardware +watchdog, sensing a switch, and so on. + + +GPIO conventions +================ +Note that this is called a "convention" because you don't need to do it this +way, and it's no crime if you don't. There **are** cases where portability +is not the main issue; GPIOs are often used for the kind of board-specific +glue logic that may even change between board revisions, and can't ever be +used on a board that's wired differently. Only least-common-denominator +functionality can be very portable. Other features are platform-specific, +and that can be critical for glue logic. + +Plus, this doesn't require any implementation framework, just an interface. +One platform might implement it as simple inline functions accessing chip +registers; another might implement it by delegating through abstractions +used for several very different kinds of GPIO controller. (There is some +optional code supporting such an implementation strategy, described later +in this document, but drivers acting as clients to the GPIO interface must +not care how it's implemented.) + +That said, if the convention is supported on their platform, drivers should +use it when possible. Platforms must select GPIOLIB if GPIO functionality +is strictly required. Drivers that can't work without +standard GPIO calls should have Kconfig entries which depend on GPIOLIB. The +GPIO calls are available, either as "real code" or as optimized-away stubs, +when drivers use the include file: + + #include <linux/gpio.h> + +If you stick to this convention then it'll be easier for other developers to +see what your code is doing, and help maintain it. + +Note that these operations include I/O barriers on platforms which need to +use them; drivers don't need to add them explicitly. + + +Identifying GPIOs +----------------- +GPIOs are identified by unsigned integers in the range 0..MAX_INT. That +reserves "negative" numbers for other purposes like marking signals as +"not available on this board", or indicating faults. Code that doesn't +touch the underlying hardware treats these integers as opaque cookies. + +Platforms define how they use those integers, and usually #define symbols +for the GPIO lines so that board-specific setup code directly corresponds +to the relevant schematics. In contrast, drivers should only use GPIO +numbers passed to them from that setup code, using platform_data to hold +board-specific pin configuration data (along with other board specific +data they need). That avoids portability problems. + +So for example one platform uses numbers 32-159 for GPIOs; while another +uses numbers 0..63 with one set of GPIO controllers, 64-79 with another +type of GPIO controller, and on one particular board 80-95 with an FPGA. +The numbers need not be contiguous; either of those platforms could also +use numbers 2000-2063 to identify GPIOs in a bank of I2C GPIO expanders. + +If you want to initialize a structure with an invalid GPIO number, use +some negative number (perhaps "-EINVAL"); that will never be valid. To +test if such number from such a structure could reference a GPIO, you +may use this predicate: + + int gpio_is_valid(int number); + +A number that's not valid will be rejected by calls which may request +or free GPIOs (see below). Other numbers may also be rejected; for +example, a number might be valid but temporarily unused on a given board. + +Whether a platform supports multiple GPIO controllers is a platform-specific +implementation issue, as are whether that support can leave "holes" in the space +of GPIO numbers, and whether new controllers can be added at runtime. Such issues +can affect things including whether adjacent GPIO numbers are both valid. + +Using GPIOs +----------- +The first thing a system should do with a GPIO is allocate it, using +the gpio_request() call; see later. + +One of the next things to do with a GPIO, often in board setup code when +setting up a platform_device using the GPIO, is mark its direction:: + + /* set as input or output, returning 0 or negative errno */ + int gpio_direction_input(unsigned gpio); + int gpio_direction_output(unsigned gpio, int value); + +The return value is zero for success, else a negative errno. It should +be checked, since the get/set calls don't have error returns and since +misconfiguration is possible. You should normally issue these calls from +a task context. However, for spinlock-safe GPIOs it's OK to use them +before tasking is enabled, as part of early board setup. + +For output GPIOs, the value provided becomes the initial output value. +This helps avoid signal glitching during system startup. + +For compatibility with legacy interfaces to GPIOs, setting the direction +of a GPIO implicitly requests that GPIO (see below) if it has not been +requested already. That compatibility is being removed from the optional +gpiolib framework. + +Setting the direction can fail if the GPIO number is invalid, or when +that particular GPIO can't be used in that mode. It's generally a bad +idea to rely on boot firmware to have set the direction correctly, since +it probably wasn't validated to do more than boot Linux. (Similarly, +that board setup code probably needs to multiplex that pin as a GPIO, +and configure pullups/pulldowns appropriately.) + + +Spinlock-Safe GPIO access +------------------------- +Most GPIO controllers can be accessed with memory read/write instructions. +Those don't need to sleep, and can safely be done from inside hard +(nonthreaded) IRQ handlers and similar contexts. + +Use the following calls to access such GPIOs:: + + /* GPIO INPUT: return zero or nonzero */ + int gpio_get_value(unsigned gpio); + + /* GPIO OUTPUT */ + void gpio_set_value(unsigned gpio, int value); + +The values are boolean, zero for low, nonzero for high. When reading the +value of an output pin, the value returned should be what's seen on the +pin ... that won't always match the specified output value, because of +issues including open-drain signaling and output latencies. + +The get/set calls have no error returns because "invalid GPIO" should have +been reported earlier from gpio_direction_*(). However, note that not all +platforms can read the value of output pins; those that can't should always +return zero. Also, using these calls for GPIOs that can't safely be accessed +without sleeping (see below) is an error. + +Platform-specific implementations are encouraged to optimize the two +calls to access the GPIO value in cases where the GPIO number (and for +output, value) are constant. It's normal for them to need only a couple +of instructions in such cases (reading or writing a hardware register), +and not to need spinlocks. Such optimized calls can make bitbanging +applications a lot more efficient (in both space and time) than spending +dozens of instructions on subroutine calls. + + +GPIO access that may sleep +-------------------------- +Some GPIO controllers must be accessed using message based busses like I2C +or SPI. Commands to read or write those GPIO values require waiting to +get to the head of a queue to transmit a command and get its response. +This requires sleeping, which can't be done from inside IRQ handlers. +To access such GPIOs, a different set of accessors is defined:: + + /* GPIO INPUT: return zero or nonzero, might sleep */ + int gpio_get_value_cansleep(unsigned gpio); + + /* GPIO OUTPUT, might sleep */ + void gpio_set_value_cansleep(unsigned gpio, int value); + +Accessing such GPIOs requires a context which may sleep, for example +a threaded IRQ handler, and those accessors must be used instead of +spinlock-safe accessors without the cansleep() name suffix. + +Other than the fact that these accessors might sleep, and will work +on GPIOs that can't be accessed from hardIRQ handlers, these calls act +the same as the spinlock-safe calls. + +**IN ADDITION** calls to setup and configure such GPIOs must be made +from contexts which may sleep, since they may need to access the GPIO +controller chip too (These setup calls are usually made from board +setup or driver probe/teardown code, so this is an easy constraint.):: + + gpio_direction_input() + gpio_direction_output() + gpio_request() + + ## gpio_request_one() + ## gpio_request_array() + ## gpio_free_array() + + gpio_free() + + +Claiming and Releasing GPIOs +---------------------------- +To help catch system configuration errors, two calls are defined:: + + /* request GPIO, returning 0 or negative errno. + * non-null labels may be useful for diagnostics. + */ + int gpio_request(unsigned gpio, const char *label); + + /* release previously-claimed GPIO */ + void gpio_free(unsigned gpio); + +Passing invalid GPIO numbers to gpio_request() will fail, as will requesting +GPIOs that have already been claimed with that call. The return value of +gpio_request() must be checked. You should normally issue these calls from +a task context. However, for spinlock-safe GPIOs it's OK to request GPIOs +before tasking is enabled, as part of early board setup. + +These calls serve two basic purposes. One is marking the signals which +are actually in use as GPIOs, for better diagnostics; systems may have +several hundred potential GPIOs, but often only a dozen are used on any +given board. Another is to catch conflicts, identifying errors when +(a) two or more drivers wrongly think they have exclusive use of that +signal, or (b) something wrongly believes it's safe to remove drivers +needed to manage a signal that's in active use. That is, requesting a +GPIO can serve as a kind of lock. + +Some platforms may also use knowledge about what GPIOs are active for +power management, such as by powering down unused chip sectors and, more +easily, gating off unused clocks. + +For GPIOs that use pins known to the pinctrl subsystem, that subsystem should +be informed of their use; a gpiolib driver's .request() operation may call +pinctrl_gpio_request(), and a gpiolib driver's .free() operation may call +pinctrl_gpio_free(). The pinctrl subsystem allows a pinctrl_gpio_request() +to succeed concurrently with a pin or pingroup being "owned" by a device for +pin multiplexing. + +Any programming of pin multiplexing hardware that is needed to route the +GPIO signal to the appropriate pin should occur within a GPIO driver's +.direction_input() or .direction_output() operations, and occur after any +setup of an output GPIO's value. This allows a glitch-free migration from a +pin's special function to GPIO. This is sometimes required when using a GPIO +to implement a workaround on signals typically driven by a non-GPIO HW block. + +Some platforms allow some or all GPIO signals to be routed to different pins. +Similarly, other aspects of the GPIO or pin may need to be configured, such as +pullup/pulldown. Platform software should arrange that any such details are +configured prior to gpio_request() being called for those GPIOs, e.g. using +the pinctrl subsystem's mapping table, so that GPIO users need not be aware +of these details. + +Also note that it's your responsibility to have stopped using a GPIO +before you free it. + +Considering in most cases GPIOs are actually configured right after they +are claimed, three additional calls are defined:: + + /* request a single GPIO, with initial configuration specified by + * 'flags', identical to gpio_request() wrt other arguments and + * return value + */ + int gpio_request_one(unsigned gpio, unsigned long flags, const char *label); + + /* request multiple GPIOs in a single call + */ + int gpio_request_array(struct gpio *array, size_t num); + + /* release multiple GPIOs in a single call + */ + void gpio_free_array(struct gpio *array, size_t num); + +where 'flags' is currently defined to specify the following properties: + + * GPIOF_DIR_IN - to configure direction as input + * GPIOF_DIR_OUT - to configure direction as output + + * GPIOF_INIT_LOW - as output, set initial level to LOW + * GPIOF_INIT_HIGH - as output, set initial level to HIGH + +since GPIOF_INIT_* are only valid when configured as output, so group valid +combinations as: + + * GPIOF_IN - configure as input + * GPIOF_OUT_INIT_LOW - configured as output, initial level LOW + * GPIOF_OUT_INIT_HIGH - configured as output, initial level HIGH + +Further more, to ease the claim/release of multiple GPIOs, 'struct gpio' is +introduced to encapsulate all three fields as:: + + struct gpio { + unsigned gpio; + unsigned long flags; + const char *label; + }; + +A typical example of usage:: + + static struct gpio leds_gpios[] = { + { 32, GPIOF_OUT_INIT_HIGH, "Power LED" }, /* default to ON */ + { 33, GPIOF_OUT_INIT_LOW, "Green LED" }, /* default to OFF */ + { 34, GPIOF_OUT_INIT_LOW, "Red LED" }, /* default to OFF */ + { 35, GPIOF_OUT_INIT_LOW, "Blue LED" }, /* default to OFF */ + { ... }, + }; + + err = gpio_request_one(31, GPIOF_IN, "Reset Button"); + if (err) + ... + + err = gpio_request_array(leds_gpios, ARRAY_SIZE(leds_gpios)); + if (err) + ... + + gpio_free_array(leds_gpios, ARRAY_SIZE(leds_gpios)); + + +GPIOs mapped to IRQs +-------------------- +GPIO numbers are unsigned integers; so are IRQ numbers. These make up +two logically distinct namespaces (GPIO 0 need not use IRQ 0). You can +map between them using calls like:: + + /* map GPIO numbers to IRQ numbers */ + int gpio_to_irq(unsigned gpio); + +Those return either the corresponding number in the other namespace, or +else a negative errno code if the mapping can't be done. (For example, +some GPIOs can't be used as IRQs.) It is an unchecked error to use a GPIO +number that wasn't set up as an input using gpio_direction_input(), or +to use an IRQ number that didn't originally come from gpio_to_irq(). + +These two mapping calls are expected to cost on the order of a single +addition or subtraction. They're not allowed to sleep. + +Non-error values returned from gpio_to_irq() can be passed to request_irq() +or free_irq(). They will often be stored into IRQ resources for platform +devices, by the board-specific initialization code. Note that IRQ trigger +options are part of the IRQ interface, e.g. IRQF_TRIGGER_FALLING, as are +system wakeup capabilities. + + +Emulating Open Drain Signals +---------------------------- +Sometimes shared signals need to use "open drain" signaling, where only the +low signal level is actually driven. (That term applies to CMOS transistors; +"open collector" is used for TTL.) A pullup resistor causes the high signal +level. This is sometimes called a "wire-AND"; or more practically, from the +negative logic (low=true) perspective this is a "wire-OR". + +One common example of an open drain signal is a shared active-low IRQ line. +Also, bidirectional data bus signals sometimes use open drain signals. + +Some GPIO controllers directly support open drain outputs; many don't. When +you need open drain signaling but your hardware doesn't directly support it, +there's a common idiom you can use to emulate it with any GPIO pin that can +be used as either an input or an output: + + LOW: gpio_direction_output(gpio, 0) ... this drives the signal + and overrides the pullup. + + HIGH: gpio_direction_input(gpio) ... this turns off the output, + so the pullup (or some other device) controls the signal. + +If you are "driving" the signal high but gpio_get_value(gpio) reports a low +value (after the appropriate rise time passes), you know some other component +is driving the shared signal low. That's not necessarily an error. As one +common example, that's how I2C clocks are stretched: a slave that needs a +slower clock delays the rising edge of SCK, and the I2C master adjusts its +signaling rate accordingly. + + +GPIO controllers and the pinctrl subsystem +------------------------------------------ + +A GPIO controller on a SOC might be tightly coupled with the pinctrl +subsystem, in the sense that the pins can be used by other functions +together with an optional gpio feature. We have already covered the +case where e.g. a GPIO controller need to reserve a pin or set the +direction of a pin by calling any of:: + + pinctrl_gpio_request() + pinctrl_gpio_free() + pinctrl_gpio_direction_input() + pinctrl_gpio_direction_output() + +But how does the pin control subsystem cross-correlate the GPIO +numbers (which are a global business) to a certain pin on a certain +pin controller? + +This is done by registering "ranges" of pins, which are essentially +cross-reference tables. These are described in +Documentation/driver-api/pin-control.rst + +While the pin allocation is totally managed by the pinctrl subsystem, +gpio (under gpiolib) is still maintained by gpio drivers. It may happen +that different pin ranges in a SoC is managed by different gpio drivers. + +This makes it logical to let gpio drivers announce their pin ranges to +the pin ctrl subsystem before it will call 'pinctrl_gpio_request' in order +to request the corresponding pin to be prepared by the pinctrl subsystem +before any gpio usage. + +For this, the gpio controller can register its pin range with pinctrl +subsystem. There are two ways of doing it currently: with or without DT. + +For with DT support refer to Documentation/devicetree/bindings/gpio/gpio.txt. + +For non-DT support, user can call gpiochip_add_pin_range() with appropriate +parameters to register a range of gpio pins with a pinctrl driver. For this +exact name string of pinctrl device has to be passed as one of the +argument to this routine. + + +What do these conventions omit? +=============================== +One of the biggest things these conventions omit is pin multiplexing, since +this is highly chip-specific and nonportable. One platform might not need +explicit multiplexing; another might have just two options for use of any +given pin; another might have eight options per pin; another might be able +to route a given GPIO to any one of several pins. (Yes, those examples all +come from systems that run Linux today.) + +Related to multiplexing is configuration and enabling of the pullups or +pulldowns integrated on some platforms. Not all platforms support them, +or support them in the same way; and any given board might use external +pullups (or pulldowns) so that the on-chip ones should not be used. +(When a circuit needs 5 kOhm, on-chip 100 kOhm resistors won't do.) +Likewise drive strength (2 mA vs 20 mA) and voltage (1.8V vs 3.3V) is a +platform-specific issue, as are models like (not) having a one-to-one +correspondence between configurable pins and GPIOs. + +There are other system-specific mechanisms that are not specified here, +like the aforementioned options for input de-glitching and wire-OR output. +Hardware may support reading or writing GPIOs in gangs, but that's usually +configuration dependent: for GPIOs sharing the same bank. (GPIOs are +commonly grouped in banks of 16 or 32, with a given SOC having several such +banks.) Some systems can trigger IRQs from output GPIOs, or read values +from pins not managed as GPIOs. Code relying on such mechanisms will +necessarily be nonportable. + +Dynamic definition of GPIOs is not currently standard; for example, as +a side effect of configuring an add-on board with some GPIO expanders. + + +GPIO implementor's framework (OPTIONAL) +======================================= +As noted earlier, there is an optional implementation framework making it +easier for platforms to support different kinds of GPIO controller using +the same programming interface. This framework is called "gpiolib". + +As a debugging aid, if debugfs is available a /sys/kernel/debug/gpio file +will be found there. That will list all the controllers registered through +this framework, and the state of the GPIOs currently in use. + + +Controller Drivers: gpio_chip +----------------------------- +In this framework each GPIO controller is packaged as a "struct gpio_chip" +with information common to each controller of that type: + + - methods to establish GPIO direction + - methods used to access GPIO values + - flag saying whether calls to its methods may sleep + - optional debugfs dump method (showing extra state like pullup config) + - label for diagnostics + +There is also per-instance data, which may come from device.platform_data: +the number of its first GPIO, and how many GPIOs it exposes. + +The code implementing a gpio_chip should support multiple instances of the +controller, possibly using the driver model. That code will configure each +gpio_chip and issue gpiochip_add(). Removing a GPIO controller should be +rare; use gpiochip_remove() when it is unavoidable. + +Most often a gpio_chip is part of an instance-specific structure with state +not exposed by the GPIO interfaces, such as addressing, power management, +and more. Chips such as codecs will have complex non-GPIO state. + +Any debugfs dump method should normally ignore signals which haven't been +requested as GPIOs. They can use gpiochip_is_requested(), which returns +either NULL or the label associated with that GPIO when it was requested. + + +Platform Support +---------------- +To force-enable this framework, a platform's Kconfig will "select" GPIOLIB, +else it is up to the user to configure support for GPIO. + +If neither of these options are selected, the platform does not support +GPIOs through GPIO-lib and the code cannot be enabled by the user. + +Trivial implementations of those functions can directly use framework +code, which always dispatches through the gpio_chip:: + + #define gpio_get_value __gpio_get_value + #define gpio_set_value __gpio_set_value + +Fancier implementations could instead define those as inline functions with +logic optimizing access to specific SOC-based GPIOs. For example, if the +referenced GPIO is the constant "12", getting or setting its value could +cost as little as two or three instructions, never sleeping. When such an +optimization is not possible those calls must delegate to the framework +code, costing at least a few dozen instructions. For bitbanged I/O, such +instruction savings can be significant. + +For SOCs, platform-specific code defines and registers gpio_chip instances +for each bank of on-chip GPIOs. Those GPIOs should be numbered/labeled to +match chip vendor documentation, and directly match board schematics. They +may well start at zero and go up to a platform-specific limit. Such GPIOs +are normally integrated into platform initialization to make them always be +available, from arch_initcall() or earlier; they can often serve as IRQs. + + +Board Support +------------- +For external GPIO controllers -- such as I2C or SPI expanders, ASICs, multi +function devices, FPGAs or CPLDs -- most often board-specific code handles +registering controller devices and ensures that their drivers know what GPIO +numbers to use with gpiochip_add(). Their numbers often start right after +platform-specific GPIOs. + +For example, board setup code could create structures identifying the range +of GPIOs that chip will expose, and passes them to each GPIO expander chip +using platform_data. Then the chip driver's probe() routine could pass that +data to gpiochip_add(). + +Initialization order can be important. For example, when a device relies on +an I2C-based GPIO, its probe() routine should only be called after that GPIO +becomes available. That may mean the device should not be registered until +calls for that GPIO can work. One way to address such dependencies is for +such gpio_chip controllers to provide setup() and teardown() callbacks to +board specific code; those board specific callbacks would register devices +once all the necessary resources are available, and remove them later when +the GPIO controller device becomes unavailable. + + +Sysfs Interface for Userspace (OPTIONAL) +======================================== +Platforms which use the "gpiolib" implementors framework may choose to +configure a sysfs user interface to GPIOs. This is different from the +debugfs interface, since it provides control over GPIO direction and +value instead of just showing a gpio state summary. Plus, it could be +present on production systems without debugging support. + +Given appropriate hardware documentation for the system, userspace could +know for example that GPIO #23 controls the write protect line used to +protect boot loader segments in flash memory. System upgrade procedures +may need to temporarily remove that protection, first importing a GPIO, +then changing its output state, then updating the code before re-enabling +the write protection. In normal use, GPIO #23 would never be touched, +and the kernel would have no need to know about it. + +Again depending on appropriate hardware documentation, on some systems +userspace GPIO can be used to determine system configuration data that +standard kernels won't know about. And for some tasks, simple userspace +GPIO drivers could be all that the system really needs. + +Note that standard kernel drivers exist for common "LEDs and Buttons" +GPIO tasks: "leds-gpio" and "gpio_keys", respectively. Use those +instead of talking directly to the GPIOs; they integrate with kernel +frameworks better than your userspace code could. + + +Paths in Sysfs +-------------- +There are three kinds of entry in /sys/class/gpio: + + - Control interfaces used to get userspace control over GPIOs; + + - GPIOs themselves; and + + - GPIO controllers ("gpio_chip" instances). + +That's in addition to standard files including the "device" symlink. + +The control interfaces are write-only: + + /sys/class/gpio/ + + "export" ... Userspace may ask the kernel to export control of + a GPIO to userspace by writing its number to this file. + + Example: "echo 19 > export" will create a "gpio19" node + for GPIO #19, if that's not requested by kernel code. + + "unexport" ... Reverses the effect of exporting to userspace. + + Example: "echo 19 > unexport" will remove a "gpio19" + node exported using the "export" file. + +GPIO signals have paths like /sys/class/gpio/gpio42/ (for GPIO #42) +and have the following read/write attributes: + + /sys/class/gpio/gpioN/ + + "direction" ... reads as either "in" or "out". This value may + normally be written. Writing as "out" defaults to + initializing the value as low. To ensure glitch free + operation, values "low" and "high" may be written to + configure the GPIO as an output with that initial value. + + Note that this attribute *will not exist* if the kernel + doesn't support changing the direction of a GPIO, or + it was exported by kernel code that didn't explicitly + allow userspace to reconfigure this GPIO's direction. + + "value" ... reads as either 0 (low) or 1 (high). If the GPIO + is configured as an output, this value may be written; + any nonzero value is treated as high. + + If the pin can be configured as interrupt-generating interrupt + and if it has been configured to generate interrupts (see the + description of "edge"), you can poll(2) on that file and + poll(2) will return whenever the interrupt was triggered. If + you use poll(2), set the events POLLPRI. If you use select(2), + set the file descriptor in exceptfds. After poll(2) returns, + either lseek(2) to the beginning of the sysfs file and read the + new value or close the file and re-open it to read the value. + + "edge" ... reads as either "none", "rising", "falling", or + "both". Write these strings to select the signal edge(s) + that will make poll(2) on the "value" file return. + + This file exists only if the pin can be configured as an + interrupt generating input pin. + + "active_low" ... reads as either 0 (false) or 1 (true). Write + any nonzero value to invert the value attribute both + for reading and writing. Existing and subsequent + poll(2) support configuration via the edge attribute + for "rising" and "falling" edges will follow this + setting. + +GPIO controllers have paths like /sys/class/gpio/gpiochip42/ (for the +controller implementing GPIOs starting at #42) and have the following +read-only attributes: + + /sys/class/gpio/gpiochipN/ + + "base" ... same as N, the first GPIO managed by this chip + + "label" ... provided for diagnostics (not always unique) + + "ngpio" ... how many GPIOs this manges (N to N + ngpio - 1) + +Board documentation should in most cases cover what GPIOs are used for +what purposes. However, those numbers are not always stable; GPIOs on +a daughtercard might be different depending on the base board being used, +or other cards in the stack. In such cases, you may need to use the +gpiochip nodes (possibly in conjunction with schematics) to determine +the correct GPIO number to use for a given signal. + + +API Reference +============= + +The functions listed in this section are deprecated. The GPIO descriptor based +API should be used in new code. + +.. kernel-doc:: drivers/gpio/gpiolib-legacy.c + :export: diff --git a/Documentation/driver-api/gpio/using-gpio.rst b/Documentation/driver-api/gpio/using-gpio.rst new file mode 100644 index 0000000000..894d88855d --- /dev/null +++ b/Documentation/driver-api/gpio/using-gpio.rst @@ -0,0 +1,50 @@ +========================= +Using GPIO Lines in Linux +========================= + +The Linux kernel exists to abstract and present hardware to users. GPIO lines +as such are normally not user facing abstractions. The most obvious, natural +and preferred way to use GPIO lines is to let kernel hardware drivers deal +with them. + +For examples of already existing generic drivers that will also be good +examples for any other kernel drivers you want to author, refer to +Documentation/driver-api/gpio/drivers-on-gpio.rst + +For any kind of mass produced system you want to support, such as servers, +laptops, phones, tablets, routers, and any consumer or office or business goods +using appropriate kernel drivers is paramount. Submit your code for inclusion +in the upstream Linux kernel when you feel it is mature enough and you will get +help to refine it, see Documentation/process/submitting-patches.rst. + +In Linux GPIO lines also have a userspace ABI. + +The userspace ABI is intended for one-off deployments. Examples are prototypes, +factory lines, maker community projects, workshop specimen, production tools, +industrial automation, PLC-type use cases, door controllers, in short a piece +of specialized equipment that is not produced by the numbers, requiring +operators to have a deep knowledge of the equipment and knows about the +software-hardware interface to be set up. They should not have a natural fit +to any existing kernel subsystem and not be a good fit for an operating system, +because of not being reusable or abstract enough, or involving a lot of non +computer hardware related policy. + +Applications that have a good reason to use the industrial I/O (IIO) subsystem +from userspace will likely be a good fit for using GPIO lines from userspace as +well. + +Do not under any circumstances abuse the GPIO userspace ABI to cut corners in +any product development projects. If you use it for prototyping, then do not +productify the prototype: rewrite it using proper kernel drivers. Do not under +any circumstances deploy any uniform products using GPIO from userspace. + +The userspace ABI is a character device for each GPIO hardware unit (GPIO chip). +These devices will appear on the system as ``/dev/gpiochip0`` thru +``/dev/gpiochipN``. Examples of how to directly use the userspace ABI can be +found in the kernel tree ``tools/gpio`` subdirectory. + +For structured and managed applications, we recommend that you make use of the +libgpiod_ library. This provides helper abstractions, command line utilities +and arbitration for multiple simultaneous consumers on the same GPIO chip. + +.. _libgpiod: https://git.kernel.org/pub/scm/libs/libgpiod/libgpiod.git/ diff --git a/Documentation/driver-api/hsi.rst b/Documentation/driver-api/hsi.rst new file mode 100644 index 0000000000..01b6bebfbd --- /dev/null +++ b/Documentation/driver-api/hsi.rst @@ -0,0 +1,88 @@ +High Speed Synchronous Serial Interface (HSI) +============================================= + +Introduction +--------------- + +High Speed Synchronous Interface (HSI) is a full duplex, low latency protocol, +that is optimized for die-level interconnect between an Application Processor +and a Baseband chipset. It has been specified by the MIPI alliance in 2003 and +implemented by multiple vendors since then. + +The HSI interface supports full duplex communication over multiple channels +(typically 8) and is capable of reaching speeds up to 200 Mbit/s. + +The serial protocol uses two signals, DATA and FLAG as combined data and clock +signals and an additional READY signal for flow control. An additional WAKE +signal can be used to wakeup the chips from standby modes. The signals are +commonly prefixed by AC for signals going from the application die to the +cellular die and CA for signals going the other way around. + +:: + + +------------+ +---------------+ + | Cellular | | Application | + | Die | | Die | + | | - - - - - - CAWAKE - - - - - - >| | + | T|------------ CADATA ------------>|R | + | X|------------ CAFLAG ------------>|X | + | |<----------- ACREADY ------------| | + | | | | + | | | | + | |< - - - - - ACWAKE - - - - - - -| | + | R|<----------- ACDATA -------------|T | + | X|<----------- ACFLAG -------------|X | + | |------------ CAREADY ----------->| | + | | | | + | | | | + +------------+ +---------------+ + +HSI Subsystem in Linux +------------------------- + +In the Linux kernel the hsi subsystem is supposed to be used for HSI devices. +The hsi subsystem contains drivers for hsi controllers including support for +multi-port controllers and provides a generic API for using the HSI ports. + +It also contains HSI client drivers, which make use of the generic API to +implement a protocol used on the HSI interface. These client drivers can +use an arbitrary number of channels. + +hsi-char Device +------------------ + +Each port automatically registers a generic client driver called hsi_char, +which provides a character device for userspace representing the HSI port. +It can be used to communicate via HSI from userspace. Userspace may +configure the hsi_char device using the following ioctl commands: + +HSC_RESET + flush the HSI port + +HSC_SET_PM + enable or disable the client. + +HSC_SEND_BREAK + send break + +HSC_SET_RX + set RX configuration + +HSC_GET_RX + get RX configuration + +HSC_SET_TX + set TX configuration + +HSC_GET_TX + get TX configuration + +The kernel HSI API +------------------ + +.. kernel-doc:: include/linux/hsi/hsi.h + :internal: + +.. kernel-doc:: drivers/hsi/hsi_core.c + :export: + diff --git a/Documentation/driver-api/hte/hte.rst b/Documentation/driver-api/hte/hte.rst new file mode 100644 index 0000000000..153f3233c1 --- /dev/null +++ b/Documentation/driver-api/hte/hte.rst @@ -0,0 +1,79 @@ +.. SPDX-License-Identifier: GPL-2.0+ + +============================================ +The Linux Hardware Timestamping Engine (HTE) +============================================ + +:Author: Dipen Patel + +Introduction +------------ + +Certain devices have built in hardware timestamping engines which can +monitor sets of system signals, lines, buses etc... in realtime for state +change; upon detecting the change they can automatically store the timestamp at +the moment of occurrence. Such functionality may help achieve better accuracy +in obtaining timestamps than using software counterparts i.e. ktime and +friends. + +This document describes the API that can be used by hardware timestamping +engine provider and consumer drivers that want to use the hardware timestamping +engine (HTE) framework. Both consumers and providers must include +``#include <linux/hte.h>``. + +The HTE framework APIs for the providers +---------------------------------------- + +.. kernel-doc:: drivers/hte/hte.c + :functions: devm_hte_register_chip hte_push_ts_ns + +The HTE framework APIs for the consumers +---------------------------------------- + +.. kernel-doc:: drivers/hte/hte.c + :functions: hte_init_line_attr hte_ts_get hte_ts_put devm_hte_request_ts_ns hte_request_ts_ns hte_enable_ts hte_disable_ts of_hte_req_count hte_get_clk_src_info + +The HTE framework public structures +----------------------------------- +.. kernel-doc:: include/linux/hte.h + +More on the HTE timestamp data +------------------------------ +The ``struct hte_ts_data`` is used to pass timestamp details between the +consumers and the providers. It expresses timestamp data in nanoseconds in +u64. An example of the typical timestamp data life cycle, for the GPIO line is +as follows:: + + - Monitors GPIO line change. + - Detects the state change on GPIO line. + - Converts timestamps in nanoseconds. + - Stores GPIO raw level in raw_level variable if the provider has that + hardware capability. + - Pushes this hte_ts_data object to HTE subsystem. + - HTE subsystem increments seq counter and invokes consumer provided callback. + Based on callback return value, the HTE core invokes secondary callback in + the thread context. + +HTE subsystem debugfs attributes +-------------------------------- +HTE subsystem creates debugfs attributes at ``/sys/kernel/debug/hte/``. +It also creates line/signal-related debugfs attributes at +``/sys/kernel/debug/hte/<provider>/<label or line id>/``. Note that these +attributes are read-only. + +`ts_requested` + The total number of entities requested from the given provider, + where entity is specified by the provider and could represent + lines, GPIO, chip signals, buses etc... + The attribute will be available at + ``/sys/kernel/debug/hte/<provider>/``. + +`total_ts` + The total number of entities supported by the provider. + The attribute will be available at + ``/sys/kernel/debug/hte/<provider>/``. + +`dropped_timestamps` + The dropped timestamps for a given line. + The attribute will be available at + ``/sys/kernel/debug/hte/<provider>/<label or line id>/``. diff --git a/Documentation/driver-api/hte/index.rst b/Documentation/driver-api/hte/index.rst new file mode 100644 index 0000000000..29011de9a4 --- /dev/null +++ b/Documentation/driver-api/hte/index.rst @@ -0,0 +1,22 @@ +.. SPDX-License-Identifier: GPL-2.0 + +============================================ +The Linux Hardware Timestamping Engine (HTE) +============================================ + +The HTE Subsystem +================= + +.. toctree:: + :maxdepth: 1 + + hte + +HTE Tegra Provider +================== + +.. toctree:: + :maxdepth: 1 + + tegra-hte + diff --git a/Documentation/driver-api/hte/tegra-hte.rst b/Documentation/driver-api/hte/tegra-hte.rst new file mode 100644 index 0000000000..85e6547727 --- /dev/null +++ b/Documentation/driver-api/hte/tegra-hte.rst @@ -0,0 +1,47 @@ +.. SPDX-License-Identifier: GPL-2.0+ + +HTE Kernel provider driver +========================== + +Description +----------- +The Nvidia tegra HTE provider also known as GTE (Generic Timestamping Engine) +driver implements two GTE instances: 1) GPIO GTE and 2) LIC +(Legacy Interrupt Controller) IRQ GTE. Both GTE instances get the timestamp +from the system counter TSC which has 31.25MHz clock rate, and the driver +converts clock tick rate to nanoseconds before storing it as timestamp value. + +GPIO GTE +-------- + +This GTE instance timestamps GPIO in real time. For that to happen GPIO +needs to be configured as input. Only the always on (AON) GPIO controller +instance supports timestamping GPIOs in real time as it is tightly coupled with +the GPIO GTE. To support this, GPIOLIB adds two optional APIs as mentioned +below. The GPIO GTE code supports both kernel and userspace consumers. The +kernel space consumers can directly talk to HTE subsystem while userspace +consumers timestamp requests go through GPIOLIB CDEV framework to HTE +subsystem. The hte devicetree binding described at +``Documentation/devicetree/bindings/timestamp`` provides an example of how a +consumer can request an GPIO line. + +See gpiod_enable_hw_timestamp_ns() and gpiod_disable_hw_timestamp_ns(). + +For userspace consumers, GPIO_V2_LINE_FLAG_EVENT_CLOCK_HTE flag must be +specified during IOCTL calls. Refer to ``tools/gpio/gpio-event-mon.c``, which +returns the timestamp in nanoseconds. + +LIC (Legacy Interrupt Controller) IRQ GTE +----------------------------------------- + +This GTE instance timestamps LIC IRQ lines in real time. The hte devicetree +binding described at ``Documentation/devicetree/bindings/timestamp`` +provides an example of how a consumer can request an IRQ line. Since it is a +one-to-one mapping with IRQ GTE provider, consumers can simply specify the IRQ +number that they are interested in. There is no userspace consumer support for +this GTE instance in the HTE framework. + +The provider source code of both IRQ and GPIO GTE instances is located at +``drivers/hte/hte-tegra194.c``. The test driver +``drivers/hte/hte-tegra194-test.c`` demonstrates HTE API usage for both IRQ +and GPIO GTE. diff --git a/Documentation/driver-api/i2c.rst b/Documentation/driver-api/i2c.rst new file mode 100644 index 0000000000..7582c079d7 --- /dev/null +++ b/Documentation/driver-api/i2c.rst @@ -0,0 +1,48 @@ +I\ :sup:`2`\ C and SMBus Subsystem +================================== + +I\ :sup:`2`\ C (or without fancy typography, "I2C") is an acronym for +the "Inter-IC" bus, a simple bus protocol which is widely used where low +data rate communications suffice. Since it's also a licensed trademark, +some vendors use another name (such as "Two-Wire Interface", TWI) for +the same bus. I2C only needs two signals (SCL for clock, SDA for data), +conserving board real estate and minimizing signal quality issues. Most +I2C devices use seven bit addresses, and bus speeds of up to 400 kHz; +there's a high speed extension (3.4 MHz) that's not yet found wide use. +I2C is a multi-master bus; open drain signaling is used to arbitrate +between masters, as well as to handshake and to synchronize clocks from +slower clients. + +The Linux I2C programming interfaces support the master side of bus +interactions and the slave side. The programming interface is +structured around two kinds of driver, and two kinds of device. An I2C +"Adapter Driver" abstracts the controller hardware; it binds to a +physical device (perhaps a PCI device or platform_device) and exposes a +:c:type:`struct i2c_adapter <i2c_adapter>` representing each +I2C bus segment it manages. On each I2C bus segment will be I2C devices +represented by a :c:type:`struct i2c_client <i2c_client>`. +Those devices will be bound to a :c:type:`struct i2c_driver +<i2c_driver>`, which should follow the standard Linux driver model. There +are functions to perform various I2C protocol operations; at this writing +all such functions are usable only from task context. + +The System Management Bus (SMBus) is a sibling protocol. Most SMBus +systems are also I2C conformant. The electrical constraints are tighter +for SMBus, and it standardizes particular protocol messages and idioms. +Controllers that support I2C can also support most SMBus operations, but +SMBus controllers don't support all the protocol options that an I2C +controller will. There are functions to perform various SMBus protocol +operations, either using I2C primitives or by issuing SMBus commands to +i2c_adapter devices which don't support those I2C operations. + +.. kernel-doc:: include/linux/i2c.h + :internal: + +.. kernel-doc:: drivers/i2c/i2c-boardinfo.c + :functions: i2c_register_board_info + +.. kernel-doc:: drivers/i2c/i2c-core-base.c + :export: + +.. kernel-doc:: drivers/i2c/i2c-core-smbus.c + :export: diff --git a/Documentation/driver-api/i3c/device-driver-api.rst b/Documentation/driver-api/i3c/device-driver-api.rst new file mode 100644 index 0000000000..85bc3381cd --- /dev/null +++ b/Documentation/driver-api/i3c/device-driver-api.rst @@ -0,0 +1,9 @@ +.. SPDX-License-Identifier: GPL-2.0 + +===================== +I3C device driver API +===================== + +.. kernel-doc:: include/linux/i3c/device.h + +.. kernel-doc:: drivers/i3c/device.c diff --git a/Documentation/driver-api/i3c/index.rst b/Documentation/driver-api/i3c/index.rst new file mode 100644 index 0000000000..783d6dad05 --- /dev/null +++ b/Documentation/driver-api/i3c/index.rst @@ -0,0 +1,11 @@ +.. SPDX-License-Identifier: GPL-2.0 + +============= +I3C subsystem +============= + +.. toctree:: + + protocol + device-driver-api + master-driver-api diff --git a/Documentation/driver-api/i3c/master-driver-api.rst b/Documentation/driver-api/i3c/master-driver-api.rst new file mode 100644 index 0000000000..332552b283 --- /dev/null +++ b/Documentation/driver-api/i3c/master-driver-api.rst @@ -0,0 +1,9 @@ +.. SPDX-License-Identifier: GPL-2.0 + +================================ +I3C master controller driver API +================================ + +.. kernel-doc:: drivers/i3c/master.c + +.. kernel-doc:: include/linux/i3c/master.h diff --git a/Documentation/driver-api/i3c/protocol.rst b/Documentation/driver-api/i3c/protocol.rst new file mode 100644 index 0000000000..02653defa0 --- /dev/null +++ b/Documentation/driver-api/i3c/protocol.rst @@ -0,0 +1,203 @@ +.. SPDX-License-Identifier: GPL-2.0 + +============ +I3C protocol +============ + +Disclaimer +========== + +This chapter will focus on aspects that matter to software developers. For +everything hardware related (like how things are transmitted on the bus, how +collisions are prevented, ...) please have a look at the I3C specification. + +This document is just a brief introduction to the I3C protocol and the concepts +it brings to the table. If you need more information, please refer to the MIPI +I3C specification (can be downloaded here +https://resources.mipi.org/mipi-i3c-v1-download). + +Introduction +============ + +The I3C (pronounced 'eye-three-see') is a MIPI standardized protocol designed +to overcome I2C limitations (limited speed, external signals needed for +interrupts, no automatic detection of the devices connected to the bus, ...) +while remaining power-efficient. + +I3C Bus +======= + +An I3C bus is made of several I3C devices and possibly some I2C devices as +well, but let's focus on I3C devices for now. + +An I3C device on the I3C bus can have one of the following roles: + +* Master: the device is driving the bus. It's the one in charge of initiating + transactions or deciding who is allowed to talk on the bus (slave generated + events are possible in I3C, see below). +* Slave: the device acts as a slave, and is not able to send frames to another + slave on the bus. The device can still send events to the master on + its own initiative if the master allowed it. + +I3C is a multi-master protocol, so there might be several masters on a bus, +though only one device can act as a master at a given time. In order to gain +bus ownership, a master has to follow a specific procedure. + +Each device on the I3C bus has to be assigned a dynamic address to be able to +communicate. Until this is done, the device should only respond to a limited +set of commands. If it has a static address (also called legacy I2C address), +the device can reply to I2C transfers. + +In addition to these per-device addresses, the protocol defines a broadcast +address in order to address all devices on the bus. + +Once a dynamic address has been assigned to a device, this address will be used +for any direct communication with the device. Note that even after being +assigned a dynamic address, the device should still process broadcast messages. + +I3C Device discovery +==================== + +The I3C protocol defines a mechanism to automatically discover devices present +on the bus, their capabilities and the functionalities they provide. In this +regard I3C is closer to a discoverable bus like USB than it is to I2C or SPI. + +The discovery mechanism is called DAA (Dynamic Address Assignment), because it +not only discovers devices but also assigns them a dynamic address. + +During DAA, each I3C device reports 3 important things: + +* BCR: Bus Characteristic Register. This 8-bit register describes the device bus + related capabilities +* DCR: Device Characteristic Register. This 8-bit register describes the + functionalities provided by the device +* Provisional ID: A 48-bit unique identifier. On a given bus there should be no + Provisional ID collision, otherwise the discovery mechanism may fail. + +I3C slave events +================ + +The I3C protocol allows slaves to generate events on their own, and thus allows +them to take temporary control of the bus. + +This mechanism is called IBI for In Band Interrupts, and as stated in the name, +it allows devices to generate interrupts without requiring an external signal. + +During DAA, each device on the bus has been assigned an address, and this +address will serve as a priority identifier to determine who wins if 2 different +devices are generating an interrupt at the same moment on the bus (the lower the +dynamic address the higher the priority). + +Masters are allowed to inhibit interrupts if they want to. This inhibition +request can be broadcast (applies to all devices) or sent to a specific +device. + +I3C Hot-Join +============ + +The Hot-Join mechanism is similar to USB hotplug. This mechanism allows +slaves to join the bus after it has been initialized by the master. + +This covers the following use cases: + +* the device is not powered when the bus is probed +* the device is hotplugged on the bus through an extension board + +This mechanism is relying on slave events to inform the master that a new +device joined the bus and is waiting for a dynamic address. + +The master is then free to address the request as it wishes: ignore it or +assign a dynamic address to the slave. + +I3C transfer types +================== + +If you omit SMBus (which is just a standardization on how to access registers +exposed by I2C devices), I2C has only one transfer type. + +I3C defines 3 different classes of transfer in addition to I2C transfers which +are here for backward compatibility with I2C devices. + +I3C CCC commands +---------------- + +CCC (Common Command Code) commands are meant to be used for anything that is +related to bus management and all features that are common to a set of devices. + +CCC commands contain an 8-bit CCC ID describing the command that is executed. +The MSB of this ID specifies whether this is a broadcast command (bit7 = 0) or a +unicast one (bit7 = 1). + +The command ID can be followed by a payload. Depending on the command, this +payload is either sent by the master sending the command (write CCC command), +or sent by the slave receiving the command (read CCC command). Of course, read +accesses only apply to unicast commands. +Note that, when sending a CCC command to a specific device, the device address +is passed in the first byte of the payload. + +The payload length is not explicitly passed on the bus, and should be extracted +from the CCC ID. + +Note that vendors can use a dedicated range of CCC IDs for their own commands +(0x61-0x7f and 0xe0-0xef). + +I3C Private SDR transfers +------------------------- + +Private SDR (Single Data Rate) transfers should be used for anything that is +device specific and does not require high transfer speed. + +It is the equivalent of I2C transfers but in the I3C world. Each transfer is +passed the device address (dynamic address assigned during DAA), a payload +and a direction. + +The only difference with I2C is that the transfer is much faster (typical clock +frequency is 12.5MHz). + +I3C HDR commands +---------------- + +HDR commands should be used for anything that is device specific and requires +high transfer speed. + +The first thing attached to an HDR command is the HDR mode. There are currently +3 different modes defined by the I3C specification (refer to the specification +for more details): + +* HDR-DDR: Double Data Rate mode +* HDR-TSP: Ternary Symbol Pure. Only usable on busses with no I2C devices +* HDR-TSL: Ternary Symbol Legacy. Usable on busses with I2C devices + +When sending an HDR command, the whole bus has to enter HDR mode, which is done +using a broadcast CCC command. +Once the bus has entered a specific HDR mode, the master sends the HDR command. +An HDR command is made of: + +* one 16-bits command word in big endian +* N 16-bits data words in big endian + +Those words may be wrapped with specific preambles/post-ambles which depend on +the chosen HDR mode and are detailed here (see the specification for more +details). + +The 16-bits command word is made of: + +* bit[15]: direction bit, read is 1, write is 0 +* bit[14:8]: command code. Identifies the command being executed, the amount of + data words and their meaning +* bit[7:1]: I3C address of the device this command is addressed to +* bit[0]: reserved/parity-bit + +Backward compatibility with I2C devices +======================================= + +The I3C protocol has been designed to be backward compatible with I2C devices. +This backward compatibility allows one to connect a mix of I2C and I3C devices +on the same bus, though, in order to be really efficient, I2C devices should +be equipped with 50 ns spike filters. + +I2C devices can't be discovered like I3C ones and have to be statically +declared. In order to let the master know what these devices are capable of +(both in terms of bus related limitations and functionalities), the software +has to provide some information, which is done through the LVR (Legacy I2C +Virtual Register). diff --git a/Documentation/driver-api/iio/buffers.rst b/Documentation/driver-api/iio/buffers.rst new file mode 100644 index 0000000000..e83026aebe --- /dev/null +++ b/Documentation/driver-api/iio/buffers.rst @@ -0,0 +1,126 @@ +======= +Buffers +======= + +* struct iio_buffer — general buffer structure +* :c:func:`iio_validate_scan_mask_onehot` — Validates that exactly one channel + is selected +* :c:func:`iio_buffer_get` — Grab a reference to the buffer +* :c:func:`iio_buffer_put` — Release the reference to the buffer + +The Industrial I/O core offers a way for continuous data capture based on a +trigger source. Multiple data channels can be read at once from +:file:`/dev/iio:device{X}` character device node, thus reducing the CPU load. + +IIO buffer sysfs interface +========================== +An IIO buffer has an associated attributes directory under +:file:`/sys/bus/iio/iio:device{X}/buffer/*`. Here are some of the existing +attributes: + +* :file:`length`, the total number of data samples (capacity) that can be + stored by the buffer. +* :file:`enable`, activate buffer capture. + +IIO buffer setup +================ + +The meta information associated with a channel reading placed in a buffer is +called a scan element. The important bits configuring scan elements are +exposed to userspace applications via the +:file:`/sys/bus/iio/iio:device{X}/scan_elements/` directory. This directory contains +attributes of the following form: + +* :file:`enable`, used for enabling a channel. If and only if its attribute + is non *zero*, then a triggered capture will contain data samples for this + channel. +* :file:`index`, the scan_index of the channel. +* :file:`type`, description of the scan element data storage within the buffer + and hence the form in which it is read from user space. + Format is [be|le]:[s|u]bits/storagebits[Xrepeat][>>shift] . + + * *be* or *le*, specifies big or little endian. + * *s* or *u*, specifies if signed (2's complement) or unsigned. + * *bits*, is the number of valid data bits. + * *storagebits*, is the number of bits (after padding) that it occupies in the + buffer. + * *repeat*, specifies the number of bits/storagebits repetitions. When the + repeat element is 0 or 1, then the repeat value is omitted. + * *shift*, if specified, is the shift that needs to be applied prior to + masking out unused bits. + +For example, a driver for a 3-axis accelerometer with 12 bit resolution where +data is stored in two 8-bits registers as follows:: + + 7 6 5 4 3 2 1 0 + +---+---+---+---+---+---+---+---+ + |D3 |D2 |D1 |D0 | X | X | X | X | (LOW byte, address 0x06) + +---+---+---+---+---+---+---+---+ + + 7 6 5 4 3 2 1 0 + +---+---+---+---+---+---+---+---+ + |D11|D10|D9 |D8 |D7 |D6 |D5 |D4 | (HIGH byte, address 0x07) + +---+---+---+---+---+---+---+---+ + +will have the following scan element type for each axis:: + + $ cat /sys/bus/iio/devices/iio:device0/scan_elements/in_accel_y_type + le:s12/16>>4 + +A user space application will interpret data samples read from the buffer as +two byte little endian signed data, that needs a 4 bits right shift before +masking out the 12 valid bits of data. + +For implementing buffer support a driver should initialize the following +fields in iio_chan_spec definition:: + + struct iio_chan_spec { + /* other members */ + int scan_index + struct { + char sign; + u8 realbits; + u8 storagebits; + u8 shift; + u8 repeat; + enum iio_endian endianness; + } scan_type; + }; + +The driver implementing the accelerometer described above will have the +following channel definition:: + + struct iio_chan_spec accel_channels[] = { + { + .type = IIO_ACCEL, + .modified = 1, + .channel2 = IIO_MOD_X, + /* other stuff here */ + .scan_index = 0, + .scan_type = { + .sign = 's', + .realbits = 12, + .storagebits = 16, + .shift = 4, + .endianness = IIO_LE, + }, + } + /* similar for Y (with channel2 = IIO_MOD_Y, scan_index = 1) + * and Z (with channel2 = IIO_MOD_Z, scan_index = 2) axis + */ + } + +Here **scan_index** defines the order in which the enabled channels are placed +inside the buffer. Channels with a lower **scan_index** will be placed before +channels with a higher index. Each channel needs to have a unique +**scan_index**. + +Setting **scan_index** to -1 can be used to indicate that the specific channel +does not support buffered capture. In this case no entries will be created for +the channel in the scan_elements directory. + +More details +============ +.. kernel-doc:: include/linux/iio/buffer.h +.. kernel-doc:: drivers/iio/industrialio-buffer.c + :export: diff --git a/Documentation/driver-api/iio/core.rst b/Documentation/driver-api/iio/core.rst new file mode 100644 index 0000000000..715cf29482 --- /dev/null +++ b/Documentation/driver-api/iio/core.rst @@ -0,0 +1,182 @@ +============= +Core elements +============= + +The Industrial I/O core offers both a unified framework for writing drivers for +many different types of embedded sensors and a standard interface to user space +applications manipulating sensors. The implementation can be found under +:file:`drivers/iio/industrialio-*` + +Industrial I/O Devices +---------------------- + +* struct iio_dev - industrial I/O device +* iio_device_alloc() - allocate an :c:type:`iio_dev` from a driver +* iio_device_free() - free an :c:type:`iio_dev` from a driver +* iio_device_register() - register a device with the IIO subsystem +* iio_device_unregister() - unregister a device from the IIO + subsystem + +An IIO device usually corresponds to a single hardware sensor and it +provides all the information needed by a driver handling a device. +Let's first have a look at the functionality embedded in an IIO device +then we will show how a device driver makes use of an IIO device. + +There are two ways for a user space application to interact with an IIO driver. + +1. :file:`/sys/bus/iio/iio:device{X}/`, this represents a hardware sensor + and groups together the data channels of the same chip. +2. :file:`/dev/iio:device{X}`, character device node interface used for + buffered data transfer and for events information retrieval. + +A typical IIO driver will register itself as an :doc:`I2C <../i2c>` or +:doc:`SPI <../spi>` driver and will create two routines, probe and remove. + +At probe: + +1. Call iio_device_alloc(), which allocates memory for an IIO device. +2. Initialize IIO device fields with driver specific information (e.g. + device name, device channels). +3. Call iio_device_register(), this registers the device with the + IIO core. After this call the device is ready to accept requests from user + space applications. + +At remove, we free the resources allocated in probe in reverse order: + +1. iio_device_unregister(), unregister the device from the IIO core. +2. iio_device_free(), free the memory allocated for the IIO device. + +IIO device sysfs interface +========================== + +Attributes are sysfs files used to expose chip info and also allowing +applications to set various configuration parameters. For device with +index X, attributes can be found under /sys/bus/iio/iio:deviceX/ directory. +Common attributes are: + +* :file:`name`, description of the physical chip. +* :file:`dev`, shows the major:minor pair associated with + :file:`/dev/iio:deviceX` node. +* :file:`sampling_frequency_available`, available discrete set of sampling + frequency values for device. +* Available standard attributes for IIO devices are described in the + :file:`Documentation/ABI/testing/sysfs-bus-iio` file in the Linux kernel + sources. + +IIO device channels +=================== + +struct iio_chan_spec - specification of a single channel + +An IIO device channel is a representation of a data channel. An IIO device can +have one or multiple channels. For example: + +* a thermometer sensor has one channel representing the temperature measurement. +* a light sensor with two channels indicating the measurements in the visible + and infrared spectrum. +* an accelerometer can have up to 3 channels representing acceleration on X, Y + and Z axes. + +An IIO channel is described by the struct iio_chan_spec. +A thermometer driver for the temperature sensor in the example above would +have to describe its channel as follows:: + + static const struct iio_chan_spec temp_channel[] = { + { + .type = IIO_TEMP, + .info_mask_separate = BIT(IIO_CHAN_INFO_PROCESSED), + }, + }; + +Channel sysfs attributes exposed to userspace are specified in the form of +bitmasks. Depending on their shared info, attributes can be set in one of the +following masks: + +* **info_mask_separate**, attributes will be specific to + this channel +* **info_mask_shared_by_type**, attributes are shared by all channels of the + same type +* **info_mask_shared_by_dir**, attributes are shared by all channels of the same + direction +* **info_mask_shared_by_all**, attributes are shared by all channels + +When there are multiple data channels per channel type we have two ways to +distinguish between them: + +* set **.modified** field of :c:type:`iio_chan_spec` to 1. Modifiers are + specified using **.channel2** field of the same :c:type:`iio_chan_spec` + structure and are used to indicate a physically unique characteristic of the + channel such as its direction or spectral response. For example, a light + sensor can have two channels, one for infrared light and one for both + infrared and visible light. +* set **.indexed** field of :c:type:`iio_chan_spec` to 1. In this case the + channel is simply another instance with an index specified by the **.channel** + field. + +Here is how we can make use of the channel's modifiers:: + + static const struct iio_chan_spec light_channels[] = { + { + .type = IIO_INTENSITY, + .modified = 1, + .channel2 = IIO_MOD_LIGHT_IR, + .info_mask_separate = BIT(IIO_CHAN_INFO_RAW), + .info_mask_shared = BIT(IIO_CHAN_INFO_SAMP_FREQ), + }, + { + .type = IIO_INTENSITY, + .modified = 1, + .channel2 = IIO_MOD_LIGHT_BOTH, + .info_mask_separate = BIT(IIO_CHAN_INFO_RAW), + .info_mask_shared = BIT(IIO_CHAN_INFO_SAMP_FREQ), + }, + { + .type = IIO_LIGHT, + .info_mask_separate = BIT(IIO_CHAN_INFO_PROCESSED), + .info_mask_shared = BIT(IIO_CHAN_INFO_SAMP_FREQ), + }, + } + +This channel's definition will generate two separate sysfs files for raw data +retrieval: + +* :file:`/sys/bus/iio/iio:device{X}/in_intensity_ir_raw` +* :file:`/sys/bus/iio/iio:device{X}/in_intensity_both_raw` + +one file for processed data: + +* :file:`/sys/bus/iio/iio:device{X}/in_illuminance_input` + +and one shared sysfs file for sampling frequency: + +* :file:`/sys/bus/iio/iio:device{X}/sampling_frequency`. + +Here is how we can make use of the channel's indexing:: + + static const struct iio_chan_spec light_channels[] = { + { + .type = IIO_VOLTAGE, + .indexed = 1, + .channel = 0, + .info_mask_separate = BIT(IIO_CHAN_INFO_RAW), + }, + { + .type = IIO_VOLTAGE, + .indexed = 1, + .channel = 1, + .info_mask_separate = BIT(IIO_CHAN_INFO_RAW), + }, + } + +This will generate two separate attributes files for raw data retrieval: + +* :file:`/sys/bus/iio/devices/iio:device{X}/in_voltage0_raw`, representing + voltage measurement for channel 0. +* :file:`/sys/bus/iio/devices/iio:device{X}/in_voltage1_raw`, representing + voltage measurement for channel 1. + +More details +============ +.. kernel-doc:: include/linux/iio/iio.h +.. kernel-doc:: drivers/iio/industrialio-core.c + :export: diff --git a/Documentation/driver-api/iio/hw-consumer.rst b/Documentation/driver-api/iio/hw-consumer.rst new file mode 100644 index 0000000000..76133a3796 --- /dev/null +++ b/Documentation/driver-api/iio/hw-consumer.rst @@ -0,0 +1,50 @@ +=========== +HW consumer +=========== +An IIO device can be directly connected to another device in hardware. In this +case the buffers between IIO provider and IIO consumer are handled by hardware. +The Industrial I/O HW consumer offers a way to bond these IIO devices without +software buffer for data. The implementation can be found under +:file:`drivers/iio/buffer/hw-consumer.c` + + +* struct iio_hw_consumer — Hardware consumer structure +* :c:func:`iio_hw_consumer_alloc` — Allocate IIO hardware consumer +* :c:func:`iio_hw_consumer_free` — Free IIO hardware consumer +* :c:func:`iio_hw_consumer_enable` — Enable IIO hardware consumer +* :c:func:`iio_hw_consumer_disable` — Disable IIO hardware consumer + + +HW consumer setup +================= + +As standard IIO device the implementation is based on IIO provider/consumer. +A typical IIO HW consumer setup looks like this:: + + static struct iio_hw_consumer *hwc; + + static const struct iio_info adc_info = { + .read_raw = adc_read_raw, + }; + + static int adc_read_raw(struct iio_dev *indio_dev, + struct iio_chan_spec const *chan, int *val, + int *val2, long mask) + { + ret = iio_hw_consumer_enable(hwc); + + /* Acquire data */ + + ret = iio_hw_consumer_disable(hwc); + } + + static int adc_probe(struct platform_device *pdev) + { + hwc = devm_iio_hw_consumer_alloc(&iio->dev); + } + +More details +============ +.. kernel-doc:: drivers/iio/buffer/industrialio-hw-consumer.c + :export: + diff --git a/Documentation/driver-api/iio/index.rst b/Documentation/driver-api/iio/index.rst new file mode 100644 index 0000000000..7fba341bd8 --- /dev/null +++ b/Documentation/driver-api/iio/index.rst @@ -0,0 +1,18 @@ +.. include:: <isonum.txt> + +Industrial I/O +============== + +**Copyright** |copy| 2015 Intel Corporation + +Contents: + +.. toctree:: + :maxdepth: 2 + + intro + core + buffers + triggers + triggered-buffers + hw-consumer diff --git a/Documentation/driver-api/iio/intro.rst b/Documentation/driver-api/iio/intro.rst new file mode 100644 index 0000000000..3653fbd570 --- /dev/null +++ b/Documentation/driver-api/iio/intro.rst @@ -0,0 +1,33 @@ +.. include:: <isonum.txt> + +============ +Introduction +============ + +The main purpose of the Industrial I/O subsystem (IIO) is to provide support +for devices that in some sense perform either +analog-to-digital conversion (ADC) or digital-to-analog conversion (DAC) +or both. The aim is to fill the gap between the somewhat similar hwmon and +:doc:`input <../input>` subsystems. Hwmon is directed at low sample rate +sensors used to monitor and control the system itself, like fan speed control +or temperature measurement. :doc:`Input <../input>` is, as its name suggests, +focused on human interaction input devices (keyboard, mouse, touchscreen). +In some cases there is considerable overlap between these and IIO. + +Devices that fall into this category include: + +* analog to digital converters (ADCs) +* accelerometers +* capacitance to digital converters (CDCs) +* digital to analog converters (DACs) +* gyroscopes +* inertial measurement units (IMUs) +* color and light sensors +* magnetometers +* pressure sensors +* proximity sensors +* temperature sensors + +Usually these sensors are connected via :doc:`SPI <../spi>` or +:doc:`I2C <../i2c>`. A common use case of the sensors devices is to have +combined functionality (e.g. light plus proximity sensor). diff --git a/Documentation/driver-api/iio/triggered-buffers.rst b/Documentation/driver-api/iio/triggered-buffers.rst new file mode 100644 index 0000000000..417555dbbd --- /dev/null +++ b/Documentation/driver-api/iio/triggered-buffers.rst @@ -0,0 +1,69 @@ +================= +Triggered Buffers +================= + +Now that we know what buffers and triggers are let's see how they work together. + +IIO triggered buffer setup +========================== + +* :c:func:`iio_triggered_buffer_setup` — Setup triggered buffer and pollfunc +* :c:func:`iio_triggered_buffer_cleanup` — Free resources allocated by + :c:func:`iio_triggered_buffer_setup` +* struct iio_buffer_setup_ops — buffer setup related callbacks + +A typical triggered buffer setup looks like this:: + + const struct iio_buffer_setup_ops sensor_buffer_setup_ops = { + .preenable = sensor_buffer_preenable, + .postenable = sensor_buffer_postenable, + .postdisable = sensor_buffer_postdisable, + .predisable = sensor_buffer_predisable, + }; + + irqreturn_t sensor_iio_pollfunc(int irq, void *p) + { + pf->timestamp = iio_get_time_ns((struct indio_dev *)p); + return IRQ_WAKE_THREAD; + } + + irqreturn_t sensor_trigger_handler(int irq, void *p) + { + u16 buf[8]; + int i = 0; + + /* read data for each active channel */ + for_each_set_bit(bit, active_scan_mask, masklength) + buf[i++] = sensor_get_data(bit) + + iio_push_to_buffers_with_timestamp(indio_dev, buf, timestamp); + + iio_trigger_notify_done(trigger); + return IRQ_HANDLED; + } + + /* setup triggered buffer, usually in probe function */ + iio_triggered_buffer_setup(indio_dev, sensor_iio_polfunc, + sensor_trigger_handler, + sensor_buffer_setup_ops); + +The important things to notice here are: + +* :c:type:`iio_buffer_setup_ops`, the buffer setup functions to be called at + predefined points in the buffer configuration sequence (e.g. before enable, + after disable). If not specified, the IIO core uses the default + iio_triggered_buffer_setup_ops. +* **sensor_iio_pollfunc**, the function that will be used as top half of poll + function. It should do as little processing as possible, because it runs in + interrupt context. The most common operation is recording of the current + timestamp and for this reason one can use the IIO core defined + :c:func:`iio_pollfunc_store_time` function. +* **sensor_trigger_handler**, the function that will be used as bottom half of + the poll function. This runs in the context of a kernel thread and all the + processing takes place here. It usually reads data from the device and + stores it in the internal buffer together with the timestamp recorded in the + top half. + +More details +============ +.. kernel-doc:: drivers/iio/buffer/industrialio-triggered-buffer.c diff --git a/Documentation/driver-api/iio/triggers.rst b/Documentation/driver-api/iio/triggers.rst new file mode 100644 index 0000000000..288625e406 --- /dev/null +++ b/Documentation/driver-api/iio/triggers.rst @@ -0,0 +1,78 @@ +======== +Triggers +======== + +* struct iio_trigger — industrial I/O trigger device +* :c:func:`devm_iio_trigger_alloc` — Resource-managed iio_trigger_alloc +* :c:func:`devm_iio_trigger_register` — Resource-managed iio_trigger_register + iio_trigger_unregister +* :c:func:`iio_trigger_validate_own_device` — Check if a trigger and IIO + device belong to the same device + +In many situations it is useful for a driver to be able to capture data based +on some external event (trigger) as opposed to periodically polling for data. +An IIO trigger can be provided by a device driver that also has an IIO device +based on hardware generated events (e.g. data ready or threshold exceeded) or +provided by a separate driver from an independent interrupt source (e.g. GPIO +line connected to some external system, timer interrupt or user space writing +a specific file in sysfs). A trigger may initiate data capture for a number of +sensors and also it may be completely unrelated to the sensor itself. + +IIO trigger sysfs interface +=========================== + +There are two locations in sysfs related to triggers: + +* :file:`/sys/bus/iio/devices/trigger{Y}/*`, this file is created once an + IIO trigger is registered with the IIO core and corresponds to trigger + with index Y. + Because triggers can be very different depending on type there are few + standard attributes that we can describe here: + + * :file:`name`, trigger name that can be later used for association with a + device. + * :file:`sampling_frequency`, some timer based triggers use this attribute to + specify the frequency for trigger calls. + +* :file:`/sys/bus/iio/devices/iio:device{X}/trigger/*`, this directory is + created once the device supports a triggered buffer. We can associate a + trigger with our device by writing the trigger's name in the + :file:`current_trigger` file. + +IIO trigger setup +================= + +Let's see a simple example of how to setup a trigger to be used by a driver:: + + struct iio_trigger_ops trigger_ops = { + .set_trigger_state = sample_trigger_state, + .validate_device = sample_validate_device, + } + + struct iio_trigger *trig; + + /* first, allocate memory for our trigger */ + trig = iio_trigger_alloc(dev, "trig-%s-%d", name, idx); + + /* setup trigger operations field */ + trig->ops = &trigger_ops; + + /* now register the trigger with the IIO core */ + iio_trigger_register(trig); + +IIO trigger ops +=============== + +* struct iio_trigger_ops — operations structure for an iio_trigger. + +Notice that a trigger has a set of operations attached: + +* :file:`set_trigger_state`, switch the trigger on/off on demand. +* :file:`validate_device`, function to validate the device when the current + trigger gets changed. + +More details +============ +.. kernel-doc:: include/linux/iio/trigger.h +.. kernel-doc:: drivers/iio/industrialio-trigger.c + :export: diff --git a/Documentation/driver-api/index.rst b/Documentation/driver-api/index.rst new file mode 100644 index 0000000000..1e16a40da3 --- /dev/null +++ b/Documentation/driver-api/index.rst @@ -0,0 +1,123 @@ +.. SPDX-License-Identifier: GPL-2.0 + +============================== +Driver implementer's API guide +============================== + +The kernel offers a wide variety of interfaces to support the development +of device drivers. This document is an only somewhat organized collection +of some of those interfaces — it will hopefully get better over time! The +available subsections can be seen below. + +.. class:: toc-title + + Table of contents + +.. toctree:: + :maxdepth: 2 + + driver-model/index + basics + infrastructure + ioctl + early-userspace/index + pm/index + clk + device-io + dma-buf + device_link + component + message-based + infiniband + aperture + frame-buffer + regulator + reset + iio/index + input + usb/index + firewire + pci/index + cxl/index + spi + i2c + ipmb + ipmi + i3c/index + interconnect + devfreq + hsi + edac + scsi + libata + target + mailbox + mtdnand + miscellaneous + mei/index + mtd/index + mmc/index + nvdimm/index + w1 + rapidio/index + s390-drivers + vme + 80211/index + uio-howto + firmware/index + pin-control + gpio/index + md/index + media/index + misc_devices + nfc/index + dmaengine/index + slimbus + soundwire/index + thermal/index + fpga/index + acpi/index + auxiliary_bus + backlight/lp855x-driver.rst + connector + console + dcdbas + eisa + isa + isapnp + io-mapping + io_ordering + generic-counter + memory-devices/index + men-chameleon-bus + ntb + nvmem + parport-lowlevel + pps + ptp + phy/index + pwm + pldmfw/index + rfkill + serial/index + sm501 + surface_aggregator/index + switchtec + sync_file + tty/index + vfio-mediated-device + vfio + vfio-pci-device-specific-driver-acceptance + virtio/index + xilinx/index + xillybus + zorro + hte/index + wmi + +.. only:: subproject and html + + Indices + ======= + + * :ref:`genindex` diff --git a/Documentation/driver-api/infiniband.rst b/Documentation/driver-api/infiniband.rst new file mode 100644 index 0000000000..30e142ccbe --- /dev/null +++ b/Documentation/driver-api/infiniband.rst @@ -0,0 +1,124 @@ +=========================================== +InfiniBand and Remote DMA (RDMA) Interfaces +=========================================== + +Introduction and Overview +========================= + +TBD + +InfiniBand core interfaces +========================== + +.. kernel-doc:: drivers/infiniband/core/iwpm_util.h + :internal: + +.. kernel-doc:: drivers/infiniband/core/cq.c + :export: + +.. kernel-doc:: drivers/infiniband/core/cm.c + :export: + +.. kernel-doc:: drivers/infiniband/core/rw.c + :export: + +.. kernel-doc:: drivers/infiniband/core/device.c + :export: + +.. kernel-doc:: drivers/infiniband/core/verbs.c + :export: + +.. kernel-doc:: drivers/infiniband/core/packer.c + :export: + +.. kernel-doc:: drivers/infiniband/core/sa_query.c + :export: + +.. kernel-doc:: drivers/infiniband/core/ud_header.c + :export: + +.. kernel-doc:: drivers/infiniband/core/umem.c + :export: + +.. kernel-doc:: drivers/infiniband/core/umem_odp.c + :export: + +RDMA Verbs transport library +============================ + +.. kernel-doc:: drivers/infiniband/sw/rdmavt/mr.c + :export: + +.. kernel-doc:: drivers/infiniband/sw/rdmavt/rc.c + :export: + +.. kernel-doc:: drivers/infiniband/sw/rdmavt/ah.c + :export: + +.. kernel-doc:: drivers/infiniband/sw/rdmavt/vt.c + :export: + +.. kernel-doc:: drivers/infiniband/sw/rdmavt/cq.c + :export: + +.. kernel-doc:: drivers/infiniband/sw/rdmavt/qp.c + :export: + +.. kernel-doc:: drivers/infiniband/sw/rdmavt/mcast.c + :export: + +Upper Layer Protocols +===================== + +iSCSI Extensions for RDMA (iSER) +-------------------------------- + +.. kernel-doc:: drivers/infiniband/ulp/iser/iscsi_iser.h + :internal: + +.. kernel-doc:: drivers/infiniband/ulp/iser/iscsi_iser.c + :functions: iscsi_iser_pdu_alloc iser_initialize_task_headers \ + iscsi_iser_task_init iscsi_iser_mtask_xmit iscsi_iser_task_xmit \ + iscsi_iser_cleanup_task iscsi_iser_check_protection \ + iscsi_iser_conn_create iscsi_iser_conn_bind \ + iscsi_iser_conn_start iscsi_iser_conn_stop \ + iscsi_iser_session_destroy iscsi_iser_session_create \ + iscsi_iser_set_param iscsi_iser_ep_connect iscsi_iser_ep_poll \ + iscsi_iser_ep_disconnect + +.. kernel-doc:: drivers/infiniband/ulp/iser/iser_initiator.c + :internal: + +.. kernel-doc:: drivers/infiniband/ulp/iser/iser_verbs.c + :internal: + +Omni-Path (OPA) Virtual NIC support +----------------------------------- + +.. kernel-doc:: drivers/infiniband/ulp/opa_vnic/opa_vnic_internal.h + :internal: + +.. kernel-doc:: drivers/infiniband/ulp/opa_vnic/opa_vnic_encap.h + :internal: + +.. kernel-doc:: drivers/infiniband/ulp/opa_vnic/opa_vnic_vema_iface.c + :internal: + +.. kernel-doc:: drivers/infiniband/ulp/opa_vnic/opa_vnic_vema.c + :internal: + +InfiniBand SCSI RDMA protocol target support +-------------------------------------------- + +.. kernel-doc:: drivers/infiniband/ulp/srpt/ib_srpt.h + :internal: + +.. kernel-doc:: drivers/infiniband/ulp/srpt/ib_srpt.c + :internal: + +iSCSI Extensions for RDMA (iSER) target support +----------------------------------------------- + +.. kernel-doc:: drivers/infiniband/ulp/isert/ib_isert.c + :internal: + diff --git a/Documentation/driver-api/infrastructure.rst b/Documentation/driver-api/infrastructure.rst new file mode 100644 index 0000000000..3d52dfdfa9 --- /dev/null +++ b/Documentation/driver-api/infrastructure.rst @@ -0,0 +1,97 @@ +Device drivers infrastructure +============================= + +The Basic Device Driver-Model Structures +---------------------------------------- + +.. kernel-doc:: include/linux/device.h + :internal: + :no-identifiers: device_link_state + +.. kernel-doc:: include/linux/device/bus.h + :identifiers: bus_type bus_notifier_event + +.. kernel-doc:: include/linux/device/class.h + :identifiers: class + +.. kernel-doc:: include/linux/device/driver.h + :identifiers: probe_type device_driver + +Device Drivers Base +------------------- + +.. kernel-doc:: drivers/base/init.c + :internal: + +.. kernel-doc:: include/linux/device/driver.h + :no-identifiers: probe_type device_driver + +.. kernel-doc:: drivers/base/driver.c + :export: + +.. kernel-doc:: drivers/base/core.c + :export: + +.. kernel-doc:: drivers/base/syscore.c + :export: + +.. kernel-doc:: include/linux/device/class.h + :no-identifiers: class + +.. kernel-doc:: drivers/base/class.c + :export: + +.. kernel-doc:: drivers/base/node.c + :internal: + +.. kernel-doc:: drivers/base/transport_class.c + :export: + +.. kernel-doc:: drivers/base/dd.c + :export: + +.. kernel-doc:: include/linux/platform_device.h + :internal: + +.. kernel-doc:: drivers/base/platform.c + :export: + +.. kernel-doc:: include/linux/device/bus.h + :no-identifiers: bus_type bus_notifier_event + +.. kernel-doc:: drivers/base/bus.c + :export: + +Device Drivers DMA Management +----------------------------- + +.. kernel-doc:: kernel/dma/mapping.c + :export: + +Device drivers PnP support +-------------------------- + +.. kernel-doc:: drivers/pnp/core.c + :internal: + +.. kernel-doc:: drivers/pnp/card.c + :export: + +.. kernel-doc:: drivers/pnp/driver.c + :internal: + +.. kernel-doc:: drivers/pnp/manager.c + :export: + +.. kernel-doc:: drivers/pnp/support.c + :export: + +Userspace IO devices +-------------------- + +.. kernel-doc:: drivers/uio/uio.c + :export: + +.. kernel-doc:: include/linux/uio_driver.h + :internal: + diff --git a/Documentation/driver-api/input.rst b/Documentation/driver-api/input.rst new file mode 100644 index 0000000000..4bbb26ae2a --- /dev/null +++ b/Documentation/driver-api/input.rst @@ -0,0 +1,42 @@ +Input Subsystem +=============== + +Input core +---------- + +.. kernel-doc:: include/linux/input.h + :internal: + +.. kernel-doc:: drivers/input/input.c + :export: + +.. kernel-doc:: drivers/input/ff-core.c + :export: + +.. kernel-doc:: drivers/input/ff-memless.c + :export: + +Multitouch Library +------------------ + +.. kernel-doc:: include/linux/input/mt.h + :internal: + +.. kernel-doc:: drivers/input/input-mt.c + :export: + +Matrix keyboards/keypads +------------------------ + +.. kernel-doc:: include/linux/input/matrix_keypad.h + :internal: + +Sparse keymap support +--------------------- + +.. kernel-doc:: include/linux/input/sparse-keymap.h + :internal: + +.. kernel-doc:: drivers/input/sparse-keymap.c + :export: + diff --git a/Documentation/driver-api/interconnect.rst b/Documentation/driver-api/interconnect.rst new file mode 100644 index 0000000000..a92d0f277a --- /dev/null +++ b/Documentation/driver-api/interconnect.rst @@ -0,0 +1,140 @@ +.. SPDX-License-Identifier: GPL-2.0 + +===================================== +Generic System Interconnect Subsystem +===================================== + +Introduction +------------ + +This framework is designed to provide a standard kernel interface to control +the settings of the interconnects on an SoC. These settings can be throughput, +latency and priority between multiple interconnected devices or functional +blocks. This can be controlled dynamically in order to save power or provide +maximum performance. + +The interconnect bus is hardware with configurable parameters, which can be +set on a data path according to the requests received from various drivers. +An example of interconnect buses are the interconnects between various +components or functional blocks in chipsets. There can be multiple interconnects +on an SoC that can be multi-tiered. + +Below is a simplified diagram of a real-world SoC interconnect bus topology. + +:: + + +----------------+ +----------------+ + | HW Accelerator |--->| M NoC |<---------------+ + +----------------+ +----------------+ | + | | +------------+ + +-----+ +-------------+ V +------+ | | + | DDR | | +--------+ | PCIe | | | + +-----+ | | Slaves | +------+ | | + ^ ^ | +--------+ | | C NoC | + | | V V | | + +------------------+ +------------------------+ | | +-----+ + | |-->| |-->| |-->| CPU | + | |-->| |<--| | +-----+ + | Mem NoC | | S NoC | +------------+ + | |<--| |---------+ | + | |<--| |<------+ | | +--------+ + +------------------+ +------------------------+ | | +-->| Slaves | + ^ ^ ^ ^ ^ | | +--------+ + | | | | | | V + +------+ | +-----+ +-----+ +---------+ +----------------+ +--------+ + | CPUs | | | GPU | | DSP | | Masters |-->| P NoC |-->| Slaves | + +------+ | +-----+ +-----+ +---------+ +----------------+ +--------+ + | + +-------+ + | Modem | + +-------+ + +Terminology +----------- + +Interconnect provider is the software definition of the interconnect hardware. +The interconnect providers on the above diagram are M NoC, S NoC, C NoC, P NoC +and Mem NoC. + +Interconnect node is the software definition of the interconnect hardware +port. Each interconnect provider consists of multiple interconnect nodes, +which are connected to other SoC components including other interconnect +providers. The point on the diagram where the CPUs connect to the memory is +called an interconnect node, which belongs to the Mem NoC interconnect provider. + +Interconnect endpoints are the first or the last element of the path. Every +endpoint is a node, but not every node is an endpoint. + +Interconnect path is everything between two endpoints including all the nodes +that have to be traversed to reach from a source to destination node. It may +include multiple master-slave pairs across several interconnect providers. + +Interconnect consumers are the entities which make use of the data paths exposed +by the providers. The consumers send requests to providers requesting various +throughput, latency and priority. Usually the consumers are device drivers, that +send request based on their needs. An example for a consumer is a video decoder +that supports various formats and image sizes. + +Interconnect providers +---------------------- + +Interconnect provider is an entity that implements methods to initialize and +configure interconnect bus hardware. The interconnect provider drivers should +be registered with the interconnect provider core. + +.. kernel-doc:: include/linux/interconnect-provider.h + +Interconnect consumers +---------------------- + +Interconnect consumers are the clients which use the interconnect APIs to +get paths between endpoints and set their bandwidth/latency/QoS requirements +for these interconnect paths. These interfaces are not currently +documented. + +Interconnect debugfs interfaces +------------------------------- + +Like several other subsystems interconnect will create some files for debugging +and introspection. Files in debugfs are not considered ABI so application +software shouldn't rely on format details change between kernel versions. + +``/sys/kernel/debug/interconnect/interconnect_summary``: + +Show all interconnect nodes in the system with their aggregated bandwidth +request. Indented under each node show bandwidth requests from each device. + +``/sys/kernel/debug/interconnect/interconnect_graph``: + +Show the interconnect graph in the graphviz dot format. It shows all +interconnect nodes and links in the system and groups together nodes from the +same provider as subgraphs. The format is human-readable and can also be piped +through dot to generate diagrams in many graphical formats:: + + $ cat /sys/kernel/debug/interconnect/interconnect_graph | \ + dot -Tsvg > interconnect_graph.svg + +The ``test-client`` directory provides interfaces for issuing BW requests to +any arbitrary path. Note that for safety reasons, this feature is disabled by +default without a Kconfig to enable it. Enabling it requires code changes to +``#define INTERCONNECT_ALLOW_WRITE_DEBUGFS``. Example usage:: + + cd /sys/kernel/debug/interconnect/test-client/ + + # Configure node endpoints for the path from CPU to DDR on + # qcom/sm8550. + echo chm_apps > src_node + echo ebi > dst_node + + # Get path between src_node and dst_node. This is only + # necessary after updating the node endpoints. + echo 1 > get + + # Set desired BW to 1GBps avg and 2GBps peak. + echo 1000000 > avg_bw + echo 2000000 > peak_bw + + # Vote for avg_bw and peak_bw on the latest path from "get". + # Voting for multiple paths is possible by repeating this + # process for different nodes endpoints. + echo 1 > commit diff --git a/Documentation/driver-api/io-mapping.rst b/Documentation/driver-api/io-mapping.rst new file mode 100644 index 0000000000..7274204b04 --- /dev/null +++ b/Documentation/driver-api/io-mapping.rst @@ -0,0 +1,91 @@ +======================== +The io_mapping functions +======================== + +API +=== + +The io_mapping functions in linux/io-mapping.h provide an abstraction for +efficiently mapping small regions of an I/O device to the CPU. The initial +usage is to support the large graphics aperture on 32-bit processors where +ioremap_wc cannot be used to statically map the entire aperture to the CPU +as it would consume too much of the kernel address space. + +A mapping object is created during driver initialization using:: + + struct io_mapping *io_mapping_create_wc(unsigned long base, + unsigned long size) + +'base' is the bus address of the region to be made +mappable, while 'size' indicates how large a mapping region to +enable. Both are in bytes. + +This _wc variant provides a mapping which may only be used with +io_mapping_map_atomic_wc(), io_mapping_map_local_wc() or +io_mapping_map_wc(). + +With this mapping object, individual pages can be mapped either temporarily +or long term, depending on the requirements. Of course, temporary maps are +more efficient. They come in two flavours:: + + void *io_mapping_map_local_wc(struct io_mapping *mapping, + unsigned long offset) + + void *io_mapping_map_atomic_wc(struct io_mapping *mapping, + unsigned long offset) + +'offset' is the offset within the defined mapping region. Accessing +addresses beyond the region specified in the creation function yields +undefined results. Using an offset which is not page aligned yields an +undefined result. The return value points to a single page in CPU address +space. + +This _wc variant returns a write-combining map to the page and may only be +used with mappings created by io_mapping_create_wc() + +Temporary mappings are only valid in the context of the caller. The mapping +is not guaranteed to be globally visible. + +io_mapping_map_local_wc() has a side effect on X86 32bit as it disables +migration to make the mapping code work. No caller can rely on this side +effect. + +io_mapping_map_atomic_wc() has the side effect of disabling preemption and +pagefaults. Don't use in new code. Use io_mapping_map_local_wc() instead. + +Nested mappings need to be undone in reverse order because the mapping +code uses a stack for keeping track of them:: + + addr1 = io_mapping_map_local_wc(map1, offset1); + addr2 = io_mapping_map_local_wc(map2, offset2); + ... + io_mapping_unmap_local(addr2); + io_mapping_unmap_local(addr1); + +The mappings are released with:: + + void io_mapping_unmap_local(void *vaddr) + void io_mapping_unmap_atomic(void *vaddr) + +'vaddr' must be the value returned by the last io_mapping_map_local_wc() or +io_mapping_map_atomic_wc() call. This unmaps the specified mapping and +undoes the side effects of the mapping functions. + +If you need to sleep while holding a mapping, you can use the regular +variant, although this may be significantly slower:: + + void *io_mapping_map_wc(struct io_mapping *mapping, + unsigned long offset) + +This works like io_mapping_map_atomic/local_wc() except it has no side +effects and the pointer is globally visible. + +The mappings are released with:: + + void io_mapping_unmap(void *vaddr) + +Use for pages mapped with io_mapping_map_wc(). + +At driver close time, the io_mapping object must be freed:: + + void io_mapping_free(struct io_mapping *mapping) diff --git a/Documentation/driver-api/io_ordering.rst b/Documentation/driver-api/io_ordering.rst new file mode 100644 index 0000000000..2ab303ce9a --- /dev/null +++ b/Documentation/driver-api/io_ordering.rst @@ -0,0 +1,51 @@ +============================================== +Ordering I/O writes to memory-mapped addresses +============================================== + +On some platforms, so-called memory-mapped I/O is weakly ordered. On such +platforms, driver writers are responsible for ensuring that I/O writes to +memory-mapped addresses on their device arrive in the order intended. This is +typically done by reading a 'safe' device or bridge register, causing the I/O +chipset to flush pending writes to the device before any reads are posted. A +driver would usually use this technique immediately prior to the exit of a +critical section of code protected by spinlocks. This would ensure that +subsequent writes to I/O space arrived only after all prior writes (much like a +memory barrier op, mb(), only with respect to I/O). + +A more concrete example from a hypothetical device driver:: + + ... + CPU A: spin_lock_irqsave(&dev_lock, flags) + CPU A: val = readl(my_status); + CPU A: ... + CPU A: writel(newval, ring_ptr); + CPU A: spin_unlock_irqrestore(&dev_lock, flags) + ... + CPU B: spin_lock_irqsave(&dev_lock, flags) + CPU B: val = readl(my_status); + CPU B: ... + CPU B: writel(newval2, ring_ptr); + CPU B: spin_unlock_irqrestore(&dev_lock, flags) + ... + +In the case above, the device may receive newval2 before it receives newval, +which could cause problems. Fixing it is easy enough though:: + + ... + CPU A: spin_lock_irqsave(&dev_lock, flags) + CPU A: val = readl(my_status); + CPU A: ... + CPU A: writel(newval, ring_ptr); + CPU A: (void)readl(safe_register); /* maybe a config register? */ + CPU A: spin_unlock_irqrestore(&dev_lock, flags) + ... + CPU B: spin_lock_irqsave(&dev_lock, flags) + CPU B: val = readl(my_status); + CPU B: ... + CPU B: writel(newval2, ring_ptr); + CPU B: (void)readl(safe_register); /* maybe a config register? */ + CPU B: spin_unlock_irqrestore(&dev_lock, flags) + +Here, the reads from safe_register will cause the I/O chipset to flush any +pending writes before actually posting the read to the chipset, preventing +possible data corruption. diff --git a/Documentation/driver-api/ioctl.rst b/Documentation/driver-api/ioctl.rst new file mode 100644 index 0000000000..35795f6a15 --- /dev/null +++ b/Documentation/driver-api/ioctl.rst @@ -0,0 +1,253 @@ +====================== +ioctl based interfaces +====================== + +ioctl() is the most common way for applications to interface +with device drivers. It is flexible and easily extended by adding new +commands and can be passed through character devices, block devices as +well as sockets and other special file descriptors. + +However, it is also very easy to get ioctl command definitions wrong, +and hard to fix them later without breaking existing applications, +so this documentation tries to help developers get it right. + +Command number definitions +========================== + +The command number, or request number, is the second argument passed to +the ioctl system call. While this can be any 32-bit number that uniquely +identifies an action for a particular driver, there are a number of +conventions around defining them. + +``include/uapi/asm-generic/ioctl.h`` provides four macros for defining +ioctl commands that follow modern conventions: ``_IO``, ``_IOR``, +``_IOW``, and ``_IOWR``. These should be used for all new commands, +with the correct parameters: + +_IO/_IOR/_IOW/_IOWR + The macro name specifies how the argument will be used. It may be a + pointer to data to be passed into the kernel (_IOW), out of the kernel + (_IOR), or both (_IOWR). _IO can indicate either commands with no + argument or those passing an integer value instead of a pointer. + It is recommended to only use _IO for commands without arguments, + and use pointers for passing data. + +type + An 8-bit number, often a character literal, specific to a subsystem + or driver, and listed in Documentation/userspace-api/ioctl/ioctl-number.rst + +nr + An 8-bit number identifying the specific command, unique for a give + value of 'type' + +data_type + The name of the data type pointed to by the argument, the command number + encodes the ``sizeof(data_type)`` value in a 13-bit or 14-bit integer, + leading to a limit of 8191 bytes for the maximum size of the argument. + Note: do not pass sizeof(data_type) type into _IOR/_IOW/IOWR, as that + will lead to encoding sizeof(sizeof(data_type)), i.e. sizeof(size_t). + _IO does not have a data_type parameter. + + +Interface versions +================== + +Some subsystems use version numbers in data structures to overload +commands with different interpretations of the argument. + +This is generally a bad idea, since changes to existing commands tend +to break existing applications. + +A better approach is to add a new ioctl command with a new number. The +old command still needs to be implemented in the kernel for compatibility, +but this can be a wrapper around the new implementation. + +Return code +=========== + +ioctl commands can return negative error codes as documented in errno(3); +these get turned into errno values in user space. On success, the return +code should be zero. It is also possible but not recommended to return +a positive 'long' value. + +When the ioctl callback is called with an unknown command number, the +handler returns either -ENOTTY or -ENOIOCTLCMD, which also results in +-ENOTTY being returned from the system call. Some subsystems return +-ENOSYS or -EINVAL here for historic reasons, but this is wrong. + +Prior to Linux 5.5, compat_ioctl handlers were required to return +-ENOIOCTLCMD in order to use the fallback conversion into native +commands. As all subsystems are now responsible for handling compat +mode themselves, this is no longer needed, but it may be important to +consider when backporting bug fixes to older kernels. + +Timestamps +========== + +Traditionally, timestamps and timeout values are passed as ``struct +timespec`` or ``struct timeval``, but these are problematic because of +incompatible definitions of these structures in user space after the +move to 64-bit time_t. + +The ``struct __kernel_timespec`` type can be used instead to be embedded +in other data structures when separate second/nanosecond values are +desired, or passed to user space directly. This is still not ideal though, +as the structure matches neither the kernel's timespec64 nor the user +space timespec exactly. The get_timespec64() and put_timespec64() helper +functions can be used to ensure that the layout remains compatible with +user space and the padding is treated correctly. + +As it is cheap to convert seconds to nanoseconds, but the opposite +requires an expensive 64-bit division, a simple __u64 nanosecond value +can be simpler and more efficient. + +Timeout values and timestamps should ideally use CLOCK_MONOTONIC time, +as returned by ktime_get_ns() or ktime_get_ts64(). Unlike +CLOCK_REALTIME, this makes the timestamps immune from jumping backwards +or forwards due to leap second adjustments and clock_settime() calls. + +ktime_get_real_ns() can be used for CLOCK_REALTIME timestamps that +need to be persistent across a reboot or between multiple machines. + +32-bit compat mode +================== + +In order to support 32-bit user space running on a 64-bit machine, each +subsystem or driver that implements an ioctl callback handler must also +implement the corresponding compat_ioctl handler. + +As long as all the rules for data structures are followed, this is as +easy as setting the .compat_ioctl pointer to a helper function such as +compat_ptr_ioctl() or blkdev_compat_ptr_ioctl(). + +compat_ptr() +------------ + +On the s390 architecture, 31-bit user space has ambiguous representations +for data pointers, with the upper bit being ignored. When running such +a process in compat mode, the compat_ptr() helper must be used to +clear the upper bit of a compat_uptr_t and turn it into a valid 64-bit +pointer. On other architectures, this macro only performs a cast to a +``void __user *`` pointer. + +In an compat_ioctl() callback, the last argument is an unsigned long, +which can be interpreted as either a pointer or a scalar depending on +the command. If it is a scalar, then compat_ptr() must not be used, to +ensure that the 64-bit kernel behaves the same way as a 32-bit kernel +for arguments with the upper bit set. + +The compat_ptr_ioctl() helper can be used in place of a custom +compat_ioctl file operation for drivers that only take arguments that +are pointers to compatible data structures. + +Structure layout +---------------- + +Compatible data structures have the same layout on all architectures, +avoiding all problematic members: + +* ``long`` and ``unsigned long`` are the size of a register, so + they can be either 32-bit or 64-bit wide and cannot be used in portable + data structures. Fixed-length replacements are ``__s32``, ``__u32``, + ``__s64`` and ``__u64``. + +* Pointers have the same problem, in addition to requiring the + use of compat_ptr(). The best workaround is to use ``__u64`` + in place of pointers, which requires a cast to ``uintptr_t`` in user + space, and the use of u64_to_user_ptr() in the kernel to convert + it back into a user pointer. + +* On the x86-32 (i386) architecture, the alignment of 64-bit variables + is only 32-bit, but they are naturally aligned on most other + architectures including x86-64. This means a structure like:: + + struct foo { + __u32 a; + __u64 b; + __u32 c; + }; + + has four bytes of padding between a and b on x86-64, plus another four + bytes of padding at the end, but no padding on i386, and it needs a + compat_ioctl conversion handler to translate between the two formats. + + To avoid this problem, all structures should have their members + naturally aligned, or explicit reserved fields added in place of the + implicit padding. The ``pahole`` tool can be used for checking the + alignment. + +* On ARM OABI user space, structures are padded to multiples of 32-bit, + making some structs incompatible with modern EABI kernels if they + do not end on a 32-bit boundary. + +* On the m68k architecture, struct members are not guaranteed to have an + alignment greater than 16-bit, which is a problem when relying on + implicit padding. + +* Bitfields and enums generally work as one would expect them to, + but some properties of them are implementation-defined, so it is better + to avoid them completely in ioctl interfaces. + +* ``char`` members can be either signed or unsigned, depending on + the architecture, so the __u8 and __s8 types should be used for 8-bit + integer values, though char arrays are clearer for fixed-length strings. + +Information leaks +================= + +Uninitialized data must not be copied back to user space, as this can +cause an information leak, which can be used to defeat kernel address +space layout randomization (KASLR), helping in an attack. + +For this reason (and for compat support) it is best to avoid any +implicit padding in data structures. Where there is implicit padding +in an existing structure, kernel drivers must be careful to fully +initialize an instance of the structure before copying it to user +space. This is usually done by calling memset() before assigning to +individual members. + +Subsystem abstractions +====================== + +While some device drivers implement their own ioctl function, most +subsystems implement the same command for multiple drivers. Ideally the +subsystem has an .ioctl() handler that copies the arguments from and +to user space, passing them into subsystem specific callback functions +through normal kernel pointers. + +This helps in various ways: + +* Applications written for one driver are more likely to work for + another one in the same subsystem if there are no subtle differences + in the user space ABI. + +* The complexity of user space access and data structure layout is done + in one place, reducing the potential for implementation bugs. + +* It is more likely to be reviewed by experienced developers + that can spot problems in the interface when the ioctl is shared + between multiple drivers than when it is only used in a single driver. + +Alternatives to ioctl +===================== + +There are many cases in which ioctl is not the best solution for a +problem. Alternatives include: + +* System calls are a better choice for a system-wide feature that + is not tied to a physical device or constrained by the file system + permissions of a character device node + +* netlink is the preferred way of configuring any network related + objects through sockets. + +* debugfs is used for ad-hoc interfaces for debugging functionality + that does not need to be exposed as a stable interface to applications. + +* sysfs is a good way to expose the state of an in-kernel object + that is not tied to a file descriptor. + +* configfs can be used for more complex configuration than sysfs + +* A custom file system can provide extra flexibility with a simple + user interface but adds a lot of complexity to the implementation. diff --git a/Documentation/driver-api/ipmb.rst b/Documentation/driver-api/ipmb.rst new file mode 100644 index 0000000000..209c49e051 --- /dev/null +++ b/Documentation/driver-api/ipmb.rst @@ -0,0 +1,109 @@ +============================== +IPMB Driver for a Satellite MC +============================== + +The Intelligent Platform Management Bus or IPMB, is an +I2C bus that provides a standardized interconnection between +different boards within a chassis. This interconnection is +between the baseboard management (BMC) and chassis electronics. +IPMB is also associated with the messaging protocol through the +IPMB bus. + +The devices using the IPMB are usually management +controllers that perform management functions such as servicing +the front panel interface, monitoring the baseboard, +hot-swapping disk drivers in the system chassis, etc... + +When an IPMB is implemented in the system, the BMC serves as +a controller to give system software access to the IPMB. The BMC +sends IPMI requests to a device (usually a Satellite Management +Controller or Satellite MC) via IPMB and the device +sends a response back to the BMC. + +For more information on IPMB and the format of an IPMB message, +refer to the IPMB and IPMI specifications. + +IPMB driver for Satellite MC +---------------------------- + +ipmb-dev-int - This is the driver needed on a Satellite MC to +receive IPMB messages from a BMC and send a response back. +This driver works with the I2C driver and a userspace +program such as OpenIPMI: + +1) It is an I2C slave backend driver. So, it defines a callback + function to set the Satellite MC as an I2C slave. + This callback function handles the received IPMI requests. + +2) It defines the read and write functions to enable a user + space program (such as OpenIPMI) to communicate with the kernel. + + +Load the IPMB driver +-------------------- + +The driver needs to be loaded at boot time or manually first. +First, make sure you have the following in your config file: +CONFIG_IPMB_DEVICE_INTERFACE=y + +1) If you want the driver to be loaded at boot time: + +a) Add this entry to your ACPI table, under the appropriate SMBus:: + + Device (SMB0) // Example SMBus host controller + { + Name (_HID, "<Vendor-Specific HID>") // Vendor-Specific HID + Name (_UID, 0) // Unique ID of particular host controller + : + : + Device (IPMB) + { + Name (_HID, "IPMB0001") // IPMB device interface + Name (_UID, 0) // Unique device identifier + } + } + +b) Example for device tree:: + + &i2c2 { + status = "okay"; + + ipmb@10 { + compatible = "ipmb-dev"; + reg = <0x10>; + i2c-protocol; + }; + }; + +If xmit of data to be done using raw i2c block vs smbus +then "i2c-protocol" needs to be defined as above. + +2) Manually from Linux:: + + modprobe ipmb-dev-int + + +Instantiate the device +---------------------- + +After loading the driver, you can instantiate the device as +described in 'Documentation/i2c/instantiating-devices.rst'. +If you have multiple BMCs, each connected to your Satellite MC via +a different I2C bus, you can instantiate a device for each of +those BMCs. + +The name of the instantiated device contains the I2C bus number +associated with it as follows:: + + BMC1 ------ IPMB/I2C bus 1 ---------| /dev/ipmb-1 + Satellite MC + BMC1 ------ IPMB/I2C bus 2 ---------| /dev/ipmb-2 + +For instance, you can instantiate the ipmb-dev-int device from +user space at the 7 bit address 0x10 on bus 2:: + + # echo ipmb-dev 0x1010 > /sys/bus/i2c/devices/i2c-2/new_device + +This will create the device file /dev/ipmb-2, which can be accessed +by the user space program. The device needs to be instantiated +before running the user space program. diff --git a/Documentation/driver-api/ipmi.rst b/Documentation/driver-api/ipmi.rst new file mode 100644 index 0000000000..e224e47b6b --- /dev/null +++ b/Documentation/driver-api/ipmi.rst @@ -0,0 +1,810 @@ +===================== +The Linux IPMI Driver +===================== + +:Author: Corey Minyard <minyard@mvista.com> / <minyard@acm.org> + +The Intelligent Platform Management Interface, or IPMI, is a +standard for controlling intelligent devices that monitor a system. +It provides for dynamic discovery of sensors in the system and the +ability to monitor the sensors and be informed when the sensor's +values change or go outside certain boundaries. It also has a +standardized database for field-replaceable units (FRUs) and a watchdog +timer. + +To use this, you need an interface to an IPMI controller in your +system (called a Baseboard Management Controller, or BMC) and +management software that can use the IPMI system. + +This document describes how to use the IPMI driver for Linux. If you +are not familiar with IPMI itself, see the web site at +https://www.intel.com/design/servers/ipmi/index.htm. IPMI is a big +subject and I can't cover it all here! + +Configuration +------------- + +The Linux IPMI driver is modular, which means you have to pick several +things to have it work right depending on your hardware. Most of +these are available in the 'Character Devices' menu then the IPMI +menu. + +No matter what, you must pick 'IPMI top-level message handler' to use +IPMI. What you do beyond that depends on your needs and hardware. + +The message handler does not provide any user-level interfaces. +Kernel code (like the watchdog) can still use it. If you need access +from userland, you need to select 'Device interface for IPMI' if you +want access through a device driver. + +The driver interface depends on your hardware. If your system +properly provides the SMBIOS info for IPMI, the driver will detect it +and just work. If you have a board with a standard interface (These +will generally be either "KCS", "SMIC", or "BT", consult your hardware +manual), choose the 'IPMI SI handler' option. A driver also exists +for direct I2C access to the IPMI management controller. Some boards +support this, but it is unknown if it will work on every board. For +this, choose 'IPMI SMBus handler', but be ready to try to do some +figuring to see if it will work on your system if the SMBIOS/APCI +information is wrong or not present. It is fairly safe to have both +these enabled and let the drivers auto-detect what is present. + +You should generally enable ACPI on your system, as systems with IPMI +can have ACPI tables describing them. + +If you have a standard interface and the board manufacturer has done +their job correctly, the IPMI controller should be automatically +detected (via ACPI or SMBIOS tables) and should just work. Sadly, +many boards do not have this information. The driver attempts +standard defaults, but they may not work. If you fall into this +situation, you need to read the section below named 'The SI Driver' or +"The SMBus Driver" on how to hand-configure your system. + +IPMI defines a standard watchdog timer. You can enable this with the +'IPMI Watchdog Timer' config option. If you compile the driver into +the kernel, then via a kernel command-line option you can have the +watchdog timer start as soon as it initializes. It also have a lot +of other options, see the 'Watchdog' section below for more details. +Note that you can also have the watchdog continue to run if it is +closed (by default it is disabled on close). Go into the 'Watchdog +Cards' menu, enable 'Watchdog Timer Support', and enable the option +'Disable watchdog shutdown on close'. + +IPMI systems can often be powered off using IPMI commands. Select +'IPMI Poweroff' to do this. The driver will auto-detect if the system +can be powered off by IPMI. It is safe to enable this even if your +system doesn't support this option. This works on ATCA systems, the +Radisys CPI1 card, and any IPMI system that supports standard chassis +management commands. + +If you want the driver to put an event into the event log on a panic, +enable the 'Generate a panic event to all BMCs on a panic' option. If +you want the whole panic string put into the event log using OEM +events, enable the 'Generate OEM events containing the panic string' +option. You can also enable these dynamically by setting the module +parameter named "panic_op" in the ipmi_msghandler module to "event" +or "string". Setting that parameter to "none" disables this function. + +Basic Design +------------ + +The Linux IPMI driver is designed to be very modular and flexible, you +only need to take the pieces you need and you can use it in many +different ways. Because of that, it's broken into many chunks of +code. These chunks (by module name) are: + +ipmi_msghandler - This is the central piece of software for the IPMI +system. It handles all messages, message timing, and responses. The +IPMI users tie into this, and the IPMI physical interfaces (called +System Management Interfaces, or SMIs) also tie in here. This +provides the kernelland interface for IPMI, but does not provide an +interface for use by application processes. + +ipmi_devintf - This provides a userland IOCTL interface for the IPMI +driver, each open file for this device ties in to the message handler +as an IPMI user. + +ipmi_si - A driver for various system interfaces. This supports KCS, +SMIC, and BT interfaces. Unless you have an SMBus interface or your +own custom interface, you probably need to use this. + +ipmi_ssif - A driver for accessing BMCs on the SMBus. It uses the +I2C kernel driver's SMBus interfaces to send and receive IPMI messages +over the SMBus. + +ipmi_powernv - A driver for access BMCs on POWERNV systems. + +ipmi_watchdog - IPMI requires systems to have a very capable watchdog +timer. This driver implements the standard Linux watchdog timer +interface on top of the IPMI message handler. + +ipmi_poweroff - Some systems support the ability to be turned off via +IPMI commands. + +bt-bmc - This is not part of the main driver, but instead a driver for +accessing a BMC-side interface of a BT interface. It is used on BMCs +running Linux to provide an interface to the host. + +These are all individually selectable via configuration options. + +Much documentation for the interface is in the include files. The +IPMI include files are: + +linux/ipmi.h - Contains the user interface and IOCTL interface for IPMI. + +linux/ipmi_smi.h - Contains the interface for system management interfaces +(things that interface to IPMI controllers) to use. + +linux/ipmi_msgdefs.h - General definitions for base IPMI messaging. + + +Addressing +---------- + +The IPMI addressing works much like IP addresses, you have an overlay +to handle the different address types. The overlay is:: + + struct ipmi_addr + { + int addr_type; + short channel; + char data[IPMI_MAX_ADDR_SIZE]; + }; + +The addr_type determines what the address really is. The driver +currently understands two different types of addresses. + +"System Interface" addresses are defined as:: + + struct ipmi_system_interface_addr + { + int addr_type; + short channel; + }; + +and the type is IPMI_SYSTEM_INTERFACE_ADDR_TYPE. This is used for talking +straight to the BMC on the current card. The channel must be +IPMI_BMC_CHANNEL. + +Messages that are destined to go out on the IPMB bus going through the +BMC use the IPMI_IPMB_ADDR_TYPE address type. The format is:: + + struct ipmi_ipmb_addr + { + int addr_type; + short channel; + unsigned char slave_addr; + unsigned char lun; + }; + +The "channel" here is generally zero, but some devices support more +than one channel, it corresponds to the channel as defined in the IPMI +spec. + +There is also an IPMB direct address for a situation where the sender +is directly on an IPMB bus and doesn't have to go through the BMC. +You can send messages to a specific management controller (MC) on the +IPMB using the IPMI_IPMB_DIRECT_ADDR_TYPE with the following format:: + + struct ipmi_ipmb_direct_addr + { + int addr_type; + short channel; + unsigned char slave_addr; + unsigned char rq_lun; + unsigned char rs_lun; + }; + +The channel is always zero. You can also receive commands from other +MCs that you have registered to handle and respond to them, so you can +use this to implement a management controller on a bus.. + +Messages +-------- + +Messages are defined as:: + + struct ipmi_msg + { + unsigned char netfn; + unsigned char lun; + unsigned char cmd; + unsigned char *data; + int data_len; + }; + +The driver takes care of adding/stripping the header information. The +data portion is just the data to be send (do NOT put addressing info +here) or the response. Note that the completion code of a response is +the first item in "data", it is not stripped out because that is how +all the messages are defined in the spec (and thus makes counting the +offsets a little easier :-). + +When using the IOCTL interface from userland, you must provide a block +of data for "data", fill it, and set data_len to the length of the +block of data, even when receiving messages. Otherwise the driver +will have no place to put the message. + +Messages coming up from the message handler in kernelland will come in +as:: + + struct ipmi_recv_msg + { + struct list_head link; + + /* The type of message as defined in the "Receive Types" + defines above. */ + int recv_type; + + ipmi_user_t *user; + struct ipmi_addr addr; + long msgid; + struct ipmi_msg msg; + + /* Call this when done with the message. It will presumably free + the message and do any other necessary cleanup. */ + void (*done)(struct ipmi_recv_msg *msg); + + /* Place-holder for the data, don't make any assumptions about + the size or existence of this, since it may change. */ + unsigned char msg_data[IPMI_MAX_MSG_LENGTH]; + }; + +You should look at the receive type and handle the message +appropriately. + + +The Upper Layer Interface (Message Handler) +------------------------------------------- + +The upper layer of the interface provides the users with a consistent +view of the IPMI interfaces. It allows multiple SMI interfaces to be +addressed (because some boards actually have multiple BMCs on them) +and the user should not have to care what type of SMI is below them. + + +Watching For Interfaces +^^^^^^^^^^^^^^^^^^^^^^^ + +When your code comes up, the IPMI driver may or may not have detected +if IPMI devices exist. So you might have to defer your setup until +the device is detected, or you might be able to do it immediately. +To handle this, and to allow for discovery, you register an SMI +watcher with ipmi_smi_watcher_register() to iterate over interfaces +and tell you when they come and go. + + +Creating the User +^^^^^^^^^^^^^^^^^ + +To use the message handler, you must first create a user using +ipmi_create_user. The interface number specifies which SMI you want +to connect to, and you must supply callback functions to be called +when data comes in. The callback function can run at interrupt level, +so be careful using the callbacks. This also allows to you pass in a +piece of data, the handler_data, that will be passed back to you on +all calls. + +Once you are done, call ipmi_destroy_user() to get rid of the user. + +From userland, opening the device automatically creates a user, and +closing the device automatically destroys the user. + + +Messaging +^^^^^^^^^ + +To send a message from kernel-land, the ipmi_request_settime() call does +pretty much all message handling. Most of the parameter are +self-explanatory. However, it takes a "msgid" parameter. This is NOT +the sequence number of messages. It is simply a long value that is +passed back when the response for the message is returned. You may +use it for anything you like. + +Responses come back in the function pointed to by the ipmi_recv_hndl +field of the "handler" that you passed in to ipmi_create_user(). +Remember again, these may be running at interrupt level. Remember to +look at the receive type, too. + +From userland, you fill out an ipmi_req_t structure and use the +IPMICTL_SEND_COMMAND ioctl. For incoming stuff, you can use select() +or poll() to wait for messages to come in. However, you cannot use +read() to get them, you must call the IPMICTL_RECEIVE_MSG with the +ipmi_recv_t structure to actually get the message. Remember that you +must supply a pointer to a block of data in the msg.data field, and +you must fill in the msg.data_len field with the size of the data. +This gives the receiver a place to actually put the message. + +If the message cannot fit into the data you provide, you will get an +EMSGSIZE error and the driver will leave the data in the receive +queue. If you want to get it and have it truncate the message, us +the IPMICTL_RECEIVE_MSG_TRUNC ioctl. + +When you send a command (which is defined by the lowest-order bit of +the netfn per the IPMI spec) on the IPMB bus, the driver will +automatically assign the sequence number to the command and save the +command. If the response is not receive in the IPMI-specified 5 +seconds, it will generate a response automatically saying the command +timed out. If an unsolicited response comes in (if it was after 5 +seconds, for instance), that response will be ignored. + +In kernelland, after you receive a message and are done with it, you +MUST call ipmi_free_recv_msg() on it, or you will leak messages. Note +that you should NEVER mess with the "done" field of a message, that is +required to properly clean up the message. + +Note that when sending, there is an ipmi_request_supply_msgs() call +that lets you supply the smi and receive message. This is useful for +pieces of code that need to work even if the system is out of buffers +(the watchdog timer uses this, for instance). You supply your own +buffer and own free routines. This is not recommended for normal use, +though, since it is tricky to manage your own buffers. + + +Events and Incoming Commands +^^^^^^^^^^^^^^^^^^^^^^^^^^^^ + +The driver takes care of polling for IPMI events and receiving +commands (commands are messages that are not responses, they are +commands that other things on the IPMB bus have sent you). To receive +these, you must register for them, they will not automatically be sent +to you. + +To receive events, you must call ipmi_set_gets_events() and set the +"val" to non-zero. Any events that have been received by the driver +since startup will immediately be delivered to the first user that +registers for events. After that, if multiple users are registered +for events, they will all receive all events that come in. + +For receiving commands, you have to individually register commands you +want to receive. Call ipmi_register_for_cmd() and supply the netfn +and command name for each command you want to receive. You also +specify a bitmask of the channels you want to receive the command from +(or use IPMI_CHAN_ALL for all channels if you don't care). Only one +user may be registered for each netfn/cmd/channel, but different users +may register for different commands, or the same command if the +channel bitmasks do not overlap. + +To respond to a received command, set the response bit in the returned +netfn, use the address from the received message, and use the same +msgid that you got in the receive message. + +From userland, equivalent IOCTLs are provided to do these functions. + + +The Lower Layer (SMI) Interface +------------------------------- + +As mentioned before, multiple SMI interfaces may be registered to the +message handler, each of these is assigned an interface number when +they register with the message handler. They are generally assigned +in the order they register, although if an SMI unregisters and then +another one registers, all bets are off. + +The ipmi_smi.h defines the interface for management interfaces, see +that for more details. + + +The SI Driver +------------- + +The SI driver allows KCS, BT, and SMIC interfaces to be configured +in the system. It discovers interfaces through a host of different +methods, depending on the system. + +You can specify up to four interfaces on the module load line and +control some module parameters:: + + modprobe ipmi_si.o type=<type1>,<type2>.... + ports=<port1>,<port2>... addrs=<addr1>,<addr2>... + irqs=<irq1>,<irq2>... + regspacings=<sp1>,<sp2>,... regsizes=<size1>,<size2>,... + regshifts=<shift1>,<shift2>,... + slave_addrs=<addr1>,<addr2>,... + force_kipmid=<enable1>,<enable2>,... + kipmid_max_busy_us=<ustime1>,<ustime2>,... + unload_when_empty=[0|1] + trydmi=[0|1] tryacpi=[0|1] + tryplatform=[0|1] trypci=[0|1] + +Each of these except try... items is a list, the first item for the +first interface, second item for the second interface, etc. + +The si_type may be either "kcs", "smic", or "bt". If you leave it blank, it +defaults to "kcs". + +If you specify addrs as non-zero for an interface, the driver will +use the memory address given as the address of the device. This +overrides si_ports. + +If you specify ports as non-zero for an interface, the driver will +use the I/O port given as the device address. + +If you specify irqs as non-zero for an interface, the driver will +attempt to use the given interrupt for the device. + +The other try... items disable discovery by their corresponding +names. These are all enabled by default, set them to zero to disable +them. The tryplatform disables openfirmware. + +The next three parameters have to do with register layout. The +registers used by the interfaces may not appear at successive +locations and they may not be in 8-bit registers. These parameters +allow the layout of the data in the registers to be more precisely +specified. + +The regspacings parameter give the number of bytes between successive +register start addresses. For instance, if the regspacing is set to 4 +and the start address is 0xca2, then the address for the second +register would be 0xca6. This defaults to 1. + +The regsizes parameter gives the size of a register, in bytes. The +data used by IPMI is 8-bits wide, but it may be inside a larger +register. This parameter allows the read and write type to specified. +It may be 1, 2, 4, or 8. The default is 1. + +Since the register size may be larger than 32 bits, the IPMI data may not +be in the lower 8 bits. The regshifts parameter give the amount to shift +the data to get to the actual IPMI data. + +The slave_addrs specifies the IPMI address of the local BMC. This is +usually 0x20 and the driver defaults to that, but in case it's not, it +can be specified when the driver starts up. + +The force_ipmid parameter forcefully enables (if set to 1) or disables +(if set to 0) the kernel IPMI daemon. Normally this is auto-detected +by the driver, but systems with broken interrupts might need an enable, +or users that don't want the daemon (don't need the performance, don't +want the CPU hit) can disable it. + +If unload_when_empty is set to 1, the driver will be unloaded if it +doesn't find any interfaces or all the interfaces fail to work. The +default is one. Setting to 0 is useful with the hotmod, but is +obviously only useful for modules. + +When compiled into the kernel, the parameters can be specified on the +kernel command line as:: + + ipmi_si.type=<type1>,<type2>... + ipmi_si.ports=<port1>,<port2>... ipmi_si.addrs=<addr1>,<addr2>... + ipmi_si.irqs=<irq1>,<irq2>... + ipmi_si.regspacings=<sp1>,<sp2>,... + ipmi_si.regsizes=<size1>,<size2>,... + ipmi_si.regshifts=<shift1>,<shift2>,... + ipmi_si.slave_addrs=<addr1>,<addr2>,... + ipmi_si.force_kipmid=<enable1>,<enable2>,... + ipmi_si.kipmid_max_busy_us=<ustime1>,<ustime2>,... + +It works the same as the module parameters of the same names. + +If your IPMI interface does not support interrupts and is a KCS or +SMIC interface, the IPMI driver will start a kernel thread for the +interface to help speed things up. This is a low-priority kernel +thread that constantly polls the IPMI driver while an IPMI operation +is in progress. The force_kipmid module parameter will all the user to +force this thread on or off. If you force it off and don't have +interrupts, the driver will run VERY slowly. Don't blame me, +these interfaces suck. + +Unfortunately, this thread can use a lot of CPU depending on the +interface's performance. This can waste a lot of CPU and cause +various issues with detecting idle CPU and using extra power. To +avoid this, the kipmid_max_busy_us sets the maximum amount of time, in +microseconds, that kipmid will spin before sleeping for a tick. This +value sets a balance between performance and CPU waste and needs to be +tuned to your needs. Maybe, someday, auto-tuning will be added, but +that's not a simple thing and even the auto-tuning would need to be +tuned to the user's desired performance. + +The driver supports a hot add and remove of interfaces. This way, +interfaces can be added or removed after the kernel is up and running. +This is done using /sys/modules/ipmi_si/parameters/hotmod, which is a +write-only parameter. You write a string to this interface. The string +has the format:: + + <op1>[:op2[:op3...]] + +The "op"s are:: + + add|remove,kcs|bt|smic,mem|i/o,<address>[,<opt1>[,<opt2>[,...]]] + +You can specify more than one interface on the line. The "opt"s are:: + + rsp=<regspacing> + rsi=<regsize> + rsh=<regshift> + irq=<irq> + ipmb=<ipmb slave addr> + +and these have the same meanings as discussed above. Note that you +can also use this on the kernel command line for a more compact format +for specifying an interface. Note that when removing an interface, +only the first three parameters (si type, address type, and address) +are used for the comparison. Any options are ignored for removing. + +The SMBus Driver (SSIF) +----------------------- + +The SMBus driver allows up to 4 SMBus devices to be configured in the +system. By default, the driver will only register with something it +finds in DMI or ACPI tables. You can change this +at module load time (for a module) with:: + + modprobe ipmi_ssif.o + addr=<i2caddr1>[,<i2caddr2>[,...]] + adapter=<adapter1>[,<adapter2>[...]] + dbg=<flags1>,<flags2>... + slave_addrs=<addr1>,<addr2>,... + tryacpi=[0|1] trydmi=[0|1] + [dbg_probe=1] + alerts_broken + +The addresses are normal I2C addresses. The adapter is the string +name of the adapter, as shown in /sys/class/i2c-adapter/i2c-<n>/name. +It is *NOT* i2c-<n> itself. Also, the comparison is done ignoring +spaces, so if the name is "This is an I2C chip" you can say +adapter_name=ThisisanI2cchip. This is because it's hard to pass in +spaces in kernel parameters. + +The debug flags are bit flags for each BMC found, they are: +IPMI messages: 1, driver state: 2, timing: 4, I2C probe: 8 + +The tryxxx parameters can be used to disable detecting interfaces +from various sources. + +Setting dbg_probe to 1 will enable debugging of the probing and +detection process for BMCs on the SMBusses. + +The slave_addrs specifies the IPMI address of the local BMC. This is +usually 0x20 and the driver defaults to that, but in case it's not, it +can be specified when the driver starts up. + +alerts_broken does not enable SMBus alert for SSIF. Otherwise SMBus +alert will be enabled on supported hardware. + +Discovering the IPMI compliant BMC on the SMBus can cause devices on +the I2C bus to fail. The SMBus driver writes a "Get Device ID" IPMI +message as a block write to the I2C bus and waits for a response. +This action can be detrimental to some I2C devices. It is highly +recommended that the known I2C address be given to the SMBus driver in +the smb_addr parameter unless you have DMI or ACPI data to tell the +driver what to use. + +When compiled into the kernel, the addresses can be specified on the +kernel command line as:: + + ipmb_ssif.addr=<i2caddr1>[,<i2caddr2>[...]] + ipmi_ssif.adapter=<adapter1>[,<adapter2>[...]] + ipmi_ssif.dbg=<flags1>[,<flags2>[...]] + ipmi_ssif.dbg_probe=1 + ipmi_ssif.slave_addrs=<addr1>[,<addr2>[...]] + ipmi_ssif.tryacpi=[0|1] ipmi_ssif.trydmi=[0|1] + +These are the same options as on the module command line. + +The I2C driver does not support non-blocking access or polling, so +this driver cannod to IPMI panic events, extend the watchdog at panic +time, or other panic-related IPMI functions without special kernel +patches and driver modifications. You can get those at the openipmi +web page. + +The driver supports a hot add and remove of interfaces through the I2C +sysfs interface. + +The IPMI IPMB Driver +-------------------- + +This driver is for supporting a system that sits on an IPMB bus; it +allows the interface to look like a normal IPMI interface. Sending +system interface addressed messages to it will cause the message to go +to the registered BMC on the system (default at IPMI address 0x20). + +It also allows you to directly address other MCs on the bus using the +ipmb direct addressing. You can receive commands from other MCs on +the bus and they will be handled through the normal received command +mechanism described above. + +Parameters are:: + + ipmi_ipmb.bmcaddr=<address to use for system interface addresses messages> + ipmi_ipmb.retry_time_ms=<Time between retries on IPMB> + ipmi_ipmb.max_retries=<Number of times to retry a message> + +Loading the module will not result in the driver automatcially +starting unless there is device tree information setting it up. If +you want to instantiate one of these by hand, do:: + + echo ipmi-ipmb <addr> > /sys/class/i2c-dev/i2c-<n>/device/new_device + +Note that the address you give here is the I2C address, not the IPMI +address. So if you want your MC address to be 0x60, you put 0x30 +here. See the I2C driver info for more details. + +Command bridging to other IPMB busses through this interface does not +work. The receive message queue is not implemented, by design. There +is only one receive message queue on a BMC, and that is meant for the +host drivers, not something on the IPMB bus. + +A BMC may have multiple IPMB busses, which bus your device sits on +depends on how the system is wired. You can fetch the channels with +"ipmitool channel info <n>" where <n> is the channel, with the +channels being 0-7 and try the IPMB channels. + +Other Pieces +------------ + +Get the detailed info related with the IPMI device +-------------------------------------------------- + +Some users need more detailed information about a device, like where +the address came from or the raw base device for the IPMI interface. +You can use the IPMI smi_watcher to catch the IPMI interfaces as they +come or go, and to grab the information, you can use the function +ipmi_get_smi_info(), which returns the following structure:: + + struct ipmi_smi_info { + enum ipmi_addr_src addr_src; + struct device *dev; + union { + struct { + void *acpi_handle; + } acpi_info; + } addr_info; + }; + +Currently special info for only for SI_ACPI address sources is +returned. Others may be added as necessary. + +Note that the dev pointer is included in the above structure, and +assuming ipmi_smi_get_info returns success, you must call put_device +on the dev pointer. + + +Watchdog +-------- + +A watchdog timer is provided that implements the Linux-standard +watchdog timer interface. It has three module parameters that can be +used to control it:: + + modprobe ipmi_watchdog timeout=<t> pretimeout=<t> action=<action type> + preaction=<preaction type> preop=<preop type> start_now=x + nowayout=x ifnum_to_use=n panic_wdt_timeout=<t> + +ifnum_to_use specifies which interface the watchdog timer should use. +The default is -1, which means to pick the first one registered. + +The timeout is the number of seconds to the action, and the pretimeout +is the amount of seconds before the reset that the pre-timeout panic will +occur (if pretimeout is zero, then pretimeout will not be enabled). Note +that the pretimeout is the time before the final timeout. So if the +timeout is 50 seconds and the pretimeout is 10 seconds, then the pretimeout +will occur in 40 second (10 seconds before the timeout). The panic_wdt_timeout +is the value of timeout which is set on kernel panic, in order to let actions +such as kdump to occur during panic. + +The action may be "reset", "power_cycle", or "power_off", and +specifies what to do when the timer times out, and defaults to +"reset". + +The preaction may be "pre_smi" for an indication through the SMI +interface, "pre_int" for an indication through the SMI with an +interrupts, and "pre_nmi" for a NMI on a preaction. This is how +the driver is informed of the pretimeout. + +The preop may be set to "preop_none" for no operation on a pretimeout, +"preop_panic" to set the preoperation to panic, or "preop_give_data" +to provide data to read from the watchdog device when the pretimeout +occurs. A "pre_nmi" setting CANNOT be used with "preop_give_data" +because you can't do data operations from an NMI. + +When preop is set to "preop_give_data", one byte comes ready to read +on the device when the pretimeout occurs. Select and fasync work on +the device, as well. + +If start_now is set to 1, the watchdog timer will start running as +soon as the driver is loaded. + +If nowayout is set to 1, the watchdog timer will not stop when the +watchdog device is closed. The default value of nowayout is true +if the CONFIG_WATCHDOG_NOWAYOUT option is enabled, or false if not. + +When compiled into the kernel, the kernel command line is available +for configuring the watchdog:: + + ipmi_watchdog.timeout=<t> ipmi_watchdog.pretimeout=<t> + ipmi_watchdog.action=<action type> + ipmi_watchdog.preaction=<preaction type> + ipmi_watchdog.preop=<preop type> + ipmi_watchdog.start_now=x + ipmi_watchdog.nowayout=x + ipmi_watchdog.panic_wdt_timeout=<t> + +The options are the same as the module parameter options. + +The watchdog will panic and start a 120 second reset timeout if it +gets a pre-action. During a panic or a reboot, the watchdog will +start a 120 timer if it is running to make sure the reboot occurs. + +Note that if you use the NMI preaction for the watchdog, you MUST NOT +use the nmi watchdog. There is no reasonable way to tell if an NMI +comes from the IPMI controller, so it must assume that if it gets an +otherwise unhandled NMI, it must be from IPMI and it will panic +immediately. + +Once you open the watchdog timer, you must write a 'V' character to the +device to close it, or the timer will not stop. This is a new semantic +for the driver, but makes it consistent with the rest of the watchdog +drivers in Linux. + + +Panic Timeouts +-------------- + +The OpenIPMI driver supports the ability to put semi-custom and custom +events in the system event log if a panic occurs. if you enable the +'Generate a panic event to all BMCs on a panic' option, you will get +one event on a panic in a standard IPMI event format. If you enable +the 'Generate OEM events containing the panic string' option, you will +also get a bunch of OEM events holding the panic string. + + +The field settings of the events are: + +* Generator ID: 0x21 (kernel) +* EvM Rev: 0x03 (this event is formatting in IPMI 1.0 format) +* Sensor Type: 0x20 (OS critical stop sensor) +* Sensor #: The first byte of the panic string (0 if no panic string) +* Event Dir | Event Type: 0x6f (Assertion, sensor-specific event info) +* Event Data 1: 0xa1 (Runtime stop in OEM bytes 2 and 3) +* Event data 2: second byte of panic string +* Event data 3: third byte of panic string + +See the IPMI spec for the details of the event layout. This event is +always sent to the local management controller. It will handle routing +the message to the right place + +Other OEM events have the following format: + +* Record ID (bytes 0-1): Set by the SEL. +* Record type (byte 2): 0xf0 (OEM non-timestamped) +* byte 3: The slave address of the card saving the panic +* byte 4: A sequence number (starting at zero) + The rest of the bytes (11 bytes) are the panic string. If the panic string + is longer than 11 bytes, multiple messages will be sent with increasing + sequence numbers. + +Because you cannot send OEM events using the standard interface, this +function will attempt to find an SEL and add the events there. It +will first query the capabilities of the local management controller. +If it has an SEL, then they will be stored in the SEL of the local +management controller. If not, and the local management controller is +an event generator, the event receiver from the local management +controller will be queried and the events sent to the SEL on that +device. Otherwise, the events go nowhere since there is nowhere to +send them. + + +Poweroff +-------- + +If the poweroff capability is selected, the IPMI driver will install +a shutdown function into the standard poweroff function pointer. This +is in the ipmi_poweroff module. When the system requests a powerdown, +it will send the proper IPMI commands to do this. This is supported on +several platforms. + +There is a module parameter named "poweroff_powercycle" that may +either be zero (do a power down) or non-zero (do a power cycle, power +the system off, then power it on in a few seconds). Setting +ipmi_poweroff.poweroff_control=x will do the same thing on the kernel +command line. The parameter is also available via the proc filesystem +in /proc/sys/dev/ipmi/poweroff_powercycle. Note that if the system +does not support power cycling, it will always do the power off. + +The "ifnum_to_use" parameter specifies which interface the poweroff +code should use. The default is -1, which means to pick the first one +registered. + +Note that if you have ACPI enabled, the system will prefer using ACPI to +power off. diff --git a/Documentation/driver-api/isa.rst b/Documentation/driver-api/isa.rst new file mode 100644 index 0000000000..3df1b16965 --- /dev/null +++ b/Documentation/driver-api/isa.rst @@ -0,0 +1,122 @@ +=========== +ISA Drivers +=========== + +The following text is adapted from the commit message of the initial +commit of the ISA bus driver authored by Rene Herman. + +During the recent "isa drivers using platform devices" discussion it was +pointed out that (ALSA) ISA drivers ran into the problem of not having +the option to fail driver load (device registration rather) upon not +finding their hardware due to a probe() error not being passed up +through the driver model. In the course of that, I suggested a separate +ISA bus might be best; Russell King agreed and suggested this bus could +use the .match() method for the actual device discovery. + +The attached does this. For this old non (generically) discoverable ISA +hardware only the driver itself can do discovery so as a difference with +the platform_bus, this isa_bus also distributes match() up to the +driver. + +As another difference: these devices only exist in the driver model due +to the driver creating them because it might want to drive them, meaning +that all device creation has been made internal as well. + +The usage model this provides is nice, and has been acked from the ALSA +side by Takashi Iwai and Jaroslav Kysela. The ALSA driver module_init's +now (for oldisa-only drivers) become:: + + static int __init alsa_card_foo_init(void) + { + return isa_register_driver(&snd_foo_isa_driver, SNDRV_CARDS); + } + + static void __exit alsa_card_foo_exit(void) + { + isa_unregister_driver(&snd_foo_isa_driver); + } + +Quite like the other bus models therefore. This removes a lot of +duplicated init code from the ALSA ISA drivers. + +The passed in isa_driver struct is the regular driver struct embedding a +struct device_driver, the normal probe/remove/shutdown/suspend/resume +callbacks, and as indicated that .match callback. + +The "SNDRV_CARDS" you see being passed in is a "unsigned int ndev" +parameter, indicating how many devices to create and call our methods +with. + +The platform_driver callbacks are called with a platform_device param; +the isa_driver callbacks are being called with a ``struct device *dev, +unsigned int id`` pair directly -- with the device creation completely +internal to the bus it's much cleaner to not leak isa_dev's by passing +them in at all. The id is the only thing we ever want other then the +struct device anyways, and it makes for nicer code in the callbacks as +well. + +With this additional .match() callback ISA drivers have all options. If +ALSA would want to keep the old non-load behaviour, it could stick all +of the old .probe in .match, which would only keep them registered after +everything was found to be present and accounted for. If it wanted the +behaviour of always loading as it inadvertently did for a bit after the +changeover to platform devices, it could just not provide a .match() and +do everything in .probe() as before. + +If it, as Takashi Iwai already suggested earlier as a way of following +the model from saner buses more closely, wants to load when a later bind +could conceivably succeed, it could use .match() for the prerequisites +(such as checking the user wants the card enabled and that port/irq/dma +values have been passed in) and .probe() for everything else. This is +the nicest model. + +To the code... + +This exports only two functions; isa_{,un}register_driver(). + +isa_register_driver() register's the struct device_driver, and then +loops over the passed in ndev creating devices and registering them. +This causes the bus match method to be called for them, which is:: + + int isa_bus_match(struct device *dev, struct device_driver *driver) + { + struct isa_driver *isa_driver = to_isa_driver(driver); + + if (dev->platform_data == isa_driver) { + if (!isa_driver->match || + isa_driver->match(dev, to_isa_dev(dev)->id)) + return 1; + dev->platform_data = NULL; + } + return 0; + } + +The first thing this does is check if this device is in fact one of this +driver's devices by seeing if the device's platform_data pointer is set +to this driver. Platform devices compare strings, but we don't need to +do that with everything being internal, so isa_register_driver() abuses +dev->platform_data as a isa_driver pointer which we can then check here. +I believe platform_data is available for this, but if rather not, moving +the isa_driver pointer to the private struct isa_dev is ofcourse fine as +well. + +Then, if the driver did not provide a .match, it matches. If it did, +the driver match() method is called to determine a match. + +If it did **not** match, dev->platform_data is reset to indicate this to +isa_register_driver which can then unregister the device again. + +If during all this, there's any error, or no devices matched at all +everything is backed out again and the error, or -ENODEV, is returned. + +isa_unregister_driver() just unregisters the matched devices and the +driver itself. + +module_isa_driver is a helper macro for ISA drivers which do not do +anything special in module init/exit. This eliminates a lot of +boilerplate code. Each module may only use this macro once, and calling +it replaces module_init and module_exit. + +max_num_isa_dev is a macro to determine the maximum possible number of +ISA devices which may be registered in the I/O port address space given +the address extent of the ISA devices. diff --git a/Documentation/driver-api/isapnp.rst b/Documentation/driver-api/isapnp.rst new file mode 100644 index 0000000000..8d0840ac84 --- /dev/null +++ b/Documentation/driver-api/isapnp.rst @@ -0,0 +1,15 @@ +========================================================== +ISA Plug & Play support by Jaroslav Kysela <perex@suse.cz> +========================================================== + +Interface /proc/isapnp +====================== + +The interface has been removed. See pnp.txt for more details. + +Interface /proc/bus/isapnp +========================== + +This directory allows access to ISA PnP cards and logical devices. +The regular files contain the contents of ISA PnP registers for +a logical device. diff --git a/Documentation/driver-api/libata.rst b/Documentation/driver-api/libata.rst new file mode 100644 index 0000000000..5da27a7492 --- /dev/null +++ b/Documentation/driver-api/libata.rst @@ -0,0 +1,993 @@ +======================== +libATA Developer's Guide +======================== + +:Author: Jeff Garzik + +Introduction +============ + +libATA is a library used inside the Linux kernel to support ATA host +controllers and devices. libATA provides an ATA driver API, class +transports for ATA and ATAPI devices, and SCSI<->ATA translation for ATA +devices according to the T10 SAT specification. + +This Guide documents the libATA driver API, library functions, library +internals, and a couple sample ATA low-level drivers. + +libata Driver API +================= + +:c:type:`struct ata_port_operations <ata_port_operations>` +is defined for every low-level libata +hardware driver, and it controls how the low-level driver interfaces +with the ATA and SCSI layers. + +FIS-based drivers will hook into the system with ``->qc_prep()`` and +``->qc_issue()`` high-level hooks. Hardware which behaves in a manner +similar to PCI IDE hardware may utilize several generic helpers, +defining at a bare minimum the bus I/O addresses of the ATA shadow +register blocks. + +:c:type:`struct ata_port_operations <ata_port_operations>` +---------------------------------------------------------- + +Post-IDENTIFY device configuration +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ + +:: + + void (*dev_config) (struct ata_port *, struct ata_device *); + + +Called after IDENTIFY [PACKET] DEVICE is issued to each device found. +Typically used to apply device-specific fixups prior to issue of SET +FEATURES - XFER MODE, and prior to operation. + +This entry may be specified as NULL in ata_port_operations. + +Set PIO/DMA mode +~~~~~~~~~~~~~~~~ + +:: + + void (*set_piomode) (struct ata_port *, struct ata_device *); + void (*set_dmamode) (struct ata_port *, struct ata_device *); + void (*post_set_mode) (struct ata_port *); + unsigned int (*mode_filter) (struct ata_port *, struct ata_device *, unsigned int); + + +Hooks called prior to the issue of SET FEATURES - XFER MODE command. The +optional ``->mode_filter()`` hook is called when libata has built a mask of +the possible modes. This is passed to the ``->mode_filter()`` function +which should return a mask of valid modes after filtering those +unsuitable due to hardware limits. It is not valid to use this interface +to add modes. + +``dev->pio_mode`` and ``dev->dma_mode`` are guaranteed to be valid when +``->set_piomode()`` and when ``->set_dmamode()`` is called. The timings for +any other drive sharing the cable will also be valid at this point. That +is the library records the decisions for the modes of each drive on a +channel before it attempts to set any of them. + +``->post_set_mode()`` is called unconditionally, after the SET FEATURES - +XFER MODE command completes successfully. + +``->set_piomode()`` is always called (if present), but ``->set_dma_mode()`` +is only called if DMA is possible. + +Taskfile read/write +~~~~~~~~~~~~~~~~~~~ + +:: + + void (*sff_tf_load) (struct ata_port *ap, struct ata_taskfile *tf); + void (*sff_tf_read) (struct ata_port *ap, struct ata_taskfile *tf); + + +``->tf_load()`` is called to load the given taskfile into hardware +registers / DMA buffers. ``->tf_read()`` is called to read the hardware +registers / DMA buffers, to obtain the current set of taskfile register +values. Most drivers for taskfile-based hardware (PIO or MMIO) use +:c:func:`ata_sff_tf_load` and :c:func:`ata_sff_tf_read` for these hooks. + +PIO data read/write +~~~~~~~~~~~~~~~~~~~ + +:: + + void (*sff_data_xfer) (struct ata_device *, unsigned char *, unsigned int, int); + + +All bmdma-style drivers must implement this hook. This is the low-level +operation that actually copies the data bytes during a PIO data +transfer. Typically the driver will choose one of +:c:func:`ata_sff_data_xfer`, or :c:func:`ata_sff_data_xfer32`. + +ATA command execute +~~~~~~~~~~~~~~~~~~~ + +:: + + void (*sff_exec_command)(struct ata_port *ap, struct ata_taskfile *tf); + + +causes an ATA command, previously loaded with ``->tf_load()``, to be +initiated in hardware. Most drivers for taskfile-based hardware use +:c:func:`ata_sff_exec_command` for this hook. + +Per-cmd ATAPI DMA capabilities filter +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ + +:: + + int (*check_atapi_dma) (struct ata_queued_cmd *qc); + + +Allow low-level driver to filter ATA PACKET commands, returning a status +indicating whether or not it is OK to use DMA for the supplied PACKET +command. + +This hook may be specified as NULL, in which case libata will assume +that atapi dma can be supported. + +Read specific ATA shadow registers +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ + +:: + + u8 (*sff_check_status)(struct ata_port *ap); + u8 (*sff_check_altstatus)(struct ata_port *ap); + + +Reads the Status/AltStatus ATA shadow register from hardware. On some +hardware, reading the Status register has the side effect of clearing +the interrupt condition. Most drivers for taskfile-based hardware use +:c:func:`ata_sff_check_status` for this hook. + +Write specific ATA shadow register +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ + +:: + + void (*sff_set_devctl)(struct ata_port *ap, u8 ctl); + + +Write the device control ATA shadow register to the hardware. Most +drivers don't need to define this. + +Select ATA device on bus +~~~~~~~~~~~~~~~~~~~~~~~~ + +:: + + void (*sff_dev_select)(struct ata_port *ap, unsigned int device); + + +Issues the low-level hardware command(s) that causes one of N hardware +devices to be considered 'selected' (active and available for use) on +the ATA bus. This generally has no meaning on FIS-based devices. + +Most drivers for taskfile-based hardware use :c:func:`ata_sff_dev_select` for +this hook. + +Private tuning method +~~~~~~~~~~~~~~~~~~~~~ + +:: + + void (*set_mode) (struct ata_port *ap); + + +By default libata performs drive and controller tuning in accordance +with the ATA timing rules and also applies blacklists and cable limits. +Some controllers need special handling and have custom tuning rules, +typically raid controllers that use ATA commands but do not actually do +drive timing. + + **Warning** + + This hook should not be used to replace the standard controller + tuning logic when a controller has quirks. Replacing the default + tuning logic in that case would bypass handling for drive and bridge + quirks that may be important to data reliability. If a controller + needs to filter the mode selection it should use the mode_filter + hook instead. + +Control PCI IDE BMDMA engine +~~~~~~~~~~~~~~~~~~~~~~~~~~~~ + +:: + + void (*bmdma_setup) (struct ata_queued_cmd *qc); + void (*bmdma_start) (struct ata_queued_cmd *qc); + void (*bmdma_stop) (struct ata_port *ap); + u8 (*bmdma_status) (struct ata_port *ap); + + +When setting up an IDE BMDMA transaction, these hooks arm +(``->bmdma_setup``), fire (``->bmdma_start``), and halt (``->bmdma_stop``) the +hardware's DMA engine. ``->bmdma_status`` is used to read the standard PCI +IDE DMA Status register. + +These hooks are typically either no-ops, or simply not implemented, in +FIS-based drivers. + +Most legacy IDE drivers use :c:func:`ata_bmdma_setup` for the +:c:func:`bmdma_setup` hook. :c:func:`ata_bmdma_setup` will write the pointer +to the PRD table to the IDE PRD Table Address register, enable DMA in the DMA +Command register, and call :c:func:`exec_command` to begin the transfer. + +Most legacy IDE drivers use :c:func:`ata_bmdma_start` for the +:c:func:`bmdma_start` hook. :c:func:`ata_bmdma_start` will write the +ATA_DMA_START flag to the DMA Command register. + +Many legacy IDE drivers use :c:func:`ata_bmdma_stop` for the +:c:func:`bmdma_stop` hook. :c:func:`ata_bmdma_stop` clears the ATA_DMA_START +flag in the DMA command register. + +Many legacy IDE drivers use :c:func:`ata_bmdma_status` as the +:c:func:`bmdma_status` hook. + +High-level taskfile hooks +~~~~~~~~~~~~~~~~~~~~~~~~~ + +:: + + enum ata_completion_errors (*qc_prep) (struct ata_queued_cmd *qc); + int (*qc_issue) (struct ata_queued_cmd *qc); + + +Higher-level hooks, these two hooks can potentially supersede several of +the above taskfile/DMA engine hooks. ``->qc_prep`` is called after the +buffers have been DMA-mapped, and is typically used to populate the +hardware's DMA scatter-gather table. Some drivers use the standard +:c:func:`ata_bmdma_qc_prep` and :c:func:`ata_bmdma_dumb_qc_prep` helper +functions, but more advanced drivers roll their own. + +``->qc_issue`` is used to make a command active, once the hardware and S/G +tables have been prepared. IDE BMDMA drivers use the helper function +:c:func:`ata_sff_qc_issue` for taskfile protocol-based dispatch. More +advanced drivers implement their own ``->qc_issue``. + +:c:func:`ata_sff_qc_issue` calls ``->sff_tf_load()``, ``->bmdma_setup()``, and +``->bmdma_start()`` as necessary to initiate a transfer. + +Exception and probe handling (EH) +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ + +:: + + void (*freeze) (struct ata_port *ap); + void (*thaw) (struct ata_port *ap); + + +:c:func:`ata_port_freeze` is called when HSM violations or some other +condition disrupts normal operation of the port. A frozen port is not +allowed to perform any operation until the port is thawed, which usually +follows a successful reset. + +The optional ``->freeze()`` callback can be used for freezing the port +hardware-wise (e.g. mask interrupt and stop DMA engine). If a port +cannot be frozen hardware-wise, the interrupt handler must ack and clear +interrupts unconditionally while the port is frozen. + +The optional ``->thaw()`` callback is called to perform the opposite of +``->freeze()``: prepare the port for normal operation once again. Unmask +interrupts, start DMA engine, etc. + +:: + + void (*error_handler) (struct ata_port *ap); + + +``->error_handler()`` is a driver's hook into probe, hotplug, and recovery +and other exceptional conditions. The primary responsibility of an +implementation is to call :c:func:`ata_do_eh` or :c:func:`ata_bmdma_drive_eh` +with a set of EH hooks as arguments: + +'prereset' hook (may be NULL) is called during an EH reset, before any +other actions are taken. + +'postreset' hook (may be NULL) is called after the EH reset is +performed. Based on existing conditions, severity of the problem, and +hardware capabilities, + +Either 'softreset' (may be NULL) or 'hardreset' (may be NULL) will be +called to perform the low-level EH reset. + +:: + + void (*post_internal_cmd) (struct ata_queued_cmd *qc); + + +Perform any hardware-specific actions necessary to finish processing +after executing a probe-time or EH-time command via +:c:func:`ata_exec_internal`. + +Hardware interrupt handling +~~~~~~~~~~~~~~~~~~~~~~~~~~~ + +:: + + irqreturn_t (*irq_handler)(int, void *, struct pt_regs *); + void (*irq_clear) (struct ata_port *); + + +``->irq_handler`` is the interrupt handling routine registered with the +system, by libata. ``->irq_clear`` is called during probe just before the +interrupt handler is registered, to be sure hardware is quiet. + +The second argument, dev_instance, should be cast to a pointer to +:c:type:`struct ata_host_set <ata_host_set>`. + +Most legacy IDE drivers use :c:func:`ata_sff_interrupt` for the irq_handler +hook, which scans all ports in the host_set, determines which queued +command was active (if any), and calls ata_sff_host_intr(ap,qc). + +Most legacy IDE drivers use :c:func:`ata_sff_irq_clear` for the +:c:func:`irq_clear` hook, which simply clears the interrupt and error flags +in the DMA status register. + +SATA phy read/write +~~~~~~~~~~~~~~~~~~~ + +:: + + int (*scr_read) (struct ata_port *ap, unsigned int sc_reg, + u32 *val); + int (*scr_write) (struct ata_port *ap, unsigned int sc_reg, + u32 val); + + +Read and write standard SATA phy registers. +sc_reg is one of SCR_STATUS, SCR_CONTROL, SCR_ERROR, or SCR_ACTIVE. + +Init and shutdown +~~~~~~~~~~~~~~~~~ + +:: + + int (*port_start) (struct ata_port *ap); + void (*port_stop) (struct ata_port *ap); + void (*host_stop) (struct ata_host_set *host_set); + + +``->port_start()`` is called just after the data structures for each port +are initialized. Typically this is used to alloc per-port DMA buffers / +tables / rings, enable DMA engines, and similar tasks. Some drivers also +use this entry point as a chance to allocate driver-private memory for +``ap->private_data``. + +Many drivers use :c:func:`ata_port_start` as this hook or call it from their +own :c:func:`port_start` hooks. :c:func:`ata_port_start` allocates space for +a legacy IDE PRD table and returns. + +``->port_stop()`` is called after ``->host_stop()``. Its sole function is to +release DMA/memory resources, now that they are no longer actively being +used. Many drivers also free driver-private data from port at this time. + +``->host_stop()`` is called after all ``->port_stop()`` calls have completed. +The hook must finalize hardware shutdown, release DMA and other +resources, etc. This hook may be specified as NULL, in which case it is +not called. + +Error handling +============== + +This chapter describes how errors are handled under libata. Readers are +advised to read SCSI EH (Documentation/scsi/scsi_eh.rst) and ATA +exceptions doc first. + +Origins of commands +------------------- + +In libata, a command is represented with +:c:type:`struct ata_queued_cmd <ata_queued_cmd>` or qc. +qc's are preallocated during port initialization and repetitively used +for command executions. Currently only one qc is allocated per port but +yet-to-be-merged NCQ branch allocates one for each tag and maps each qc +to NCQ tag 1-to-1. + +libata commands can originate from two sources - libata itself and SCSI +midlayer. libata internal commands are used for initialization and error +handling. All normal blk requests and commands for SCSI emulation are +passed as SCSI commands through queuecommand callback of SCSI host +template. + +How commands are issued +----------------------- + +Internal commands + Once allocated qc's taskfile is initialized for the command to be + executed. qc currently has two mechanisms to notify completion. One + is via ``qc->complete_fn()`` callback and the other is completion + ``qc->waiting``. ``qc->complete_fn()`` callback is the asynchronous path + used by normal SCSI translated commands and ``qc->waiting`` is the + synchronous (issuer sleeps in process context) path used by internal + commands. + + Once initialization is complete, host_set lock is acquired and the + qc is issued. + +SCSI commands + All libata drivers use :c:func:`ata_scsi_queuecmd` as + ``hostt->queuecommand`` callback. scmds can either be simulated or + translated. No qc is involved in processing a simulated scmd. The + result is computed right away and the scmd is completed. + + ``qc->complete_fn()`` callback is used for completion notification. ATA + commands use :c:func:`ata_scsi_qc_complete` while ATAPI commands use + :c:func:`atapi_qc_complete`. Both functions end up calling ``qc->scsidone`` + to notify upper layer when the qc is finished. After translation is + completed, the qc is issued with :c:func:`ata_qc_issue`. + + Note that SCSI midlayer invokes hostt->queuecommand while holding + host_set lock, so all above occur while holding host_set lock. + +How commands are processed +-------------------------- + +Depending on which protocol and which controller are used, commands are +processed differently. For the purpose of discussion, a controller which +uses taskfile interface and all standard callbacks is assumed. + +Currently 6 ATA command protocols are used. They can be sorted into the +following four categories according to how they are processed. + +ATA NO DATA or DMA + ATA_PROT_NODATA and ATA_PROT_DMA fall into this category. These + types of commands don't require any software intervention once + issued. Device will raise interrupt on completion. + +ATA PIO + ATA_PROT_PIO is in this category. libata currently implements PIO + with polling. ATA_NIEN bit is set to turn off interrupt and + pio_task on ata_wq performs polling and IO. + +ATAPI NODATA or DMA + ATA_PROT_ATAPI_NODATA and ATA_PROT_ATAPI_DMA are in this + category. packet_task is used to poll BSY bit after issuing PACKET + command. Once BSY is turned off by the device, packet_task + transfers CDB and hands off processing to interrupt handler. + +ATAPI PIO + ATA_PROT_ATAPI is in this category. ATA_NIEN bit is set and, as + in ATAPI NODATA or DMA, packet_task submits cdb. However, after + submitting cdb, further processing (data transfer) is handed off to + pio_task. + +How commands are completed +-------------------------- + +Once issued, all qc's are either completed with :c:func:`ata_qc_complete` or +time out. For commands which are handled by interrupts, +:c:func:`ata_host_intr` invokes :c:func:`ata_qc_complete`, and, for PIO tasks, +pio_task invokes :c:func:`ata_qc_complete`. In error cases, packet_task may +also complete commands. + +:c:func:`ata_qc_complete` does the following. + +1. DMA memory is unmapped. + +2. ATA_QCFLAG_ACTIVE is cleared from qc->flags. + +3. :c:expr:`qc->complete_fn` callback is invoked. If the return value of the + callback is not zero. Completion is short circuited and + :c:func:`ata_qc_complete` returns. + +4. :c:func:`__ata_qc_complete` is called, which does + + 1. ``qc->flags`` is cleared to zero. + + 2. ``ap->active_tag`` and ``qc->tag`` are poisoned. + + 3. ``qc->waiting`` is cleared & completed (in that order). + + 4. qc is deallocated by clearing appropriate bit in ``ap->qactive``. + +So, it basically notifies upper layer and deallocates qc. One exception +is short-circuit path in #3 which is used by :c:func:`atapi_qc_complete`. + +For all non-ATAPI commands, whether it fails or not, almost the same +code path is taken and very little error handling takes place. A qc is +completed with success status if it succeeded, with failed status +otherwise. + +However, failed ATAPI commands require more handling as REQUEST SENSE is +needed to acquire sense data. If an ATAPI command fails, +:c:func:`ata_qc_complete` is invoked with error status, which in turn invokes +:c:func:`atapi_qc_complete` via ``qc->complete_fn()`` callback. + +This makes :c:func:`atapi_qc_complete` set ``scmd->result`` to +SAM_STAT_CHECK_CONDITION, complete the scmd and return 1. As the +sense data is empty but ``scmd->result`` is CHECK CONDITION, SCSI midlayer +will invoke EH for the scmd, and returning 1 makes :c:func:`ata_qc_complete` +to return without deallocating the qc. This leads us to +:c:func:`ata_scsi_error` with partially completed qc. + +:c:func:`ata_scsi_error` +------------------------ + +:c:func:`ata_scsi_error` is the current ``transportt->eh_strategy_handler()`` +for libata. As discussed above, this will be entered in two cases - +timeout and ATAPI error completion. This function will check if a qc is active +and has not failed yet. Such a qc will be marked with AC_ERR_TIMEOUT such that +EH will know to handle it later. Then it calls low level libata driver's +:c:func:`error_handler` callback. + +When the :c:func:`error_handler` callback is invoked it stops BMDMA and +completes the qc. Note that as we're currently in EH, we cannot call +scsi_done. As described in SCSI EH doc, a recovered scmd should be +either retried with :c:func:`scsi_queue_insert` or finished with +:c:func:`scsi_finish_command`. Here, we override ``qc->scsidone`` with +:c:func:`scsi_finish_command` and calls :c:func:`ata_qc_complete`. + +If EH is invoked due to a failed ATAPI qc, the qc here is completed but +not deallocated. The purpose of this half-completion is to use the qc as +place holder to make EH code reach this place. This is a bit hackish, +but it works. + +Once control reaches here, the qc is deallocated by invoking +:c:func:`__ata_qc_complete` explicitly. Then, internal qc for REQUEST SENSE +is issued. Once sense data is acquired, scmd is finished by directly +invoking :c:func:`scsi_finish_command` on the scmd. Note that as we already +have completed and deallocated the qc which was associated with the +scmd, we don't need to/cannot call :c:func:`ata_qc_complete` again. + +Problems with the current EH +---------------------------- + +- Error representation is too crude. Currently any and all error + conditions are represented with ATA STATUS and ERROR registers. + Errors which aren't ATA device errors are treated as ATA device + errors by setting ATA_ERR bit. Better error descriptor which can + properly represent ATA and other errors/exceptions is needed. + +- When handling timeouts, no action is taken to make device forget + about the timed out command and ready for new commands. + +- EH handling via :c:func:`ata_scsi_error` is not properly protected from + usual command processing. On EH entrance, the device is not in + quiescent state. Timed out commands may succeed or fail any time. + pio_task and atapi_task may still be running. + +- Too weak error recovery. Devices / controllers causing HSM mismatch + errors and other errors quite often require reset to return to known + state. Also, advanced error handling is necessary to support features + like NCQ and hotplug. + +- ATA errors are directly handled in the interrupt handler and PIO + errors in pio_task. This is problematic for advanced error handling + for the following reasons. + + First, advanced error handling often requires context and internal qc + execution. + + Second, even a simple failure (say, CRC error) needs information + gathering and could trigger complex error handling (say, resetting & + reconfiguring). Having multiple code paths to gather information, + enter EH and trigger actions makes life painful. + + Third, scattered EH code makes implementing low level drivers + difficult. Low level drivers override libata callbacks. If EH is + scattered over several places, each affected callbacks should perform + its part of error handling. This can be error prone and painful. + +libata Library +============== + +.. kernel-doc:: drivers/ata/libata-core.c + :export: + +libata Core Internals +===================== + +.. kernel-doc:: drivers/ata/libata-core.c + :internal: + +.. kernel-doc:: drivers/ata/libata-eh.c + +libata SCSI translation/emulation +================================= + +.. kernel-doc:: drivers/ata/libata-scsi.c + :export: + +.. kernel-doc:: drivers/ata/libata-scsi.c + :internal: + +ATA errors and exceptions +========================= + +This chapter tries to identify what error/exception conditions exist for +ATA/ATAPI devices and describe how they should be handled in +implementation-neutral way. + +The term 'error' is used to describe conditions where either an explicit +error condition is reported from device or a command has timed out. + +The term 'exception' is either used to describe exceptional conditions +which are not errors (say, power or hotplug events), or to describe both +errors and non-error exceptional conditions. Where explicit distinction +between error and exception is necessary, the term 'non-error exception' +is used. + +Exception categories +-------------------- + +Exceptions are described primarily with respect to legacy taskfile + bus +master IDE interface. If a controller provides other better mechanism +for error reporting, mapping those into categories described below +shouldn't be difficult. + +In the following sections, two recovery actions - reset and +reconfiguring transport - are mentioned. These are described further in +`EH recovery actions <#exrec>`__. + +HSM violation +~~~~~~~~~~~~~ + +This error is indicated when STATUS value doesn't match HSM requirement +during issuing or execution any ATA/ATAPI command. + +- ATA_STATUS doesn't contain !BSY && DRDY && !DRQ while trying to + issue a command. + +- !BSY && !DRQ during PIO data transfer. + +- DRQ on command completion. + +- !BSY && ERR after CDB transfer starts but before the last byte of CDB + is transferred. ATA/ATAPI standard states that "The device shall not + terminate the PACKET command with an error before the last byte of + the command packet has been written" in the error outputs description + of PACKET command and the state diagram doesn't include such + transitions. + +In these cases, HSM is violated and not much information regarding the +error can be acquired from STATUS or ERROR register. IOW, this error can +be anything - driver bug, faulty device, controller and/or cable. + +As HSM is violated, reset is necessary to restore known state. +Reconfiguring transport for lower speed might be helpful too as +transmission errors sometimes cause this kind of errors. + +ATA/ATAPI device error (non-NCQ / non-CHECK CONDITION) +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ + +These are errors detected and reported by ATA/ATAPI devices indicating +device problems. For this type of errors, STATUS and ERROR register +values are valid and describe error condition. Note that some of ATA bus +errors are detected by ATA/ATAPI devices and reported using the same +mechanism as device errors. Those cases are described later in this +section. + +For ATA commands, this type of errors are indicated by !BSY && ERR +during command execution and on completion. + +For ATAPI commands, + +- !BSY && ERR && ABRT right after issuing PACKET indicates that PACKET + command is not supported and falls in this category. + +- !BSY && ERR(==CHK) && !ABRT after the last byte of CDB is transferred + indicates CHECK CONDITION and doesn't fall in this category. + +- !BSY && ERR(==CHK) && ABRT after the last byte of CDB is transferred + \*probably\* indicates CHECK CONDITION and doesn't fall in this + category. + +Of errors detected as above, the following are not ATA/ATAPI device +errors but ATA bus errors and should be handled according to +`ATA bus error <#excatATAbusErr>`__. + +CRC error during data transfer + This is indicated by ICRC bit in the ERROR register and means that + corruption occurred during data transfer. Up to ATA/ATAPI-7, the + standard specifies that this bit is only applicable to UDMA + transfers but ATA/ATAPI-8 draft revision 1f says that the bit may be + applicable to multiword DMA and PIO. + +ABRT error during data transfer or on completion + Up to ATA/ATAPI-7, the standard specifies that ABRT could be set on + ICRC errors and on cases where a device is not able to complete a + command. Combined with the fact that MWDMA and PIO transfer errors + aren't allowed to use ICRC bit up to ATA/ATAPI-7, it seems to imply + that ABRT bit alone could indicate transfer errors. + + However, ATA/ATAPI-8 draft revision 1f removes the part that ICRC + errors can turn on ABRT. So, this is kind of gray area. Some + heuristics are needed here. + +ATA/ATAPI device errors can be further categorized as follows. + +Media errors + This is indicated by UNC bit in the ERROR register. ATA devices + reports UNC error only after certain number of retries cannot + recover the data, so there's nothing much else to do other than + notifying upper layer. + + READ and WRITE commands report CHS or LBA of the first failed sector + but ATA/ATAPI standard specifies that the amount of transferred data + on error completion is indeterminate, so we cannot assume that + sectors preceding the failed sector have been transferred and thus + cannot complete those sectors successfully as SCSI does. + +Media changed / media change requested error + <<TODO: fill here>> + +Address error + This is indicated by IDNF bit in the ERROR register. Report to upper + layer. + +Other errors + This can be invalid command or parameter indicated by ABRT ERROR bit + or some other error condition. Note that ABRT bit can indicate a lot + of things including ICRC and Address errors. Heuristics needed. + +Depending on commands, not all STATUS/ERROR bits are applicable. These +non-applicable bits are marked with "na" in the output descriptions but +up to ATA/ATAPI-7 no definition of "na" can be found. However, +ATA/ATAPI-8 draft revision 1f describes "N/A" as follows. + + 3.2.3.3a N/A + A keyword the indicates a field has no defined value in this + standard and should not be checked by the host or device. N/A + fields should be cleared to zero. + +So, it seems reasonable to assume that "na" bits are cleared to zero by +devices and thus need no explicit masking. + +ATAPI device CHECK CONDITION +~~~~~~~~~~~~~~~~~~~~~~~~~~~~ + +ATAPI device CHECK CONDITION error is indicated by set CHK bit (ERR bit) +in the STATUS register after the last byte of CDB is transferred for a +PACKET command. For this kind of errors, sense data should be acquired +to gather information regarding the errors. REQUEST SENSE packet command +should be used to acquire sense data. + +Once sense data is acquired, this type of errors can be handled +similarly to other SCSI errors. Note that sense data may indicate ATA +bus error (e.g. Sense Key 04h HARDWARE ERROR && ASC/ASCQ 47h/00h SCSI +PARITY ERROR). In such cases, the error should be considered as an ATA +bus error and handled according to `ATA bus error <#excatATAbusErr>`__. + +ATA device error (NCQ) +~~~~~~~~~~~~~~~~~~~~~~ + +NCQ command error is indicated by cleared BSY and set ERR bit during NCQ +command phase (one or more NCQ commands outstanding). Although STATUS +and ERROR registers will contain valid values describing the error, READ +LOG EXT is required to clear the error condition, determine which +command has failed and acquire more information. + +READ LOG EXT Log Page 10h reports which tag has failed and taskfile +register values describing the error. With this information the failed +command can be handled as a normal ATA command error as in +`ATA/ATAPI device error (non-NCQ / non-CHECK CONDITION) <#excatDevErr>`__ +and all other in-flight commands must be retried. Note that this retry +should not be counted - it's likely that commands retried this way would +have completed normally if it were not for the failed command. + +Note that ATA bus errors can be reported as ATA device NCQ errors. This +should be handled as described in `ATA bus error <#excatATAbusErr>`__. + +If READ LOG EXT Log Page 10h fails or reports NQ, we're thoroughly +screwed. This condition should be treated according to +`HSM violation <#excatHSMviolation>`__. + +ATA bus error +~~~~~~~~~~~~~ + +ATA bus error means that data corruption occurred during transmission +over ATA bus (SATA or PATA). This type of errors can be indicated by + +- ICRC or ABRT error as described in + `ATA/ATAPI device error (non-NCQ / non-CHECK CONDITION) <#excatDevErr>`__. + +- Controller-specific error completion with error information + indicating transmission error. + +- On some controllers, command timeout. In this case, there may be a + mechanism to determine that the timeout is due to transmission error. + +- Unknown/random errors, timeouts and all sorts of weirdities. + +As described above, transmission errors can cause wide variety of +symptoms ranging from device ICRC error to random device lockup, and, +for many cases, there is no way to tell if an error condition is due to +transmission error or not; therefore, it's necessary to employ some kind +of heuristic when dealing with errors and timeouts. For example, +encountering repetitive ABRT errors for known supported command is +likely to indicate ATA bus error. + +Once it's determined that ATA bus errors have possibly occurred, +lowering ATA bus transmission speed is one of actions which may +alleviate the problem. See `Reconfigure transport <#exrecReconf>`__ for +more information. + +PCI bus error +~~~~~~~~~~~~~ + +Data corruption or other failures during transmission over PCI (or other +system bus). For standard BMDMA, this is indicated by Error bit in the +BMDMA Status register. This type of errors must be logged as it +indicates something is very wrong with the system. Resetting host +controller is recommended. + +Late completion +~~~~~~~~~~~~~~~ + +This occurs when timeout occurs and the timeout handler finds out that +the timed out command has completed successfully or with error. This is +usually caused by lost interrupts. This type of errors must be logged. +Resetting host controller is recommended. + +Unknown error (timeout) +~~~~~~~~~~~~~~~~~~~~~~~ + +This is when timeout occurs and the command is still processing or the +host and device are in unknown state. When this occurs, HSM could be in +any valid or invalid state. To bring the device to known state and make +it forget about the timed out command, resetting is necessary. The timed +out command may be retried. + +Timeouts can also be caused by transmission errors. Refer to +`ATA bus error <#excatATAbusErr>`__ for more details. + +Hotplug and power management exceptions +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ + +<<TODO: fill here>> + +EH recovery actions +------------------- + +This section discusses several important recovery actions. + +Clearing error condition +~~~~~~~~~~~~~~~~~~~~~~~~ + +Many controllers require its error registers to be cleared by error +handler. Different controllers may have different requirements. + +For SATA, it's strongly recommended to clear at least SError register +during error handling. + +Reset +~~~~~ + +During EH, resetting is necessary in the following cases. + +- HSM is in unknown or invalid state + +- HBA is in unknown or invalid state + +- EH needs to make HBA/device forget about in-flight commands + +- HBA/device behaves weirdly + +Resetting during EH might be a good idea regardless of error condition +to improve EH robustness. Whether to reset both or either one of HBA and +device depends on situation but the following scheme is recommended. + +- When it's known that HBA is in ready state but ATA/ATAPI device is in + unknown state, reset only device. + +- If HBA is in unknown state, reset both HBA and device. + +HBA resetting is implementation specific. For a controller complying to +taskfile/BMDMA PCI IDE, stopping active DMA transaction may be +sufficient iff BMDMA state is the only HBA context. But even mostly +taskfile/BMDMA PCI IDE complying controllers may have implementation +specific requirements and mechanism to reset themselves. This must be +addressed by specific drivers. + +OTOH, ATA/ATAPI standard describes in detail ways to reset ATA/ATAPI +devices. + +PATA hardware reset + This is hardware initiated device reset signalled with asserted PATA + RESET- signal. There is no standard way to initiate hardware reset + from software although some hardware provides registers that allow + driver to directly tweak the RESET- signal. + +Software reset + This is achieved by turning CONTROL SRST bit on for at least 5us. + Both PATA and SATA support it but, in case of SATA, this may require + controller-specific support as the second Register FIS to clear SRST + should be transmitted while BSY bit is still set. Note that on PATA, + this resets both master and slave devices on a channel. + +EXECUTE DEVICE DIAGNOSTIC command + Although ATA/ATAPI standard doesn't describe exactly, EDD implies + some level of resetting, possibly similar level with software reset. + Host-side EDD protocol can be handled with normal command processing + and most SATA controllers should be able to handle EDD's just like + other commands. As in software reset, EDD affects both devices on a + PATA bus. + + Although EDD does reset devices, this doesn't suit error handling as + EDD cannot be issued while BSY is set and it's unclear how it will + act when device is in unknown/weird state. + +ATAPI DEVICE RESET command + This is very similar to software reset except that reset can be + restricted to the selected device without affecting the other device + sharing the cable. + +SATA phy reset + This is the preferred way of resetting a SATA device. In effect, + it's identical to PATA hardware reset. Note that this can be done + with the standard SCR Control register. As such, it's usually easier + to implement than software reset. + +One more thing to consider when resetting devices is that resetting +clears certain configuration parameters and they need to be set to their +previous or newly adjusted values after reset. + +Parameters affected are. + +- CHS set up with INITIALIZE DEVICE PARAMETERS (seldom used) + +- Parameters set with SET FEATURES including transfer mode setting + +- Block count set with SET MULTIPLE MODE + +- Other parameters (SET MAX, MEDIA LOCK...) + +ATA/ATAPI standard specifies that some parameters must be maintained +across hardware or software reset, but doesn't strictly specify all of +them. Always reconfiguring needed parameters after reset is required for +robustness. Note that this also applies when resuming from deep sleep +(power-off). + +Also, ATA/ATAPI standard requires that IDENTIFY DEVICE / IDENTIFY PACKET +DEVICE is issued after any configuration parameter is updated or a +hardware reset and the result used for further operation. OS driver is +required to implement revalidation mechanism to support this. + +Reconfigure transport +~~~~~~~~~~~~~~~~~~~~~ + +For both PATA and SATA, a lot of corners are cut for cheap connectors, +cables or controllers and it's quite common to see high transmission +error rate. This can be mitigated by lowering transmission speed. + +The following is a possible scheme Jeff Garzik suggested. + + If more than $N (3?) transmission errors happen in 15 minutes, + + - if SATA, decrease SATA PHY speed. if speed cannot be decreased, + + - decrease UDMA xfer speed. if at UDMA0, switch to PIO4, + + - decrease PIO xfer speed. if at PIO3, complain, but continue + +ata_piix Internals +=================== + +.. kernel-doc:: drivers/ata/ata_piix.c + :internal: + +sata_sil Internals +=================== + +.. kernel-doc:: drivers/ata/sata_sil.c + :internal: + +Thanks +====== + +The bulk of the ATA knowledge comes thanks to long conversations with +Andre Hedrick (www.linux-ide.org), and long hours pondering the ATA and +SCSI specifications. + +Thanks to Alan Cox for pointing out similarities between SATA and SCSI, +and in general for motivation to hack on libata. + +libata's device detection method, ata_pio_devchk, and in general all +the early probing was based on extensive study of Hale Landis's +probe/reset code in his ATADRVR driver (www.ata-atapi.com). diff --git a/Documentation/driver-api/mailbox.rst b/Documentation/driver-api/mailbox.rst new file mode 100644 index 0000000000..0ed95009cc --- /dev/null +++ b/Documentation/driver-api/mailbox.rst @@ -0,0 +1,129 @@ +============================ +The Common Mailbox Framework +============================ + +:Author: Jassi Brar <jaswinder.singh@linaro.org> + +This document aims to help developers write client and controller +drivers for the API. But before we start, let us note that the +client (especially) and controller drivers are likely going to be +very platform specific because the remote firmware is likely to be +proprietary and implement non-standard protocol. So even if two +platforms employ, say, PL320 controller, the client drivers can't +be shared across them. Even the PL320 driver might need to accommodate +some platform specific quirks. So the API is meant mainly to avoid +similar copies of code written for each platform. Having said that, +nothing prevents the remote f/w to also be Linux based and use the +same api there. However none of that helps us locally because we only +ever deal at client's protocol level. + +Some of the choices made during implementation are the result of this +peculiarity of this "common" framework. + + + +Controller Driver (See include/linux/mailbox_controller.h) +========================================================== + + +Allocate mbox_controller and the array of mbox_chan. +Populate mbox_chan_ops, except peek_data() all are mandatory. +The controller driver might know a message has been consumed +by the remote by getting an IRQ or polling some hardware flag +or it can never know (the client knows by way of the protocol). +The method in order of preference is IRQ -> Poll -> None, which +the controller driver should set via 'txdone_irq' or 'txdone_poll' +or neither. + + +Client Driver (See include/linux/mailbox_client.h) +================================================== + + +The client might want to operate in blocking mode (synchronously +send a message through before returning) or non-blocking/async mode (submit +a message and a callback function to the API and return immediately). + +:: + + struct demo_client { + struct mbox_client cl; + struct mbox_chan *mbox; + struct completion c; + bool async; + /* ... */ + }; + + /* + * This is the handler for data received from remote. The behaviour is purely + * dependent upon the protocol. This is just an example. + */ + static void message_from_remote(struct mbox_client *cl, void *mssg) + { + struct demo_client *dc = container_of(cl, struct demo_client, cl); + if (dc->async) { + if (is_an_ack(mssg)) { + /* An ACK to our last sample sent */ + return; /* Or do something else here */ + } else { /* A new message from remote */ + queue_req(mssg); + } + } else { + /* Remote f/w sends only ACK packets on this channel */ + return; + } + } + + static void sample_sent(struct mbox_client *cl, void *mssg, int r) + { + struct demo_client *dc = container_of(cl, struct demo_client, cl); + complete(&dc->c); + } + + static void client_demo(struct platform_device *pdev) + { + struct demo_client *dc_sync, *dc_async; + /* The controller already knows async_pkt and sync_pkt */ + struct async_pkt ap; + struct sync_pkt sp; + + dc_sync = kzalloc(sizeof(*dc_sync), GFP_KERNEL); + dc_async = kzalloc(sizeof(*dc_async), GFP_KERNEL); + + /* Populate non-blocking mode client */ + dc_async->cl.dev = &pdev->dev; + dc_async->cl.rx_callback = message_from_remote; + dc_async->cl.tx_done = sample_sent; + dc_async->cl.tx_block = false; + dc_async->cl.tx_tout = 0; /* doesn't matter here */ + dc_async->cl.knows_txdone = false; /* depending upon protocol */ + dc_async->async = true; + init_completion(&dc_async->c); + + /* Populate blocking mode client */ + dc_sync->cl.dev = &pdev->dev; + dc_sync->cl.rx_callback = message_from_remote; + dc_sync->cl.tx_done = NULL; /* operate in blocking mode */ + dc_sync->cl.tx_block = true; + dc_sync->cl.tx_tout = 500; /* by half a second */ + dc_sync->cl.knows_txdone = false; /* depending upon protocol */ + dc_sync->async = false; + + /* ASync mailbox is listed second in 'mboxes' property */ + dc_async->mbox = mbox_request_channel(&dc_async->cl, 1); + /* Populate data packet */ + /* ap.xxx = 123; etc */ + /* Send async message to remote */ + mbox_send_message(dc_async->mbox, &ap); + + /* Sync mailbox is listed first in 'mboxes' property */ + dc_sync->mbox = mbox_request_channel(&dc_sync->cl, 0); + /* Populate data packet */ + /* sp.abc = 123; etc */ + /* Send message to remote in blocking mode */ + mbox_send_message(dc_sync->mbox, &sp); + /* At this point 'sp' has been sent */ + + /* Now wait for async chan to be done */ + wait_for_completion(&dc_async->c); + } diff --git a/Documentation/driver-api/md/index.rst b/Documentation/driver-api/md/index.rst new file mode 100644 index 0000000000..18f54a7d7d --- /dev/null +++ b/Documentation/driver-api/md/index.rst @@ -0,0 +1,12 @@ +.. SPDX-License-Identifier: GPL-2.0 + +==== +RAID +==== + +.. toctree:: + :maxdepth: 1 + + md-cluster + raid5-cache + raid5-ppl diff --git a/Documentation/driver-api/md/md-cluster.rst b/Documentation/driver-api/md/md-cluster.rst new file mode 100644 index 0000000000..e93f35e0e1 --- /dev/null +++ b/Documentation/driver-api/md/md-cluster.rst @@ -0,0 +1,385 @@ +========== +MD Cluster +========== + +The cluster MD is a shared-device RAID for a cluster, it supports +two levels: raid1 and raid10 (limited support). + + +1. On-disk format +================= + +Separate write-intent-bitmaps are used for each cluster node. +The bitmaps record all writes that may have been started on that node, +and may not yet have finished. The on-disk layout is:: + + 0 4k 8k 12k + ------------------------------------------------------------------- + | idle | md super | bm super [0] + bits | + | bm bits[0, contd] | bm super[1] + bits | bm bits[1, contd] | + | bm super[2] + bits | bm bits [2, contd] | bm super[3] + bits | + | bm bits [3, contd] | | | + +During "normal" functioning we assume the filesystem ensures that only +one node writes to any given block at a time, so a write request will + + - set the appropriate bit (if not already set) + - commit the write to all mirrors + - schedule the bit to be cleared after a timeout. + +Reads are just handled normally. It is up to the filesystem to ensure +one node doesn't read from a location where another node (or the same +node) is writing. + + +2. DLM Locks for management +=========================== + +There are three groups of locks for managing the device: + +2.1 Bitmap lock resource (bm_lockres) +------------------------------------- + + The bm_lockres protects individual node bitmaps. They are named in + the form bitmap000 for node 1, bitmap001 for node 2 and so on. When a + node joins the cluster, it acquires the lock in PW mode and it stays + so during the lifetime the node is part of the cluster. The lock + resource number is based on the slot number returned by the DLM + subsystem. Since DLM starts node count from one and bitmap slots + start from zero, one is subtracted from the DLM slot number to arrive + at the bitmap slot number. + + The LVB of the bitmap lock for a particular node records the range + of sectors that are being re-synced by that node. No other + node may write to those sectors. This is used when a new nodes + joins the cluster. + +2.2 Message passing locks +------------------------- + + Each node has to communicate with other nodes when starting or ending + resync, and for metadata superblock updates. This communication is + managed through three locks: "token", "message", and "ack", together + with the Lock Value Block (LVB) of one of the "message" lock. + +2.3 new-device management +------------------------- + + A single lock: "no-new-dev" is used to coordinate the addition of + new devices - this must be synchronized across the array. + Normally all nodes hold a concurrent-read lock on this device. + +3. Communication +================ + + Messages can be broadcast to all nodes, and the sender waits for all + other nodes to acknowledge the message before proceeding. Only one + message can be processed at a time. + +3.1 Message Types +----------------- + + There are six types of messages which are passed: + +3.1.1 METADATA_UPDATED +^^^^^^^^^^^^^^^^^^^^^^ + + informs other nodes that the metadata has + been updated, and the node must re-read the md superblock. This is + performed synchronously. It is primarily used to signal device + failure. + +3.1.2 RESYNCING +^^^^^^^^^^^^^^^ + informs other nodes that a resync is initiated or + ended so that each node may suspend or resume the region. Each + RESYNCING message identifies a range of the devices that the + sending node is about to resync. This overrides any previous + notification from that node: only one ranged can be resynced at a + time per-node. + +3.1.3 NEWDISK +^^^^^^^^^^^^^ + + informs other nodes that a device is being added to + the array. Message contains an identifier for that device. See + below for further details. + +3.1.4 REMOVE +^^^^^^^^^^^^ + + A failed or spare device is being removed from the + array. The slot-number of the device is included in the message. + + 3.1.5 RE_ADD: + + A failed device is being re-activated - the assumption + is that it has been determined to be working again. + + 3.1.6 BITMAP_NEEDS_SYNC: + + If a node is stopped locally but the bitmap + isn't clean, then another node is informed to take the ownership of + resync. + +3.2 Communication mechanism +--------------------------- + + The DLM LVB is used to communicate within nodes of the cluster. There + are three resources used for the purpose: + +3.2.1 token +^^^^^^^^^^^ + The resource which protects the entire communication + system. The node having the token resource is allowed to + communicate. + +3.2.2 message +^^^^^^^^^^^^^ + The lock resource which carries the data to communicate. + +3.2.3 ack +^^^^^^^^^ + + The resource, acquiring which means the message has been + acknowledged by all nodes in the cluster. The BAST of the resource + is used to inform the receiving node that a node wants to + communicate. + +The algorithm is: + + 1. receive status - all nodes have concurrent-reader lock on "ack":: + + sender receiver receiver + "ack":CR "ack":CR "ack":CR + + 2. sender get EX on "token", + sender get EX on "message":: + + sender receiver receiver + "token":EX "ack":CR "ack":CR + "message":EX + "ack":CR + + Sender checks that it still needs to send a message. Messages + received or other events that happened while waiting for the + "token" may have made this message inappropriate or redundant. + + 3. sender writes LVB + + sender down-convert "message" from EX to CW + + sender try to get EX of "ack" + + :: + + [ wait until all receivers have *processed* the "message" ] + + [ triggered by bast of "ack" ] + receiver get CR on "message" + receiver read LVB + receiver processes the message + [ wait finish ] + receiver releases "ack" + receiver tries to get PR on "message" + + sender receiver receiver + "token":EX "message":CR "message":CR + "message":CW + "ack":EX + + 4. triggered by grant of EX on "ack" (indicating all receivers + have processed message) + + sender down-converts "ack" from EX to CR + + sender releases "message" + + sender releases "token" + + :: + + receiver upconvert to PR on "message" + receiver get CR of "ack" + receiver release "message" + + sender receiver receiver + "ack":CR "ack":CR "ack":CR + + +4. Handling Failures +==================== + +4.1 Node Failure +---------------- + + When a node fails, the DLM informs the cluster with the slot + number. The node starts a cluster recovery thread. The cluster + recovery thread: + + - acquires the bitmap<number> lock of the failed node + - opens the bitmap + - reads the bitmap of the failed node + - copies the set bitmap to local node + - cleans the bitmap of the failed node + - releases bitmap<number> lock of the failed node + - initiates resync of the bitmap on the current node + md_check_recovery is invoked within recover_bitmaps, + then md_check_recovery -> metadata_update_start/finish, + it will lock the communication by lock_comm. + Which means when one node is resyncing it blocks all + other nodes from writing anywhere on the array. + + The resync process is the regular md resync. However, in a clustered + environment when a resync is performed, it needs to tell other nodes + of the areas which are suspended. Before a resync starts, the node + send out RESYNCING with the (lo,hi) range of the area which needs to + be suspended. Each node maintains a suspend_list, which contains the + list of ranges which are currently suspended. On receiving RESYNCING, + the node adds the range to the suspend_list. Similarly, when the node + performing resync finishes, it sends RESYNCING with an empty range to + other nodes and other nodes remove the corresponding entry from the + suspend_list. + + A helper function, ->area_resyncing() can be used to check if a + particular I/O range should be suspended or not. + +4.2 Device Failure +================== + + Device failures are handled and communicated with the metadata update + routine. When a node detects a device failure it does not allow + any further writes to that device until the failure has been + acknowledged by all other nodes. + +5. Adding a new Device +---------------------- + + For adding a new device, it is necessary that all nodes "see" the new + device to be added. For this, the following algorithm is used: + + 1. Node 1 issues mdadm --manage /dev/mdX --add /dev/sdYY which issues + ioctl(ADD_NEW_DISK with disc.state set to MD_DISK_CLUSTER_ADD) + 2. Node 1 sends a NEWDISK message with uuid and slot number + 3. Other nodes issue kobject_uevent_env with uuid and slot number + (Steps 4,5 could be a udev rule) + 4. In userspace, the node searches for the disk, perhaps + using blkid -t SUB_UUID="" + 5. Other nodes issue either of the following depending on whether + the disk was found: + ioctl(ADD_NEW_DISK with disc.state set to MD_DISK_CANDIDATE and + disc.number set to slot number) + ioctl(CLUSTERED_DISK_NACK) + 6. Other nodes drop lock on "no-new-devs" (CR) if device is found + 7. Node 1 attempts EX lock on "no-new-dev" + 8. If node 1 gets the lock, it sends METADATA_UPDATED after + unmarking the disk as SpareLocal + 9. If not (get "no-new-dev" lock), it fails the operation and sends + METADATA_UPDATED. + 10. Other nodes get the information whether a disk is added or not + by the following METADATA_UPDATED. + +6. Module interface +=================== + + There are 17 call-backs which the md core can make to the cluster + module. Understanding these can give a good overview of the whole + process. + +6.1 join(nodes) and leave() +--------------------------- + + These are called when an array is started with a clustered bitmap, + and when the array is stopped. join() ensures the cluster is + available and initializes the various resources. + Only the first 'nodes' nodes in the cluster can use the array. + +6.2 slot_number() +----------------- + + Reports the slot number advised by the cluster infrastructure. + Range is from 0 to nodes-1. + +6.3 resync_info_update() +------------------------ + + This updates the resync range that is stored in the bitmap lock. + The starting point is updated as the resync progresses. The + end point is always the end of the array. + It does *not* send a RESYNCING message. + +6.4 resync_start(), resync_finish() +----------------------------------- + + These are called when resync/recovery/reshape starts or stops. + They update the resyncing range in the bitmap lock and also + send a RESYNCING message. resync_start reports the whole + array as resyncing, resync_finish reports none of it. + + resync_finish() also sends a BITMAP_NEEDS_SYNC message which + allows some other node to take over. + +6.5 metadata_update_start(), metadata_update_finish(), metadata_update_cancel() +------------------------------------------------------------------------------- + + metadata_update_start is used to get exclusive access to + the metadata. If a change is still needed once that access is + gained, metadata_update_finish() will send a METADATA_UPDATE + message to all other nodes, otherwise metadata_update_cancel() + can be used to release the lock. + +6.6 area_resyncing() +-------------------- + + This combines two elements of functionality. + + Firstly, it will check if any node is currently resyncing + anything in a given range of sectors. If any resync is found, + then the caller will avoid writing or read-balancing in that + range. + + Secondly, while node recovery is happening it reports that + all areas are resyncing for READ requests. This avoids races + between the cluster-filesystem and the cluster-RAID handling + a node failure. + +6.7 add_new_disk_start(), add_new_disk_finish(), new_disk_ack() +--------------------------------------------------------------- + + These are used to manage the new-disk protocol described above. + When a new device is added, add_new_disk_start() is called before + it is bound to the array and, if that succeeds, add_new_disk_finish() + is called the device is fully added. + + When a device is added in acknowledgement to a previous + request, or when the device is declared "unavailable", + new_disk_ack() is called. + +6.8 remove_disk() +----------------- + + This is called when a spare or failed device is removed from + the array. It causes a REMOVE message to be send to other nodes. + +6.9 gather_bitmaps() +-------------------- + + This sends a RE_ADD message to all other nodes and then + gathers bitmap information from all bitmaps. This combined + bitmap is then used to recovery the re-added device. + +6.10 lock_all_bitmaps() and unlock_all_bitmaps() +------------------------------------------------ + + These are called when change bitmap to none. If a node plans + to clear the cluster raid's bitmap, it need to make sure no other + nodes are using the raid which is achieved by lock all bitmap + locks within the cluster, and also those locks are unlocked + accordingly. + +7. Unsupported features +======================= + +There are somethings which are not supported by cluster MD yet. + +- change array_sectors. diff --git a/Documentation/driver-api/md/raid5-cache.rst b/Documentation/driver-api/md/raid5-cache.rst new file mode 100644 index 0000000000..5f947cbc2e --- /dev/null +++ b/Documentation/driver-api/md/raid5-cache.rst @@ -0,0 +1,111 @@ +================ +RAID 4/5/6 cache +================ + +Raid 4/5/6 could include an extra disk for data cache besides normal RAID +disks. The role of RAID disks isn't changed with the cache disk. The cache disk +caches data to the RAID disks. The cache can be in write-through (supported +since 4.4) or write-back mode (supported since 4.10). mdadm (supported since +3.4) has a new option '--write-journal' to create array with cache. Please +refer to mdadm manual for details. By default (RAID array starts), the cache is +in write-through mode. A user can switch it to write-back mode by:: + + echo "write-back" > /sys/block/md0/md/journal_mode + +And switch it back to write-through mode by:: + + echo "write-through" > /sys/block/md0/md/journal_mode + +In both modes, all writes to the array will hit cache disk first. This means +the cache disk must be fast and sustainable. + +write-through mode +================== + +This mode mainly fixes the 'write hole' issue. For RAID 4/5/6 array, an unclean +shutdown can cause data in some stripes to not be in consistent state, eg, data +and parity don't match. The reason is that a stripe write involves several RAID +disks and it's possible the writes don't hit all RAID disks yet before the +unclean shutdown. We call an array degraded if it has inconsistent data. MD +tries to resync the array to bring it back to normal state. But before the +resync completes, any system crash will expose the chance of real data +corruption in the RAID array. This problem is called 'write hole'. + +The write-through cache will cache all data on cache disk first. After the data +is safe on the cache disk, the data will be flushed onto RAID disks. The +two-step write will guarantee MD can recover correct data after unclean +shutdown even the array is degraded. Thus the cache can close the 'write hole'. + +In write-through mode, MD reports IO completion to upper layer (usually +filesystems) after the data is safe on RAID disks, so cache disk failure +doesn't cause data loss. Of course cache disk failure means the array is +exposed to 'write hole' again. + +In write-through mode, the cache disk isn't required to be big. Several +hundreds megabytes are enough. + +write-back mode +=============== + +write-back mode fixes the 'write hole' issue too, since all write data is +cached on cache disk. But the main goal of 'write-back' cache is to speed up +write. If a write crosses all RAID disks of a stripe, we call it full-stripe +write. For non-full-stripe writes, MD must read old data before the new parity +can be calculated. These synchronous reads hurt write throughput. Some writes +which are sequential but not dispatched in the same time will suffer from this +overhead too. Write-back cache will aggregate the data and flush the data to +RAID disks only after the data becomes a full stripe write. This will +completely avoid the overhead, so it's very helpful for some workloads. A +typical workload which does sequential write followed by fsync is an example. + +In write-back mode, MD reports IO completion to upper layer (usually +filesystems) right after the data hits cache disk. The data is flushed to raid +disks later after specific conditions met. So cache disk failure will cause +data loss. + +In write-back mode, MD also caches data in memory. The memory cache includes +the same data stored on cache disk, so a power loss doesn't cause data loss. +The memory cache size has performance impact for the array. It's recommended +the size is big. A user can configure the size by:: + + echo "2048" > /sys/block/md0/md/stripe_cache_size + +Too small cache disk will make the write aggregation less efficient in this +mode depending on the workloads. It's recommended to use a cache disk with at +least several gigabytes size in write-back mode. + +The implementation +================== + +The write-through and write-back cache use the same disk format. The cache disk +is organized as a simple write log. The log consists of 'meta data' and 'data' +pairs. The meta data describes the data. It also includes checksum and sequence +ID for recovery identification. Data can be IO data and parity data. Data is +checksummed too. The checksum is stored in the meta data ahead of the data. The +checksum is an optimization because MD can write meta and data freely without +worry about the order. MD superblock has a field pointed to the valid meta data +of log head. + +The log implementation is pretty straightforward. The difficult part is the +order in which MD writes data to cache disk and RAID disks. Specifically, in +write-through mode, MD calculates parity for IO data, writes both IO data and +parity to the log, writes the data and parity to RAID disks after the data and +parity is settled down in log and finally the IO is finished. Read just reads +from raid disks as usual. + +In write-back mode, MD writes IO data to the log and reports IO completion. The +data is also fully cached in memory at that time, which means read must query +memory cache. If some conditions are met, MD will flush the data to RAID disks. +MD will calculate parity for the data and write parity into the log. After this +is finished, MD will write both data and parity into RAID disks, then MD can +release the memory cache. The flush conditions could be stripe becomes a full +stripe write, free cache disk space is low or free in-kernel memory cache space +is low. + +After an unclean shutdown, MD does recovery. MD reads all meta data and data +from the log. The sequence ID and checksum will help us detect corrupted meta +data and data. If MD finds a stripe with data and valid parities (1 parity for +raid4/5 and 2 for raid6), MD will write the data and parities to RAID disks. If +parities are incompleted, they are discarded. If part of data is corrupted, +they are discarded too. MD then loads valid data and writes them to RAID disks +in normal way. diff --git a/Documentation/driver-api/md/raid5-ppl.rst b/Documentation/driver-api/md/raid5-ppl.rst new file mode 100644 index 0000000000..357e5515bc --- /dev/null +++ b/Documentation/driver-api/md/raid5-ppl.rst @@ -0,0 +1,47 @@ +================== +Partial Parity Log +================== + +Partial Parity Log (PPL) is a feature available for RAID5 arrays. The issue +addressed by PPL is that after a dirty shutdown, parity of a particular stripe +may become inconsistent with data on other member disks. If the array is also +in degraded state, there is no way to recalculate parity, because one of the +disks is missing. This can lead to silent data corruption when rebuilding the +array or using it is as degraded - data calculated from parity for array blocks +that have not been touched by a write request during the unclean shutdown can +be incorrect. Such condition is known as the RAID5 Write Hole. Because of +this, md by default does not allow starting a dirty degraded array. + +Partial parity for a write operation is the XOR of stripe data chunks not +modified by this write. It is just enough data needed for recovering from the +write hole. XORing partial parity with the modified chunks produces parity for +the stripe, consistent with its state before the write operation, regardless of +which chunk writes have completed. If one of the not modified data disks of +this stripe is missing, this updated parity can be used to recover its +contents. PPL recovery is also performed when starting an array after an +unclean shutdown and all disks are available, eliminating the need to resync +the array. Because of this, using write-intent bitmap and PPL together is not +supported. + +When handling a write request PPL writes partial parity before new data and +parity are dispatched to disks. PPL is a distributed log - it is stored on +array member drives in the metadata area, on the parity drive of a particular +stripe. It does not require a dedicated journaling drive. Write performance is +reduced by up to 30%-40% but it scales with the number of drives in the array +and the journaling drive does not become a bottleneck or a single point of +failure. + +Unlike raid5-cache, the other solution in md for closing the write hole, PPL is +not a true journal. It does not protect from losing in-flight data, only from +silent data corruption. If a dirty disk of a stripe is lost, no PPL recovery is +performed for this stripe (parity is not updated). So it is possible to have +arbitrary data in the written part of a stripe if that disk is lost. In such +case the behavior is the same as in plain raid5. + +PPL is available for md version-1 metadata and external (specifically IMSM) +metadata arrays. It can be enabled using mdadm option --consistency-policy=ppl. + +There is a limitation of maximum 64 disks in the array for PPL. It allows to +keep data structures and implementation simple. RAID5 arrays with so many disks +are not likely due to high risk of multiple disks failure. Such restriction +should not be a real life limitation. diff --git a/Documentation/driver-api/media/camera-sensor.rst b/Documentation/driver-api/media/camera-sensor.rst new file mode 100644 index 0000000000..93f4f2536c --- /dev/null +++ b/Documentation/driver-api/media/camera-sensor.rst @@ -0,0 +1,175 @@ +.. SPDX-License-Identifier: GPL-2.0 + +Writing camera sensor drivers +============================= + +CSI-2 and parallel (BT.601 and BT.656) busses +--------------------------------------------- + +Please see :ref:`transmitter-receiver`. + +Handling clocks +--------------- + +Camera sensors have an internal clock tree including a PLL and a number of +divisors. The clock tree is generally configured by the driver based on a few +input parameters that are specific to the hardware:: the external clock frequency +and the link frequency. The two parameters generally are obtained from system +firmware. **No other frequencies should be used in any circumstances.** + +The reason why the clock frequencies are so important is that the clock signals +come out of the SoC, and in many cases a specific frequency is designed to be +used in the system. Using another frequency may cause harmful effects +elsewhere. Therefore only the pre-determined frequencies are configurable by the +user. + +ACPI +~~~~ + +Read the ``clock-frequency`` _DSD property to denote the frequency. The driver +can rely on this frequency being used. + +Devicetree +~~~~~~~~~~ + +The currently preferred way to achieve this is using ``assigned-clocks``, +``assigned-clock-parents`` and ``assigned-clock-rates`` properties. See +``Documentation/devicetree/bindings/clock/clock-bindings.txt`` for more +information. The driver then gets the frequency using ``clk_get_rate()``. + +This approach has the drawback that there's no guarantee that the frequency +hasn't been modified directly or indirectly by another driver, or supported by +the board's clock tree to begin with. Changes to the Common Clock Framework API +are required to ensure reliability. + +Frame size +---------- + +There are two distinct ways to configure the frame size produced by camera +sensors. + +Freely configurable camera sensor drivers +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ + +Freely configurable camera sensor drivers expose the device's internal +processing pipeline as one or more sub-devices with different cropping and +scaling configurations. The output size of the device is the result of a series +of cropping and scaling operations from the device's pixel array's size. + +An example of such a driver is the CCS driver (see ``drivers/media/i2c/ccs``). + +Register list based drivers +~~~~~~~~~~~~~~~~~~~~~~~~~~~ + +Register list based drivers generally, instead of able to configure the device +they control based on user requests, are limited to a number of preset +configurations that combine a number of different parameters that on hardware +level are independent. How a driver picks such configuration is based on the +format set on a source pad at the end of the device's internal pipeline. + +Most sensor drivers are implemented this way, see e.g. +``drivers/media/i2c/imx319.c`` for an example. + +Frame interval configuration +---------------------------- + +There are two different methods for obtaining possibilities for different frame +intervals as well as configuring the frame interval. Which one to implement +depends on the type of the device. + +Raw camera sensors +~~~~~~~~~~~~~~~~~~ + +Instead of a high level parameter such as frame interval, the frame interval is +a result of the configuration of a number of camera sensor implementation +specific parameters. Luckily, these parameters tend to be the same for more or +less all modern raw camera sensors. + +The frame interval is calculated using the following equation:: + + frame interval = (analogue crop width + horizontal blanking) * + (analogue crop height + vertical blanking) / pixel rate + +The formula is bus independent and is applicable for raw timing parameters on +large variety of devices beyond camera sensors. Devices that have no analogue +crop, use the full source image size, i.e. pixel array size. + +Horizontal and vertical blanking are specified by ``V4L2_CID_HBLANK`` and +``V4L2_CID_VBLANK``, respectively. The unit of the ``V4L2_CID_HBLANK`` control +is pixels and the unit of the ``V4L2_CID_VBLANK`` is lines. The pixel rate in +the sensor's **pixel array** is specified by ``V4L2_CID_PIXEL_RATE`` in the same +sub-device. The unit of that control is pixels per second. + +Register list based drivers need to implement read-only sub-device nodes for the +purpose. Devices that are not register list based need these to configure the +device's internal processing pipeline. + +The first entity in the linear pipeline is the pixel array. The pixel array may +be followed by other entities that are there to allow configuring binning, +skipping, scaling or digital crop :ref:`v4l2-subdev-selections`. + +USB cameras etc. devices +~~~~~~~~~~~~~~~~~~~~~~~~ + +USB video class hardware, as well as many cameras offering a similar higher +level interface natively, generally use the concept of frame interval (or frame +rate) on device level in firmware or hardware. This means lower level controls +implemented by raw cameras may not be used on uAPI (or even kAPI) to control the +frame interval on these devices. + +Power management +---------------- + +Always use runtime PM to manage the power states of your device. Camera sensor +drivers are in no way special in this respect: they are responsible for +controlling the power state of the device they otherwise control as well. In +general, the device must be powered on at least when its registers are being +accessed and when it is streaming. + +Existing camera sensor drivers may rely on the old +struct v4l2_subdev_core_ops->s_power() callback for bridge or ISP drivers to +manage their power state. This is however **deprecated**. If you feel you need +to begin calling an s_power from an ISP or a bridge driver, instead please add +runtime PM support to the sensor driver you are using. Likewise, new drivers +should not use s_power. + +Please see examples in e.g. ``drivers/media/i2c/ov8856.c`` and +``drivers/media/i2c/ccs/ccs-core.c``. The two drivers work in both ACPI +and DT based systems. + +Control framework +~~~~~~~~~~~~~~~~~ + +``v4l2_ctrl_handler_setup()`` function may not be used in the device's runtime +PM ``runtime_resume`` callback, as it has no way to figure out the power state +of the device. This is because the power state of the device is only changed +after the power state transition has taken place. The ``s_ctrl`` callback can be +used to obtain device's power state after the power state transition: + +.. c:function:: int pm_runtime_get_if_in_use(struct device *dev); + +The function returns a non-zero value if it succeeded getting the power count or +runtime PM was disabled, in either of which cases the driver may proceed to +access the device. + +Rotation, orientation and flipping +---------------------------------- + +Some systems have the camera sensor mounted upside down compared to its natural +mounting rotation. In such cases, drivers shall expose the information to +userspace with the :ref:`V4L2_CID_CAMERA_SENSOR_ROTATION +<v4l2-camera-sensor-rotation>` control. + +Sensor drivers shall also report the sensor's mounting orientation with the +:ref:`V4L2_CID_CAMERA_SENSOR_ORIENTATION <v4l2-camera-sensor-orientation>`. + +Use ``v4l2_fwnode_device_parse()`` to obtain rotation and orientation +information from system firmware and ``v4l2_ctrl_new_fwnode_properties()`` to +register the appropriate controls. + +Sensor drivers that have any vertical or horizontal flips embedded in the +register programming sequences shall initialize the V4L2_CID_HFLIP and +V4L2_CID_VFLIP controls with the values programmed by the register sequences. +The default values of these controls shall be 0 (disabled). Especially these +controls shall not be inverted, independently of the sensor's mounting +rotation. diff --git a/Documentation/driver-api/media/cec-core.rst b/Documentation/driver-api/media/cec-core.rst new file mode 100644 index 0000000000..f1ffdec388 --- /dev/null +++ b/Documentation/driver-api/media/cec-core.rst @@ -0,0 +1,502 @@ +.. SPDX-License-Identifier: GPL-2.0 + +CEC Kernel Support +================== + +The CEC framework provides a unified kernel interface for use with HDMI CEC +hardware. It is designed to handle a multiple types of hardware (receivers, +transmitters, USB dongles). The framework also gives the option to decide +what to do in the kernel driver and what should be handled by userspace +applications. In addition it integrates the remote control passthrough +feature into the kernel's remote control framework. + + +The CEC Protocol +---------------- + +The CEC protocol enables consumer electronic devices to communicate with each +other through the HDMI connection. The protocol uses logical addresses in the +communication. The logical address is strictly connected with the functionality +provided by the device. The TV acting as the communication hub is always +assigned address 0. The physical address is determined by the physical +connection between devices. + +The CEC framework described here is up to date with the CEC 2.0 specification. +It is documented in the HDMI 1.4 specification with the new 2.0 bits documented +in the HDMI 2.0 specification. But for most of the features the freely available +HDMI 1.3a specification is sufficient: + +https://www.hdmi.org/spec/index + + +CEC Adapter Interface +--------------------- + +The struct cec_adapter represents the CEC adapter hardware. It is created by +calling cec_allocate_adapter() and deleted by calling cec_delete_adapter(): + +.. c:function:: + struct cec_adapter *cec_allocate_adapter(const struct cec_adap_ops *ops, \ + void *priv, const char *name, \ + u32 caps, u8 available_las); + +.. c:function:: + void cec_delete_adapter(struct cec_adapter *adap); + +To create an adapter you need to pass the following information: + +ops: + adapter operations which are called by the CEC framework and that you + have to implement. + +priv: + will be stored in adap->priv and can be used by the adapter ops. + Use cec_get_drvdata(adap) to get the priv pointer. + +name: + the name of the CEC adapter. Note: this name will be copied. + +caps: + capabilities of the CEC adapter. These capabilities determine the + capabilities of the hardware and which parts are to be handled + by userspace and which parts are handled by kernelspace. The + capabilities are returned by CEC_ADAP_G_CAPS. + +available_las: + the number of simultaneous logical addresses that this + adapter can handle. Must be 1 <= available_las <= CEC_MAX_LOG_ADDRS. + +To obtain the priv pointer use this helper function: + +.. c:function:: + void *cec_get_drvdata(const struct cec_adapter *adap); + +To register the /dev/cecX device node and the remote control device (if +CEC_CAP_RC is set) you call: + +.. c:function:: + int cec_register_adapter(struct cec_adapter *adap, \ + struct device *parent); + +where parent is the parent device. + +To unregister the devices call: + +.. c:function:: + void cec_unregister_adapter(struct cec_adapter *adap); + +Note: if cec_register_adapter() fails, then call cec_delete_adapter() to +clean up. But if cec_register_adapter() succeeded, then only call +cec_unregister_adapter() to clean up, never cec_delete_adapter(). The +unregister function will delete the adapter automatically once the last user +of that /dev/cecX device has closed its file handle. + + +Implementing the Low-Level CEC Adapter +-------------------------------------- + +The following low-level adapter operations have to be implemented in +your driver: + +.. c:struct:: cec_adap_ops + +.. code-block:: none + + struct cec_adap_ops + { + /* Low-level callbacks */ + int (*adap_enable)(struct cec_adapter *adap, bool enable); + int (*adap_monitor_all_enable)(struct cec_adapter *adap, bool enable); + int (*adap_monitor_pin_enable)(struct cec_adapter *adap, bool enable); + int (*adap_log_addr)(struct cec_adapter *adap, u8 logical_addr); + void (*adap_unconfigured)(struct cec_adapter *adap); + int (*adap_transmit)(struct cec_adapter *adap, u8 attempts, + u32 signal_free_time, struct cec_msg *msg); + void (*adap_nb_transmit_canceled)(struct cec_adapter *adap, + const struct cec_msg *msg); + void (*adap_status)(struct cec_adapter *adap, struct seq_file *file); + void (*adap_free)(struct cec_adapter *adap); + + /* Error injection callbacks */ + ... + + /* High-level callback */ + ... + }; + +These low-level ops deal with various aspects of controlling the CEC adapter +hardware. They are all called with the mutex adap->lock held. + + +To enable/disable the hardware:: + + int (*adap_enable)(struct cec_adapter *adap, bool enable); + +This callback enables or disables the CEC hardware. Enabling the CEC hardware +means powering it up in a state where no logical addresses are claimed. The +physical address will always be valid if CEC_CAP_NEEDS_HPD is set. If that +capability is not set, then the physical address can change while the CEC +hardware is enabled. CEC drivers should not set CEC_CAP_NEEDS_HPD unless +the hardware design requires that as this will make it impossible to wake +up displays that pull the HPD low when in standby mode. The initial +state of the CEC adapter after calling cec_allocate_adapter() is disabled. + +Note that adap_enable must return 0 if enable is false. + + +To enable/disable the 'monitor all' mode:: + + int (*adap_monitor_all_enable)(struct cec_adapter *adap, bool enable); + +If enabled, then the adapter should be put in a mode to also monitor messages +that are not for us. Not all hardware supports this and this function is only +called if the CEC_CAP_MONITOR_ALL capability is set. This callback is optional +(some hardware may always be in 'monitor all' mode). + +Note that adap_monitor_all_enable must return 0 if enable is false. + + +To enable/disable the 'monitor pin' mode:: + + int (*adap_monitor_pin_enable)(struct cec_adapter *adap, bool enable); + +If enabled, then the adapter should be put in a mode to also monitor CEC pin +changes. Not all hardware supports this and this function is only called if +the CEC_CAP_MONITOR_PIN capability is set. This callback is optional +(some hardware may always be in 'monitor pin' mode). + +Note that adap_monitor_pin_enable must return 0 if enable is false. + + +To program a new logical address:: + + int (*adap_log_addr)(struct cec_adapter *adap, u8 logical_addr); + +If logical_addr == CEC_LOG_ADDR_INVALID then all programmed logical addresses +are to be erased. Otherwise the given logical address should be programmed. +If the maximum number of available logical addresses is exceeded, then it +should return -ENXIO. Once a logical address is programmed the CEC hardware +can receive directed messages to that address. + +Note that adap_log_addr must return 0 if logical_addr is CEC_LOG_ADDR_INVALID. + + +Called when the adapter is unconfigured:: + + void (*adap_unconfigured)(struct cec_adapter *adap); + +The adapter is unconfigured. If the driver has to take specific actions after +unconfiguration, then that can be done through this optional callback. + + +To transmit a new message:: + + int (*adap_transmit)(struct cec_adapter *adap, u8 attempts, + u32 signal_free_time, struct cec_msg *msg); + +This transmits a new message. The attempts argument is the suggested number of +attempts for the transmit. + +The signal_free_time is the number of data bit periods that the adapter should +wait when the line is free before attempting to send a message. This value +depends on whether this transmit is a retry, a message from a new initiator or +a new message for the same initiator. Most hardware will handle this +automatically, but in some cases this information is needed. + +The CEC_FREE_TIME_TO_USEC macro can be used to convert signal_free_time to +microseconds (one data bit period is 2.4 ms). + + +To pass on the result of a canceled non-blocking transmit:: + + void (*adap_nb_transmit_canceled)(struct cec_adapter *adap, + const struct cec_msg *msg); + +This optional callback can be used to obtain the result of a canceled +non-blocking transmit with sequence number msg->sequence. This is +called if the transmit was aborted, the transmit timed out (i.e. the +hardware never signaled that the transmit finished), or the transmit +was successful, but the wait for the expected reply was either aborted +or it timed out. + + +To log the current CEC hardware status:: + + void (*adap_status)(struct cec_adapter *adap, struct seq_file *file); + +This optional callback can be used to show the status of the CEC hardware. +The status is available through debugfs: cat /sys/kernel/debug/cec/cecX/status + +To free any resources when the adapter is deleted:: + + void (*adap_free)(struct cec_adapter *adap); + +This optional callback can be used to free any resources that might have been +allocated by the driver. It's called from cec_delete_adapter. + + +Your adapter driver will also have to react to events (typically interrupt +driven) by calling into the framework in the following situations: + +When a transmit finished (successfully or otherwise):: + + void cec_transmit_done(struct cec_adapter *adap, u8 status, + u8 arb_lost_cnt, u8 nack_cnt, u8 low_drive_cnt, + u8 error_cnt); + +or:: + + void cec_transmit_attempt_done(struct cec_adapter *adap, u8 status); + +The status can be one of: + +CEC_TX_STATUS_OK: + the transmit was successful. + +CEC_TX_STATUS_ARB_LOST: + arbitration was lost: another CEC initiator + took control of the CEC line and you lost the arbitration. + +CEC_TX_STATUS_NACK: + the message was nacked (for a directed message) or + acked (for a broadcast message). A retransmission is needed. + +CEC_TX_STATUS_LOW_DRIVE: + low drive was detected on the CEC bus. This indicates that + a follower detected an error on the bus and requested a + retransmission. + +CEC_TX_STATUS_ERROR: + some unspecified error occurred: this can be one of ARB_LOST + or LOW_DRIVE if the hardware cannot differentiate or something + else entirely. Some hardware only supports OK and FAIL as the + result of a transmit, i.e. there is no way to differentiate + between the different possible errors. In that case map FAIL + to CEC_TX_STATUS_NACK and not to CEC_TX_STATUS_ERROR. + +CEC_TX_STATUS_MAX_RETRIES: + could not transmit the message after trying multiple times. + Should only be set by the driver if it has hardware support for + retrying messages. If set, then the framework assumes that it + doesn't have to make another attempt to transmit the message + since the hardware did that already. + +The hardware must be able to differentiate between OK, NACK and 'something +else'. + +The \*_cnt arguments are the number of error conditions that were seen. +This may be 0 if no information is available. Drivers that do not support +hardware retry can just set the counter corresponding to the transmit error +to 1, if the hardware does support retry then either set these counters to +0 if the hardware provides no feedback of which errors occurred and how many +times, or fill in the correct values as reported by the hardware. + +Be aware that calling these functions can immediately start a new transmit +if there is one pending in the queue. So make sure that the hardware is in +a state where new transmits can be started *before* calling these functions. + +The cec_transmit_attempt_done() function is a helper for cases where the +hardware never retries, so the transmit is always for just a single +attempt. It will call cec_transmit_done() in turn, filling in 1 for the +count argument corresponding to the status. Or all 0 if the status was OK. + +When a CEC message was received: + +.. c:function:: + void cec_received_msg(struct cec_adapter *adap, struct cec_msg *msg); + +Speaks for itself. + +Implementing the interrupt handler +---------------------------------- + +Typically the CEC hardware provides interrupts that signal when a transmit +finished and whether it was successful or not, and it provides and interrupt +when a CEC message was received. + +The CEC driver should always process the transmit interrupts first before +handling the receive interrupt. The framework expects to see the cec_transmit_done +call before the cec_received_msg call, otherwise it can get confused if the +received message was in reply to the transmitted message. + +Optional: Implementing Error Injection Support +---------------------------------------------- + +If the CEC adapter supports Error Injection functionality, then that can +be exposed through the Error Injection callbacks: + +.. code-block:: none + + struct cec_adap_ops { + /* Low-level callbacks */ + ... + + /* Error injection callbacks */ + int (*error_inj_show)(struct cec_adapter *adap, struct seq_file *sf); + bool (*error_inj_parse_line)(struct cec_adapter *adap, char *line); + + /* High-level CEC message callback */ + ... + }; + +If both callbacks are set, then an ``error-inj`` file will appear in debugfs. +The basic syntax is as follows: + +Leading spaces/tabs are ignored. If the next character is a ``#`` or the end of the +line was reached, then the whole line is ignored. Otherwise a command is expected. + +This basic parsing is done in the CEC Framework. It is up to the driver to decide +what commands to implement. The only requirement is that the command ``clear`` without +any arguments must be implemented and that it will remove all current error injection +commands. + +This ensures that you can always do ``echo clear >error-inj`` to clear any error +injections without having to know the details of the driver-specific commands. + +Note that the output of ``error-inj`` shall be valid as input to ``error-inj``. +So this must work: + +.. code-block:: none + + $ cat error-inj >einj.txt + $ cat einj.txt >error-inj + +The first callback is called when this file is read and it should show the +current error injection state:: + + int (*error_inj_show)(struct cec_adapter *adap, struct seq_file *sf); + +It is recommended that it starts with a comment block with basic usage +information. It returns 0 for success and an error otherwise. + +The second callback will parse commands written to the ``error-inj`` file:: + + bool (*error_inj_parse_line)(struct cec_adapter *adap, char *line); + +The ``line`` argument points to the start of the command. Any leading +spaces or tabs have already been skipped. It is a single line only (so there +are no embedded newlines) and it is 0-terminated. The callback is free to +modify the contents of the buffer. It is only called for lines containing a +command, so this callback is never called for empty lines or comment lines. + +Return true if the command was valid or false if there were syntax errors. + +Implementing the High-Level CEC Adapter +--------------------------------------- + +The low-level operations drive the hardware, the high-level operations are +CEC protocol driven. The high-level callbacks are called without the adap->lock +mutex being held. The following high-level callbacks are available: + +.. code-block:: none + + struct cec_adap_ops { + /* Low-level callbacks */ + ... + + /* Error injection callbacks */ + ... + + /* High-level CEC message callback */ + void (*configured)(struct cec_adapter *adap); + int (*received)(struct cec_adapter *adap, struct cec_msg *msg); + }; + +Called when the adapter is configured:: + + void (*configured)(struct cec_adapter *adap); + +The adapter is fully configured, i.e. all logical addresses have been +successfully claimed. If the driver has to take specific actions after +configuration, then that can be done through this optional callback. + + +The received() callback allows the driver to optionally handle a newly +received CEC message:: + + int (*received)(struct cec_adapter *adap, struct cec_msg *msg); + +If the driver wants to process a CEC message, then it can implement this +callback. If it doesn't want to handle this message, then it should return +-ENOMSG, otherwise the CEC framework assumes it processed this message and +it will not do anything with it. + + +CEC framework functions +----------------------- + +CEC Adapter drivers can call the following CEC framework functions: + +.. c:function:: + int cec_transmit_msg(struct cec_adapter *adap, struct cec_msg *msg, \ + bool block); + +Transmit a CEC message. If block is true, then wait until the message has been +transmitted, otherwise just queue it and return. + +.. c:function:: + void cec_s_phys_addr(struct cec_adapter *adap, u16 phys_addr, bool block); + +Change the physical address. This function will set adap->phys_addr and +send an event if it has changed. If cec_s_log_addrs() has been called and +the physical address has become valid, then the CEC framework will start +claiming the logical addresses. If block is true, then this function won't +return until this process has finished. + +When the physical address is set to a valid value the CEC adapter will +be enabled (see the adap_enable op). When it is set to CEC_PHYS_ADDR_INVALID, +then the CEC adapter will be disabled. If you change a valid physical address +to another valid physical address, then this function will first set the +address to CEC_PHYS_ADDR_INVALID before enabling the new physical address. + +.. c:function:: + void cec_s_phys_addr_from_edid(struct cec_adapter *adap, \ + const struct edid *edid); + +A helper function that extracts the physical address from the edid struct +and calls cec_s_phys_addr() with that address, or CEC_PHYS_ADDR_INVALID +if the EDID did not contain a physical address or edid was a NULL pointer. + +.. c:function:: + int cec_s_log_addrs(struct cec_adapter *adap, \ + struct cec_log_addrs *log_addrs, bool block); + +Claim the CEC logical addresses. Should never be called if CEC_CAP_LOG_ADDRS +is set. If block is true, then wait until the logical addresses have been +claimed, otherwise just queue it and return. To unconfigure all logical +addresses call this function with log_addrs set to NULL or with +log_addrs->num_log_addrs set to 0. The block argument is ignored when +unconfiguring. This function will just return if the physical address is +invalid. Once the physical address becomes valid, then the framework will +attempt to claim these logical addresses. + +CEC Pin framework +----------------- + +Most CEC hardware operates on full CEC messages where the software provides +the message and the hardware handles the low-level CEC protocol. But some +hardware only drives the CEC pin and software has to handle the low-level +CEC protocol. The CEC pin framework was created to handle such devices. + +Note that due to the close-to-realtime requirements it can never be guaranteed +to work 100%. This framework uses highres timers internally, but if a +timer goes off too late by more than 300 microseconds wrong results can +occur. In reality it appears to be fairly reliable. + +One advantage of this low-level implementation is that it can be used as +a cheap CEC analyser, especially if interrupts can be used to detect +CEC pin transitions from low to high or vice versa. + +.. kernel-doc:: include/media/cec-pin.h + +CEC Notifier framework +---------------------- + +Most drm HDMI implementations have an integrated CEC implementation and no +notifier support is needed. But some have independent CEC implementations +that have their own driver. This could be an IP block for an SoC or a +completely separate chip that deals with the CEC pin. For those cases a +drm driver can install a notifier and use the notifier to inform the +CEC driver about changes in the physical address. + +.. kernel-doc:: include/media/cec-notifier.h diff --git a/Documentation/driver-api/media/drivers/bttv-devel.rst b/Documentation/driver-api/media/drivers/bttv-devel.rst new file mode 100644 index 0000000000..0885a04563 --- /dev/null +++ b/Documentation/driver-api/media/drivers/bttv-devel.rst @@ -0,0 +1,116 @@ +.. SPDX-License-Identifier: GPL-2.0 + +The bttv driver +=============== + +bttv and sound mini howto +------------------------- + +There are a lot of different bt848/849/878/879 based boards available. +Making video work often is not a big deal, because this is handled +completely by the bt8xx chip, which is common on all boards. But +sound is handled in slightly different ways on each board. + +To handle the grabber boards correctly, there is a array tvcards[] in +bttv-cards.c, which holds the information required for each board. +Sound will work only, if the correct entry is used (for video it often +makes no difference). The bttv driver prints a line to the kernel +log, telling which card type is used. Like this one:: + + bttv0: model: BT848(Hauppauge old) [autodetected] + +You should verify this is correct. If it isn't, you have to pass the +correct board type as insmod argument, ``insmod bttv card=2`` for +example. The file Documentation/admin-guide/media/bttv-cardlist.rst has a list +of valid arguments for card. + +If your card isn't listed there, you might check the source code for +new entries which are not listed yet. If there isn't one for your +card, you can check if one of the existing entries does work for you +(just trial and error...). + +Some boards have an extra processor for sound to do stereo decoding +and other nice features. The msp34xx chips are used by Hauppauge for +example. If your board has one, you might have to load a helper +module like ``msp3400`` to make sound work. If there isn't one for the +chip used on your board: Bad luck. Start writing a new one. Well, +you might want to check the video4linux mailing list archive first... + +Of course you need a correctly installed soundcard unless you have the +speakers connected directly to the grabber board. Hint: check the +mixer settings too. ALSA for example has everything muted by default. + + +How sound works in detail +~~~~~~~~~~~~~~~~~~~~~~~~~ + +Still doesn't work? Looks like some driver hacking is required. +Below is a do-it-yourself description for you. + +The bt8xx chips have 32 general purpose pins, and registers to control +these pins. One register is the output enable register +(``BT848_GPIO_OUT_EN``), it says which pins are actively driven by the +bt848 chip. Another one is the data register (``BT848_GPIO_DATA``), where +you can get/set the status if these pins. They can be used for input +and output. + +Most grabber board vendors use these pins to control an external chip +which does the sound routing. But every board is a little different. +These pins are also used by some companies to drive remote control +receiver chips. Some boards use the i2c bus instead of the gpio pins +to connect the mux chip. + +As mentioned above, there is a array which holds the required +information for each known board. You basically have to create a new +line for your board. The important fields are these two:: + + struct tvcard + { + [ ... ] + u32 gpiomask; + u32 audiomux[6]; /* Tuner, Radio, external, internal, mute, stereo */ + }; + +gpiomask specifies which pins are used to control the audio mux chip. +The corresponding bits in the output enable register +(``BT848_GPIO_OUT_EN``) will be set as these pins must be driven by the +bt848 chip. + +The ``audiomux[]`` array holds the data values for the different inputs +(i.e. which pins must be high/low for tuner/mute/...). This will be +written to the data register (``BT848_GPIO_DATA``) to switch the audio +mux. + + +What you have to do is figure out the correct values for gpiomask and +the audiomux array. If you have Windows and the drivers four your +card installed, you might to check out if you can read these registers +values used by the windows driver. A tool to do this is available +from http://btwincap.sourceforge.net/download.html. + +You might also dig around in the ``*.ini`` files of the Windows applications. +You can have a look at the board to see which of the gpio pins are +connected at all and then start trial-and-error ... + + +Starting with release 0.7.41 bttv has a number of insmod options to +make the gpio debugging easier: + + ================= ============================================== + bttv_gpio=0/1 enable/disable gpio debug messages + gpiomask=n set the gpiomask value + audiomux=i,j,... set the values of the audiomux array + audioall=a set the values of the audiomux array (one + value for all array elements, useful to check + out which effect the particular value has). + ================= ============================================== + +The messages printed with ``bttv_gpio=1`` look like this:: + + bttv0: gpio: en=00000027, out=00000024 in=00ffffd8 [audio: off] + + en = output _en_able register (BT848_GPIO_OUT_EN) + out = _out_put bits of the data register (BT848_GPIO_DATA), + i.e. BT848_GPIO_DATA & BT848_GPIO_OUT_EN + in = _in_put bits of the data register, + i.e. BT848_GPIO_DATA & ~BT848_GPIO_OUT_EN diff --git a/Documentation/driver-api/media/drivers/ccs/ccs-regs.asc b/Documentation/driver-api/media/drivers/ccs/ccs-regs.asc new file mode 100644 index 0000000000..bbf9213c33 --- /dev/null +++ b/Documentation/driver-api/media/drivers/ccs/ccs-regs.asc @@ -0,0 +1,1041 @@ +# SPDX-License-Identifier: GPL-2.0-only OR BSD-3-Clause +# Copyright (C) 2019--2020 Intel Corporation + +# register rflags +# - f field LSB MSB rflags +# - e enum value # after a field +# - e enum value [LSB MSB] +# - b bool bit +# - l arg name min max elsize [discontig...] +# +# rflags +# 8, 16, 32 register bits (default is 8) +# v1.1 defined in version 1.1 +# f formula +# float_ireal iReal or IEEE 754; 32 bits +# ireal unsigned iReal + +# general status registers +module_model_id 0x0000 16 +module_revision_number_major 0x0002 8 +frame_count 0x0005 8 +pixel_order 0x0006 8 +- e GRBG 0 +- e RGGB 1 +- e BGGR 2 +- e GBRG 3 +MIPI_CCS_version 0x0007 8 +- e v1_0 0x10 +- e v1_1 0x11 +- f major 4 7 +- f minor 0 3 +data_pedestal 0x0008 16 +module_manufacturer_id 0x000e 16 +module_revision_number_minor 0x0010 8 +module_date_year 0x0012 8 +module_date_month 0x0013 8 +module_date_day 0x0014 8 +module_date_phase 0x0015 8 +- f 0 2 +- e ts 0 +- e es 1 +- e cs 2 +- e mp 3 +sensor_model_id 0x0016 16 +sensor_revision_number 0x0018 8 +sensor_firmware_version 0x001a 8 +serial_number 0x001c 32 +sensor_manufacturer_id 0x0020 16 +sensor_revision_number_16 0x0022 16 + +# frame format description registers +frame_format_model_type 0x0040 8 +- e 2-byte 1 +- e 4-byte 2 +frame_format_model_subtype 0x0041 8 +- f rows 0 3 +- f columns 4 7 +frame_format_descriptor(n) 0x0042 16 f +- l n 0 14 2 +- f pixels 0 11 +- f pcode 12 15 +- e embedded 1 +- e dummy_pixel 2 +- e black_pixel 3 +- e dark_pixel 4 +- e visible_pixel 5 +- e manuf_specific_0 8 +- e manuf_specific_1 9 +- e manuf_specific_2 10 +- e manuf_specific_3 11 +- e manuf_specific_4 12 +- e manuf_specific_5 13 +- e manuf_specific_6 14 +frame_format_descriptor_4(n) 0x0060 32 f +- l n 0 7 4 +- f pixels 0 15 +- f pcode 28 31 +- e embedded 1 +- e dummy_pixel 2 +- e black_pixel 3 +- e dark_pixel 4 +- e visible_pixel 5 +- e manuf_specific_0 8 +- e manuf_specific_1 9 +- e manuf_specific_2 10 +- e manuf_specific_3 11 +- e manuf_specific_4 12 +- e manuf_specific_5 13 +- e manuf_specific_6 14 + +# analog gain description registers +analog_gain_capability 0x0080 16 +- e global 0 +- e alternate_global 2 +analog_gain_code_min 0x0084 16 +analog_gain_code_max 0x0086 16 +analog_gain_code_step 0x0088 16 +analog_gain_type 0x008a 16 +analog_gain_m0 0x008c 16 +analog_gain_c0 0x008e 16 +analog_gain_m1 0x0090 16 +analog_gain_c1 0x0092 16 +analog_linear_gain_min 0x0094 16 v1.1 +analog_linear_gain_max 0x0096 16 v1.1 +analog_linear_gain_step_size 0x0098 16 v1.1 +analog_exponential_gain_min 0x009a 16 v1.1 +analog_exponential_gain_max 0x009c 16 v1.1 +analog_exponential_gain_step_size 0x009e 16 v1.1 + +# data format description registers +data_format_model_type 0x00c0 8 +- e normal 1 +- e extended 2 +data_format_model_subtype 0x00c1 8 +- f rows 0 3 +- f columns 4 7 +data_format_descriptor(n) 0x00c2 16 f +- l n 0 15 2 +- f compressed 0 7 +- f uncompressed 8 15 + +# general set-up registers +mode_select 0x0100 8 +- e software_standby 0 +- e streaming 1 +image_orientation 0x0101 8 +- b horizontal_mirror 0 +- b vertical_flip 1 +software_reset 0x0103 8 +- e off 0 +- e on 1 +grouped_parameter_hold 0x0104 8 +mask_corrupted_frames 0x0105 8 +- e allow 0 +- e mask 1 +fast_standby_ctrl 0x0106 8 +- e complete_frames 0 +- e frame_truncation 1 +CCI_address_ctrl 0x0107 8 +2nd_CCI_if_ctrl 0x0108 8 +- b enable 0 +- b ack 1 +2nd_CCI_address_ctrl 0x0109 8 +CSI_channel_identifier 0x0110 8 +CSI_signaling_mode 0x0111 8 +- e csi_2_dphy 2 +- e csi_2_cphy 3 +CSI_data_format 0x0112 16 +CSI_lane_mode 0x0114 8 +DPCM_Frame_DT 0x011d 8 +Bottom_embedded_data_DT 0x011e 8 +Bottom_embedded_data_VC 0x011f 8 + +gain_mode 0x0120 8 +- e global 0 +- e alternate 1 +ADC_bit_depth 0x0121 8 +emb_data_ctrl 0x0122 v1.1 +- b raw8_packing_for_raw16 0 +- b raw10_packing_for_raw20 1 +- b raw12_packing_for_raw24 2 + +GPIO_TRIG_mode 0x0130 8 +extclk_frequency_mhz 0x0136 16 ireal +temp_sensor_ctrl 0x0138 8 +- b enable 0 +temp_sensor_mode 0x0139 8 +temp_sensor_output 0x013a 8 + +# integration time registers +fine_integration_time 0x0200 16 +coarse_integration_time 0x0202 16 + +# analog gain registers +analog_gain_code_global 0x0204 16 +analog_linear_gain_global 0x0206 16 v1.1 +analog_exponential_gain_global 0x0208 16 v1.1 + +# digital gain registers +digital_gain_global 0x020e 16 + +# hdr control registers +Short_analog_gain_global 0x0216 16 +Short_digital_gain_global 0x0218 16 + +HDR_mode 0x0220 8 +- b enabled 0 +- b separate_analog_gain 1 +- b upscaling 2 +- b reset_sync 3 +- b timing_mode 4 +- b exposure_ctrl_direct 5 +- b separate_digital_gain 6 +HDR_resolution_reduction 0x0221 8 +- f row 0 3 +- f column 4 7 +Exposure_ratio 0x0222 8 +HDR_internal_bit_depth 0x0223 8 +Direct_short_integration_time 0x0224 16 +Short_analog_linear_gain_global 0x0226 16 v1.1 +Short_analog_exponential_gain_global 0x0228 16 v1.1 + +# clock set-up registers +vt_pix_clk_div 0x0300 16 +vt_sys_clk_div 0x0302 16 +pre_pll_clk_div 0x0304 16 +#vt_pre_pll_clk_div 0x0304 16 +pll_multiplier 0x0306 16 +#vt_pll_multiplier 0x0306 16 +op_pix_clk_div 0x0308 16 +op_sys_clk_div 0x030a 16 +op_pre_pll_clk_div 0x030c 16 +op_pll_multiplier 0x030e 16 +pll_mode 0x0310 8 +- f 0 0 +- e single 0 +- e dual 1 +op_pix_clk_div_rev 0x0312 16 v1.1 +op_sys_clk_div_rev 0x0314 16 v1.1 + +# frame timing registers +frame_length_lines 0x0340 16 +line_length_pck 0x0342 16 + +# image size registers +x_addr_start 0x0344 16 +y_addr_start 0x0346 16 +x_addr_end 0x0348 16 +y_addr_end 0x034a 16 +x_output_size 0x034c 16 +y_output_size 0x034e 16 + +# timing mode registers +Frame_length_ctrl 0x0350 8 +- b automatic 0 +Timing_mode_ctrl 0x0352 8 +- b manual_readout 0 +- b delayed_exposure 1 +Start_readout_rs 0x0353 8 +- b manual_readout_start 0 +Frame_margin 0x0354 16 + +# sub-sampling registers +x_even_inc 0x0380 16 +x_odd_inc 0x0382 16 +y_even_inc 0x0384 16 +y_odd_inc 0x0386 16 + +# monochrome readout registers +monochrome_en 0x0390 v1.1 +- e enabled 0 + +# image scaling registers +Scaling_mode 0x0400 16 +- e no_scaling 0 +- e horizontal 1 +scale_m 0x0404 16 +scale_n 0x0406 16 +digital_crop_x_offset 0x0408 16 +digital_crop_y_offset 0x040a 16 +digital_crop_image_width 0x040c 16 +digital_crop_image_height 0x040e 16 + +# image compression registers +compression_mode 0x0500 16 +- e none 0 +- e dpcm_pcm_simple 1 + +# test pattern registers +test_pattern_mode 0x0600 16 +- e none 0 +- e solid_color 1 +- e color_bars 2 +- e fade_to_grey 3 +- e pn9 4 +- e color_tile 5 +test_data_red 0x0602 16 +test_data_greenR 0x0604 16 +test_data_blue 0x0606 16 +test_data_greenB 0x0608 16 +value_step_size_smooth 0x060a 8 +value_step_size_quantised 0x060b 8 + +# phy configuration registers +tclk_post 0x0800 8 +ths_prepare 0x0801 8 +ths_zero_min 0x0802 8 +ths_trail 0x0803 8 +tclk_trail_min 0x0804 8 +tclk_prepare 0x0805 8 +tclk_zero 0x0806 8 +tlpx 0x0807 8 +phy_ctrl 0x0808 8 +- e auto 0 +- e UI 1 +- e manual 2 +tclk_post_ex 0x080a 16 +ths_prepare_ex 0x080c 16 +ths_zero_min_ex 0x080e 16 +ths_trail_ex 0x0810 16 +tclk_trail_min_ex 0x0812 16 +tclk_prepare_ex 0x0814 16 +tclk_zero_ex 0x0816 16 +tlpx_ex 0x0818 16 + +# link rate register +requested_link_rate 0x0820 32 u16.16 + +# equalization control registers +DPHY_equalization_mode 0x0824 8 v1.1 +- b eq2 0 +PHY_equalization_ctrl 0x0825 8 v1.1 +- b enable 0 + +# d-phy preamble control registers +DPHY_preamble_ctrl 0x0826 8 v1.1 +- b enable 0 +DPHY_preamble_length 0x0826 8 v1.1 + +# d-phy spread spectrum control registers +PHY_SSC_ctrl 0x0828 8 v1.1 +- b enable 0 + +# manual lp control register +manual_LP_ctrl 0x0829 8 v1.1 +- b enable 0 + +# additional phy configuration registers +twakeup 0x082a v1.1 +tinit 0x082b v1.1 +ths_exit 0x082c v1.1 +ths_exit_ex 0x082e 16 v1.1 + +# phy calibration configuration registers +PHY_periodic_calibration_ctrl 0x0830 8 +- b frame_blanking 0 +PHY_periodic_calibration_interval 0x0831 8 +PHY_init_calibration_ctrl 0x0832 8 +- b stream_start 0 +DPHY_calibration_mode 0x0833 8 v1.1 +- b also_alternate 0 +CPHY_calibration_mode 0x0834 8 v1.1 +- e format_1 0 +- e format_2 1 +- e format_3 2 +t3_calpreamble_length 0x0835 8 v1.1 +t3_calpreamble_length_per 0x0836 8 v1.1 +t3_calaltseq_length 0x0837 8 v1.1 +t3_calaltseq_length_per 0x0838 8 v1.1 +FM2_init_seed 0x083a 16 v1.1 +t3_caludefseq_length 0x083c 16 v1.1 +t3_caludefseq_length_per 0x083e 16 v1.1 + +# c-phy manual control registers +TGR_Preamble_Length 0x0841 8 +- b preamable_prog_seq 7 +- f begin_preamble_length 0 5 +TGR_Post_Length 0x0842 8 +- f post_length 0 4 +TGR_Preamble_Prog_Sequence(n2) 0x0843 +- l n2 0 6 1 +- f symbol_n_1 3 5 +- f symbol_n 0 2 +t3_prepare 0x084e 16 +t3_lpx 0x0850 16 + +# alps control register +ALPS_ctrl 0x085a 8 +- b lvlp_dphy 0 +- b lvlp_cphy 1 +- b alp_cphy 2 + +# lrte control registers +TX_REG_CSI_EPD_EN_SSP_cphy 0x0860 16 +TX_REG_CSI_EPD_OP_SLP_cphy 0x0862 16 +TX_REG_CSI_EPD_EN_SSP_dphy 0x0864 16 +TX_REG_CSI_EPD_OP_SLP_dphy 0x0866 16 +TX_REG_CSI_EPD_MISC_OPTION_cphy 0x0868 v1.1 +TX_REG_CSI_EPD_MISC_OPTION_dphy 0x0869 v1.1 + +# scrambling control registers +Scrambling_ctrl 0x0870 +- b enabled 0 +- f 2 3 +- e 1_seed_cphy 0 +- e 4_seed_cphy 3 +lane_seed_value(seed, lane) 0x0872 16 +- l seed 0 3 0x10 +- l lane 0 7 0x2 + +# usl control registers +TX_USL_REV_ENTRY 0x08c0 16 v1.1 +TX_USL_REV_Clock_Counter 0x08c2 16 v1.1 +TX_USL_REV_LP_Counter 0x08c4 16 v1.1 +TX_USL_REV_Frame_Counter 0x08c6 16 v1.1 +TX_USL_REV_Chronological_Timer 0x08c8 16 v1.1 +TX_USL_FWD_ENTRY 0x08ca 16 v1.1 +TX_USL_GPIO 0x08cc 16 v1.1 +TX_USL_Operation 0x08ce 16 v1.1 +- b reset 0 +TX_USL_ALP_ctrl 0x08d0 16 v1.1 +- b clock_pause 0 +TX_USL_APP_BTA_ACK_TIMEOUT 0x08d2 16 v1.1 +TX_USL_SNS_BTA_ACK_TIMEOUT 0x08d2 16 v1.1 +USL_Clock_Mode_d_ctrl 0x08d2 v1.1 +- b cont_clock_standby 0 +- b cont_clock_vblank 1 +- b cont_clock_hblank 2 + +# binning configuration registers +binning_mode 0x0900 8 +binning_type 0x0901 8 +binning_weighting 0x0902 8 + +# data transfer interface registers +data_transfer_if_1_ctrl 0x0a00 8 +- b enable 0 +- b write 1 +- b clear_error 2 +data_transfer_if_1_status 0x0a01 8 +- b read_if_ready 0 +- b write_if_ready 1 +- b data_corrupted 2 +- b improper_if_usage 3 +data_transfer_if_1_page_select 0x0a02 8 +data_transfer_if_1_data(p) 0x0a04 8 f +- l p 0 63 1 + +# image processing and sensor correction configuration registers +shading_correction_en 0x0b00 8 +- b enable 0 +luminance_correction_level 0x0b01 8 +green_imbalance_filter_en 0x0b02 8 +- b enable 0 +mapped_defect_correct_en 0x0b05 8 +- b enable 0 +single_defect_correct_en 0x0b06 8 +- b enable 0 +dynamic_couplet_correct_en 0x0b08 8 +- b enable 0 +combined_defect_correct_en 0x0b0a 8 +- b enable 0 +module_specific_correction_en 0x0b0c 8 +- b enable 0 +dynamic_triplet_defect_correct_en 0x0b13 8 +- b enable 0 +NF_ctrl 0x0b15 8 +- b luma 0 +- b chroma 1 +- b combined 2 + +# optical black pixel readout registers +OB_readout_control 0x0b30 8 +- b enable 0 +- b interleaving 1 +OB_virtual_channel 0x0b31 8 +OB_DT 0x0b32 8 +OB_data_format 0x0b33 8 + +# color temperature feedback registers +color_temperature 0x0b8c 16 +absolute_gain_greenr 0x0b8e 16 +absolute_gain_red 0x0b90 16 +absolute_gain_blue 0x0b92 16 +absolute_gain_greenb 0x0b94 16 + +# cfa conversion registers +CFA_conversion_ctrl 0x0ba0 v1.1 +- b bayer_conversion_enable 0 + +# flash strobe and sa strobe control registers +flash_strobe_adjustment 0x0c12 8 +flash_strobe_start_point 0x0c14 16 +tflash_strobe_delay_rs_ctrl 0x0c16 16 +tflash_strobe_width_high_rs_ctrl 0x0c18 16 +flash_mode_rs 0x0c1a 8 +- b continuous 0 +- b truncate 1 +- b async 3 +flash_trigger_rs 0x0c1b 8 +flash_status 0x0c1c 8 +- b retimed 0 +sa_strobe_mode 0x0c1d 8 +- b continuous 0 +- b truncate 1 +- b async 3 +- b adjust_edge 4 +sa_strobe_start_point 0x0c1e 16 +tsa_strobe_delay_ctrl 0x0c20 16 +tsa_strobe_width_ctrl 0x0c22 16 +sa_strobe_trigger 0x0c24 8 +sa_strobe_status 0x0c25 8 +- b retimed 0 +tSA_strobe_re_delay_ctrl 0x0c30 16 +tSA_strobe_fe_delay_ctrl 0x0c32 16 + +# pdaf control registers +PDAF_ctrl 0x0d00 16 +- b enable 0 +- b processed 1 +- b interleaved 2 +- b visible_pdaf_correction 3 +PDAF_VC 0x0d02 8 +PDAF_DT 0x0d03 8 +pd_x_addr_start 0x0d04 16 +pd_y_addr_start 0x0d06 16 +pd_x_addr_end 0x0d08 16 +pd_y_addr_end 0x0d0a 16 + +# bracketing interface configuration registers +bracketing_LUT_ctrl 0x0e00 8 +bracketing_LUT_mode 0x0e01 8 +- b continue_streaming 0 +- b loop_mode 1 +bracketing_LUT_entry_ctrl 0x0e02 8 +bracketing_LUT_frame(n) 0x0e10 v1.1 f +- l n 0 0xef 1 + +# integration time and gain parameter limit registers +integration_time_capability 0x1000 16 +- b fine 0 +coarse_integration_time_min 0x1004 16 +coarse_integration_time_max_margin 0x1006 16 +fine_integration_time_min 0x1008 16 +fine_integration_time_max_margin 0x100a 16 + +# digital gain parameter limit registers +digital_gain_capability 0x1081 +- e none 0 +- e global 2 +digital_gain_min 0x1084 16 +digital_gain_max 0x1086 16 +digital_gain_step_size 0x1088 16 + +# data pedestal capability registers +Pedestal_capability 0x10e0 8 v1.1 + +# adc capability registers +ADC_capability 0x10f0 8 +- b bit_depth_ctrl 0 +ADC_bit_depth_capability 0x10f4 32 v1.1 + +# video timing parameter limit registers +min_ext_clk_freq_mhz 0x1100 32 float_ireal +max_ext_clk_freq_mhz 0x1104 32 float_ireal +min_pre_pll_clk_div 0x1108 16 +# min_vt_pre_pll_clk_div 0x1108 16 +max_pre_pll_clk_div 0x110a 16 +# max_vt_pre_pll_clk_div 0x110a 16 +min_pll_ip_clk_freq_mhz 0x110c 32 float_ireal +# min_vt_pll_ip_clk_freq_mhz 0x110c 32 float_ireal +max_pll_ip_clk_freq_mhz 0x1110 32 float_ireal +# max_vt_pll_ip_clk_freq_mhz 0x1110 32 float_ireal +min_pll_multiplier 0x1114 16 +# min_vt_pll_multiplier 0x1114 16 +max_pll_multiplier 0x1116 16 +# max_vt_pll_multiplier 0x1116 16 +min_pll_op_clk_freq_mhz 0x1118 32 float_ireal +max_pll_op_clk_freq_mhz 0x111c 32 float_ireal + +# video timing set-up capability registers +min_vt_sys_clk_div 0x1120 16 +max_vt_sys_clk_div 0x1122 16 +min_vt_sys_clk_freq_mhz 0x1124 32 float_ireal +max_vt_sys_clk_freq_mhz 0x1128 32 float_ireal +min_vt_pix_clk_freq_mhz 0x112c 32 float_ireal +max_vt_pix_clk_freq_mhz 0x1130 32 float_ireal +min_vt_pix_clk_div 0x1134 16 +max_vt_pix_clk_div 0x1136 16 +clock_calculation 0x1138 +- b lane_speed 0 +- b link_decoupled 1 +- b dual_pll_op_sys_ddr 2 +- b dual_pll_op_pix_ddr 3 +num_of_vt_lanes 0x1139 +num_of_op_lanes 0x113a +op_bits_per_lane 0x113b 8 v1.1 + +# frame timing parameter limits +min_frame_length_lines 0x1140 16 +max_frame_length_lines 0x1142 16 +min_line_length_pck 0x1144 16 +max_line_length_pck 0x1146 16 +min_line_blanking_pck 0x1148 16 +min_frame_blanking_lines 0x114a 16 +min_line_length_pck_step_size 0x114c +timing_mode_capability 0x114d +- b auto_frame_length 0 +- b rolling_shutter_manual_readout 2 +- b delayed_exposure_start 3 +- b manual_exposure_embedded_data 4 +frame_margin_max_value 0x114e 16 +frame_margin_min_value 0x1150 +gain_delay_type 0x1151 +- e fixed 0 +- e variable 1 + +# output clock set-up capability registers +min_op_sys_clk_div 0x1160 16 +max_op_sys_clk_div 0x1162 16 +min_op_sys_clk_freq_mhz 0x1164 32 float_ireal +max_op_sys_clk_freq_mhz 0x1168 32 float_ireal +min_op_pix_clk_div 0x116c 16 +max_op_pix_clk_div 0x116e 16 +min_op_pix_clk_freq_mhz 0x1170 32 float_ireal +max_op_pix_clk_freq_mhz 0x1174 32 float_ireal + +# image size parameter limit registers +x_addr_min 0x1180 16 +y_addr_min 0x1182 16 +x_addr_max 0x1184 16 +y_addr_max 0x1186 16 +min_x_output_size 0x1188 16 +min_y_output_size 0x118a 16 +max_x_output_size 0x118c 16 +max_y_output_size 0x118e 16 + +x_addr_start_div_constant 0x1190 v1.1 +y_addr_start_div_constant 0x1191 v1.1 +x_addr_end_div_constant 0x1192 v1.1 +y_addr_end_div_constant 0x1193 v1.1 +x_size_div 0x1194 v1.1 +y_size_div 0x1195 v1.1 +x_output_div 0x1196 v1.1 +y_output_div 0x1197 v1.1 +non_flexible_resolution_support 0x1198 v1.1 +- b new_pix_addr 0 +- b new_output_res 1 +- b output_crop_no_pad 2 +- b output_size_lane_dep 3 + +min_op_pre_pll_clk_div 0x11a0 16 +max_op_pre_pll_clk_div 0x11a2 16 +min_op_pll_ip_clk_freq_mhz 0x11a4 32 float_ireal +max_op_pll_ip_clk_freq_mhz 0x11a8 32 float_ireal +min_op_pll_multiplier 0x11ac 16 +max_op_pll_multiplier 0x11ae 16 +min_op_pll_op_clk_freq_mhz 0x11b0 32 float_ireal +max_op_pll_op_clk_freq_mhz 0x11b4 32 float_ireal +clock_tree_pll_capability 0x11b8 8 +- b dual_pll 0 +- b single_pll 1 +- b ext_divider 2 +- b flexible_op_pix_clk_div 3 +clock_capa_type_capability 0x11b9 v1.1 +- b ireal 0 + +# sub-sampling parameters limit registers +min_even_inc 0x11c0 16 +min_odd_inc 0x11c2 16 +max_even_inc 0x11c4 16 +max_odd_inc 0x11c6 16 +aux_subsamp_capability 0x11c8 v1.1 +- b factor_power_of_2 1 +aux_subsamp_mono_capability 0x11c9 v1.1 +- b factor_power_of_2 1 +monochrome_capability 0x11ca v1.1 +- e inc_odd 0 +- e inc_even 1 +pixel_readout_capability 0x11cb v1.1 +- e bayer 0 +- e monochrome 1 +- e bayer_and_mono 2 +min_even_inc_mono 0x11cc 16 v1.1 +max_even_inc_mono 0x11ce 16 v1.1 +min_odd_inc_mono 0x11d0 16 v1.1 +max_odd_inc_mono 0x11d2 16 v1.1 +min_even_inc_bc2 0x11d4 16 v1.1 +max_even_inc_bc2 0x11d6 16 v1.1 +min_odd_inc_bc2 0x11d8 16 v1.1 +max_odd_inc_bc2 0x11da 16 v1.1 +min_even_inc_mono_bc2 0x11dc 16 v1.1 +max_even_inc_mono_bc2 0x11de 16 v1.1 +min_odd_inc_mono_bc2 0x11f0 16 v1.1 +max_odd_inc_mono_bc2 0x11f2 16 v1.1 + +# image scaling limit parameters +scaling_capability 0x1200 16 +- e none 0 +- e horizontal 1 +- e reserved 2 +scaler_m_min 0x1204 16 +scaler_m_max 0x1206 16 +scaler_n_min 0x1208 16 +scaler_n_max 0x120a 16 +digital_crop_capability 0x120e +- e none 0 +- e input_crop 1 + +# hdr limit registers +hdr_capability_1 0x1210 +- b 2x2_binning 0 +- b combined_analog_gain 1 +- b separate_analog_gain 2 +- b upscaling 3 +- b reset_sync 4 +- b direct_short_exp_timing 5 +- b direct_short_exp_synthesis 6 +min_hdr_bit_depth 0x1211 +hdr_resolution_sub_types 0x1212 +hdr_resolution_sub_type(n) 0x1213 +- l n 0 1 1 +- f row 0 3 +- f column 4 7 +hdr_capability_2 0x121b +- b combined_digital_gain 0 +- b separate_digital_gain 1 +- b timing_mode 3 +- b synthesis_mode 4 +max_hdr_bit_depth 0x121c + +# usl capability register +usl_support_capability 0x1230 v1.1 +- b clock_tree 0 +- b rev_clock_tree 1 +- b rev_clock_calc 2 +usl_clock_mode_d_capability 0x1231 v1.1 +- b cont_clock_standby 0 +- b cont_clock_vblank 1 +- b cont_clock_hblank 2 +- b noncont_clock_standby 3 +- b noncont_clock_vblank 4 +- b noncont_clock_hblank 5 +min_op_sys_clk_div_rev 0x1234 v1.1 +max_op_sys_clk_div_rev 0x1236 v1.1 +min_op_pix_clk_div_rev 0x1238 v1.1 +max_op_pix_clk_div_rev 0x123a v1.1 +min_op_sys_clk_freq_rev_mhz 0x123c 32 v1.1 float_ireal +max_op_sys_clk_freq_rev_mhz 0x1240 32 v1.1 float_ireal +min_op_pix_clk_freq_rev_mhz 0x1244 32 v1.1 float_ireal +max_op_pix_clk_freq_rev_mhz 0x1248 32 v1.1 float_ireal +max_bitrate_rev_d_mode_mbps 0x124c 32 v1.1 ireal +max_symrate_rev_c_mode_msps 0x1250 32 v1.1 ireal + +# image compression capability registers +compression_capability 0x1300 +- b dpcm_pcm_simple 0 + +# test mode capability registers +test_mode_capability 0x1310 16 +- b solid_color 0 +- b color_bars 1 +- b fade_to_grey 2 +- b pn9 3 +- b color_tile 5 +pn9_data_format1 0x1312 +pn9_data_format2 0x1313 +pn9_data_format3 0x1314 +pn9_data_format4 0x1315 +pn9_misc_capability 0x1316 +- f num_pixels 0 2 +- b compression 3 +test_pattern_capability 0x1317 v1.1 +- b no_repeat 1 +pattern_size_div_m1 0x1318 v1.1 + +# fifo capability registers +fifo_support_capability 0x1502 +- e none 0 +- e derating 1 +- e derating_overrating 2 + +# csi-2 capability registers +phy_ctrl_capability 0x1600 +- b auto_phy_ctl 0 +- b ui_phy_ctl 1 +- b dphy_time_ui_reg_1_ctl 2 +- b dphy_time_ui_reg_2_ctl 3 +- b dphy_time_ctl 4 +- b dphy_ext_time_ui_reg_1_ctl 5 +- b dphy_ext_time_ui_reg_2_ctl 6 +- b dphy_ext_time_ctl 7 +csi_dphy_lane_mode_capability 0x1601 +- b 1_lane 0 +- b 2_lane 1 +- b 3_lane 2 +- b 4_lane 3 +- b 5_lane 4 +- b 6_lane 5 +- b 7_lane 6 +- b 8_lane 7 +csi_signaling_mode_capability 0x1602 +- b csi_dphy 2 +- b csi_cphy 3 +fast_standby_capability 0x1603 +- e no_frame_truncation 0 +- e frame_truncation 1 +csi_address_control_capability 0x1604 +- b cci_addr_change 0 +- b 2nd_cci_addr 1 +- b sw_changeable_2nd_cci_addr 2 +data_type_capability 0x1605 +- b dpcm_programmable 0 +- b bottom_embedded_dt_programmable 1 +- b bottom_embedded_vc_programmable 2 +- b ext_vc_range 3 +csi_cphy_lane_mode_capability 0x1606 +- b 1_lane 0 +- b 2_lane 1 +- b 3_lane 2 +- b 4_lane 3 +- b 5_lane 4 +- b 6_lane 5 +- b 7_lane 6 +- b 8_lane 7 +emb_data_capability 0x1607 v1.1 +- b two_bytes_per_raw16 0 +- b two_bytes_per_raw20 1 +- b two_bytes_per_raw24 2 +- b no_one_byte_per_raw16 3 +- b no_one_byte_per_raw20 4 +- b no_one_byte_per_raw24 5 +max_per_lane_bitrate_lane_d_mode_mbps(n) 0x1608 32 ireal +- l n 0 7 4 4,0x32 +temp_sensor_capability 0x1618 +- b supported 0 +- b CCS_format 1 +- b reset_0x80 2 +max_per_lane_bitrate_lane_c_mode_mbps(n) 0x161a 32 ireal +- l n 0 7 4 4,0x30 +dphy_equalization_capability 0x162b +- b equalization_ctrl 0 +- b eq1 1 +- b eq2 2 +cphy_equalization_capability 0x162c +- b equalization_ctrl 0 +dphy_preamble_capability 0x162d +- b preamble_seq_ctrl 0 +dphy_ssc_capability 0x162e +- b supported 0 +cphy_calibration_capability 0x162f +- b manual 0 +- b manual_streaming 1 +- b format_1_ctrl 2 +- b format_2_ctrl 3 +- b format_3_ctrl 4 +dphy_calibration_capability 0x1630 +- b manual 0 +- b manual_streaming 1 +- b alternate_seq 2 +phy_ctrl_capability_2 0x1631 +- b tgr_length 0 +- b tgr_preamble_prog_seq 1 +- b extra_cphy_manual_timing 2 +- b clock_based_manual_cdphy 3 +- b clock_based_manual_dphy 4 +- b clock_based_manual_cphy 5 +- b manual_lp_dphy 6 +- b manual_lp_cphy 7 +lrte_cphy_capability 0x1632 +- b pdq_short 0 +- b spacer_short 1 +- b pdq_long 2 +- b spacer_long 3 +- b spacer_no_pdq 4 +lrte_dphy_capability 0x1633 +- b pdq_short_opt1 0 +- b spacer_short_opt1 1 +- b pdq_long_opt1 2 +- b spacer_long_opt1 3 +- b spacer_short_opt2 4 +- b spacer_long_opt2 5 +- b spacer_no_pdq_opt1 6 +- b spacer_variable_opt2 7 +alps_capability_dphy 0x1634 +- e lvlp_not_supported 0 0x3 +- e lvlp_supported 1 0x3 +- e controllable_lvlp 2 0x3 +alps_capability_cphy 0x1635 +- e lvlp_not_supported 0 0x3 +- e lvlp_supported 1 0x3 +- e controllable_lvlp 2 0x3 +- e alp_not_supported 0xc 0xc +- e alp_supported 0xd 0xc +- e controllable_alp 0xe 0xc +scrambling_capability 0x1636 +- b scrambling_supported 0 +- f max_seeds_per_lane_c 1 2 +- e 1 0 +- e 4 3 +- f num_seed_regs 3 5 +- e 0 0 +- e 1 1 +- e 4 4 +- b num_seed_per_lane 6 +dphy_manual_constant 0x1637 +cphy_manual_constant 0x1638 +CSI2_interface_capability_misc 0x1639 v1.1 +- b eotp_short_pkt_opt2 0 +PHY_ctrl_capability_3 0x165c v1.1 +- b dphy_timing_not_multiple 0 +- b dphy_min_timing_value_1 1 +- b twakeup_supported 2 +- b tinit_supported 3 +- b ths_exit_supported 4 +- b cphy_timing_not_multiple 5 +- b cphy_min_timing_value_1 6 +dphy_sf 0x165d v1.1 +cphy_sf 0x165e v1.1 +- f twakeup 0 3 +- f tinit 4 7 +dphy_limits_1 0x165f v1.1 +- f ths_prepare 0 3 +- f ths_zero 4 7 +dphy_limits_2 0x1660 v1.1 +- f ths_trail 0 3 +- f tclk_trail_min 4 7 +dphy_limits_3 0x1661 v1.1 +- f tclk_prepare 0 3 +- f tclk_zero 4 7 +dphy_limits_4 0x1662 v1.1 +- f tclk_post 0 3 +- f tlpx 4 7 +dphy_limits_5 0x1663 v1.1 +- f ths_exit 0 3 +- f twakeup 4 7 +dphy_limits_6 0x1664 v1.1 +- f tinit 0 3 +cphy_limits_1 0x1665 v1.1 +- f t3_prepare_max 0 3 +- f t3_lpx_max 4 7 +cphy_limits_2 0x1666 v1.1 +- f ths_exit_max 0 3 +- f twakeup_max 4 7 +cphy_limits_3 0x1667 v1.1 +- f tinit_max 0 3 + +# binning capability registers +min_frame_length_lines_bin 0x1700 16 +max_frame_length_lines_bin 0x1702 16 +min_line_length_pck_bin 0x1704 16 +max_line_length_pck_bin 0x1706 16 +min_line_blanking_pck_bin 0x1708 16 +fine_integration_time_min_bin 0x170a 16 +fine_integration_time_max_margin_bin 0x170c 16 +binning_capability 0x1710 +- e unsupported 0 +- e binning_then_subsampling 1 +- e subsampling_then_binning 2 +binning_weighting_capability 0x1711 +- b averaged 0 +- b summed 1 +- b bayer_corrected 2 +- b module_specific_weight 3 +binning_sub_types 0x1712 +binning_sub_type(n) 0x1713 +- l n 0 63 1 +- f row 0 3 +- f column 4 7 +binning_weighting_mono_capability 0x1771 v1.1 +- b averaged 0 +- b summed 1 +- b bayer_corrected 2 +- b module_specific_weight 3 +binning_sub_types_mono 0x1772 v1.1 +binning_sub_type_mono(n) 0x1773 v1.1 f +- l n 0 63 1 + +# data transfer interface capability registers +data_transfer_if_capability 0x1800 +- b supported 0 +- b polling 2 + +# sensor correction capability registers +shading_correction_capability 0x1900 +- b color_shading 0 +- b luminance_correction 1 +green_imbalance_capability 0x1901 +- b supported 0 +module_specific_correction_capability 0x1903 +defect_correction_capability 0x1904 16 +- b mapped_defect 0 +- b dynamic_couplet 2 +- b dynamic_single 5 +- b combined_dynamic 8 +defect_correction_capability_2 0x1906 16 +- b dynamic_triplet 3 +nf_capability 0x1908 +- b luma 0 +- b chroma 1 +- b combined 2 + +# optical black readout capability registers +ob_readout_capability 0x1980 +- b controllable_readout 0 +- b visible_pixel_readout 1 +- b different_vc_readout 2 +- b different_dt_readout 3 +- b prog_data_format 4 + +# color feedback capability registers +color_feedback_capability 0x1987 +- b kelvin 0 +- b awb_gain 1 + +# cfa pattern capability registers +CFA_pattern_capability 0x1990 v1.1 +- e bayer 0 +- e monochrome 1 +- e 4x4_quad_bayer 2 +- e vendor_specific 3 +CFA_pattern_conversion_capability 0x1991 v1.1 +- b bayer 0 + +# timer capability registers +flash_mode_capability 0x1a02 +- b single_strobe 0 +sa_strobe_mode_capability 0x1a03 +- b fixed_width 0 +- b edge_ctrl 1 + +# soft reset capability registers +reset_max_delay 0x1a10 v1.1 +reset_min_time 0x1a11 v1.1 + +# pdaf capability registers +pdaf_capability_1 0x1b80 +- b supported 0 +- b processed_bottom_embedded 1 +- b processed_interleaved 2 +- b raw_bottom_embedded 3 +- b raw_interleaved 4 +- b visible_pdaf_correction 5 +- b vc_interleaving 6 +- b dt_interleaving 7 +pdaf_capability_2 0x1b81 +- b ROI 0 +- b after_digital_crop 1 +- b ctrl_retimed 2 + +# bracketing interface capability registers +bracketing_lut_capability_1 0x1c00 +- b coarse_integration 0 +- b global_analog_gain 1 +- b flash 4 +- b global_digital_gain 5 +- b alternate_global_analog_gain 6 +bracketing_lut_capability_2 0x1c01 +- b single_bracketing_mode 0 +- b looped_bracketing_mode 1 +bracketing_lut_size 0x1c02 diff --git a/Documentation/driver-api/media/drivers/ccs/ccs.rst b/Documentation/driver-api/media/drivers/ccs/ccs.rst new file mode 100644 index 0000000000..7389204afc --- /dev/null +++ b/Documentation/driver-api/media/drivers/ccs/ccs.rst @@ -0,0 +1,117 @@ +.. SPDX-License-Identifier: GPL-2.0-only OR BSD-3-Clause + +.. include:: <isonum.txt> + +MIPI CCS camera sensor driver +============================= + +The MIPI CCS camera sensor driver is a generic driver for `MIPI CCS +<https://www.mipi.org/specifications/camera-command-set>`_ compliant +camera sensors. It exposes three sub-devices representing the pixel array, +the binner and the scaler. + +As the capabilities of individual devices vary, the driver exposes +interfaces based on the capabilities that exist in hardware. + +Pixel Array sub-device +---------------------- + +The pixel array sub-device represents the camera sensor's pixel matrix, as well +as analogue crop functionality present in many compliant devices. The analogue +crop is configured using the ``V4L2_SEL_TGT_CROP`` on the source pad (0) of the +entity. The size of the pixel matrix can be obtained by getting the +``V4L2_SEL_TGT_NATIVE_SIZE`` target. + +Binner +------ + +The binner sub-device represents the binning functionality on the sensor. For +that purpose, selection target ``V4L2_SEL_TGT_COMPOSE`` is supported on the +sink pad (0). + +Additionally, if a device has no scaler or digital crop functionality, the +source pad (1) expses another digital crop selection rectangle that can only +crop at the end of the lines and frames. + +Scaler +------ + +The scaler sub-device represents the digital crop and scaling functionality of +the sensor. The V4L2 selection target ``V4L2_SEL_TGT_CROP`` is used to +configure the digital crop on the sink pad (0) when digital crop is supported. +Scaling is configured using selection target ``V4L2_SEL_TGT_COMPOSE`` on the +sink pad (0) as well. + +Additionally, if the scaler sub-device exists, its source pad (1) exposes +another digital crop selection rectangle that can only crop at the end of the +lines and frames. + +Digital and analogue crop +------------------------- + +Digital crop functionality is referred to as cropping that effectively works by +dropping some data on the floor. Analogue crop, on the other hand, means that +the cropped information is never retrieved. In case of camera sensors, the +analogue data is never read from the pixel matrix that are outside the +configured selection rectangle that designates crop. The difference has an +effect in device timing and likely also in power consumption. + +CCS static data +--------------- + +The MIPI CCS driver supports CCS static data for all compliant devices, +including not just those compliant with CCS 1.1 but also CCS 1.0 and SMIA(++). +For CCS the file names are formed as + + ccs/ccs-sensor-vvvv-mmmm-rrrr.fw (sensor) and + ccs/ccs-module-vvvv-mmmm-rrrr.fw (module). + +For SMIA++ compliant devices the corresponding file names are + + ccs/smiapp-sensor-vv-mmmm-rr.fw (sensor) and + ccs/smiapp-module-vv-mmmm-rrrr.fw (module). + +For SMIA (non-++) compliant devices the static data file name is + + ccs/smia-sensor-vv-mmmm-rr.fw (sensor). + +vvvv or vv denotes MIPI and SMIA manufacturer IDs respectively, mmmm model ID +and rrrr or rr revision number. + +Register definition generator +----------------------------- + +The ccs-regs.asc file contains MIPI CCS register definitions that are used +to produce C source code files for definitions that can be better used by +programs written in C language. As there are many dependencies between the +produced files, please do not modify them manually as it's error-prone and +in vain, but instead change the script producing them. + +Usage +~~~~~ + +Conventionally the script is called this way to update the CCS driver +definitions: + +.. code-block:: none + + $ Documentation/driver-api/media/drivers/ccs/mk-ccs-regs -k \ + -e drivers/media/i2c/ccs/ccs-regs.h \ + -L drivers/media/i2c/ccs/ccs-limits.h \ + -l drivers/media/i2c/ccs/ccs-limits.c \ + -c Documentation/driver-api/media/drivers/ccs/ccs-regs.asc + +CCS PLL calculator +================== + +The CCS PLL calculator is used to compute the PLL configuration, given sensor's +capabilities as well as board configuration and user specified configuration. As +the configuration space that encompasses all these configurations is vast, the +PLL calculator isn't entirely trivial. Yet it is relatively simple to use for a +driver. + +The PLL model implemented by the PLL calculator corresponds to MIPI CCS 1.1. + +.. kernel-doc:: drivers/media/i2c/ccs-pll.h + +**Copyright** |copy| 2020 Intel Corporation diff --git a/Documentation/driver-api/media/drivers/ccs/mk-ccs-regs b/Documentation/driver-api/media/drivers/ccs/mk-ccs-regs new file mode 100755 index 0000000000..2a4edc7e05 --- /dev/null +++ b/Documentation/driver-api/media/drivers/ccs/mk-ccs-regs @@ -0,0 +1,434 @@ +#!/usr/bin/perl -w +# SPDX-License-Identifier: GPL-2.0-only OR BSD-3-Clause +# Copyright (C) 2019--2020 Intel Corporation + +use Getopt::Long qw(:config no_ignore_case); +use File::Basename; + +my $ccsregs = "ccs-regs.asc"; +my $header; +my $regarray; +my $limitc; +my $limith; +my $kernel; +my $help; + +GetOptions("ccsregs|c=s" => \$ccsregs, + "header|e=s" => \$header, + "regarray|r=s" => \$regarray, + "limitc|l=s" => \$limitc, + "limith|L=s" => \$limith, + "kernel|k" => \$kernel, + "help|h" => \$help) or die "can't parse options"; + +$help = 1 if ! defined $header || ! defined $limitc || ! defined $limith; + +if (defined $help) { + print <<EOH +$0 - Create CCS register definitions for C + +usage: $0 -c ccs-regs.asc -e header -r regarray -l limit-c -L limit-header [-k] + + -c ccs register file + -e header file name + -r register description array file name + -l limit and capability array file name + -L limit and capability header file name + -k generate files for kernel space consumption +EOH + ; + exit 0; +} + +my $lh_hdr = ! defined $kernel + ? '#include "ccs-os.h"' . "\n" + : "#include <linux/bits.h>\n#include <linux/types.h>\n"; +my $uint32_t = ! defined $kernel ? 'uint32_t' : 'u32'; +my $uint16_t = ! defined $kernel ? 'uint16_t' : 'u16'; + +open(my $R, "< $ccsregs") or die "can't open $ccsregs"; + +open(my $H, "> $header") or die "can't open $header"; +my $A; +if (defined $regarray) { + open($A, "> $regarray") or die "can't open $regarray"; +} +open(my $LC, "> $limitc") or die "can't open $limitc"; +open(my $LH, "> $limith") or die "can't open $limith"; + +my %this; + +sub is_limit_reg($) { + my $addr = hex $_[0]; + + return 0 if $addr < 0x40; # weed out status registers + return 0 if $addr >= 0x100 && $addr < 0xfff; # weed out configuration registers + + return 1; +} + +my $uc_header = basename uc $header; +$uc_header =~ s/[^A-Z0-9]/_/g; + +my $copyright = "/* Copyright (C) 2019--2020 Intel Corporation */\n"; +my $license = "SPDX-License-Identifier: GPL-2.0-only OR BSD-3-Clause"; +my $note = "/*\n * Generated by $0;\n * do not modify.\n */\n"; + +for my $fh ($A, $LC) { + print $fh "// $license\n$copyright$note\n" if defined $fh; +} + +for my $fh ($H, $LH) { + print $fh "/* $license */\n$copyright$note\n"; +} + +sub bit_def($) { + my $bit = shift @_; + + return "BIT($bit)" if defined $kernel; + return "(1U << $bit)" if $bit =~ /^[a-zA-Z0-9_]+$/; + return "(1U << ($bit))"; +} + +print $H <<EOF +#ifndef __${uc_header}__ +#define __${uc_header}__ + +EOF + ; + +print $H "#include <linux/bits.h>\n\n" if defined $kernel; + +print $H <<EOF +#define CCS_FL_BASE 16 +EOF + ; + +print $H "#define CCS_FL_16BIT " . bit_def("CCS_FL_BASE") . "\n"; +print $H "#define CCS_FL_32BIT " . bit_def("CCS_FL_BASE + 1") . "\n"; +print $H "#define CCS_FL_FLOAT_IREAL " . bit_def("CCS_FL_BASE + 2") . "\n"; +print $H "#define CCS_FL_IREAL " . bit_def("CCS_FL_BASE + 3") . "\n"; + +print $H <<EOF +#define CCS_R_ADDR(r) ((r) & 0xffff) + +EOF + ; + +print $A <<EOF +#include <stdint.h> +#include <stdio.h> +#include "ccs-extra.h" +#include "ccs-regs.h" + +EOF + if defined $A; + +my $uc_limith = basename uc $limith; +$uc_limith =~ s/[^A-Z0-9]/_/g; + +print $LH <<EOF +#ifndef __${uc_limith}__ +#define __${uc_limith}__ + +$lh_hdr +struct ccs_limit { + $uint32_t reg; + $uint16_t size; + $uint16_t flags; + const char *name; +}; + +EOF + ; +print $LH "#define CCS_L_FL_SAME_REG " . bit_def(0) . "\n\n"; + +print $LH <<EOF +extern const struct ccs_limit ccs_limits[]; + +EOF + ; + +print $LC <<EOF +#include "ccs-limits.h" +#include "ccs-regs.h" + +const struct ccs_limit ccs_limits[] = { +EOF + ; + +my $limitcount = 0; +my $argdescs; +my $reglist = "const struct ccs_reg_desc ccs_reg_desc[] = {\n"; + +sub name_split($$) { + my ($name, $addr) = @_; + my $args; + + $name =~ /([^\(]+?)(\(.*)/; + ($name, $args) = ($1, $2); + $args = [split /,\s*/, $args]; + foreach my $t (@$args) { + $t =~ s/[\(\)]//g; + $t =~ s/\//\\\//g; + } + + return ($name, $addr, $args); +} + +sub tabconv($) { + $_ = shift; + + my @l = split "\n", $_; + + map { + s/ {8,8}/\t/g; + s/\t\K +//; + } @l; + + return (join "\n", @l) . "\n"; +} + +sub elem_size(@) { + my @flags = @_; + + return 2 if grep /^16$/, @flags; + return 4 if grep /^32$/, @flags; + return 1; +} + +sub arr_size($) { + my $this = $_[0]; + my $size = $this->{elsize}; + my $h = $this->{argparams}; + + foreach my $arg (@{$this->{args}}) { + my $apref = $h->{$arg}; + + $size *= $apref->{max} - $apref->{min} + 1; + } + + return $size; +} + +sub print_args($$$) { + my ($this, $postfix, $is_same_reg) = @_; + my ($args, $argparams, $name) = + ($this->{args}, $this->{argparams}, $this->{name}); + my $varname = "ccs_reg_arg_" . (lc $name) . $postfix; + my @mins; + my @sorted_args = @{$this->{sorted_args}}; + my $lim_arg; + my $size = arr_size($this); + + $argdescs .= "static const struct ccs_reg_arg " . $varname . "[] = {\n"; + + foreach my $sorted_arg (@sorted_args) { + push @mins, $argparams->{$sorted_arg}->{min}; + } + + foreach my $sorted_arg (@sorted_args) { + my $h = $argparams->{$sorted_arg}; + + $argdescs .= "\t{ \"$sorted_arg\", $h->{min}, $h->{max}, $h->{elsize} },\n"; + + $lim_arg .= defined $lim_arg ? ", $h->{min}" : "$h->{min}"; + } + + $argdescs .= "};\n\n"; + + $reglist .= "\t{ CCS_R_" . (uc $name) . "(" . (join ",", (@mins)) . + "), $size, sizeof($varname) / sizeof(*$varname)," . + " \"" . (lc $name) . "\", $varname },\n"; + + print $LC tabconv sprintf "\t{ CCS_R_" . (uc $name) . "($lim_arg), " . + $size . ", " . ($is_same_reg ? "CCS_L_FL_SAME_REG" : "0") . + ", \"$name" . (defined $this->{discontig} ? " $lim_arg" : "") . "\" },\n" + if is_limit_reg $this->{base_addr}; +} + +my $hdr_data; + +while (<$R>) { + chop; + s/^\s*//; + next if /^[#;]/ || /^$/; + if (s/^-\s*//) { + if (s/^b\s*//) { + my ($bit, $addr) = split /\t+/; + $bit = uc $bit; + $hdr_data .= sprintf "#define %-62s %s", "CCS_" . (uc ${this{name}}) ."_$bit", bit_def($addr) . "\n"; + } elsif (s/^f\s*//) { + s/[,\.-]/_/g; + my @a = split /\s+/; + my ($msb, $lsb, $this_field) = reverse @a; + @a = ( { "name" => "SHIFT", "addr" => $lsb, "fmt" => "%uU", }, + { "name" => "MASK", "addr" => (1 << ($msb + 1)) - 1 - ((1 << $lsb) - 1), "fmt" => "0x%" . join(".", ($this{"elsize"} >> 2) x 2) . "x" } ); + $this{"field"} = $this_field; + foreach my $ar (@a) { + #print $ar->{fmt}."\n"; + $hdr_data .= sprintf "#define %-62s " . $ar->{"fmt"} . "\n", "CCS_" . (uc $this{"name"}) . (defined $this_field ? "_" . uc $this_field : "") . "_" . $ar->{"name"}, $ar->{"addr"} . "\n"; + } + } elsif (s/^e\s*//) { + s/[,\.-]/_/g; + my ($enum, $addr) = split /\s+/; + $enum = uc $enum; + $hdr_data .= sprintf "#define %-62s %s", "CCS_" . (uc ${this{name}}) . (defined $this{"field"} ? "_" . uc $this{"field"} : "") ."_$enum", $addr . ($addr =~ /0x/i ? "" : "U") . "\n"; + } elsif (s/^l\s*//) { + my ($arg, $min, $max, $elsize, @discontig) = split /\s+/; + my $size; + + foreach my $num ($min, $max) { + $num = hex $num if $num =~ /0x/i; + } + + $hdr_data .= sprintf "#define %-62s %s", "CCS_LIM_" . (uc ${this{name}} . "_MIN_$arg"), $min . ($min =~ /0x/i ? "" : "U") . "\n"; + $hdr_data .= sprintf "#define %-62s %s", "CCS_LIM_" . (uc ${this{name}} . "_MAX_$arg"), $max . ($max =~ /0x/i ? "" : "U") . "\n"; + + my $h = $this{argparams}; + + $h->{$arg} = { "min" => $min, + "max" => $max, + "elsize" => $elsize =~ /^0x/ ? hex $elsize : $elsize, + "discontig" => \@discontig }; + + $this{discontig} = $arg if @discontig; + + next if $#{$this{args}} + 1 != scalar keys %{$this{argparams}}; + + my $reg_formula = "($this{addr}"; + my $lim_formula; + + foreach my $arg (@{$this{args}}) { + my $d = $h->{$arg}->{discontig}; + my $times = $h->{$arg}->{elsize} != 1 ? + " * " . $h->{$arg}->{elsize} : ""; + + if (@$d) { + my ($lim, $offset) = split /,/, $d->[0]; + + $reg_formula .= " + (($arg) < $lim ? ($arg)$times : $offset + (($arg) - $lim)$times)"; + } else { + $reg_formula .= " + ($arg)$times"; + } + + $lim_formula .= (defined $lim_formula ? " + " : "") . "($arg)$times"; + } + + $reg_formula .= ")\n"; + $lim_formula =~ s/^\(([a-z0-9]+)\)$/$1/i; + + print $H tabconv sprintf("#define %-62s %s", "CCS_R_" . (uc $this{name}) . + $this{arglist}, $reg_formula); + + print $H tabconv $hdr_data; + undef $hdr_data; + + # Sort arguments in descending order by size + @{$this{sorted_args}} = sort { + $h->{$a}->{elsize} <= $h->{$b}->{elsize} + } @{$this{args}}; + + if (defined $this{discontig}) { + my $da = $this{argparams}->{$this{discontig}}; + my ($first_discontig) = split /,/, $da->{discontig}->[0]; + my $max = $da->{max}; + + $da->{max} = $first_discontig - 1; + print_args(\%this, "", 0); + + $da->{min} = $da->{max} + 1; + $da->{max} = $max; + print_args(\%this, $first_discontig, 1); + } else { + print_args(\%this, "", 0); + } + + next unless is_limit_reg $this{base_addr}; + + print $LH tabconv sprintf "#define %-63s%s\n", + "CCS_L_" . (uc $this{name}) . "_OFFSET(" . + (join ", ", @{$this{args}}) . ")", "($lim_formula)"; + } + + if (! @{$this{args}}) { + print $H tabconv($hdr_data); + undef $hdr_data; + } + + next; + } + + my ($name, $addr, @flags) = split /\t+/, $_; + my $args = []; + + my $sp; + + ($name, $addr, $args) = name_split($name, $addr) if /\(.*\)/; + + $name =~ s/[,\.-]/_/g; + + my $flagstring = ""; + my $size = elem_size(@flags); + $flagstring .= "| CCS_FL_16BIT " if $size eq "2"; + $flagstring .= "| CCS_FL_32BIT " if $size eq "4"; + $flagstring .= "| CCS_FL_FLOAT_IREAL " if grep /^float_ireal$/, @flags; + $flagstring .= "| CCS_FL_IREAL " if grep /^ireal$/, @flags; + $flagstring =~ s/^\| //; + $flagstring =~ s/ $//; + $flagstring = "($flagstring)" if $flagstring =~ /\|/; + my $base_addr = $addr; + $addr = "($addr | $flagstring)" if $flagstring ne ""; + + my $arglist = @$args ? "(" . (join ", ", @$args) . ")" : ""; + $hdr_data .= sprintf "#define %-62s %s\n", "CCS_R_" . (uc $name), $addr + if !@$args; + + $name =~ s/\(.*//; + + %this = ( name => $name, + addr => $addr, + base_addr => $base_addr, + argparams => {}, + args => $args, + arglist => $arglist, + elsize => $size, + ); + + if (!@$args) { + $reglist .= "\t{ CCS_R_" . (uc $name) . ", 1, 0, \"" . (lc $name) . "\", NULL },\n"; + print $H tabconv $hdr_data; + undef $hdr_data; + + print $LC tabconv sprintf "\t{ CCS_R_" . (uc $name) . ", " . + $this{elsize} . ", 0, \"$name\" },\n" + if is_limit_reg $this{base_addr}; + } + + print $LH tabconv sprintf "#define %-63s%s\n", + "CCS_L_" . (uc $this{name}), $limitcount++ + if is_limit_reg $this{base_addr}; +} + +if (defined $A) { + print $A $argdescs, $reglist; + + print $A "\t{ 0 }\n"; + + print $A "};\n"; +} + +print $H "\n#endif /* __${uc_header}__ */\n"; + +print $LH tabconv sprintf "#define %-63s%s\n", "CCS_L_LAST", $limitcount; + +print $LH "\n#endif /* __${uc_limith}__ */\n"; + +print $LC "\t{ 0 } /* Guardian */\n"; +print $LC "};\n"; + +close($R); +close($H); +close($A) if defined $A; +close($LC); +close($LH); diff --git a/Documentation/driver-api/media/drivers/contributors.rst b/Documentation/driver-api/media/drivers/contributors.rst new file mode 100644 index 0000000000..f23b6e6faf --- /dev/null +++ b/Documentation/driver-api/media/drivers/contributors.rst @@ -0,0 +1,131 @@ +.. SPDX-License-Identifier: GPL-2.0 + +Contributors +============ + +.. note:: + + This documentation is outdated. There are several other DVB contributors + that aren't listed below. + +Thanks go to the following people for patches and contributions: + +- Michael Hunold <m.hunold@gmx.de> + + - for the initial saa7146 driver and its recent overhaul + +- Christian Theiss + + - for his work on the initial Linux DVB driver + +- Marcus Metzler <mocm@metzlerbros.de> and + Ralph Metzler <rjkm@metzlerbros.de> + + - for their continuing work on the DVB driver + +- Michael Holzt <kju@debian.org> + + - for his contributions to the dvb-net driver + +- Diego Picciani <d.picciani@novacomp.it> + + - for CyberLogin for Linux which allows logging onto EON + (in case you are wondering where CyberLogin is, EON changed its login + procedure and CyberLogin is no longer used.) + +- Martin Schaller <martin@smurf.franken.de> + + - for patching the cable card decoder driver + +- Klaus Schmidinger <Klaus.Schmidinger@cadsoft.de> + + - for various fixes regarding tuning, OSD and CI stuff and his work on VDR + +- Steve Brown <sbrown@cortland.com> + + - for his AFC kernel thread + +- Christoph Martin <martin@uni-mainz.de> + + - for his LIRC infrared handler + +- Andreas Oberritter <obi@linuxtv.org>, + Dennis Noermann <dennis.noermann@noernet.de>, + Felix Domke <tmbinc@elitedvb.net>, + Florian Schirmer <jolt@tuxbox.org>, + Ronny Strutz <3des@elitedvb.de>, + Wolfram Joost <dbox2@frokaschwei.de> + and all the other dbox2 people + + - for many bugfixes in the generic DVB Core, frontend drivers and + their work on the dbox2 port of the DVB driver + +- Oliver Endriss <o.endriss@gmx.de> + + - for many bugfixes + +- Andrew de Quincey <adq_dvb@lidskialf.net> + + - for the tda1004x frontend driver, and various bugfixes + +- Peter Schildmann <peter.schildmann@web.de> + + - for the driver for the Technisat SkyStar2 PCI DVB card + +- Vadim Catana <skystar@moldova.cc>, + Roberto Ragusa <r.ragusa@libero.it> and + Augusto Cardoso <augusto@carhil.net> + + - for all the work for the FlexCopII chipset by B2C2,Inc. + +- Davor Emard <emard@softhome.net> + + - for his work on the budget drivers, the demux code, + the module unloading problems, ... + +- Hans-Frieder Vogt <hfvogt@arcor.de> + + - for his work on calculating and checking the crc's for the + TechnoTrend/Hauppauge DEC driver firmware + +- Michael Dreher <michael@5dot1.de> and + Andreas 'randy' Weinberger + + - for the support of the Fujitsu-Siemens Activy budget DVB-S + +- Kenneth Aafløy <ke-aa@frisurf.no> + + - for adding support for Typhoon DVB-S budget card + +- Ernst Peinlich <e.peinlich@inode.at> + + - for tuning/DiSEqC support for the DEC 3000-s + +- Peter Beutner <p.beutner@gmx.net> + + - for the IR code for the ttusb-dec driver + +- Wilson Michaels <wilsonmichaels@earthlink.net> + + - for the lgdt330x frontend driver, and various bugfixes + +- Michael Krufky <mkrufky@linuxtv.org> + + - for maintaining v4l/dvb inter-tree dependencies + +- Taylor Jacob <rtjacob@earthlink.net> + + - for the nxt2002 frontend driver + +- Jean-Francois Thibert <jeanfrancois@sagetv.com> + + - for the nxt2004 frontend driver + +- Kirk Lapray <kirk.lapray@gmail.com> + + - for the or51211 and or51132 frontend drivers, and + for merging the nxt2002 and nxt2004 modules into a + single nxt200x frontend driver. + +(If you think you should be in this list, but you are not, drop a +line to the DVB mailing list) diff --git a/Documentation/driver-api/media/drivers/cx2341x-devel.rst b/Documentation/driver-api/media/drivers/cx2341x-devel.rst new file mode 100644 index 0000000000..97699df6ea --- /dev/null +++ b/Documentation/driver-api/media/drivers/cx2341x-devel.rst @@ -0,0 +1,3685 @@ +.. SPDX-License-Identifier: GPL-2.0 + +The cx2341x driver +================== + +Memory at cx2341x chips +----------------------- + +This section describes the cx2341x memory map and documents some of the +register space. + +.. note:: the memory long words are little-endian ('intel format'). + +.. warning:: + + This information was figured out from searching through the memory + and registers, this information may not be correct and is certainly + not complete, and was not derived from anything more than searching + through the memory space with commands like: + + .. code-block:: none + + ivtvctl -O min=0x02000000,max=0x020000ff + + So take this as is, I'm always searching for more stuff, it's a large + register space :-). + +Memory Map +~~~~~~~~~~ + +The cx2341x exposes its entire 64M memory space to the PCI host via the PCI BAR0 +(Base Address Register 0). The addresses here are offsets relative to the +address held in BAR0. + +.. code-block:: none + + 0x00000000-0x00ffffff Encoder memory space + 0x00000000-0x0003ffff Encode.rom + ???-??? MPEG buffer(s) + ???-??? Raw video capture buffer(s) + ???-??? Raw audio capture buffer(s) + ???-??? Display buffers (6 or 9) + + 0x01000000-0x01ffffff Decoder memory space + 0x01000000-0x0103ffff Decode.rom + ???-??? MPEG buffers(s) + 0x0114b000-0x0115afff Audio.rom (deprecated?) + + 0x02000000-0x0200ffff Register Space + +Registers +~~~~~~~~~ + +The registers occupy the 64k space starting at the 0x02000000 offset from BAR0. +All of these registers are 32 bits wide. + +.. code-block:: none + + DMA Registers 0x000-0xff: + + 0x00 - Control: + 0=reset/cancel, 1=read, 2=write, 4=stop + 0x04 - DMA status: + 1=read busy, 2=write busy, 4=read error, 8=write error, 16=link list error + 0x08 - pci DMA pointer for read link list + 0x0c - pci DMA pointer for write link list + 0x10 - read/write DMA enable: + 1=read enable, 2=write enable + 0x14 - always 0xffffffff, if set any lower instability occurs, 0x00 crashes + 0x18 - ?? + 0x1c - always 0x20 or 32, smaller values slow down DMA transactions + 0x20 - always value of 0x780a010a + 0x24-0x3c - usually just random values??? + 0x40 - Interrupt status + 0x44 - Write a bit here and shows up in Interrupt status 0x40 + 0x48 - Interrupt Mask + 0x4C - always value of 0xfffdffff, + if changed to 0xffffffff DMA write interrupts break. + 0x50 - always 0xffffffff + 0x54 - always 0xffffffff (0x4c, 0x50, 0x54 seem like interrupt masks, are + 3 processors on chip, Java ones, VPU, SPU, APU, maybe these are the + interrupt masks???). + 0x60-0x7C - random values + 0x80 - first write linked list reg, for Encoder Memory addr + 0x84 - first write linked list reg, for pci memory addr + 0x88 - first write linked list reg, for length of buffer in memory addr + (|0x80000000 or this for last link) + 0x8c-0xdc - rest of write linked list reg, 8 sets of 3 total, DMA goes here + from linked list addr in reg 0x0c, firmware must push through or + something. + 0xe0 - first (and only) read linked list reg, for pci memory addr + 0xe4 - first (and only) read linked list reg, for Decoder memory addr + 0xe8 - first (and only) read linked list reg, for length of buffer + 0xec-0xff - Nothing seems to be in these registers, 0xec-f4 are 0x00000000. + +Memory locations for Encoder Buffers 0x700-0x7ff: + +These registers show offsets of memory locations pertaining to each +buffer area used for encoding, have to shift them by <<1 first. + +- 0x07F8: Encoder SDRAM refresh +- 0x07FC: Encoder SDRAM pre-charge + +Memory locations for Decoder Buffers 0x800-0x8ff: + +These registers show offsets of memory locations pertaining to each +buffer area used for decoding, have to shift them by <<1 first. + +- 0x08F8: Decoder SDRAM refresh +- 0x08FC: Decoder SDRAM pre-charge + +Other memory locations: + +- 0x2800: Video Display Module control +- 0x2D00: AO (audio output?) control +- 0x2D24: Bytes Flushed +- 0x7000: LSB I2C write clock bit (inverted) +- 0x7004: LSB I2C write data bit (inverted) +- 0x7008: LSB I2C read clock bit +- 0x700c: LSB I2C read data bit +- 0x9008: GPIO get input state +- 0x900c: GPIO set output state +- 0x9020: GPIO direction (Bit7 (GPIO 0..7) - 0:input, 1:output) +- 0x9050: SPU control +- 0x9054: Reset HW blocks +- 0x9058: VPU control +- 0xA018: Bit6: interrupt pending? +- 0xA064: APU command + + +Interrupt Status Register +~~~~~~~~~~~~~~~~~~~~~~~~~ + +The definition of the bits in the interrupt status register 0x0040, and the +interrupt mask 0x0048. If a bit is cleared in the mask, then we want our ISR to +execute. + +- bit 31 Encoder Start Capture +- bit 30 Encoder EOS +- bit 29 Encoder VBI capture +- bit 28 Encoder Video Input Module reset event +- bit 27 Encoder DMA complete +- bit 24 Decoder audio mode change detection event (through event notification) +- bit 22 Decoder data request +- bit 20 Decoder DMA complete +- bit 19 Decoder VBI re-insertion +- bit 18 Decoder DMA err (linked-list bad) + +Missing documentation +--------------------- + +- Encoder API post(?) +- Decoder API post(?) +- Decoder VTRACE event + + +The cx2341x firmware upload +--------------------------- + +This document describes how to upload the cx2341x firmware to the card. + +How to find +~~~~~~~~~~~ + +See the web pages of the various projects that uses this chip for information +on how to obtain the firmware. + +The firmware stored in a Windows driver can be detected as follows: + +- Each firmware image is 256k bytes. +- The 1st 32-bit word of the Encoder image is 0x0000da7 +- The 1st 32-bit word of the Decoder image is 0x00003a7 +- The 2nd 32-bit word of both images is 0xaa55bb66 + +How to load +~~~~~~~~~~~ + +- Issue the FWapi command to stop the encoder if it is running. Wait for the + command to complete. +- Issue the FWapi command to stop the decoder if it is running. Wait for the + command to complete. +- Issue the I2C command to the digitizer to stop emitting VSYNC events. +- Issue the FWapi command to halt the encoder's firmware. +- Sleep for 10ms. +- Issue the FWapi command to halt the decoder's firmware. +- Sleep for 10ms. +- Write 0x00000000 to register 0x2800 to stop the Video Display Module. +- Write 0x00000005 to register 0x2D00 to stop the AO (audio output?). +- Write 0x00000000 to register 0xA064 to ping? the APU. +- Write 0xFFFFFFFE to register 0x9058 to stop the VPU. +- Write 0xFFFFFFFF to register 0x9054 to reset the HW blocks. +- Write 0x00000001 to register 0x9050 to stop the SPU. +- Sleep for 10ms. +- Write 0x0000001A to register 0x07FC to init the Encoder SDRAM's pre-charge. +- Write 0x80000640 to register 0x07F8 to init the Encoder SDRAM's refresh to 1us. +- Write 0x0000001A to register 0x08FC to init the Decoder SDRAM's pre-charge. +- Write 0x80000640 to register 0x08F8 to init the Decoder SDRAM's refresh to 1us. +- Sleep for 512ms. (600ms is recommended) +- Transfer the encoder's firmware image to offset 0 in Encoder memory space. +- Transfer the decoder's firmware image to offset 0 in Decoder memory space. +- Use a read-modify-write operation to Clear bit 0 of register 0x9050 to + re-enable the SPU. +- Sleep for 1 second. +- Use a read-modify-write operation to Clear bits 3 and 0 of register 0x9058 + to re-enable the VPU. +- Sleep for 1 second. +- Issue status API commands to both firmware images to verify. + + +How to call the firmware API +---------------------------- + +The preferred calling convention is known as the firmware mailbox. The +mailboxes are basically a fixed length array that serves as the call-stack. + +Firmware mailboxes can be located by searching the encoder and decoder memory +for a 16 byte signature. That signature will be located on a 256-byte boundary. + +Signature: + +.. code-block:: none + + 0x78, 0x56, 0x34, 0x12, 0x12, 0x78, 0x56, 0x34, + 0x34, 0x12, 0x78, 0x56, 0x56, 0x34, 0x12, 0x78 + +The firmware implements 20 mailboxes of 20 32-bit words. The first 10 are +reserved for API calls. The second 10 are used by the firmware for event +notification. + + ====== ================= + Index Name + ====== ================= + 0 Flags + 1 Command + 2 Return value + 3 Timeout + 4-19 Parameter/Result + ====== ================= + + +The flags are defined in the following table. The direction is from the +perspective of the firmware. + + ==== ========== ============================================ + Bit Direction Purpose + ==== ========== ============================================ + 2 O Firmware has processed the command. + 1 I Driver has finished setting the parameters. + 0 I Driver is using this mailbox. + ==== ========== ============================================ + +The command is a 32-bit enumerator. The API specifics may be found in this +chapter. + +The return value is a 32-bit enumerator. Only two values are currently defined: + +- 0=success +- -1=command undefined. + +There are 16 parameters/results 32-bit fields. The driver populates these fields +with values for all the parameters required by the call. The driver overwrites +these fields with result values returned by the call. + +The timeout value protects the card from a hung driver thread. If the driver +doesn't handle the completed call within the timeout specified, the firmware +will reset that mailbox. + +To make an API call, the driver iterates over each mailbox looking for the +first one available (bit 0 has been cleared). The driver sets that bit, fills +in the command enumerator, the timeout value and any required parameters. The +driver then sets the parameter ready bit (bit 1). The firmware scans the +mailboxes for pending commands, processes them, sets the result code, populates +the result value array with that call's return values and sets the call +complete bit (bit 2). Once bit 2 is set, the driver should retrieve the results +and clear all the flags. If the driver does not perform this task within the +time set in the timeout register, the firmware will reset that mailbox. + +Event notifications are sent from the firmware to the host. The host tells the +firmware which events it is interested in via an API call. That call tells the +firmware which notification mailbox to use. The firmware signals the host via +an interrupt. Only the 16 Results fields are used, the Flags, Command, Return +value and Timeout words are not used. + + +OSD firmware API description +---------------------------- + +.. note:: this API is part of the decoder firmware, so it's cx23415 only. + + + +CX2341X_OSD_GET_FRAMEBUFFER +~~~~~~~~~~~~~~~~~~~~~~~~~~~ + +Enum: 65/0x41 + +Description +^^^^^^^^^^^ + +Return base and length of contiguous OSD memory. + +Result[0] +^^^^^^^^^ + +OSD base address + +Result[1] +^^^^^^^^^ + +OSD length + + + +CX2341X_OSD_GET_PIXEL_FORMAT +~~~~~~~~~~~~~~~~~~~~~~~~~~~~ + +Enum: 66/0x42 + +Description +^^^^^^^^^^^ + +Query OSD format + +Result[0] +^^^^^^^^^ + +0=8bit index +1=16bit RGB 5:6:5 +2=16bit ARGB 1:5:5:5 +3=16bit ARGB 1:4:4:4 +4=32bit ARGB 8:8:8:8 + + + +CX2341X_OSD_SET_PIXEL_FORMAT +~~~~~~~~~~~~~~~~~~~~~~~~~~~~ + +Enum: 67/0x43 + +Description +^^^^^^^^^^^ + +Assign pixel format + +Param[0] +^^^^^^^^ + +- 0=8bit index +- 1=16bit RGB 5:6:5 +- 2=16bit ARGB 1:5:5:5 +- 3=16bit ARGB 1:4:4:4 +- 4=32bit ARGB 8:8:8:8 + + + +CX2341X_OSD_GET_STATE +~~~~~~~~~~~~~~~~~~~~~ + +Enum: 68/0x44 + +Description +^^^^^^^^^^^ + +Query OSD state + +Result[0] +^^^^^^^^^ + +- Bit 0 0=off, 1=on +- Bits 1:2 alpha control +- Bits 3:5 pixel format + + + +CX2341X_OSD_SET_STATE +~~~~~~~~~~~~~~~~~~~~~ + +Enum: 69/0x45 + +Description +^^^^^^^^^^^ + +OSD switch + +Param[0] +^^^^^^^^ + +0=off, 1=on + + + +CX2341X_OSD_GET_OSD_COORDS +~~~~~~~~~~~~~~~~~~~~~~~~~~ + +Enum: 70/0x46 + +Description +^^^^^^^^^^^ + +Retrieve coordinates of OSD area blended with video + +Result[0] +^^^^^^^^^ + +OSD buffer address + +Result[1] +^^^^^^^^^ + +Stride in pixels + +Result[2] +^^^^^^^^^ + +Lines in OSD buffer + +Result[3] +^^^^^^^^^ + +Horizontal offset in buffer + +Result[4] +^^^^^^^^^ + +Vertical offset in buffer + + + +CX2341X_OSD_SET_OSD_COORDS +~~~~~~~~~~~~~~~~~~~~~~~~~~ + +Enum: 71/0x47 + +Description +^^^^^^^^^^^ + +Assign the coordinates of the OSD area to blend with video + +Param[0] +^^^^^^^^ + +buffer address + +Param[1] +^^^^^^^^ + +buffer stride in pixels + +Param[2] +^^^^^^^^ + +lines in buffer + +Param[3] +^^^^^^^^ + +horizontal offset + +Param[4] +^^^^^^^^ + +vertical offset + + + +CX2341X_OSD_GET_SCREEN_COORDS +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ + +Enum: 72/0x48 + +Description +^^^^^^^^^^^ + +Retrieve OSD screen area coordinates + +Result[0] +^^^^^^^^^ + +top left horizontal offset + +Result[1] +^^^^^^^^^ + +top left vertical offset + +Result[2] +^^^^^^^^^ + +bottom right horizontal offset + +Result[3] +^^^^^^^^^ + +bottom right vertical offset + + + +CX2341X_OSD_SET_SCREEN_COORDS +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ + +Enum: 73/0x49 + +Description +^^^^^^^^^^^ + +Assign the coordinates of the screen area to blend with video + +Param[0] +^^^^^^^^ + +top left horizontal offset + +Param[1] +^^^^^^^^ + +top left vertical offset + +Param[2] +^^^^^^^^ + +bottom left horizontal offset + +Param[3] +^^^^^^^^ + +bottom left vertical offset + + + +CX2341X_OSD_GET_GLOBAL_ALPHA +~~~~~~~~~~~~~~~~~~~~~~~~~~~~ + +Enum: 74/0x4A + +Description +^^^^^^^^^^^ + +Retrieve OSD global alpha + +Result[0] +^^^^^^^^^ + +global alpha: 0=off, 1=on + +Result[1] +^^^^^^^^^ + +bits 0:7 global alpha + + + +CX2341X_OSD_SET_GLOBAL_ALPHA +~~~~~~~~~~~~~~~~~~~~~~~~~~~~ + +Enum: 75/0x4B + +Description +^^^^^^^^^^^ + +Update global alpha + +Param[0] +^^^^^^^^ + +global alpha: 0=off, 1=on + +Param[1] +^^^^^^^^ + +global alpha (8 bits) + +Param[2] +^^^^^^^^ + +local alpha: 0=on, 1=off + + + +CX2341X_OSD_SET_BLEND_COORDS +~~~~~~~~~~~~~~~~~~~~~~~~~~~~ + +Enum: 78/0x4C + +Description +^^^^^^^^^^^ + +Move start of blending area within display buffer + +Param[0] +^^^^^^^^ + +horizontal offset in buffer + +Param[1] +^^^^^^^^ + +vertical offset in buffer + + + +CX2341X_OSD_GET_FLICKER_STATE +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ + +Enum: 79/0x4F + +Description +^^^^^^^^^^^ + +Retrieve flicker reduction module state + +Result[0] +^^^^^^^^^ + +flicker state: 0=off, 1=on + + + +CX2341X_OSD_SET_FLICKER_STATE +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ + +Enum: 80/0x50 + +Description +^^^^^^^^^^^ + +Set flicker reduction module state + +Param[0] +^^^^^^^^ + +State: 0=off, 1=on + + + +CX2341X_OSD_BLT_COPY +~~~~~~~~~~~~~~~~~~~~ + +Enum: 82/0x52 + +Description +^^^^^^^^^^^ + +BLT copy + +Param[0] +^^^^^^^^ + +.. code-block:: none + + '0000' zero + '0001' ~destination AND ~source + '0010' ~destination AND source + '0011' ~destination + '0100' destination AND ~source + '0101' ~source + '0110' destination XOR source + '0111' ~destination OR ~source + '1000' ~destination AND ~source + '1001' destination XNOR source + '1010' source + '1011' ~destination OR source + '1100' destination + '1101' destination OR ~source + '1110' destination OR source + '1111' one + + +Param[1] +^^^^^^^^ + +Resulting alpha blending + +- '01' source_alpha +- '10' destination_alpha +- '11' source_alpha*destination_alpha+1 + (zero if both source and destination alpha are zero) + +Param[2] +^^^^^^^^ + +.. code-block:: none + + '00' output_pixel = source_pixel + + '01' if source_alpha=0: + output_pixel = destination_pixel + if 256 > source_alpha > 1: + output_pixel = ((source_alpha + 1)*source_pixel + + (255 - source_alpha)*destination_pixel)/256 + + '10' if destination_alpha=0: + output_pixel = source_pixel + if 255 > destination_alpha > 0: + output_pixel = ((255 - destination_alpha)*source_pixel + + (destination_alpha + 1)*destination_pixel)/256 + + '11' if source_alpha=0: + source_temp = 0 + if source_alpha=255: + source_temp = source_pixel*256 + if 255 > source_alpha > 0: + source_temp = source_pixel*(source_alpha + 1) + if destination_alpha=0: + destination_temp = 0 + if destination_alpha=255: + destination_temp = destination_pixel*256 + if 255 > destination_alpha > 0: + destination_temp = destination_pixel*(destination_alpha + 1) + output_pixel = (source_temp + destination_temp)/256 + +Param[3] +^^^^^^^^ + +width + +Param[4] +^^^^^^^^ + +height + +Param[5] +^^^^^^^^ + +destination pixel mask + +Param[6] +^^^^^^^^ + +destination rectangle start address + +Param[7] +^^^^^^^^ + +destination stride in dwords + +Param[8] +^^^^^^^^ + +source stride in dwords + +Param[9] +^^^^^^^^ + +source rectangle start address + + + +CX2341X_OSD_BLT_FILL +~~~~~~~~~~~~~~~~~~~~ + +Enum: 83/0x53 + +Description +^^^^^^^^^^^ + +BLT fill color + +Param[0] +^^^^^^^^ + +Same as Param[0] on API 0x52 + +Param[1] +^^^^^^^^ + +Same as Param[1] on API 0x52 + +Param[2] +^^^^^^^^ + +Same as Param[2] on API 0x52 + +Param[3] +^^^^^^^^ + +width + +Param[4] +^^^^^^^^ + +height + +Param[5] +^^^^^^^^ + +destination pixel mask + +Param[6] +^^^^^^^^ + +destination rectangle start address + +Param[7] +^^^^^^^^ + +destination stride in dwords + +Param[8] +^^^^^^^^ + +color fill value + + + +CX2341X_OSD_BLT_TEXT +~~~~~~~~~~~~~~~~~~~~ + +Enum: 84/0x54 + +Description +^^^^^^^^^^^ + +BLT for 8 bit alpha text source + +Param[0] +^^^^^^^^ + +Same as Param[0] on API 0x52 + +Param[1] +^^^^^^^^ + +Same as Param[1] on API 0x52 + +Param[2] +^^^^^^^^ + +Same as Param[2] on API 0x52 + +Param[3] +^^^^^^^^ + +width + +Param[4] +^^^^^^^^ + +height + +Param[5] +^^^^^^^^ + +destination pixel mask + +Param[6] +^^^^^^^^ + +destination rectangle start address + +Param[7] +^^^^^^^^ + +destination stride in dwords + +Param[8] +^^^^^^^^ + +source stride in dwords + +Param[9] +^^^^^^^^ + +source rectangle start address + +Param[10] +^^^^^^^^^ + +color fill value + + + +CX2341X_OSD_SET_FRAMEBUFFER_WINDOW +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ + +Enum: 86/0x56 + +Description +^^^^^^^^^^^ + +Positions the main output window on the screen. The coordinates must be +such that the entire window fits on the screen. + +Param[0] +^^^^^^^^ + +window width + +Param[1] +^^^^^^^^ + +window height + +Param[2] +^^^^^^^^ + +top left window corner horizontal offset + +Param[3] +^^^^^^^^ + +top left window corner vertical offset + + + +CX2341X_OSD_SET_CHROMA_KEY +~~~~~~~~~~~~~~~~~~~~~~~~~~ + +Enum: 96/0x60 + +Description +^^^^^^^^^^^ + +Chroma key switch and color + +Param[0] +^^^^^^^^ + +state: 0=off, 1=on + +Param[1] +^^^^^^^^ + +color + + + +CX2341X_OSD_GET_ALPHA_CONTENT_INDEX +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ + +Enum: 97/0x61 + +Description +^^^^^^^^^^^ + +Retrieve alpha content index + +Result[0] +^^^^^^^^^ + +alpha content index, Range 0:15 + + + +CX2341X_OSD_SET_ALPHA_CONTENT_INDEX +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ + +Enum: 98/0x62 + +Description +^^^^^^^^^^^ + +Assign alpha content index + +Param[0] +^^^^^^^^ + +alpha content index, range 0:15 + + +Encoder firmware API description +-------------------------------- + +CX2341X_ENC_PING_FW +~~~~~~~~~~~~~~~~~~~ + +Enum: 128/0x80 + +Description +^^^^^^^^^^^ + +Does nothing. Can be used to check if the firmware is responding. + + + +CX2341X_ENC_START_CAPTURE +~~~~~~~~~~~~~~~~~~~~~~~~~ + +Enum: 129/0x81 + +Description +^^^^^^^^^^^ + +Commences the capture of video, audio and/or VBI data. All encoding +parameters must be initialized prior to this API call. Captures frames +continuously or until a predefined number of frames have been captured. + +Param[0] +^^^^^^^^ + +Capture stream type: + + - 0=MPEG + - 1=Raw + - 2=Raw passthrough + - 3=VBI + + +Param[1] +^^^^^^^^ + +Bitmask: + + - Bit 0 when set, captures YUV + - Bit 1 when set, captures PCM audio + - Bit 2 when set, captures VBI (same as param[0]=3) + - Bit 3 when set, the capture destination is the decoder + (same as param[0]=2) + - Bit 4 when set, the capture destination is the host + +.. note:: this parameter is only meaningful for RAW capture type. + + + +CX2341X_ENC_STOP_CAPTURE +~~~~~~~~~~~~~~~~~~~~~~~~ + +Enum: 130/0x82 + +Description +^^^^^^^^^^^ + +Ends a capture in progress + +Param[0] +^^^^^^^^ + +- 0=stop at end of GOP (generates IRQ) +- 1=stop immediate (no IRQ) + +Param[1] +^^^^^^^^ + +Stream type to stop, see param[0] of API 0x81 + +Param[2] +^^^^^^^^ + +Subtype, see param[1] of API 0x81 + + + +CX2341X_ENC_SET_AUDIO_ID +~~~~~~~~~~~~~~~~~~~~~~~~ + +Enum: 137/0x89 + +Description +^^^^^^^^^^^ + +Assigns the transport stream ID of the encoded audio stream + +Param[0] +^^^^^^^^ + +Audio Stream ID + + + +CX2341X_ENC_SET_VIDEO_ID +~~~~~~~~~~~~~~~~~~~~~~~~ + +Enum: 139/0x8B + +Description +^^^^^^^^^^^ + +Set video transport stream ID + +Param[0] +^^^^^^^^ + +Video stream ID + + + +CX2341X_ENC_SET_PCR_ID +~~~~~~~~~~~~~~~~~~~~~~ + +Enum: 141/0x8D + +Description +^^^^^^^^^^^ + +Assigns the transport stream ID for PCR packets + +Param[0] +^^^^^^^^ + +PCR Stream ID + + + +CX2341X_ENC_SET_FRAME_RATE +~~~~~~~~~~~~~~~~~~~~~~~~~~ + +Enum: 143/0x8F + +Description +^^^^^^^^^^^ + +Set video frames per second. Change occurs at start of new GOP. + +Param[0] +^^^^^^^^ + +- 0=30fps +- 1=25fps + + + +CX2341X_ENC_SET_FRAME_SIZE +~~~~~~~~~~~~~~~~~~~~~~~~~~ + +Enum: 145/0x91 + +Description +^^^^^^^^^^^ + +Select video stream encoding resolution. + +Param[0] +^^^^^^^^ + +Height in lines. Default 480 + +Param[1] +^^^^^^^^ + +Width in pixels. Default 720 + + + +CX2341X_ENC_SET_BIT_RATE +~~~~~~~~~~~~~~~~~~~~~~~~ + +Enum: 149/0x95 + +Description +^^^^^^^^^^^ + +Assign average video stream bitrate. + +Param[0] +^^^^^^^^ + +0=variable bitrate, 1=constant bitrate + +Param[1] +^^^^^^^^ + +bitrate in bits per second + +Param[2] +^^^^^^^^ + +peak bitrate in bits per second, divided by 400 + +Param[3] +^^^^^^^^ + +Mux bitrate in bits per second, divided by 400. May be 0 (default). + +Param[4] +^^^^^^^^ + +Rate Control VBR Padding + +Param[5] +^^^^^^^^ + +VBV Buffer used by encoder + +.. note:: + + #) Param\[3\] and Param\[4\] seem to be always 0 + #) Param\[5\] doesn't seem to be used. + + + +CX2341X_ENC_SET_GOP_PROPERTIES +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ + +Enum: 151/0x97 + +Description +^^^^^^^^^^^ + +Setup the GOP structure + +Param[0] +^^^^^^^^ + +GOP size (maximum is 34) + +Param[1] +^^^^^^^^ + +Number of B frames between the I and P frame, plus 1. +For example: IBBPBBPBBPBB --> GOP size: 12, number of B frames: 2+1 = 3 + +.. note:: + + GOP size must be a multiple of (B-frames + 1). + + + +CX2341X_ENC_SET_ASPECT_RATIO +~~~~~~~~~~~~~~~~~~~~~~~~~~~~ + +Enum: 153/0x99 + +Description +^^^^^^^^^^^ + +Sets the encoding aspect ratio. Changes in the aspect ratio take effect +at the start of the next GOP. + +Param[0] +^^^^^^^^ + +- '0000' forbidden +- '0001' 1:1 square +- '0010' 4:3 +- '0011' 16:9 +- '0100' 2.21:1 +- '0101' to '1111' reserved + + + +CX2341X_ENC_SET_DNR_FILTER_MODE +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ + +Enum: 155/0x9B + +Description +^^^^^^^^^^^ + +Assign Dynamic Noise Reduction operating mode + +Param[0] +^^^^^^^^ + +Bit0: Spatial filter, set=auto, clear=manual +Bit1: Temporal filter, set=auto, clear=manual + +Param[1] +^^^^^^^^ + +Median filter: + +- 0=Disabled +- 1=Horizontal +- 2=Vertical +- 3=Horiz/Vert +- 4=Diagonal + + + +CX2341X_ENC_SET_DNR_FILTER_PROPS +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ + +Enum: 157/0x9D + +Description +^^^^^^^^^^^ + +These Dynamic Noise Reduction filter values are only meaningful when +the respective filter is set to "manual" (See API 0x9B) + +Param[0] +^^^^^^^^ + +Spatial filter: default 0, range 0:15 + +Param[1] +^^^^^^^^ + +Temporal filter: default 0, range 0:31 + + + +CX2341X_ENC_SET_CORING_LEVELS +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ + +Enum: 159/0x9F + +Description +^^^^^^^^^^^ + +Assign Dynamic Noise Reduction median filter properties. + +Param[0] +^^^^^^^^ + +Threshold above which the luminance median filter is enabled. +Default: 0, range 0:255 + +Param[1] +^^^^^^^^ + +Threshold below which the luminance median filter is enabled. +Default: 255, range 0:255 + +Param[2] +^^^^^^^^ + +Threshold above which the chrominance median filter is enabled. +Default: 0, range 0:255 + +Param[3] +^^^^^^^^ + +Threshold below which the chrominance median filter is enabled. +Default: 255, range 0:255 + + + +CX2341X_ENC_SET_SPATIAL_FILTER_TYPE +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ + +Enum: 161/0xA1 + +Description +^^^^^^^^^^^ + +Assign spatial prefilter parameters + +Param[0] +^^^^^^^^ + +Luminance filter + +- 0=Off +- 1=1D Horizontal +- 2=1D Vertical +- 3=2D H/V Separable (default) +- 4=2D Symmetric non-separable + +Param[1] +^^^^^^^^ + +Chrominance filter + +- 0=Off +- 1=1D Horizontal (default) + + + +CX2341X_ENC_SET_VBI_LINE +~~~~~~~~~~~~~~~~~~~~~~~~ + +Enum: 183/0xB7 + +Description +^^^^^^^^^^^ + +Selects VBI line number. + +Param[0] +^^^^^^^^ + +- Bits 0:4 line number +- Bit 31 0=top_field, 1=bottom_field +- Bits 0:31 all set specifies "all lines" + +Param[1] +^^^^^^^^ + +VBI line information features: 0=disabled, 1=enabled + +Param[2] +^^^^^^^^ + +Slicing: 0=None, 1=Closed Caption +Almost certainly not implemented. Set to 0. + +Param[3] +^^^^^^^^ + +Luminance samples in this line. +Almost certainly not implemented. Set to 0. + +Param[4] +^^^^^^^^ + +Chrominance samples in this line +Almost certainly not implemented. Set to 0. + + + +CX2341X_ENC_SET_STREAM_TYPE +~~~~~~~~~~~~~~~~~~~~~~~~~~~ + +Enum: 185/0xB9 + +Description +^^^^^^^^^^^ + +Assign stream type + +.. note:: + + Transport stream is not working in recent firmwares. + And in older firmwares the timestamps in the TS seem to be + unreliable. + +Param[0] +^^^^^^^^ + +- 0=Program stream +- 1=Transport stream +- 2=MPEG1 stream +- 3=PES A/V stream +- 5=PES Video stream +- 7=PES Audio stream +- 10=DVD stream +- 11=VCD stream +- 12=SVCD stream +- 13=DVD_S1 stream +- 14=DVD_S2 stream + + + +CX2341X_ENC_SET_OUTPUT_PORT +~~~~~~~~~~~~~~~~~~~~~~~~~~~ + +Enum: 187/0xBB + +Description +^^^^^^^^^^^ + +Assign stream output port. Normally 0 when the data is copied through +the PCI bus (DMA), and 1 when the data is streamed to another chip +(pvrusb and cx88-blackbird). + +Param[0] +^^^^^^^^ + +- 0=Memory (default) +- 1=Streaming +- 2=Serial + +Param[1] +^^^^^^^^ + +Unknown, but leaving this to 0 seems to work best. Indications are that +this might have to do with USB support, although passing anything but 0 +only breaks things. + + + +CX2341X_ENC_SET_AUDIO_PROPERTIES +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ + +Enum: 189/0xBD + +Description +^^^^^^^^^^^ + +Set audio stream properties, may be called while encoding is in progress. + +.. note:: + + All bitfields are consistent with ISO11172 documentation except + bits 2:3 which ISO docs define as: + + - '11' Layer I + - '10' Layer II + - '01' Layer III + - '00' Undefined + + This discrepancy may indicate a possible error in the documentation. + Testing indicated that only Layer II is actually working, and that + the minimum bitrate should be 192 kbps. + +Param[0] +^^^^^^^^ + +Bitmask: + +.. code-block:: none + + 0:1 '00' 44.1Khz + '01' 48Khz + '10' 32Khz + '11' reserved + + 2:3 '01'=Layer I + '10'=Layer II + + 4:7 Bitrate: + Index | Layer I | Layer II + ------+-------------+------------ + '0000' | free format | free format + '0001' | 32 kbit/s | 32 kbit/s + '0010' | 64 kbit/s | 48 kbit/s + '0011' | 96 kbit/s | 56 kbit/s + '0100' | 128 kbit/s | 64 kbit/s + '0101' | 160 kbit/s | 80 kbit/s + '0110' | 192 kbit/s | 96 kbit/s + '0111' | 224 kbit/s | 112 kbit/s + '1000' | 256 kbit/s | 128 kbit/s + '1001' | 288 kbit/s | 160 kbit/s + '1010' | 320 kbit/s | 192 kbit/s + '1011' | 352 kbit/s | 224 kbit/s + '1100' | 384 kbit/s | 256 kbit/s + '1101' | 416 kbit/s | 320 kbit/s + '1110' | 448 kbit/s | 384 kbit/s + + .. note:: + + For Layer II, not all combinations of total bitrate + and mode are allowed. See ISO11172-3 3-Annex B, + Table 3-B.2 + + 8:9 '00'=Stereo + '01'=JointStereo + '10'=Dual + '11'=Mono + + .. note:: + + The cx23415 cannot decode Joint Stereo properly. + + 10:11 Mode Extension used in joint_stereo mode. + In Layer I and II they indicate which subbands are in + intensity_stereo. All other subbands are coded in stereo. + '00' subbands 4-31 in intensity_stereo, bound==4 + '01' subbands 8-31 in intensity_stereo, bound==8 + '10' subbands 12-31 in intensity_stereo, bound==12 + '11' subbands 16-31 in intensity_stereo, bound==16 + + 12:13 Emphasis: + '00' None + '01' 50/15uS + '10' reserved + '11' CCITT J.17 + + 14 CRC: + '0' off + '1' on + + 15 Copyright: + '0' off + '1' on + + 16 Generation: + '0' copy + '1' original + + + +CX2341X_ENC_HALT_FW +~~~~~~~~~~~~~~~~~~~ + +Enum: 195/0xC3 + +Description +^^^^^^^^^^^ + +The firmware is halted and no further API calls are serviced until the +firmware is uploaded again. + + + +CX2341X_ENC_GET_VERSION +~~~~~~~~~~~~~~~~~~~~~~~ + +Enum: 196/0xC4 + +Description +^^^^^^^^^^^ + +Returns the version of the encoder firmware. + +Result[0] +^^^^^^^^^ + +Version bitmask: +- Bits 0:15 build +- Bits 16:23 minor +- Bits 24:31 major + + + +CX2341X_ENC_SET_GOP_CLOSURE +~~~~~~~~~~~~~~~~~~~~~~~~~~~ + +Enum: 197/0xC5 + +Description +^^^^^^^^^^^ + +Assigns the GOP open/close property. + +Param[0] +^^^^^^^^ + +- 0=Open +- 1=Closed + + + +CX2341X_ENC_GET_SEQ_END +~~~~~~~~~~~~~~~~~~~~~~~ + +Enum: 198/0xC6 + +Description +^^^^^^^^^^^ + +Obtains the sequence end code of the encoder's buffer. When a capture +is started a number of interrupts are still generated, the last of +which will have Result[0] set to 1 and Result[1] will contain the size +of the buffer. + +Result[0] +^^^^^^^^^ + +State of the transfer (1 if last buffer) + +Result[1] +^^^^^^^^^ + +If Result[0] is 1, this contains the size of the last buffer, undefined +otherwise. + + + +CX2341X_ENC_SET_PGM_INDEX_INFO +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ + +Enum: 199/0xC7 + +Description +^^^^^^^^^^^ + +Sets the Program Index Information. +The information is stored as follows: + +.. code-block:: c + + struct info { + u32 length; // Length of this frame + u32 offset_low; // Offset in the file of the + u32 offset_high; // start of this frame + u32 mask1; // Bits 0-2 are the type mask: + // 1=I, 2=P, 4=B + // 0=End of Program Index, other fields + // are invalid. + u32 pts; // The PTS of the frame + u32 mask2; // Bit 0 is bit 32 of the pts. + }; + u32 table_ptr; + struct info index[400]; + +The table_ptr is the encoder memory address in the table were +*new* entries will be written. + +.. note:: This is a ringbuffer, so the table_ptr will wraparound. + +Param[0] +^^^^^^^^ + +Picture Mask: +- 0=No index capture +- 1=I frames +- 3=I,P frames +- 7=I,P,B frames + +(Seems to be ignored, it always indexes I, P and B frames) + +Param[1] +^^^^^^^^ + +Elements requested (up to 400) + +Result[0] +^^^^^^^^^ + +Offset in the encoder memory of the start of the table. + +Result[1] +^^^^^^^^^ + +Number of allocated elements up to a maximum of Param[1] + + + +CX2341X_ENC_SET_VBI_CONFIG +~~~~~~~~~~~~~~~~~~~~~~~~~~ + +Enum: 200/0xC8 + +Description +^^^^^^^^^^^ + +Configure VBI settings + +Param[0] +^^^^^^^^ + +Bitmap: + +.. code-block:: none + + 0 Mode '0' Sliced, '1' Raw + 1:3 Insertion: + '000' insert in extension & user data + '001' insert in private packets + '010' separate stream and user data + '111' separate stream and private data + 8:15 Stream ID (normally 0xBD) + +Param[1] +^^^^^^^^ + +Frames per interrupt (max 8). Only valid in raw mode. + +Param[2] +^^^^^^^^ + +Total raw VBI frames. Only valid in raw mode. + +Param[3] +^^^^^^^^ + +Start codes + +Param[4] +^^^^^^^^ + +Stop codes + +Param[5] +^^^^^^^^ + +Lines per frame + +Param[6] +^^^^^^^^ + +Byte per line + +Result[0] +^^^^^^^^^ + +Observed frames per interrupt in raw mode only. Rage 1 to Param[1] + +Result[1] +^^^^^^^^^ + +Observed number of frames in raw mode. Range 1 to Param[2] + +Result[2] +^^^^^^^^^ + +Memory offset to start or raw VBI data + + + +CX2341X_ENC_SET_DMA_BLOCK_SIZE +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ + +Enum: 201/0xC9 + +Description +^^^^^^^^^^^ + +Set DMA transfer block size + +Param[0] +^^^^^^^^ + +DMA transfer block size in bytes or frames. When unit is bytes, +supported block sizes are 2^7, 2^8 and 2^9 bytes. + +Param[1] +^^^^^^^^ + +Unit: 0=bytes, 1=frames + + + +CX2341X_ENC_GET_PREV_DMA_INFO_MB_10 +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ + +Enum: 202/0xCA + +Description +^^^^^^^^^^^ + +Returns information on the previous DMA transfer in conjunction with +bit 27 of the interrupt mask. Uses mailbox 10. + +Result[0] +^^^^^^^^^ + +Type of stream + +Result[1] +^^^^^^^^^ + +Address Offset + +Result[2] +^^^^^^^^^ + +Maximum size of transfer + + + +CX2341X_ENC_GET_PREV_DMA_INFO_MB_9 +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ + +Enum: 203/0xCB + +Description +^^^^^^^^^^^ + +Returns information on the previous DMA transfer in conjunction with +bit 27 or 18 of the interrupt mask. Uses mailbox 9. + +Result[0] +^^^^^^^^^ + +Status bits: +- 0 read completed +- 1 write completed +- 2 DMA read error +- 3 DMA write error +- 4 Scatter-Gather array error + +Result[1] +^^^^^^^^^ + +DMA type + +Result[2] +^^^^^^^^^ + +Presentation Time Stamp bits 0..31 + +Result[3] +^^^^^^^^^ + +Presentation Time Stamp bit 32 + + + +CX2341X_ENC_SCHED_DMA_TO_HOST +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ + +Enum: 204/0xCC + +Description +^^^^^^^^^^^ + +Setup DMA to host operation + +Param[0] +^^^^^^^^ + +Memory address of link list + +Param[1] +^^^^^^^^ + +Length of link list (wtf: what units ???) + +Param[2] +^^^^^^^^ + +DMA type (0=MPEG) + + + +CX2341X_ENC_INITIALIZE_INPUT +~~~~~~~~~~~~~~~~~~~~~~~~~~~~ + +Enum: 205/0xCD + +Description +^^^^^^^^^^^ + +Initializes the video input + + + +CX2341X_ENC_SET_FRAME_DROP_RATE +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ + +Enum: 208/0xD0 + +Description +^^^^^^^^^^^ + +For each frame captured, skip specified number of frames. + +Param[0] +^^^^^^^^ + +Number of frames to skip + + + +CX2341X_ENC_PAUSE_ENCODER +~~~~~~~~~~~~~~~~~~~~~~~~~ + +Enum: 210/0xD2 + +Description +^^^^^^^^^^^ + +During a pause condition, all frames are dropped instead of being encoded. + +Param[0] +^^^^^^^^ + +- 0=Pause encoding +- 1=Continue encoding + + + +CX2341X_ENC_REFRESH_INPUT +~~~~~~~~~~~~~~~~~~~~~~~~~ + +Enum: 211/0xD3 + +Description +^^^^^^^^^^^ + +Refreshes the video input + + + +CX2341X_ENC_SET_COPYRIGHT +~~~~~~~~~~~~~~~~~~~~~~~~~ + +Enum: 212/0xD4 + +Description +^^^^^^^^^^^ + +Sets stream copyright property + +Param[0] +^^^^^^^^ + + +- 0=Stream is not copyrighted +- 1=Stream is copyrighted + + + +CX2341X_ENC_SET_EVENT_NOTIFICATION +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ + +Enum: 213/0xD5 + +Description +^^^^^^^^^^^ + +Setup firmware to notify the host about a particular event. Host must +unmask the interrupt bit. + +Param[0] +^^^^^^^^ + +Event (0=refresh encoder input) + +Param[1] +^^^^^^^^ + +Notification 0=disabled 1=enabled + +Param[2] +^^^^^^^^ + +Interrupt bit + +Param[3] +^^^^^^^^ + +Mailbox slot, -1 if no mailbox required. + + + +CX2341X_ENC_SET_NUM_VSYNC_LINES +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ + +Enum: 214/0xD6 + +Description +^^^^^^^^^^^ + +Depending on the analog video decoder used, this assigns the number +of lines for field 1 and 2. + +Param[0] +^^^^^^^^ + +Field 1 number of lines: +- 0x00EF for SAA7114 +- 0x00F0 for SAA7115 +- 0x0105 for Micronas + +Param[1] +^^^^^^^^ + +Field 2 number of lines: +- 0x00EF for SAA7114 +- 0x00F0 for SAA7115 +- 0x0106 for Micronas + + + +CX2341X_ENC_SET_PLACEHOLDER +~~~~~~~~~~~~~~~~~~~~~~~~~~~ + +Enum: 215/0xD7 + +Description +^^^^^^^^^^^ + +Provides a mechanism of inserting custom user data in the MPEG stream. + +Param[0] +^^^^^^^^ + +- 0=extension & user data +- 1=private packet with stream ID 0xBD + +Param[1] +^^^^^^^^ + +Rate at which to insert data, in units of frames (for private packet) +or GOPs (for ext. & user data) + +Param[2] +^^^^^^^^ + +Number of data DWORDs (below) to insert + +Param[3] +^^^^^^^^ + +Custom data 0 + +Param[4] +^^^^^^^^ + +Custom data 1 + +Param[5] +^^^^^^^^ + +Custom data 2 + +Param[6] +^^^^^^^^ + +Custom data 3 + +Param[7] +^^^^^^^^ + +Custom data 4 + +Param[8] +^^^^^^^^ + +Custom data 5 + +Param[9] +^^^^^^^^ + +Custom data 6 + +Param[10] +^^^^^^^^^ + +Custom data 7 + +Param[11] +^^^^^^^^^ + +Custom data 8 + + + +CX2341X_ENC_MUTE_VIDEO +~~~~~~~~~~~~~~~~~~~~~~ + +Enum: 217/0xD9 + +Description +^^^^^^^^^^^ + +Video muting + +Param[0] +^^^^^^^^ + +Bit usage: + +.. code-block:: none + + 0 '0'=video not muted + '1'=video muted, creates frames with the YUV color defined below + 1:7 Unused + 8:15 V chrominance information + 16:23 U chrominance information + 24:31 Y luminance information + + + +CX2341X_ENC_MUTE_AUDIO +~~~~~~~~~~~~~~~~~~~~~~ + +Enum: 218/0xDA + +Description +^^^^^^^^^^^ + +Audio muting + +Param[0] +^^^^^^^^ + +- 0=audio not muted +- 1=audio muted (produces silent mpeg audio stream) + + + +CX2341X_ENC_SET_VERT_CROP_LINE +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ + +Enum: 219/0xDB + +Description +^^^^^^^^^^^ + +Something to do with 'Vertical Crop Line' + +Param[0] +^^^^^^^^ + +If saa7114 and raw VBI capture and 60 Hz, then set to 10001. +Else 0. + + + +CX2341X_ENC_MISC +~~~~~~~~~~~~~~~~ + +Enum: 220/0xDC + +Description +^^^^^^^^^^^ + +Miscellaneous actions. Not known for 100% what it does. It's really a +sort of ioctl call. The first parameter is a command number, the second +the value. + +Param[0] +^^^^^^^^ + +Command number: + +.. code-block:: none + + 1=set initial SCR value when starting encoding (works). + 2=set quality mode (apparently some test setting). + 3=setup advanced VIM protection handling. + Always 1 for the cx23416 and 0 for cx23415. + 4=generate DVD compatible PTS timestamps + 5=USB flush mode + 6=something to do with the quantization matrix + 7=set navigation pack insertion for DVD: adds 0xbf (private stream 2) + packets to the MPEG. The size of these packets is 2048 bytes (including + the header of 6 bytes: 0x000001bf + length). The payload is zeroed and + it is up to the application to fill them in. These packets are apparently + inserted every four frames. + 8=enable scene change detection (seems to be a failure) + 9=set history parameters of the video input module + 10=set input field order of VIM + 11=set quantization matrix + 12=reset audio interface after channel change or input switch (has no argument). + Needed for the cx2584x, not needed for the mspx4xx, but it doesn't seem to + do any harm calling it regardless. + 13=set audio volume delay + 14=set audio delay + + +Param[1] +^^^^^^^^ + +Command value. + +Decoder firmware API description +-------------------------------- + +.. note:: this API is part of the decoder firmware, so it's cx23415 only. + + + +CX2341X_DEC_PING_FW +~~~~~~~~~~~~~~~~~~~ + +Enum: 0/0x00 + +Description +^^^^^^^^^^^ + +This API call does nothing. It may be used to check if the firmware +is responding. + + + +CX2341X_DEC_START_PLAYBACK +~~~~~~~~~~~~~~~~~~~~~~~~~~ + +Enum: 1/0x01 + +Description +^^^^^^^^^^^ + +Begin or resume playback. + +Param[0] +^^^^^^^^ + +0 based frame number in GOP to begin playback from. + +Param[1] +^^^^^^^^ + +Specifies the number of muted audio frames to play before normal +audio resumes. (This is not implemented in the firmware, leave at 0) + + + +CX2341X_DEC_STOP_PLAYBACK +~~~~~~~~~~~~~~~~~~~~~~~~~ + +Enum: 2/0x02 + +Description +^^^^^^^^^^^ + +Ends playback and clears all decoder buffers. If PTS is not zero, +playback stops at specified PTS. + +Param[0] +^^^^^^^^ + +Display 0=last frame, 1=black + +.. note:: + + this takes effect immediately, so if you want to wait for a PTS, + then use '0', otherwise the screen goes to black at once. + You can call this later (even if there is no playback) with a 1 value + to set the screen to black. + +Param[1] +^^^^^^^^ + +PTS low + +Param[2] +^^^^^^^^ + +PTS high + + + +CX2341X_DEC_SET_PLAYBACK_SPEED +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ + +Enum: 3/0x03 + +Description +^^^^^^^^^^^ + +Playback stream at speed other than normal. There are two modes of +operation: + + - Smooth: host transfers entire stream and firmware drops unused + frames. + - Coarse: host drops frames based on indexing as required to achieve + desired speed. + +Param[0] +^^^^^^^^ + +.. code-block:: none + + Bitmap: + 0:7 0 normal + 1 fast only "1.5 times" + n nX fast, 1/nX slow + 30 Framedrop: + '0' during 1.5 times play, every other B frame is dropped + '1' during 1.5 times play, stream is unchanged (bitrate + must not exceed 8mbps) + 31 Speed: + '0' slow + '1' fast + +.. note:: + + n is limited to 2. Anything higher does not result in + faster playback. Instead the host should start dropping frames. + +Param[1] +^^^^^^^^ + +Direction: 0=forward, 1=reverse + +.. note:: + + to make reverse playback work you have to write full GOPs in + reverse order. + +Param[2] +^^^^^^^^ + +.. code-block:: none + + Picture mask: + 1=I frames + 3=I, P frames + 7=I, P, B frames + +Param[3] +^^^^^^^^ + +B frames per GOP (for reverse play only) + +.. note:: + + for reverse playback the Picture Mask should be set to I or I, P. + Adding B frames to the mask will result in corrupt video. This field + has to be set to the correct value in order to keep the timing correct. + +Param[4] +^^^^^^^^ + +Mute audio: 0=disable, 1=enable + +Param[5] +^^^^^^^^ + +Display 0=frame, 1=field + +Param[6] +^^^^^^^^ + +Specifies the number of muted audio frames to play before normal audio +resumes. (Not implemented in the firmware, leave at 0) + + + +CX2341X_DEC_STEP_VIDEO +~~~~~~~~~~~~~~~~~~~~~~ + +Enum: 5/0x05 + +Description +^^^^^^^^^^^ + +Each call to this API steps the playback to the next unit defined below +in the current playback direction. + +Param[0] +^^^^^^^^ + +0=frame, 1=top field, 2=bottom field + + + +CX2341X_DEC_SET_DMA_BLOCK_SIZE +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ + +Enum: 8/0x08 + +Description +^^^^^^^^^^^ + +Set DMA transfer block size. Counterpart to API 0xC9 + +Param[0] +^^^^^^^^ + +DMA transfer block size in bytes. A different size may be specified +when issuing the DMA transfer command. + + + +CX2341X_DEC_GET_XFER_INFO +~~~~~~~~~~~~~~~~~~~~~~~~~ + +Enum: 9/0x09 + +Description +^^^^^^^^^^^ + +This API call may be used to detect an end of stream condition. + +Result[0] +^^^^^^^^^ + +Stream type + +Result[1] +^^^^^^^^^ + +Address offset + +Result[2] +^^^^^^^^^ + +Maximum bytes to transfer + +Result[3] +^^^^^^^^^ + +Buffer fullness + + + +CX2341X_DEC_GET_DMA_STATUS +~~~~~~~~~~~~~~~~~~~~~~~~~~ + +Enum: 10/0x0A + +Description +^^^^^^^^^^^ + +Status of the last DMA transfer + +Result[0] +^^^^^^^^^ + +Bit 1 set means transfer complete +Bit 2 set means DMA error +Bit 3 set means linked list error + +Result[1] +^^^^^^^^^ + +DMA type: 0=MPEG, 1=OSD, 2=YUV + + + +CX2341X_DEC_SCHED_DMA_FROM_HOST +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ + +Enum: 11/0x0B + +Description +^^^^^^^^^^^ + +Setup DMA from host operation. Counterpart to API 0xCC + +Param[0] +^^^^^^^^ + +Memory address of link list + +Param[1] +^^^^^^^^ + +Total # of bytes to transfer + +Param[2] +^^^^^^^^ + +DMA type (0=MPEG, 1=OSD, 2=YUV) + + + +CX2341X_DEC_PAUSE_PLAYBACK +~~~~~~~~~~~~~~~~~~~~~~~~~~ + +Enum: 13/0x0D + +Description +^^^^^^^^^^^ + +Freeze playback immediately. In this mode, when internal buffers are +full, no more data will be accepted and data request IRQs will be +masked. + +Param[0] +^^^^^^^^ + +Display: 0=last frame, 1=black + + + +CX2341X_DEC_HALT_FW +~~~~~~~~~~~~~~~~~~~ + +Enum: 14/0x0E + +Description +^^^^^^^^^^^ + +The firmware is halted and no further API calls are serviced until +the firmware is uploaded again. + + + +CX2341X_DEC_SET_STANDARD +~~~~~~~~~~~~~~~~~~~~~~~~ + +Enum: 16/0x10 + +Description +^^^^^^^^^^^ + +Selects display standard + +Param[0] +^^^^^^^^ + +0=NTSC, 1=PAL + + + +CX2341X_DEC_GET_VERSION +~~~~~~~~~~~~~~~~~~~~~~~ + +Enum: 17/0x11 + +Description +^^^^^^^^^^^ + +Returns decoder firmware version information + +Result[0] +^^^^^^^^^ + +Version bitmask: + - Bits 0:15 build + - Bits 16:23 minor + - Bits 24:31 major + + + +CX2341X_DEC_SET_STREAM_INPUT +~~~~~~~~~~~~~~~~~~~~~~~~~~~~ + +Enum: 20/0x14 + +Description +^^^^^^^^^^^ + +Select decoder stream input port + +Param[0] +^^^^^^^^ + +0=memory (default), 1=streaming + + + +CX2341X_DEC_GET_TIMING_INFO +~~~~~~~~~~~~~~~~~~~~~~~~~~~ + +Enum: 21/0x15 + +Description +^^^^^^^^^^^ + +Returns timing information from start of playback + +Result[0] +^^^^^^^^^ + +Frame count by decode order + +Result[1] +^^^^^^^^^ + +Video PTS bits 0:31 by display order + +Result[2] +^^^^^^^^^ + +Video PTS bit 32 by display order + +Result[3] +^^^^^^^^^ + +SCR bits 0:31 by display order + +Result[4] +^^^^^^^^^ + +SCR bit 32 by display order + + + +CX2341X_DEC_SET_AUDIO_MODE +~~~~~~~~~~~~~~~~~~~~~~~~~~ + +Enum: 22/0x16 + +Description +^^^^^^^^^^^ + +Select audio mode + +Param[0] +^^^^^^^^ + +Dual mono mode action + 0=Stereo, 1=Left, 2=Right, 3=Mono, 4=Swap, -1=Unchanged + +Param[1] +^^^^^^^^ + +Stereo mode action: + 0=Stereo, 1=Left, 2=Right, 3=Mono, 4=Swap, -1=Unchanged + + + +CX2341X_DEC_SET_EVENT_NOTIFICATION +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ + +Enum: 23/0x17 + +Description +^^^^^^^^^^^ + +Setup firmware to notify the host about a particular event. +Counterpart to API 0xD5 + +Param[0] +^^^^^^^^ + +Event: + - 0=Audio mode change between mono, (joint) stereo and dual channel. + - 3=Decoder started + - 4=Unknown: goes off 10-15 times per second while decoding. + - 5=Some sync event: goes off once per frame. + +Param[1] +^^^^^^^^ + +Notification 0=disabled, 1=enabled + +Param[2] +^^^^^^^^ + +Interrupt bit + +Param[3] +^^^^^^^^ + +Mailbox slot, -1 if no mailbox required. + + + +CX2341X_DEC_SET_DISPLAY_BUFFERS +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ + +Enum: 24/0x18 + +Description +^^^^^^^^^^^ + +Number of display buffers. To decode all frames in reverse playback you +must use nine buffers. + +Param[0] +^^^^^^^^ + +0=six buffers, 1=nine buffers + + + +CX2341X_DEC_EXTRACT_VBI +~~~~~~~~~~~~~~~~~~~~~~~ + +Enum: 25/0x19 + +Description +^^^^^^^^^^^ + +Extracts VBI data + +Param[0] +^^^^^^^^ + +0=extract from extension & user data, 1=extract from private packets + +Result[0] +^^^^^^^^^ + +VBI table location + +Result[1] +^^^^^^^^^ + +VBI table size + + + +CX2341X_DEC_SET_DECODER_SOURCE +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ + +Enum: 26/0x1A + +Description +^^^^^^^^^^^ + +Selects decoder source. Ensure that the parameters passed to this +API match the encoder settings. + +Param[0] +^^^^^^^^ + +Mode: 0=MPEG from host, 1=YUV from encoder, 2=YUV from host + +Param[1] +^^^^^^^^ + +YUV picture width + +Param[2] +^^^^^^^^ + +YUV picture height + +Param[3] +^^^^^^^^ + +Bitmap: see Param[0] of API 0xBD + + + +CX2341X_DEC_SET_PREBUFFERING +~~~~~~~~~~~~~~~~~~~~~~~~~~~~ + +Enum: 30/0x1E + +Description +^^^^^^^^^^^ + +Decoder prebuffering, when enabled up to 128KB are buffered for +streams <8mpbs or 640KB for streams >8mbps + +Param[0] +^^^^^^^^ + +0=off, 1=on + +PVR350 Video decoder registers 0x02002800 -> 0x02002B00 +------------------------------------------------------- + +Author: Ian Armstrong <ian@iarmst.demon.co.uk> + +Version: v0.4 + +Date: 12 March 2007 + + +This list has been worked out through trial and error. There will be mistakes +and omissions. Some registers have no obvious effect so it's hard to say what +they do, while others interact with each other, or require a certain load +sequence. Horizontal filter setup is one example, with six registers working +in unison and requiring a certain load sequence to correctly configure. The +indexed colour palette is much easier to set at just two registers, but again +it requires a certain load sequence. + +Some registers are fussy about what they are set to. Load in a bad value & the +decoder will fail. A firmware reload will often recover, but sometimes a reset +is required. For registers containing size information, setting them to 0 is +generally a bad idea. For other control registers i.e. 2878, you'll only find +out what values are bad when it hangs. + +.. code-block:: none + + -------------------------------------------------------------------------------- + 2800 + bit 0 + Decoder enable + 0 = disable + 1 = enable + -------------------------------------------------------------------------------- + 2804 + bits 0:31 + Decoder horizontal Y alias register 1 + --------------- + 2808 + bits 0:31 + Decoder horizontal Y alias register 2 + --------------- + 280C + bits 0:31 + Decoder horizontal Y alias register 3 + --------------- + 2810 + bits 0:31 + Decoder horizontal Y alias register 4 + --------------- + 2814 + bits 0:31 + Decoder horizontal Y alias register 5 + --------------- + 2818 + bits 0:31 + Decoder horizontal Y alias trigger + + These six registers control the horizontal aliasing filter for the Y plane. + The first five registers must all be loaded before accessing the trigger + (2818), as this register actually clocks the data through for the first + five. + + To correctly program set the filter, this whole procedure must be done 16 + times. The actual register contents are copied from a lookup-table in the + firmware which contains 4 different filter settings. + + -------------------------------------------------------------------------------- + 281C + bits 0:31 + Decoder horizontal UV alias register 1 + --------------- + 2820 + bits 0:31 + Decoder horizontal UV alias register 2 + --------------- + 2824 + bits 0:31 + Decoder horizontal UV alias register 3 + --------------- + 2828 + bits 0:31 + Decoder horizontal UV alias register 4 + --------------- + 282C + bits 0:31 + Decoder horizontal UV alias register 5 + --------------- + 2830 + bits 0:31 + Decoder horizontal UV alias trigger + + These six registers control the horizontal aliasing for the UV plane. + Operation is the same as the Y filter, with 2830 being the trigger + register. + + -------------------------------------------------------------------------------- + 2834 + bits 0:15 + Decoder Y source width in pixels + + bits 16:31 + Decoder Y destination width in pixels + --------------- + 2838 + bits 0:15 + Decoder UV source width in pixels + + bits 16:31 + Decoder UV destination width in pixels + + NOTE: For both registers, the resulting image must be fully visible on + screen. If the image exceeds the right edge both the source and destination + size must be adjusted to reflect the visible portion. For the source width, + you must take into account the scaling when calculating the new value. + -------------------------------------------------------------------------------- + + 283C + bits 0:31 + Decoder Y horizontal scaling + Normally = Reg 2854 >> 2 + --------------- + 2840 + bits 0:31 + Decoder ?? unknown - horizontal scaling + Usually 0x00080514 + --------------- + 2844 + bits 0:31 + Decoder UV horizontal scaling + Normally = Reg 2854 >> 2 + --------------- + 2848 + bits 0:31 + Decoder ?? unknown - horizontal scaling + Usually 0x00100514 + --------------- + 284C + bits 0:31 + Decoder ?? unknown - Y plane + Usually 0x00200020 + --------------- + 2850 + bits 0:31 + Decoder ?? unknown - UV plane + Usually 0x00200020 + --------------- + 2854 + bits 0:31 + Decoder 'master' value for horizontal scaling + --------------- + 2858 + bits 0:31 + Decoder ?? unknown + Usually 0 + --------------- + 285C + bits 0:31 + Decoder ?? unknown + Normally = Reg 2854 >> 1 + --------------- + 2860 + bits 0:31 + Decoder ?? unknown + Usually 0 + --------------- + 2864 + bits 0:31 + Decoder ?? unknown + Normally = Reg 2854 >> 1 + --------------- + 2868 + bits 0:31 + Decoder ?? unknown + Usually 0 + + Most of these registers either control horizontal scaling, or appear linked + to it in some way. Register 2854 contains the 'master' value & the other + registers can be calculated from that one. You must also remember to + correctly set the divider in Reg 2874. + + To enlarge: + Reg 2854 = (source_width * 0x00200000) / destination_width + Reg 2874 = No divide + + To reduce from full size down to half size: + Reg 2854 = (source_width/2 * 0x00200000) / destination width + Reg 2874 = Divide by 2 + + To reduce from half size down to quarter size: + Reg 2854 = (source_width/4 * 0x00200000) / destination width + Reg 2874 = Divide by 4 + + The result is always rounded up. + + -------------------------------------------------------------------------------- + 286C + bits 0:15 + Decoder horizontal Y buffer offset + + bits 15:31 + Decoder horizontal UV buffer offset + + Offset into the video image buffer. If the offset is gradually incremented, + the on screen image will move left & wrap around higher up on the right. + + -------------------------------------------------------------------------------- + 2870 + bits 0:15 + Decoder horizontal Y output offset + + bits 16:31 + Decoder horizontal UV output offset + + Offsets the actual video output. Controls output alignment of the Y & UV + planes. The higher the value, the greater the shift to the left. Use + reg 2890 to move the image right. + + -------------------------------------------------------------------------------- + 2874 + bits 0:1 + Decoder horizontal Y output size divider + 00 = No divide + 01 = Divide by 2 + 10 = Divide by 3 + + bits 4:5 + Decoder horizontal UV output size divider + 00 = No divide + 01 = Divide by 2 + 10 = Divide by 3 + + bit 8 + Decoder ?? unknown + 0 = Normal + 1 = Affects video output levels + + bit 16 + Decoder ?? unknown + 0 = Normal + 1 = Disable horizontal filter + + -------------------------------------------------------------------------------- + 2878 + bit 0 + ?? unknown + + bit 1 + osd on/off + 0 = osd off + 1 = osd on + + bit 2 + Decoder + osd video timing + 0 = NTSC + 1 = PAL + + bits 3:4 + ?? unknown + + bit 5 + Decoder + osd + Swaps upper & lower fields + + -------------------------------------------------------------------------------- + 287C + bits 0:10 + Decoder & osd ?? unknown + Moves entire screen horizontally. Starts at 0x005 with the screen + shifted heavily to the right. Incrementing in steps of 0x004 will + gradually shift the screen to the left. + + bits 11:31 + ?? unknown + + Normally contents are 0x00101111 (NTSC) or 0x1010111d (PAL) + + -------------------------------------------------------------------------------- + 2880 -------- ?? unknown + 2884 -------- ?? unknown + -------------------------------------------------------------------------------- + 2888 + bit 0 + Decoder + osd ?? unknown + 0 = Normal + 1 = Misaligned fields (Correctable through 289C & 28A4) + + bit 4 + ?? unknown + + bit 8 + ?? unknown + + Warning: Bad values will require a firmware reload to recover. + Known to be bad are 0x000,0x011,0x100,0x111 + -------------------------------------------------------------------------------- + 288C + bits 0:15 + osd ?? unknown + Appears to affect the osd position stability. The higher the value the + more unstable it becomes. Decoder output remains stable. + + bits 16:31 + osd ?? unknown + Same as bits 0:15 + + -------------------------------------------------------------------------------- + 2890 + bits 0:11 + Decoder output horizontal offset. + + Horizontal offset moves the video image right. A small left shift is + possible, but it's better to use reg 2870 for that due to its greater + range. + + NOTE: Video corruption will occur if video window is shifted off the right + edge. To avoid this read the notes for 2834 & 2838. + -------------------------------------------------------------------------------- + 2894 + bits 0:23 + Decoder output video surround colour. + + Contains the colour (in yuv) used to fill the screen when the video is + running in a window. + -------------------------------------------------------------------------------- + 2898 + bits 0:23 + Decoder video window colour + Contains the colour (in yuv) used to fill the video window when the + video is turned off. + + bit 24 + Decoder video output + 0 = Video on + 1 = Video off + + bit 28 + Decoder plane order + 0 = Y,UV + 1 = UV,Y + + bit 29 + Decoder second plane byte order + 0 = Normal (UV) + 1 = Swapped (VU) + + In normal usage, the first plane is Y & the second plane is UV. Though the + order of the planes can be swapped, only the byte order of the second plane + can be swapped. This isn't much use for the Y plane, but can be useful for + the UV plane. + + -------------------------------------------------------------------------------- + 289C + bits 0:15 + Decoder vertical field offset 1 + + bits 16:31 + Decoder vertical field offset 2 + + Controls field output vertical alignment. The higher the number, the lower + the image on screen. Known starting values are 0x011E0017 (NTSC) & + 0x01500017 (PAL) + -------------------------------------------------------------------------------- + 28A0 + bits 0:15 + Decoder & osd width in pixels + + bits 16:31 + Decoder & osd height in pixels + + All output from the decoder & osd are disabled beyond this area. Decoder + output will simply go black outside of this region. If the osd tries to + exceed this area it will become corrupt. + -------------------------------------------------------------------------------- + 28A4 + bits 0:11 + osd left shift. + + Has a range of 0x770->0x7FF. With the exception of 0, any value outside of + this range corrupts the osd. + -------------------------------------------------------------------------------- + 28A8 + bits 0:15 + osd vertical field offset 1 + + bits 16:31 + osd vertical field offset 2 + + Controls field output vertical alignment. The higher the number, the lower + the image on screen. Known starting values are 0x011E0017 (NTSC) & + 0x01500017 (PAL) + -------------------------------------------------------------------------------- + 28AC -------- ?? unknown + | + V + 28BC -------- ?? unknown + -------------------------------------------------------------------------------- + 28C0 + bit 0 + Current output field + 0 = first field + 1 = second field + + bits 16:31 + Current scanline + The scanline counts from the top line of the first field + through to the last line of the second field. + -------------------------------------------------------------------------------- + 28C4 -------- ?? unknown + | + V + 28F8 -------- ?? unknown + -------------------------------------------------------------------------------- + 28FC + bit 0 + ?? unknown + 0 = Normal + 1 = Breaks decoder & osd output + -------------------------------------------------------------------------------- + 2900 + bits 0:31 + Decoder vertical Y alias register 1 + --------------- + 2904 + bits 0:31 + Decoder vertical Y alias register 2 + --------------- + 2908 + bits 0:31 + Decoder vertical Y alias trigger + + These three registers control the vertical aliasing filter for the Y plane. + Operation is similar to the horizontal Y filter (2804). The only real + difference is that there are only two registers to set before accessing + the trigger register (2908). As for the horizontal filter, the values are + taken from a lookup table in the firmware, and the procedure must be + repeated 16 times to fully program the filter. + -------------------------------------------------------------------------------- + 290C + bits 0:31 + Decoder vertical UV alias register 1 + --------------- + 2910 + bits 0:31 + Decoder vertical UV alias register 2 + --------------- + 2914 + bits 0:31 + Decoder vertical UV alias trigger + + These three registers control the vertical aliasing filter for the UV + plane. Operation is the same as the Y filter, with 2914 being the trigger. + -------------------------------------------------------------------------------- + 2918 + bits 0:15 + Decoder Y source height in pixels + + bits 16:31 + Decoder Y destination height in pixels + --------------- + 291C + bits 0:15 + Decoder UV source height in pixels divided by 2 + + bits 16:31 + Decoder UV destination height in pixels + + NOTE: For both registers, the resulting image must be fully visible on + screen. If the image exceeds the bottom edge both the source and + destination size must be adjusted to reflect the visible portion. For the + source height, you must take into account the scaling when calculating the + new value. + -------------------------------------------------------------------------------- + 2920 + bits 0:31 + Decoder Y vertical scaling + Normally = Reg 2930 >> 2 + --------------- + 2924 + bits 0:31 + Decoder Y vertical scaling + Normally = Reg 2920 + 0x514 + --------------- + 2928 + bits 0:31 + Decoder UV vertical scaling + When enlarging = Reg 2930 >> 2 + When reducing = Reg 2930 >> 3 + --------------- + 292C + bits 0:31 + Decoder UV vertical scaling + Normally = Reg 2928 + 0x514 + --------------- + 2930 + bits 0:31 + Decoder 'master' value for vertical scaling + --------------- + 2934 + bits 0:31 + Decoder ?? unknown - Y vertical scaling + --------------- + 2938 + bits 0:31 + Decoder Y vertical scaling + Normally = Reg 2930 + --------------- + 293C + bits 0:31 + Decoder ?? unknown - Y vertical scaling + --------------- + 2940 + bits 0:31 + Decoder UV vertical scaling + When enlarging = Reg 2930 >> 1 + When reducing = Reg 2930 + --------------- + 2944 + bits 0:31 + Decoder ?? unknown - UV vertical scaling + --------------- + 2948 + bits 0:31 + Decoder UV vertical scaling + Normally = Reg 2940 + --------------- + 294C + bits 0:31 + Decoder ?? unknown - UV vertical scaling + + Most of these registers either control vertical scaling, or appear linked + to it in some way. Register 2930 contains the 'master' value & all other + registers can be calculated from that one. You must also remember to + correctly set the divider in Reg 296C + + To enlarge: + Reg 2930 = (source_height * 0x00200000) / destination_height + Reg 296C = No divide + + To reduce from full size down to half size: + Reg 2930 = (source_height/2 * 0x00200000) / destination height + Reg 296C = Divide by 2 + + To reduce from half down to quarter. + Reg 2930 = (source_height/4 * 0x00200000) / destination height + Reg 296C = Divide by 4 + + -------------------------------------------------------------------------------- + 2950 + bits 0:15 + Decoder Y line index into display buffer, first field + + bits 16:31 + Decoder Y vertical line skip, first field + -------------------------------------------------------------------------------- + 2954 + bits 0:15 + Decoder Y line index into display buffer, second field + + bits 16:31 + Decoder Y vertical line skip, second field + -------------------------------------------------------------------------------- + 2958 + bits 0:15 + Decoder UV line index into display buffer, first field + + bits 16:31 + Decoder UV vertical line skip, first field + -------------------------------------------------------------------------------- + 295C + bits 0:15 + Decoder UV line index into display buffer, second field + + bits 16:31 + Decoder UV vertical line skip, second field + -------------------------------------------------------------------------------- + 2960 + bits 0:15 + Decoder destination height minus 1 + + bits 16:31 + Decoder destination height divided by 2 + -------------------------------------------------------------------------------- + 2964 + bits 0:15 + Decoder Y vertical offset, second field + + bits 16:31 + Decoder Y vertical offset, first field + + These two registers shift the Y plane up. The higher the number, the + greater the shift. + -------------------------------------------------------------------------------- + 2968 + bits 0:15 + Decoder UV vertical offset, second field + + bits 16:31 + Decoder UV vertical offset, first field + + These two registers shift the UV plane up. The higher the number, the + greater the shift. + -------------------------------------------------------------------------------- + 296C + bits 0:1 + Decoder vertical Y output size divider + 00 = No divide + 01 = Divide by 2 + 10 = Divide by 4 + + bits 8:9 + Decoder vertical UV output size divider + 00 = No divide + 01 = Divide by 2 + 10 = Divide by 4 + -------------------------------------------------------------------------------- + 2970 + bit 0 + Decoder ?? unknown + 0 = Normal + 1 = Affect video output levels + + bit 16 + Decoder ?? unknown + 0 = Normal + 1 = Disable vertical filter + + -------------------------------------------------------------------------------- + 2974 -------- ?? unknown + | + V + 29EF -------- ?? unknown + -------------------------------------------------------------------------------- + 2A00 + bits 0:2 + osd colour mode + 000 = 8 bit indexed + 001 = 16 bit (565) + 010 = 15 bit (555) + 011 = 12 bit (444) + 100 = 32 bit (8888) + + bits 4:5 + osd display bpp + 01 = 8 bit + 10 = 16 bit + 11 = 32 bit + + bit 8 + osd global alpha + 0 = Off + 1 = On + + bit 9 + osd local alpha + 0 = Off + 1 = On + + bit 10 + osd colour key + 0 = Off + 1 = On + + bit 11 + osd ?? unknown + Must be 1 + + bit 13 + osd colour space + 0 = ARGB + 1 = AYVU + + bits 16:31 + osd ?? unknown + Must be 0x001B (some kind of buffer pointer ?) + + When the bits-per-pixel is set to 8, the colour mode is ignored and + assumed to be 8 bit indexed. For 16 & 32 bits-per-pixel the colour depth + is honoured, and when using a colour depth that requires fewer bytes than + allocated the extra bytes are used as padding. So for a 32 bpp with 8 bit + index colour, there are 3 padding bytes per pixel. It's also possible to + select 16bpp with a 32 bit colour mode. This results in the pixel width + being doubled, but the color key will not work as expected in this mode. + + Colour key is as it suggests. You designate a colour which will become + completely transparent. When using 565, 555 or 444 colour modes, the + colour key is always 16 bits wide. The colour to key on is set in Reg 2A18. + + Local alpha works differently depending on the colour mode. For 32bpp & 8 + bit indexed, local alpha is a per-pixel 256 step transparency, with 0 being + transparent and 255 being solid. For the 16bpp modes 555 & 444, the unused + bit(s) act as a simple transparency switch, with 0 being solid & 1 being + fully transparent. There is no local alpha support for 16bit 565. + + Global alpha is a 256 step transparency that applies to the entire osd, + with 0 being transparent & 255 being solid. + + It's possible to combine colour key, local alpha & global alpha. + -------------------------------------------------------------------------------- + 2A04 + bits 0:15 + osd x coord for left edge + + bits 16:31 + osd y coord for top edge + --------------- + 2A08 + bits 0:15 + osd x coord for right edge + + bits 16:31 + osd y coord for bottom edge + + For both registers, (0,0) = top left corner of the display area. These + registers do not control the osd size, only where it's positioned & how + much is visible. The visible osd area cannot exceed the right edge of the + display, otherwise the osd will become corrupt. See reg 2A10 for + setting osd width. + -------------------------------------------------------------------------------- + 2A0C + bits 0:31 + osd buffer index + + An index into the osd buffer. Slowly incrementing this moves the osd left, + wrapping around onto the right edge + -------------------------------------------------------------------------------- + 2A10 + bits 0:11 + osd buffer 32 bit word width + + Contains the width of the osd measured in 32 bit words. This means that all + colour modes are restricted to a byte width which is divisible by 4. + -------------------------------------------------------------------------------- + 2A14 + bits 0:15 + osd height in pixels + + bits 16:32 + osd line index into buffer + osd will start displaying from this line. + -------------------------------------------------------------------------------- + 2A18 + bits 0:31 + osd colour key + + Contains the colour value which will be transparent. + -------------------------------------------------------------------------------- + 2A1C + bits 0:7 + osd global alpha + + Contains the global alpha value (equiv ivtvfbctl --alpha XX) + -------------------------------------------------------------------------------- + 2A20 -------- ?? unknown + | + V + 2A2C -------- ?? unknown + -------------------------------------------------------------------------------- + 2A30 + bits 0:7 + osd colour to change in indexed palette + --------------- + 2A34 + bits 0:31 + osd colour for indexed palette + + To set the new palette, first load the index of the colour to change into + 2A30, then load the new colour into 2A34. The full palette is 256 colours, + so the index range is 0x00-0xFF + -------------------------------------------------------------------------------- + 2A38 -------- ?? unknown + 2A3C -------- ?? unknown + -------------------------------------------------------------------------------- + 2A40 + bits 0:31 + osd ?? unknown + + Affects overall brightness, wrapping around to black + -------------------------------------------------------------------------------- + 2A44 + bits 0:31 + osd ?? unknown + + Green tint + -------------------------------------------------------------------------------- + 2A48 + bits 0:31 + osd ?? unknown + + Red tint + -------------------------------------------------------------------------------- + 2A4C + bits 0:31 + osd ?? unknown + + Affects overall brightness, wrapping around to black + -------------------------------------------------------------------------------- + 2A50 + bits 0:31 + osd ?? unknown + + Colour shift + -------------------------------------------------------------------------------- + 2A54 + bits 0:31 + osd ?? unknown + + Colour shift + -------------------------------------------------------------------------------- + 2A58 -------- ?? unknown + | + V + 2AFC -------- ?? unknown + -------------------------------------------------------------------------------- + 2B00 + bit 0 + osd filter control + 0 = filter off + 1 = filter on + + bits 1:4 + osd ?? unknown + + -------------------------------------------------------------------------------- + +The cx231xx DMA engine +---------------------- + + +This page describes the structures and procedures used by the cx2341x DMA +engine. + +Introduction +~~~~~~~~~~~~ + +The cx2341x PCI interface is busmaster capable. This means it has a DMA +engine to efficiently transfer large volumes of data between the card and main +memory without requiring help from a CPU. Like most hardware, it must operate +on contiguous physical memory. This is difficult to come by in large quantities +on virtual memory machines. + +Therefore, it also supports a technique called "scatter-gather". The card can +transfer multiple buffers in one operation. Instead of allocating one large +contiguous buffer, the driver can allocate several smaller buffers. + +In practice, I've seen the average transfer to be roughly 80K, but transfers +above 128K were not uncommon, particularly at startup. The 128K figure is +important, because that is the largest block that the kernel can normally +allocate. Even still, 128K blocks are hard to come by, so the driver writer is +urged to choose a smaller block size and learn the scatter-gather technique. + +Mailbox #10 is reserved for DMA transfer information. + +Note: the hardware expects little-endian data ('intel format'). + +Flow +~~~~ + +This section describes, in general, the order of events when handling DMA +transfers. Detailed information follows this section. + +- The card raises the Encoder interrupt. +- The driver reads the transfer type, offset and size from Mailbox #10. +- The driver constructs the scatter-gather array from enough free dma buffers + to cover the size. +- The driver schedules the DMA transfer via the ScheduleDMAtoHost API call. +- The card raises the DMA Complete interrupt. +- The driver checks the DMA status register for any errors. +- The driver post-processes the newly transferred buffers. + +NOTE! It is possible that the Encoder and DMA Complete interrupts get raised +simultaneously. (End of the last, start of the next, etc.) + +Mailbox #10 +~~~~~~~~~~~ + +The Flags, Command, Return Value and Timeout fields are ignored. + +- Name: Mailbox #10 +- Results[0]: Type: 0: MPEG. +- Results[1]: Offset: The position relative to the card's memory space. +- Results[2]: Size: The exact number of bytes to transfer. + +My speculation is that since the StartCapture API has a capture type of "RAW" +available, that the type field will have other values that correspond to YUV +and PCM data. + +Scatter-Gather Array +~~~~~~~~~~~~~~~~~~~~ + +The scatter-gather array is a contiguously allocated block of memory that +tells the card the source and destination of each data-block to transfer. +Card "addresses" are derived from the offset supplied by Mailbox #10. Host +addresses are the physical memory location of the target DMA buffer. + +Each S-G array element is a struct of three 32-bit words. The first word is +the source address, the second is the destination address. Both take up the +entire 32 bits. The lowest 18 bits of the third word is the transfer byte +count. The high-bit of the third word is the "last" flag. The last-flag tells +the card to raise the DMA_DONE interrupt. From hard personal experience, if +you forget to set this bit, the card will still "work" but the stream will +most likely get corrupted. + +The transfer count must be a multiple of 256. Therefore, the driver will need +to track how much data in the target buffer is valid and deal with it +accordingly. + +Array Element: + +- 32-bit Source Address +- 32-bit Destination Address +- 14-bit reserved (high bit is the last flag) +- 18-bit byte count + +DMA Transfer Status +~~~~~~~~~~~~~~~~~~~ + +Register 0x0004 holds the DMA Transfer Status: + +- bit 0: read completed +- bit 1: write completed +- bit 2: DMA read error +- bit 3: DMA write error +- bit 4: Scatter-Gather array error diff --git a/Documentation/driver-api/media/drivers/cx88-devel.rst b/Documentation/driver-api/media/drivers/cx88-devel.rst new file mode 100644 index 0000000000..cfe7c03f49 --- /dev/null +++ b/Documentation/driver-api/media/drivers/cx88-devel.rst @@ -0,0 +1,113 @@ +.. SPDX-License-Identifier: GPL-2.0 + +The cx88 driver +=============== + +Author: Gerd Hoffmann + +Documentation missing at the cx88 datasheet +------------------------------------------- + +MO_OUTPUT_FORMAT (0x310164) + +.. code-block:: none + + Previous default from DScaler: 0x1c1f0008 + Digit 8: 31-28 + 28: PREVREMOD = 1 + + Digit 7: 27-24 (0xc = 12 = b1100 ) + 27: COMBALT = 1 + 26: PAL_INV_PHASE + (DScaler apparently set this to 1, resulted in sucky picture) + + Digits 6,5: 23-16 + 25-16: COMB_RANGE = 0x1f [default] (9 bits -> max 512) + + Digit 4: 15-12 + 15: DISIFX = 0 + 14: INVCBF = 0 + 13: DISADAPT = 0 + 12: NARROWADAPT = 0 + + Digit 3: 11-8 + 11: FORCE2H + 10: FORCEREMD + 9: NCHROMAEN + 8: NREMODEN + + Digit 2: 7-4 + 7-6: YCORE + 5-4: CCORE + + Digit 1: 3-0 + 3: RANGE = 1 + 2: HACTEXT + 1: HSFMT + +0x47 is the sync byte for MPEG-2 transport stream packets. +Datasheet incorrectly states to use 47 decimal. 188 is the length. +All DVB compliant frontends output packets with this start code. + +Hauppauge WinTV cx88 IR information +----------------------------------- + +The controls for the mux are GPIO [0,1] for source, and GPIO 2 for muting. + +====== ======== ================================================= +GPIO0 GPIO1 +====== ======== ================================================= + 0 0 TV Audio + 1 0 FM radio + 0 1 Line-In + 1 1 Mono tuner bypass or CD passthru (tuner specific) +====== ======== ================================================= + +GPIO 16(I believe) is tied to the IR port (if present). + + +From the data sheet: + +- Register 24'h20004 PCI Interrupt Status + + - bit [18] IR_SMP_INT Set when 32 input samples have been collected over + - gpio[16] pin into GP_SAMPLE register. + +What's missing from the data sheet: + +- Setup 4KHz sampling rate (roughly 2x oversampled; good enough for our RC5 + compat remote) +- set register 0x35C050 to 0xa80a80 +- enable sampling +- set register 0x35C054 to 0x5 +- enable the IRQ bit 18 in the interrupt mask register (and + provide for a handler) + +GP_SAMPLE register is at 0x35C058 + +Bits are then right shifted into the GP_SAMPLE register at the specified +rate; you get an interrupt when a full DWORD is received. +You need to recover the actual RC5 bits out of the (oversampled) IR sensor +bits. (Hint: look for the 0/1and 1/0 crossings of the RC5 bi-phase data) An +actual raw RC5 code will span 2-3 DWORDS, depending on the actual alignment. + +I'm pretty sure when no IR signal is present the receiver is always in a +marking state(1); but stray light, etc can cause intermittent noise values +as well. Remember, this is a free running sample of the IR receiver state +over time, so don't assume any sample starts at any particular place. + +Additional info +~~~~~~~~~~~~~~~ + +This data sheet (google search) seems to have a lovely description of the +RC5 basics: +http://www.atmel.com/dyn/resources/prod_documents/doc2817.pdf + +This document has more data: +http://www.nenya.be/beor/electronics/rc5.htm + +This document has a how to decode a bi-phase data stream: +http://www.ee.washington.edu/circuit_archive/text/ir_decode.txt + +This document has still more info: +http://www.xs4all.nl/~sbp/knowledge/ir/rc5.htm diff --git a/Documentation/driver-api/media/drivers/dvb-usb.rst b/Documentation/driver-api/media/drivers/dvb-usb.rst new file mode 100644 index 0000000000..b2d5d9e62b --- /dev/null +++ b/Documentation/driver-api/media/drivers/dvb-usb.rst @@ -0,0 +1,357 @@ +.. SPDX-License-Identifier: GPL-2.0 + +Idea behind the dvb-usb-framework +================================= + +.. note:: + + #) This documentation is outdated. Please check at the DVB wiki + at https://linuxtv.org/wiki for more updated info. + + #) **deprecated:** Newer DVB USB drivers should use the dvb-usb-v2 framework. + +In March 2005 I got the new Twinhan USB2.0 DVB-T device. They provided specs +and a firmware. + +Quite keen I wanted to put the driver (with some quirks of course) into dibusb. +After reading some specs and doing some USB snooping, it realized, that the +dibusb-driver would be a complete mess afterwards. So I decided to do it in a +different way: With the help of a dvb-usb-framework. + +The framework provides generic functions (mostly kernel API calls), such as: + +- Transport Stream URB handling in conjunction with dvb-demux-feed-control + (bulk and isoc are supported) +- registering the device for the DVB-API +- registering an I2C-adapter if applicable +- remote-control/input-device handling +- firmware requesting and loading (currently just for the Cypress USB + controllers) +- other functions/methods which can be shared by several drivers (such as + functions for bulk-control-commands) +- TODO: a I2C-chunker. It creates device-specific chunks of register-accesses + depending on length of a register and the number of values that can be + multi-written and multi-read. + +The source code of the particular DVB USB devices does just the communication +with the device via the bus. The connection between the DVB-API-functionality +is done via callbacks, assigned in a static device-description (struct +dvb_usb_device) each device-driver has to have. + +For an example have a look in drivers/media/usb/dvb-usb/vp7045*. + +Objective is to migrate all the usb-devices (dibusb, cinergyT2, maybe the +ttusb; flexcop-usb already benefits from the generic flexcop-device) to use +the dvb-usb-lib. + +TODO: dynamic enabling and disabling of the pid-filter in regard to number of +feeds requested. + +Supported devices +----------------- + +See the LinuxTV DVB Wiki at https://linuxtv.org for a complete list of +cards/drivers/firmwares: +https://linuxtv.org/wiki/index.php/DVB_USB + +0. History & News: + + 2005-06-30 + + - added support for WideView WT-220U (Thanks to Steve Chang) + + 2005-05-30 + + - added basic isochronous support to the dvb-usb-framework + - added support for Conexant Hybrid reference design and Nebula + DigiTV USB + + 2005-04-17 + + - all dibusb devices ported to make use of the dvb-usb-framework + + 2005-04-02 + + - re-enabled and improved remote control code. + + 2005-03-31 + + - ported the Yakumo/Hama/Typhoon DVB-T USB2.0 device to dvb-usb. + + 2005-03-30 + + - first commit of the dvb-usb-module based on the dibusb-source. + First device is a new driver for the + TwinhanDTV Alpha / MagicBox II USB2.0-only DVB-T device. + - (change from dvb-dibusb to dvb-usb) + + 2005-03-28 + + - added support for the AVerMedia AverTV DVB-T USB2.0 device + (Thanks to Glen Harris and Jiun-Kuei Jung, AVerMedia) + + 2005-03-14 + + - added support for the Typhoon/Yakumo/HAMA DVB-T mobile USB2.0 + + 2005-02-11 + + - added support for the KWorld/ADSTech Instant DVB-T USB2.0. + Thanks a lot to Joachim von Caron + + 2005-02-02 + - added support for the Hauppauge Win-TV Nova-T USB2 + + 2005-01-31 + - distorted streaming is gone for USB1.1 devices + + 2005-01-13 + + - moved the mirrored pid_filter_table back to dvb-dibusb + first almost working version for HanfTek UMT-010 + found out, that Yakumo/HAMA/Typhoon are predecessors of the HanfTek UMT-010 + + 2005-01-10 + + - refactoring completed, now everything is very delightful + + - tuner quirks for some weird devices (Artec T1 AN2235 device has sometimes a + Panasonic Tuner assembled). Tunerprobing implemented. + Thanks a lot to Gunnar Wittich. + + 2004-12-29 + + - after several days of struggling around bug of no returning URBs fixed. + + 2004-12-26 + + - refactored the dibusb-driver, split into separate files + - i2c-probing enabled + + 2004-12-06 + + - possibility for demod i2c-address probing + - new usb IDs (Compro, Artec) + + 2004-11-23 + + - merged changes from DiB3000MC_ver2.1 + - revised the debugging + - possibility to deliver the complete TS for USB2.0 + + 2004-11-21 + + - first working version of the dib3000mc/p frontend driver. + + 2004-11-12 + + - added additional remote control keys. Thanks to Uwe Hanke. + + 2004-11-07 + + - added remote control support. Thanks to David Matthews. + + 2004-11-05 + + - added support for a new devices (Grandtec/Avermedia/Artec) + - merged my changes (for dib3000mb/dibusb) to the FE_REFACTORING, because it became HEAD + - moved transfer control (pid filter, fifo control) from usb driver to frontend, it seems + better settled there (added xfer_ops-struct) + - created a common files for frontends (mc/p/mb) + + 2004-09-28 + + - added support for a new device (Unknown, vendor ID is Hyper-Paltek) + + 2004-09-20 + + - added support for a new device (Compro DVB-U2000), thanks + to Amaury Demol for reporting + - changed usb TS transfer method (several urbs, stopping transfer + before setting a new pid) + + 2004-09-13 + + - added support for a new device (Artec T1 USB TVBOX), thanks + to Christian Motschke for reporting + + 2004-09-05 + + - released the dibusb device and dib3000mb-frontend driver + (old news for vp7041.c) + + 2004-07-15 + + - found out, by accident, that the device has a TUA6010XS for PLL + + 2004-07-12 + + - figured out, that the driver should also work with the + CTS Portable (Chinese Television System) + + 2004-07-08 + + - firmware-extraction-2.422-problem solved, driver is now working + properly with firmware extracted from 2.422 + - #if for 2.6.4 (dvb), compile issue + - changed firmware handling, see vp7041.txt sec 1.1 + + 2004-07-02 + + - some tuner modifications, v0.1, cleanups, first public + + 2004-06-28 + + - now using the dvb_dmx_swfilter_packets, everything runs fine now + + 2004-06-27 + + - able to watch and switching channels (pre-alpha) + - no section filtering yet + + 2004-06-06 + + - first TS received, but kernel oops :/ + + 2004-05-14 + + - firmware loader is working + + 2004-05-11 + + - start writing the driver + +How to use? +----------- + +Firmware +~~~~~~~~ + +Most of the USB drivers need to download a firmware to the device before start +working. + +Have a look at the Wikipage for the DVB-USB-drivers to find out, which firmware +you need for your device: + +https://linuxtv.org/wiki/index.php/DVB_USB + +Compiling +~~~~~~~~~ + +Since the driver is in the linux kernel, activating the driver in +your favorite config-environment should sufficient. I recommend +to compile the driver as module. Hotplug does the rest. + +If you use dvb-kernel enter the build-2.6 directory run 'make' and 'insmod.sh +load' afterwards. + +Loading the drivers +~~~~~~~~~~~~~~~~~~~ + +Hotplug is able to load the driver, when it is needed (because you plugged +in the device). + +If you want to enable debug output, you have to load the driver manually and +from within the dvb-kernel cvs repository. + +first have a look, which debug level are available: + +.. code-block:: none + + # modinfo dvb-usb + # modinfo dvb-usb-vp7045 + + etc. + +.. code-block:: none + + modprobe dvb-usb debug=<level> + modprobe dvb-usb-vp7045 debug=<level> + etc. + +should do the trick. + +When the driver is loaded successfully, the firmware file was in +the right place and the device is connected, the "Power"-LED should be +turned on. + +At this point you should be able to start a dvb-capable application. I'm use +(t|s)zap, mplayer and dvbscan to test the basics. VDR-xine provides the +long-term test scenario. + +Known problems and bugs +----------------------- + +- Don't remove the USB device while running an DVB application, your system + will go crazy or die most likely. + +Adding support for devices +~~~~~~~~~~~~~~~~~~~~~~~~~~ + +TODO + +USB1.1 Bandwidth limitation +~~~~~~~~~~~~~~~~~~~~~~~~~~~ + +A lot of the currently supported devices are USB1.1 and thus they have a +maximum bandwidth of about 5-6 MBit/s when connected to a USB2.0 hub. +This is not enough for receiving the complete transport stream of a +DVB-T channel (which is about 16 MBit/s). Normally this is not a +problem, if you only want to watch TV (this does not apply for HDTV), +but watching a channel while recording another channel on the same +frequency simply does not work very well. This applies to all USB1.1 +DVB-T devices, not just the dvb-usb-devices) + +The bug, where the TS is distorted by a heavy usage of the device is gone +definitely. All dvb-usb-devices I was using (Twinhan, Kworld, DiBcom) are +working like charm now with VDR. Sometimes I even was able to record a channel +and watch another one. + +Comments +~~~~~~~~ + +Patches, comments and suggestions are very very welcome. + +3. Acknowledgements +------------------- + + Amaury Demol (Amaury.Demol@parrot.com) and Francois Kanounnikoff from DiBcom for + providing specs, code and help, on which the dvb-dibusb, dib3000mb and + dib3000mc are based. + + David Matthews for identifying a new device type (Artec T1 with AN2235) + and for extending dibusb with remote control event handling. Thank you. + + Alex Woods for frequently answering question about usb and dvb + stuff, a big thank you. + + Bernd Wagner for helping with huge bug reports and discussions. + + Gunnar Wittich and Joachim von Caron for their trust for providing + root-shells on their machines to implement support for new devices. + + Allan Third and Michael Hutchinson for their help to write the Nebula + digitv-driver. + + Glen Harris for bringing up, that there is a new dibusb-device and Jiun-Kuei + Jung from AVerMedia who kindly provided a special firmware to get the device + up and running in Linux. + + Jennifer Chen, Jeff and Jack from Twinhan for kindly supporting by + writing the vp7045-driver. + + Steve Chang from WideView for providing information for new devices and + firmware files. + + Michael Paxton for submitting remote control keymaps. + + Some guys on the linux-dvb mailing list for encouraging me. + + Peter Schildmann >peter.schildmann-nospam-at-web.de< for his + user-level firmware loader, which saves a lot of time + (when writing the vp7041 driver) + + Ulf Hermenau for helping me out with traditional chinese. + + André Smoktun and Christian Frömmel for supporting me with + hardware and listening to my problems very patiently. diff --git a/Documentation/driver-api/media/drivers/fimc-devel.rst b/Documentation/driver-api/media/drivers/fimc-devel.rst new file mode 100644 index 0000000000..4c6b7c8be1 --- /dev/null +++ b/Documentation/driver-api/media/drivers/fimc-devel.rst @@ -0,0 +1,33 @@ +.. SPDX-License-Identifier: GPL-2.0 + +.. include:: <isonum.txt> + +The Samsung S5P/EXYNOS4 FIMC driver +=================================== + +Copyright |copy| 2012 - 2013 Samsung Electronics Co., Ltd. + +Files partitioning +------------------ + +- media device driver + + drivers/media/platform/samsung/exynos4-is/media-dev.[ch] + +- camera capture video device driver + + drivers/media/platform/samsung/exynos4-is/fimc-capture.c + +- MIPI-CSI2 receiver subdev + + drivers/media/platform/samsung/exynos4-is/mipi-csis.[ch] + +- video post-processor (mem-to-mem) + + drivers/media/platform/samsung/exynos4-is/fimc-core.c + +- common files + + drivers/media/platform/samsung/exynos4-is/fimc-core.h + drivers/media/platform/samsung/exynos4-is/fimc-reg.h + drivers/media/platform/samsung/exynos4-is/regs-fimc.h diff --git a/Documentation/driver-api/media/drivers/frontends.rst b/Documentation/driver-api/media/drivers/frontends.rst new file mode 100644 index 0000000000..7b8336ece6 --- /dev/null +++ b/Documentation/driver-api/media/drivers/frontends.rst @@ -0,0 +1,32 @@ +.. SPDX-License-Identifier: GPL-2.0 + +**************** +Frontend drivers +**************** + +Frontend attach headers +*********************** + +.. Keep it on alphabetic order + +.. kernel-doc:: drivers/media/dvb-frontends/a8293.h +.. kernel-doc:: drivers/media/dvb-frontends/af9013.h +.. kernel-doc:: drivers/media/dvb-frontends/ascot2e.h +.. kernel-doc:: drivers/media/dvb-frontends/cxd2820r.h +.. kernel-doc:: drivers/media/dvb-frontends/drxk.h +.. kernel-doc:: drivers/media/dvb-frontends/dvb-pll.h +.. kernel-doc:: drivers/media/dvb-frontends/helene.h +.. kernel-doc:: drivers/media/dvb-frontends/horus3a.h +.. kernel-doc:: drivers/media/dvb-frontends/ix2505v.h +.. kernel-doc:: drivers/media/dvb-frontends/m88ds3103.h +.. kernel-doc:: drivers/media/dvb-frontends/mb86a20s.h +.. kernel-doc:: drivers/media/dvb-frontends/mn88472.h +.. kernel-doc:: drivers/media/dvb-frontends/rtl2830.h +.. kernel-doc:: drivers/media/dvb-frontends/rtl2832.h +.. kernel-doc:: drivers/media/dvb-frontends/rtl2832_sdr.h +.. kernel-doc:: drivers/media/dvb-frontends/stb6000.h +.. kernel-doc:: drivers/media/dvb-frontends/tda10071.h +.. kernel-doc:: drivers/media/dvb-frontends/tda826x.h +.. kernel-doc:: drivers/media/dvb-frontends/zd1301_demod.h +.. kernel-doc:: drivers/media/dvb-frontends/zl10036.h + diff --git a/Documentation/driver-api/media/drivers/index.rst b/Documentation/driver-api/media/drivers/index.rst new file mode 100644 index 0000000000..c4123a16b5 --- /dev/null +++ b/Documentation/driver-api/media/drivers/index.rst @@ -0,0 +1,40 @@ +.. SPDX-License-Identifier: GPL-2.0 + +.. include:: <isonum.txt> + +=================================== +Media driver-specific documentation +=================================== + +Video4Linux (V4L) drivers +========================= + +.. toctree:: + :maxdepth: 5 + + bttv-devel + cx2341x-devel + cx88-devel + fimc-devel + pvrusb2 + pxa_camera + radiotrack + rkisp1 + saa7134-devel + sh_mobile_ceu_camera + tuners + vimc-devel + zoran + ccs/ccs + + +Digital TV drivers +================== + +.. toctree:: + :maxdepth: 5 + + dvb-usb + frontends + vidtv + contributors diff --git a/Documentation/driver-api/media/drivers/pvrusb2.rst b/Documentation/driver-api/media/drivers/pvrusb2.rst new file mode 100644 index 0000000000..cbd9359c24 --- /dev/null +++ b/Documentation/driver-api/media/drivers/pvrusb2.rst @@ -0,0 +1,202 @@ +.. SPDX-License-Identifier: GPL-2.0 + +The pvrusb2 driver +================== + +Author: Mike Isely <isely@pobox.com> + +Background +---------- + +This driver is intended for the "Hauppauge WinTV PVR USB 2.0", which +is a USB 2.0 hosted TV Tuner. This driver is a work in progress. +Its history started with the reverse-engineering effort by Björn +Danielsson <pvrusb2@dax.nu> whose web page can be found here: +http://pvrusb2.dax.nu/ + +From there Aurelien Alleaume <slts@free.fr> began an effort to +create a video4linux compatible driver. I began with Aurelien's +last known snapshot and evolved the driver to the state it is in +here. + +More information on this driver can be found at: +https://www.isely.net/pvrusb2.html + + +This driver has a strong separation of layers. They are very +roughly: + +1. Low level wire-protocol implementation with the device. + +2. I2C adaptor implementation and corresponding I2C client drivers + implemented elsewhere in V4L. + +3. High level hardware driver implementation which coordinates all + activities that ensure correct operation of the device. + +4. A "context" layer which manages instancing of driver, setup, + tear-down, arbitration, and interaction with high level + interfaces appropriately as devices are hotplugged in the + system. + +5. High level interfaces which glue the driver to various published + Linux APIs (V4L, sysfs, maybe DVB in the future). + +The most important shearing layer is between the top 2 layers. A +lot of work went into the driver to ensure that any kind of +conceivable API can be laid on top of the core driver. (Yes, the +driver internally leverages V4L to do its work but that really has +nothing to do with the API published by the driver to the outside +world.) The architecture allows for different APIs to +simultaneously access the driver. I have a strong sense of fairness +about APIs and also feel that it is a good design principle to keep +implementation and interface isolated from each other. Thus while +right now the V4L high level interface is the most complete, the +sysfs high level interface will work equally well for similar +functions, and there's no reason I see right now why it shouldn't be +possible to produce a DVB high level interface that can sit right +alongside V4L. + +Building +-------- + +To build these modules essentially amounts to just running "Make", +but you need the kernel source tree nearby and you will likely also +want to set a few controlling environment variables first in order +to link things up with that source tree. Please see the Makefile +here for comments that explain how to do that. + +Source file list / functional overview +-------------------------------------- + +(Note: The term "module" used below generally refers to loosely +defined functional units within the pvrusb2 driver and bears no +relation to the Linux kernel's concept of a loadable module.) + +pvrusb2-audio.[ch] - This is glue logic that resides between this + driver and the msp3400.ko I2C client driver (which is found + elsewhere in V4L). + +pvrusb2-context.[ch] - This module implements the context for an + instance of the driver. Everything else eventually ties back to + or is otherwise instanced within the data structures implemented + here. Hotplugging is ultimately coordinated here. All high level + interfaces tie into the driver through this module. This module + helps arbitrate each interface's access to the actual driver core, + and is designed to allow concurrent access through multiple + instances of multiple interfaces (thus you can for example change + the tuner's frequency through sysfs while simultaneously streaming + video through V4L out to an instance of mplayer). + +pvrusb2-debug.h - This header defines a printk() wrapper and a mask + of debugging bit definitions for the various kinds of debug + messages that can be enabled within the driver. + +pvrusb2-debugifc.[ch] - This module implements a crude command line + oriented debug interface into the driver. Aside from being part + of the process for implementing manual firmware extraction (see + the pvrusb2 web site mentioned earlier), probably I'm the only one + who has ever used this. It is mainly a debugging aid. + +pvrusb2-eeprom.[ch] - This is glue logic that resides between this + driver the tveeprom.ko module, which is itself implemented + elsewhere in V4L. + +pvrusb2-encoder.[ch] - This module implements all protocol needed to + interact with the Conexant mpeg2 encoder chip within the pvrusb2 + device. It is a crude echo of corresponding logic in ivtv, + however the design goals (strict isolation) and physical layer + (proxy through USB instead of PCI) are enough different that this + implementation had to be completely different. + +pvrusb2-hdw-internal.h - This header defines the core data structure + in the driver used to track ALL internal state related to control + of the hardware. Nobody outside of the core hardware-handling + modules should have any business using this header. All external + access to the driver should be through one of the high level + interfaces (e.g. V4L, sysfs, etc), and in fact even those high + level interfaces are restricted to the API defined in + pvrusb2-hdw.h and NOT this header. + +pvrusb2-hdw.h - This header defines the full internal API for + controlling the hardware. High level interfaces (e.g. V4L, sysfs) + will work through here. + +pvrusb2-hdw.c - This module implements all the various bits of logic + that handle overall control of a specific pvrusb2 device. + (Policy, instantiation, and arbitration of pvrusb2 devices fall + within the jurisdiction of pvrusb-context not here). + +pvrusb2-i2c-chips-\*.c - These modules implement the glue logic to + tie together and configure various I2C modules as they attach to + the I2C bus. There are two versions of this file. The "v4l2" + version is intended to be used in-tree alongside V4L, where we + implement just the logic that makes sense for a pure V4L + environment. The "all" version is intended for use outside of + V4L, where we might encounter other possibly "challenging" modules + from ivtv or older kernel snapshots (or even the support modules + in the standalone snapshot). + +pvrusb2-i2c-cmd-v4l1.[ch] - This module implements generic V4L1 + compatible commands to the I2C modules. It is here where state + changes inside the pvrusb2 driver are translated into V4L1 + commands that are in turn send to the various I2C modules. + +pvrusb2-i2c-cmd-v4l2.[ch] - This module implements generic V4L2 + compatible commands to the I2C modules. It is here where state + changes inside the pvrusb2 driver are translated into V4L2 + commands that are in turn send to the various I2C modules. + +pvrusb2-i2c-core.[ch] - This module provides an implementation of a + kernel-friendly I2C adaptor driver, through which other external + I2C client drivers (e.g. msp3400, tuner, lirc) may connect and + operate corresponding chips within the pvrusb2 device. It is + through here that other V4L modules can reach into this driver to + operate specific pieces (and those modules are in turn driven by + glue logic which is coordinated by pvrusb2-hdw, doled out by + pvrusb2-context, and then ultimately made available to users + through one of the high level interfaces). + +pvrusb2-io.[ch] - This module implements a very low level ring of + transfer buffers, required in order to stream data from the + device. This module is *very* low level. It only operates the + buffers and makes no attempt to define any policy or mechanism for + how such buffers might be used. + +pvrusb2-ioread.[ch] - This module layers on top of pvrusb2-io.[ch] + to provide a streaming API usable by a read() system call style of + I/O. Right now this is the only layer on top of pvrusb2-io.[ch], + however the underlying architecture here was intended to allow for + other styles of I/O to be implemented with additional modules, like + mmap()'ed buffers or something even more exotic. + +pvrusb2-main.c - This is the top level of the driver. Module level + and USB core entry points are here. This is our "main". + +pvrusb2-sysfs.[ch] - This is the high level interface which ties the + pvrusb2 driver into sysfs. Through this interface you can do + everything with the driver except actually stream data. + +pvrusb2-tuner.[ch] - This is glue logic that resides between this + driver and the tuner.ko I2C client driver (which is found + elsewhere in V4L). + +pvrusb2-util.h - This header defines some common macros used + throughout the driver. These macros are not really specific to + the driver, but they had to go somewhere. + +pvrusb2-v4l2.[ch] - This is the high level interface which ties the + pvrusb2 driver into video4linux. It is through here that V4L + applications can open and operate the driver in the usual V4L + ways. Note that **ALL** V4L functionality is published only + through here and nowhere else. + +pvrusb2-video-\*.[ch] - This is glue logic that resides between this + driver and the saa711x.ko I2C client driver (which is found + elsewhere in V4L). Note that saa711x.ko used to be known as + saa7115.ko in ivtv. There are two versions of this; one is + selected depending on the particular saa711[5x].ko that is found. + +pvrusb2.h - This header contains compile time tunable parameters + (and at the moment the driver has very little that needs to be + tuned). diff --git a/Documentation/driver-api/media/drivers/pxa_camera.rst b/Documentation/driver-api/media/drivers/pxa_camera.rst new file mode 100644 index 0000000000..46919919ba --- /dev/null +++ b/Documentation/driver-api/media/drivers/pxa_camera.rst @@ -0,0 +1,194 @@ +.. SPDX-License-Identifier: GPL-2.0 + +PXA-Camera Host Driver +====================== + +Author: Robert Jarzmik <robert.jarzmik@free.fr> + +Constraints +----------- + +a) Image size for YUV422P format + All YUV422P images are enforced to have width x height % 16 = 0. + This is due to DMA constraints, which transfers only planes of 8 byte + multiples. + + +Global video workflow +--------------------- + +a) QCI stopped + Initially, the QCI interface is stopped. + When a buffer is queued, start_streaming is called and the QCI starts. + +b) QCI started + More buffers can be queued while the QCI is started without halting the + capture. The new buffers are "appended" at the tail of the DMA chain, and + smoothly captured one frame after the other. + + Once a buffer is filled in the QCI interface, it is marked as "DONE" and + removed from the active buffers list. It can be then requeud or dequeued by + userland application. + + Once the last buffer is filled in, the QCI interface stops. + +c) Capture global finite state machine schema + +.. code-block:: none + + +----+ +---+ +----+ + | DQ | | Q | | DQ | + | v | v | v + +-----------+ +------------------------+ + | STOP | | Wait for capture start | + +-----------+ Q +------------------------+ + +-> | QCI: stop | ------------------> | QCI: run | <------------+ + | | DMA: stop | | DMA: stop | | + | +-----------+ +-----> +------------------------+ | + | / | | + | / +---+ +----+ | | + |capture list empty / | Q | | DQ | | QCI Irq EOF | + | / | v | v v | + | +--------------------+ +----------------------+ | + | | DMA hotlink missed | | Capture running | | + | +--------------------+ +----------------------+ | + | | QCI: run | +-----> | QCI: run | <-+ | + | | DMA: stop | / | DMA: run | | | + | +--------------------+ / +----------------------+ | Other | + | ^ /DMA still | | channels | + | | capture list / running | DMA Irq End | not | + | | not empty / | | finished | + | | / v | yet | + | +----------------------+ +----------------------+ | | + | | Videobuf released | | Channel completed | | | + | +----------------------+ +----------------------+ | | + +-- | QCI: run | | QCI: run | --+ | + | DMA: run | | DMA: run | | + +----------------------+ +----------------------+ | + ^ / | | + | no overrun / | overrun | + | / v | + +--------------------+ / +----------------------+ | + | Frame completed | / | Frame overran | | + +--------------------+ <-----+ +----------------------+ restart frame | + | QCI: run | | QCI: stop | --------------+ + | DMA: run | | DMA: stop | + +--------------------+ +----------------------+ + + Legend: - each box is a FSM state + - each arrow is the condition to transition to another state + - an arrow with a comment is a mandatory transition (no condition) + - arrow "Q" means : a buffer was enqueued + - arrow "DQ" means : a buffer was dequeued + - "QCI: stop" means the QCI interface is not enabled + - "DMA: stop" means all 3 DMA channels are stopped + - "DMA: run" means at least 1 DMA channel is still running + +DMA usage +--------- + +a) DMA flow + - first buffer queued for capture + Once a first buffer is queued for capture, the QCI is started, but data + transfer is not started. On "End Of Frame" interrupt, the irq handler + starts the DMA chain. + - capture of one videobuffer + The DMA chain starts transferring data into videobuffer RAM pages. + When all pages are transferred, the DMA irq is raised on "ENDINTR" status + - finishing one videobuffer + The DMA irq handler marks the videobuffer as "done", and removes it from + the active running queue + Meanwhile, the next videobuffer (if there is one), is transferred by DMA + - finishing the last videobuffer + On the DMA irq of the last videobuffer, the QCI is stopped. + +b) DMA prepared buffer will have this structure + +.. code-block:: none + + +------------+-----+---------------+-----------------+ + | desc-sg[0] | ... | desc-sg[last] | finisher/linker | + +------------+-----+---------------+-----------------+ + +This structure is pointed by dma->sg_cpu. +The descriptors are used as follows: + +- desc-sg[i]: i-th descriptor, transferring the i-th sg + element to the video buffer scatter gather +- finisher: has ddadr=DADDR_STOP, dcmd=ENDIRQEN +- linker: has ddadr= desc-sg[0] of next video buffer, dcmd=0 + +For the next schema, let's assume d0=desc-sg[0] .. dN=desc-sg[N], +"f" stands for finisher and "l" for linker. +A typical running chain is : + +.. code-block:: none + + Videobuffer 1 Videobuffer 2 + +---------+----+---+ +----+----+----+---+ + | d0 | .. | dN | l | | d0 | .. | dN | f | + +---------+----+-|-+ ^----+----+----+---+ + | | + +----+ + +After the chaining is finished, the chain looks like : + +.. code-block:: none + + Videobuffer 1 Videobuffer 2 Videobuffer 3 + +---------+----+---+ +----+----+----+---+ +----+----+----+---+ + | d0 | .. | dN | l | | d0 | .. | dN | l | | d0 | .. | dN | f | + +---------+----+-|-+ ^----+----+----+-|-+ ^----+----+----+---+ + | | | | + +----+ +----+ + new_link + +c) DMA hot chaining timeslice issue + +As DMA chaining is done while DMA _is_ running, the linking may be done +while the DMA jumps from one Videobuffer to another. On the schema, that +would be a problem if the following sequence is encountered : + +- DMA chain is Videobuffer1 + Videobuffer2 +- pxa_videobuf_queue() is called to queue Videobuffer3 +- DMA controller finishes Videobuffer2, and DMA stops + +.. code-block:: none + + => + Videobuffer 1 Videobuffer 2 + +---------+----+---+ +----+----+----+---+ + | d0 | .. | dN | l | | d0 | .. | dN | f | + +---------+----+-|-+ ^----+----+----+-^-+ + | | | + +----+ +-- DMA DDADR loads DDADR_STOP + +- pxa_dma_add_tail_buf() is called, the Videobuffer2 "finisher" is + replaced by a "linker" to Videobuffer3 (creation of new_link) +- pxa_videobuf_queue() finishes +- the DMA irq handler is called, which terminates Videobuffer2 +- Videobuffer3 capture is not scheduled on DMA chain (as it stopped !!!) + +.. code-block:: none + + Videobuffer 1 Videobuffer 2 Videobuffer 3 + +---------+----+---+ +----+----+----+---+ +----+----+----+---+ + | d0 | .. | dN | l | | d0 | .. | dN | l | | d0 | .. | dN | f | + +---------+----+-|-+ ^----+----+----+-|-+ ^----+----+----+---+ + | | | | + +----+ +----+ + new_link + DMA DDADR still is DDADR_STOP + +- pxa_camera_check_link_miss() is called + This checks if the DMA is finished and a buffer is still on the + pcdev->capture list. If that's the case, the capture will be restarted, + and Videobuffer3 is scheduled on DMA chain. +- the DMA irq handler finishes + +.. note:: + + If DMA stops just after pxa_camera_check_link_miss() reads DDADR() + value, we have the guarantee that the DMA irq handler will be called back + when the DMA will finish the buffer, and pxa_camera_check_link_miss() will + be called again, to reschedule Videobuffer3. diff --git a/Documentation/driver-api/media/drivers/radiotrack.rst b/Documentation/driver-api/media/drivers/radiotrack.rst new file mode 100644 index 0000000000..a85cb6205d --- /dev/null +++ b/Documentation/driver-api/media/drivers/radiotrack.rst @@ -0,0 +1,168 @@ +.. SPDX-License-Identifier: GPL-2.0 + +The Radiotrack radio driver +=========================== + +Author: Stephen M. Benoit <benoits@servicepro.com> + +Date: Dec 14, 1996 + +ACKNOWLEDGMENTS +---------------- + +This document was made based on 'C' code for Linux from Gideon le Grange +(legrang@active.co.za or legrang@cs.sun.ac.za) in 1994, and elaborations from +Frans Brinkman (brinkman@esd.nl) in 1996. The results reported here are from +experiments that the author performed on his own setup, so your mileage may +vary... I make no guarantees, claims or warranties to the suitability or +validity of this information. No other documentation on the AIMS +Lab (http://www.aimslab.com/) RadioTrack card was made available to the +author. This document is offered in the hopes that it might help users who +want to use the RadioTrack card in an environment other than MS Windows. + +WHY THIS DOCUMENT? +------------------ + +I have a RadioTrack card from back when I ran an MS-Windows platform. After +converting to Linux, I found Gideon le Grange's command-line software for +running the card, and found that it was good! Frans Brinkman made a +comfortable X-windows interface, and added a scanning feature. For hack +value, I wanted to see if the tuner could be tuned beyond the usual FM radio +broadcast band, so I could pick up the audio carriers from North American +broadcast TV channels, situated just below and above the 87.0-109.0 MHz range. +I did not get much success, but I learned about programming ioports under +Linux and gained some insights about the hardware design used for the card. + +So, without further delay, here are the details. + + +PHYSICAL DESCRIPTION +-------------------- + +The RadioTrack card is an ISA 8-bit FM radio card. The radio frequency (RF) +input is simply an antenna lead, and the output is a power audio signal +available through a miniature phone plug. Its RF frequencies of operation are +more or less limited from 87.0 to 109.0 MHz (the commercial FM broadcast +band). Although the registers can be programmed to request frequencies beyond +these limits, experiments did not give promising results. The variable +frequency oscillator (VFO) that demodulates the intermediate frequency (IF) +signal probably has a small range of useful frequencies, and wraps around or +gets clipped beyond the limits mentioned above. + + +CONTROLLING THE CARD WITH IOPORT +-------------------------------- + +The RadioTrack (base) ioport is configurable for 0x30c or 0x20c. Only one +ioport seems to be involved. The ioport decoding circuitry must be pretty +simple, as individual ioport bits are directly matched to specific functions +(or blocks) of the radio card. This way, many functions can be changed in +parallel with one write to the ioport. The only feedback available through +the ioports appears to be the "Stereo Detect" bit. + +The bits of the ioport are arranged as follows: + +.. code-block:: none + + MSb LSb + +------+------+------+--------+--------+-------+---------+--------+ + | VolA | VolB | ???? | Stereo | Radio | TuneA | TuneB | Tune | + | (+) | (-) | | Detect | Audio | (bit) | (latch) | Update | + | | | | Enable | Enable | | | Enable | + +------+------+------+--------+--------+-------+---------+--------+ + + +==== ==== ================================= +VolA VolB Description +==== ==== ================================= +0 0 audio mute +0 1 volume + (some delay required) +1 0 volume - (some delay required) +1 1 stay at present volume +==== ==== ================================= + +==================== =========== +Stereo Detect Enable Description +==================== =========== +0 No Detect +1 Detect +==================== =========== + +Results available by reading ioport >60 msec after last port write. + + 0xff ==> no stereo detected, 0xfd ==> stereo detected. + +============================= ============================= +Radio to Audio (path) Enable Description +============================= ============================= +0 Disable path (silence) +1 Enable path (audio produced) +============================= ============================= + +===== ===== ================== +TuneA TuneB Description +===== ===== ================== +0 0 "zero" bit phase 1 +0 1 "zero" bit phase 2 +1 0 "one" bit phase 1 +1 1 "one" bit phase 2 +===== ===== ================== + + +24-bit code, where bits = (freq*40) + 10486188. +The Most Significant 11 bits must be 1010 xxxx 0x0 to be valid. +The bits are shifted in LSb first. + +================== =========================== +Tune Update Enable Description +================== =========================== +0 Tuner held constant +1 Tuner updating in progress +================== =========================== + + +PROGRAMMING EXAMPLES +-------------------- + +.. code-block:: none + + Default: BASE <-- 0xc8 (current volume, no stereo detect, + radio enable, tuner adjust disable) + + Card Off: BASE <-- 0x00 (audio mute, no stereo detect, + radio disable, tuner adjust disable) + + Card On: BASE <-- 0x00 (see "Card Off", clears any unfinished business) + BASE <-- 0xc8 (see "Default") + + Volume Down: BASE <-- 0x48 (volume down, no stereo detect, + radio enable, tuner adjust disable) + wait 10 msec + BASE <-- 0xc8 (see "Default") + + Volume Up: BASE <-- 0x88 (volume up, no stereo detect, + radio enable, tuner adjust disable) + wait 10 msec + BASE <-- 0xc8 (see "Default") + + Check Stereo: BASE <-- 0xd8 (current volume, stereo detect, + radio enable, tuner adjust disable) + wait 100 msec + x <-- BASE (read ioport) + BASE <-- 0xc8 (see "Default") + + x=0xff ==> "not stereo", x=0xfd ==> "stereo detected" + + Set Frequency: code = (freq*40) + 10486188 + foreach of the 24 bits in code, + (from Least to Most Significant): + to write a "zero" bit, + BASE <-- 0x01 (audio mute, no stereo detect, radio + disable, "zero" bit phase 1, tuner adjust) + BASE <-- 0x03 (audio mute, no stereo detect, radio + disable, "zero" bit phase 2, tuner adjust) + to write a "one" bit, + BASE <-- 0x05 (audio mute, no stereo detect, radio + disable, "one" bit phase 1, tuner adjust) + BASE <-- 0x07 (audio mute, no stereo detect, radio + disable, "one" bit phase 2, tuner adjust) diff --git a/Documentation/driver-api/media/drivers/rkisp1.rst b/Documentation/driver-api/media/drivers/rkisp1.rst new file mode 100644 index 0000000000..ea336958a3 --- /dev/null +++ b/Documentation/driver-api/media/drivers/rkisp1.rst @@ -0,0 +1,43 @@ +.. SPDX-License-Identifier: GPL-2.0 + +The Rockchip Image Signal Processor Driver (rkisp1) +=================================================== + +Versions and their differences +------------------------------ + +The rkisp1 block underwent some changes between SoC implementations. +The vendor designates them as: + +- V10: used at least in rk3288 and rk3399 +- V11: declared in the original vendor code, but not used +- V12: used at least in rk3326 and px30 +- V13: used at least in rk1808 +- V20: used in rk3568 and beyond + +Right now the kernel supports rkisp1 implementations based +on V10 and V12 variants. V11 does not seem to be actually used +and V13 will need some more additions but isn't researched yet, +especially as it seems to be limited to the rk1808 which hasn't +reached much market spread. + +V20 on the other hand will probably be used in future SoCs and +has seen really big changes in the vendor kernel, so will need +quite a bit of research. + +Changes from V10 to V12 +----------------------- + +- V12 supports a new CSI-host implementation but can still + also use the same implementation from V10 +- The module for lens shading correction got changed + from 12bit to 13bit width +- The AWB and AEC modules got replaced to support finer + grained data collection + +Changes from V12 to V13 +----------------------- + +The list for V13 is incomplete and needs further investigation. + +- V13 does not support the old CSI-host implementation anymore diff --git a/Documentation/driver-api/media/drivers/saa7134-devel.rst b/Documentation/driver-api/media/drivers/saa7134-devel.rst new file mode 100644 index 0000000000..167fd729bc --- /dev/null +++ b/Documentation/driver-api/media/drivers/saa7134-devel.rst @@ -0,0 +1,67 @@ +.. SPDX-License-Identifier: GPL-2.0 + +The saa7134 driver +================== + +Author Gerd Hoffmann + + +Card Variations: +---------------- + +Cards can use either of these two crystals (xtal): + +- 32.11 MHz -> .audio_clock=0x187de7 +- 24.576MHz -> .audio_clock=0x200000 (xtal * .audio_clock = 51539600) + +Some details about 30/34/35: + +- saa7130 - low-price chip, doesn't have mute, that is why all those + cards should have .mute field defined in their tuner structure. + +- saa7134 - usual chip + +- saa7133/35 - saa7135 is probably a marketing decision, since all those + chips identifies itself as 33 on pci. + +LifeView GPIOs +-------------- + +This section was authored by: Peter Missel <peter.missel@onlinehome.de> + +- LifeView FlyTV Platinum FM (LR214WF) + + - GP27 MDT2005 PB4 pin 10 + - GP26 MDT2005 PB3 pin 9 + - GP25 MDT2005 PB2 pin 8 + - GP23 MDT2005 PB1 pin 7 + - GP22 MDT2005 PB0 pin 6 + - GP21 MDT2005 PB5 pin 11 + - GP20 MDT2005 PB6 pin 12 + - GP19 MDT2005 PB7 pin 13 + - nc MDT2005 PA3 pin 2 + - Remote MDT2005 PA2 pin 1 + - GP18 MDT2005 PA1 pin 18 + - nc MDT2005 PA0 pin 17 strap low + - GP17 Strap "GP7"=High + - GP16 Strap "GP6"=High + + - 0=Radio 1=TV + - Drives SA630D ENCH1 and HEF4052 A1 pinsto do FM radio through + SIF input + + - GP15 nc + - GP14 nc + - GP13 nc + - GP12 Strap "GP5" = High + - GP11 Strap "GP4" = High + - GP10 Strap "GP3" = High + - GP09 Strap "GP2" = Low + - GP08 Strap "GP1" = Low + - GP07.00 nc + +Credits +------- + +andrew.stevens@philips.com + werner.leeb@philips.com for providing +saa7134 hardware specs and sample board. diff --git a/Documentation/driver-api/media/drivers/sh_mobile_ceu_camera.rst b/Documentation/driver-api/media/drivers/sh_mobile_ceu_camera.rst new file mode 100644 index 0000000000..822fcb8368 --- /dev/null +++ b/Documentation/driver-api/media/drivers/sh_mobile_ceu_camera.rst @@ -0,0 +1,142 @@ +.. SPDX-License-Identifier: GPL-2.0 + +Cropping and Scaling algorithm, used in the sh_mobile_ceu_camera driver +======================================================================= + +Author: Guennadi Liakhovetski <g.liakhovetski@gmx.de> + +Terminology +----------- + +sensor scales: horizontal and vertical scales, configured by the sensor driver +host scales: -"- host driver +combined scales: sensor_scale * host_scale + + +Generic scaling / cropping scheme +--------------------------------- + +.. code-block:: none + + -1-- + | + -2-- -\ + | --\ + | --\ + +-5-- . -- -3-- -\ + | `... -\ + | `... -4-- . - -7.. + | `. + | `. .6-- + | + | . .6'- + | .´ + | ... -4'- .´ + | ...´ - -7'. + +-5'- .´ -/ + | -- -3'- -/ + | --/ + | --/ + -2'- -/ + | + | + -1'- + +In the above chart minuses and slashes represent "real" data amounts, points and +accents represent "useful" data, basically, CEU scaled and cropped output, +mapped back onto the client's source plane. + +Such a configuration can be produced by user requests: + +S_CROP(left / top = (5) - (1), width / height = (5') - (5)) +S_FMT(width / height = (6') - (6)) + +Here: + +(1) to (1') - whole max width or height +(1) to (2) - sensor cropped left or top +(2) to (2') - sensor cropped width or height +(3) to (3') - sensor scale +(3) to (4) - CEU cropped left or top +(4) to (4') - CEU cropped width or height +(5) to (5') - reverse sensor scale applied to CEU cropped width or height +(2) to (5) - reverse sensor scale applied to CEU cropped left or top +(6) to (6') - CEU scale - user window + + +S_FMT +----- + +Do not touch input rectangle - it is already optimal. + +1. Calculate current sensor scales: + + scale_s = ((2') - (2)) / ((3') - (3)) + +2. Calculate "effective" input crop (sensor subwindow) - CEU crop scaled back at +current sensor scales onto input window - this is user S_CROP: + + width_u = (5') - (5) = ((4') - (4)) * scale_s + +3. Calculate new combined scales from "effective" input window to requested user +window: + + scale_comb = width_u / ((6') - (6)) + +4. Calculate sensor output window by applying combined scales to real input +window: + + width_s_out = ((7') - (7)) = ((2') - (2)) / scale_comb + +5. Apply iterative sensor S_FMT for sensor output window. + + subdev->video_ops->s_fmt(.width = width_s_out) + +6. Retrieve sensor output window (g_fmt) + +7. Calculate new sensor scales: + + scale_s_new = ((3')_new - (3)_new) / ((2') - (2)) + +8. Calculate new CEU crop - apply sensor scales to previously calculated +"effective" crop: + + width_ceu = (4')_new - (4)_new = width_u / scale_s_new + left_ceu = (4)_new - (3)_new = ((5) - (2)) / scale_s_new + +9. Use CEU cropping to crop to the new window: + + ceu_crop(.width = width_ceu, .left = left_ceu) + +10. Use CEU scaling to scale to the requested user window: + + scale_ceu = width_ceu / width + + +S_CROP +------ + +The :ref:`V4L2 crop API <crop-scale>` says: + +"...specification does not define an origin or units. However by convention +drivers should horizontally count unscaled samples relative to 0H." + +We choose to follow the advise and interpret cropping units as client input +pixels. + +Cropping is performed in the following 6 steps: + +1. Request exactly user rectangle from the sensor. + +2. If smaller - iterate until a larger one is obtained. Result: sensor cropped + to 2 : 2', target crop 5 : 5', current output format 6' - 6. + +3. In the previous step the sensor has tried to preserve its output frame as + good as possible, but it could have changed. Retrieve it again. + +4. Sensor scaled to 3 : 3'. Sensor's scale is (2' - 2) / (3' - 3). Calculate + intermediate window: 4' - 4 = (5' - 5) * (3' - 3) / (2' - 2) + +5. Calculate and apply host scale = (6' - 6) / (4' - 4) + +6. Calculate and apply host crop: 6 - 7 = (5 - 2) * (6' - 6) / (5' - 5) diff --git a/Documentation/driver-api/media/drivers/tuners.rst b/Documentation/driver-api/media/drivers/tuners.rst new file mode 100644 index 0000000000..d7924141c5 --- /dev/null +++ b/Documentation/driver-api/media/drivers/tuners.rst @@ -0,0 +1,133 @@ +.. SPDX-License-Identifier: GPL-2.0 + +Tuner drivers +============= + +Simple tuner Programming +------------------------ + +There are some flavors of Tuner programming APIs. +These differ mainly by the bandswitch byte. + +- L= LG_API (VHF_LO=0x01, VHF_HI=0x02, UHF=0x08, radio=0x04) +- P= PHILIPS_API (VHF_LO=0xA0, VHF_HI=0x90, UHF=0x30, radio=0x04) +- T= TEMIC_API (VHF_LO=0x02, VHF_HI=0x04, UHF=0x01) +- A= ALPS_API (VHF_LO=0x14, VHF_HI=0x12, UHF=0x11) +- M= PHILIPS_MK3 (VHF_LO=0x01, VHF_HI=0x02, UHF=0x04, radio=0x19) + +Tuner Manufacturers +------------------- + +- Samsung Tuner identification: (e.g. TCPM9091PD27) + +.. code-block:: none + + TCP [ABCJLMNQ] 90[89][125] [DP] [ACD] 27 [ABCD] + [ABCJLMNQ]: + A= BG+DK + B= BG + C= I+DK + J= NTSC-Japan + L= Secam LL + M= BG+I+DK + N= NTSC + Q= BG+I+DK+LL + [89]: ? + [125]: + 2: No FM + 5: With FM + [DP]: + D= NTSC + P= PAL + [ACD]: + A= F-connector + C= Phono connector + D= Din Jack + [ABCD]: + 3-wire/I2C tuning, 2-band/3-band + +These Tuners are PHILIPS_API compatible. + +Philips Tuner identification: (e.g. FM1216MF) + +.. code-block:: none + + F[IRMQ]12[1345]6{MF|ME|MP} + F[IRMQ]: + FI12x6: Tuner Series + FR12x6: Tuner + Radio IF + FM12x6: Tuner + FM + FQ12x6: special + FMR12x6: special + TD15xx: Digital Tuner ATSC + 12[1345]6: + 1216: PAL BG + 1236: NTSC + 1246: PAL I + 1256: Pal DK + {MF|ME|MP} + MF: BG LL w/ Secam (Multi France) + ME: BG DK I LL (Multi Europe) + MP: BG DK I (Multi PAL) + MR: BG DK M (?) + MG: BG DKI M (?) + MK2 series PHILIPS_API, most tuners are compatible to this one ! + MK3 series introduced in 2002 w/ PHILIPS_MK3_API + +Temic Tuner identification: (.e.g 4006FH5) + +.. code-block:: none + + 4[01][0136][269]F[HYNR]5 + 40x2: Tuner (5V/33V), TEMIC_API. + 40x6: Tuner 5V + 41xx: Tuner compact + 40x9: Tuner+FM compact + [0136] + xx0x: PAL BG + xx1x: Pal DK, Secam LL + xx3x: NTSC + xx6x: PAL I + F[HYNR]5 + FH5: Pal BG + FY5: others + FN5: multistandard + FR5: w/ FM radio + 3X xxxx: order number with specific connector + Note: Only 40x2 series has TEMIC_API, all newer tuners have PHILIPS_API. + +LG Innotek Tuner: + +- TPI8NSR11 : NTSC J/M (TPI8NSR01 w/FM) (P,210/497) +- TPI8PSB11 : PAL B/G (TPI8PSB01 w/FM) (P,170/450) +- TAPC-I701 : PAL I (TAPC-I001 w/FM) (P,170/450) +- TPI8PSB12 : PAL D/K+B/G (TPI8PSB02 w/FM) (P,170/450) +- TAPC-H701P: NTSC_JP (TAPC-H001P w/FM) (L,170/450) +- TAPC-G701P: PAL B/G (TAPC-G001P w/FM) (L,170/450) +- TAPC-W701P: PAL I (TAPC-W001P w/FM) (L,170/450) +- TAPC-Q703P: PAL D/K (TAPC-Q001P w/FM) (L,170/450) +- TAPC-Q704P: PAL D/K+I (L,170/450) +- TAPC-G702P: PAL D/K+B/G (L,170/450) + +- TADC-H002F: NTSC (L,175/410?; 2-B, C-W+11, W+12-69) +- TADC-M201D: PAL D/K+B/G+I (L,143/425) (sound control at I2C address 0xc8) +- TADC-T003F: NTSC Taiwan (L,175/410?; 2-B, C-W+11, W+12-69) + +Suffix: + - P= Standard phono female socket + - D= IEC female socket + - F= F-connector + +Other Tuners: + +- TCL2002MB-1 : PAL BG + DK =TUNER_LG_PAL_NEW_TAPC +- TCL2002MB-1F: PAL BG + DK w/FM =PHILIPS_PAL +- TCL2002MI-2 : PAL I = ?? + +ALPS Tuners: + +- Most are LG_API compatible +- TSCH6 has ALPS_API (TSCH5 ?) +- TSBE1 has extra API 05,02,08 Control_byte=0xCB Source:[#f1]_ + +.. [#f1] conexant100029b-PCI-Decoder-ApplicationNote.pdf diff --git a/Documentation/driver-api/media/drivers/vidtv.rst b/Documentation/driver-api/media/drivers/vidtv.rst new file mode 100644 index 0000000000..54f269f478 --- /dev/null +++ b/Documentation/driver-api/media/drivers/vidtv.rst @@ -0,0 +1,513 @@ +.. SPDX-License-Identifier: GPL-2.0 + +================================ +vidtv: Virtual Digital TV driver +================================ + +Author: Daniel W. S. Almeida <dwlsalmeida@gmail.com>, June 2020. + +Background +---------- + +Vidtv is a virtual DVB driver that aims to serve as a reference for driver +writers by serving as a template. It also validates the existing media DVB +APIs, thus helping userspace application writers. + +Currently, it consists of: + +- A fake tuner driver, which will report a bad signal quality if the chosen + frequency is too far away from a table of valid frequencies for a + particular delivery system. + +- A fake demod driver, which will constantly poll the fake signal quality + returned by the tuner, simulating a device that can lose/reacquire a lock + on the signal depending on the CNR levels. + +- A fake bridge driver, which is the module responsible for modprobing the + fake tuner and demod modules and implementing the demux logic. This module + takes parameters at initialization that will dictate how the simulation + behaves. + +- Code responsible for encoding a valid MPEG Transport Stream, which is then + passed to the bridge driver. This fake stream contains some hardcoded content. + For now, we have a single, audio-only channel containing a single MPEG + Elementary Stream, which in turn contains a SMPTE 302m encoded sine-wave. + Note that this particular encoder was chosen because it is the easiest + way to encode PCM audio data in a MPEG Transport Stream. + +Building vidtv +-------------- +vidtv is a test driver and thus is **not** enabled by default when +compiling the kernel. + +In order to enable compilation of vidtv: + +- Enable **DVB_TEST_DRIVERS**, then +- Enable **DVB_VIDTV** + +When compiled as a module, expect the following .ko files: + +- dvb_vidtv_tuner.ko + +- dvb_vidtv_demod.ko + +- dvb_vidtv_bridge.ko + +Running vidtv +------------- +When compiled as a module, run:: + + modprobe vidtv + +That's it! The bridge driver will initialize the tuner and demod drivers as +part of its own initialization. + +By default, it will accept the following frequencies: + + - 474 MHz for DVB-T/T2/C; + - 11,362 GHz for DVB-S/S2. + +For satellite systems, the driver simulates an universal extended +LNBf, with frequencies at Ku-Band, ranging from 10.7 GHz to 12.75 GHz. + +You can optionally define some command-line arguments to vidtv. + +Command-line arguments to vidtv +------------------------------- +Below is a list of all arguments that can be supplied to vidtv: + +drop_tslock_prob_on_low_snr + Probability of losing the TS lock if the signal quality is bad. + This probability be used by the fake demodulator driver to + eventually return a status of 0 when the signal quality is not + good. + +recover_tslock_prob_on_good_snr: + Probability recovering the TS lock when the signal improves. This + probability be used by the fake demodulator driver to eventually + return a status of 0x1f when/if the signal quality improves. + +mock_power_up_delay_msec + Simulate a power up delay. Default: 0. + +mock_tune_delay_msec + Simulate a tune delay. Default 0. + +vidtv_valid_dvb_t_freqs + Valid DVB-T frequencies to simulate, in Hz. + +vidtv_valid_dvb_c_freqs + Valid DVB-C frequencies to simulate, in Hz. + +vidtv_valid_dvb_s_freqs + Valid DVB-S/S2 frequencies to simulate at Ku-Band, in kHz. + +max_frequency_shift_hz, + Maximum shift in HZ allowed when tuning in a channel. + +si_period_msec + How often to send SI packets. Default: 40ms. + +pcr_period_msec + How often to send PCR packets. Default: 40ms. + +mux_rate_kbytes_sec + Attempt to maintain this bit rate by inserting TS null packets, if + necessary. Default: 4096. + +pcr_pid, + PCR PID for all channels. Default: 0x200. + +mux_buf_sz_pkts, + Size for the mux buffer in multiples of 188 bytes. + +vidtv internal structure +------------------------ +The kernel modules are split in the following way: + +vidtv_tuner.[ch] + Implements a fake tuner DVB driver. + +vidtv_demod.[ch] + Implements a fake demodulator DVB driver. + +vidtv_bridge.[ch] + Implements a bridge driver. + +The MPEG related code is split in the following way: + +vidtv_ts.[ch] + Code to work with MPEG TS packets, such as TS headers, adaptation + fields, PCR packets and NULL packets. + +vidtv_psi.[ch] + This is the PSI generator. PSI packets contain general information + about a MPEG Transport Stream. A PSI generator is needed so + userspace apps can retrieve information about the Transport Stream + and eventually tune into a (dummy) channel. + + Because the generator is implemented in a separate file, it can be + reused elsewhere in the media subsystem. + + Currently vidtv supports working with 5 PSI tables: PAT, PMT, + SDT, NIT and EIT. + + The specification for PAT and PMT can be found in *ISO 13818-1: + Systems*, while the specification for the SDT, NIT, EIT can be found in *ETSI + EN 300 468: Specification for Service Information (SI) in DVB + systems*. + + It isn't strictly necessary, but using a real TS file helps when + debugging PSI tables. Vidtv currently tries to replicate the PSI + structure found in this file: `TS1Globo.ts + <https://tsduck.io/streams/brazil-isdb-tb/TS1globo.ts>`_. + + A good way to visualize the structure of streams is by using + `DVBInspector <https://sourceforge.net/projects/dvbinspector/>`_. + +vidtv_pes.[ch] + Implements the PES logic to convert encoder data into MPEG TS + packets. These can then be fed into a TS multiplexer and eventually + into userspace. + +vidtv_encoder.h + An interface for vidtv encoders. New encoders can be added to this + driver by implementing the calls in this file. + +vidtv_s302m.[ch] + Implements a S302M encoder to make it possible to insert PCM audio + data in the generated MPEG Transport Stream. The relevant + specification is available online as *SMPTE 302M-2007: Television - + Mapping of AES3 Data into MPEG-2 Transport Stream*. + + + The resulting MPEG Elementary Stream is conveyed in a private + stream with a S302M registration descriptor attached. + + This shall enable passing an audio signal into userspace so it can + be decoded and played by media software. The corresponding decoder + in ffmpeg is located in 'libavcodec/s302m.c' and is experimental. + +vidtv_channel.[ch] + Implements a 'channel' abstraction. + + When vidtv boots, it will create some hardcoded channels: + + #. Their services will be concatenated to populate the SDT. + + #. Their programs will be concatenated to populate the PAT + + #. Their events will be concatenated to populate the EIT + + #. For each program in the PAT, a PMT section will be created + + #. The PMT section for a channel will be assigned its streams. + + #. Every stream will have its corresponding encoder polled in a + loop to produce TS packets. + These packets may be interleaved by the muxer and then delivered + to the bridge. + +vidtv_mux.[ch] + Implements a MPEG TS mux, loosely based on the ffmpeg + implementation in "libavcodec/mpegtsenc.c" + + The muxer runs a loop which is responsible for: + + #. Keeping track of the amount of time elapsed since the last + iteration. + + #. Polling encoders in order to fetch 'elapsed_time' worth of data. + + #. Inserting PSI and/or PCR packets, if needed. + + #. Padding the resulting stream with NULL packets if + necessary in order to maintain the chosen bit rate. + + #. Delivering the resulting TS packets to the bridge + driver so it can pass them to the demux. + +Testing vidtv with v4l-utils +---------------------------- + +Using the tools in v4l-utils is a great way to test and inspect the output of +vidtv. It is hosted here: `v4l-utils Documentation +<https://linuxtv.org/wiki/index.php/V4l-utils>`_. + +From its webpage:: + + The v4l-utils are a series of packages for handling media devices. + + It is hosted at http://git.linuxtv.org/v4l-utils.git, and packaged + on most distributions. + + It provides a series of libraries and utilities to be used to + control several aspect of the media boards. + + +Start by installing v4l-utils and then modprobing vidtv:: + + modprobe dvb_vidtv_bridge + +If the driver is OK, it should load and its probing code will run. This will +pull in the tuner and demod drivers. + +Using dvb-fe-tool +~~~~~~~~~~~~~~~~~ + +The first step to check whether the demod loaded successfully is to run:: + + $ dvb-fe-tool + Device Dummy demod for DVB-T/T2/C/S/S2 (/dev/dvb/adapter0/frontend0) capabilities: + CAN_FEC_1_2 + CAN_FEC_2_3 + CAN_FEC_3_4 + CAN_FEC_4_5 + CAN_FEC_5_6 + CAN_FEC_6_7 + CAN_FEC_7_8 + CAN_FEC_8_9 + CAN_FEC_AUTO + CAN_GUARD_INTERVAL_AUTO + CAN_HIERARCHY_AUTO + CAN_INVERSION_AUTO + CAN_QAM_16 + CAN_QAM_32 + CAN_QAM_64 + CAN_QAM_128 + CAN_QAM_256 + CAN_QAM_AUTO + CAN_QPSK + CAN_TRANSMISSION_MODE_AUTO + DVB API Version 5.11, Current v5 delivery system: DVBC/ANNEX_A + Supported delivery systems: + DVBT + DVBT2 + [DVBC/ANNEX_A] + DVBS + DVBS2 + Frequency range for the current standard: + From: 51.0 MHz + To: 2.15 GHz + Step: 62.5 kHz + Tolerance: 29.5 MHz + Symbol rate ranges for the current standard: + From: 1.00 MBauds + To: 45.0 MBauds + +This should return what is currently set up at the demod struct, i.e.:: + + static const struct dvb_frontend_ops vidtv_demod_ops = { + .delsys = { + SYS_DVBT, + SYS_DVBT2, + SYS_DVBC_ANNEX_A, + SYS_DVBS, + SYS_DVBS2, + }, + + .info = { + .name = "Dummy demod for DVB-T/T2/C/S/S2", + .frequency_min_hz = 51 * MHz, + .frequency_max_hz = 2150 * MHz, + .frequency_stepsize_hz = 62500, + .frequency_tolerance_hz = 29500 * kHz, + .symbol_rate_min = 1000000, + .symbol_rate_max = 45000000, + + .caps = FE_CAN_FEC_1_2 | + FE_CAN_FEC_2_3 | + FE_CAN_FEC_3_4 | + FE_CAN_FEC_4_5 | + FE_CAN_FEC_5_6 | + FE_CAN_FEC_6_7 | + FE_CAN_FEC_7_8 | + FE_CAN_FEC_8_9 | + FE_CAN_QAM_16 | + FE_CAN_QAM_64 | + FE_CAN_QAM_32 | + FE_CAN_QAM_128 | + FE_CAN_QAM_256 | + FE_CAN_QAM_AUTO | + FE_CAN_QPSK | + FE_CAN_FEC_AUTO | + FE_CAN_INVERSION_AUTO | + FE_CAN_TRANSMISSION_MODE_AUTO | + FE_CAN_GUARD_INTERVAL_AUTO | + FE_CAN_HIERARCHY_AUTO, + } + + .... + +For more information on dvb-fe-tools check its online documentation here: +`dvb-fe-tool Documentation +<https://www.linuxtv.org/wiki/index.php/Dvb-fe-tool>`_. + +Using dvb-scan +~~~~~~~~~~~~~~ + +In order to tune into a channel and read the PSI tables, we can use dvb-scan. + +For this, one should provide a configuration file known as a 'scan file', +here's an example:: + + [Channel] + FREQUENCY = 474000000 + MODULATION = QAM/AUTO + SYMBOL_RATE = 6940000 + INNER_FEC = AUTO + DELIVERY_SYSTEM = DVBC/ANNEX_A + +.. note:: + The parameters depend on the video standard you're testing. + +.. note:: + Vidtv is a fake driver and does not validate much of the information + in the scan file. Just specifying 'FREQUENCY' and 'DELIVERY_SYSTEM' + should be enough for DVB-T/DVB-T2. For DVB-S/DVB-C however, you + should also provide 'SYMBOL_RATE'. + +You can browse scan tables online here: `dvb-scan-tables +<https://git.linuxtv.org/dtv-scan-tables.git>`_. + +Assuming this channel is named 'channel.conf', you can then run:: + + $ dvbv5-scan channel.conf + dvbv5-scan ~/vidtv.conf + ERROR command BANDWIDTH_HZ (5) not found during retrieve + Cannot calc frequency shift. Either bandwidth/symbol-rate is unavailable (yet). + Scanning frequency #1 330000000 + (0x00) Signal= -68.00dBm + Scanning frequency #2 474000000 + Lock (0x1f) Signal= -34.45dBm C/N= 33.74dB UCB= 0 + Service Beethoven, provider LinuxTV.org: digital television + +For more information on dvb-scan, check its documentation online here: +`dvb-scan Documentation <https://www.linuxtv.org/wiki/index.php/Dvbscan>`_. + +Using dvb-zap +~~~~~~~~~~~~~ + +dvbv5-zap is a command line tool that can be used to record MPEG-TS to disk. The +typical use is to tune into a channel and put it into record mode. The example +below - which is taken from the documentation - illustrates that\ [1]_:: + + $ dvbv5-zap -c dvb_channel.conf "beethoven" -o music.ts -P -t 10 + using demux 'dvb0.demux0' + reading channels from file 'dvb_channel.conf' + tuning to 474000000 Hz + pass all PID's to TS + dvb_set_pesfilter 8192 + dvb_dev_set_bufsize: buffer set to 6160384 + Lock (0x1f) Quality= Good Signal= -34.66dBm C/N= 33.41dB UCB= 0 postBER= 0 preBER= 1.05x10^-3 PER= 0 + Lock (0x1f) Quality= Good Signal= -34.57dBm C/N= 33.46dB UCB= 0 postBER= 0 preBER= 1.05x10^-3 PER= 0 + Record to file 'music.ts' started + received 24587768 bytes (2401 Kbytes/sec) + Lock (0x1f) Quality= Good Signal= -34.42dBm C/N= 33.89dB UCB= 0 postBER= 0 preBER= 2.44x10^-3 PER= 0 + +.. [1] In this example, it records 10 seconds with all program ID's stored + at the music.ts file. + + +The channel can be watched by playing the contents of the stream with some +player that recognizes the MPEG-TS format, such as ``mplayer`` or ``vlc``. + +By playing the contents of the stream one can visually inspect the workings of +vidtv, e.g., to play a recorded TS file with:: + + $ mplayer music.ts + +or, alternatively, running this command on one terminal:: + + $ dvbv5-zap -c dvb_channel.conf "beethoven" -P -r & + +And, on a second terminal, playing the contents from DVR interface with:: + + $ mplayer /dev/dvb/adapter0/dvr0 + +For more information on dvb-zap check its online documentation here: +`dvb-zap Documentation +<https://www.linuxtv.org/wiki/index.php/Dvbv5-zap>`_. +See also: `zap <https://www.linuxtv.org/wiki/index.php/Zap>`_. + + +What can still be improved in vidtv +----------------------------------- + +Add *debugfs* integration +~~~~~~~~~~~~~~~~~~~~~~~~~ + +Although frontend drivers provide DVBv5 statistics via the .read_status +call, a nice addition would be to make additional statistics available to +userspace via debugfs, which is a simple-to-use, RAM-based filesystem +specifically designed for debug purposes. + +The logic for this would be implemented on a separate file so as not to +pollute the frontend driver. These statistics are driver-specific and can +be useful during tests. + +The Siano driver is one example of a driver using +debugfs to convey driver-specific statistics to userspace and it can be +used as a reference. + +This should be further enabled and disabled via a Kconfig +option for convenience. + +Add a way to test video +~~~~~~~~~~~~~~~~~~~~~~~ + +Currently, vidtv can only encode PCM audio. It would be great to implement +a barebones version of MPEG-2 video encoding so we can also test video. The +first place to look into is *ISO 13818-2: Information technology — Generic +coding of moving pictures and associated audio information — Part 2: Video*, +which covers the encoding of compressed video in MPEG Transport Streams. + +This might optionally use the Video4Linux2 Test Pattern Generator, v4l2-tpg, +which resides at:: + + drivers/media/common/v4l2-tpg/ + + +Add white noise simulation +~~~~~~~~~~~~~~~~~~~~~~~~~~ + +The vidtv tuner already has code to identify whether the chosen frequency +is too far away from a table of valid frequencies. For now, this means that +the demodulator can eventually lose the lock on the signal, since the tuner will +report a bad signal quality. + +A nice addition is to simulate some noise when the signal quality is bad by: + +- Randomly dropping some TS packets. This will trigger a continuity error if the + continuity counter is updated but the packet is not passed on to the demux. + +- Updating the error statistics accordingly (e.g. BER, etc). + +- Simulating some noise in the encoded data. + +Functions and structs used within vidtv +--------------------------------------- + +.. kernel-doc:: drivers/media/test-drivers/vidtv/vidtv_bridge.h + +.. kernel-doc:: drivers/media/test-drivers/vidtv/vidtv_channel.h + +.. kernel-doc:: drivers/media/test-drivers/vidtv/vidtv_demod.h + +.. kernel-doc:: drivers/media/test-drivers/vidtv/vidtv_encoder.h + +.. kernel-doc:: drivers/media/test-drivers/vidtv/vidtv_mux.h + +.. kernel-doc:: drivers/media/test-drivers/vidtv/vidtv_pes.h + +.. kernel-doc:: drivers/media/test-drivers/vidtv/vidtv_psi.h + +.. kernel-doc:: drivers/media/test-drivers/vidtv/vidtv_s302m.h + +.. kernel-doc:: drivers/media/test-drivers/vidtv/vidtv_ts.h + +.. kernel-doc:: drivers/media/test-drivers/vidtv/vidtv_tuner.h + +.. kernel-doc:: drivers/media/test-drivers/vidtv/vidtv_common.c + +.. kernel-doc:: drivers/media/test-drivers/vidtv/vidtv_tuner.c diff --git a/Documentation/driver-api/media/drivers/vimc-devel.rst b/Documentation/driver-api/media/drivers/vimc-devel.rst new file mode 100644 index 0000000000..9e984f914b --- /dev/null +++ b/Documentation/driver-api/media/drivers/vimc-devel.rst @@ -0,0 +1,15 @@ +.. SPDX-License-Identifier: GPL-2.0 + +The Virtual Media Controller Driver (vimc) +========================================== + +Source code documentation +------------------------- + +vimc-streamer +~~~~~~~~~~~~~ + +.. kernel-doc:: drivers/media/test-drivers/vimc/vimc-streamer.h + :internal: + +.. kernel-doc:: drivers/media/test-drivers/vimc/vimc-streamer.c diff --git a/Documentation/driver-api/media/drivers/zoran.rst b/Documentation/driver-api/media/drivers/zoran.rst new file mode 100644 index 0000000000..b205e10c31 --- /dev/null +++ b/Documentation/driver-api/media/drivers/zoran.rst @@ -0,0 +1,575 @@ +.. SPDX-License-Identifier: GPL-2.0 + +The Zoran driver +================ + +unified zoran driver (zr360x7, zoran, buz, dc10(+), dc30(+), lml33) + +website: http://mjpeg.sourceforge.net/driver-zoran/ + + +Frequently Asked Questions +-------------------------- + +What cards are supported +------------------------ + +Iomega Buz, Linux Media Labs LML33/LML33R10, Pinnacle/Miro +DC10/DC10+/DC30/DC30+ and related boards (available under various names). + +Iomega Buz +~~~~~~~~~~ + +* Zoran zr36067 PCI controller +* Zoran zr36060 MJPEG codec +* Philips saa7111 TV decoder +* Philips saa7185 TV encoder + +Drivers to use: videodev, i2c-core, i2c-algo-bit, +videocodec, saa7111, saa7185, zr36060, zr36067 + +Inputs/outputs: Composite and S-video + +Norms: PAL, SECAM (720x576 @ 25 fps), NTSC (720x480 @ 29.97 fps) + +Card number: 7 + +AverMedia 6 Eyes AVS6EYES +~~~~~~~~~~~~~~~~~~~~~~~~~ + +* Zoran zr36067 PCI controller +* Zoran zr36060 MJPEG codec +* Samsung ks0127 TV decoder +* Conexant bt866 TV encoder + +Drivers to use: videodev, i2c-core, i2c-algo-bit, +videocodec, ks0127, bt866, zr36060, zr36067 + +Inputs/outputs: + Six physical inputs. 1-6 are composite, + 1-2, 3-4, 5-6 doubles as S-video, + 1-3 triples as component. + One composite output. + +Norms: PAL, SECAM (720x576 @ 25 fps), NTSC (720x480 @ 29.97 fps) + +Card number: 8 + +.. note:: + + Not autodetected, card=8 is necessary. + +Linux Media Labs LML33 +~~~~~~~~~~~~~~~~~~~~~~ + +* Zoran zr36067 PCI controller +* Zoran zr36060 MJPEG codec +* Brooktree bt819 TV decoder +* Brooktree bt856 TV encoder + +Drivers to use: videodev, i2c-core, i2c-algo-bit, +videocodec, bt819, bt856, zr36060, zr36067 + +Inputs/outputs: Composite and S-video + +Norms: PAL (720x576 @ 25 fps), NTSC (720x480 @ 29.97 fps) + +Card number: 5 + +Linux Media Labs LML33R10 +~~~~~~~~~~~~~~~~~~~~~~~~~ + +* Zoran zr36067 PCI controller +* Zoran zr36060 MJPEG codec +* Philips saa7114 TV decoder +* Analog Devices adv7170 TV encoder + +Drivers to use: videodev, i2c-core, i2c-algo-bit, +videocodec, saa7114, adv7170, zr36060, zr36067 + +Inputs/outputs: Composite and S-video + +Norms: PAL (720x576 @ 25 fps), NTSC (720x480 @ 29.97 fps) + +Card number: 6 + +Pinnacle/Miro DC10(new) +~~~~~~~~~~~~~~~~~~~~~~~ + +* Zoran zr36057 PCI controller +* Zoran zr36060 MJPEG codec +* Philips saa7110a TV decoder +* Analog Devices adv7176 TV encoder + +Drivers to use: videodev, i2c-core, i2c-algo-bit, +videocodec, saa7110, adv7175, zr36060, zr36067 + +Inputs/outputs: Composite, S-video and Internal + +Norms: PAL, SECAM (768x576 @ 25 fps), NTSC (640x480 @ 29.97 fps) + +Card number: 1 + +Pinnacle/Miro DC10+ +~~~~~~~~~~~~~~~~~~~ + +* Zoran zr36067 PCI controller +* Zoran zr36060 MJPEG codec +* Philips saa7110a TV decoder +* Analog Devices adv7176 TV encoder + +Drivers to use: videodev, i2c-core, i2c-algo-bit, +videocodec, saa7110, adv7175, zr36060, zr36067 + +Inputs/outputs: Composite, S-video and Internal + +Norms: PAL, SECAM (768x576 @ 25 fps), NTSC (640x480 @ 29.97 fps) + +Card number: 2 + +Pinnacle/Miro DC10(old) +~~~~~~~~~~~~~~~~~~~~~~~ + +* Zoran zr36057 PCI controller +* Zoran zr36050 MJPEG codec +* Zoran zr36016 Video Front End or Fuji md0211 Video Front End (clone?) +* Micronas vpx3220a TV decoder +* mse3000 TV encoder or Analog Devices adv7176 TV encoder + +Drivers to use: videodev, i2c-core, i2c-algo-bit, +videocodec, vpx3220, mse3000/adv7175, zr36050, zr36016, zr36067 + +Inputs/outputs: Composite, S-video and Internal + +Norms: PAL, SECAM (768x576 @ 25 fps), NTSC (640x480 @ 29.97 fps) + +Card number: 0 + +Pinnacle/Miro DC30 +~~~~~~~~~~~~~~~~~~ + +* Zoran zr36057 PCI controller +* Zoran zr36050 MJPEG codec +* Zoran zr36016 Video Front End +* Micronas vpx3225d/vpx3220a/vpx3216b TV decoder +* Analog Devices adv7176 TV encoder + +Drivers to use: videodev, i2c-core, i2c-algo-bit, +videocodec, vpx3220/vpx3224, adv7175, zr36050, zr36016, zr36067 + +Inputs/outputs: Composite, S-video and Internal + +Norms: PAL, SECAM (768x576 @ 25 fps), NTSC (640x480 @ 29.97 fps) + +Card number: 3 + +Pinnacle/Miro DC30+ +~~~~~~~~~~~~~~~~~~~ + +* Zoran zr36067 PCI controller +* Zoran zr36050 MJPEG codec +* Zoran zr36016 Video Front End +* Micronas vpx3225d/vpx3220a/vpx3216b TV decoder +* Analog Devices adv7176 TV encoder + +Drivers to use: videodev, i2c-core, i2c-algo-bit, +videocodec, vpx3220/vpx3224, adv7175, zr36050, zr36015, zr36067 + +Inputs/outputs: Composite, S-video and Internal + +Norms: PAL, SECAM (768x576 @ 25 fps), NTSC (640x480 @ 29.97 fps) + +Card number: 4 + +.. note:: + + #) No module for the mse3000 is available yet + #) No module for the vpx3224 is available yet + +1.1 What the TV decoder can do an what not +------------------------------------------ + +The best know TV standards are NTSC/PAL/SECAM. but for decoding a frame that +information is not enough. There are several formats of the TV standards. +And not every TV decoder is able to handle every format. Also the every +combination is supported by the driver. There are currently 11 different +tv broadcast formats all aver the world. + +The CCIR defines parameters needed for broadcasting the signal. +The CCIR has defined different standards: A,B,D,E,F,G,D,H,I,K,K1,L,M,N,... +The CCIR says not much about the colorsystem used !!! +And talking about a colorsystem says not to much about how it is broadcast. + +The CCIR standards A,E,F are not used any more. + +When you speak about NTSC, you usually mean the standard: CCIR - M using +the NTSC colorsystem which is used in the USA, Japan, Mexico, Canada +and a few others. + +When you talk about PAL, you usually mean: CCIR - B/G using the PAL +colorsystem which is used in many Countries. + +When you talk about SECAM, you mean: CCIR - L using the SECAM Colorsystem +which is used in France, and a few others. + +There the other version of SECAM, CCIR - D/K is used in Bulgaria, China, +Slovakai, Hungary, Korea (Rep.), Poland, Rumania and a others. + +The CCIR - H uses the PAL colorsystem (sometimes SECAM) and is used in +Egypt, Libya, Sri Lanka, Syrain Arab. Rep. + +The CCIR - I uses the PAL colorsystem, and is used in Great Britain, Hong Kong, +Ireland, Nigeria, South Africa. + +The CCIR - N uses the PAL colorsystem and PAL frame size but the NTSC framerate, +and is used in Argentinia, Uruguay, an a few others + +We do not talk about how the audio is broadcast ! + +A rather good sites about the TV standards are: +http://www.sony.jp/support/ +http://info.electronicwerkstatt.de/bereiche/fernsehtechnik/frequenzen_und_normen/Fernsehnormen/ +and http://www.cabl.com/restaurant/channel.html + +Other weird things around: NTSC 4.43 is a modificated NTSC, which is mainly +used in PAL VCR's that are able to play back NTSC. PAL 60 seems to be the same +as NTSC 4.43 . The Datasheets also talk about NTSC 44, It seems as if it would +be the same as NTSC 4.43. +NTSC Combs seems to be a decoder mode where the decoder uses a comb filter +to split coma and luma instead of a Delay line. + +But I did not defiantly find out what NTSC Comb is. + +Philips saa7111 TV decoder +~~~~~~~~~~~~~~~~~~~~~~~~~~ + +- was introduced in 1997, is used in the BUZ and +- can handle: PAL B/G/H/I, PAL N, PAL M, NTSC M, NTSC N, NTSC 4.43 and SECAM + +Philips saa7110a TV decoder +~~~~~~~~~~~~~~~~~~~~~~~~~~~ + +- was introduced in 1995, is used in the Pinnacle/Miro DC10(new), DC10+ and +- can handle: PAL B/G, NTSC M and SECAM + +Philips saa7114 TV decoder +~~~~~~~~~~~~~~~~~~~~~~~~~~ + +- was introduced in 2000, is used in the LML33R10 and +- can handle: PAL B/G/D/H/I/N, PAL N, PAL M, NTSC M, NTSC 4.43 and SECAM + +Brooktree bt819 TV decoder +~~~~~~~~~~~~~~~~~~~~~~~~~~ + +- was introduced in 1996, and is used in the LML33 and +- can handle: PAL B/D/G/H/I, NTSC M + +Micronas vpx3220a TV decoder +~~~~~~~~~~~~~~~~~~~~~~~~~~~~ + +- was introduced in 1996, is used in the DC30 and DC30+ and +- can handle: PAL B/G/H/I, PAL N, PAL M, NTSC M, NTSC 44, PAL 60, SECAM,NTSC Comb + +Samsung ks0127 TV decoder +~~~~~~~~~~~~~~~~~~~~~~~~~ + +- is used in the AVS6EYES card and +- can handle: NTSC-M/N/44, PAL-M/N/B/G/H/I/D/K/L and SECAM + + +What the TV encoder can do an what not +-------------------------------------- + +The TV encoder is doing the "same" as the decoder, but in the other direction. +You feed them digital data and the generate a Composite or SVHS signal. +For information about the colorsystems and TV norm take a look in the +TV decoder section. + +Philips saa7185 TV Encoder +~~~~~~~~~~~~~~~~~~~~~~~~~~ + +- was introduced in 1996, is used in the BUZ +- can generate: PAL B/G, NTSC M + +Brooktree bt856 TV Encoder +~~~~~~~~~~~~~~~~~~~~~~~~~~ + +- was introduced in 1994, is used in the LML33 +- can generate: PAL B/D/G/H/I/N, PAL M, NTSC M, PAL-N (Argentina) + +Analog Devices adv7170 TV Encoder +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ + +- was introduced in 2000, is used in the LML300R10 +- can generate: PAL B/D/G/H/I/N, PAL M, NTSC M, PAL 60 + +Analog Devices adv7175 TV Encoder +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ + +- was introduced in 1996, is used in the DC10, DC10+, DC10 old, DC30, DC30+ +- can generate: PAL B/D/G/H/I/N, PAL M, NTSC M + +ITT mse3000 TV encoder +~~~~~~~~~~~~~~~~~~~~~~ + +- was introduced in 1991, is used in the DC10 old +- can generate: PAL , NTSC , SECAM + +Conexant bt866 TV encoder +~~~~~~~~~~~~~~~~~~~~~~~~~ + +- is used in AVS6EYES, and +- can generate: NTSC/PAL, PAL-M, PAL-N + +The adv717x, should be able to produce PAL N. But you find nothing PAL N +specific in the registers. Seem that you have to reuse a other standard +to generate PAL N, maybe it would work if you use the PAL M settings. + +How do I get this damn thing to work +------------------------------------ + +Load zr36067.o. If it can't autodetect your card, use the card=X insmod +option with X being the card number as given in the previous section. +To have more than one card, use card=X1[,X2[,X3,[X4[..]]]] + +To automate this, add the following to your /etc/modprobe.d/zoran.conf: + +options zr36067 card=X1[,X2[,X3[,X4[..]]]] +alias char-major-81-0 zr36067 + +One thing to keep in mind is that this doesn't load zr36067.o itself yet. It +just automates loading. If you start using xawtv, the device won't load on +some systems, since you're trying to load modules as a user, which is not +allowed ("permission denied"). A quick workaround is to add 'Load "v4l"' to +XF86Config-4 when you use X by default, or to run 'v4l-conf -c <device>' in +one of your startup scripts (normally rc.local) if you don't use X. Both +make sure that the modules are loaded on startup, under the root account. + +What mainboard should I use (or why doesn't my card work) +--------------------------------------------------------- + + +<insert lousy disclaimer here>. In short: good=SiS/Intel, bad=VIA. + +Experience tells us that people with a Buz, on average, have more problems +than users with a DC10+/LML33. Also, it tells us that people owning a VIA- +based mainboard (ktXXX, MVP3) have more problems than users with a mainboard +based on a different chipset. Here's some notes from Andrew Stevens: + +Here's my experience of using LML33 and Buz on various motherboards: + +- VIA MVP3 + - Forget it. Pointless. Doesn't work. +- Intel 430FX (Pentium 200) + - LML33 perfect, Buz tolerable (3 or 4 frames dropped per movie) +- Intel 440BX (early stepping) + - LML33 tolerable. Buz starting to get annoying (6-10 frames/hour) +- Intel 440BX (late stepping) + - Buz tolerable, LML3 almost perfect (occasional single frame drops) +- SiS735 + - LML33 perfect, Buz tolerable. +- VIA KT133(*) + - LML33 starting to get annoying, Buz poor enough that I have up. + +- Both 440BX boards were dual CPU versions. + +Bernhard Praschinger later added: + +- AMD 751 + - Buz perfect-tolerable +- AMD 760 + - Buz perfect-tolerable + +In general, people on the user mailinglist won't give you much of a chance +if you have a VIA-based motherboard. They may be cheap, but sometimes, you'd +rather want to spend some more money on better boards. In general, VIA +mainboard's IDE/PCI performance will also suck badly compared to others. +You'll noticed the DC10+/DC30+ aren't mentioned anywhere in the overview. +Basically, you can assume that if the Buz works, the LML33 will work too. If +the LML33 works, the DC10+/DC30+ will work too. They're most tolerant to +different mainboard chipsets from all of the supported cards. + +If you experience timeouts during capture, buy a better mainboard or lower +the quality/buffersize during capture (see 'Concerning buffer sizes, quality, +output size etc.'). If it hangs, there's little we can do as of now. Check +your IRQs and make sure the card has its own interrupts. + +Programming interface +--------------------- + +This driver conforms to video4linux2. Support for V4L1 and for the custom +zoran ioctls has been removed in kernel 2.6.38. + +For programming example, please, look at lavrec.c and lavplay.c code in +the MJPEG-tools (http://mjpeg.sf.net/). + +Additional notes for software developers: + + The driver returns maxwidth and maxheight parameters according to + the current TV standard (norm). Therefore, the software which + communicates with the driver and "asks" for these parameters should + first set the correct norm. Well, it seems logically correct: TV + standard is "more constant" for current country than geometry + settings of a variety of TV capture cards which may work in ITU or + square pixel format. + +Applications +------------ + +Applications known to work with this driver: + +TV viewing: + +* xawtv +* kwintv +* probably any TV application that supports video4linux or video4linux2. + +MJPEG capture/playback: + +* mjpegtools/lavtools (or Linux Video Studio) +* gstreamer +* mplayer + +General raw capture: + +* xawtv +* gstreamer +* probably any application that supports video4linux or video4linux2 + +Video editing: + +* Cinelerra +* MainActor +* mjpegtools (or Linux Video Studio) + + +Concerning buffer sizes, quality, output size etc. +-------------------------------------------------- + + +The zr36060 can do 1:2 JPEG compression. This is really the theoretical +maximum that the chipset can reach. The driver can, however, limit compression +to a maximum (size) of 1:4. The reason for this is that some cards (e.g. Buz) +can't handle 1:2 compression without stopping capture after only a few minutes. +With 1:4, it'll mostly work. If you have a Buz, use 'low_bitrate=1' to go into +1:4 max. compression mode. + +100% JPEG quality is thus 1:2 compression in practice. So for a full PAL frame +(size 720x576). The JPEG fields are stored in YUY2 format, so the size of the +fields are 720x288x16/2 bits/field (2 fields/frame) = 207360 bytes/field x 2 = +414720 bytes/frame (add some more bytes for headers and DHT (huffman)/DQT +(quantization) tables, and you'll get to something like 512kB per frame for +1:2 compression. For 1:4 compression, you'd have frames of half this size. + +Some additional explanation by Martin Samuelsson, which also explains the +importance of buffer sizes: +-- +> Hmm, I do not think it is really that way. With the current (downloaded +> at 18:00 Monday) driver I get that output sizes for 10 sec: +> -q 50 -b 128 : 24.283.332 Bytes +> -q 50 -b 256 : 48.442.368 +> -q 25 -b 128 : 24.655.992 +> -q 25 -b 256 : 25.859.820 + +I woke up, and can't go to sleep again. I'll kill some time explaining why +this doesn't look strange to me. + +Let's do some math using a width of 704 pixels. I'm not sure whether the Buz +actually use that number or not, but that's not too important right now. + +704x288 pixels, one field, is 202752 pixels. Divided by 64 pixels per block; +3168 blocks per field. Each pixel consist of two bytes; 128 bytes per block; +1024 bits per block. 100% in the new driver mean 1:2 compression; the maximum +output becomes 512 bits per block. Actually 510, but 512 is simpler to use +for calculations. + +Let's say that we specify d1q50. We thus want 256 bits per block; times 3168 +becomes 811008 bits; 101376 bytes per field. We're talking raw bits and bytes +here, so we don't need to do any fancy corrections for bits-per-pixel or such +things. 101376 bytes per field. + +d1 video contains two fields per frame. Those sum up to 202752 bytes per +frame, and one of those frames goes into each buffer. + +But wait a second! -b128 gives 128kB buffers! It's not possible to cram +202752 bytes of JPEG data into 128kB! + +This is what the driver notice and automatically compensate for in your +examples. Let's do some math using this information: + +128kB is 131072 bytes. In this buffer, we want to store two fields, which +leaves 65536 bytes for each field. Using 3168 blocks per field, we get +20.68686868... available bytes per block; 165 bits. We can't allow the +request for 256 bits per block when there's only 165 bits available! The -q50 +option is silently overridden, and the -b128 option takes precedence, leaving +us with the equivalence of -q32. + +This gives us a data rate of 165 bits per block, which, times 3168, sums up +to 65340 bytes per field, out of the allowed 65536. The current driver has +another level of rate limiting; it won't accept -q values that fill more than +6/8 of the specified buffers. (I'm not sure why. "Playing it safe" seem to be +a safe bet. Personally, I think I would have lowered requested-bits-per-block +by one, or something like that.) We can't use 165 bits per block, but have to +lower it again, to 6/8 of the available buffer space: We end up with 124 bits +per block, the equivalence of -q24. With 128kB buffers, you can't use greater +than -q24 at -d1. (And PAL, and 704 pixels width...) + +The third example is limited to -q24 through the same process. The second +example, using very similar calculations, is limited to -q48. The only +example that actually grab at the specified -q value is the last one, which +is clearly visible, looking at the file size. +-- + +Conclusion: the quality of the resulting movie depends on buffer size, quality, +whether or not you use 'low_bitrate=1' as insmod option for the zr36060.c +module to do 1:4 instead of 1:2 compression, etc. + +If you experience timeouts, lowering the quality/buffersize or using +'low_bitrate=1 as insmod option for zr36060.o might actually help, as is +proven by the Buz. + +It hangs/crashes/fails/whatevers! Help! +--------------------------------------- + +Make sure that the card has its own interrupts (see /proc/interrupts), check +the output of dmesg at high verbosity (load zr36067.o with debug=2, +load all other modules with debug=1). Check that your mainboard is favorable +(see question 2) and if not, test the card in another computer. Also see the +notes given in question 3 and try lowering quality/buffersize/capturesize +if recording fails after a period of time. + +If all this doesn't help, give a clear description of the problem including +detailed hardware information (memory+brand, mainboard+chipset+brand, which +MJPEG card, processor, other PCI cards that might be of interest), give the +system PnP information (/proc/interrupts, /proc/dma, /proc/devices), and give +the kernel version, driver version, glibc version, gcc version and any other +information that might possibly be of interest. Also provide the dmesg output +at high verbosity. See 'Contacting' on how to contact the developers. + +Maintainers/Contacting +---------------------- + +Previous maintainers/developers of this driver are +- Laurent Pinchart <laurent.pinchart@skynet.be> +- Ronald Bultje rbultje@ronald.bitfreak.net +- Serguei Miridonov <mirsev@cicese.mx> +- Wolfgang Scherr <scherr@net4you.net> +- Dave Perks <dperks@ibm.net> +- Rainer Johanni <Rainer@Johanni.de> + +Driver's License +---------------- + + This driver is distributed under the terms of the General Public License. + + This program is free software; you can redistribute it and/or modify + it under the terms of the GNU General Public License as published by + the Free Software Foundation; either version 2 of the License, or + (at your option) any later version. + + This program is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + GNU General Public License for more details. + +See http://www.gnu.org/ for more information. diff --git a/Documentation/driver-api/media/dtv-ca.rst b/Documentation/driver-api/media/dtv-ca.rst new file mode 100644 index 0000000000..8a09862b42 --- /dev/null +++ b/Documentation/driver-api/media/dtv-ca.rst @@ -0,0 +1,6 @@ +.. SPDX-License-Identifier: GPL-2.0 + +Digital TV Conditional Access kABI +---------------------------------- + +.. kernel-doc:: include/media/dvb_ca_en50221.h diff --git a/Documentation/driver-api/media/dtv-common.rst b/Documentation/driver-api/media/dtv-common.rst new file mode 100644 index 0000000000..207a22bcaf --- /dev/null +++ b/Documentation/driver-api/media/dtv-common.rst @@ -0,0 +1,53 @@ +.. SPDX-License-Identifier: GPL-2.0 + +Digital TV Common functions +--------------------------- + +DVB devices +~~~~~~~~~~~ + +Those functions are responsible for handling the DVB device nodes. + +.. kernel-doc:: include/media/dvbdev.h + +Digital TV Ring buffer +~~~~~~~~~~~~~~~~~~~~~~ + +Those routines implement ring buffers used to handle digital TV data and +copy it from/to userspace. + +.. note:: + + 1) For performance reasons read and write routines don't check buffer sizes + and/or number of bytes free/available. This has to be done before these + routines are called. For example: + + .. code-block:: c + + /* write @buflen: bytes */ + free = dvb_ringbuffer_free(rbuf); + if (free >= buflen) + count = dvb_ringbuffer_write(rbuf, buffer, buflen); + else + /* do something */ + + /* read min. 1000, max. @bufsize: bytes */ + avail = dvb_ringbuffer_avail(rbuf); + if (avail >= 1000) + count = dvb_ringbuffer_read(rbuf, buffer, min(avail, bufsize)); + else + /* do something */ + + 2) If there is exactly one reader and one writer, there is no need + to lock read or write operations. + Two or more readers must be locked against each other. + Flushing the buffer counts as a read operation. + Resetting the buffer counts as a read and write operation. + Two or more writers must be locked against each other. + +.. kernel-doc:: include/media/dvb_ringbuffer.h + +Digital TV VB2 handler +~~~~~~~~~~~~~~~~~~~~~~ + +.. kernel-doc:: include/media/dvb_vb2.h diff --git a/Documentation/driver-api/media/dtv-core.rst b/Documentation/driver-api/media/dtv-core.rst new file mode 100644 index 0000000000..82c5b85ed9 --- /dev/null +++ b/Documentation/driver-api/media/dtv-core.rst @@ -0,0 +1,39 @@ +.. SPDX-License-Identifier: GPL-2.0 + +Digital TV (DVB) devices +------------------------ + +Digital TV devices are implemented by several different drivers: + +- A bridge driver that is responsible to talk with the bus where the other + devices are connected (PCI, USB, SPI), bind to the other drivers and + implement the digital demux logic (either in software or in hardware); + +- Frontend drivers that are usually implemented as two separate drivers: + + - A tuner driver that implements the logic which commands the part of + the hardware responsible for tuning into a digital TV transponder or + physical channel. The output of a tuner is usually a baseband or + Intermediate Frequency (IF) signal; + + - A demodulator driver (a.k.a "demod") that implements the logic which + commands the digital TV decoding hardware. The output of a demod is + a digital stream, with multiple audio, video and data channels typically + multiplexed using MPEG Transport Stream [#f1]_. + +On most hardware, the frontend drivers talk with the bridge driver using an +I2C bus. + +.. [#f1] Some standards use TCP/IP for multiplexing data, like DVB-H (an + abandoned standard, not used anymore) and ATSC version 3.0 current + proposals. Currently, the DVB subsystem doesn't implement those standards. + + +.. toctree:: + :maxdepth: 1 + + dtv-common + dtv-frontend + dtv-demux + dtv-ca + dtv-net diff --git a/Documentation/driver-api/media/dtv-demux.rst b/Documentation/driver-api/media/dtv-demux.rst new file mode 100644 index 0000000000..1441241426 --- /dev/null +++ b/Documentation/driver-api/media/dtv-demux.rst @@ -0,0 +1,84 @@ +.. SPDX-License-Identifier: GPL-2.0 + +Digital TV Demux kABI +--------------------- + +Digital TV Demux +~~~~~~~~~~~~~~~~ + +The Kernel Digital TV Demux kABI defines a driver-internal interface for +registering low-level, hardware specific driver to a hardware independent +demux layer. It is only of interest for Digital TV device driver writers. +The header file for this kABI is named ``demux.h`` and located in +``include/media``. + +The demux kABI should be implemented for each demux in the system. It is +used to select the TS source of a demux and to manage the demux resources. +When the demux client allocates a resource via the demux kABI, it receives +a pointer to the kABI of that resource. + +Each demux receives its TS input from a DVB front-end or from memory, as +set via this demux kABI. In a system with more than one front-end, the kABI +can be used to select one of the DVB front-ends as a TS source for a demux, +unless this is fixed in the HW platform. + +The demux kABI only controls front-ends regarding to their connections with +demuxes; the kABI used to set the other front-end parameters, such as +tuning, are defined via the Digital TV Frontend kABI. + +The functions that implement the abstract interface demux should be defined +static or module private and registered to the Demux core for external +access. It is not necessary to implement every function in the struct +:c:type:`dmx_demux`. For example, a demux interface might support Section filtering, +but not PES filtering. The kABI client is expected to check the value of any +function pointer before calling the function: the value of ``NULL`` means +that the function is not available. + +Whenever the functions of the demux API modify shared data, the +possibilities of lost update and race condition problems should be +addressed, e.g. by protecting parts of code with mutexes. + +Note that functions called from a bottom half context must not sleep. +Even a simple memory allocation without using ``GFP_ATOMIC`` can result in a +kernel thread being put to sleep if swapping is needed. For example, the +Linux Kernel calls the functions of a network device interface from a +bottom half context. Thus, if a demux kABI function is called from network +device code, the function must not sleep. + +Demux Callback API +~~~~~~~~~~~~~~~~~~ + +This kernel-space API comprises the callback functions that deliver filtered +data to the demux client. Unlike the other DVB kABIs, these functions are +provided by the client and called from the demux code. + +The function pointers of this abstract interface are not packed into a +structure as in the other demux APIs, because the callback functions are +registered and used independent of each other. As an example, it is possible +for the API client to provide several callback functions for receiving TS +packets and no callbacks for PES packets or sections. + +The functions that implement the callback API need not be re-entrant: when +a demux driver calls one of these functions, the driver is not allowed to +call the function again before the original call returns. If a callback is +triggered by a hardware interrupt, it is recommended to use the Linux +bottom half mechanism or start a tasklet instead of making the callback +function call directly from a hardware interrupt. + +This mechanism is implemented by :c:func:`dmx_ts_cb()` and :c:func:`dmx_section_cb()` +callbacks. + +Digital TV Demux device registration functions and data structures +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ + +.. kernel-doc:: include/media/dmxdev.h + +High-level Digital TV demux interface +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ + +.. kernel-doc:: include/media/dvb_demux.h + +Driver-internal low-level hardware specific driver demux interface +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ + +.. kernel-doc:: include/media/demux.h diff --git a/Documentation/driver-api/media/dtv-frontend.rst b/Documentation/driver-api/media/dtv-frontend.rst new file mode 100644 index 0000000000..ea43cdb5b0 --- /dev/null +++ b/Documentation/driver-api/media/dtv-frontend.rst @@ -0,0 +1,445 @@ +.. SPDX-License-Identifier: GPL-2.0 + +Digital TV Frontend kABI +------------------------ + +Digital TV Frontend +~~~~~~~~~~~~~~~~~~~ + +The Digital TV Frontend kABI defines a driver-internal interface for +registering low-level, hardware specific driver to a hardware independent +frontend layer. It is only of interest for Digital TV device driver writers. +The header file for this API is named ``dvb_frontend.h`` and located in +``include/media/``. + +Demodulator driver +^^^^^^^^^^^^^^^^^^ + +The demodulator driver is responsible for talking with the decoding part of the +hardware. Such driver should implement :c:type:`dvb_frontend_ops`, which +tells what type of digital TV standards are supported, and points to a +series of functions that allow the DVB core to command the hardware via +the code under ``include/media/dvb_frontend.c``. + +A typical example of such struct in a driver ``foo`` is:: + + static struct dvb_frontend_ops foo_ops = { + .delsys = { SYS_DVBT, SYS_DVBT2, SYS_DVBC_ANNEX_A }, + .info = { + .name = "foo DVB-T/T2/C driver", + .caps = FE_CAN_FEC_1_2 | + FE_CAN_FEC_2_3 | + FE_CAN_FEC_3_4 | + FE_CAN_FEC_5_6 | + FE_CAN_FEC_7_8 | + FE_CAN_FEC_AUTO | + FE_CAN_QPSK | + FE_CAN_QAM_16 | + FE_CAN_QAM_32 | + FE_CAN_QAM_64 | + FE_CAN_QAM_128 | + FE_CAN_QAM_256 | + FE_CAN_QAM_AUTO | + FE_CAN_TRANSMISSION_MODE_AUTO | + FE_CAN_GUARD_INTERVAL_AUTO | + FE_CAN_HIERARCHY_AUTO | + FE_CAN_MUTE_TS | + FE_CAN_2G_MODULATION, + .frequency_min = 42000000, /* Hz */ + .frequency_max = 1002000000, /* Hz */ + .symbol_rate_min = 870000, + .symbol_rate_max = 11700000 + }, + .init = foo_init, + .sleep = foo_sleep, + .release = foo_release, + .set_frontend = foo_set_frontend, + .get_frontend = foo_get_frontend, + .read_status = foo_get_status_and_stats, + .tune = foo_tune, + .i2c_gate_ctrl = foo_i2c_gate_ctrl, + .get_frontend_algo = foo_get_algo, + }; + +A typical example of such struct in a driver ``bar`` meant to be used on +Satellite TV reception is:: + + static const struct dvb_frontend_ops bar_ops = { + .delsys = { SYS_DVBS, SYS_DVBS2 }, + .info = { + .name = "Bar DVB-S/S2 demodulator", + .frequency_min = 500000, /* KHz */ + .frequency_max = 2500000, /* KHz */ + .frequency_stepsize = 0, + .symbol_rate_min = 1000000, + .symbol_rate_max = 45000000, + .symbol_rate_tolerance = 500, + .caps = FE_CAN_INVERSION_AUTO | + FE_CAN_FEC_AUTO | + FE_CAN_QPSK, + }, + .init = bar_init, + .sleep = bar_sleep, + .release = bar_release, + .set_frontend = bar_set_frontend, + .get_frontend = bar_get_frontend, + .read_status = bar_get_status_and_stats, + .i2c_gate_ctrl = bar_i2c_gate_ctrl, + .get_frontend_algo = bar_get_algo, + .tune = bar_tune, + + /* Satellite-specific */ + .diseqc_send_master_cmd = bar_send_diseqc_msg, + .diseqc_send_burst = bar_send_burst, + .set_tone = bar_set_tone, + .set_voltage = bar_set_voltage, + }; + +.. note:: + + #) For satellite digital TV standards (DVB-S, DVB-S2, ISDB-S), the + frequencies are specified in kHz, while, for terrestrial and cable + standards, they're specified in Hz. Due to that, if the same frontend + supports both types, you'll need to have two separate + :c:type:`dvb_frontend_ops` structures, one for each standard. + #) The ``.i2c_gate_ctrl`` field is present only when the hardware has + allows controlling an I2C gate (either directly of via some GPIO pin), + in order to remove the tuner from the I2C bus after a channel is + tuned. + #) All new drivers should implement the + :ref:`DVBv5 statistics <dvbv5_stats>` via ``.read_status``. + Yet, there are a number of callbacks meant to get statistics for + signal strength, S/N and UCB. Those are there to provide backward + compatibility with legacy applications that don't support the DVBv5 + API. Implementing those callbacks are optional. Those callbacks may be + removed in the future, after we have all existing drivers supporting + DVBv5 stats. + #) Other callbacks are required for satellite TV standards, in order to + control LNBf and DiSEqC: ``.diseqc_send_master_cmd``, + ``.diseqc_send_burst``, ``.set_tone``, ``.set_voltage``. + +.. |delta| unicode:: U+00394 + +The ``include/media/dvb_frontend.c`` has a kernel thread which is +responsible for tuning the device. It supports multiple algorithms to +detect a channel, as defined at enum :c:func:`dvbfe_algo`. + +The algorithm to be used is obtained via ``.get_frontend_algo``. If the driver +doesn't fill its field at struct dvb_frontend_ops, it will default to +``DVBFE_ALGO_SW``, meaning that the dvb-core will do a zigzag when tuning, +e. g. it will try first to use the specified center frequency ``f``, +then, it will do ``f`` + |delta|, ``f`` - |delta|, ``f`` + 2 x |delta|, +``f`` - 2 x |delta| and so on. + +If the hardware has internally a some sort of zigzag algorithm, you should +define a ``.get_frontend_algo`` function that would return ``DVBFE_ALGO_HW``. + +.. note:: + + The core frontend support also supports + a third type (``DVBFE_ALGO_CUSTOM``), in order to allow the driver to + define its own hardware-assisted algorithm. Very few hardware need to + use it nowadays. Using ``DVBFE_ALGO_CUSTOM`` require to provide other + function callbacks at struct dvb_frontend_ops. + +Attaching frontend driver to the bridge driver +^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ + +Before using the Digital TV frontend core, the bridge driver should attach +the frontend demod, tuner and SEC devices and call +:c:func:`dvb_register_frontend()`, +in order to register the new frontend at the subsystem. At device +detach/removal, the bridge driver should call +:c:func:`dvb_unregister_frontend()` to +remove the frontend from the core and then :c:func:`dvb_frontend_detach()` +to free the memory allocated by the frontend drivers. + +The drivers should also call :c:func:`dvb_frontend_suspend()` as part of +their handler for the :c:type:`device_driver`.\ ``suspend()``, and +:c:func:`dvb_frontend_resume()` as +part of their handler for :c:type:`device_driver`.\ ``resume()``. + +A few other optional functions are provided to handle some special cases. + +.. _dvbv5_stats: + +Digital TV Frontend statistics +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ + +Introduction +^^^^^^^^^^^^ + +Digital TV frontends provide a range of +:ref:`statistics <frontend-stat-properties>` meant to help tuning the device +and measuring the quality of service. + +For each statistics measurement, the driver should set the type of scale used, +or ``FE_SCALE_NOT_AVAILABLE`` if the statistics is not available on a given +time. Drivers should also provide the number of statistics for each type. +that's usually 1 for most video standards [#f2]_. + +Drivers should initialize each statistic counters with length and +scale at its init code. For example, if the frontend provides signal +strength, it should have, on its init code:: + + struct dtv_frontend_properties *c = &state->fe.dtv_property_cache; + + c->strength.len = 1; + c->strength.stat[0].scale = FE_SCALE_NOT_AVAILABLE; + +And, when the statistics got updated, set the scale:: + + c->strength.stat[0].scale = FE_SCALE_DECIBEL; + c->strength.stat[0].uvalue = strength; + +.. [#f2] For ISDB-T, it may provide both a global statistics and a per-layer + set of statistics. On such cases, len should be equal to 4. The first + value corresponds to the global stat; the other ones to each layer, e. g.: + + - c->cnr.stat[0] for global S/N carrier ratio, + - c->cnr.stat[1] for Layer A S/N carrier ratio, + - c->cnr.stat[2] for layer B S/N carrier ratio, + - c->cnr.stat[3] for layer C S/N carrier ratio. + +.. note:: Please prefer to use ``FE_SCALE_DECIBEL`` instead of + ``FE_SCALE_RELATIVE`` for signal strength and CNR measurements. + +Groups of statistics +^^^^^^^^^^^^^^^^^^^^ + +There are several groups of statistics currently supported: + +Signal strength (:ref:`DTV-STAT-SIGNAL-STRENGTH`) + - Measures the signal strength level at the analog part of the tuner or + demod. + + - Typically obtained from the gain applied to the tuner and/or frontend + in order to detect the carrier. When no carrier is detected, the gain is + at the maximum value (so, strength is on its minimal). + + - As the gain is visible through the set of registers that adjust the gain, + typically, this statistics is always available [#f3]_. + + - Drivers should try to make it available all the times, as these statistics + can be used when adjusting an antenna position and to check for troubles + at the cabling. + + .. [#f3] On a few devices, the gain keeps floating if there is no carrier. + On such devices, strength report should check first if carrier is + detected at the tuner (``FE_HAS_CARRIER``, see :c:type:`fe_status`), + and otherwise return the lowest possible value. + +Carrier Signal to Noise ratio (:ref:`DTV-STAT-CNR`) + - Signal to Noise ratio for the main carrier. + + - Signal to Noise measurement depends on the device. On some hardware, it is + available when the main carrier is detected. On those hardware, CNR + measurement usually comes from the tuner (e. g. after ``FE_HAS_CARRIER``, + see :c:type:`fe_status`). + + On other devices, it requires inner FEC decoding, + as the frontend measures it indirectly from other parameters (e. g. after + ``FE_HAS_VITERBI``, see :c:type:`fe_status`). + + Having it available after inner FEC is more common. + +Bit counts post-FEC (:ref:`DTV-STAT-POST-ERROR-BIT-COUNT` and :ref:`DTV-STAT-POST-TOTAL-BIT-COUNT`) + - Those counters measure the number of bits and bit errors after + the forward error correction (FEC) on the inner coding block + (after Viterbi, LDPC or other inner code). + + - Due to its nature, those statistics depend on full coding lock + (e. g. after ``FE_HAS_SYNC`` or after ``FE_HAS_LOCK``, + see :c:type:`fe_status`). + +Bit counts pre-FEC (:ref:`DTV-STAT-PRE-ERROR-BIT-COUNT` and :ref:`DTV-STAT-PRE-TOTAL-BIT-COUNT`) + - Those counters measure the number of bits and bit errors before + the forward error correction (FEC) on the inner coding block + (before Viterbi, LDPC or other inner code). + + - Not all frontends provide this kind of statistics. + + - Due to its nature, those statistics depend on inner coding lock (e. g. + after ``FE_HAS_VITERBI``, see :c:type:`fe_status`). + +Block counts (:ref:`DTV-STAT-ERROR-BLOCK-COUNT` and :ref:`DTV-STAT-TOTAL-BLOCK-COUNT`) + - Those counters measure the number of blocks and block errors after + the forward error correction (FEC) on the inner coding block + (before Viterbi, LDPC or other inner code). + + - Due to its nature, those statistics depend on full coding lock + (e. g. after ``FE_HAS_SYNC`` or after + ``FE_HAS_LOCK``, see :c:type:`fe_status`). + +.. note:: All counters should be monotonically increased as they're + collected from the hardware. + +A typical example of the logic that handle status and statistics is:: + + static int foo_get_status_and_stats(struct dvb_frontend *fe) + { + struct foo_state *state = fe->demodulator_priv; + struct dtv_frontend_properties *c = &fe->dtv_property_cache; + + int rc; + enum fe_status *status; + + /* Both status and strength are always available */ + rc = foo_read_status(fe, &status); + if (rc < 0) + return rc; + + rc = foo_read_strength(fe); + if (rc < 0) + return rc; + + /* Check if CNR is available */ + if (!(fe->status & FE_HAS_CARRIER)) + return 0; + + rc = foo_read_cnr(fe); + if (rc < 0) + return rc; + + /* Check if pre-BER stats are available */ + if (!(fe->status & FE_HAS_VITERBI)) + return 0; + + rc = foo_get_pre_ber(fe); + if (rc < 0) + return rc; + + /* Check if post-BER stats are available */ + if (!(fe->status & FE_HAS_SYNC)) + return 0; + + rc = foo_get_post_ber(fe); + if (rc < 0) + return rc; + } + + static const struct dvb_frontend_ops ops = { + /* ... */ + .read_status = foo_get_status_and_stats, + }; + +Statistics collection +^^^^^^^^^^^^^^^^^^^^^ + +On almost all frontend hardware, the bit and byte counts are stored by +the hardware after a certain amount of time or after the total bit/block +counter reaches a certain value (usually programmable), for example, on +every 1000 ms or after receiving 1,000,000 bits. + +So, if you read the registers too soon, you'll end by reading the same +value as in the previous reading, causing the monotonic value to be +incremented too often. + +Drivers should take the responsibility to avoid too often reads. That +can be done using two approaches: + +if the driver have a bit that indicates when a collected data is ready +%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% + +Driver should check such bit before making the statistics available. + +An example of such behavior can be found at this code snippet (adapted +from mb86a20s driver's logic):: + + static int foo_get_pre_ber(struct dvb_frontend *fe) + { + struct foo_state *state = fe->demodulator_priv; + struct dtv_frontend_properties *c = &fe->dtv_property_cache; + int rc, bit_error; + + /* Check if the BER measures are already available */ + rc = foo_read_u8(state, 0x54); + if (rc < 0) + return rc; + + if (!rc) + return 0; + + /* Read Bit Error Count */ + bit_error = foo_read_u32(state, 0x55); + if (bit_error < 0) + return bit_error; + + /* Read Total Bit Count */ + rc = foo_read_u32(state, 0x51); + if (rc < 0) + return rc; + + c->pre_bit_error.stat[0].scale = FE_SCALE_COUNTER; + c->pre_bit_error.stat[0].uvalue += bit_error; + c->pre_bit_count.stat[0].scale = FE_SCALE_COUNTER; + c->pre_bit_count.stat[0].uvalue += rc; + + return 0; + } + +If the driver doesn't provide a statistics available check bit +%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% + +A few devices, however, may not provide a way to check if the stats are +available (or the way to check it is unknown). They may not even provide +a way to directly read the total number of bits or blocks. + +On those devices, the driver need to ensure that it won't be reading from +the register too often and/or estimate the total number of bits/blocks. + +On such drivers, a typical routine to get statistics would be like +(adapted from dib8000 driver's logic):: + + struct foo_state { + /* ... */ + + unsigned long per_jiffies_stats; + } + + static int foo_get_pre_ber(struct dvb_frontend *fe) + { + struct foo_state *state = fe->demodulator_priv; + struct dtv_frontend_properties *c = &fe->dtv_property_cache; + int rc, bit_error; + u64 bits; + + /* Check if time for stats was elapsed */ + if (!time_after(jiffies, state->per_jiffies_stats)) + return 0; + + /* Next stat should be collected in 1000 ms */ + state->per_jiffies_stats = jiffies + msecs_to_jiffies(1000); + + /* Read Bit Error Count */ + bit_error = foo_read_u32(state, 0x55); + if (bit_error < 0) + return bit_error; + + /* + * On this particular frontend, there's no register that + * would provide the number of bits per 1000ms sample. So, + * some function would calculate it based on DTV properties + */ + bits = get_number_of_bits_per_1000ms(fe); + + c->pre_bit_error.stat[0].scale = FE_SCALE_COUNTER; + c->pre_bit_error.stat[0].uvalue += bit_error; + c->pre_bit_count.stat[0].scale = FE_SCALE_COUNTER; + c->pre_bit_count.stat[0].uvalue += bits; + + return 0; + } + +Please notice that, on both cases, we're getting the statistics using the +:c:type:`dvb_frontend_ops` ``.read_status`` callback. The rationale is that +the frontend core will automatically call this function periodically +(usually, 3 times per second, when the frontend is locked). + +That warrants that we won't miss to collect a counter and increment the +monotonic stats at the right time. + +Digital TV Frontend functions and types +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ + +.. kernel-doc:: include/media/dvb_frontend.h diff --git a/Documentation/driver-api/media/dtv-net.rst b/Documentation/driver-api/media/dtv-net.rst new file mode 100644 index 0000000000..deb6bffe96 --- /dev/null +++ b/Documentation/driver-api/media/dtv-net.rst @@ -0,0 +1,6 @@ +.. SPDX-License-Identifier: GPL-2.0 + +Digital TV Network kABI +----------------------- + +.. kernel-doc:: include/media/dvb_net.h diff --git a/Documentation/driver-api/media/index.rst b/Documentation/driver-api/media/index.rst new file mode 100644 index 0000000000..08e2065674 --- /dev/null +++ b/Documentation/driver-api/media/index.rst @@ -0,0 +1,59 @@ +.. SPDX-License-Identifier: GPL-2.0 + +.. include:: <isonum.txt> + +=================================== +Media subsystem kernel internal API +=================================== + +This section contains usage information about media subsystem and +its supported drivers. + +Please see: + +Documentation/admin-guide/media/index.rst + + - for usage information about media subsystem and supported drivers; + +Documentation/userspace-api/media/index.rst + + - for the userspace APIs used on media devices. + + +.. only:: html + + .. class:: toc-title + + Table of Contents + +.. toctree:: + :maxdepth: 5 + :numbered: + + maintainer-entry-profile + + v4l2-core + dtv-core + rc-core + mc-core + cec-core + tx-rx + camera-sensor + + drivers/index + +**Copyright** |copy| 2009-2020 : LinuxTV Developers + +:: + + This documentation is free software; you can redistribute it and/or modify it + under the terms of the GNU General Public License as published by the Free + Software Foundation; either version 2 of the License, or (at your option) any + later version. + + This program is distributed in the hope that it will be useful, but WITHOUT + ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or + FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for + more details. + + For more details see the file COPYING in the source distribution of Linux. diff --git a/Documentation/driver-api/media/maintainer-entry-profile.rst b/Documentation/driver-api/media/maintainer-entry-profile.rst new file mode 100644 index 0000000000..ffc712a5f6 --- /dev/null +++ b/Documentation/driver-api/media/maintainer-entry-profile.rst @@ -0,0 +1,206 @@ +Media Subsystem Profile +======================= + +Overview +-------- + +The media subsystem covers support for a variety of devices: stream +capture, analog and digital TV streams, cameras, remote controllers, HDMI CEC +and media pipeline control. + +It covers, mainly, the contents of those directories: + + - drivers/media + - drivers/staging/media + - Documentation/admin-guide/media + - Documentation/driver-api/media + - Documentation/userspace-api/media + - Documentation/devicetree/bindings/media/\ [1]_ + - include/media + +.. [1] Device tree bindings are maintained by the + OPEN FIRMWARE AND FLATTENED DEVICE TREE BINDINGS maintainers + (see the MAINTAINERS file). So, changes there must be reviewed + by them before being merged via the media subsystem's development + tree. + +Both media userspace and Kernel APIs are documented and the documentation +must be kept in sync with the API changes. It means that all patches that +add new features to the subsystem must also bring changes to the +corresponding API files. + +Due to the size and wide scope of the media subsystem, media's +maintainership model is to have sub-maintainers that have a broad +knowledge of a specific aspect of the subsystem. It is the sub-maintainers' +task to review the patches, providing feedback to users if the patches are +following the subsystem rules and are properly using the media kernel and +userspace APIs. + +Patches for the media subsystem must be sent to the media mailing list +at linux-media@vger.kernel.org as plain text only e-mail. Emails with +HTML will be automatically rejected by the mail server. It could be wise +to also copy the sub-maintainer(s). + +Media's workflow is heavily based on Patchwork, meaning that, once a patch +is submitted, the e-mail will first be accepted by the mailing list +server, and, after a while, it should appear at: + + - https://patchwork.linuxtv.org/project/linux-media/list/ + +If it doesn't automatically appear there after a few minutes, then +probably something went wrong on your submission. Please check if the +email is in plain text\ [2]_ only and if your emailer is not mangling +whitespaces before complaining or submitting them again. + +You can check if the mailing list server accepted your patch, by looking at: + + - https://lore.kernel.org/linux-media/ + +.. [2] If your email contains HTML, the mailing list server will simply + drop it, without any further notice. + + +Media maintainers ++++++++++++++++++ + +At the media subsystem, we have a group of senior developers that +are responsible for doing the code reviews at the drivers (also known as +sub-maintainers), and another senior developer responsible for the +subsystem as a whole. For core changes, whenever possible, multiple +media maintainers do the review. + +The media maintainers that work on specific areas of the subsystem are: + +- Remote Controllers (infrared): + Sean Young <sean@mess.org> + +- HDMI CEC: + Hans Verkuil <hverkuil@xs4all.nl> + +- Media controller drivers: + Laurent Pinchart <laurent.pinchart@ideasonboard.com> + +- ISP, v4l2-async, v4l2-fwnode, v4l2-flash-led-class and Sensor drivers: + Sakari Ailus <sakari.ailus@linux.intel.com> + +- V4L2 drivers and core V4L2 frameworks: + Hans Verkuil <hverkuil@xs4all.nl> + +The subsystem maintainer is: + Mauro Carvalho Chehab <mchehab@kernel.org> + +Media maintainers may delegate a patch to other media maintainers as needed. +On such case, checkpatch's ``delegate`` field indicates who's currently +responsible for reviewing a patch. + +Submit Checklist Addendum +------------------------- + +Patches that change the Open Firmware/Device Tree bindings must be +reviewed by the Device Tree maintainers. So, DT maintainers should be +Cc:ed when those are submitted via devicetree@vger.kernel.org mailing +list. + +There is a set of compliance tools at https://git.linuxtv.org/v4l-utils.git/ +that should be used in order to check if the drivers are properly +implementing the media APIs: + +==================== ======================================================= +Type Tool +==================== ======================================================= +V4L2 drivers\ [3]_ ``v4l2-compliance`` +V4L2 virtual drivers ``contrib/test/test-media`` +CEC drivers ``cec-compliance`` +==================== ======================================================= + +.. [3] The ``v4l2-compliance`` also covers the media controller usage inside + V4L2 drivers. + +Other compilance tools are under development to check other parts of the +subsystem. + +Those tests need to pass before the patches go upstream. + +Also, please notice that we build the Kernel with:: + + make CF=-D__CHECK_ENDIAN__ CONFIG_DEBUG_SECTION_MISMATCH=y C=1 W=1 CHECK=check_script + +Where the check script is:: + + #!/bin/bash + /devel/smatch/smatch -p=kernel $@ >&2 + /devel/sparse/sparse $@ >&2 + +Be sure to not introduce new warnings on your patches without a +very good reason. + +Style Cleanup Patches ++++++++++++++++++++++ + +Style cleanups are welcome when they come together with other changes +at the files where the style changes will affect. + +We may accept pure standalone style cleanups, but they should ideally +be one patch for the whole subsystem (if the cleanup is low volume), +or at least be grouped per directory. So, for example, if you're doing a +big cleanup change set at drivers under drivers/media, please send a single +patch for all drivers under drivers/media/pci, another one for +drivers/media/usb and so on. + +Coding Style Addendum ++++++++++++++++++++++ + +Media development uses ``checkpatch.pl`` on strict mode to verify the code +style, e.g.:: + + $ ./scripts/checkpatch.pl --strict --max-line-length=80 + +In principle, patches should follow the coding style rules, but exceptions +are allowed if there are good reasons. On such case, maintainers and reviewers +may question about the rationale for not addressing the ``checkpatch.pl``. + +Please notice that the goal here is to improve code readability. On +a few cases, ``checkpatch.pl`` may actually point to something that would +look worse. So, you should use good sense. + +Note that addressing one ``checkpatch.pl`` issue (of any kind) alone may lead +to having longer lines than 80 characters per line. While this is not +strictly prohibited, efforts should be made towards staying within 80 +characters per line. This could include using re-factoring code that leads +to less indentation, shorter variable or function names and last but not +least, simply wrapping the lines. + +In particular, we accept lines with more than 80 columns: + + - on strings, as they shouldn't be broken due to line length limits; + - when a function or variable name need to have a big identifier name, + which keeps hard to honor the 80 columns limit; + - on arithmetic expressions, when breaking lines makes them harder to + read; + - when they avoid a line to end with an open parenthesis or an open + bracket. + +Key Cycle Dates +--------------- + +New submissions can be sent at any time, but if they intend to hit the +next merge window they should be sent before -rc5, and ideally stabilized +in the linux-media branch by -rc6. + +Review Cadence +-------------- + +Provided that your patch is at https://patchwork.linuxtv.org, it should +be sooner or later handled, so you don't need to re-submit a patch. + +Except for bug fixes, we don't usually add new patches to the development +tree between -rc6 and the next -rc1. + +Please notice that the media subsystem is a high traffic one, so it +could take a while for us to be able to review your patches. Feel free +to ping if you don't get a feedback in a couple of weeks or to ask +other developers to publicly add Reviewed-by and, more importantly, +``Tested-by:`` tags. + +Please note that we expect a detailed description for ``Tested-by:``, +identifying what boards were used at the test and what it was tested. diff --git a/Documentation/driver-api/media/mc-core.rst b/Documentation/driver-api/media/mc-core.rst new file mode 100644 index 0000000000..2456950ce8 --- /dev/null +++ b/Documentation/driver-api/media/mc-core.rst @@ -0,0 +1,326 @@ +.. SPDX-License-Identifier: GPL-2.0 + +Media Controller devices +------------------------ + +Media Controller +~~~~~~~~~~~~~~~~ + +The media controller userspace API is documented in +:ref:`the Media Controller uAPI book <media_controller>`. This document focus +on the kernel-side implementation of the media framework. + +Abstract media device model +^^^^^^^^^^^^^^^^^^^^^^^^^^^ + +Discovering a device internal topology, and configuring it at runtime, is one +of the goals of the media framework. To achieve this, hardware devices are +modelled as an oriented graph of building blocks called entities connected +through pads. + +An entity is a basic media hardware building block. It can correspond to +a large variety of logical blocks such as physical hardware devices +(CMOS sensor for instance), logical hardware devices (a building block +in a System-on-Chip image processing pipeline), DMA channels or physical +connectors. + +A pad is a connection endpoint through which an entity can interact with +other entities. Data (not restricted to video) produced by an entity +flows from the entity's output to one or more entity inputs. Pads should +not be confused with physical pins at chip boundaries. + +A link is a point-to-point oriented connection between two pads, either +on the same entity or on different entities. Data flows from a source +pad to a sink pad. + +Media device +^^^^^^^^^^^^ + +A media device is represented by a struct media_device +instance, defined in ``include/media/media-device.h``. +Allocation of the structure is handled by the media device driver, usually by +embedding the :c:type:`media_device` instance in a larger driver-specific +structure. + +Drivers initialise media device instances by calling +:c:func:`media_device_init()`. After initialising a media device instance, it is +registered by calling :c:func:`__media_device_register()` via the macro +``media_device_register()`` and unregistered by calling +:c:func:`media_device_unregister()`. An initialised media device must be +eventually cleaned up by calling :c:func:`media_device_cleanup()`. + +Note that it is not allowed to unregister a media device instance that was not +previously registered, or clean up a media device instance that was not +previously initialised. + +Entities +^^^^^^^^ + +Entities are represented by a struct media_entity +instance, defined in ``include/media/media-entity.h``. The structure is usually +embedded into a higher-level structure, such as +:c:type:`v4l2_subdev` or :c:type:`video_device` +instances, although drivers can allocate entities directly. + +Drivers initialize entity pads by calling +:c:func:`media_entity_pads_init()`. + +Drivers register entities with a media device by calling +:c:func:`media_device_register_entity()` +and unregistered by calling +:c:func:`media_device_unregister_entity()`. + +Interfaces +^^^^^^^^^^ + +Interfaces are represented by a +struct media_interface instance, defined in +``include/media/media-entity.h``. Currently, only one type of interface is +defined: a device node. Such interfaces are represented by a +struct media_intf_devnode. + +Drivers initialize and create device node interfaces by calling +:c:func:`media_devnode_create()` +and remove them by calling: +:c:func:`media_devnode_remove()`. + +Pads +^^^^ +Pads are represented by a struct media_pad instance, +defined in ``include/media/media-entity.h``. Each entity stores its pads in +a pads array managed by the entity driver. Drivers usually embed the array in +a driver-specific structure. + +Pads are identified by their entity and their 0-based index in the pads +array. + +Both information are stored in the struct media_pad, +making the struct media_pad pointer the canonical way +to store and pass link references. + +Pads have flags that describe the pad capabilities and state. + +``MEDIA_PAD_FL_SINK`` indicates that the pad supports sinking data. +``MEDIA_PAD_FL_SOURCE`` indicates that the pad supports sourcing data. + +.. note:: + + One and only one of ``MEDIA_PAD_FL_SINK`` or ``MEDIA_PAD_FL_SOURCE`` must + be set for each pad. + +Links +^^^^^ + +Links are represented by a struct media_link instance, +defined in ``include/media/media-entity.h``. There are two types of links: + +**1. pad to pad links**: + +Associate two entities via their PADs. Each entity has a list that points +to all links originating at or targeting any of its pads. +A given link is thus stored twice, once in the source entity and once in +the target entity. + +Drivers create pad to pad links by calling: +:c:func:`media_create_pad_link()` and remove with +:c:func:`media_entity_remove_links()`. + +**2. interface to entity links**: + +Associate one interface to a Link. + +Drivers create interface to entity links by calling: +:c:func:`media_create_intf_link()` and remove with +:c:func:`media_remove_intf_links()`. + +.. note:: + + Links can only be created after having both ends already created. + +Links have flags that describe the link capabilities and state. The +valid values are described at :c:func:`media_create_pad_link()` and +:c:func:`media_create_intf_link()`. + +Graph traversal +^^^^^^^^^^^^^^^ + +The media framework provides APIs to iterate over entities in a graph. + +To iterate over all entities belonging to a media device, drivers can use +the media_device_for_each_entity macro, defined in +``include/media/media-device.h``. + +.. code-block:: c + + struct media_entity *entity; + + media_device_for_each_entity(entity, mdev) { + // entity will point to each entity in turn + ... + } + +Drivers might also need to iterate over all entities in a graph that can be +reached only through enabled links starting at a given entity. The media +framework provides a depth-first graph traversal API for that purpose. + +.. note:: + + Graphs with cycles (whether directed or undirected) are **NOT** + supported by the graph traversal API. To prevent infinite loops, the graph + traversal code limits the maximum depth to ``MEDIA_ENTITY_ENUM_MAX_DEPTH``, + currently defined as 16. + +Drivers initiate a graph traversal by calling +:c:func:`media_graph_walk_start()` + +The graph structure, provided by the caller, is initialized to start graph +traversal at the given entity. + +Drivers can then retrieve the next entity by calling +:c:func:`media_graph_walk_next()` + +When the graph traversal is complete the function will return ``NULL``. + +Graph traversal can be interrupted at any moment. No cleanup function call +is required and the graph structure can be freed normally. + +Helper functions can be used to find a link between two given pads, or a pad +connected to another pad through an enabled link +(:c:func:`media_entity_find_link()`, :c:func:`media_pad_remote_pad_first()`, +:c:func:`media_entity_remote_source_pad_unique()` and +:c:func:`media_pad_remote_pad_unique()`). + +Use count and power handling +^^^^^^^^^^^^^^^^^^^^^^^^^^^^ + +Due to the wide differences between drivers regarding power management +needs, the media controller does not implement power management. However, +the struct media_entity includes a ``use_count`` +field that media drivers +can use to track the number of users of every entity for power management +needs. + +The :c:type:`media_entity<media_entity>`.\ ``use_count`` field is owned by +media drivers and must not be +touched by entity drivers. Access to the field must be protected by the +:c:type:`media_device`.\ ``graph_mutex`` lock. + +Links setup +^^^^^^^^^^^ + +Link properties can be modified at runtime by calling +:c:func:`media_entity_setup_link()`. + +Pipelines and media streams +^^^^^^^^^^^^^^^^^^^^^^^^^^^ + +A media stream is a stream of pixels or metadata originating from one or more +source devices (such as a sensors) and flowing through media entity pads +towards the final sinks. The stream can be modified on the route by the +devices (e.g. scaling or pixel format conversions), or it can be split into +multiple branches, or multiple branches can be merged. + +A media pipeline is a set of media streams which are interdependent. This +interdependency can be caused by the hardware (e.g. configuration of a second +stream cannot be changed if the first stream has been enabled) or by the driver +due to the software design. Most commonly a media pipeline consists of a single +stream which does not branch. + +When starting streaming, drivers must notify all entities in the pipeline to +prevent link states from being modified during streaming by calling +:c:func:`media_pipeline_start()`. + +The function will mark all the pads which are part of the pipeline as streaming. + +The struct media_pipeline instance pointed to by the pipe argument will be +stored in every pad in the pipeline. Drivers should embed the struct +media_pipeline in higher-level pipeline structures and can then access the +pipeline through the struct media_pad pipe field. + +Calls to :c:func:`media_pipeline_start()` can be nested. +The pipeline pointer must be identical for all nested calls to the function. + +:c:func:`media_pipeline_start()` may return an error. In that case, +it will clean up any of the changes it did by itself. + +When stopping the stream, drivers must notify the entities with +:c:func:`media_pipeline_stop()`. + +If multiple calls to :c:func:`media_pipeline_start()` have been +made the same number of :c:func:`media_pipeline_stop()` calls +are required to stop streaming. +The :c:type:`media_entity`.\ ``pipe`` field is reset to ``NULL`` on the last +nested stop call. + +Link configuration will fail with ``-EBUSY`` by default if either end of the +link is a streaming entity. Links that can be modified while streaming must +be marked with the ``MEDIA_LNK_FL_DYNAMIC`` flag. + +If other operations need to be disallowed on streaming entities (such as +changing entities configuration parameters) drivers can explicitly check the +media_entity stream_count field to find out if an entity is streaming. This +operation must be done with the media_device graph_mutex held. + +Link validation +^^^^^^^^^^^^^^^ + +Link validation is performed by :c:func:`media_pipeline_start()` +for any entity which has sink pads in the pipeline. The +:c:type:`media_entity`.\ ``link_validate()`` callback is used for that +purpose. In ``link_validate()`` callback, entity driver should check +that the properties of the source pad of the connected entity and its own +sink pad match. It is up to the type of the entity (and in the end, the +properties of the hardware) what matching actually means. + +Subsystems should facilitate link validation by providing subsystem specific +helper functions to provide easy access for commonly needed information, and +in the end provide a way to use driver-specific callbacks. + +Media Controller Device Allocator API +^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ + +When the media device belongs to more than one driver, the shared media +device is allocated with the shared struct device as the key for look ups. + +The shared media device should stay in registered state until the last +driver unregisters it. In addition, the media device should be released when +all the references are released. Each driver gets a reference to the media +device during probe, when it allocates the media device. If media device is +already allocated, the allocate API bumps up the refcount and returns the +existing media device. The driver puts the reference back in its disconnect +routine when it calls :c:func:`media_device_delete()`. + +The media device is unregistered and cleaned up from the kref put handler to +ensure that the media device stays in registered state until the last driver +unregisters the media device. + +**Driver Usage** + +Drivers should use the appropriate media-core routines to manage the shared +media device life-time handling the two states: +1. allocate -> register -> delete +2. get reference to already registered device -> delete + +call :c:func:`media_device_delete()` routine to make sure the shared media +device delete is handled correctly. + +**driver probe:** +Call :c:func:`media_device_usb_allocate()` to allocate or get a reference +Call :c:func:`media_device_register()`, if media devnode isn't registered + +**driver disconnect:** +Call :c:func:`media_device_delete()` to free the media_device. Freeing is +handled by the kref put handler. + +API Definitions +^^^^^^^^^^^^^^^ + +.. kernel-doc:: include/media/media-device.h + +.. kernel-doc:: include/media/media-devnode.h + +.. kernel-doc:: include/media/media-entity.h + +.. kernel-doc:: include/media/media-request.h + +.. kernel-doc:: include/media/media-dev-allocator.h diff --git a/Documentation/driver-api/media/rc-core.rst b/Documentation/driver-api/media/rc-core.rst new file mode 100644 index 0000000000..53f5e643b6 --- /dev/null +++ b/Documentation/driver-api/media/rc-core.rst @@ -0,0 +1,88 @@ +.. SPDX-License-Identifier: GPL-2.0 + +Remote Controller devices +------------------------- + +Remote Controller core +~~~~~~~~~~~~~~~~~~~~~~ + +The remote controller core implements infrastructure to receive and send +remote controller keyboard keystrokes and mouse events. + +Every time a key is pressed on a remote controller, a scan code is produced. +Also, on most hardware, keeping a key pressed for more than a few dozens of +milliseconds produce a repeat key event. That's somewhat similar to what +a normal keyboard or mouse is handled internally on Linux\ [#f1]_. So, the +remote controller core is implemented on the top of the linux input/evdev +interface. + +.. [#f1] + + The main difference is that, on keyboard events, the keyboard controller + produces one event for a key press and another one for key release. On + infrared-based remote controllers, there's no key release event. Instead, + an extra code is produced to indicate key repeats. + +However, most of the remote controllers use infrared (IR) to transmit signals. +As there are several protocols used to modulate infrared signals, one +important part of the core is dedicated to adjust the driver and the core +system to support the infrared protocol used by the emitter. + +The infrared transmission is done by blinking a infrared emitter using a +carrier. The carrier can be switched on or off by the IR transmitter +hardware. When the carrier is switched on, it is called *PULSE*. +When the carrier is switched off, it is called *SPACE*. + +In other words, a typical IR transmission can be viewed as a sequence of +*PULSE* and *SPACE* events, each with a given duration. + +The carrier parameters (frequency, duty cycle) and the intervals for +*PULSE* and *SPACE* events depend on the protocol. +For example, the NEC protocol uses a carrier of 38kHz, and transmissions +start with a 9ms *PULSE* and a 4.5ms SPACE. It then transmits 16 bits of +scan code, being 8 bits for address (usually it is a fixed number for a +given remote controller), followed by 8 bits of code. A bit "1" is modulated +with 560µs *PULSE* followed by 1690µs *SPACE* and a bit "0" is modulated +with 560µs *PULSE* followed by 560µs *SPACE*. + +At receiver, a simple low-pass filter can be used to convert the received +signal in a sequence of *PULSE/SPACE* events, filtering out the carrier +frequency. Due to that, the receiver doesn't care about the carrier's +actual frequency parameters: all it has to do is to measure the amount +of time it receives *PULSE/SPACE* events. +So, a simple IR receiver hardware will just provide a sequence of timings +for those events to the Kernel. The drivers for hardware with such kind of +receivers are identified by ``RC_DRIVER_IR_RAW``, as defined by +:c:type:`rc_driver_type`\ [#f2]_. Other hardware come with a +microcontroller that decode the *PULSE/SPACE* sequence and return scan +codes to the Kernel. Such kind of receivers are identified +by ``RC_DRIVER_SCANCODE``. + +.. [#f2] + + The RC core also supports devices that have just IR emitters, + without any receivers. Right now, all such devices work only in + raw TX mode. Such kind of hardware is identified as + ``RC_DRIVER_IR_RAW_TX``. + +When the RC core receives events produced by ``RC_DRIVER_IR_RAW`` IR +receivers, it needs to decode the IR protocol, in order to obtain the +corresponding scan code. The protocols supported by the RC core are +defined at enum :c:type:`rc_proto`. + +When the RC code receives a scan code (either directly, by a driver +of the type ``RC_DRIVER_SCANCODE``, or via its IR decoders), it needs +to convert into a Linux input event code. This is done via a mapping +table. + +The Kernel has support for mapping tables available on most media +devices. It also supports loading a table in runtime, via some +sysfs nodes. See the :ref:`RC userspace API <Remote_controllers_Intro>` +for more details. + +Remote controller data structures and functions +^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ + +.. kernel-doc:: include/media/rc-core.h + +.. kernel-doc:: include/media/rc-map.h diff --git a/Documentation/driver-api/media/tx-rx.rst b/Documentation/driver-api/media/tx-rx.rst new file mode 100644 index 0000000000..e1e9258dd8 --- /dev/null +++ b/Documentation/driver-api/media/tx-rx.rst @@ -0,0 +1,133 @@ +.. SPDX-License-Identifier: GPL-2.0 + +.. _transmitter-receiver: + +Pixel data transmitter and receiver drivers +=========================================== + +V4L2 supports various devices that transmit and receive pixel data. Examples of +these devices include a camera sensor, a TV tuner and a parallel or a CSI-2 +receiver in an SoC. + +Bus types +--------- + +The following busses are the most common. This section discusses these two only. + +MIPI CSI-2 +^^^^^^^^^^ + +CSI-2 is a data bus intended for transferring images from cameras to +the host SoC. It is defined by the `MIPI alliance`_. + +.. _`MIPI alliance`: https://www.mipi.org/ + +Parallel +^^^^^^^^ + +`BT.601`_ and `BT.656`_ are the most common parallel busses. + +.. _`BT.601`: https://en.wikipedia.org/wiki/Rec._601 +.. _`BT.656`: https://en.wikipedia.org/wiki/ITU-R_BT.656 + +Transmitter drivers +------------------- + +Transmitter drivers generally need to provide the receiver drivers with the +configuration of the transmitter. What is required depends on the type of the +bus. These are common for both busses. + +Media bus pixel code +^^^^^^^^^^^^^^^^^^^^ + +See :ref:`v4l2-mbus-pixelcode`. + +Link frequency +^^^^^^^^^^^^^^ + +The :ref:`V4L2_CID_LINK_FREQ <v4l2-cid-link-freq>` control is used to tell the +receiver the frequency of the bus (i.e. it is not the same as the symbol rate). + +``.s_stream()`` callback +^^^^^^^^^^^^^^^^^^^^^^^^ + +The struct struct v4l2_subdev_video_ops->s_stream() callback is used by the +receiver driver to control the transmitter driver's streaming state. + + +CSI-2 transmitter drivers +------------------------- + +Pixel rate +^^^^^^^^^^ + +The pixel rate on the bus is calculated as follows:: + + pixel_rate = link_freq * 2 * nr_of_lanes * 16 / k / bits_per_sample + +where + +.. list-table:: variables in pixel rate calculation + :header-rows: 1 + + * - variable or constant + - description + * - link_freq + - The value of the ``V4L2_CID_LINK_FREQ`` integer64 menu item. + * - nr_of_lanes + - Number of data lanes used on the CSI-2 link. This can + be obtained from the OF endpoint configuration. + * - 2 + - Data is transferred on both rising and falling edge of the signal. + * - bits_per_sample + - Number of bits per sample. + * - k + - 16 for D-PHY and 7 for C-PHY + +.. note:: + + The pixel rate calculated this way is **not** the same thing as the + pixel rate on the camera sensor's pixel array which is indicated by the + :ref:`V4L2_CID_PIXEL_RATE <v4l2-cid-pixel-rate>` control. + +LP-11 and LP-111 modes +^^^^^^^^^^^^^^^^^^^^^^ + +As part of transitioning to high speed mode, a CSI-2 transmitter typically +briefly sets the bus to LP-11 or LP-111 state, depending on the PHY. This period +may be as short as 100 µs, during which the receiver observes this state and +proceeds its own part of high speed mode transition. + +Most receivers are capable of autonomously handling this once the software has +configured them to do so, but there are receivers which require software +involvement in observing LP-11 or LP-111 state. 100 µs is a brief period to hit +in software, especially when there is no interrupt telling something is +happening. + +One way to address this is to configure the transmitter side explicitly to LP-11 +or LP-111 mode, which requires support from the transmitter hardware. This is +not universally available. Many devices return to this state once streaming is +stopped while the state after power-on is LP-00 or LP-000. + +The ``.pre_streamon()`` callback may be used to prepare a transmitter for +transitioning to streaming state, but not yet start streaming. Similarly, the +``.post_streamoff()`` callback is used to undo what was done by the +``.pre_streamon()`` callback. The caller of ``.pre_streamon()`` is thus required +to call ``.post_streamoff()`` for each successful call of ``.pre_streamon()``. + +In the context of CSI-2, the ``.pre_streamon()`` callback is used to transition +the transmitter to the LP-11 or LP-111 mode. This also requires powering on the +device, so this should be only done when it is needed. + +Receiver drivers that do not need explicit LP-11 or LP-111 mode setup are waived +from calling the two callbacks. + +Stopping the transmitter +^^^^^^^^^^^^^^^^^^^^^^^^ + +A transmitter stops sending the stream of images as a result of +calling the ``.s_stream()`` callback. Some transmitters may stop the +stream at a frame boundary whereas others stop immediately, +effectively leaving the current frame unfinished. The receiver driver +should not make assumptions either way, but function properly in both +cases. diff --git a/Documentation/driver-api/media/v4l2-async.rst b/Documentation/driver-api/media/v4l2-async.rst new file mode 100644 index 0000000000..3422330b3b --- /dev/null +++ b/Documentation/driver-api/media/v4l2-async.rst @@ -0,0 +1,5 @@ +.. SPDX-License-Identifier: GPL-2.0 + +V4L2 async kAPI +^^^^^^^^^^^^^^^ +.. kernel-doc:: include/media/v4l2-async.h diff --git a/Documentation/driver-api/media/v4l2-cci.rst b/Documentation/driver-api/media/v4l2-cci.rst new file mode 100644 index 0000000000..dd297a40ed --- /dev/null +++ b/Documentation/driver-api/media/v4l2-cci.rst @@ -0,0 +1,5 @@ +.. SPDX-License-Identifier: GPL-2.0 + +V4L2 CCI kAPI +^^^^^^^^^^^^^ +.. kernel-doc:: include/media/v4l2-cci.h diff --git a/Documentation/driver-api/media/v4l2-common.rst b/Documentation/driver-api/media/v4l2-common.rst new file mode 100644 index 0000000000..b1e70eb56a --- /dev/null +++ b/Documentation/driver-api/media/v4l2-common.rst @@ -0,0 +1,8 @@ +.. SPDX-License-Identifier: GPL-2.0 + +V4L2 common functions and data structures +^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ + +.. kernel-doc:: include/media/v4l2-common.h + +.. kernel-doc:: include/media/v4l2-ioctl.h diff --git a/Documentation/driver-api/media/v4l2-controls.rst b/Documentation/driver-api/media/v4l2-controls.rst new file mode 100644 index 0000000000..b2e9180482 --- /dev/null +++ b/Documentation/driver-api/media/v4l2-controls.rst @@ -0,0 +1,823 @@ +.. SPDX-License-Identifier: GPL-2.0 + +V4L2 Controls +============= + +Introduction +------------ + +The V4L2 control API seems simple enough, but quickly becomes very hard to +implement correctly in drivers. But much of the code needed to handle controls +is actually not driver specific and can be moved to the V4L core framework. + +After all, the only part that a driver developer is interested in is: + +1) How do I add a control? +2) How do I set the control's value? (i.e. s_ctrl) + +And occasionally: + +3) How do I get the control's value? (i.e. g_volatile_ctrl) +4) How do I validate the user's proposed control value? (i.e. try_ctrl) + +All the rest is something that can be done centrally. + +The control framework was created in order to implement all the rules of the +V4L2 specification with respect to controls in a central place. And to make +life as easy as possible for the driver developer. + +Note that the control framework relies on the presence of a struct +:c:type:`v4l2_device` for V4L2 drivers and struct v4l2_subdev for +sub-device drivers. + + +Objects in the framework +------------------------ + +There are two main objects: + +The :c:type:`v4l2_ctrl` object describes the control properties and keeps +track of the control's value (both the current value and the proposed new +value). + +:c:type:`v4l2_ctrl_handler` is the object that keeps track of controls. It +maintains a list of v4l2_ctrl objects that it owns and another list of +references to controls, possibly to controls owned by other handlers. + + +Basic usage for V4L2 and sub-device drivers +------------------------------------------- + +1) Prepare the driver: + +.. code-block:: c + + #include <media/v4l2-ctrls.h> + +1.1) Add the handler to your driver's top-level struct: + +For V4L2 drivers: + +.. code-block:: c + + struct foo_dev { + ... + struct v4l2_device v4l2_dev; + ... + struct v4l2_ctrl_handler ctrl_handler; + ... + }; + +For sub-device drivers: + +.. code-block:: c + + struct foo_dev { + ... + struct v4l2_subdev sd; + ... + struct v4l2_ctrl_handler ctrl_handler; + ... + }; + +1.2) Initialize the handler: + +.. code-block:: c + + v4l2_ctrl_handler_init(&foo->ctrl_handler, nr_of_controls); + +The second argument is a hint telling the function how many controls this +handler is expected to handle. It will allocate a hashtable based on this +information. It is a hint only. + +1.3) Hook the control handler into the driver: + +For V4L2 drivers: + +.. code-block:: c + + foo->v4l2_dev.ctrl_handler = &foo->ctrl_handler; + +For sub-device drivers: + +.. code-block:: c + + foo->sd.ctrl_handler = &foo->ctrl_handler; + +1.4) Clean up the handler at the end: + +.. code-block:: c + + v4l2_ctrl_handler_free(&foo->ctrl_handler); + + +2) Add controls: + +You add non-menu controls by calling :c:func:`v4l2_ctrl_new_std`: + +.. code-block:: c + + struct v4l2_ctrl *v4l2_ctrl_new_std(struct v4l2_ctrl_handler *hdl, + const struct v4l2_ctrl_ops *ops, + u32 id, s32 min, s32 max, u32 step, s32 def); + +Menu and integer menu controls are added by calling +:c:func:`v4l2_ctrl_new_std_menu`: + +.. code-block:: c + + struct v4l2_ctrl *v4l2_ctrl_new_std_menu(struct v4l2_ctrl_handler *hdl, + const struct v4l2_ctrl_ops *ops, + u32 id, s32 max, s32 skip_mask, s32 def); + +Menu controls with a driver specific menu are added by calling +:c:func:`v4l2_ctrl_new_std_menu_items`: + +.. code-block:: c + + struct v4l2_ctrl *v4l2_ctrl_new_std_menu_items( + struct v4l2_ctrl_handler *hdl, + const struct v4l2_ctrl_ops *ops, u32 id, s32 max, + s32 skip_mask, s32 def, const char * const *qmenu); + +Standard compound controls can be added by calling +:c:func:`v4l2_ctrl_new_std_compound`: + +.. code-block:: c + + struct v4l2_ctrl *v4l2_ctrl_new_std_compound(struct v4l2_ctrl_handler *hdl, + const struct v4l2_ctrl_ops *ops, u32 id, + const union v4l2_ctrl_ptr p_def); + +Integer menu controls with a driver specific menu can be added by calling +:c:func:`v4l2_ctrl_new_int_menu`: + +.. code-block:: c + + struct v4l2_ctrl *v4l2_ctrl_new_int_menu(struct v4l2_ctrl_handler *hdl, + const struct v4l2_ctrl_ops *ops, + u32 id, s32 max, s32 def, const s64 *qmenu_int); + +These functions are typically called right after the +:c:func:`v4l2_ctrl_handler_init`: + +.. code-block:: c + + static const s64 exp_bias_qmenu[] = { + -2, -1, 0, 1, 2 + }; + static const char * const test_pattern[] = { + "Disabled", + "Vertical Bars", + "Solid Black", + "Solid White", + }; + + v4l2_ctrl_handler_init(&foo->ctrl_handler, nr_of_controls); + v4l2_ctrl_new_std(&foo->ctrl_handler, &foo_ctrl_ops, + V4L2_CID_BRIGHTNESS, 0, 255, 1, 128); + v4l2_ctrl_new_std(&foo->ctrl_handler, &foo_ctrl_ops, + V4L2_CID_CONTRAST, 0, 255, 1, 128); + v4l2_ctrl_new_std_menu(&foo->ctrl_handler, &foo_ctrl_ops, + V4L2_CID_POWER_LINE_FREQUENCY, + V4L2_CID_POWER_LINE_FREQUENCY_60HZ, 0, + V4L2_CID_POWER_LINE_FREQUENCY_DISABLED); + v4l2_ctrl_new_int_menu(&foo->ctrl_handler, &foo_ctrl_ops, + V4L2_CID_EXPOSURE_BIAS, + ARRAY_SIZE(exp_bias_qmenu) - 1, + ARRAY_SIZE(exp_bias_qmenu) / 2 - 1, + exp_bias_qmenu); + v4l2_ctrl_new_std_menu_items(&foo->ctrl_handler, &foo_ctrl_ops, + V4L2_CID_TEST_PATTERN, ARRAY_SIZE(test_pattern) - 1, 0, + 0, test_pattern); + ... + if (foo->ctrl_handler.error) { + int err = foo->ctrl_handler.error; + + v4l2_ctrl_handler_free(&foo->ctrl_handler); + return err; + } + +The :c:func:`v4l2_ctrl_new_std` function returns the v4l2_ctrl pointer to +the new control, but if you do not need to access the pointer outside the +control ops, then there is no need to store it. + +The :c:func:`v4l2_ctrl_new_std` function will fill in most fields based on +the control ID except for the min, max, step and default values. These are +passed in the last four arguments. These values are driver specific while +control attributes like type, name, flags are all global. The control's +current value will be set to the default value. + +The :c:func:`v4l2_ctrl_new_std_menu` function is very similar but it is +used for menu controls. There is no min argument since that is always 0 for +menu controls, and instead of a step there is a skip_mask argument: if bit +X is 1, then menu item X is skipped. + +The :c:func:`v4l2_ctrl_new_int_menu` function creates a new standard +integer menu control with driver-specific items in the menu. It differs +from v4l2_ctrl_new_std_menu in that it doesn't have the mask argument and +takes as the last argument an array of signed 64-bit integers that form an +exact menu item list. + +The :c:func:`v4l2_ctrl_new_std_menu_items` function is very similar to +v4l2_ctrl_new_std_menu but takes an extra parameter qmenu, which is the +driver specific menu for an otherwise standard menu control. A good example +for this control is the test pattern control for capture/display/sensors +devices that have the capability to generate test patterns. These test +patterns are hardware specific, so the contents of the menu will vary from +device to device. + +Note that if something fails, the function will return NULL or an error and +set ctrl_handler->error to the error code. If ctrl_handler->error was already +set, then it will just return and do nothing. This is also true for +v4l2_ctrl_handler_init if it cannot allocate the internal data structure. + +This makes it easy to init the handler and just add all controls and only check +the error code at the end. Saves a lot of repetitive error checking. + +It is recommended to add controls in ascending control ID order: it will be +a bit faster that way. + +3) Optionally force initial control setup: + +.. code-block:: c + + v4l2_ctrl_handler_setup(&foo->ctrl_handler); + +This will call s_ctrl for all controls unconditionally. Effectively this +initializes the hardware to the default control values. It is recommended +that you do this as this ensures that both the internal data structures and +the hardware are in sync. + +4) Finally: implement the :c:type:`v4l2_ctrl_ops` + +.. code-block:: c + + static const struct v4l2_ctrl_ops foo_ctrl_ops = { + .s_ctrl = foo_s_ctrl, + }; + +Usually all you need is s_ctrl: + +.. code-block:: c + + static int foo_s_ctrl(struct v4l2_ctrl *ctrl) + { + struct foo *state = container_of(ctrl->handler, struct foo, ctrl_handler); + + switch (ctrl->id) { + case V4L2_CID_BRIGHTNESS: + write_reg(0x123, ctrl->val); + break; + case V4L2_CID_CONTRAST: + write_reg(0x456, ctrl->val); + break; + } + return 0; + } + +The control ops are called with the v4l2_ctrl pointer as argument. +The new control value has already been validated, so all you need to do is +to actually update the hardware registers. + +You're done! And this is sufficient for most of the drivers we have. No need +to do any validation of control values, or implement QUERYCTRL, QUERY_EXT_CTRL +and QUERYMENU. And G/S_CTRL as well as G/TRY/S_EXT_CTRLS are automatically supported. + + +.. note:: + + The remainder sections deal with more advanced controls topics and scenarios. + In practice the basic usage as described above is sufficient for most drivers. + + +Inheriting Sub-device Controls +------------------------------ + +When a sub-device is registered with a V4L2 driver by calling +v4l2_device_register_subdev() and the ctrl_handler fields of both v4l2_subdev +and v4l2_device are set, then the controls of the subdev will become +automatically available in the V4L2 driver as well. If the subdev driver +contains controls that already exist in the V4L2 driver, then those will be +skipped (so a V4L2 driver can always override a subdev control). + +What happens here is that v4l2_device_register_subdev() calls +v4l2_ctrl_add_handler() adding the controls of the subdev to the controls +of v4l2_device. + + +Accessing Control Values +------------------------ + +The following union is used inside the control framework to access control +values: + +.. code-block:: c + + union v4l2_ctrl_ptr { + s32 *p_s32; + s64 *p_s64; + char *p_char; + void *p; + }; + +The v4l2_ctrl struct contains these fields that can be used to access both +current and new values: + +.. code-block:: c + + s32 val; + struct { + s32 val; + } cur; + + + union v4l2_ctrl_ptr p_new; + union v4l2_ctrl_ptr p_cur; + +If the control has a simple s32 type, then: + +.. code-block:: c + + &ctrl->val == ctrl->p_new.p_s32 + &ctrl->cur.val == ctrl->p_cur.p_s32 + +For all other types use ctrl->p_cur.p<something>. Basically the val +and cur.val fields can be considered an alias since these are used so often. + +Within the control ops you can freely use these. The val and cur.val speak for +themselves. The p_char pointers point to character buffers of length +ctrl->maximum + 1, and are always 0-terminated. + +Unless the control is marked volatile the p_cur field points to the +current cached control value. When you create a new control this value is made +identical to the default value. After calling v4l2_ctrl_handler_setup() this +value is passed to the hardware. It is generally a good idea to call this +function. + +Whenever a new value is set that new value is automatically cached. This means +that most drivers do not need to implement the g_volatile_ctrl() op. The +exception is for controls that return a volatile register such as a signal +strength read-out that changes continuously. In that case you will need to +implement g_volatile_ctrl like this: + +.. code-block:: c + + static int foo_g_volatile_ctrl(struct v4l2_ctrl *ctrl) + { + switch (ctrl->id) { + case V4L2_CID_BRIGHTNESS: + ctrl->val = read_reg(0x123); + break; + } + } + +Note that you use the 'new value' union as well in g_volatile_ctrl. In general +controls that need to implement g_volatile_ctrl are read-only controls. If they +are not, a V4L2_EVENT_CTRL_CH_VALUE will not be generated when the control +changes. + +To mark a control as volatile you have to set V4L2_CTRL_FLAG_VOLATILE: + +.. code-block:: c + + ctrl = v4l2_ctrl_new_std(&sd->ctrl_handler, ...); + if (ctrl) + ctrl->flags |= V4L2_CTRL_FLAG_VOLATILE; + +For try/s_ctrl the new values (i.e. as passed by the user) are filled in and +you can modify them in try_ctrl or set them in s_ctrl. The 'cur' union +contains the current value, which you can use (but not change!) as well. + +If s_ctrl returns 0 (OK), then the control framework will copy the new final +values to the 'cur' union. + +While in g_volatile/s/try_ctrl you can access the value of all controls owned +by the same handler since the handler's lock is held. If you need to access +the value of controls owned by other handlers, then you have to be very careful +not to introduce deadlocks. + +Outside of the control ops you have to go through to helper functions to get +or set a single control value safely in your driver: + +.. code-block:: c + + s32 v4l2_ctrl_g_ctrl(struct v4l2_ctrl *ctrl); + int v4l2_ctrl_s_ctrl(struct v4l2_ctrl *ctrl, s32 val); + +These functions go through the control framework just as VIDIOC_G/S_CTRL ioctls +do. Don't use these inside the control ops g_volatile/s/try_ctrl, though, that +will result in a deadlock since these helpers lock the handler as well. + +You can also take the handler lock yourself: + +.. code-block:: c + + mutex_lock(&state->ctrl_handler.lock); + pr_info("String value is '%s'\n", ctrl1->p_cur.p_char); + pr_info("Integer value is '%s'\n", ctrl2->cur.val); + mutex_unlock(&state->ctrl_handler.lock); + + +Menu Controls +------------- + +The v4l2_ctrl struct contains this union: + +.. code-block:: c + + union { + u32 step; + u32 menu_skip_mask; + }; + +For menu controls menu_skip_mask is used. What it does is that it allows you +to easily exclude certain menu items. This is used in the VIDIOC_QUERYMENU +implementation where you can return -EINVAL if a certain menu item is not +present. Note that VIDIOC_QUERYCTRL always returns a step value of 1 for +menu controls. + +A good example is the MPEG Audio Layer II Bitrate menu control where the +menu is a list of standardized possible bitrates. But in practice hardware +implementations will only support a subset of those. By setting the skip +mask you can tell the framework which menu items should be skipped. Setting +it to 0 means that all menu items are supported. + +You set this mask either through the v4l2_ctrl_config struct for a custom +control, or by calling v4l2_ctrl_new_std_menu(). + + +Custom Controls +--------------- + +Driver specific controls can be created using v4l2_ctrl_new_custom(): + +.. code-block:: c + + static const struct v4l2_ctrl_config ctrl_filter = { + .ops = &ctrl_custom_ops, + .id = V4L2_CID_MPEG_CX2341X_VIDEO_SPATIAL_FILTER, + .name = "Spatial Filter", + .type = V4L2_CTRL_TYPE_INTEGER, + .flags = V4L2_CTRL_FLAG_SLIDER, + .max = 15, + .step = 1, + }; + + ctrl = v4l2_ctrl_new_custom(&foo->ctrl_handler, &ctrl_filter, NULL); + +The last argument is the priv pointer which can be set to driver-specific +private data. + +The v4l2_ctrl_config struct also has a field to set the is_private flag. + +If the name field is not set, then the framework will assume this is a standard +control and will fill in the name, type and flags fields accordingly. + + +Active and Grabbed Controls +--------------------------- + +If you get more complex relationships between controls, then you may have to +activate and deactivate controls. For example, if the Chroma AGC control is +on, then the Chroma Gain control is inactive. That is, you may set it, but +the value will not be used by the hardware as long as the automatic gain +control is on. Typically user interfaces can disable such input fields. + +You can set the 'active' status using v4l2_ctrl_activate(). By default all +controls are active. Note that the framework does not check for this flag. +It is meant purely for GUIs. The function is typically called from within +s_ctrl. + +The other flag is the 'grabbed' flag. A grabbed control means that you cannot +change it because it is in use by some resource. Typical examples are MPEG +bitrate controls that cannot be changed while capturing is in progress. + +If a control is set to 'grabbed' using v4l2_ctrl_grab(), then the framework +will return -EBUSY if an attempt is made to set this control. The +v4l2_ctrl_grab() function is typically called from the driver when it +starts or stops streaming. + + +Control Clusters +---------------- + +By default all controls are independent from the others. But in more +complex scenarios you can get dependencies from one control to another. +In that case you need to 'cluster' them: + +.. code-block:: c + + struct foo { + struct v4l2_ctrl_handler ctrl_handler; + #define AUDIO_CL_VOLUME (0) + #define AUDIO_CL_MUTE (1) + struct v4l2_ctrl *audio_cluster[2]; + ... + }; + + state->audio_cluster[AUDIO_CL_VOLUME] = + v4l2_ctrl_new_std(&state->ctrl_handler, ...); + state->audio_cluster[AUDIO_CL_MUTE] = + v4l2_ctrl_new_std(&state->ctrl_handler, ...); + v4l2_ctrl_cluster(ARRAY_SIZE(state->audio_cluster), state->audio_cluster); + +From now on whenever one or more of the controls belonging to the same +cluster is set (or 'gotten', or 'tried'), only the control ops of the first +control ('volume' in this example) is called. You effectively create a new +composite control. Similar to how a 'struct' works in C. + +So when s_ctrl is called with V4L2_CID_AUDIO_VOLUME as argument, you should set +all two controls belonging to the audio_cluster: + +.. code-block:: c + + static int foo_s_ctrl(struct v4l2_ctrl *ctrl) + { + struct foo *state = container_of(ctrl->handler, struct foo, ctrl_handler); + + switch (ctrl->id) { + case V4L2_CID_AUDIO_VOLUME: { + struct v4l2_ctrl *mute = ctrl->cluster[AUDIO_CL_MUTE]; + + write_reg(0x123, mute->val ? 0 : ctrl->val); + break; + } + case V4L2_CID_CONTRAST: + write_reg(0x456, ctrl->val); + break; + } + return 0; + } + +In the example above the following are equivalent for the VOLUME case: + +.. code-block:: c + + ctrl == ctrl->cluster[AUDIO_CL_VOLUME] == state->audio_cluster[AUDIO_CL_VOLUME] + ctrl->cluster[AUDIO_CL_MUTE] == state->audio_cluster[AUDIO_CL_MUTE] + +In practice using cluster arrays like this becomes very tiresome. So instead +the following equivalent method is used: + +.. code-block:: c + + struct { + /* audio cluster */ + struct v4l2_ctrl *volume; + struct v4l2_ctrl *mute; + }; + +The anonymous struct is used to clearly 'cluster' these two control pointers, +but it serves no other purpose. The effect is the same as creating an +array with two control pointers. So you can just do: + +.. code-block:: c + + state->volume = v4l2_ctrl_new_std(&state->ctrl_handler, ...); + state->mute = v4l2_ctrl_new_std(&state->ctrl_handler, ...); + v4l2_ctrl_cluster(2, &state->volume); + +And in foo_s_ctrl you can use these pointers directly: state->mute->val. + +Note that controls in a cluster may be NULL. For example, if for some +reason mute was never added (because the hardware doesn't support that +particular feature), then mute will be NULL. So in that case we have a +cluster of 2 controls, of which only 1 is actually instantiated. The +only restriction is that the first control of the cluster must always be +present, since that is the 'master' control of the cluster. The master +control is the one that identifies the cluster and that provides the +pointer to the v4l2_ctrl_ops struct that is used for that cluster. + +Obviously, all controls in the cluster array must be initialized to either +a valid control or to NULL. + +In rare cases you might want to know which controls of a cluster actually +were set explicitly by the user. For this you can check the 'is_new' flag of +each control. For example, in the case of a volume/mute cluster the 'is_new' +flag of the mute control would be set if the user called VIDIOC_S_CTRL for +mute only. If the user would call VIDIOC_S_EXT_CTRLS for both mute and volume +controls, then the 'is_new' flag would be 1 for both controls. + +The 'is_new' flag is always 1 when called from v4l2_ctrl_handler_setup(). + + +Handling autogain/gain-type Controls with Auto Clusters +------------------------------------------------------- + +A common type of control cluster is one that handles 'auto-foo/foo'-type +controls. Typical examples are autogain/gain, autoexposure/exposure, +autowhitebalance/red balance/blue balance. In all cases you have one control +that determines whether another control is handled automatically by the hardware, +or whether it is under manual control from the user. + +If the cluster is in automatic mode, then the manual controls should be +marked inactive and volatile. When the volatile controls are read the +g_volatile_ctrl operation should return the value that the hardware's automatic +mode set up automatically. + +If the cluster is put in manual mode, then the manual controls should become +active again and the volatile flag is cleared (so g_volatile_ctrl is no longer +called while in manual mode). In addition just before switching to manual mode +the current values as determined by the auto mode are copied as the new manual +values. + +Finally the V4L2_CTRL_FLAG_UPDATE should be set for the auto control since +changing that control affects the control flags of the manual controls. + +In order to simplify this a special variation of v4l2_ctrl_cluster was +introduced: + +.. code-block:: c + + void v4l2_ctrl_auto_cluster(unsigned ncontrols, struct v4l2_ctrl **controls, + u8 manual_val, bool set_volatile); + +The first two arguments are identical to v4l2_ctrl_cluster. The third argument +tells the framework which value switches the cluster into manual mode. The +last argument will optionally set V4L2_CTRL_FLAG_VOLATILE for the non-auto controls. +If it is false, then the manual controls are never volatile. You would typically +use that if the hardware does not give you the option to read back to values as +determined by the auto mode (e.g. if autogain is on, the hardware doesn't allow +you to obtain the current gain value). + +The first control of the cluster is assumed to be the 'auto' control. + +Using this function will ensure that you don't need to handle all the complex +flag and volatile handling. + + +VIDIOC_LOG_STATUS Support +------------------------- + +This ioctl allow you to dump the current status of a driver to the kernel log. +The v4l2_ctrl_handler_log_status(ctrl_handler, prefix) can be used to dump the +value of the controls owned by the given handler to the log. You can supply a +prefix as well. If the prefix didn't end with a space, then ': ' will be added +for you. + + +Different Handlers for Different Video Nodes +-------------------------------------------- + +Usually the V4L2 driver has just one control handler that is global for +all video nodes. But you can also specify different control handlers for +different video nodes. You can do that by manually setting the ctrl_handler +field of struct video_device. + +That is no problem if there are no subdevs involved but if there are, then +you need to block the automatic merging of subdev controls to the global +control handler. You do that by simply setting the ctrl_handler field in +struct v4l2_device to NULL. Now v4l2_device_register_subdev() will no longer +merge subdev controls. + +After each subdev was added, you will then have to call v4l2_ctrl_add_handler +manually to add the subdev's control handler (sd->ctrl_handler) to the desired +control handler. This control handler may be specific to the video_device or +for a subset of video_device's. For example: the radio device nodes only have +audio controls, while the video and vbi device nodes share the same control +handler for the audio and video controls. + +If you want to have one handler (e.g. for a radio device node) have a subset +of another handler (e.g. for a video device node), then you should first add +the controls to the first handler, add the other controls to the second +handler and finally add the first handler to the second. For example: + +.. code-block:: c + + v4l2_ctrl_new_std(&radio_ctrl_handler, &radio_ops, V4L2_CID_AUDIO_VOLUME, ...); + v4l2_ctrl_new_std(&radio_ctrl_handler, &radio_ops, V4L2_CID_AUDIO_MUTE, ...); + v4l2_ctrl_new_std(&video_ctrl_handler, &video_ops, V4L2_CID_BRIGHTNESS, ...); + v4l2_ctrl_new_std(&video_ctrl_handler, &video_ops, V4L2_CID_CONTRAST, ...); + v4l2_ctrl_add_handler(&video_ctrl_handler, &radio_ctrl_handler, NULL); + +The last argument to v4l2_ctrl_add_handler() is a filter function that allows +you to filter which controls will be added. Set it to NULL if you want to add +all controls. + +Or you can add specific controls to a handler: + +.. code-block:: c + + volume = v4l2_ctrl_new_std(&video_ctrl_handler, &ops, V4L2_CID_AUDIO_VOLUME, ...); + v4l2_ctrl_new_std(&video_ctrl_handler, &ops, V4L2_CID_BRIGHTNESS, ...); + v4l2_ctrl_new_std(&video_ctrl_handler, &ops, V4L2_CID_CONTRAST, ...); + +What you should not do is make two identical controls for two handlers. +For example: + +.. code-block:: c + + v4l2_ctrl_new_std(&radio_ctrl_handler, &radio_ops, V4L2_CID_AUDIO_MUTE, ...); + v4l2_ctrl_new_std(&video_ctrl_handler, &video_ops, V4L2_CID_AUDIO_MUTE, ...); + +This would be bad since muting the radio would not change the video mute +control. The rule is to have one control for each hardware 'knob' that you +can twiddle. + + +Finding Controls +---------------- + +Normally you have created the controls yourself and you can store the struct +v4l2_ctrl pointer into your own struct. + +But sometimes you need to find a control from another handler that you do +not own. For example, if you have to find a volume control from a subdev. + +You can do that by calling v4l2_ctrl_find: + +.. code-block:: c + + struct v4l2_ctrl *volume; + + volume = v4l2_ctrl_find(sd->ctrl_handler, V4L2_CID_AUDIO_VOLUME); + +Since v4l2_ctrl_find will lock the handler you have to be careful where you +use it. For example, this is not a good idea: + +.. code-block:: c + + struct v4l2_ctrl_handler ctrl_handler; + + v4l2_ctrl_new_std(&ctrl_handler, &video_ops, V4L2_CID_BRIGHTNESS, ...); + v4l2_ctrl_new_std(&ctrl_handler, &video_ops, V4L2_CID_CONTRAST, ...); + +...and in video_ops.s_ctrl: + +.. code-block:: c + + case V4L2_CID_BRIGHTNESS: + contrast = v4l2_find_ctrl(&ctrl_handler, V4L2_CID_CONTRAST); + ... + +When s_ctrl is called by the framework the ctrl_handler.lock is already taken, so +attempting to find another control from the same handler will deadlock. + +It is recommended not to use this function from inside the control ops. + + +Preventing Controls inheritance +------------------------------- + +When one control handler is added to another using v4l2_ctrl_add_handler, then +by default all controls from one are merged to the other. But a subdev might +have low-level controls that make sense for some advanced embedded system, but +not when it is used in consumer-level hardware. In that case you want to keep +those low-level controls local to the subdev. You can do this by simply +setting the 'is_private' flag of the control to 1: + +.. code-block:: c + + static const struct v4l2_ctrl_config ctrl_private = { + .ops = &ctrl_custom_ops, + .id = V4L2_CID_..., + .name = "Some Private Control", + .type = V4L2_CTRL_TYPE_INTEGER, + .max = 15, + .step = 1, + .is_private = 1, + }; + + ctrl = v4l2_ctrl_new_custom(&foo->ctrl_handler, &ctrl_private, NULL); + +These controls will now be skipped when v4l2_ctrl_add_handler is called. + + +V4L2_CTRL_TYPE_CTRL_CLASS Controls +---------------------------------- + +Controls of this type can be used by GUIs to get the name of the control class. +A fully featured GUI can make a dialog with multiple tabs with each tab +containing the controls belonging to a particular control class. The name of +each tab can be found by querying a special control with ID <control class | 1>. + +Drivers do not have to care about this. The framework will automatically add +a control of this type whenever the first control belonging to a new control +class is added. + + +Adding Notify Callbacks +----------------------- + +Sometimes the platform or bridge driver needs to be notified when a control +from a sub-device driver changes. You can set a notify callback by calling +this function: + +.. code-block:: c + + void v4l2_ctrl_notify(struct v4l2_ctrl *ctrl, + void (*notify)(struct v4l2_ctrl *ctrl, void *priv), void *priv); + +Whenever the give control changes value the notify callback will be called +with a pointer to the control and the priv pointer that was passed with +v4l2_ctrl_notify. Note that the control's handler lock is held when the +notify function is called. + +There can be only one notify function per control handler. Any attempt +to set another notify function will cause a WARN_ON. + +v4l2_ctrl functions and data structures +--------------------------------------- + +.. kernel-doc:: include/media/v4l2-ctrls.h diff --git a/Documentation/driver-api/media/v4l2-core.rst b/Documentation/driver-api/media/v4l2-core.rst new file mode 100644 index 0000000000..239045ecc8 --- /dev/null +++ b/Documentation/driver-api/media/v4l2-core.rst @@ -0,0 +1,29 @@ +.. SPDX-License-Identifier: GPL-2.0 + +Video4Linux devices +------------------- + +.. toctree:: + :maxdepth: 1 + + v4l2-intro + v4l2-dev + v4l2-device + v4l2-fh + v4l2-subdev + v4l2-event + v4l2-controls + v4l2-videobuf + v4l2-videobuf2 + v4l2-dv-timings + v4l2-flash-led-class + v4l2-mc + v4l2-mediabus + v4l2-mem2mem + v4l2-async + v4l2-fwnode + v4l2-cci + v4l2-rect + v4l2-tuner + v4l2-common + v4l2-tveeprom diff --git a/Documentation/driver-api/media/v4l2-dev.rst b/Documentation/driver-api/media/v4l2-dev.rst new file mode 100644 index 0000000000..99e3b5fa74 --- /dev/null +++ b/Documentation/driver-api/media/v4l2-dev.rst @@ -0,0 +1,375 @@ +.. SPDX-License-Identifier: GPL-2.0 + +Video device' s internal representation +======================================= + +The actual device nodes in the ``/dev`` directory are created using the +:c:type:`video_device` struct (``v4l2-dev.h``). This struct can either be +allocated dynamically or embedded in a larger struct. + +To allocate it dynamically use :c:func:`video_device_alloc`: + +.. code-block:: c + + struct video_device *vdev = video_device_alloc(); + + if (vdev == NULL) + return -ENOMEM; + + vdev->release = video_device_release; + +If you embed it in a larger struct, then you must set the ``release()`` +callback to your own function: + +.. code-block:: c + + struct video_device *vdev = &my_vdev->vdev; + + vdev->release = my_vdev_release; + +The ``release()`` callback must be set and it is called when the last user +of the video device exits. + +The default :c:func:`video_device_release` callback currently +just calls ``kfree`` to free the allocated memory. + +There is also a :c:func:`video_device_release_empty` function that does +nothing (is empty) and should be used if the struct is embedded and there +is nothing to do when it is released. + +You should also set these fields of :c:type:`video_device`: + +- :c:type:`video_device`->v4l2_dev: must be set to the :c:type:`v4l2_device` + parent device. + +- :c:type:`video_device`->name: set to something descriptive and unique. + +- :c:type:`video_device`->vfl_dir: set this to ``VFL_DIR_RX`` for capture + devices (``VFL_DIR_RX`` has value 0, so this is normally already the + default), set to ``VFL_DIR_TX`` for output devices and ``VFL_DIR_M2M`` for mem2mem (codec) devices. + +- :c:type:`video_device`->fops: set to the :c:type:`v4l2_file_operations` + struct. + +- :c:type:`video_device`->ioctl_ops: if you use the :c:type:`v4l2_ioctl_ops` + to simplify ioctl maintenance (highly recommended to use this and it might + become compulsory in the future!), then set this to your + :c:type:`v4l2_ioctl_ops` struct. The :c:type:`video_device`->vfl_type and + :c:type:`video_device`->vfl_dir fields are used to disable ops that do not + match the type/dir combination. E.g. VBI ops are disabled for non-VBI nodes, + and output ops are disabled for a capture device. This makes it possible to + provide just one :c:type:`v4l2_ioctl_ops` struct for both vbi and + video nodes. + +- :c:type:`video_device`->lock: leave to ``NULL`` if you want to do all the + locking in the driver. Otherwise you give it a pointer to a struct + ``mutex_lock`` and before the :c:type:`video_device`->unlocked_ioctl + file operation is called this lock will be taken by the core and released + afterwards. See the next section for more details. + +- :c:type:`video_device`->queue: a pointer to the struct vb2_queue + associated with this device node. + If queue is not ``NULL``, and queue->lock is not ``NULL``, then queue->lock + is used for the queuing ioctls (``VIDIOC_REQBUFS``, ``CREATE_BUFS``, + ``QBUF``, ``DQBUF``, ``QUERYBUF``, ``PREPARE_BUF``, ``STREAMON`` and + ``STREAMOFF``) instead of the lock above. + That way the :ref:`vb2 <vb2_framework>` queuing framework does not have + to wait for other ioctls. This queue pointer is also used by the + :ref:`vb2 <vb2_framework>` helper functions to check for + queuing ownership (i.e. is the filehandle calling it allowed to do the + operation). + +- :c:type:`video_device`->prio: keeps track of the priorities. Used to + implement ``VIDIOC_G_PRIORITY`` and ``VIDIOC_S_PRIORITY``. + If left to ``NULL``, then it will use the struct v4l2_prio_state + in :c:type:`v4l2_device`. If you want to have a separate priority state per + (group of) device node(s), then you can point it to your own struct + :c:type:`v4l2_prio_state`. + +- :c:type:`video_device`->dev_parent: you only set this if v4l2_device was + registered with ``NULL`` as the parent ``device`` struct. This only happens + in cases where one hardware device has multiple PCI devices that all share + the same :c:type:`v4l2_device` core. + + The cx88 driver is an example of this: one core :c:type:`v4l2_device` struct, + but it is used by both a raw video PCI device (cx8800) and a MPEG PCI device + (cx8802). Since the :c:type:`v4l2_device` cannot be associated with two PCI + devices at the same time it is setup without a parent device. But when the + struct video_device is initialized you **do** know which parent + PCI device to use and so you set ``dev_device`` to the correct PCI device. + +If you use :c:type:`v4l2_ioctl_ops`, then you should set +:c:type:`video_device`->unlocked_ioctl to :c:func:`video_ioctl2` in your +:c:type:`v4l2_file_operations` struct. + +In some cases you want to tell the core that a function you had specified in +your :c:type:`v4l2_ioctl_ops` should be ignored. You can mark such ioctls by +calling this function before :c:func:`video_register_device` is called: + + :c:func:`v4l2_disable_ioctl <v4l2_disable_ioctl>` + (:c:type:`vdev <video_device>`, cmd). + +This tends to be needed if based on external factors (e.g. which card is +being used) you want to turns off certain features in :c:type:`v4l2_ioctl_ops` +without having to make a new struct. + +The :c:type:`v4l2_file_operations` struct is a subset of file_operations. +The main difference is that the inode argument is omitted since it is never +used. + +If integration with the media framework is needed, you must initialize the +:c:type:`media_entity` struct embedded in the :c:type:`video_device` struct +(entity field) by calling :c:func:`media_entity_pads_init`: + +.. code-block:: c + + struct media_pad *pad = &my_vdev->pad; + int err; + + err = media_entity_pads_init(&vdev->entity, 1, pad); + +The pads array must have been previously initialized. There is no need to +manually set the struct media_entity type and name fields. + +A reference to the entity will be automatically acquired/released when the +video device is opened/closed. + +ioctls and locking +------------------ + +The V4L core provides optional locking services. The main service is the +lock field in struct video_device, which is a pointer to a mutex. +If you set this pointer, then that will be used by unlocked_ioctl to +serialize all ioctls. + +If you are using the :ref:`videobuf2 framework <vb2_framework>`, then there +is a second lock that you can set: :c:type:`video_device`->queue->lock. If +set, then this lock will be used instead of :c:type:`video_device`->lock +to serialize all queuing ioctls (see the previous section +for the full list of those ioctls). + +The advantage of using a different lock for the queuing ioctls is that for some +drivers (particularly USB drivers) certain commands such as setting controls +can take a long time, so you want to use a separate lock for the buffer queuing +ioctls. That way your ``VIDIOC_DQBUF`` doesn't stall because the driver is busy +changing the e.g. exposure of the webcam. + +Of course, you can always do all the locking yourself by leaving both lock +pointers at ``NULL``. + +If you use the old :ref:`videobuf framework <vb_framework>` then you must +pass the :c:type:`video_device`->lock to the videobuf queue initialize +function: if videobuf has to wait for a frame to arrive, then it will +temporarily unlock the lock and relock it afterwards. If your driver also +waits in the code, then you should do the same to allow other +processes to access the device node while the first process is waiting for +something. + +In the case of :ref:`videobuf2 <vb2_framework>` you will need to implement the +``wait_prepare()`` and ``wait_finish()`` callbacks to unlock/lock if applicable. +If you use the ``queue->lock`` pointer, then you can use the helper functions +:c:func:`vb2_ops_wait_prepare` and :c:func:`vb2_ops_wait_finish`. + +The implementation of a hotplug disconnect should also take the lock from +:c:type:`video_device` before calling v4l2_device_disconnect. If you are also +using :c:type:`video_device`->queue->lock, then you have to first lock +:c:type:`video_device`->queue->lock followed by :c:type:`video_device`->lock. +That way you can be sure no ioctl is running when you call +:c:func:`v4l2_device_disconnect`. + +Video device registration +------------------------- + +Next you register the video device with :c:func:`video_register_device`. +This will create the character device for you. + +.. code-block:: c + + err = video_register_device(vdev, VFL_TYPE_VIDEO, -1); + if (err) { + video_device_release(vdev); /* or kfree(my_vdev); */ + return err; + } + +If the :c:type:`v4l2_device` parent device has a not ``NULL`` mdev field, +the video device entity will be automatically registered with the media +device. + +Which device is registered depends on the type argument. The following +types exist: + +========================== ==================== ============================== +:c:type:`vfl_devnode_type` Device name Usage +========================== ==================== ============================== +``VFL_TYPE_VIDEO`` ``/dev/videoX`` for video input/output devices +``VFL_TYPE_VBI`` ``/dev/vbiX`` for vertical blank data (i.e. + closed captions, teletext) +``VFL_TYPE_RADIO`` ``/dev/radioX`` for radio tuners +``VFL_TYPE_SUBDEV`` ``/dev/v4l-subdevX`` for V4L2 subdevices +``VFL_TYPE_SDR`` ``/dev/swradioX`` for Software Defined Radio + (SDR) tuners +``VFL_TYPE_TOUCH`` ``/dev/v4l-touchX`` for touch sensors +========================== ==================== ============================== + +The last argument gives you a certain amount of control over the device +node number used (i.e. the X in ``videoX``). Normally you will pass -1 +to let the v4l2 framework pick the first free number. But sometimes users +want to select a specific node number. It is common that drivers allow +the user to select a specific device node number through a driver module +option. That number is then passed to this function and video_register_device +will attempt to select that device node number. If that number was already +in use, then the next free device node number will be selected and it +will send a warning to the kernel log. + +Another use-case is if a driver creates many devices. In that case it can +be useful to place different video devices in separate ranges. For example, +video capture devices start at 0, video output devices start at 16. +So you can use the last argument to specify a minimum device node number +and the v4l2 framework will try to pick the first free number that is equal +or higher to what you passed. If that fails, then it will just pick the +first free number. + +Since in this case you do not care about a warning about not being able +to select the specified device node number, you can call the function +:c:func:`video_register_device_no_warn` instead. + +Whenever a device node is created some attributes are also created for you. +If you look in ``/sys/class/video4linux`` you see the devices. Go into e.g. +``video0`` and you will see 'name', 'dev_debug' and 'index' attributes. The +'name' attribute is the 'name' field of the video_device struct. The +'dev_debug' attribute can be used to enable core debugging. See the next +section for more detailed information on this. + +The 'index' attribute is the index of the device node: for each call to +:c:func:`video_register_device()` the index is just increased by 1. The +first video device node you register always starts with index 0. + +Users can setup udev rules that utilize the index attribute to make fancy +device names (e.g. '``mpegX``' for MPEG video capture device nodes). + +After the device was successfully registered, then you can use these fields: + +- :c:type:`video_device`->vfl_type: the device type passed to + :c:func:`video_register_device`. +- :c:type:`video_device`->minor: the assigned device minor number. +- :c:type:`video_device`->num: the device node number (i.e. the X in + ``videoX``). +- :c:type:`video_device`->index: the device index number. + +If the registration failed, then you need to call +:c:func:`video_device_release` to free the allocated :c:type:`video_device` +struct, or free your own struct if the :c:type:`video_device` was embedded in +it. The ``vdev->release()`` callback will never be called if the registration +failed, nor should you ever attempt to unregister the device if the +registration failed. + +video device debugging +---------------------- + +The 'dev_debug' attribute that is created for each video, vbi, radio or swradio +device in ``/sys/class/video4linux/<devX>/`` allows you to enable logging of +file operations. + +It is a bitmask and the following bits can be set: + +.. tabularcolumns:: |p{5ex}|L| + +===== ================================================================ +Mask Description +===== ================================================================ +0x01 Log the ioctl name and error code. VIDIOC_(D)QBUF ioctls are + only logged if bit 0x08 is also set. +0x02 Log the ioctl name arguments and error code. VIDIOC_(D)QBUF + ioctls are + only logged if bit 0x08 is also set. +0x04 Log the file operations open, release, read, write, mmap and + get_unmapped_area. The read and write operations are only + logged if bit 0x08 is also set. +0x08 Log the read and write file operations and the VIDIOC_QBUF and + VIDIOC_DQBUF ioctls. +0x10 Log the poll file operation. +0x20 Log error and messages in the control operations. +===== ================================================================ + +Video device cleanup +-------------------- + +When the video device nodes have to be removed, either during the unload +of the driver or because the USB device was disconnected, then you should +unregister them with: + + :c:func:`video_unregister_device` + (:c:type:`vdev <video_device>`); + +This will remove the device nodes from sysfs (causing udev to remove them +from ``/dev``). + +After :c:func:`video_unregister_device` returns no new opens can be done. +However, in the case of USB devices some application might still have one of +these device nodes open. So after the unregister all file operations (except +release, of course) will return an error as well. + +When the last user of the video device node exits, then the ``vdev->release()`` +callback is called and you can do the final cleanup there. + +Don't forget to cleanup the media entity associated with the video device if +it has been initialized: + + :c:func:`media_entity_cleanup <media_entity_cleanup>` + (&vdev->entity); + +This can be done from the release callback. + + +helper functions +---------------- + +There are a few useful helper functions: + +- file and :c:type:`video_device` private data + +You can set/get driver private data in the video_device struct using: + + :c:func:`video_get_drvdata <video_get_drvdata>` + (:c:type:`vdev <video_device>`); + + :c:func:`video_set_drvdata <video_set_drvdata>` + (:c:type:`vdev <video_device>`); + +Note that you can safely call :c:func:`video_set_drvdata` before calling +:c:func:`video_register_device`. + +And this function: + + :c:func:`video_devdata <video_devdata>` + (struct file \*file); + +returns the video_device belonging to the file struct. + +The :c:func:`video_devdata` function combines :c:func:`video_get_drvdata` +with :c:func:`video_devdata`: + + :c:func:`video_drvdata <video_drvdata>` + (struct file \*file); + +You can go from a :c:type:`video_device` struct to the v4l2_device struct using: + +.. code-block:: c + + struct v4l2_device *v4l2_dev = vdev->v4l2_dev; + +- Device node name + +The :c:type:`video_device` node kernel name can be retrieved using: + + :c:func:`video_device_node_name <video_device_node_name>` + (:c:type:`vdev <video_device>`); + +The name is used as a hint by userspace tools such as udev. The function +should be used where possible instead of accessing the video_device::num and +video_device::minor fields. + +video_device functions and data structures +------------------------------------------ + +.. kernel-doc:: include/media/v4l2-dev.h diff --git a/Documentation/driver-api/media/v4l2-device.rst b/Documentation/driver-api/media/v4l2-device.rst new file mode 100644 index 0000000000..7bd9c45f55 --- /dev/null +++ b/Documentation/driver-api/media/v4l2-device.rst @@ -0,0 +1,146 @@ +.. SPDX-License-Identifier: GPL-2.0 + +V4L2 device instance +-------------------- + +Each device instance is represented by a struct v4l2_device. +Very simple devices can just allocate this struct, but most of the time you +would embed this struct inside a larger struct. + +You must register the device instance by calling: + + :c:func:`v4l2_device_register <v4l2_device_register>` + (dev, :c:type:`v4l2_dev <v4l2_device>`). + +Registration will initialize the :c:type:`v4l2_device` struct. If the +dev->driver_data field is ``NULL``, it will be linked to +:c:type:`v4l2_dev <v4l2_device>` argument. + +Drivers that want integration with the media device framework need to set +dev->driver_data manually to point to the driver-specific device structure +that embed the struct v4l2_device instance. This is achieved by a +``dev_set_drvdata()`` call before registering the V4L2 device instance. +They must also set the struct v4l2_device mdev field to point to a +properly initialized and registered :c:type:`media_device` instance. + +If :c:type:`v4l2_dev <v4l2_device>`\ ->name is empty then it will be set to a +value derived from dev (driver name followed by the bus_id, to be precise). +If you set it up before calling :c:func:`v4l2_device_register` then it will +be untouched. If dev is ``NULL``, then you **must** setup +:c:type:`v4l2_dev <v4l2_device>`\ ->name before calling +:c:func:`v4l2_device_register`. + +You can use :c:func:`v4l2_device_set_name` to set the name based on a driver +name and a driver-global atomic_t instance. This will generate names like +``ivtv0``, ``ivtv1``, etc. If the name ends with a digit, then it will insert +a dash: ``cx18-0``, ``cx18-1``, etc. This function returns the instance number. + +The first ``dev`` argument is normally the ``struct device`` pointer of a +``pci_dev``, ``usb_interface`` or ``platform_device``. It is rare for dev to +be ``NULL``, but it happens with ISA devices or when one device creates +multiple PCI devices, thus making it impossible to associate +:c:type:`v4l2_dev <v4l2_device>` with a particular parent. + +You can also supply a ``notify()`` callback that can be called by sub-devices +to notify you of events. Whether you need to set this depends on the +sub-device. Any notifications a sub-device supports must be defined in a header +in ``include/media/subdevice.h``. + +V4L2 devices are unregistered by calling: + + :c:func:`v4l2_device_unregister` + (:c:type:`v4l2_dev <v4l2_device>`). + +If the dev->driver_data field points to :c:type:`v4l2_dev <v4l2_device>`, +it will be reset to ``NULL``. Unregistering will also automatically unregister +all subdevs from the device. + +If you have a hotpluggable device (e.g. a USB device), then when a disconnect +happens the parent device becomes invalid. Since :c:type:`v4l2_device` has a +pointer to that parent device it has to be cleared as well to mark that the +parent is gone. To do this call: + + :c:func:`v4l2_device_disconnect` + (:c:type:`v4l2_dev <v4l2_device>`). + +This does *not* unregister the subdevs, so you still need to call the +:c:func:`v4l2_device_unregister` function for that. If your driver is not +hotpluggable, then there is no need to call :c:func:`v4l2_device_disconnect`. + +Sometimes you need to iterate over all devices registered by a specific +driver. This is usually the case if multiple device drivers use the same +hardware. E.g. the ivtvfb driver is a framebuffer driver that uses the ivtv +hardware. The same is true for alsa drivers for example. + +You can iterate over all registered devices as follows: + +.. code-block:: c + + static int callback(struct device *dev, void *p) + { + struct v4l2_device *v4l2_dev = dev_get_drvdata(dev); + + /* test if this device was inited */ + if (v4l2_dev == NULL) + return 0; + ... + return 0; + } + + int iterate(void *p) + { + struct device_driver *drv; + int err; + + /* Find driver 'ivtv' on the PCI bus. + pci_bus_type is a global. For USB buses use usb_bus_type. */ + drv = driver_find("ivtv", &pci_bus_type); + /* iterate over all ivtv device instances */ + err = driver_for_each_device(drv, NULL, p, callback); + put_driver(drv); + return err; + } + +Sometimes you need to keep a running counter of the device instance. This is +commonly used to map a device instance to an index of a module option array. + +The recommended approach is as follows: + +.. code-block:: c + + static atomic_t drv_instance = ATOMIC_INIT(0); + + static int drv_probe(struct pci_dev *pdev, const struct pci_device_id *pci_id) + { + ... + state->instance = atomic_inc_return(&drv_instance) - 1; + } + +If you have multiple device nodes then it can be difficult to know when it is +safe to unregister :c:type:`v4l2_device` for hotpluggable devices. For this +purpose :c:type:`v4l2_device` has refcounting support. The refcount is +increased whenever :c:func:`video_register_device` is called and it is +decreased whenever that device node is released. When the refcount reaches +zero, then the :c:type:`v4l2_device` release() callback is called. You can +do your final cleanup there. + +If other device nodes (e.g. ALSA) are created, then you can increase and +decrease the refcount manually as well by calling: + + :c:func:`v4l2_device_get` + (:c:type:`v4l2_dev <v4l2_device>`). + +or: + + :c:func:`v4l2_device_put` + (:c:type:`v4l2_dev <v4l2_device>`). + +Since the initial refcount is 1 you also need to call +:c:func:`v4l2_device_put` in the ``disconnect()`` callback (for USB devices) +or in the ``remove()`` callback (for e.g. PCI devices), otherwise the refcount +will never reach 0. + +v4l2_device functions and data structures +^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ + +.. kernel-doc:: include/media/v4l2-device.h diff --git a/Documentation/driver-api/media/v4l2-dv-timings.rst b/Documentation/driver-api/media/v4l2-dv-timings.rst new file mode 100644 index 0000000000..b178f93151 --- /dev/null +++ b/Documentation/driver-api/media/v4l2-dv-timings.rst @@ -0,0 +1,6 @@ +.. SPDX-License-Identifier: GPL-2.0 + +V4L2 DV Timings functions +^^^^^^^^^^^^^^^^^^^^^^^^^ + +.. kernel-doc:: include/media/v4l2-dv-timings.h diff --git a/Documentation/driver-api/media/v4l2-event.rst b/Documentation/driver-api/media/v4l2-event.rst new file mode 100644 index 0000000000..52d4fbc5d8 --- /dev/null +++ b/Documentation/driver-api/media/v4l2-event.rst @@ -0,0 +1,181 @@ +.. SPDX-License-Identifier: GPL-2.0 + +V4L2 events +----------- + +The V4L2 events provide a generic way to pass events to user space. +The driver must use :c:type:`v4l2_fh` to be able to support V4L2 events. + +Events are subscribed per-filehandle. An event specification consists of a +``type`` and is optionally associated with an object identified through the +``id`` field. If unused, then the ``id`` is 0. So an event is uniquely +identified by the ``(type, id)`` tuple. + +The :c:type:`v4l2_fh` struct has a list of subscribed events on its +``subscribed`` field. + +When the user subscribes to an event, a :c:type:`v4l2_subscribed_event` +struct is added to :c:type:`v4l2_fh`\ ``.subscribed``, one for every +subscribed event. + +Each :c:type:`v4l2_subscribed_event` struct ends with a +:c:type:`v4l2_kevent` ringbuffer, with the size given by the caller +of :c:func:`v4l2_event_subscribe`. This ringbuffer is used to store any events +raised by the driver. + +So every ``(type, ID)`` event tuple will have its own +:c:type:`v4l2_kevent` ringbuffer. This guarantees that if a driver is +generating lots of events of one type in a short time, then that will +not overwrite events of another type. + +But if you get more events of one type than the size of the +:c:type:`v4l2_kevent` ringbuffer, then the oldest event will be dropped +and the new one added. + +The :c:type:`v4l2_kevent` struct links into the ``available`` +list of the :c:type:`v4l2_fh` struct so :ref:`VIDIOC_DQEVENT` will +know which event to dequeue first. + +Finally, if the event subscription is associated with a particular object +such as a V4L2 control, then that object needs to know about that as well +so that an event can be raised by that object. So the ``node`` field can +be used to link the :c:type:`v4l2_subscribed_event` struct into a list of +such objects. + +So to summarize: + +- struct v4l2_fh has two lists: one of the ``subscribed`` events, + and one of the ``available`` events. + +- struct v4l2_subscribed_event has a ringbuffer of raised + (pending) events of that particular type. + +- If struct v4l2_subscribed_event is associated with a specific + object, then that object will have an internal list of + struct v4l2_subscribed_event so it knows who subscribed an + event to that object. + +Furthermore, the internal struct v4l2_subscribed_event has +``merge()`` and ``replace()`` callbacks which drivers can set. These +callbacks are called when a new event is raised and there is no more room. + +The ``replace()`` callback allows you to replace the payload of the old event +with that of the new event, merging any relevant data from the old payload +into the new payload that replaces it. It is called when this event type has +a ringbuffer with size is one, i.e. only one event can be stored in the +ringbuffer. + +The ``merge()`` callback allows you to merge the oldest event payload into +that of the second-oldest event payload. It is called when +the ringbuffer has size is greater than one. + +This way no status information is lost, just the intermediate steps leading +up to that state. + +A good example of these ``replace``/``merge`` callbacks is in v4l2-event.c: +``ctrls_replace()`` and ``ctrls_merge()`` callbacks for the control event. + +.. note:: + these callbacks can be called from interrupt context, so they must + be fast. + +In order to queue events to video device, drivers should call: + + :c:func:`v4l2_event_queue <v4l2_event_queue>` + (:c:type:`vdev <video_device>`, :c:type:`ev <v4l2_event>`) + +The driver's only responsibility is to fill in the type and the data fields. +The other fields will be filled in by V4L2. + +Event subscription +~~~~~~~~~~~~~~~~~~ + +Subscribing to an event is via: + + :c:func:`v4l2_event_subscribe <v4l2_event_subscribe>` + (:c:type:`fh <v4l2_fh>`, :c:type:`sub <v4l2_event_subscription>` , + elems, :c:type:`ops <v4l2_subscribed_event_ops>`) + + +This function is used to implement :c:type:`video_device`-> +:c:type:`ioctl_ops <v4l2_ioctl_ops>`-> ``vidioc_subscribe_event``, +but the driver must check first if the driver is able to produce events +with specified event id, and then should call +:c:func:`v4l2_event_subscribe` to subscribe the event. + +The elems argument is the size of the event queue for this event. If it is 0, +then the framework will fill in a default value (this depends on the event +type). + +The ops argument allows the driver to specify a number of callbacks: + +.. tabularcolumns:: |p{1.5cm}|p{16.0cm}| + +======== ============================================================== +Callback Description +======== ============================================================== +add called when a new listener gets added (subscribing to the same + event twice will only cause this callback to get called once) +del called when a listener stops listening +replace replace event 'old' with event 'new'. +merge merge event 'old' into event 'new'. +======== ============================================================== + +All 4 callbacks are optional, if you don't want to specify any callbacks +the ops argument itself maybe ``NULL``. + +Unsubscribing an event +~~~~~~~~~~~~~~~~~~~~~~ + +Unsubscribing to an event is via: + + :c:func:`v4l2_event_unsubscribe <v4l2_event_unsubscribe>` + (:c:type:`fh <v4l2_fh>`, :c:type:`sub <v4l2_event_subscription>`) + +This function is used to implement :c:type:`video_device`-> +:c:type:`ioctl_ops <v4l2_ioctl_ops>`-> ``vidioc_unsubscribe_event``. +A driver may call :c:func:`v4l2_event_unsubscribe` directly unless it +wants to be involved in unsubscription process. + +The special type ``V4L2_EVENT_ALL`` may be used to unsubscribe all events. The +drivers may want to handle this in a special way. + +Check if there's a pending event +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ + +Checking if there's a pending event is via: + + :c:func:`v4l2_event_pending <v4l2_event_pending>` + (:c:type:`fh <v4l2_fh>`) + + +This function returns the number of pending events. Useful when implementing +poll. + +How events work +~~~~~~~~~~~~~~~ + +Events are delivered to user space through the poll system call. The driver +can use :c:type:`v4l2_fh`->wait (a wait_queue_head_t) as the argument for +``poll_wait()``. + +There are standard and private events. New standard events must use the +smallest available event type. The drivers must allocate their events from +their own class starting from class base. Class base is +``V4L2_EVENT_PRIVATE_START`` + n * 1000 where n is the lowest available number. +The first event type in the class is reserved for future use, so the first +available event type is 'class base + 1'. + +An example on how the V4L2 events may be used can be found in the OMAP +3 ISP driver (``drivers/media/platform/ti/omap3isp``). + +A subdev can directly send an event to the :c:type:`v4l2_device` notify +function with ``V4L2_DEVICE_NOTIFY_EVENT``. This allows the bridge to map +the subdev that sends the event to the video node(s) associated with the +subdev that need to be informed about such an event. + +V4L2 event functions and data structures +^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ + +.. kernel-doc:: include/media/v4l2-event.h + diff --git a/Documentation/driver-api/media/v4l2-fh.rst b/Documentation/driver-api/media/v4l2-fh.rst new file mode 100644 index 0000000000..3eeaa8da0c --- /dev/null +++ b/Documentation/driver-api/media/v4l2-fh.rst @@ -0,0 +1,141 @@ +.. SPDX-License-Identifier: GPL-2.0 + +V4L2 File handlers +------------------ + +struct v4l2_fh provides a way to easily keep file handle specific +data that is used by the V4L2 framework. + +.. attention:: + New drivers must use struct v4l2_fh + since it is also used to implement priority handling + (:ref:`VIDIOC_G_PRIORITY`). + +The users of :c:type:`v4l2_fh` (in the V4L2 framework, not the driver) know +whether a driver uses :c:type:`v4l2_fh` as its ``file->private_data`` pointer +by testing the ``V4L2_FL_USES_V4L2_FH`` bit in :c:type:`video_device`->flags. +This bit is set whenever :c:func:`v4l2_fh_init` is called. + +struct v4l2_fh is allocated as a part of the driver's own file handle +structure and ``file->private_data`` is set to it in the driver's ``open()`` +function by the driver. + +In many cases the struct v4l2_fh will be embedded in a larger +structure. In that case you should call: + +#) :c:func:`v4l2_fh_init` and :c:func:`v4l2_fh_add` in ``open()`` +#) :c:func:`v4l2_fh_del` and :c:func:`v4l2_fh_exit` in ``release()`` + +Drivers can extract their own file handle structure by using the container_of +macro. + +Example: + +.. code-block:: c + + struct my_fh { + int blah; + struct v4l2_fh fh; + }; + + ... + + int my_open(struct file *file) + { + struct my_fh *my_fh; + struct video_device *vfd; + int ret; + + ... + + my_fh = kzalloc(sizeof(*my_fh), GFP_KERNEL); + + ... + + v4l2_fh_init(&my_fh->fh, vfd); + + ... + + file->private_data = &my_fh->fh; + v4l2_fh_add(&my_fh->fh); + return 0; + } + + int my_release(struct file *file) + { + struct v4l2_fh *fh = file->private_data; + struct my_fh *my_fh = container_of(fh, struct my_fh, fh); + + ... + v4l2_fh_del(&my_fh->fh); + v4l2_fh_exit(&my_fh->fh); + kfree(my_fh); + return 0; + } + +Below is a short description of the :c:type:`v4l2_fh` functions used: + +:c:func:`v4l2_fh_init <v4l2_fh_init>` +(:c:type:`fh <v4l2_fh>`, :c:type:`vdev <video_device>`) + + +- Initialise the file handle. This **MUST** be performed in the driver's + :c:type:`v4l2_file_operations`->open() handler. + + +:c:func:`v4l2_fh_add <v4l2_fh_add>` +(:c:type:`fh <v4l2_fh>`) + +- Add a :c:type:`v4l2_fh` to :c:type:`video_device` file handle list. + Must be called once the file handle is completely initialized. + +:c:func:`v4l2_fh_del <v4l2_fh_del>` +(:c:type:`fh <v4l2_fh>`) + +- Unassociate the file handle from :c:type:`video_device`. The file handle + exit function may now be called. + +:c:func:`v4l2_fh_exit <v4l2_fh_exit>` +(:c:type:`fh <v4l2_fh>`) + +- Uninitialise the file handle. After uninitialisation the :c:type:`v4l2_fh` + memory can be freed. + + +If struct v4l2_fh is not embedded, then you can use these helper functions: + +:c:func:`v4l2_fh_open <v4l2_fh_open>` +(struct file \*filp) + +- This allocates a struct v4l2_fh, initializes it and adds it to + the struct video_device associated with the file struct. + +:c:func:`v4l2_fh_release <v4l2_fh_release>` +(struct file \*filp) + +- This deletes it from the struct video_device associated with the + file struct, uninitialised the :c:type:`v4l2_fh` and frees it. + +These two functions can be plugged into the v4l2_file_operation's ``open()`` +and ``release()`` ops. + +Several drivers need to do something when the first file handle is opened and +when the last file handle closes. Two helper functions were added to check +whether the :c:type:`v4l2_fh` struct is the only open filehandle of the +associated device node: + +:c:func:`v4l2_fh_is_singular <v4l2_fh_is_singular>` +(:c:type:`fh <v4l2_fh>`) + +- Returns 1 if the file handle is the only open file handle, else 0. + +:c:func:`v4l2_fh_is_singular_file <v4l2_fh_is_singular_file>` +(struct file \*filp) + +- Same, but it calls v4l2_fh_is_singular with filp->private_data. + + +V4L2 fh functions and data structures +^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ + +.. kernel-doc:: include/media/v4l2-fh.h diff --git a/Documentation/driver-api/media/v4l2-flash-led-class.rst b/Documentation/driver-api/media/v4l2-flash-led-class.rst new file mode 100644 index 0000000000..2aa6bed9b8 --- /dev/null +++ b/Documentation/driver-api/media/v4l2-flash-led-class.rst @@ -0,0 +1,6 @@ +.. SPDX-License-Identifier: GPL-2.0 + +V4L2 flash functions and data structures +^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ + +.. kernel-doc:: include/media/v4l2-flash-led-class.h diff --git a/Documentation/driver-api/media/v4l2-fwnode.rst b/Documentation/driver-api/media/v4l2-fwnode.rst new file mode 100644 index 0000000000..e313b6cddc --- /dev/null +++ b/Documentation/driver-api/media/v4l2-fwnode.rst @@ -0,0 +1,5 @@ +.. SPDX-License-Identifier: GPL-2.0 + +V4L2 fwnode kAPI +^^^^^^^^^^^^^^^^ +.. kernel-doc:: include/media/v4l2-fwnode.h diff --git a/Documentation/driver-api/media/v4l2-intro.rst b/Documentation/driver-api/media/v4l2-intro.rst new file mode 100644 index 0000000000..4d54fa9d7a --- /dev/null +++ b/Documentation/driver-api/media/v4l2-intro.rst @@ -0,0 +1,76 @@ +.. SPDX-License-Identifier: GPL-2.0 + +Introduction +------------ + +The V4L2 drivers tend to be very complex due to the complexity of the +hardware: most devices have multiple ICs, export multiple device nodes in +/dev, and create also non-V4L2 devices such as DVB, ALSA, FB, I2C and input +(IR) devices. + +Especially the fact that V4L2 drivers have to setup supporting ICs to +do audio/video muxing/encoding/decoding makes it more complex than most. +Usually these ICs are connected to the main bridge driver through one or +more I2C buses, but other buses can also be used. Such devices are +called 'sub-devices'. + +For a long time the framework was limited to the video_device struct for +creating V4L device nodes and video_buf for handling the video buffers +(note that this document does not discuss the video_buf framework). + +This meant that all drivers had to do the setup of device instances and +connecting to sub-devices themselves. Some of this is quite complicated +to do right and many drivers never did do it correctly. + +There is also a lot of common code that could never be refactored due to +the lack of a framework. + +So this framework sets up the basic building blocks that all drivers +need and this same framework should make it much easier to refactor +common code into utility functions shared by all drivers. + +A good example to look at as a reference is the v4l2-pci-skeleton.c +source that is available in samples/v4l/. It is a skeleton driver for +a PCI capture card, and demonstrates how to use the V4L2 driver +framework. It can be used as a template for real PCI video capture driver. + +Structure of a V4L driver +------------------------- + +All drivers have the following structure: + +1) A struct for each device instance containing the device state. + +2) A way of initializing and commanding sub-devices (if any). + +3) Creating V4L2 device nodes (/dev/videoX, /dev/vbiX and /dev/radioX) + and keeping track of device-node specific data. + +4) Filehandle-specific structs containing per-filehandle data; + +5) video buffer handling. + +This is a rough schematic of how it all relates: + +.. code-block:: none + + device instances + | + +-sub-device instances + | + \-V4L2 device nodes + | + \-filehandle instances + + +Structure of the V4L2 framework +------------------------------- + +The framework closely resembles the driver structure: it has a v4l2_device +struct for the device instance data, a v4l2_subdev struct to refer to +sub-device instances, the video_device struct stores V4L2 device node data +and the v4l2_fh struct keeps track of filehandle instances. + +The V4L2 framework also optionally integrates with the media framework. If a +driver sets the struct v4l2_device mdev field, sub-devices and video nodes +will automatically appear in the media framework as entities. diff --git a/Documentation/driver-api/media/v4l2-mc.rst b/Documentation/driver-api/media/v4l2-mc.rst new file mode 100644 index 0000000000..0c352ac588 --- /dev/null +++ b/Documentation/driver-api/media/v4l2-mc.rst @@ -0,0 +1,6 @@ +.. SPDX-License-Identifier: GPL-2.0 + +V4L2 Media Controller functions and data structures +^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ + +.. kernel-doc:: include/media/v4l2-mc.h diff --git a/Documentation/driver-api/media/v4l2-mediabus.rst b/Documentation/driver-api/media/v4l2-mediabus.rst new file mode 100644 index 0000000000..1f2254cba9 --- /dev/null +++ b/Documentation/driver-api/media/v4l2-mediabus.rst @@ -0,0 +1,6 @@ +.. SPDX-License-Identifier: GPL-2.0 + +V4L2 Media Bus functions and data structures +^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ + +.. kernel-doc:: include/media/v4l2-mediabus.h diff --git a/Documentation/driver-api/media/v4l2-mem2mem.rst b/Documentation/driver-api/media/v4l2-mem2mem.rst new file mode 100644 index 0000000000..a43b31cc82 --- /dev/null +++ b/Documentation/driver-api/media/v4l2-mem2mem.rst @@ -0,0 +1,6 @@ +.. SPDX-License-Identifier: GPL-2.0 + +V4L2 Memory to Memory functions and data structures +^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ + +.. kernel-doc:: include/media/v4l2-mem2mem.h diff --git a/Documentation/driver-api/media/v4l2-rect.rst b/Documentation/driver-api/media/v4l2-rect.rst new file mode 100644 index 0000000000..fc315cd841 --- /dev/null +++ b/Documentation/driver-api/media/v4l2-rect.rst @@ -0,0 +1,6 @@ +.. SPDX-License-Identifier: GPL-2.0 + +V4L2 rect helper functions +^^^^^^^^^^^^^^^^^^^^^^^^^^ + +.. kernel-doc:: include/media/v4l2-rect.h diff --git a/Documentation/driver-api/media/v4l2-subdev.rst b/Documentation/driver-api/media/v4l2-subdev.rst new file mode 100644 index 0000000000..e56b50b3f2 --- /dev/null +++ b/Documentation/driver-api/media/v4l2-subdev.rst @@ -0,0 +1,637 @@ +.. SPDX-License-Identifier: GPL-2.0 + +V4L2 sub-devices +---------------- + +Many drivers need to communicate with sub-devices. These devices can do all +sort of tasks, but most commonly they handle audio and/or video muxing, +encoding or decoding. For webcams common sub-devices are sensors and camera +controllers. + +Usually these are I2C devices, but not necessarily. In order to provide the +driver with a consistent interface to these sub-devices the +:c:type:`v4l2_subdev` struct (v4l2-subdev.h) was created. + +Each sub-device driver must have a :c:type:`v4l2_subdev` struct. This struct +can be stand-alone for simple sub-devices or it might be embedded in a larger +struct if more state information needs to be stored. Usually there is a +low-level device struct (e.g. ``i2c_client``) that contains the device data as +setup by the kernel. It is recommended to store that pointer in the private +data of :c:type:`v4l2_subdev` using :c:func:`v4l2_set_subdevdata`. That makes +it easy to go from a :c:type:`v4l2_subdev` to the actual low-level bus-specific +device data. + +You also need a way to go from the low-level struct to :c:type:`v4l2_subdev`. +For the common i2c_client struct the i2c_set_clientdata() call is used to store +a :c:type:`v4l2_subdev` pointer, for other buses you may have to use other +methods. + +Bridges might also need to store per-subdev private data, such as a pointer to +bridge-specific per-subdev private data. The :c:type:`v4l2_subdev` structure +provides host private data for that purpose that can be accessed with +:c:func:`v4l2_get_subdev_hostdata` and :c:func:`v4l2_set_subdev_hostdata`. + +From the bridge driver perspective, you load the sub-device module and somehow +obtain the :c:type:`v4l2_subdev` pointer. For i2c devices this is easy: you call +``i2c_get_clientdata()``. For other buses something similar needs to be done. +Helper functions exist for sub-devices on an I2C bus that do most of this +tricky work for you. + +Each :c:type:`v4l2_subdev` contains function pointers that sub-device drivers +can implement (or leave ``NULL`` if it is not applicable). Since sub-devices can +do so many different things and you do not want to end up with a huge ops struct +of which only a handful of ops are commonly implemented, the function pointers +are sorted according to category and each category has its own ops struct. + +The top-level ops struct contains pointers to the category ops structs, which +may be NULL if the subdev driver does not support anything from that category. + +It looks like this: + +.. code-block:: c + + struct v4l2_subdev_core_ops { + int (*log_status)(struct v4l2_subdev *sd); + int (*init)(struct v4l2_subdev *sd, u32 val); + ... + }; + + struct v4l2_subdev_tuner_ops { + ... + }; + + struct v4l2_subdev_audio_ops { + ... + }; + + struct v4l2_subdev_video_ops { + ... + }; + + struct v4l2_subdev_pad_ops { + ... + }; + + struct v4l2_subdev_ops { + const struct v4l2_subdev_core_ops *core; + const struct v4l2_subdev_tuner_ops *tuner; + const struct v4l2_subdev_audio_ops *audio; + const struct v4l2_subdev_video_ops *video; + const struct v4l2_subdev_pad_ops *video; + }; + +The core ops are common to all subdevs, the other categories are implemented +depending on the sub-device. E.g. a video device is unlikely to support the +audio ops and vice versa. + +This setup limits the number of function pointers while still making it easy +to add new ops and categories. + +A sub-device driver initializes the :c:type:`v4l2_subdev` struct using: + + :c:func:`v4l2_subdev_init <v4l2_subdev_init>` + (:c:type:`sd <v4l2_subdev>`, &\ :c:type:`ops <v4l2_subdev_ops>`). + + +Afterwards you need to initialize :c:type:`sd <v4l2_subdev>`->name with a +unique name and set the module owner. This is done for you if you use the +i2c helper functions. + +If integration with the media framework is needed, you must initialize the +:c:type:`media_entity` struct embedded in the :c:type:`v4l2_subdev` struct +(entity field) by calling :c:func:`media_entity_pads_init`, if the entity has +pads: + +.. code-block:: c + + struct media_pad *pads = &my_sd->pads; + int err; + + err = media_entity_pads_init(&sd->entity, npads, pads); + +The pads array must have been previously initialized. There is no need to +manually set the struct media_entity function and name fields, but the +revision field must be initialized if needed. + +A reference to the entity will be automatically acquired/released when the +subdev device node (if any) is opened/closed. + +Don't forget to cleanup the media entity before the sub-device is destroyed: + +.. code-block:: c + + media_entity_cleanup(&sd->entity); + +If a sub-device driver implements sink pads, the subdev driver may set the +link_validate field in :c:type:`v4l2_subdev_pad_ops` to provide its own link +validation function. For every link in the pipeline, the link_validate pad +operation of the sink end of the link is called. In both cases the driver is +still responsible for validating the correctness of the format configuration +between sub-devices and video nodes. + +If link_validate op is not set, the default function +:c:func:`v4l2_subdev_link_validate_default` is used instead. This function +ensures that width, height and the media bus pixel code are equal on both source +and sink of the link. Subdev drivers are also free to use this function to +perform the checks mentioned above in addition to their own checks. + +Subdev registration +~~~~~~~~~~~~~~~~~~~ + +There are currently two ways to register subdevices with the V4L2 core. The +first (traditional) possibility is to have subdevices registered by bridge +drivers. This can be done when the bridge driver has the complete information +about subdevices connected to it and knows exactly when to register them. This +is typically the case for internal subdevices, like video data processing units +within SoCs or complex PCI(e) boards, camera sensors in USB cameras or connected +to SoCs, which pass information about them to bridge drivers, usually in their +platform data. + +There are however also situations where subdevices have to be registered +asynchronously to bridge devices. An example of such a configuration is a Device +Tree based system where information about subdevices is made available to the +system independently from the bridge devices, e.g. when subdevices are defined +in DT as I2C device nodes. The API used in this second case is described further +below. + +Using one or the other registration method only affects the probing process, the +run-time bridge-subdevice interaction is in both cases the same. + +Registering synchronous sub-devices +^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ + +In the **synchronous** case a device (bridge) driver needs to register the +:c:type:`v4l2_subdev` with the v4l2_device: + + :c:func:`v4l2_device_register_subdev <v4l2_device_register_subdev>` + (:c:type:`v4l2_dev <v4l2_device>`, :c:type:`sd <v4l2_subdev>`). + +This can fail if the subdev module disappeared before it could be registered. +After this function was called successfully the subdev->dev field points to +the :c:type:`v4l2_device`. + +If the v4l2_device parent device has a non-NULL mdev field, the sub-device +entity will be automatically registered with the media device. + +You can unregister a sub-device using: + + :c:func:`v4l2_device_unregister_subdev <v4l2_device_unregister_subdev>` + (:c:type:`sd <v4l2_subdev>`). + +Afterwards the subdev module can be unloaded and +:c:type:`sd <v4l2_subdev>`->dev == ``NULL``. + +Registering asynchronous sub-devices +^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ + +In the **asynchronous** case subdevice probing can be invoked independently of +the bridge driver availability. The subdevice driver then has to verify whether +all the requirements for a successful probing are satisfied. This can include a +check for a master clock availability. If any of the conditions aren't satisfied +the driver might decide to return ``-EPROBE_DEFER`` to request further reprobing +attempts. Once all conditions are met the subdevice shall be registered using +the :c:func:`v4l2_async_register_subdev` function. Unregistration is +performed using the :c:func:`v4l2_async_unregister_subdev` call. Subdevices +registered this way are stored in a global list of subdevices, ready to be +picked up by bridge drivers. + +Asynchronous sub-device notifiers +^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ + +Bridge drivers in turn have to register a notifier object. This is performed +using the :c:func:`v4l2_async_nf_register` call. To unregister the notifier the +driver has to call :c:func:`v4l2_async_nf_unregister`. Before releasing memory +of an unregister notifier, it must be cleaned up by calling +:c:func:`v4l2_async_nf_cleanup`. + +Before registering the notifier, bridge drivers must do two things: first, the +notifier must be initialized using the :c:func:`v4l2_async_nf_init`. Second, +bridge drivers can then begin to form a list of async connection descriptors +that the bridge device needs for its +operation. :c:func:`v4l2_async_nf_add_fwnode`, +:c:func:`v4l2_async_nf_add_fwnode_remote` and :c:func:`v4l2_async_nf_add_i2c` + +Async connection descriptors describe connections to external sub-devices the +drivers for which are not yet probed. Based on an async connection, a media data +or ancillary link may be created when the related sub-device becomes +available. There may be one or more async connections to a given sub-device but +this is not known at the time of adding the connections to the notifier. Async +connections are bound as matching async sub-devices are found, one by one. + +Asynchronous sub-device notifier for sub-devices +^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ + +A driver that registers an asynchronous sub-device may also register an +asynchronous notifier. This is called an asynchronous sub-device notifier andthe +process is similar to that of a bridge driver apart from that the notifier is +initialised using :c:func:`v4l2_async_subdev_nf_init` instead. A sub-device +notifier may complete only after the V4L2 device becomes available, i.e. there's +a path via async sub-devices and notifiers to a notifier that is not an +asynchronous sub-device notifier. + +Asynchronous sub-device registration helper for camera sensor drivers +^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ + +:c:func:`v4l2_async_register_subdev_sensor` is a helper function for sensor +drivers registering their own async connection, but it also registers a notifier +and further registers async connections for lens and flash devices found in +firmware. The notifier for the sub-device is unregistered and cleaned up with +the async sub-device, using :c:func:`v4l2_async_unregister_subdev`. + +Asynchronous sub-device notifier example +^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ + +These functions allocate an async connection descriptor which is of type struct +:c:type:`v4l2_async_connection` embedded in a driver-specific struct. The &struct +:c:type:`v4l2_async_connection` shall be the first member of this struct: + +.. code-block:: c + + struct my_async_connection { + struct v4l2_async_connection asc; + ... + }; + + struct my_async_connection *my_asc; + struct fwnode_handle *ep; + + ... + + my_asc = v4l2_async_nf_add_fwnode_remote(¬ifier, ep, + struct my_async_connection); + fwnode_handle_put(ep); + + if (IS_ERR(my_asc)) + return PTR_ERR(my_asc); + +Asynchronous sub-device notifier callbacks +^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ + +The V4L2 core will then use these connection descriptors to match asynchronously +registered subdevices to them. If a match is detected the ``.bound()`` notifier +callback is called. After all connections have been bound the .complete() +callback is called. When a connection is removed from the system the +``.unbind()`` method is called. All three callbacks are optional. + +Drivers can store any type of custom data in their driver-specific +:c:type:`v4l2_async_connection` wrapper. If any of that data requires special +handling when the structure is freed, drivers must implement the ``.destroy()`` +notifier callback. The framework will call it right before freeing the +:c:type:`v4l2_async_connection`. + +Calling subdev operations +~~~~~~~~~~~~~~~~~~~~~~~~~ + +The advantage of using :c:type:`v4l2_subdev` is that it is a generic struct and +does not contain any knowledge about the underlying hardware. So a driver might +contain several subdevs that use an I2C bus, but also a subdev that is +controlled through GPIO pins. This distinction is only relevant when setting +up the device, but once the subdev is registered it is completely transparent. + +Once the subdev has been registered you can call an ops function either +directly: + +.. code-block:: c + + err = sd->ops->core->g_std(sd, &norm); + +but it is better and easier to use this macro: + +.. code-block:: c + + err = v4l2_subdev_call(sd, core, g_std, &norm); + +The macro will do the right ``NULL`` pointer checks and returns ``-ENODEV`` +if :c:type:`sd <v4l2_subdev>` is ``NULL``, ``-ENOIOCTLCMD`` if either +:c:type:`sd <v4l2_subdev>`->core or :c:type:`sd <v4l2_subdev>`->core->g_std is ``NULL``, or the actual result of the +:c:type:`sd <v4l2_subdev>`->ops->core->g_std ops. + +It is also possible to call all or a subset of the sub-devices: + +.. code-block:: c + + v4l2_device_call_all(v4l2_dev, 0, core, g_std, &norm); + +Any subdev that does not support this ops is skipped and error results are +ignored. If you want to check for errors use this: + +.. code-block:: c + + err = v4l2_device_call_until_err(v4l2_dev, 0, core, g_std, &norm); + +Any error except ``-ENOIOCTLCMD`` will exit the loop with that error. If no +errors (except ``-ENOIOCTLCMD``) occurred, then 0 is returned. + +The second argument to both calls is a group ID. If 0, then all subdevs are +called. If non-zero, then only those whose group ID match that value will +be called. Before a bridge driver registers a subdev it can set +:c:type:`sd <v4l2_subdev>`->grp_id to whatever value it wants (it's 0 by +default). This value is owned by the bridge driver and the sub-device driver +will never modify or use it. + +The group ID gives the bridge driver more control how callbacks are called. +For example, there may be multiple audio chips on a board, each capable of +changing the volume. But usually only one will actually be used when the +user want to change the volume. You can set the group ID for that subdev to +e.g. AUDIO_CONTROLLER and specify that as the group ID value when calling +``v4l2_device_call_all()``. That ensures that it will only go to the subdev +that needs it. + +If the sub-device needs to notify its v4l2_device parent of an event, then +it can call ``v4l2_subdev_notify(sd, notification, arg)``. This macro checks +whether there is a ``notify()`` callback defined and returns ``-ENODEV`` if not. +Otherwise the result of the ``notify()`` call is returned. + +V4L2 sub-device userspace API +----------------------------- + +Bridge drivers traditionally expose one or multiple video nodes to userspace, +and control subdevices through the :c:type:`v4l2_subdev_ops` operations in +response to video node operations. This hides the complexity of the underlying +hardware from applications. For complex devices, finer-grained control of the +device than what the video nodes offer may be required. In those cases, bridge +drivers that implement :ref:`the media controller API <media_controller>` may +opt for making the subdevice operations directly accessible from userspace. + +Device nodes named ``v4l-subdev``\ *X* can be created in ``/dev`` to access +sub-devices directly. If a sub-device supports direct userspace configuration +it must set the ``V4L2_SUBDEV_FL_HAS_DEVNODE`` flag before being registered. + +After registering sub-devices, the :c:type:`v4l2_device` driver can create +device nodes for all registered sub-devices marked with +``V4L2_SUBDEV_FL_HAS_DEVNODE`` by calling +:c:func:`v4l2_device_register_subdev_nodes`. Those device nodes will be +automatically removed when sub-devices are unregistered. + +The device node handles a subset of the V4L2 API. + +``VIDIOC_QUERYCTRL``, +``VIDIOC_QUERYMENU``, +``VIDIOC_G_CTRL``, +``VIDIOC_S_CTRL``, +``VIDIOC_G_EXT_CTRLS``, +``VIDIOC_S_EXT_CTRLS`` and +``VIDIOC_TRY_EXT_CTRLS``: + + The controls ioctls are identical to the ones defined in V4L2. They + behave identically, with the only exception that they deal only with + controls implemented in the sub-device. Depending on the driver, those + controls can be also be accessed through one (or several) V4L2 device + nodes. + +``VIDIOC_DQEVENT``, +``VIDIOC_SUBSCRIBE_EVENT`` and +``VIDIOC_UNSUBSCRIBE_EVENT`` + + The events ioctls are identical to the ones defined in V4L2. They + behave identically, with the only exception that they deal only with + events generated by the sub-device. Depending on the driver, those + events can also be reported by one (or several) V4L2 device nodes. + + Sub-device drivers that want to use events need to set the + ``V4L2_SUBDEV_FL_HAS_EVENTS`` :c:type:`v4l2_subdev`.flags before registering + the sub-device. After registration events can be queued as usual on the + :c:type:`v4l2_subdev`.devnode device node. + + To properly support events, the ``poll()`` file operation is also + implemented. + +Private ioctls + + All ioctls not in the above list are passed directly to the sub-device + driver through the core::ioctl operation. + +Read-only sub-device userspace API +---------------------------------- + +Bridge drivers that control their connected subdevices through direct calls to +the kernel API realized by :c:type:`v4l2_subdev_ops` structure do not usually +want userspace to be able to change the same parameters through the subdevice +device node and thus do not usually register any. + +It is sometimes useful to report to userspace the current subdevice +configuration through a read-only API, that does not permit applications to +change to the device parameters but allows interfacing to the subdevice device +node to inspect them. + +For instance, to implement cameras based on computational photography, userspace +needs to know the detailed camera sensor configuration (in terms of skipping, +binning, cropping and scaling) for each supported output resolution. To support +such use cases, bridge drivers may expose the subdevice operations to userspace +through a read-only API. + +To create a read-only device node for all the subdevices registered with the +``V4L2_SUBDEV_FL_HAS_DEVNODE`` set, the :c:type:`v4l2_device` driver should call +:c:func:`v4l2_device_register_ro_subdev_nodes`. + +Access to the following ioctls for userspace applications is restricted on +sub-device device nodes registered with +:c:func:`v4l2_device_register_ro_subdev_nodes`. + +``VIDIOC_SUBDEV_S_FMT``, +``VIDIOC_SUBDEV_S_CROP``, +``VIDIOC_SUBDEV_S_SELECTION``: + + These ioctls are only allowed on a read-only subdevice device node + for the :ref:`V4L2_SUBDEV_FORMAT_TRY <v4l2-subdev-format-whence>` + formats and selection rectangles. + +``VIDIOC_SUBDEV_S_FRAME_INTERVAL``, +``VIDIOC_SUBDEV_S_DV_TIMINGS``, +``VIDIOC_SUBDEV_S_STD``: + + These ioctls are not allowed on a read-only subdevice node. + +In case the ioctl is not allowed, or the format to modify is set to +``V4L2_SUBDEV_FORMAT_ACTIVE``, the core returns a negative error code and +the errno variable is set to ``-EPERM``. + +I2C sub-device drivers +---------------------- + +Since these drivers are so common, special helper functions are available to +ease the use of these drivers (``v4l2-common.h``). + +The recommended method of adding :c:type:`v4l2_subdev` support to an I2C driver +is to embed the :c:type:`v4l2_subdev` struct into the state struct that is +created for each I2C device instance. Very simple devices have no state +struct and in that case you can just create a :c:type:`v4l2_subdev` directly. + +A typical state struct would look like this (where 'chipname' is replaced by +the name of the chip): + +.. code-block:: c + + struct chipname_state { + struct v4l2_subdev sd; + ... /* additional state fields */ + }; + +Initialize the :c:type:`v4l2_subdev` struct as follows: + +.. code-block:: c + + v4l2_i2c_subdev_init(&state->sd, client, subdev_ops); + +This function will fill in all the fields of :c:type:`v4l2_subdev` ensure that +the :c:type:`v4l2_subdev` and i2c_client both point to one another. + +You should also add a helper inline function to go from a :c:type:`v4l2_subdev` +pointer to a chipname_state struct: + +.. code-block:: c + + static inline struct chipname_state *to_state(struct v4l2_subdev *sd) + { + return container_of(sd, struct chipname_state, sd); + } + +Use this to go from the :c:type:`v4l2_subdev` struct to the ``i2c_client`` +struct: + +.. code-block:: c + + struct i2c_client *client = v4l2_get_subdevdata(sd); + +And this to go from an ``i2c_client`` to a :c:type:`v4l2_subdev` struct: + +.. code-block:: c + + struct v4l2_subdev *sd = i2c_get_clientdata(client); + +Make sure to call +:c:func:`v4l2_device_unregister_subdev`\ (:c:type:`sd <v4l2_subdev>`) +when the ``remove()`` callback is called. This will unregister the sub-device +from the bridge driver. It is safe to call this even if the sub-device was +never registered. + +You need to do this because when the bridge driver destroys the i2c adapter +the ``remove()`` callbacks are called of the i2c devices on that adapter. +After that the corresponding v4l2_subdev structures are invalid, so they +have to be unregistered first. Calling +:c:func:`v4l2_device_unregister_subdev`\ (:c:type:`sd <v4l2_subdev>`) +from the ``remove()`` callback ensures that this is always done correctly. + + +The bridge driver also has some helper functions it can use: + +.. code-block:: c + + struct v4l2_subdev *sd = v4l2_i2c_new_subdev(v4l2_dev, adapter, + "module_foo", "chipid", 0x36, NULL); + +This loads the given module (can be ``NULL`` if no module needs to be loaded) +and calls :c:func:`i2c_new_client_device` with the given ``i2c_adapter`` and +chip/address arguments. If all goes well, then it registers the subdev with +the v4l2_device. + +You can also use the last argument of :c:func:`v4l2_i2c_new_subdev` to pass +an array of possible I2C addresses that it should probe. These probe addresses +are only used if the previous argument is 0. A non-zero argument means that you +know the exact i2c address so in that case no probing will take place. + +Both functions return ``NULL`` if something went wrong. + +Note that the chipid you pass to :c:func:`v4l2_i2c_new_subdev` is usually +the same as the module name. It allows you to specify a chip variant, e.g. +"saa7114" or "saa7115". In general though the i2c driver autodetects this. +The use of chipid is something that needs to be looked at more closely at a +later date. It differs between i2c drivers and as such can be confusing. +To see which chip variants are supported you can look in the i2c driver code +for the i2c_device_id table. This lists all the possibilities. + +There are one more helper function: + +:c:func:`v4l2_i2c_new_subdev_board` uses an :c:type:`i2c_board_info` struct +which is passed to the i2c driver and replaces the irq, platform_data and addr +arguments. + +If the subdev supports the s_config core ops, then that op is called with +the irq and platform_data arguments after the subdev was setup. + +The :c:func:`v4l2_i2c_new_subdev` function will call +:c:func:`v4l2_i2c_new_subdev_board`, internally filling a +:c:type:`i2c_board_info` structure using the ``client_type`` and the +``addr`` to fill it. + +Centrally managed subdev active state +------------------------------------- + +Traditionally V4L2 subdev drivers maintained internal state for the active +device configuration. This is often implemented as e.g. an array of struct +v4l2_mbus_framefmt, one entry for each pad, and similarly for crop and compose +rectangles. + +In addition to the active configuration, each subdev file handle has an array of +struct v4l2_subdev_pad_config, managed by the V4L2 core, which contains the try +configuration. + +To simplify the subdev drivers the V4L2 subdev API now optionally supports a +centrally managed active configuration represented by +:c:type:`v4l2_subdev_state`. One instance of state, which contains the active +device configuration, is stored in the sub-device itself as part of +the :c:type:`v4l2_subdev` structure, while the core associates a try state to +each open file handle, to store the try configuration related to that file +handle. + +Sub-device drivers can opt-in and use state to manage their active configuration +by initializing the subdevice state with a call to v4l2_subdev_init_finalize() +before registering the sub-device. They must also call v4l2_subdev_cleanup() +to release all the allocated resources before unregistering the sub-device. +The core automatically allocates and initializes a state for each open file +handle to store the try configurations and frees it when closing the file +handle. + +V4L2 sub-device operations that use both the :ref:`ACTIVE and TRY formats +<v4l2-subdev-format-whence>` receive the correct state to operate on through +the 'state' parameter. The state must be locked and unlocked by the +caller by calling :c:func:`v4l2_subdev_lock_state()` and +:c:func:`v4l2_subdev_unlock_state()`. The caller can do so by calling the subdev +operation through the :c:func:`v4l2_subdev_call_state_active()` macro. + +Operations that do not receive a state parameter implicitly operate on the +subdevice active state, which drivers can exclusively access by +calling :c:func:`v4l2_subdev_lock_and_get_active_state()`. The sub-device active +state must equally be released by calling :c:func:`v4l2_subdev_unlock_state()`. + +Drivers must never manually access the state stored in the :c:type:`v4l2_subdev` +or in the file handle without going through the designated helpers. + +While the V4L2 core passes the correct try or active state to the subdevice +operations, many existing device drivers pass a NULL state when calling +operations with :c:func:`v4l2_subdev_call()`. This legacy construct causes +issues with subdevice drivers that let the V4L2 core manage the active state, +as they expect to receive the appropriate state as a parameter. To help the +conversion of subdevice drivers to a managed active state without having to +convert all callers at the same time, an additional wrapper layer has been +added to v4l2_subdev_call(), which handles the NULL case by getting and locking +the callee's active state with :c:func:`v4l2_subdev_lock_and_get_active_state()`, +and unlocking the state after the call. + +The whole subdev state is in reality split into three parts: the +v4l2_subdev_state, subdev controls and subdev driver's internal state. In the +future these parts should be combined into a single state. For the time being +we need a way to handle the locking for these parts. This can be accomplished +by sharing a lock. The v4l2_ctrl_handler already supports this via its 'lock' +pointer and the same model is used with states. The driver can do the following +before calling v4l2_subdev_init_finalize(): + +.. code-block:: c + + sd->ctrl_handler->lock = &priv->mutex; + sd->state_lock = &priv->mutex; + +This shares the driver's private mutex between the controls and the states. + +Streams, multiplexed media pads and internal routing +---------------------------------------------------- + +A subdevice driver can implement support for multiplexed streams by setting +the V4L2_SUBDEV_FL_STREAMS subdev flag and implementing support for +centrally managed subdev active state, routing and stream based +configuration. + +V4L2 sub-device functions and data structures +--------------------------------------------- + +.. kernel-doc:: include/media/v4l2-subdev.h diff --git a/Documentation/driver-api/media/v4l2-tuner.rst b/Documentation/driver-api/media/v4l2-tuner.rst new file mode 100644 index 0000000000..e6caa33215 --- /dev/null +++ b/Documentation/driver-api/media/v4l2-tuner.rst @@ -0,0 +1,8 @@ +.. SPDX-License-Identifier: GPL-2.0 + +Tuner functions and data structures +^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ + +.. kernel-doc:: include/media/tuner.h + +.. kernel-doc:: include/media/tuner-types.h diff --git a/Documentation/driver-api/media/v4l2-tveeprom.rst b/Documentation/driver-api/media/v4l2-tveeprom.rst new file mode 100644 index 0000000000..43fb391eda --- /dev/null +++ b/Documentation/driver-api/media/v4l2-tveeprom.rst @@ -0,0 +1,6 @@ +.. SPDX-License-Identifier: GPL-2.0 + +Hauppauge TV EEPROM functions and data structures +^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ + +.. kernel-doc:: include/media/tveeprom.h diff --git a/Documentation/driver-api/media/v4l2-videobuf.rst b/Documentation/driver-api/media/v4l2-videobuf.rst new file mode 100644 index 0000000000..4b1d84eefe --- /dev/null +++ b/Documentation/driver-api/media/v4l2-videobuf.rst @@ -0,0 +1,403 @@ +.. SPDX-License-Identifier: GPL-2.0 + +.. _vb_framework: + +Videobuf Framework +================== + +Author: Jonathan Corbet <corbet@lwn.net> + +Current as of 2.6.33 + +.. note:: + + The videobuf framework was deprecated in favor of videobuf2. Shouldn't + be used on new drivers. + +Introduction +------------ + +The videobuf layer functions as a sort of glue layer between a V4L2 driver +and user space. It handles the allocation and management of buffers for +the storage of video frames. There is a set of functions which can be used +to implement many of the standard POSIX I/O system calls, including read(), +poll(), and, happily, mmap(). Another set of functions can be used to +implement the bulk of the V4L2 ioctl() calls related to streaming I/O, +including buffer allocation, queueing and dequeueing, and streaming +control. Using videobuf imposes a few design decisions on the driver +author, but the payback comes in the form of reduced code in the driver and +a consistent implementation of the V4L2 user-space API. + +Buffer types +------------ + +Not all video devices use the same kind of buffers. In fact, there are (at +least) three common variations: + + - Buffers which are scattered in both the physical and (kernel) virtual + address spaces. (Almost) all user-space buffers are like this, but it + makes great sense to allocate kernel-space buffers this way as well when + it is possible. Unfortunately, it is not always possible; working with + this kind of buffer normally requires hardware which can do + scatter/gather DMA operations. + + - Buffers which are physically scattered, but which are virtually + contiguous; buffers allocated with vmalloc(), in other words. These + buffers are just as hard to use for DMA operations, but they can be + useful in situations where DMA is not available but virtually-contiguous + buffers are convenient. + + - Buffers which are physically contiguous. Allocation of this kind of + buffer can be unreliable on fragmented systems, but simpler DMA + controllers cannot deal with anything else. + +Videobuf can work with all three types of buffers, but the driver author +must pick one at the outset and design the driver around that decision. + +[It's worth noting that there's a fourth kind of buffer: "overlay" buffers +which are located within the system's video memory. The overlay +functionality is considered to be deprecated for most use, but it still +shows up occasionally in system-on-chip drivers where the performance +benefits merit the use of this technique. Overlay buffers can be handled +as a form of scattered buffer, but there are very few implementations in +the kernel and a description of this technique is currently beyond the +scope of this document.] + +Data structures, callbacks, and initialization +---------------------------------------------- + +Depending on which type of buffers are being used, the driver should +include one of the following files: + +.. code-block:: none + + <media/videobuf-dma-sg.h> /* Physically scattered */ + <media/videobuf-vmalloc.h> /* vmalloc() buffers */ + <media/videobuf-dma-contig.h> /* Physically contiguous */ + +The driver's data structure describing a V4L2 device should include a +struct videobuf_queue instance for the management of the buffer queue, +along with a list_head for the queue of available buffers. There will also +need to be an interrupt-safe spinlock which is used to protect (at least) +the queue. + +The next step is to write four simple callbacks to help videobuf deal with +the management of buffers: + +.. code-block:: none + + struct videobuf_queue_ops { + int (*buf_setup)(struct videobuf_queue *q, + unsigned int *count, unsigned int *size); + int (*buf_prepare)(struct videobuf_queue *q, + struct videobuf_buffer *vb, + enum v4l2_field field); + void (*buf_queue)(struct videobuf_queue *q, + struct videobuf_buffer *vb); + void (*buf_release)(struct videobuf_queue *q, + struct videobuf_buffer *vb); + }; + +buf_setup() is called early in the I/O process, when streaming is being +initiated; its purpose is to tell videobuf about the I/O stream. The count +parameter will be a suggested number of buffers to use; the driver should +check it for rationality and adjust it if need be. As a practical rule, a +minimum of two buffers are needed for proper streaming, and there is +usually a maximum (which cannot exceed 32) which makes sense for each +device. The size parameter should be set to the expected (maximum) size +for each frame of data. + +Each buffer (in the form of a struct videobuf_buffer pointer) will be +passed to buf_prepare(), which should set the buffer's size, width, height, +and field fields properly. If the buffer's state field is +VIDEOBUF_NEEDS_INIT, the driver should pass it to: + +.. code-block:: none + + int videobuf_iolock(struct videobuf_queue* q, struct videobuf_buffer *vb, + struct v4l2_framebuffer *fbuf); + +Among other things, this call will usually allocate memory for the buffer. +Finally, the buf_prepare() function should set the buffer's state to +VIDEOBUF_PREPARED. + +When a buffer is queued for I/O, it is passed to buf_queue(), which should +put it onto the driver's list of available buffers and set its state to +VIDEOBUF_QUEUED. Note that this function is called with the queue spinlock +held; if it tries to acquire it as well things will come to a screeching +halt. Yes, this is the voice of experience. Note also that videobuf may +wait on the first buffer in the queue; placing other buffers in front of it +could again gum up the works. So use list_add_tail() to enqueue buffers. + +Finally, buf_release() is called when a buffer is no longer intended to be +used. The driver should ensure that there is no I/O active on the buffer, +then pass it to the appropriate free routine(s): + +.. code-block:: none + + /* Scatter/gather drivers */ + int videobuf_dma_unmap(struct videobuf_queue *q, + struct videobuf_dmabuf *dma); + int videobuf_dma_free(struct videobuf_dmabuf *dma); + + /* vmalloc drivers */ + void videobuf_vmalloc_free (struct videobuf_buffer *buf); + + /* Contiguous drivers */ + void videobuf_dma_contig_free(struct videobuf_queue *q, + struct videobuf_buffer *buf); + +One way to ensure that a buffer is no longer under I/O is to pass it to: + +.. code-block:: none + + int videobuf_waiton(struct videobuf_buffer *vb, int non_blocking, int intr); + +Here, vb is the buffer, non_blocking indicates whether non-blocking I/O +should be used (it should be zero in the buf_release() case), and intr +controls whether an interruptible wait is used. + +File operations +--------------- + +At this point, much of the work is done; much of the rest is slipping +videobuf calls into the implementation of the other driver callbacks. The +first step is in the open() function, which must initialize the +videobuf queue. The function to use depends on the type of buffer used: + +.. code-block:: none + + void videobuf_queue_sg_init(struct videobuf_queue *q, + struct videobuf_queue_ops *ops, + struct device *dev, + spinlock_t *irqlock, + enum v4l2_buf_type type, + enum v4l2_field field, + unsigned int msize, + void *priv); + + void videobuf_queue_vmalloc_init(struct videobuf_queue *q, + struct videobuf_queue_ops *ops, + struct device *dev, + spinlock_t *irqlock, + enum v4l2_buf_type type, + enum v4l2_field field, + unsigned int msize, + void *priv); + + void videobuf_queue_dma_contig_init(struct videobuf_queue *q, + struct videobuf_queue_ops *ops, + struct device *dev, + spinlock_t *irqlock, + enum v4l2_buf_type type, + enum v4l2_field field, + unsigned int msize, + void *priv); + +In each case, the parameters are the same: q is the queue structure for the +device, ops is the set of callbacks as described above, dev is the device +structure for this video device, irqlock is an interrupt-safe spinlock to +protect access to the data structures, type is the buffer type used by the +device (cameras will use V4L2_BUF_TYPE_VIDEO_CAPTURE, for example), field +describes which field is being captured (often V4L2_FIELD_NONE for +progressive devices), msize is the size of any containing structure used +around struct videobuf_buffer, and priv is a private data pointer which +shows up in the priv_data field of struct videobuf_queue. Note that these +are void functions which, evidently, are immune to failure. + +V4L2 capture drivers can be written to support either of two APIs: the +read() system call and the rather more complicated streaming mechanism. As +a general rule, it is necessary to support both to ensure that all +applications have a chance of working with the device. Videobuf makes it +easy to do that with the same code. To implement read(), the driver need +only make a call to one of: + +.. code-block:: none + + ssize_t videobuf_read_one(struct videobuf_queue *q, + char __user *data, size_t count, + loff_t *ppos, int nonblocking); + + ssize_t videobuf_read_stream(struct videobuf_queue *q, + char __user *data, size_t count, + loff_t *ppos, int vbihack, int nonblocking); + +Either one of these functions will read frame data into data, returning the +amount actually read; the difference is that videobuf_read_one() will only +read a single frame, while videobuf_read_stream() will read multiple frames +if they are needed to satisfy the count requested by the application. A +typical driver read() implementation will start the capture engine, call +one of the above functions, then stop the engine before returning (though a +smarter implementation might leave the engine running for a little while in +anticipation of another read() call happening in the near future). + +The poll() function can usually be implemented with a direct call to: + +.. code-block:: none + + unsigned int videobuf_poll_stream(struct file *file, + struct videobuf_queue *q, + poll_table *wait); + +Note that the actual wait queue eventually used will be the one associated +with the first available buffer. + +When streaming I/O is done to kernel-space buffers, the driver must support +the mmap() system call to enable user space to access the data. In many +V4L2 drivers, the often-complex mmap() implementation simplifies to a +single call to: + +.. code-block:: none + + int videobuf_mmap_mapper(struct videobuf_queue *q, + struct vm_area_struct *vma); + +Everything else is handled by the videobuf code. + +The release() function requires two separate videobuf calls: + +.. code-block:: none + + void videobuf_stop(struct videobuf_queue *q); + int videobuf_mmap_free(struct videobuf_queue *q); + +The call to videobuf_stop() terminates any I/O in progress - though it is +still up to the driver to stop the capture engine. The call to +videobuf_mmap_free() will ensure that all buffers have been unmapped; if +so, they will all be passed to the buf_release() callback. If buffers +remain mapped, videobuf_mmap_free() returns an error code instead. The +purpose is clearly to cause the closing of the file descriptor to fail if +buffers are still mapped, but every driver in the 2.6.32 kernel cheerfully +ignores its return value. + +ioctl() operations +------------------ + +The V4L2 API includes a very long list of driver callbacks to respond to +the many ioctl() commands made available to user space. A number of these +- those associated with streaming I/O - turn almost directly into videobuf +calls. The relevant helper functions are: + +.. code-block:: none + + int videobuf_reqbufs(struct videobuf_queue *q, + struct v4l2_requestbuffers *req); + int videobuf_querybuf(struct videobuf_queue *q, struct v4l2_buffer *b); + int videobuf_qbuf(struct videobuf_queue *q, struct v4l2_buffer *b); + int videobuf_dqbuf(struct videobuf_queue *q, struct v4l2_buffer *b, + int nonblocking); + int videobuf_streamon(struct videobuf_queue *q); + int videobuf_streamoff(struct videobuf_queue *q); + +So, for example, a VIDIOC_REQBUFS call turns into a call to the driver's +vidioc_reqbufs() callback which, in turn, usually only needs to locate the +proper struct videobuf_queue pointer and pass it to videobuf_reqbufs(). +These support functions can replace a great deal of buffer management +boilerplate in a lot of V4L2 drivers. + +The vidioc_streamon() and vidioc_streamoff() functions will be a bit more +complex, of course, since they will also need to deal with starting and +stopping the capture engine. + +Buffer allocation +----------------- + +Thus far, we have talked about buffers, but have not looked at how they are +allocated. The scatter/gather case is the most complex on this front. For +allocation, the driver can leave buffer allocation entirely up to the +videobuf layer; in this case, buffers will be allocated as anonymous +user-space pages and will be very scattered indeed. If the application is +using user-space buffers, no allocation is needed; the videobuf layer will +take care of calling get_user_pages() and filling in the scatterlist array. + +If the driver needs to do its own memory allocation, it should be done in +the vidioc_reqbufs() function, *after* calling videobuf_reqbufs(). The +first step is a call to: + +.. code-block:: none + + struct videobuf_dmabuf *videobuf_to_dma(struct videobuf_buffer *buf); + +The returned videobuf_dmabuf structure (defined in +<media/videobuf-dma-sg.h>) includes a couple of relevant fields: + +.. code-block:: none + + struct scatterlist *sglist; + int sglen; + +The driver must allocate an appropriately-sized scatterlist array and +populate it with pointers to the pieces of the allocated buffer; sglen +should be set to the length of the array. + +Drivers using the vmalloc() method need not (and cannot) concern themselves +with buffer allocation at all; videobuf will handle those details. The +same is normally true of contiguous-DMA drivers as well; videobuf will +allocate the buffers (with dma_alloc_coherent()) when it sees fit. That +means that these drivers may be trying to do high-order allocations at any +time, an operation which is not always guaranteed to work. Some drivers +play tricks by allocating DMA space at system boot time; videobuf does not +currently play well with those drivers. + +As of 2.6.31, contiguous-DMA drivers can work with a user-supplied buffer, +as long as that buffer is physically contiguous. Normal user-space +allocations will not meet that criterion, but buffers obtained from other +kernel drivers, or those contained within huge pages, will work with these +drivers. + +Filling the buffers +------------------- + +The final part of a videobuf implementation has no direct callback - it's +the portion of the code which actually puts frame data into the buffers, +usually in response to interrupts from the device. For all types of +drivers, this process works approximately as follows: + + - Obtain the next available buffer and make sure that somebody is actually + waiting for it. + + - Get a pointer to the memory and put video data there. + + - Mark the buffer as done and wake up the process waiting for it. + +Step (1) above is done by looking at the driver-managed list_head structure +- the one which is filled in the buf_queue() callback. Because starting +the engine and enqueueing buffers are done in separate steps, it's possible +for the engine to be running without any buffers available - in the +vmalloc() case especially. So the driver should be prepared for the list +to be empty. It is equally possible that nobody is yet interested in the +buffer; the driver should not remove it from the list or fill it until a +process is waiting on it. That test can be done by examining the buffer's +done field (a wait_queue_head_t structure) with waitqueue_active(). + +A buffer's state should be set to VIDEOBUF_ACTIVE before being mapped for +DMA; that ensures that the videobuf layer will not try to do anything with +it while the device is transferring data. + +For scatter/gather drivers, the needed memory pointers will be found in the +scatterlist structure described above. Drivers using the vmalloc() method +can get a memory pointer with: + +.. code-block:: none + + void *videobuf_to_vmalloc(struct videobuf_buffer *buf); + +For contiguous DMA drivers, the function to use is: + +.. code-block:: none + + dma_addr_t videobuf_to_dma_contig(struct videobuf_buffer *buf); + +The contiguous DMA API goes out of its way to hide the kernel-space address +of the DMA buffer from drivers. + +The final step is to set the size field of the relevant videobuf_buffer +structure to the actual size of the captured image, set state to +VIDEOBUF_DONE, then call wake_up() on the done queue. At this point, the +buffer is owned by the videobuf layer and the driver should not touch it +again. + +Developers who are interested in more information can go into the relevant +header files; there are a few low-level functions declared there which have +not been talked about here. Note also that all of these calls are exported +GPL-only, so they will not be available to non-GPL kernel modules. diff --git a/Documentation/driver-api/media/v4l2-videobuf2.rst b/Documentation/driver-api/media/v4l2-videobuf2.rst new file mode 100644 index 0000000000..1044f64ff1 --- /dev/null +++ b/Documentation/driver-api/media/v4l2-videobuf2.rst @@ -0,0 +1,12 @@ +.. SPDX-License-Identifier: GPL-2.0 + +.. _vb2_framework: + +V4L2 videobuf2 functions and data structures +^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ + +.. kernel-doc:: include/media/videobuf2-core.h + +.. kernel-doc:: include/media/videobuf2-v4l2.h + +.. kernel-doc:: include/media/videobuf2-memops.h diff --git a/Documentation/driver-api/mei/hdcp.rst b/Documentation/driver-api/mei/hdcp.rst new file mode 100644 index 0000000000..e85a065b1c --- /dev/null +++ b/Documentation/driver-api/mei/hdcp.rst @@ -0,0 +1,32 @@ +.. SPDX-License-Identifier: GPL-2.0 + +HDCP: +===== + +ME FW as a security engine provides the capability for setting up +HDCP2.2 protocol negotiation between the Intel graphics device and +an HDC2.2 sink. + +ME FW prepares HDCP2.2 negotiation parameters, signs and encrypts them +according the HDCP 2.2 spec. The Intel graphics sends the created blob +to the HDCP2.2 sink. + +Similarly, the HDCP2.2 sink's response is transferred to ME FW +for decryption and verification. + +Once all the steps of HDCP2.2 negotiation are completed, +upon request ME FW will configure the port as authenticated and supply +the HDCP encryption keys to Intel graphics hardware. + + +mei_hdcp driver +--------------- +.. kernel-doc:: drivers/misc/mei/hdcp/mei_hdcp.c + :doc: MEI_HDCP Client Driver + +mei_hdcp api +------------ + +.. kernel-doc:: drivers/misc/mei/hdcp/mei_hdcp.c + :functions: + diff --git a/Documentation/driver-api/mei/iamt.rst b/Documentation/driver-api/mei/iamt.rst new file mode 100644 index 0000000000..6ef3e61368 --- /dev/null +++ b/Documentation/driver-api/mei/iamt.rst @@ -0,0 +1,101 @@ +.. SPDX-License-Identifier: GPL-2.0 + +Intel(R) Active Management Technology (Intel AMT) +================================================= + +Prominent usage of the Intel ME Interface is to communicate with Intel(R) +Active Management Technology (Intel AMT) implemented in firmware running on +the Intel ME. + +Intel AMT provides the ability to manage a host remotely out-of-band (OOB) +even when the operating system running on the host processor has crashed or +is in a sleep state. + +Some examples of Intel AMT usage are: + - Monitoring hardware state and platform components + - Remote power off/on (useful for green computing or overnight IT + maintenance) + - OS updates + - Storage of useful platform information such as software assets + - Built-in hardware KVM + - Selective network isolation of Ethernet and IP protocol flows based + on policies set by a remote management console + - IDE device redirection from remote management console + +Intel AMT (OOB) communication is based on SOAP (deprecated +starting with Release 6.0) over HTTP/S or WS-Management protocol over +HTTP/S that are received from a remote management console application. + +For more information about Intel AMT: +https://software.intel.com/sites/manageability/AMT_Implementation_and_Reference_Guide/default.htm + + +Intel AMT Applications +---------------------- + + 1) Intel Local Management Service (Intel LMS) + + Applications running locally on the platform communicate with Intel AMT Release + 2.0 and later releases in the same way that network applications do via SOAP + over HTTP (deprecated starting with Release 6.0) or with WS-Management over + SOAP over HTTP. This means that some Intel AMT features can be accessed from a + local application using the same network interface as a remote application + communicating with Intel AMT over the network. + + When a local application sends a message addressed to the local Intel AMT host + name, the Intel LMS, which listens for traffic directed to the host name, + intercepts the message and routes it to the Intel MEI. + For more information: + https://software.intel.com/sites/manageability/AMT_Implementation_and_Reference_Guide/default.htm + Under "About Intel AMT" => "Local Access" + + For downloading Intel LMS: + https://github.com/intel/lms + + The Intel LMS opens a connection using the Intel MEI driver to the Intel LMS + firmware feature using a defined GUID and then communicates with the feature + using a protocol called Intel AMT Port Forwarding Protocol (Intel APF protocol). + The protocol is used to maintain multiple sessions with Intel AMT from a + single application. + + See the protocol specification in the Intel AMT Software Development Kit (SDK) + https://software.intel.com/sites/manageability/AMT_Implementation_and_Reference_Guide/default.htm + Under "SDK Resources" => "Intel(R) vPro(TM) Gateway (MPS)" + => "Information for Intel(R) vPro(TM) Gateway Developers" + => "Description of the Intel AMT Port Forwarding (APF) Protocol" + + 2) Intel AMT Remote configuration using a Local Agent + + A Local Agent enables IT personnel to configure Intel AMT out-of-the-box + without requiring installing additional data to enable setup. The remote + configuration process may involve an ISV-developed remote configuration + agent that runs on the host. + For more information: + https://software.intel.com/sites/manageability/AMT_Implementation_and_Reference_Guide/default.htm + Under "Setup and Configuration of Intel AMT" => + "SDK Tools Supporting Setup and Configuration" => + "Using the Local Agent Sample" + +Intel AMT OS Health Watchdog +---------------------------- + +The Intel AMT Watchdog is an OS Health (Hang/Crash) watchdog. +Whenever the OS hangs or crashes, Intel AMT will send an event +to any subscriber to this event. This mechanism means that +IT knows when a platform crashes even when there is a hard failure on the host. + +The Intel AMT Watchdog is composed of two parts: + 1) Firmware feature - receives the heartbeats + and sends an event when the heartbeats stop. + 2) Intel MEI iAMT watchdog driver - connects to the watchdog feature, + configures the watchdog and sends the heartbeats. + +The Intel iAMT watchdog MEI driver uses the kernel watchdog API to configure +the Intel AMT Watchdog and to send heartbeats to it. The default timeout of the +watchdog is 120 seconds. + +If the Intel AMT is not enabled in the firmware then the watchdog client won't enumerate +on the me client bus and watchdog devices won't be exposed. + +--- +linux-mei@linux.intel.com diff --git a/Documentation/driver-api/mei/index.rst b/Documentation/driver-api/mei/index.rst new file mode 100644 index 0000000000..3a22b522ee --- /dev/null +++ b/Documentation/driver-api/mei/index.rst @@ -0,0 +1,23 @@ +.. SPDX-License-Identifier: GPL-2.0 + +.. include:: <isonum.txt> + +=================================================== +Intel(R) Management Engine Interface (Intel(R) MEI) +=================================================== + +**Copyright** |copy| 2019 Intel Corporation + + +.. only:: html + + .. class:: toc-title + + Table of Contents + +.. toctree:: + :maxdepth: 3 + + mei + mei-client-bus + iamt diff --git a/Documentation/driver-api/mei/mei-client-bus.rst b/Documentation/driver-api/mei/mei-client-bus.rst new file mode 100644 index 0000000000..f242b3f8d6 --- /dev/null +++ b/Documentation/driver-api/mei/mei-client-bus.rst @@ -0,0 +1,168 @@ +.. SPDX-License-Identifier: GPL-2.0 + +============================================== +Intel(R) Management Engine (ME) Client bus API +============================================== + + +Rationale +========= + +The MEI character device is useful for dedicated applications to send and receive +data to the many FW appliance found in Intel's ME from the user space. +However, for some of the ME functionalities it makes sense to leverage existing software +stack and expose them through existing kernel subsystems. + +In order to plug seamlessly into the kernel device driver model we add kernel virtual +bus abstraction on top of the MEI driver. This allows implementing Linux kernel drivers +for the various MEI features as a stand alone entities found in their respective subsystem. +Existing device drivers can even potentially be re-used by adding an MEI CL bus layer to +the existing code. + + +MEI CL bus API +============== + +A driver implementation for an MEI Client is very similar to any other existing bus +based device drivers. The driver registers itself as an MEI CL bus driver through +the ``struct mei_cl_driver`` structure defined in :file:`include/linux/mei_cl_bus.c` + +.. code-block:: C + + struct mei_cl_driver { + struct device_driver driver; + const char *name; + + const struct mei_cl_device_id *id_table; + + int (*probe)(struct mei_cl_device *dev, const struct mei_cl_id *id); + int (*remove)(struct mei_cl_device *dev); + }; + + + +The mei_cl_device_id structure defined in :file:`include/linux/mod_devicetable.h` allows a +driver to bind itself against a device name. + +.. code-block:: C + + struct mei_cl_device_id { + char name[MEI_CL_NAME_SIZE]; + uuid_le uuid; + __u8 version; + kernel_ulong_t driver_info; + }; + +To actually register a driver on the ME Client bus one must call the :c:func:`mei_cl_add_driver` +API. This is typically called at module initialization time. + +Once the driver is registered and bound to the device, a driver will typically +try to do some I/O on this bus and this should be done through the :c:func:`mei_cl_send` +and :c:func:`mei_cl_recv` functions. More detailed information is in :ref:`api` section. + +In order for a driver to be notified about pending traffic or event, the driver +should register a callback via :c:func:`mei_cl_devev_register_rx_cb` and +:c:func:`mei_cldev_register_notify_cb` function respectively. + +.. _api: + +API: +---- +.. kernel-doc:: drivers/misc/mei/bus.c + :export: drivers/misc/mei/bus.c + + + +Example +======= + +As a theoretical example let's pretend the ME comes with a "contact" NFC IP. +The driver init and exit routines for this device would look like: + +.. code-block:: C + + #define CONTACT_DRIVER_NAME "contact" + + static struct mei_cl_device_id contact_mei_cl_tbl[] = { + { CONTACT_DRIVER_NAME, }, + + /* required last entry */ + { } + }; + MODULE_DEVICE_TABLE(mei_cl, contact_mei_cl_tbl); + + static struct mei_cl_driver contact_driver = { + .id_table = contact_mei_tbl, + .name = CONTACT_DRIVER_NAME, + + .probe = contact_probe, + .remove = contact_remove, + }; + + static int contact_init(void) + { + int r; + + r = mei_cl_driver_register(&contact_driver); + if (r) { + pr_err(CONTACT_DRIVER_NAME ": driver registration failed\n"); + return r; + } + + return 0; + } + + static void __exit contact_exit(void) + { + mei_cl_driver_unregister(&contact_driver); + } + + module_init(contact_init); + module_exit(contact_exit); + +And the driver's simplified probe routine would look like that: + +.. code-block:: C + + int contact_probe(struct mei_cl_device *dev, struct mei_cl_device_id *id) + { + [...] + mei_cldev_enable(dev); + + mei_cldev_register_rx_cb(dev, contact_rx_cb); + + return 0; + } + +In the probe routine the driver first enable the MEI device and then registers +an rx handler which is as close as it can get to registering a threaded IRQ handler. +The handler implementation will typically call :c:func:`mei_cldev_recv` and then +process received data. + +.. code-block:: C + + #define MAX_PAYLOAD 128 + #define HDR_SIZE 4 + static void conntact_rx_cb(struct mei_cl_device *cldev) + { + struct contact *c = mei_cldev_get_drvdata(cldev); + unsigned char payload[MAX_PAYLOAD]; + ssize_t payload_sz; + + payload_sz = mei_cldev_recv(cldev, payload, MAX_PAYLOAD) + if (reply_size < HDR_SIZE) { + return; + } + + c->process_rx(payload); + + } + +MEI Client Bus Drivers +====================== + +.. toctree:: + :maxdepth: 2 + + hdcp + nfc diff --git a/Documentation/driver-api/mei/mei.rst b/Documentation/driver-api/mei/mei.rst new file mode 100644 index 0000000000..4f2ced4ccd --- /dev/null +++ b/Documentation/driver-api/mei/mei.rst @@ -0,0 +1,213 @@ +.. SPDX-License-Identifier: GPL-2.0 + +Introduction +============ + +The Intel Management Engine (Intel ME) is an isolated and protected computing +resource (Co-processor) residing inside certain Intel chipsets. The Intel ME +provides support for computer/IT management and security features. +The actual feature set depends on the Intel chipset SKU. + +The Intel Management Engine Interface (Intel MEI, previously known as HECI) +is the interface between the Host and Intel ME. This interface is exposed +to the host as a PCI device, actually multiple PCI devices might be exposed. +The Intel MEI Driver is in charge of the communication channel between +a host application and the Intel ME features. + +Each Intel ME feature, or Intel ME Client is addressed by a unique GUID and +each client has its own protocol. The protocol is message-based with a +header and payload up to maximal number of bytes advertised by the client, +upon connection. + +Intel MEI Driver +================ + +The driver exposes a character device with device nodes /dev/meiX. + +An application maintains communication with an Intel ME feature while +/dev/meiX is open. The binding to a specific feature is performed by calling +:c:macro:`MEI_CONNECT_CLIENT_IOCTL`, which passes the desired GUID. +The number of instances of an Intel ME feature that can be opened +at the same time depends on the Intel ME feature, but most of the +features allow only a single instance. + +The driver is transparent to data that are passed between firmware feature +and host application. + +Because some of the Intel ME features can change the system +configuration, the driver by default allows only a privileged +user to access it. + +The session is terminated calling :c:expr:`close(fd)`. + +A code snippet for an application communicating with Intel AMTHI client: + +In order to support virtualization or sandboxing a trusted supervisor +can use :c:macro:`MEI_CONNECT_CLIENT_IOCTL_VTAG` to create +virtual channels with an Intel ME feature. Not all features support +virtual channels such client with answer EOPNOTSUPP. + +.. code-block:: C + + struct mei_connect_client_data data; + fd = open(MEI_DEVICE); + + data.d.in_client_uuid = AMTHI_GUID; + + ioctl(fd, IOCTL_MEI_CONNECT_CLIENT, &data); + + printf("Ver=%d, MaxLen=%ld\n", + data.d.in_client_uuid.protocol_version, + data.d.in_client_uuid.max_msg_length); + + [...] + + write(fd, amthi_req_data, amthi_req_data_len); + + [...] + + read(fd, &amthi_res_data, amthi_res_data_len); + + [...] + close(fd); + + +User space API + +IOCTLs: +======= + +The Intel MEI Driver supports the following IOCTL commands: + +IOCTL_MEI_CONNECT_CLIENT +------------------------- +Connect to firmware Feature/Client. + +.. code-block:: none + + Usage: + + struct mei_connect_client_data client_data; + + ioctl(fd, IOCTL_MEI_CONNECT_CLIENT, &client_data); + + Inputs: + + struct mei_connect_client_data - contain the following + Input field: + + in_client_uuid - GUID of the FW Feature that needs + to connect to. + Outputs: + out_client_properties - Client Properties: MTU and Protocol Version. + + Error returns: + + ENOTTY No such client (i.e. wrong GUID) or connection is not allowed. + EINVAL Wrong IOCTL Number + ENODEV Device or Connection is not initialized or ready. + ENOMEM Unable to allocate memory to client internal data. + EFAULT Fatal Error (e.g. Unable to access user input data) + EBUSY Connection Already Open + +:Note: + max_msg_length (MTU) in client properties describes the maximum + data that can be sent or received. (e.g. if MTU=2K, can send + requests up to bytes 2k and received responses up to 2k bytes). + +IOCTL_MEI_CONNECT_CLIENT_VTAG: +------------------------------ + +.. code-block:: none + + Usage: + + struct mei_connect_client_data_vtag client_data_vtag; + + ioctl(fd, IOCTL_MEI_CONNECT_CLIENT_VTAG, &client_data_vtag); + + Inputs: + + struct mei_connect_client_data_vtag - contain the following + Input field: + + in_client_uuid - GUID of the FW Feature that needs + to connect to. + vtag - virtual tag [1, 255] + + Outputs: + out_client_properties - Client Properties: MTU and Protocol Version. + + Error returns: + + ENOTTY No such client (i.e. wrong GUID) or connection is not allowed. + EINVAL Wrong IOCTL Number or tag == 0 + ENODEV Device or Connection is not initialized or ready. + ENOMEM Unable to allocate memory to client internal data. + EFAULT Fatal Error (e.g. Unable to access user input data) + EBUSY Connection Already Open + EOPNOTSUPP Vtag is not supported + +IOCTL_MEI_NOTIFY_SET +--------------------- +Enable or disable event notifications. + + +.. code-block:: none + + Usage: + + uint32_t enable; + + ioctl(fd, IOCTL_MEI_NOTIFY_SET, &enable); + + + uint32_t enable = 1; + or + uint32_t enable[disable] = 0; + + Error returns: + + + EINVAL Wrong IOCTL Number + ENODEV Device is not initialized or the client not connected + ENOMEM Unable to allocate memory to client internal data. + EFAULT Fatal Error (e.g. Unable to access user input data) + EOPNOTSUPP if the device doesn't support the feature + +:Note: + The client must be connected in order to enable notification events + + +IOCTL_MEI_NOTIFY_GET +-------------------- +Retrieve event + +.. code-block:: none + + Usage: + uint32_t event; + ioctl(fd, IOCTL_MEI_NOTIFY_GET, &event); + + Outputs: + 1 - if an event is pending + 0 - if there is no even pending + + Error returns: + EINVAL Wrong IOCTL Number + ENODEV Device is not initialized or the client not connected + ENOMEM Unable to allocate memory to client internal data. + EFAULT Fatal Error (e.g. Unable to access user input data) + EOPNOTSUPP if the device doesn't support the feature + +:Note: + The client must be connected and event notification has to be enabled + in order to receive an event + + + +Supported Chipsets +================== +82X38/X48 Express and newer + +linux-mei@linux.intel.com diff --git a/Documentation/driver-api/mei/nfc.rst b/Documentation/driver-api/mei/nfc.rst new file mode 100644 index 0000000000..8fe8664c28 --- /dev/null +++ b/Documentation/driver-api/mei/nfc.rst @@ -0,0 +1,28 @@ +.. SPDX-License-Identifier: GPL-2.0 + +MEI NFC +------- + +Some Intel 8 and 9 Series chipsets support NFC devices connected behind +the Intel Management Engine controller. +MEI client bus exposes the NFC chips as NFC phy devices and enables +binding with Microread and NXP PN544 NFC device driver from the Linux NFC +subsystem. + +.. kernel-render:: DOT + :alt: MEI NFC digraph + :caption: **MEI NFC** Stack + + digraph NFC { + cl_nfc -> me_cl_nfc; + "drivers/nfc/mei_phy" -> cl_nfc [lhead=bus]; + "drivers/nfc/microread/mei" -> cl_nfc; + "drivers/nfc/microread/mei" -> "drivers/nfc/mei_phy"; + "drivers/nfc/pn544/mei" -> cl_nfc; + "drivers/nfc/pn544/mei" -> "drivers/nfc/mei_phy"; + "net/nfc" -> "drivers/nfc/microread/mei"; + "net/nfc" -> "drivers/nfc/pn544/mei"; + "neard" -> "net/nfc"; + cl_nfc [label="mei/bus(nfc)"]; + me_cl_nfc [label="me fw (nfc)"]; + } diff --git a/Documentation/driver-api/memory-devices/index.rst b/Documentation/driver-api/memory-devices/index.rst new file mode 100644 index 0000000000..28101458cd --- /dev/null +++ b/Documentation/driver-api/memory-devices/index.rst @@ -0,0 +1,18 @@ +.. SPDX-License-Identifier: GPL-2.0 + +========================= +Memory Controller drivers +========================= + +.. toctree:: + :maxdepth: 1 + + ti-emif + ti-gpmc + +.. only:: subproject and html + + Indices + ======= + + * :ref:`genindex` diff --git a/Documentation/driver-api/memory-devices/ti-emif.rst b/Documentation/driver-api/memory-devices/ti-emif.rst new file mode 100644 index 0000000000..dea2ad9bcd --- /dev/null +++ b/Documentation/driver-api/memory-devices/ti-emif.rst @@ -0,0 +1,64 @@ +.. SPDX-License-Identifier: GPL-2.0 + +=============================== +TI EMIF SDRAM Controller Driver +=============================== + +Author +====== +Aneesh V <aneesh@ti.com> + +Location +======== +driver/memory/emif.c + +Supported SoCs: +=============== +TI OMAP44xx +TI OMAP54xx + +Menuconfig option: +================== +Device Drivers + Memory devices + Texas Instruments EMIF driver + +Description +=========== +This driver is for the EMIF module available in Texas Instruments +SoCs. EMIF is an SDRAM controller that, based on its revision, +supports one or more of DDR2, DDR3, and LPDDR2 SDRAM protocols. +This driver takes care of only LPDDR2 memories presently. The +functions of the driver includes re-configuring AC timing +parameters and other settings during frequency, voltage and +temperature changes + +Platform Data (see include/linux/platform_data/emif_plat.h) +=========================================================== +DDR device details and other board dependent and SoC dependent +information can be passed through platform data (struct emif_platform_data) + +- DDR device details: 'struct ddr_device_info' +- Device AC timings: 'struct lpddr2_timings' and 'struct lpddr2_min_tck' +- Custom configurations: customizable policy options through + 'struct emif_custom_configs' +- IP revision +- PHY type + +Interface to the external world +=============================== +EMIF driver registers notifiers for voltage and frequency changes +affecting EMIF and takes appropriate actions when these are invoked. + +- freq_pre_notify_handling() +- freq_post_notify_handling() +- volt_notify_handling() + +Debugfs +======= +The driver creates two debugfs entries per device. + +- regcache_dump : dump of register values calculated and saved for all + frequencies used so far. +- mr4 : last polled value of MR4 register in the LPDDR2 device. MR4 + indicates the current temperature level of the device. diff --git a/Documentation/driver-api/memory-devices/ti-gpmc.rst b/Documentation/driver-api/memory-devices/ti-gpmc.rst new file mode 100644 index 0000000000..b1bb86871a --- /dev/null +++ b/Documentation/driver-api/memory-devices/ti-gpmc.rst @@ -0,0 +1,179 @@ +.. SPDX-License-Identifier: GPL-2.0 + +======================================== +GPMC (General Purpose Memory Controller) +======================================== + +GPMC is an unified memory controller dedicated to interfacing external +memory devices like + + * Asynchronous SRAM like memories and application specific integrated + circuit devices. + * Asynchronous, synchronous, and page mode burst NOR flash devices + NAND flash + * Pseudo-SRAM devices + +GPMC is found on Texas Instruments SoC's (OMAP based) +IP details: https://www.ti.com/lit/pdf/spruh73 section 7.1 + + +GPMC generic timing calculation: +================================ + +GPMC has certain timings that has to be programmed for proper +functioning of the peripheral, while peripheral has another set of +timings. To have peripheral work with gpmc, peripheral timings has to +be translated to the form gpmc can understand. The way it has to be +translated depends on the connected peripheral. Also there is a +dependency for certain gpmc timings on gpmc clock frequency. Hence a +generic timing routine was developed to achieve above requirements. + +Generic routine provides a generic method to calculate gpmc timings +from gpmc peripheral timings. struct gpmc_device_timings fields has to +be updated with timings from the datasheet of the peripheral that is +connected to gpmc. A few of the peripheral timings can be fed either +in time or in cycles, provision to handle this scenario has been +provided (refer struct gpmc_device_timings definition). It may so +happen that timing as specified by peripheral datasheet is not present +in timing structure, in this scenario, try to correlate peripheral +timing to the one available. If that doesn't work, try to add a new +field as required by peripheral, educate generic timing routine to +handle it, make sure that it does not break any of the existing. +Then there may be cases where peripheral datasheet doesn't mention +certain fields of struct gpmc_device_timings, zero those entries. + +Generic timing routine has been verified to work properly on +multiple onenand's and tusb6010 peripherals. + +A word of caution: generic timing routine has been developed based +on understanding of gpmc timings, peripheral timings, available +custom timing routines, a kind of reverse engineering without +most of the datasheets & hardware (to be exact none of those supported +in mainline having custom timing routine) and by simulation. + +gpmc timing dependency on peripheral timings: + +[<gpmc_timing>: <peripheral timing1>, <peripheral timing2> ...] + +1. common + +cs_on: + t_ceasu +adv_on: + t_avdasu, t_ceavd + +2. sync common + +sync_clk: + clk +page_burst_access: + t_bacc +clk_activation: + t_ces, t_avds + +3. read async muxed + +adv_rd_off: + t_avdp_r +oe_on: + t_oeasu, t_aavdh +access: + t_iaa, t_oe, t_ce, t_aa +rd_cycle: + t_rd_cycle, t_cez_r, t_oez + +4. read async non-muxed + +adv_rd_off: + t_avdp_r +oe_on: + t_oeasu +access: + t_iaa, t_oe, t_ce, t_aa +rd_cycle: + t_rd_cycle, t_cez_r, t_oez + +5. read sync muxed + +adv_rd_off: + t_avdp_r, t_avdh +oe_on: + t_oeasu, t_ach, cyc_aavdh_oe +access: + t_iaa, cyc_iaa, cyc_oe +rd_cycle: + t_cez_r, t_oez, t_ce_rdyz + +6. read sync non-muxed + +adv_rd_off: + t_avdp_r +oe_on: + t_oeasu +access: + t_iaa, cyc_iaa, cyc_oe +rd_cycle: + t_cez_r, t_oez, t_ce_rdyz + +7. write async muxed + +adv_wr_off: + t_avdp_w +we_on, wr_data_mux_bus: + t_weasu, t_aavdh, cyc_aavhd_we +we_off: + t_wpl +cs_wr_off: + t_wph +wr_cycle: + t_cez_w, t_wr_cycle + +8. write async non-muxed + +adv_wr_off: + t_avdp_w +we_on, wr_data_mux_bus: + t_weasu +we_off: + t_wpl +cs_wr_off: + t_wph +wr_cycle: + t_cez_w, t_wr_cycle + +9. write sync muxed + +adv_wr_off: + t_avdp_w, t_avdh +we_on, wr_data_mux_bus: + t_weasu, t_rdyo, t_aavdh, cyc_aavhd_we +we_off: + t_wpl, cyc_wpl +cs_wr_off: + t_wph +wr_cycle: + t_cez_w, t_ce_rdyz + +10. write sync non-muxed + +adv_wr_off: + t_avdp_w +we_on, wr_data_mux_bus: + t_weasu, t_rdyo +we_off: + t_wpl, cyc_wpl +cs_wr_off: + t_wph +wr_cycle: + t_cez_w, t_ce_rdyz + + +Note: + Many of gpmc timings are dependent on other gpmc timings (a few + gpmc timings purely dependent on other gpmc timings, a reason that + some of the gpmc timings are missing above), and it will result in + indirect dependency of peripheral timings to gpmc timings other than + mentioned above, refer timing routine for more details. To know what + these peripheral timings correspond to, please see explanations in + struct gpmc_device_timings definition. And for gpmc timings refer + IP details (link above). diff --git a/Documentation/driver-api/men-chameleon-bus.rst b/Documentation/driver-api/men-chameleon-bus.rst new file mode 100644 index 0000000000..6f0b9ee475 --- /dev/null +++ b/Documentation/driver-api/men-chameleon-bus.rst @@ -0,0 +1,187 @@ +================= +MEN Chameleon Bus +================= + +.. Table of Contents + ================= + 1 Introduction + 1.1 Scope of this Document + 1.2 Limitations of the current implementation + 2 Architecture + 2.1 MEN Chameleon Bus + 2.2 Carrier Devices + 2.3 Parser + 3 Resource handling + 3.1 Memory Resources + 3.2 IRQs + 4 Writing an MCB driver + 4.1 The driver structure + 4.2 Probing and attaching + 4.3 Initializing the driver + 4.4 Using DMA + + +Introduction +============ + +This document describes the architecture and implementation of the MEN +Chameleon Bus (called MCB throughout this document). + +Scope of this Document +---------------------- + +This document is intended to be a short overview of the current +implementation and does by no means describe the complete possibilities of MCB +based devices. + +Limitations of the current implementation +----------------------------------------- + +The current implementation is limited to PCI and PCIe based carrier devices +that only use a single memory resource and share the PCI legacy IRQ. Not +implemented are: + +- Multi-resource MCB devices like the VME Controller or M-Module carrier. +- MCB devices that need another MCB device, like SRAM for a DMA Controller's + buffer descriptors or a video controller's video memory. +- A per-carrier IRQ domain for carrier devices that have one (or more) IRQs + per MCB device like PCIe based carriers with MSI or MSI-X support. + +Architecture +============ + +MCB is divided into 3 functional blocks: + +- The MEN Chameleon Bus itself, +- drivers for MCB Carrier Devices and +- the parser for the Chameleon table. + +MEN Chameleon Bus +----------------- + +The MEN Chameleon Bus is an artificial bus system that attaches to a so +called Chameleon FPGA device found on some hardware produced my MEN Mikro +Elektronik GmbH. These devices are multi-function devices implemented in a +single FPGA and usually attached via some sort of PCI or PCIe link. Each +FPGA contains a header section describing the content of the FPGA. The +header lists the device id, PCI BAR, offset from the beginning of the PCI +BAR, size in the FPGA, interrupt number and some other properties currently +not handled by the MCB implementation. + +Carrier Devices +--------------- + +A carrier device is just an abstraction for the real world physical bus the +Chameleon FPGA is attached to. Some IP Core drivers may need to interact with +properties of the carrier device (like querying the IRQ number of a PCI +device). To provide abstraction from the real hardware bus, an MCB carrier +device provides callback methods to translate the driver's MCB function calls +to hardware related function calls. For example a carrier device may +implement the get_irq() method which can be translated into a hardware bus +query for the IRQ number the device should use. + +Parser +------ + +The parser reads the first 512 bytes of a Chameleon device and parses the +Chameleon table. Currently the parser only supports the Chameleon v2 variant +of the Chameleon table but can easily be adopted to support an older or +possible future variant. While parsing the table's entries new MCB devices +are allocated and their resources are assigned according to the resource +assignment in the Chameleon table. After resource assignment is finished, the +MCB devices are registered at the MCB and thus at the driver core of the +Linux kernel. + +Resource handling +================= + +The current implementation assigns exactly one memory and one IRQ resource +per MCB device. But this is likely going to change in the future. + +Memory Resources +---------------- + +Each MCB device has exactly one memory resource, which can be requested from +the MCB bus. This memory resource is the physical address of the MCB device +inside the carrier and is intended to be passed to ioremap() and friends. It +is already requested from the kernel by calling request_mem_region(). + +IRQs +---- + +Each MCB device has exactly one IRQ resource, which can be requested from the +MCB bus. If a carrier device driver implements the ->get_irq() callback +method, the IRQ number assigned by the carrier device will be returned, +otherwise the IRQ number inside the Chameleon table will be returned. This +number is suitable to be passed to request_irq(). + +Writing an MCB driver +===================== + +The driver structure +-------------------- + +Each MCB driver has a structure to identify the device driver as well as +device ids which identify the IP Core inside the FPGA. The driver structure +also contains callback methods which get executed on driver probe and +removal from the system:: + + static const struct mcb_device_id foo_ids[] = { + { .device = 0x123 }, + { } + }; + MODULE_DEVICE_TABLE(mcb, foo_ids); + + static struct mcb_driver foo_driver = { + driver = { + .name = "foo-bar", + .owner = THIS_MODULE, + }, + .probe = foo_probe, + .remove = foo_remove, + .id_table = foo_ids, + }; + +Probing and attaching +--------------------- + +When a driver is loaded and the MCB devices it services are found, the MCB +core will call the driver's probe callback method. When the driver is removed +from the system, the MCB core will call the driver's remove callback method:: + + static init foo_probe(struct mcb_device *mdev, const struct mcb_device_id *id); + static void foo_remove(struct mcb_device *mdev); + +Initializing the driver +----------------------- + +When the kernel is booted or your foo driver module is inserted, you have to +perform driver initialization. Usually it is enough to register your driver +module at the MCB core:: + + static int __init foo_init(void) + { + return mcb_register_driver(&foo_driver); + } + module_init(foo_init); + + static void __exit foo_exit(void) + { + mcb_unregister_driver(&foo_driver); + } + module_exit(foo_exit); + +The module_mcb_driver() macro can be used to reduce the above code:: + + module_mcb_driver(foo_driver); + +Using DMA +--------- + +To make use of the kernel's DMA-API's function, you will need to use the +carrier device's 'struct device'. Fortunately 'struct mcb_device' embeds a +pointer (->dma_dev) to the carrier's device for DMA purposes:: + + ret = dma_set_mask_and_coherent(&mdev->dma_dev, DMA_BIT_MASK(dma_bits)); + if (rc) + /* Handle errors */ diff --git a/Documentation/driver-api/message-based.rst b/Documentation/driver-api/message-based.rst new file mode 100644 index 0000000000..18ff94ef6d --- /dev/null +++ b/Documentation/driver-api/message-based.rst @@ -0,0 +1,12 @@ +Message-based devices +===================== + +Fusion message devices +---------------------- + +.. kernel-doc:: drivers/message/fusion/mptbase.c + :export: + +.. kernel-doc:: drivers/message/fusion/mptscsih.c + :export: + diff --git a/Documentation/driver-api/misc_devices.rst b/Documentation/driver-api/misc_devices.rst new file mode 100644 index 0000000000..c7ee7b02ba --- /dev/null +++ b/Documentation/driver-api/misc_devices.rst @@ -0,0 +1,5 @@ +Miscellaneous Devices +===================== + +.. kernel-doc:: drivers/char/misc.c + :export: diff --git a/Documentation/driver-api/miscellaneous.rst b/Documentation/driver-api/miscellaneous.rst new file mode 100644 index 0000000000..4a5104a368 --- /dev/null +++ b/Documentation/driver-api/miscellaneous.rst @@ -0,0 +1,48 @@ +Parallel Port Devices +===================== + +.. kernel-doc:: include/linux/parport.h + :internal: + +.. kernel-doc:: drivers/parport/ieee1284.c + :export: + +.. kernel-doc:: drivers/parport/share.c + :export: + +.. kernel-doc:: drivers/parport/daisy.c + :internal: + +16x50 UART Driver +================= + +.. kernel-doc:: drivers/tty/serial/8250/8250_core.c + :export: + +See serial/driver.rst for related APIs. + +Pulse-Width Modulation (PWM) +============================ + +Pulse-width modulation is a modulation technique primarily used to +control power supplied to electrical devices. + +The PWM framework provides an abstraction for providers and consumers of +PWM signals. A controller that provides one or more PWM signals is +registered as :c:type:`struct pwm_chip <pwm_chip>`. Providers +are expected to embed this structure in a driver-specific structure. +This structure contains fields that describe a particular chip. + +A chip exposes one or more PWM signal sources, each of which exposed as +a :c:type:`struct pwm_device <pwm_device>`. Operations can be +performed on PWM devices to control the period, duty cycle, polarity and +active state of the signal. + +Note that PWM devices are exclusive resources: they can always only be +used by one consumer at a time. + +.. kernel-doc:: include/linux/pwm.h + :internal: + +.. kernel-doc:: drivers/pwm/core.c + :export: diff --git a/Documentation/driver-api/mmc/index.rst b/Documentation/driver-api/mmc/index.rst new file mode 100644 index 0000000000..7339736ac7 --- /dev/null +++ b/Documentation/driver-api/mmc/index.rst @@ -0,0 +1,13 @@ +.. SPDX-License-Identifier: GPL-2.0 + +======================== +MMC/SD/SDIO card support +======================== + +.. toctree:: + :maxdepth: 1 + + mmc-dev-attrs + mmc-dev-parts + mmc-async-req + mmc-tools diff --git a/Documentation/driver-api/mmc/mmc-async-req.rst b/Documentation/driver-api/mmc/mmc-async-req.rst new file mode 100644 index 0000000000..0f7197c9c3 --- /dev/null +++ b/Documentation/driver-api/mmc/mmc-async-req.rst @@ -0,0 +1,98 @@ +======================== +MMC Asynchronous Request +======================== + +Rationale +========= + +How significant is the cache maintenance overhead? + +It depends. Fast eMMC and multiple cache levels with speculative cache +pre-fetch makes the cache overhead relatively significant. If the DMA +preparations for the next request are done in parallel with the current +transfer, the DMA preparation overhead would not affect the MMC performance. + +The intention of non-blocking (asynchronous) MMC requests is to minimize the +time between when an MMC request ends and another MMC request begins. + +Using mmc_wait_for_req(), the MMC controller is idle while dma_map_sg and +dma_unmap_sg are processing. Using non-blocking MMC requests makes it +possible to prepare the caches for next job in parallel with an active +MMC request. + +MMC block driver +================ + +The mmc_blk_issue_rw_rq() in the MMC block driver is made non-blocking. + +The increase in throughput is proportional to the time it takes to +prepare (major part of preparations are dma_map_sg() and dma_unmap_sg()) +a request and how fast the memory is. The faster the MMC/SD is the +more significant the prepare request time becomes. Roughly the expected +performance gain is 5% for large writes and 10% on large reads on a L2 cache +platform. In power save mode, when clocks run on a lower frequency, the DMA +preparation may cost even more. As long as these slower preparations are run +in parallel with the transfer performance won't be affected. + +Details on measurements from IOZone and mmc_test +================================================ + +https://wiki.linaro.org/WorkingGroups/Kernel/Specs/StoragePerfMMC-async-req + +MMC core API extension +====================== + +There is one new public function mmc_start_req(). + +It starts a new MMC command request for a host. The function isn't +truly non-blocking. If there is an ongoing async request it waits +for completion of that request and starts the new one and returns. It +doesn't wait for the new request to complete. If there is no ongoing +request it starts the new request and returns immediately. + +MMC host extensions +=================== + +There are two optional members in the mmc_host_ops -- pre_req() and +post_req() -- that the host driver may implement in order to move work +to before and after the actual mmc_host_ops.request() function is called. + +In the DMA case pre_req() may do dma_map_sg() and prepare the DMA +descriptor, and post_req() runs the dma_unmap_sg(). + +Optimize for the first request +============================== + +The first request in a series of requests can't be prepared in parallel +with the previous transfer, since there is no previous request. + +The argument is_first_req in pre_req() indicates that there is no previous +request. The host driver may optimize for this scenario to minimize +the performance loss. A way to optimize for this is to split the current +request in two chunks, prepare the first chunk and start the request, +and finally prepare the second chunk and start the transfer. + +Pseudocode to handle is_first_req scenario with minimal prepare overhead:: + + if (is_first_req && req->size > threshold) + /* start MMC transfer for the complete transfer size */ + mmc_start_command(MMC_CMD_TRANSFER_FULL_SIZE); + + /* + * Begin to prepare DMA while cmd is being processed by MMC. + * The first chunk of the request should take the same time + * to prepare as the "MMC process command time". + * If prepare time exceeds MMC cmd time + * the transfer is delayed, guesstimate max 4k as first chunk size. + */ + prepare_1st_chunk_for_dma(req); + /* flush pending desc to the DMAC (dmaengine.h) */ + dma_issue_pending(req->dma_desc); + + prepare_2nd_chunk_for_dma(req); + /* + * The second issue_pending should be called before MMC runs out + * of the first chunk. If the MMC runs out of the first data chunk + * before this call, the transfer is delayed. + */ + dma_issue_pending(req->dma_desc); diff --git a/Documentation/driver-api/mmc/mmc-dev-attrs.rst b/Documentation/driver-api/mmc/mmc-dev-attrs.rst new file mode 100644 index 0000000000..4f44b1b730 --- /dev/null +++ b/Documentation/driver-api/mmc/mmc-dev-attrs.rst @@ -0,0 +1,91 @@ +================================== +SD and MMC Block Device Attributes +================================== + +These attributes are defined for the block devices associated with the +SD or MMC device. + +The following attributes are read/write. + + ======== =============================================== + force_ro Enforce read-only access even if write protect switch is off. + ======== =============================================== + +SD and MMC Device Attributes +============================ + +All attributes are read-only. + + ====================== =============================================== + cid Card Identification Register + csd Card Specific Data Register + scr SD Card Configuration Register (SD only) + date Manufacturing Date (from CID Register) + fwrev Firmware/Product Revision (from CID Register) + (SD and MMCv1 only) + hwrev Hardware/Product Revision (from CID Register) + (SD and MMCv1 only) + manfid Manufacturer ID (from CID Register) + name Product Name (from CID Register) + oemid OEM/Application ID (from CID Register) + prv Product Revision (from CID Register) + (SD and MMCv4 only) + serial Product Serial Number (from CID Register) + erase_size Erase group size + preferred_erase_size Preferred erase size + raw_rpmb_size_mult RPMB partition size + rel_sectors Reliable write sector count + ocr Operation Conditions Register + dsr Driver Stage Register + cmdq_en Command Queue enabled: + + 1 => enabled, 0 => not enabled + ====================== =============================================== + +Note on Erase Size and Preferred Erase Size: + + "erase_size" is the minimum size, in bytes, of an erase + operation. For MMC, "erase_size" is the erase group size + reported by the card. Note that "erase_size" does not apply + to trim or secure trim operations where the minimum size is + always one 512 byte sector. For SD, "erase_size" is 512 + if the card is block-addressed, 0 otherwise. + + SD/MMC cards can erase an arbitrarily large area up to and + including the whole card. When erasing a large area it may + be desirable to do it in smaller chunks for three reasons: + + 1. A single erase command will make all other I/O on + the card wait. This is not a problem if the whole card + is being erased, but erasing one partition will make + I/O for another partition on the same card wait for the + duration of the erase - which could be a several + minutes. + 2. To be able to inform the user of erase progress. + 3. The erase timeout becomes too large to be very + useful. Because the erase timeout contains a margin + which is multiplied by the size of the erase area, + the value can end up being several minutes for large + areas. + + "erase_size" is not the most efficient unit to erase + (especially for SD where it is just one sector), + hence "preferred_erase_size" provides a good chunk + size for erasing large areas. + + For MMC, "preferred_erase_size" is the high-capacity + erase size if a card specifies one, otherwise it is + based on the capacity of the card. + + For SD, "preferred_erase_size" is the allocation unit + size specified by the card. + + "preferred_erase_size" is in bytes. + +Note on raw_rpmb_size_mult: + + "raw_rpmb_size_mult" is a multiple of 128kB block. + + RPMB size in byte is calculated by using the following equation: + + RPMB partition size = 128kB x raw_rpmb_size_mult diff --git a/Documentation/driver-api/mmc/mmc-dev-parts.rst b/Documentation/driver-api/mmc/mmc-dev-parts.rst new file mode 100644 index 0000000000..995922f1f7 --- /dev/null +++ b/Documentation/driver-api/mmc/mmc-dev-parts.rst @@ -0,0 +1,41 @@ +============================ +SD and MMC Device Partitions +============================ + +Device partitions are additional logical block devices present on the +SD/MMC device. + +As of this writing, MMC boot partitions as supported and exposed as +/dev/mmcblkXboot0 and /dev/mmcblkXboot1, where X is the index of the +parent /dev/mmcblkX. + +MMC Boot Partitions +=================== + +Read and write access is provided to the two MMC boot partitions. Due to +the sensitive nature of the boot partition contents, which often store +a bootloader or bootloader configuration tables crucial to booting the +platform, write access is disabled by default to reduce the chance of +accidental bricking. + +To enable write access to /dev/mmcblkXbootY, disable the forced read-only +access with:: + + echo 0 > /sys/block/mmcblkXbootY/force_ro + +To re-enable read-only access:: + + echo 1 > /sys/block/mmcblkXbootY/force_ro + +The boot partitions can also be locked read only until the next power on, +with:: + + echo 1 > /sys/block/mmcblkXbootY/ro_lock_until_next_power_on + +This is a feature of the card and not of the kernel. If the card does +not support boot partition locking, the file will not exist. If the +feature has been disabled on the card, the file will be read-only. + +The boot partitions can also be locked permanently, but this feature is +not accessible through sysfs in order to avoid accidental or malicious +bricking. diff --git a/Documentation/driver-api/mmc/mmc-tools.rst b/Documentation/driver-api/mmc/mmc-tools.rst new file mode 100644 index 0000000000..eee1c2ccfa --- /dev/null +++ b/Documentation/driver-api/mmc/mmc-tools.rst @@ -0,0 +1,37 @@ +====================== +MMC tools introduction +====================== + +There is one MMC test tools called mmc-utils, which is maintained by Ulf Hansson, +you can find it at the below public git repository: + + https://git.kernel.org/pub/scm/utils/mmc/mmc-utils.git + +Functions +========= + +The mmc-utils tools can do the following: + + - Print and parse extcsd data. + - Determine the eMMC writeprotect status. + - Set the eMMC writeprotect status. + - Set the eMMC data sector size to 4KB by disabling emulation. + - Create general purpose partition. + - Enable the enhanced user area. + - Enable write reliability per partition. + - Print the response to STATUS_SEND (CMD13). + - Enable the boot partition. + - Set Boot Bus Conditions. + - Enable the eMMC BKOPS feature. + - Permanently enable the eMMC H/W Reset feature. + - Permanently disable the eMMC H/W Reset feature. + - Send Sanitize command. + - Program authentication key for the device. + - Counter value for the rpmb device will be read to stdout. + - Read from rpmb device to output. + - Write to rpmb device from data file. + - Enable the eMMC cache feature. + - Disable the eMMC cache feature. + - Print and parse CID data. + - Print and parse CSD data. + - Print and parse SCR data. diff --git a/Documentation/driver-api/mtd/index.rst b/Documentation/driver-api/mtd/index.rst new file mode 100644 index 0000000000..6a4278f409 --- /dev/null +++ b/Documentation/driver-api/mtd/index.rst @@ -0,0 +1,12 @@ +.. SPDX-License-Identifier: GPL-2.0 + +============================== +Memory Technology Device (MTD) +============================== + +.. toctree:: + :maxdepth: 1 + + spi-intel + nand_ecc + spi-nor diff --git a/Documentation/driver-api/mtd/nand_ecc.rst b/Documentation/driver-api/mtd/nand_ecc.rst new file mode 100644 index 0000000000..74347c14a7 --- /dev/null +++ b/Documentation/driver-api/mtd/nand_ecc.rst @@ -0,0 +1,763 @@ +========================== +NAND Error-correction Code +========================== + +Introduction +============ + +Having looked at the linux mtd/nand Hamming software ECC engine driver +I felt there was room for optimisation. I bashed the code for a few hours +performing tricks like table lookup removing superfluous code etc. +After that the speed was increased by 35-40%. +Still I was not too happy as I felt there was additional room for improvement. + +Bad! I was hooked. +I decided to annotate my steps in this file. Perhaps it is useful to someone +or someone learns something from it. + + +The problem +=========== + +NAND flash (at least SLC one) typically has sectors of 256 bytes. +However NAND flash is not extremely reliable so some error detection +(and sometimes correction) is needed. + +This is done by means of a Hamming code. I'll try to explain it in +laymans terms (and apologies to all the pro's in the field in case I do +not use the right terminology, my coding theory class was almost 30 +years ago, and I must admit it was not one of my favourites). + +As I said before the ecc calculation is performed on sectors of 256 +bytes. This is done by calculating several parity bits over the rows and +columns. The parity used is even parity which means that the parity bit = 1 +if the data over which the parity is calculated is 1 and the parity bit = 0 +if the data over which the parity is calculated is 0. So the total +number of bits over the data over which the parity is calculated + the +parity bit is even. (see wikipedia if you can't follow this). +Parity is often calculated by means of an exclusive or operation, +sometimes also referred to as xor. In C the operator for xor is ^ + +Back to ecc. +Let's give a small figure: + +========= ==== ==== ==== ==== ==== ==== ==== ==== === === === === ==== +byte 0: bit7 bit6 bit5 bit4 bit3 bit2 bit1 bit0 rp0 rp2 rp4 ... rp14 +byte 1: bit7 bit6 bit5 bit4 bit3 bit2 bit1 bit0 rp1 rp2 rp4 ... rp14 +byte 2: bit7 bit6 bit5 bit4 bit3 bit2 bit1 bit0 rp0 rp3 rp4 ... rp14 +byte 3: bit7 bit6 bit5 bit4 bit3 bit2 bit1 bit0 rp1 rp3 rp4 ... rp14 +byte 4: bit7 bit6 bit5 bit4 bit3 bit2 bit1 bit0 rp0 rp2 rp5 ... rp14 +... +byte 254: bit7 bit6 bit5 bit4 bit3 bit2 bit1 bit0 rp0 rp3 rp5 ... rp15 +byte 255: bit7 bit6 bit5 bit4 bit3 bit2 bit1 bit0 rp1 rp3 rp5 ... rp15 + cp1 cp0 cp1 cp0 cp1 cp0 cp1 cp0 + cp3 cp3 cp2 cp2 cp3 cp3 cp2 cp2 + cp5 cp5 cp5 cp5 cp4 cp4 cp4 cp4 +========= ==== ==== ==== ==== ==== ==== ==== ==== === === === === ==== + +This figure represents a sector of 256 bytes. +cp is my abbreviation for column parity, rp for row parity. + +Let's start to explain column parity. + +- cp0 is the parity that belongs to all bit0, bit2, bit4, bit6. + + so the sum of all bit0, bit2, bit4 and bit6 values + cp0 itself is even. + +Similarly cp1 is the sum of all bit1, bit3, bit5 and bit7. + +- cp2 is the parity over bit0, bit1, bit4 and bit5 +- cp3 is the parity over bit2, bit3, bit6 and bit7. +- cp4 is the parity over bit0, bit1, bit2 and bit3. +- cp5 is the parity over bit4, bit5, bit6 and bit7. + +Note that each of cp0 .. cp5 is exactly one bit. + +Row parity actually works almost the same. + +- rp0 is the parity of all even bytes (0, 2, 4, 6, ... 252, 254) +- rp1 is the parity of all odd bytes (1, 3, 5, 7, ..., 253, 255) +- rp2 is the parity of all bytes 0, 1, 4, 5, 8, 9, ... + (so handle two bytes, then skip 2 bytes). +- rp3 is covers the half rp2 does not cover (bytes 2, 3, 6, 7, 10, 11, ...) +- for rp4 the rule is cover 4 bytes, skip 4 bytes, cover 4 bytes, skip 4 etc. + + so rp4 calculates parity over bytes 0, 1, 2, 3, 8, 9, 10, 11, 16, ...) +- and rp5 covers the other half, so bytes 4, 5, 6, 7, 12, 13, 14, 15, 20, .. + +The story now becomes quite boring. I guess you get the idea. + +- rp6 covers 8 bytes then skips 8 etc +- rp7 skips 8 bytes then covers 8 etc +- rp8 covers 16 bytes then skips 16 etc +- rp9 skips 16 bytes then covers 16 etc +- rp10 covers 32 bytes then skips 32 etc +- rp11 skips 32 bytes then covers 32 etc +- rp12 covers 64 bytes then skips 64 etc +- rp13 skips 64 bytes then covers 64 etc +- rp14 covers 128 bytes then skips 128 +- rp15 skips 128 bytes then covers 128 + +In the end the parity bits are grouped together in three bytes as +follows: + +===== ===== ===== ===== ===== ===== ===== ===== ===== +ECC Bit 7 Bit 6 Bit 5 Bit 4 Bit 3 Bit 2 Bit 1 Bit 0 +===== ===== ===== ===== ===== ===== ===== ===== ===== +ECC 0 rp07 rp06 rp05 rp04 rp03 rp02 rp01 rp00 +ECC 1 rp15 rp14 rp13 rp12 rp11 rp10 rp09 rp08 +ECC 2 cp5 cp4 cp3 cp2 cp1 cp0 1 1 +===== ===== ===== ===== ===== ===== ===== ===== ===== + +I detected after writing this that ST application note AN1823 +(http://www.st.com/stonline/) gives a much +nicer picture.(but they use line parity as term where I use row parity) +Oh well, I'm graphically challenged, so suffer with me for a moment :-) + +And I could not reuse the ST picture anyway for copyright reasons. + + +Attempt 0 +========= + +Implementing the parity calculation is pretty simple. +In C pseudocode:: + + for (i = 0; i < 256; i++) + { + if (i & 0x01) + rp1 = bit7 ^ bit6 ^ bit5 ^ bit4 ^ bit3 ^ bit2 ^ bit1 ^ bit0 ^ rp1; + else + rp0 = bit7 ^ bit6 ^ bit5 ^ bit4 ^ bit3 ^ bit2 ^ bit1 ^ bit0 ^ rp0; + if (i & 0x02) + rp3 = bit7 ^ bit6 ^ bit5 ^ bit4 ^ bit3 ^ bit2 ^ bit1 ^ bit0 ^ rp3; + else + rp2 = bit7 ^ bit6 ^ bit5 ^ bit4 ^ bit3 ^ bit2 ^ bit1 ^ bit0 ^ rp2; + if (i & 0x04) + rp5 = bit7 ^ bit6 ^ bit5 ^ bit4 ^ bit3 ^ bit2 ^ bit1 ^ bit0 ^ rp5; + else + rp4 = bit7 ^ bit6 ^ bit5 ^ bit4 ^ bit3 ^ bit2 ^ bit1 ^ bit0 ^ rp4; + if (i & 0x08) + rp7 = bit7 ^ bit6 ^ bit5 ^ bit4 ^ bit3 ^ bit2 ^ bit1 ^ bit0 ^ rp7; + else + rp6 = bit7 ^ bit6 ^ bit5 ^ bit4 ^ bit3 ^ bit2 ^ bit1 ^ bit0 ^ rp6; + if (i & 0x10) + rp9 = bit7 ^ bit6 ^ bit5 ^ bit4 ^ bit3 ^ bit2 ^ bit1 ^ bit0 ^ rp9; + else + rp8 = bit7 ^ bit6 ^ bit5 ^ bit4 ^ bit3 ^ bit2 ^ bit1 ^ bit0 ^ rp8; + if (i & 0x20) + rp11 = bit7 ^ bit6 ^ bit5 ^ bit4 ^ bit3 ^ bit2 ^ bit1 ^ bit0 ^ rp11; + else + rp10 = bit7 ^ bit6 ^ bit5 ^ bit4 ^ bit3 ^ bit2 ^ bit1 ^ bit0 ^ rp10; + if (i & 0x40) + rp13 = bit7 ^ bit6 ^ bit5 ^ bit4 ^ bit3 ^ bit2 ^ bit1 ^ bit0 ^ rp13; + else + rp12 = bit7 ^ bit6 ^ bit5 ^ bit4 ^ bit3 ^ bit2 ^ bit1 ^ bit0 ^ rp12; + if (i & 0x80) + rp15 = bit7 ^ bit6 ^ bit5 ^ bit4 ^ bit3 ^ bit2 ^ bit1 ^ bit0 ^ rp15; + else + rp14 = bit7 ^ bit6 ^ bit5 ^ bit4 ^ bit3 ^ bit2 ^ bit1 ^ bit0 ^ rp14; + cp0 = bit6 ^ bit4 ^ bit2 ^ bit0 ^ cp0; + cp1 = bit7 ^ bit5 ^ bit3 ^ bit1 ^ cp1; + cp2 = bit5 ^ bit4 ^ bit1 ^ bit0 ^ cp2; + cp3 = bit7 ^ bit6 ^ bit3 ^ bit2 ^ cp3 + cp4 = bit3 ^ bit2 ^ bit1 ^ bit0 ^ cp4 + cp5 = bit7 ^ bit6 ^ bit5 ^ bit4 ^ cp5 + } + + +Analysis 0 +========== + +C does have bitwise operators but not really operators to do the above +efficiently (and most hardware has no such instructions either). +Therefore without implementing this it was clear that the code above was +not going to bring me a Nobel prize :-) + +Fortunately the exclusive or operation is commutative, so we can combine +the values in any order. So instead of calculating all the bits +individually, let us try to rearrange things. +For the column parity this is easy. We can just xor the bytes and in the +end filter out the relevant bits. This is pretty nice as it will bring +all cp calculation out of the for loop. + +Similarly we can first xor the bytes for the various rows. +This leads to: + + +Attempt 1 +========= + +:: + + const char parity[256] = { + 0, 1, 1, 0, 1, 0, 0, 1, 1, 0, 0, 1, 0, 1, 1, 0, + 1, 0, 0, 1, 0, 1, 1, 0, 0, 1, 1, 0, 1, 0, 0, 1, + 1, 0, 0, 1, 0, 1, 1, 0, 0, 1, 1, 0, 1, 0, 0, 1, + 0, 1, 1, 0, 1, 0, 0, 1, 1, 0, 0, 1, 0, 1, 1, 0, + 1, 0, 0, 1, 0, 1, 1, 0, 0, 1, 1, 0, 1, 0, 0, 1, + 0, 1, 1, 0, 1, 0, 0, 1, 1, 0, 0, 1, 0, 1, 1, 0, + 0, 1, 1, 0, 1, 0, 0, 1, 1, 0, 0, 1, 0, 1, 1, 0, + 1, 0, 0, 1, 0, 1, 1, 0, 0, 1, 1, 0, 1, 0, 0, 1, + 1, 0, 0, 1, 0, 1, 1, 0, 0, 1, 1, 0, 1, 0, 0, 1, + 0, 1, 1, 0, 1, 0, 0, 1, 1, 0, 0, 1, 0, 1, 1, 0, + 0, 1, 1, 0, 1, 0, 0, 1, 1, 0, 0, 1, 0, 1, 1, 0, + 1, 0, 0, 1, 0, 1, 1, 0, 0, 1, 1, 0, 1, 0, 0, 1, + 0, 1, 1, 0, 1, 0, 0, 1, 1, 0, 0, 1, 0, 1, 1, 0, + 1, 0, 0, 1, 0, 1, 1, 0, 0, 1, 1, 0, 1, 0, 0, 1, + 1, 0, 0, 1, 0, 1, 1, 0, 0, 1, 1, 0, 1, 0, 0, 1, + 0, 1, 1, 0, 1, 0, 0, 1, 1, 0, 0, 1, 0, 1, 1, 0 + }; + + void ecc1(const unsigned char *buf, unsigned char *code) + { + int i; + const unsigned char *bp = buf; + unsigned char cur; + unsigned char rp0, rp1, rp2, rp3, rp4, rp5, rp6, rp7; + unsigned char rp8, rp9, rp10, rp11, rp12, rp13, rp14, rp15; + unsigned char par; + + par = 0; + rp0 = 0; rp1 = 0; rp2 = 0; rp3 = 0; + rp4 = 0; rp5 = 0; rp6 = 0; rp7 = 0; + rp8 = 0; rp9 = 0; rp10 = 0; rp11 = 0; + rp12 = 0; rp13 = 0; rp14 = 0; rp15 = 0; + + for (i = 0; i < 256; i++) + { + cur = *bp++; + par ^= cur; + if (i & 0x01) rp1 ^= cur; else rp0 ^= cur; + if (i & 0x02) rp3 ^= cur; else rp2 ^= cur; + if (i & 0x04) rp5 ^= cur; else rp4 ^= cur; + if (i & 0x08) rp7 ^= cur; else rp6 ^= cur; + if (i & 0x10) rp9 ^= cur; else rp8 ^= cur; + if (i & 0x20) rp11 ^= cur; else rp10 ^= cur; + if (i & 0x40) rp13 ^= cur; else rp12 ^= cur; + if (i & 0x80) rp15 ^= cur; else rp14 ^= cur; + } + code[0] = + (parity[rp7] << 7) | + (parity[rp6] << 6) | + (parity[rp5] << 5) | + (parity[rp4] << 4) | + (parity[rp3] << 3) | + (parity[rp2] << 2) | + (parity[rp1] << 1) | + (parity[rp0]); + code[1] = + (parity[rp15] << 7) | + (parity[rp14] << 6) | + (parity[rp13] << 5) | + (parity[rp12] << 4) | + (parity[rp11] << 3) | + (parity[rp10] << 2) | + (parity[rp9] << 1) | + (parity[rp8]); + code[2] = + (parity[par & 0xf0] << 7) | + (parity[par & 0x0f] << 6) | + (parity[par & 0xcc] << 5) | + (parity[par & 0x33] << 4) | + (parity[par & 0xaa] << 3) | + (parity[par & 0x55] << 2); + code[0] = ~code[0]; + code[1] = ~code[1]; + code[2] = ~code[2]; + } + +Still pretty straightforward. The last three invert statements are there to +give a checksum of 0xff 0xff 0xff for an empty flash. In an empty flash +all data is 0xff, so the checksum then matches. + +I also introduced the parity lookup. I expected this to be the fastest +way to calculate the parity, but I will investigate alternatives later +on. + + +Analysis 1 +========== + +The code works, but is not terribly efficient. On my system it took +almost 4 times as much time as the linux driver code. But hey, if it was +*that* easy this would have been done long before. +No pain. no gain. + +Fortunately there is plenty of room for improvement. + +In step 1 we moved from bit-wise calculation to byte-wise calculation. +However in C we can also use the unsigned long data type and virtually +every modern microprocessor supports 32 bit operations, so why not try +to write our code in such a way that we process data in 32 bit chunks. + +Of course this means some modification as the row parity is byte by +byte. A quick analysis: +for the column parity we use the par variable. When extending to 32 bits +we can in the end easily calculate rp0 and rp1 from it. +(because par now consists of 4 bytes, contributing to rp1, rp0, rp1, rp0 +respectively, from MSB to LSB) +also rp2 and rp3 can be easily retrieved from par as rp3 covers the +first two MSBs and rp2 covers the last two LSBs. + +Note that of course now the loop is executed only 64 times (256/4). +And note that care must taken wrt byte ordering. The way bytes are +ordered in a long is machine dependent, and might affect us. +Anyway, if there is an issue: this code is developed on x86 (to be +precise: a DELL PC with a D920 Intel CPU) + +And of course the performance might depend on alignment, but I expect +that the I/O buffers in the nand driver are aligned properly (and +otherwise that should be fixed to get maximum performance). + +Let's give it a try... + + +Attempt 2 +========= + +:: + + extern const char parity[256]; + + void ecc2(const unsigned char *buf, unsigned char *code) + { + int i; + const unsigned long *bp = (unsigned long *)buf; + unsigned long cur; + unsigned long rp0, rp1, rp2, rp3, rp4, rp5, rp6, rp7; + unsigned long rp8, rp9, rp10, rp11, rp12, rp13, rp14, rp15; + unsigned long par; + + par = 0; + rp0 = 0; rp1 = 0; rp2 = 0; rp3 = 0; + rp4 = 0; rp5 = 0; rp6 = 0; rp7 = 0; + rp8 = 0; rp9 = 0; rp10 = 0; rp11 = 0; + rp12 = 0; rp13 = 0; rp14 = 0; rp15 = 0; + + for (i = 0; i < 64; i++) + { + cur = *bp++; + par ^= cur; + if (i & 0x01) rp5 ^= cur; else rp4 ^= cur; + if (i & 0x02) rp7 ^= cur; else rp6 ^= cur; + if (i & 0x04) rp9 ^= cur; else rp8 ^= cur; + if (i & 0x08) rp11 ^= cur; else rp10 ^= cur; + if (i & 0x10) rp13 ^= cur; else rp12 ^= cur; + if (i & 0x20) rp15 ^= cur; else rp14 ^= cur; + } + /* + we need to adapt the code generation for the fact that rp vars are now + long; also the column parity calculation needs to be changed. + we'll bring rp4 to 15 back to single byte entities by shifting and + xoring + */ + rp4 ^= (rp4 >> 16); rp4 ^= (rp4 >> 8); rp4 &= 0xff; + rp5 ^= (rp5 >> 16); rp5 ^= (rp5 >> 8); rp5 &= 0xff; + rp6 ^= (rp6 >> 16); rp6 ^= (rp6 >> 8); rp6 &= 0xff; + rp7 ^= (rp7 >> 16); rp7 ^= (rp7 >> 8); rp7 &= 0xff; + rp8 ^= (rp8 >> 16); rp8 ^= (rp8 >> 8); rp8 &= 0xff; + rp9 ^= (rp9 >> 16); rp9 ^= (rp9 >> 8); rp9 &= 0xff; + rp10 ^= (rp10 >> 16); rp10 ^= (rp10 >> 8); rp10 &= 0xff; + rp11 ^= (rp11 >> 16); rp11 ^= (rp11 >> 8); rp11 &= 0xff; + rp12 ^= (rp12 >> 16); rp12 ^= (rp12 >> 8); rp12 &= 0xff; + rp13 ^= (rp13 >> 16); rp13 ^= (rp13 >> 8); rp13 &= 0xff; + rp14 ^= (rp14 >> 16); rp14 ^= (rp14 >> 8); rp14 &= 0xff; + rp15 ^= (rp15 >> 16); rp15 ^= (rp15 >> 8); rp15 &= 0xff; + rp3 = (par >> 16); rp3 ^= (rp3 >> 8); rp3 &= 0xff; + rp2 = par & 0xffff; rp2 ^= (rp2 >> 8); rp2 &= 0xff; + par ^= (par >> 16); + rp1 = (par >> 8); rp1 &= 0xff; + rp0 = (par & 0xff); + par ^= (par >> 8); par &= 0xff; + + code[0] = + (parity[rp7] << 7) | + (parity[rp6] << 6) | + (parity[rp5] << 5) | + (parity[rp4] << 4) | + (parity[rp3] << 3) | + (parity[rp2] << 2) | + (parity[rp1] << 1) | + (parity[rp0]); + code[1] = + (parity[rp15] << 7) | + (parity[rp14] << 6) | + (parity[rp13] << 5) | + (parity[rp12] << 4) | + (parity[rp11] << 3) | + (parity[rp10] << 2) | + (parity[rp9] << 1) | + (parity[rp8]); + code[2] = + (parity[par & 0xf0] << 7) | + (parity[par & 0x0f] << 6) | + (parity[par & 0xcc] << 5) | + (parity[par & 0x33] << 4) | + (parity[par & 0xaa] << 3) | + (parity[par & 0x55] << 2); + code[0] = ~code[0]; + code[1] = ~code[1]; + code[2] = ~code[2]; + } + +The parity array is not shown any more. Note also that for these +examples I kinda deviated from my regular programming style by allowing +multiple statements on a line, not using { } in then and else blocks +with only a single statement and by using operators like ^= + + +Analysis 2 +========== + +The code (of course) works, and hurray: we are a little bit faster than +the linux driver code (about 15%). But wait, don't cheer too quickly. +There is more to be gained. +If we look at e.g. rp14 and rp15 we see that we either xor our data with +rp14 or with rp15. However we also have par which goes over all data. +This means there is no need to calculate rp14 as it can be calculated from +rp15 through rp14 = par ^ rp15, because par = rp14 ^ rp15; +(or if desired we can avoid calculating rp15 and calculate it from +rp14). That is why some places refer to inverse parity. +Of course the same thing holds for rp4/5, rp6/7, rp8/9, rp10/11 and rp12/13. +Effectively this means we can eliminate the else clause from the if +statements. Also we can optimise the calculation in the end a little bit +by going from long to byte first. Actually we can even avoid the table +lookups + +Attempt 3 +========= + +Odd replaced:: + + if (i & 0x01) rp5 ^= cur; else rp4 ^= cur; + if (i & 0x02) rp7 ^= cur; else rp6 ^= cur; + if (i & 0x04) rp9 ^= cur; else rp8 ^= cur; + if (i & 0x08) rp11 ^= cur; else rp10 ^= cur; + if (i & 0x10) rp13 ^= cur; else rp12 ^= cur; + if (i & 0x20) rp15 ^= cur; else rp14 ^= cur; + +with:: + + if (i & 0x01) rp5 ^= cur; + if (i & 0x02) rp7 ^= cur; + if (i & 0x04) rp9 ^= cur; + if (i & 0x08) rp11 ^= cur; + if (i & 0x10) rp13 ^= cur; + if (i & 0x20) rp15 ^= cur; + +and outside the loop added:: + + rp4 = par ^ rp5; + rp6 = par ^ rp7; + rp8 = par ^ rp9; + rp10 = par ^ rp11; + rp12 = par ^ rp13; + rp14 = par ^ rp15; + +And after that the code takes about 30% more time, although the number of +statements is reduced. This is also reflected in the assembly code. + + +Analysis 3 +========== + +Very weird. Guess it has to do with caching or instruction parallellism +or so. I also tried on an eeePC (Celeron, clocked at 900 Mhz). Interesting +observation was that this one is only 30% slower (according to time) +executing the code as my 3Ghz D920 processor. + +Well, it was expected not to be easy so maybe instead move to a +different track: let's move back to the code from attempt2 and do some +loop unrolling. This will eliminate a few if statements. I'll try +different amounts of unrolling to see what works best. + + +Attempt 4 +========= + +Unrolled the loop 1, 2, 3 and 4 times. +For 4 the code starts with:: + + for (i = 0; i < 4; i++) + { + cur = *bp++; + par ^= cur; + rp4 ^= cur; + rp6 ^= cur; + rp8 ^= cur; + rp10 ^= cur; + if (i & 0x1) rp13 ^= cur; else rp12 ^= cur; + if (i & 0x2) rp15 ^= cur; else rp14 ^= cur; + cur = *bp++; + par ^= cur; + rp5 ^= cur; + rp6 ^= cur; + ... + + +Analysis 4 +========== + +Unrolling once gains about 15% + +Unrolling twice keeps the gain at about 15% + +Unrolling three times gives a gain of 30% compared to attempt 2. + +Unrolling four times gives a marginal improvement compared to unrolling +three times. + +I decided to proceed with a four time unrolled loop anyway. It was my gut +feeling that in the next steps I would obtain additional gain from it. + +The next step was triggered by the fact that par contains the xor of all +bytes and rp4 and rp5 each contain the xor of half of the bytes. +So in effect par = rp4 ^ rp5. But as xor is commutative we can also say +that rp5 = par ^ rp4. So no need to keep both rp4 and rp5 around. We can +eliminate rp5 (or rp4, but I already foresaw another optimisation). +The same holds for rp6/7, rp8/9, rp10/11 rp12/13 and rp14/15. + + +Attempt 5 +========= + +Effectively so all odd digit rp assignments in the loop were removed. +This included the else clause of the if statements. +Of course after the loop we need to correct things by adding code like:: + + rp5 = par ^ rp4; + +Also the initial assignments (rp5 = 0; etc) could be removed. +Along the line I also removed the initialisation of rp0/1/2/3. + + +Analysis 5 +========== + +Measurements showed this was a good move. The run-time roughly halved +compared with attempt 4 with 4 times unrolled, and we only require 1/3rd +of the processor time compared to the current code in the linux kernel. + +However, still I thought there was more. I didn't like all the if +statements. Why not keep a running parity and only keep the last if +statement. Time for yet another version! + + +Attempt 6 +========= + +THe code within the for loop was changed to:: + + for (i = 0; i < 4; i++) + { + cur = *bp++; tmppar = cur; rp4 ^= cur; + cur = *bp++; tmppar ^= cur; rp6 ^= tmppar; + cur = *bp++; tmppar ^= cur; rp4 ^= cur; + cur = *bp++; tmppar ^= cur; rp8 ^= tmppar; + + cur = *bp++; tmppar ^= cur; rp4 ^= cur; rp6 ^= cur; + cur = *bp++; tmppar ^= cur; rp6 ^= cur; + cur = *bp++; tmppar ^= cur; rp4 ^= cur; + cur = *bp++; tmppar ^= cur; rp10 ^= tmppar; + + cur = *bp++; tmppar ^= cur; rp4 ^= cur; rp6 ^= cur; rp8 ^= cur; + cur = *bp++; tmppar ^= cur; rp6 ^= cur; rp8 ^= cur; + cur = *bp++; tmppar ^= cur; rp4 ^= cur; rp8 ^= cur; + cur = *bp++; tmppar ^= cur; rp8 ^= cur; + + cur = *bp++; tmppar ^= cur; rp4 ^= cur; rp6 ^= cur; + cur = *bp++; tmppar ^= cur; rp6 ^= cur; + cur = *bp++; tmppar ^= cur; rp4 ^= cur; + cur = *bp++; tmppar ^= cur; + + par ^= tmppar; + if ((i & 0x1) == 0) rp12 ^= tmppar; + if ((i & 0x2) == 0) rp14 ^= tmppar; + } + +As you can see tmppar is used to accumulate the parity within a for +iteration. In the last 3 statements is added to par and, if needed, +to rp12 and rp14. + +While making the changes I also found that I could exploit that tmppar +contains the running parity for this iteration. So instead of having: +rp4 ^= cur; rp6 ^= cur; +I removed the rp6 ^= cur; statement and did rp6 ^= tmppar; on next +statement. A similar change was done for rp8 and rp10 + + +Analysis 6 +========== + +Measuring this code again showed big gain. When executing the original +linux code 1 million times, this took about 1 second on my system. +(using time to measure the performance). After this iteration I was back +to 0.075 sec. Actually I had to decide to start measuring over 10 +million iterations in order not to lose too much accuracy. This one +definitely seemed to be the jackpot! + +There is a little bit more room for improvement though. There are three +places with statements:: + + rp4 ^= cur; rp6 ^= cur; + +It seems more efficient to also maintain a variable rp4_6 in the while +loop; This eliminates 3 statements per loop. Of course after the loop we +need to correct by adding:: + + rp4 ^= rp4_6; + rp6 ^= rp4_6 + +Furthermore there are 4 sequential assignments to rp8. This can be +encoded slightly more efficiently by saving tmppar before those 4 lines +and later do rp8 = rp8 ^ tmppar ^ notrp8; +(where notrp8 is the value of rp8 before those 4 lines). +Again a use of the commutative property of xor. +Time for a new test! + + +Attempt 7 +========= + +The new code now looks like:: + + for (i = 0; i < 4; i++) + { + cur = *bp++; tmppar = cur; rp4 ^= cur; + cur = *bp++; tmppar ^= cur; rp6 ^= tmppar; + cur = *bp++; tmppar ^= cur; rp4 ^= cur; + cur = *bp++; tmppar ^= cur; rp8 ^= tmppar; + + cur = *bp++; tmppar ^= cur; rp4_6 ^= cur; + cur = *bp++; tmppar ^= cur; rp6 ^= cur; + cur = *bp++; tmppar ^= cur; rp4 ^= cur; + cur = *bp++; tmppar ^= cur; rp10 ^= tmppar; + + notrp8 = tmppar; + cur = *bp++; tmppar ^= cur; rp4_6 ^= cur; + cur = *bp++; tmppar ^= cur; rp6 ^= cur; + cur = *bp++; tmppar ^= cur; rp4 ^= cur; + cur = *bp++; tmppar ^= cur; + rp8 = rp8 ^ tmppar ^ notrp8; + + cur = *bp++; tmppar ^= cur; rp4_6 ^= cur; + cur = *bp++; tmppar ^= cur; rp6 ^= cur; + cur = *bp++; tmppar ^= cur; rp4 ^= cur; + cur = *bp++; tmppar ^= cur; + + par ^= tmppar; + if ((i & 0x1) == 0) rp12 ^= tmppar; + if ((i & 0x2) == 0) rp14 ^= tmppar; + } + rp4 ^= rp4_6; + rp6 ^= rp4_6; + + +Not a big change, but every penny counts :-) + + +Analysis 7 +========== + +Actually this made things worse. Not very much, but I don't want to move +into the wrong direction. Maybe something to investigate later. Could +have to do with caching again. + +Guess that is what there is to win within the loop. Maybe unrolling one +more time will help. I'll keep the optimisations from 7 for now. + + +Attempt 8 +========= + +Unrolled the loop one more time. + + +Analysis 8 +========== + +This makes things worse. Let's stick with attempt 6 and continue from there. +Although it seems that the code within the loop cannot be optimised +further there is still room to optimize the generation of the ecc codes. +We can simply calculate the total parity. If this is 0 then rp4 = rp5 +etc. If the parity is 1, then rp4 = !rp5; + +But if rp4 = rp5 we do not need rp5 etc. We can just write the even bits +in the result byte and then do something like:: + + code[0] |= (code[0] << 1); + +Lets test this. + + +Attempt 9 +========= + +Changed the code but again this slightly degrades performance. Tried all +kind of other things, like having dedicated parity arrays to avoid the +shift after parity[rp7] << 7; No gain. +Change the lookup using the parity array by using shift operators (e.g. +replace parity[rp7] << 7 with:: + + rp7 ^= (rp7 << 4); + rp7 ^= (rp7 << 2); + rp7 ^= (rp7 << 1); + rp7 &= 0x80; + +No gain. + +The only marginal change was inverting the parity bits, so we can remove +the last three invert statements. + +Ah well, pity this does not deliver more. Then again 10 million +iterations using the linux driver code takes between 13 and 13.5 +seconds, whereas my code now takes about 0.73 seconds for those 10 +million iterations. So basically I've improved the performance by a +factor 18 on my system. Not that bad. Of course on different hardware +you will get different results. No warranties! + +But of course there is no such thing as a free lunch. The codesize almost +tripled (from 562 bytes to 1434 bytes). Then again, it is not that much. + + +Correcting errors +================= + +For correcting errors I again used the ST application note as a starter, +but I also peeked at the existing code. + +The algorithm itself is pretty straightforward. Just xor the given and +the calculated ecc. If all bytes are 0 there is no problem. If 11 bits +are 1 we have one correctable bit error. If there is 1 bit 1, we have an +error in the given ecc code. + +It proved to be fastest to do some table lookups. Performance gain +introduced by this is about a factor 2 on my system when a repair had to +be done, and 1% or so if no repair had to be done. + +Code size increased from 330 bytes to 686 bytes for this function. +(gcc 4.2, -O3) + + +Conclusion +========== + +The gain when calculating the ecc is tremendous. Om my development hardware +a speedup of a factor of 18 for ecc calculation was achieved. On a test on an +embedded system with a MIPS core a factor 7 was obtained. + +On a test with a Linksys NSLU2 (ARMv5TE processor) the speedup was a factor +5 (big endian mode, gcc 4.1.2, -O3) + +For correction not much gain could be obtained (as bitflips are rare). Then +again there are also much less cycles spent there. + +It seems there is not much more gain possible in this, at least when +programmed in C. Of course it might be possible to squeeze something more +out of it with an assembler program, but due to pipeline behaviour etc +this is very tricky (at least for intel hw). + +Author: Frans Meulenbroeks + +Copyright (C) 2008 Koninklijke Philips Electronics NV. diff --git a/Documentation/driver-api/mtd/spi-intel.rst b/Documentation/driver-api/mtd/spi-intel.rst new file mode 100644 index 0000000000..df854f20ea --- /dev/null +++ b/Documentation/driver-api/mtd/spi-intel.rst @@ -0,0 +1,90 @@ +============================== +Upgrading BIOS using spi-intel +============================== + +Many Intel CPUs like Baytrail and Braswell include SPI serial flash host +controller which is used to hold BIOS and other platform specific data. +Since contents of the SPI serial flash is crucial for machine to function, +it is typically protected by different hardware protection mechanisms to +avoid accidental (or on purpose) overwrite of the content. + +Not all manufacturers protect the SPI serial flash, mainly because it +allows upgrading the BIOS image directly from an OS. + +The spi-intel driver makes it possible to read and write the SPI serial +flash, if certain protection bits are not set and locked. If it finds +any of them set, the whole MTD device is made read-only to prevent +partial overwrites. By default the driver exposes SPI serial flash +contents as read-only but it can be changed from kernel command line, +passing "spi_intel.writeable=1". + +Please keep in mind that overwriting the BIOS image on SPI serial flash +might render the machine unbootable and requires special equipment like +Dediprog to revive. You have been warned! + +Below are the steps how to upgrade MinnowBoard MAX BIOS directly from +Linux. + + 1) Download and extract the latest Minnowboard MAX BIOS SPI image + [1]. At the time writing this the latest image is v92. + + 2) Install mtd-utils package [2]. We need this in order to erase the SPI + serial flash. Distros like Debian and Fedora have this prepackaged with + name "mtd-utils". + + 3) Add "spi_intel.writeable=1" to the kernel command line and reboot + the board (you can also reload the driver passing "writeable=1" as + module parameter to modprobe). + + 4) Once the board is up and running again, find the right MTD partition + (it is named as "BIOS"):: + + # cat /proc/mtd + dev: size erasesize name + mtd0: 00800000 00001000 "BIOS" + + So here it will be /dev/mtd0 but it may vary. + + 5) Make backup of the existing image first:: + + # dd if=/dev/mtd0ro of=bios.bak + 16384+0 records in + 16384+0 records out + 8388608 bytes (8.4 MB) copied, 10.0269 s, 837 kB/s + + 6) Verify the backup:: + + # sha1sum /dev/mtd0ro bios.bak + fdbb011920572ca6c991377c4b418a0502668b73 /dev/mtd0ro + fdbb011920572ca6c991377c4b418a0502668b73 bios.bak + + The SHA1 sums must match. Otherwise do not continue any further! + + 7) Erase the SPI serial flash. After this step, do not reboot the + board! Otherwise it will not start anymore:: + + # flash_erase /dev/mtd0 0 0 + Erasing 4 Kibyte @ 7ff000 -- 100 % complete + + 8) Once completed without errors you can write the new BIOS image:: + + # dd if=MNW2MAX1.X64.0092.R01.1605221712.bin of=/dev/mtd0 + + 9) Verify that the new content of the SPI serial flash matches the new + BIOS image:: + + # sha1sum /dev/mtd0ro MNW2MAX1.X64.0092.R01.1605221712.bin + 9b4df9e4be2057fceec3a5529ec3d950836c87a2 /dev/mtd0ro + 9b4df9e4be2057fceec3a5529ec3d950836c87a2 MNW2MAX1.X64.0092.R01.1605221712.bin + + The SHA1 sums should match. + + 10) Now you can reboot your board and observe the new BIOS starting up + properly. + +References +---------- + +[1] https://firmware.intel.com/sites/default/files/MinnowBoard%2EMAX_%2EX64%2E92%2ER01%2Ezip + +[2] http://www.linux-mtd.infradead.org/ diff --git a/Documentation/driver-api/mtd/spi-nor.rst b/Documentation/driver-api/mtd/spi-nor.rst new file mode 100644 index 0000000000..c22f8c0f79 --- /dev/null +++ b/Documentation/driver-api/mtd/spi-nor.rst @@ -0,0 +1,65 @@ +================= +SPI NOR framework +================= + +Part I - Why do we need this framework? +--------------------------------------- + +SPI bus controllers (drivers/spi/) only deal with streams of bytes; the bus +controller operates agnostic of the specific device attached. However, some +controllers (such as Freescale's QuadSPI controller) cannot easily handle +arbitrary streams of bytes, but rather are designed specifically for SPI NOR. + +In particular, Freescale's QuadSPI controller must know the NOR commands to +find the right LUT sequence. Unfortunately, the SPI subsystem has no notion of +opcodes, addresses, or data payloads; a SPI controller simply knows to send or +receive bytes (Tx and Rx). Therefore, we must define a new layering scheme under +which the controller driver is aware of the opcodes, addressing, and other +details of the SPI NOR protocol. + +Part II - How does the framework work? +-------------------------------------- + +This framework just adds a new layer between the MTD and the SPI bus driver. +With this new layer, the SPI NOR controller driver does not depend on the +m25p80 code anymore. + +Before this framework, the layer is like:: + + MTD + ------------------------ + m25p80 + ------------------------ + SPI bus driver + ------------------------ + SPI NOR chip + +After this framework, the layer is like:: + + MTD + ------------------------ + SPI NOR framework + ------------------------ + m25p80 + ------------------------ + SPI bus driver + ------------------------ + SPI NOR chip + +With the SPI NOR controller driver (Freescale QuadSPI), it looks like:: + + MTD + ------------------------ + SPI NOR framework + ------------------------ + fsl-quadSPI + ------------------------ + SPI NOR chip + +Part III - How can drivers use the framework? +--------------------------------------------- + +The main API is spi_nor_scan(). Before you call the hook, a driver should +initialize the necessary fields for spi_nor{}. Please see +drivers/mtd/spi-nor/spi-nor.c for detail. Please also refer to spi-fsl-qspi.c +when you want to write a new driver for a SPI NOR controller. diff --git a/Documentation/driver-api/mtdnand.rst b/Documentation/driver-api/mtdnand.rst new file mode 100644 index 0000000000..ce77e024c4 --- /dev/null +++ b/Documentation/driver-api/mtdnand.rst @@ -0,0 +1,1006 @@ +===================================== +MTD NAND Driver Programming Interface +===================================== + +:Author: Thomas Gleixner + +Introduction +============ + +The generic NAND driver supports almost all NAND and AG-AND based chips +and connects them to the Memory Technology Devices (MTD) subsystem of +the Linux Kernel. + +This documentation is provided for developers who want to implement +board drivers or filesystem drivers suitable for NAND devices. + +Known Bugs And Assumptions +========================== + +None. + +Documentation hints +=================== + +The function and structure docs are autogenerated. Each function and +struct member has a short description which is marked with an [XXX] +identifier. The following chapters explain the meaning of those +identifiers. + +Function identifiers [XXX] +-------------------------- + +The functions are marked with [XXX] identifiers in the short comment. +The identifiers explain the usage and scope of the functions. Following +identifiers are used: + +- [MTD Interface] + + These functions provide the interface to the MTD kernel API. They are + not replaceable and provide functionality which is complete hardware + independent. + +- [NAND Interface] + + These functions are exported and provide the interface to the NAND + kernel API. + +- [GENERIC] + + Generic functions are not replaceable and provide functionality which + is complete hardware independent. + +- [DEFAULT] + + Default functions provide hardware related functionality which is + suitable for most of the implementations. These functions can be + replaced by the board driver if necessary. Those functions are called + via pointers in the NAND chip description structure. The board driver + can set the functions which should be replaced by board dependent + functions before calling nand_scan(). If the function pointer is + NULL on entry to nand_scan() then the pointer is set to the default + function which is suitable for the detected chip type. + +Struct member identifiers [XXX] +------------------------------- + +The struct members are marked with [XXX] identifiers in the comment. The +identifiers explain the usage and scope of the members. Following +identifiers are used: + +- [INTERN] + + These members are for NAND driver internal use only and must not be + modified. Most of these values are calculated from the chip geometry + information which is evaluated during nand_scan(). + +- [REPLACEABLE] + + Replaceable members hold hardware related functions which can be + provided by the board driver. The board driver can set the functions + which should be replaced by board dependent functions before calling + nand_scan(). If the function pointer is NULL on entry to + nand_scan() then the pointer is set to the default function which is + suitable for the detected chip type. + +- [BOARDSPECIFIC] + + Board specific members hold hardware related information which must + be provided by the board driver. The board driver must set the + function pointers and datafields before calling nand_scan(). + +- [OPTIONAL] + + Optional members can hold information relevant for the board driver. + The generic NAND driver code does not use this information. + +Basic board driver +================== + +For most boards it will be sufficient to provide just the basic +functions and fill out some really board dependent members in the nand +chip description structure. + +Basic defines +------------- + +At least you have to provide a nand_chip structure and a storage for +the ioremap'ed chip address. You can allocate the nand_chip structure +using kmalloc or you can allocate it statically. The NAND chip structure +embeds an mtd structure which will be registered to the MTD subsystem. +You can extract a pointer to the mtd structure from a nand_chip pointer +using the nand_to_mtd() helper. + +Kmalloc based example + +:: + + static struct mtd_info *board_mtd; + static void __iomem *baseaddr; + + +Static example + +:: + + static struct nand_chip board_chip; + static void __iomem *baseaddr; + + +Partition defines +----------------- + +If you want to divide your device into partitions, then define a +partitioning scheme suitable to your board. + +:: + + #define NUM_PARTITIONS 2 + static struct mtd_partition partition_info[] = { + { .name = "Flash partition 1", + .offset = 0, + .size = 8 * 1024 * 1024 }, + { .name = "Flash partition 2", + .offset = MTDPART_OFS_NEXT, + .size = MTDPART_SIZ_FULL }, + }; + + +Hardware control function +------------------------- + +The hardware control function provides access to the control pins of the +NAND chip(s). The access can be done by GPIO pins or by address lines. +If you use address lines, make sure that the timing requirements are +met. + +*GPIO based example* + +:: + + static void board_hwcontrol(struct mtd_info *mtd, int cmd) + { + switch(cmd){ + case NAND_CTL_SETCLE: /* Set CLE pin high */ break; + case NAND_CTL_CLRCLE: /* Set CLE pin low */ break; + case NAND_CTL_SETALE: /* Set ALE pin high */ break; + case NAND_CTL_CLRALE: /* Set ALE pin low */ break; + case NAND_CTL_SETNCE: /* Set nCE pin low */ break; + case NAND_CTL_CLRNCE: /* Set nCE pin high */ break; + } + } + + +*Address lines based example.* It's assumed that the nCE pin is driven +by a chip select decoder. + +:: + + static void board_hwcontrol(struct mtd_info *mtd, int cmd) + { + struct nand_chip *this = mtd_to_nand(mtd); + switch(cmd){ + case NAND_CTL_SETCLE: this->legacy.IO_ADDR_W |= CLE_ADRR_BIT; break; + case NAND_CTL_CLRCLE: this->legacy.IO_ADDR_W &= ~CLE_ADRR_BIT; break; + case NAND_CTL_SETALE: this->legacy.IO_ADDR_W |= ALE_ADRR_BIT; break; + case NAND_CTL_CLRALE: this->legacy.IO_ADDR_W &= ~ALE_ADRR_BIT; break; + } + } + + +Device ready function +--------------------- + +If the hardware interface has the ready busy pin of the NAND chip +connected to a GPIO or other accessible I/O pin, this function is used +to read back the state of the pin. The function has no arguments and +should return 0, if the device is busy (R/B pin is low) and 1, if the +device is ready (R/B pin is high). If the hardware interface does not +give access to the ready busy pin, then the function must not be defined +and the function pointer this->legacy.dev_ready is set to NULL. + +Init function +------------- + +The init function allocates memory and sets up all the board specific +parameters and function pointers. When everything is set up nand_scan() +is called. This function tries to detect and identify then chip. If a +chip is found all the internal data fields are initialized accordingly. +The structure(s) have to be zeroed out first and then filled with the +necessary information about the device. + +:: + + static int __init board_init (void) + { + struct nand_chip *this; + int err = 0; + + /* Allocate memory for MTD device structure and private data */ + this = kzalloc(sizeof(struct nand_chip), GFP_KERNEL); + if (!this) { + printk ("Unable to allocate NAND MTD device structure.\n"); + err = -ENOMEM; + goto out; + } + + board_mtd = nand_to_mtd(this); + + /* map physical address */ + baseaddr = ioremap(CHIP_PHYSICAL_ADDRESS, 1024); + if (!baseaddr) { + printk("Ioremap to access NAND chip failed\n"); + err = -EIO; + goto out_mtd; + } + + /* Set address of NAND IO lines */ + this->legacy.IO_ADDR_R = baseaddr; + this->legacy.IO_ADDR_W = baseaddr; + /* Reference hardware control function */ + this->hwcontrol = board_hwcontrol; + /* Set command delay time, see datasheet for correct value */ + this->legacy.chip_delay = CHIP_DEPENDEND_COMMAND_DELAY; + /* Assign the device ready function, if available */ + this->legacy.dev_ready = board_dev_ready; + this->eccmode = NAND_ECC_SOFT; + + /* Scan to find existence of the device */ + if (nand_scan (this, 1)) { + err = -ENXIO; + goto out_ior; + } + + add_mtd_partitions(board_mtd, partition_info, NUM_PARTITIONS); + goto out; + + out_ior: + iounmap(baseaddr); + out_mtd: + kfree (this); + out: + return err; + } + module_init(board_init); + + +Exit function +------------- + +The exit function is only necessary if the driver is compiled as a +module. It releases all resources which are held by the chip driver and +unregisters the partitions in the MTD layer. + +:: + + #ifdef MODULE + static void __exit board_cleanup (void) + { + /* Unregister device */ + WARN_ON(mtd_device_unregister(board_mtd)); + /* Release resources */ + nand_cleanup(mtd_to_nand(board_mtd)); + + /* unmap physical address */ + iounmap(baseaddr); + + /* Free the MTD device structure */ + kfree (mtd_to_nand(board_mtd)); + } + module_exit(board_cleanup); + #endif + + +Advanced board driver functions +=============================== + +This chapter describes the advanced functionality of the NAND driver. +For a list of functions which can be overridden by the board driver see +the documentation of the nand_chip structure. + +Multiple chip control +--------------------- + +The nand driver can control chip arrays. Therefore the board driver must +provide an own select_chip function. This function must (de)select the +requested chip. The function pointer in the nand_chip structure must be +set before calling nand_scan(). The maxchip parameter of nand_scan() +defines the maximum number of chips to scan for. Make sure that the +select_chip function can handle the requested number of chips. + +The nand driver concatenates the chips to one virtual chip and provides +this virtual chip to the MTD layer. + +*Note: The driver can only handle linear chip arrays of equally sized +chips. There is no support for parallel arrays which extend the +buswidth.* + +*GPIO based example* + +:: + + static void board_select_chip (struct mtd_info *mtd, int chip) + { + /* Deselect all chips, set all nCE pins high */ + GPIO(BOARD_NAND_NCE) |= 0xff; + if (chip >= 0) + GPIO(BOARD_NAND_NCE) &= ~ (1 << chip); + } + + +*Address lines based example.* Its assumed that the nCE pins are +connected to an address decoder. + +:: + + static void board_select_chip (struct mtd_info *mtd, int chip) + { + struct nand_chip *this = mtd_to_nand(mtd); + + /* Deselect all chips */ + this->legacy.IO_ADDR_R &= ~BOARD_NAND_ADDR_MASK; + this->legacy.IO_ADDR_W &= ~BOARD_NAND_ADDR_MASK; + switch (chip) { + case 0: + this->legacy.IO_ADDR_R |= BOARD_NAND_ADDR_CHIP0; + this->legacy.IO_ADDR_W |= BOARD_NAND_ADDR_CHIP0; + break; + .... + case n: + this->legacy.IO_ADDR_R |= BOARD_NAND_ADDR_CHIPn; + this->legacy.IO_ADDR_W |= BOARD_NAND_ADDR_CHIPn; + break; + } + } + + +Hardware ECC support +-------------------- + +Functions and constants +~~~~~~~~~~~~~~~~~~~~~~~ + +The nand driver supports three different types of hardware ECC. + +- NAND_ECC_HW3_256 + + Hardware ECC generator providing 3 bytes ECC per 256 byte. + +- NAND_ECC_HW3_512 + + Hardware ECC generator providing 3 bytes ECC per 512 byte. + +- NAND_ECC_HW6_512 + + Hardware ECC generator providing 6 bytes ECC per 512 byte. + +- NAND_ECC_HW8_512 + + Hardware ECC generator providing 8 bytes ECC per 512 byte. + +If your hardware generator has a different functionality add it at the +appropriate place in nand_base.c + +The board driver must provide following functions: + +- enable_hwecc + + This function is called before reading / writing to the chip. Reset + or initialize the hardware generator in this function. The function + is called with an argument which let you distinguish between read and + write operations. + +- calculate_ecc + + This function is called after read / write from / to the chip. + Transfer the ECC from the hardware to the buffer. If the option + NAND_HWECC_SYNDROME is set then the function is only called on + write. See below. + +- correct_data + + In case of an ECC error this function is called for error detection + and correction. Return 1 respectively 2 in case the error can be + corrected. If the error is not correctable return -1. If your + hardware generator matches the default algorithm of the nand_ecc + software generator then use the correction function provided by + nand_ecc instead of implementing duplicated code. + +Hardware ECC with syndrome calculation +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ + +Many hardware ECC implementations provide Reed-Solomon codes and +calculate an error syndrome on read. The syndrome must be converted to a +standard Reed-Solomon syndrome before calling the error correction code +in the generic Reed-Solomon library. + +The ECC bytes must be placed immediately after the data bytes in order +to make the syndrome generator work. This is contrary to the usual +layout used by software ECC. The separation of data and out of band area +is not longer possible. The nand driver code handles this layout and the +remaining free bytes in the oob area are managed by the autoplacement +code. Provide a matching oob-layout in this case. See rts_from4.c and +diskonchip.c for implementation reference. In those cases we must also +use bad block tables on FLASH, because the ECC layout is interfering +with the bad block marker positions. See bad block table support for +details. + +Bad block table support +----------------------- + +Most NAND chips mark the bad blocks at a defined position in the spare +area. Those blocks must not be erased under any circumstances as the bad +block information would be lost. It is possible to check the bad block +mark each time when the blocks are accessed by reading the spare area of +the first page in the block. This is time consuming so a bad block table +is used. + +The nand driver supports various types of bad block tables. + +- Per device + + The bad block table contains all bad block information of the device + which can consist of multiple chips. + +- Per chip + + A bad block table is used per chip and contains the bad block + information for this particular chip. + +- Fixed offset + + The bad block table is located at a fixed offset in the chip + (device). This applies to various DiskOnChip devices. + +- Automatic placed + + The bad block table is automatically placed and detected either at + the end or at the beginning of a chip (device) + +- Mirrored tables + + The bad block table is mirrored on the chip (device) to allow updates + of the bad block table without data loss. + +nand_scan() calls the function nand_default_bbt(). +nand_default_bbt() selects appropriate default bad block table +descriptors depending on the chip information which was retrieved by +nand_scan(). + +The standard policy is scanning the device for bad blocks and build a +ram based bad block table which allows faster access than always +checking the bad block information on the flash chip itself. + +Flash based tables +~~~~~~~~~~~~~~~~~~ + +It may be desired or necessary to keep a bad block table in FLASH. For +AG-AND chips this is mandatory, as they have no factory marked bad +blocks. They have factory marked good blocks. The marker pattern is +erased when the block is erased to be reused. So in case of powerloss +before writing the pattern back to the chip this block would be lost and +added to the bad blocks. Therefore we scan the chip(s) when we detect +them the first time for good blocks and store this information in a bad +block table before erasing any of the blocks. + +The blocks in which the tables are stored are protected against +accidental access by marking them bad in the memory bad block table. The +bad block table management functions are allowed to circumvent this +protection. + +The simplest way to activate the FLASH based bad block table support is +to set the option NAND_BBT_USE_FLASH in the bbt_option field of the +nand chip structure before calling nand_scan(). For AG-AND chips is +this done by default. This activates the default FLASH based bad block +table functionality of the NAND driver. The default bad block table +options are + +- Store bad block table per chip + +- Use 2 bits per block + +- Automatic placement at the end of the chip + +- Use mirrored tables with version numbers + +- Reserve 4 blocks at the end of the chip + +User defined tables +~~~~~~~~~~~~~~~~~~~ + +User defined tables are created by filling out a nand_bbt_descr +structure and storing the pointer in the nand_chip structure member +bbt_td before calling nand_scan(). If a mirror table is necessary a +second structure must be created and a pointer to this structure must be +stored in bbt_md inside the nand_chip structure. If the bbt_md member +is set to NULL then only the main table is used and no scan for the +mirrored table is performed. + +The most important field in the nand_bbt_descr structure is the +options field. The options define most of the table properties. Use the +predefined constants from rawnand.h to define the options. + +- Number of bits per block + + The supported number of bits is 1, 2, 4, 8. + +- Table per chip + + Setting the constant NAND_BBT_PERCHIP selects that a bad block + table is managed for each chip in a chip array. If this option is not + set then a per device bad block table is used. + +- Table location is absolute + + Use the option constant NAND_BBT_ABSPAGE and define the absolute + page number where the bad block table starts in the field pages. If + you have selected bad block tables per chip and you have a multi chip + array then the start page must be given for each chip in the chip + array. Note: there is no scan for a table ident pattern performed, so + the fields pattern, veroffs, offs, len can be left uninitialized + +- Table location is automatically detected + + The table can either be located in the first or the last good blocks + of the chip (device). Set NAND_BBT_LASTBLOCK to place the bad block + table at the end of the chip (device). The bad block tables are + marked and identified by a pattern which is stored in the spare area + of the first page in the block which holds the bad block table. Store + a pointer to the pattern in the pattern field. Further the length of + the pattern has to be stored in len and the offset in the spare area + must be given in the offs member of the nand_bbt_descr structure. + For mirrored bad block tables different patterns are mandatory. + +- Table creation + + Set the option NAND_BBT_CREATE to enable the table creation if no + table can be found during the scan. Usually this is done only once if + a new chip is found. + +- Table write support + + Set the option NAND_BBT_WRITE to enable the table write support. + This allows the update of the bad block table(s) in case a block has + to be marked bad due to wear. The MTD interface function + block_markbad is calling the update function of the bad block table. + If the write support is enabled then the table is updated on FLASH. + + Note: Write support should only be enabled for mirrored tables with + version control. + +- Table version control + + Set the option NAND_BBT_VERSION to enable the table version + control. It's highly recommended to enable this for mirrored tables + with write support. It makes sure that the risk of losing the bad + block table information is reduced to the loss of the information + about the one worn out block which should be marked bad. The version + is stored in 4 consecutive bytes in the spare area of the device. The + position of the version number is defined by the member veroffs in + the bad block table descriptor. + +- Save block contents on write + + In case that the block which holds the bad block table does contain + other useful information, set the option NAND_BBT_SAVECONTENT. When + the bad block table is written then the whole block is read the bad + block table is updated and the block is erased and everything is + written back. If this option is not set only the bad block table is + written and everything else in the block is ignored and erased. + +- Number of reserved blocks + + For automatic placement some blocks must be reserved for bad block + table storage. The number of reserved blocks is defined in the + maxblocks member of the bad block table description structure. + Reserving 4 blocks for mirrored tables should be a reasonable number. + This also limits the number of blocks which are scanned for the bad + block table ident pattern. + +Spare area (auto)placement +-------------------------- + +The nand driver implements different possibilities for placement of +filesystem data in the spare area, + +- Placement defined by fs driver + +- Automatic placement + +The default placement function is automatic placement. The nand driver +has built in default placement schemes for the various chiptypes. If due +to hardware ECC functionality the default placement does not fit then +the board driver can provide a own placement scheme. + +File system drivers can provide a own placement scheme which is used +instead of the default placement scheme. + +Placement schemes are defined by a nand_oobinfo structure + +:: + + struct nand_oobinfo { + int useecc; + int eccbytes; + int eccpos[24]; + int oobfree[8][2]; + }; + + +- useecc + + The useecc member controls the ecc and placement function. The header + file include/mtd/mtd-abi.h contains constants to select ecc and + placement. MTD_NANDECC_OFF switches off the ecc complete. This is + not recommended and available for testing and diagnosis only. + MTD_NANDECC_PLACE selects caller defined placement, + MTD_NANDECC_AUTOPLACE selects automatic placement. + +- eccbytes + + The eccbytes member defines the number of ecc bytes per page. + +- eccpos + + The eccpos array holds the byte offsets in the spare area where the + ecc codes are placed. + +- oobfree + + The oobfree array defines the areas in the spare area which can be + used for automatic placement. The information is given in the format + {offset, size}. offset defines the start of the usable area, size the + length in bytes. More than one area can be defined. The list is + terminated by an {0, 0} entry. + +Placement defined by fs driver +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ + +The calling function provides a pointer to a nand_oobinfo structure +which defines the ecc placement. For writes the caller must provide a +spare area buffer along with the data buffer. The spare area buffer size +is (number of pages) \* (size of spare area). For reads the buffer size +is (number of pages) \* ((size of spare area) + (number of ecc steps per +page) \* sizeof (int)). The driver stores the result of the ecc check +for each tuple in the spare buffer. The storage sequence is:: + + <spare data page 0><ecc result 0>...<ecc result n> + + ... + + <spare data page n><ecc result 0>...<ecc result n> + +This is a legacy mode used by YAFFS1. + +If the spare area buffer is NULL then only the ECC placement is done +according to the given scheme in the nand_oobinfo structure. + +Automatic placement +~~~~~~~~~~~~~~~~~~~ + +Automatic placement uses the built in defaults to place the ecc bytes in +the spare area. If filesystem data have to be stored / read into the +spare area then the calling function must provide a buffer. The buffer +size per page is determined by the oobfree array in the nand_oobinfo +structure. + +If the spare area buffer is NULL then only the ECC placement is done +according to the default builtin scheme. + +Spare area autoplacement default schemes +---------------------------------------- + +256 byte pagesize +~~~~~~~~~~~~~~~~~ + +======== ================== =================================================== +Offset Content Comment +======== ================== =================================================== +0x00 ECC byte 0 Error correction code byte 0 +0x01 ECC byte 1 Error correction code byte 1 +0x02 ECC byte 2 Error correction code byte 2 +0x03 Autoplace 0 +0x04 Autoplace 1 +0x05 Bad block marker If any bit in this byte is zero, then this + block is bad. This applies only to the first + page in a block. In the remaining pages this + byte is reserved +0x06 Autoplace 2 +0x07 Autoplace 3 +======== ================== =================================================== + +512 byte pagesize +~~~~~~~~~~~~~~~~~ + + +============= ================== ============================================== +Offset Content Comment +============= ================== ============================================== +0x00 ECC byte 0 Error correction code byte 0 of the lower + 256 Byte data in this page +0x01 ECC byte 1 Error correction code byte 1 of the lower + 256 Bytes of data in this page +0x02 ECC byte 2 Error correction code byte 2 of the lower + 256 Bytes of data in this page +0x03 ECC byte 3 Error correction code byte 0 of the upper + 256 Bytes of data in this page +0x04 reserved reserved +0x05 Bad block marker If any bit in this byte is zero, then this + block is bad. This applies only to the first + page in a block. In the remaining pages this + byte is reserved +0x06 ECC byte 4 Error correction code byte 1 of the upper + 256 Bytes of data in this page +0x07 ECC byte 5 Error correction code byte 2 of the upper + 256 Bytes of data in this page +0x08 - 0x0F Autoplace 0 - 7 +============= ================== ============================================== + +2048 byte pagesize +~~~~~~~~~~~~~~~~~~ + +=========== ================== ================================================ +Offset Content Comment +=========== ================== ================================================ +0x00 Bad block marker If any bit in this byte is zero, then this block + is bad. This applies only to the first page in a + block. In the remaining pages this byte is + reserved +0x01 Reserved Reserved +0x02-0x27 Autoplace 0 - 37 +0x28 ECC byte 0 Error correction code byte 0 of the first + 256 Byte data in this page +0x29 ECC byte 1 Error correction code byte 1 of the first + 256 Bytes of data in this page +0x2A ECC byte 2 Error correction code byte 2 of the first + 256 Bytes data in this page +0x2B ECC byte 3 Error correction code byte 0 of the second + 256 Bytes of data in this page +0x2C ECC byte 4 Error correction code byte 1 of the second + 256 Bytes of data in this page +0x2D ECC byte 5 Error correction code byte 2 of the second + 256 Bytes of data in this page +0x2E ECC byte 6 Error correction code byte 0 of the third + 256 Bytes of data in this page +0x2F ECC byte 7 Error correction code byte 1 of the third + 256 Bytes of data in this page +0x30 ECC byte 8 Error correction code byte 2 of the third + 256 Bytes of data in this page +0x31 ECC byte 9 Error correction code byte 0 of the fourth + 256 Bytes of data in this page +0x32 ECC byte 10 Error correction code byte 1 of the fourth + 256 Bytes of data in this page +0x33 ECC byte 11 Error correction code byte 2 of the fourth + 256 Bytes of data in this page +0x34 ECC byte 12 Error correction code byte 0 of the fifth + 256 Bytes of data in this page +0x35 ECC byte 13 Error correction code byte 1 of the fifth + 256 Bytes of data in this page +0x36 ECC byte 14 Error correction code byte 2 of the fifth + 256 Bytes of data in this page +0x37 ECC byte 15 Error correction code byte 0 of the sixth + 256 Bytes of data in this page +0x38 ECC byte 16 Error correction code byte 1 of the sixth + 256 Bytes of data in this page +0x39 ECC byte 17 Error correction code byte 2 of the sixth + 256 Bytes of data in this page +0x3A ECC byte 18 Error correction code byte 0 of the seventh + 256 Bytes of data in this page +0x3B ECC byte 19 Error correction code byte 1 of the seventh + 256 Bytes of data in this page +0x3C ECC byte 20 Error correction code byte 2 of the seventh + 256 Bytes of data in this page +0x3D ECC byte 21 Error correction code byte 0 of the eighth + 256 Bytes of data in this page +0x3E ECC byte 22 Error correction code byte 1 of the eighth + 256 Bytes of data in this page +0x3F ECC byte 23 Error correction code byte 2 of the eighth + 256 Bytes of data in this page +=========== ================== ================================================ + +Filesystem support +================== + +The NAND driver provides all necessary functions for a filesystem via +the MTD interface. + +Filesystems must be aware of the NAND peculiarities and restrictions. +One major restrictions of NAND Flash is, that you cannot write as often +as you want to a page. The consecutive writes to a page, before erasing +it again, are restricted to 1-3 writes, depending on the manufacturers +specifications. This applies similar to the spare area. + +Therefore NAND aware filesystems must either write in page size chunks +or hold a writebuffer to collect smaller writes until they sum up to +pagesize. Available NAND aware filesystems: JFFS2, YAFFS. + +The spare area usage to store filesystem data is controlled by the spare +area placement functionality which is described in one of the earlier +chapters. + +Tools +===== + +The MTD project provides a couple of helpful tools to handle NAND Flash. + +- flasherase, flasheraseall: Erase and format FLASH partitions + +- nandwrite: write filesystem images to NAND FLASH + +- nanddump: dump the contents of a NAND FLASH partitions + +These tools are aware of the NAND restrictions. Please use those tools +instead of complaining about errors which are caused by non NAND aware +access methods. + +Constants +========= + +This chapter describes the constants which might be relevant for a +driver developer. + +Chip option constants +--------------------- + +Constants for chip id table +~~~~~~~~~~~~~~~~~~~~~~~~~~~ + +These constants are defined in rawnand.h. They are OR-ed together to +describe the chip functionality:: + + /* Buswitdh is 16 bit */ + #define NAND_BUSWIDTH_16 0x00000002 + /* Device supports partial programming without padding */ + #define NAND_NO_PADDING 0x00000004 + /* Chip has cache program function */ + #define NAND_CACHEPRG 0x00000008 + /* Chip has copy back function */ + #define NAND_COPYBACK 0x00000010 + /* AND Chip which has 4 banks and a confusing page / block + * assignment. See Renesas datasheet for further information */ + #define NAND_IS_AND 0x00000020 + /* Chip has a array of 4 pages which can be read without + * additional ready /busy waits */ + #define NAND_4PAGE_ARRAY 0x00000040 + + +Constants for runtime options +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ + +These constants are defined in rawnand.h. They are OR-ed together to +describe the functionality:: + + /* The hw ecc generator provides a syndrome instead a ecc value on read + * This can only work if we have the ecc bytes directly behind the + * data bytes. Applies for DOC and AG-AND Renesas HW Reed Solomon generators */ + #define NAND_HWECC_SYNDROME 0x00020000 + + +ECC selection constants +----------------------- + +Use these constants to select the ECC algorithm:: + + /* No ECC. Usage is not recommended ! */ + #define NAND_ECC_NONE 0 + /* Software ECC 3 byte ECC per 256 Byte data */ + #define NAND_ECC_SOFT 1 + /* Hardware ECC 3 byte ECC per 256 Byte data */ + #define NAND_ECC_HW3_256 2 + /* Hardware ECC 3 byte ECC per 512 Byte data */ + #define NAND_ECC_HW3_512 3 + /* Hardware ECC 6 byte ECC per 512 Byte data */ + #define NAND_ECC_HW6_512 4 + /* Hardware ECC 8 byte ECC per 512 Byte data */ + #define NAND_ECC_HW8_512 6 + + +Hardware control related constants +---------------------------------- + +These constants describe the requested hardware access function when the +boardspecific hardware control function is called:: + + /* Select the chip by setting nCE to low */ + #define NAND_CTL_SETNCE 1 + /* Deselect the chip by setting nCE to high */ + #define NAND_CTL_CLRNCE 2 + /* Select the command latch by setting CLE to high */ + #define NAND_CTL_SETCLE 3 + /* Deselect the command latch by setting CLE to low */ + #define NAND_CTL_CLRCLE 4 + /* Select the address latch by setting ALE to high */ + #define NAND_CTL_SETALE 5 + /* Deselect the address latch by setting ALE to low */ + #define NAND_CTL_CLRALE 6 + /* Set write protection by setting WP to high. Not used! */ + #define NAND_CTL_SETWP 7 + /* Clear write protection by setting WP to low. Not used! */ + #define NAND_CTL_CLRWP 8 + + +Bad block table related constants +--------------------------------- + +These constants describe the options used for bad block table +descriptors:: + + /* Options for the bad block table descriptors */ + + /* The number of bits used per block in the bbt on the device */ + #define NAND_BBT_NRBITS_MSK 0x0000000F + #define NAND_BBT_1BIT 0x00000001 + #define NAND_BBT_2BIT 0x00000002 + #define NAND_BBT_4BIT 0x00000004 + #define NAND_BBT_8BIT 0x00000008 + /* The bad block table is in the last good block of the device */ + #define NAND_BBT_LASTBLOCK 0x00000010 + /* The bbt is at the given page, else we must scan for the bbt */ + #define NAND_BBT_ABSPAGE 0x00000020 + /* bbt is stored per chip on multichip devices */ + #define NAND_BBT_PERCHIP 0x00000080 + /* bbt has a version counter at offset veroffs */ + #define NAND_BBT_VERSION 0x00000100 + /* Create a bbt if none axists */ + #define NAND_BBT_CREATE 0x00000200 + /* Write bbt if necessary */ + #define NAND_BBT_WRITE 0x00001000 + /* Read and write back block contents when writing bbt */ + #define NAND_BBT_SAVECONTENT 0x00002000 + + +Structures +========== + +This chapter contains the autogenerated documentation of the structures +which are used in the NAND driver and might be relevant for a driver +developer. Each struct member has a short description which is marked +with an [XXX] identifier. See the chapter "Documentation hints" for an +explanation. + +.. kernel-doc:: include/linux/mtd/rawnand.h + :internal: + +Public Functions Provided +========================= + +This chapter contains the autogenerated documentation of the NAND kernel +API functions which are exported. Each function has a short description +which is marked with an [XXX] identifier. See the chapter "Documentation +hints" for an explanation. + +.. kernel-doc:: drivers/mtd/nand/raw/nand_base.c + :export: + +Internal Functions Provided +=========================== + +This chapter contains the autogenerated documentation of the NAND driver +internal functions. Each function has a short description which is +marked with an [XXX] identifier. See the chapter "Documentation hints" +for an explanation. The functions marked with [DEFAULT] might be +relevant for a board driver developer. + +.. kernel-doc:: drivers/mtd/nand/raw/nand_base.c + :internal: + +.. kernel-doc:: drivers/mtd/nand/raw/nand_bbt.c + :internal: + +Credits +======= + +The following people have contributed to the NAND driver: + +1. Steven J. Hill\ sjhill@realitydiluted.com + +2. David Woodhouse\ dwmw2@infradead.org + +3. Thomas Gleixner\ tglx@linutronix.de + +A lot of users have provided bugfixes, improvements and helping hands +for testing. Thanks a lot. + +The following people have contributed to this document: + +1. Thomas Gleixner\ tglx@linutronix.de diff --git a/Documentation/driver-api/nfc/index.rst b/Documentation/driver-api/nfc/index.rst new file mode 100644 index 0000000000..b6e9eedbff --- /dev/null +++ b/Documentation/driver-api/nfc/index.rst @@ -0,0 +1,11 @@ +.. SPDX-License-Identifier: GPL-2.0 + +======================== +Near Field Communication +======================== + +.. toctree:: + :maxdepth: 1 + + nfc-hci + nfc-pn544 diff --git a/Documentation/driver-api/nfc/nfc-hci.rst b/Documentation/driver-api/nfc/nfc-hci.rst new file mode 100644 index 0000000000..486aa647c4 --- /dev/null +++ b/Documentation/driver-api/nfc/nfc-hci.rst @@ -0,0 +1,311 @@ +======================== +HCI backend for NFC Core +======================== + +- Author: Eric Lapuyade, Samuel Ortiz +- Contact: eric.lapuyade@intel.com, samuel.ortiz@intel.com + +General +------- + +The HCI layer implements much of the ETSI TS 102 622 V10.2.0 specification. It +enables easy writing of HCI-based NFC drivers. The HCI layer runs as an NFC Core +backend, implementing an abstract nfc device and translating NFC Core API +to HCI commands and events. + +HCI +--- + +HCI registers as an nfc device with NFC Core. Requests coming from userspace are +routed through netlink sockets to NFC Core and then to HCI. From this point, +they are translated in a sequence of HCI commands sent to the HCI layer in the +host controller (the chip). Commands can be executed synchronously (the sending +context blocks waiting for response) or asynchronously (the response is returned +from HCI Rx context). +HCI events can also be received from the host controller. They will be handled +and a translation will be forwarded to NFC Core as needed. There are hooks to +let the HCI driver handle proprietary events or override standard behavior. +HCI uses 2 execution contexts: + +- one for executing commands : nfc_hci_msg_tx_work(). Only one command + can be executing at any given moment. +- one for dispatching received events and commands : nfc_hci_msg_rx_work(). + +HCI Session initialization +-------------------------- + +The Session initialization is an HCI standard which must unfortunately +support proprietary gates. This is the reason why the driver will pass a list +of proprietary gates that must be part of the session. HCI will ensure all +those gates have pipes connected when the hci device is set up. +In case the chip supports pre-opened gates and pseudo-static pipes, the driver +can pass that information to HCI core. + +HCI Gates and Pipes +------------------- + +A gate defines the 'port' where some service can be found. In order to access +a service, one must create a pipe to that gate and open it. In this +implementation, pipes are totally hidden. The public API only knows gates. +This is consistent with the driver need to send commands to proprietary gates +without knowing the pipe connected to it. + +Driver interface +---------------- + +A driver is generally written in two parts : the physical link management and +the HCI management. This makes it easier to maintain a driver for a chip that +can be connected using various phy (i2c, spi, ...) + +HCI Management +-------------- + +A driver would normally register itself with HCI and provide the following +entry points:: + + struct nfc_hci_ops { + int (*open)(struct nfc_hci_dev *hdev); + void (*close)(struct nfc_hci_dev *hdev); + int (*hci_ready) (struct nfc_hci_dev *hdev); + int (*xmit) (struct nfc_hci_dev *hdev, struct sk_buff *skb); + int (*start_poll) (struct nfc_hci_dev *hdev, + u32 im_protocols, u32 tm_protocols); + int (*dep_link_up)(struct nfc_hci_dev *hdev, struct nfc_target *target, + u8 comm_mode, u8 *gb, size_t gb_len); + int (*dep_link_down)(struct nfc_hci_dev *hdev); + int (*target_from_gate) (struct nfc_hci_dev *hdev, u8 gate, + struct nfc_target *target); + int (*complete_target_discovered) (struct nfc_hci_dev *hdev, u8 gate, + struct nfc_target *target); + int (*im_transceive) (struct nfc_hci_dev *hdev, + struct nfc_target *target, struct sk_buff *skb, + data_exchange_cb_t cb, void *cb_context); + int (*tm_send)(struct nfc_hci_dev *hdev, struct sk_buff *skb); + int (*check_presence)(struct nfc_hci_dev *hdev, + struct nfc_target *target); + int (*event_received)(struct nfc_hci_dev *hdev, u8 gate, u8 event, + struct sk_buff *skb); + }; + +- open() and close() shall turn the hardware on and off. +- hci_ready() is an optional entry point that is called right after the hci + session has been set up. The driver can use it to do additional initialization + that must be performed using HCI commands. +- xmit() shall simply write a frame to the physical link. +- start_poll() is an optional entrypoint that shall set the hardware in polling + mode. This must be implemented only if the hardware uses proprietary gates or a + mechanism slightly different from the HCI standard. +- dep_link_up() is called after a p2p target has been detected, to finish + the p2p connection setup with hardware parameters that need to be passed back + to nfc core. +- dep_link_down() is called to bring the p2p link down. +- target_from_gate() is an optional entrypoint to return the nfc protocols + corresponding to a proprietary gate. +- complete_target_discovered() is an optional entry point to let the driver + perform additional proprietary processing necessary to auto activate the + discovered target. +- im_transceive() must be implemented by the driver if proprietary HCI commands + are required to send data to the tag. Some tag types will require custom + commands, others can be written to using the standard HCI commands. The driver + can check the tag type and either do proprietary processing, or return 1 to ask + for standard processing. The data exchange command itself must be sent + asynchronously. +- tm_send() is called to send data in the case of a p2p connection +- check_presence() is an optional entry point that will be called regularly + by the core to check that an activated tag is still in the field. If this is + not implemented, the core will not be able to push tag_lost events to the user + space +- event_received() is called to handle an event coming from the chip. Driver + can handle the event or return 1 to let HCI attempt standard processing. + +On the rx path, the driver is responsible to push incoming HCP frames to HCI +using nfc_hci_recv_frame(). HCI will take care of re-aggregation and handling +This must be done from a context that can sleep. + +PHY Management +-------------- + +The physical link (i2c, ...) management is defined by the following structure:: + + struct nfc_phy_ops { + int (*write)(void *dev_id, struct sk_buff *skb); + int (*enable)(void *dev_id); + void (*disable)(void *dev_id); + }; + +enable(): + turn the phy on (power on), make it ready to transfer data +disable(): + turn the phy off +write(): + Send a data frame to the chip. Note that to enable higher + layers such as an llc to store the frame for re-emission, this + function must not alter the skb. It must also not return a positive + result (return 0 for success, negative for failure). + +Data coming from the chip shall be sent directly to nfc_hci_recv_frame(). + +LLC +--- + +Communication between the CPU and the chip often requires some link layer +protocol. Those are isolated as modules managed by the HCI layer. There are +currently two modules : nop (raw transfer) and shdlc. +A new llc must implement the following functions:: + + struct nfc_llc_ops { + void *(*init) (struct nfc_hci_dev *hdev, xmit_to_drv_t xmit_to_drv, + rcv_to_hci_t rcv_to_hci, int tx_headroom, + int tx_tailroom, int *rx_headroom, int *rx_tailroom, + llc_failure_t llc_failure); + void (*deinit) (struct nfc_llc *llc); + int (*start) (struct nfc_llc *llc); + int (*stop) (struct nfc_llc *llc); + void (*rcv_from_drv) (struct nfc_llc *llc, struct sk_buff *skb); + int (*xmit_from_hci) (struct nfc_llc *llc, struct sk_buff *skb); + }; + +init(): + allocate and init your private storage +deinit(): + cleanup +start(): + establish the logical connection +stop (): + terminate the logical connection +rcv_from_drv(): + handle data coming from the chip, going to HCI +xmit_from_hci(): + handle data sent by HCI, going to the chip + +The llc must be registered with nfc before it can be used. Do that by +calling:: + + nfc_llc_register(const char *name, const struct nfc_llc_ops *ops); + +Again, note that the llc does not handle the physical link. It is thus very +easy to mix any physical link with any llc for a given chip driver. + +Included Drivers +---------------- + +An HCI based driver for an NXP PN544, connected through I2C bus, and using +shdlc is included. + +Execution Contexts +------------------ + +The execution contexts are the following: +- IRQ handler (IRQH): +fast, cannot sleep. sends incoming frames to HCI where they are passed to +the current llc. In case of shdlc, the frame is queued in shdlc rx queue. + +- SHDLC State Machine worker (SMW) + + Only when llc_shdlc is used: handles shdlc rx & tx queues. + + Dispatches HCI cmd responses. + +- HCI Tx Cmd worker (MSGTXWQ) + + Serializes execution of HCI commands. + + Completes execution in case of response timeout. + +- HCI Rx worker (MSGRXWQ) + + Dispatches incoming HCI commands or events. + +- Syscall context from a userspace call (SYSCALL) + + Any entrypoint in HCI called from NFC Core + +Workflow executing an HCI command (using shdlc) +----------------------------------------------- + +Executing an HCI command can easily be performed synchronously using the +following API:: + + int nfc_hci_send_cmd (struct nfc_hci_dev *hdev, u8 gate, u8 cmd, + const u8 *param, size_t param_len, struct sk_buff **skb) + +The API must be invoked from a context that can sleep. Most of the time, this +will be the syscall context. skb will return the result that was received in +the response. + +Internally, execution is asynchronous. So all this API does is to enqueue the +HCI command, setup a local wait queue on stack, and wait_event() for completion. +The wait is not interruptible because it is guaranteed that the command will +complete after some short timeout anyway. + +MSGTXWQ context will then be scheduled and invoke nfc_hci_msg_tx_work(). +This function will dequeue the next pending command and send its HCP fragments +to the lower layer which happens to be shdlc. It will then start a timer to be +able to complete the command with a timeout error if no response arrive. + +SMW context gets scheduled and invokes nfc_shdlc_sm_work(). This function +handles shdlc framing in and out. It uses the driver xmit to send frames and +receives incoming frames in an skb queue filled from the driver IRQ handler. +SHDLC I(nformation) frames payload are HCP fragments. They are aggregated to +form complete HCI frames, which can be a response, command, or event. + +HCI Responses are dispatched immediately from this context to unblock +waiting command execution. Response processing involves invoking the completion +callback that was provided by nfc_hci_msg_tx_work() when it sent the command. +The completion callback will then wake the syscall context. + +It is also possible to execute the command asynchronously using this API:: + + static int nfc_hci_execute_cmd_async(struct nfc_hci_dev *hdev, u8 pipe, u8 cmd, + const u8 *param, size_t param_len, + data_exchange_cb_t cb, void *cb_context) + +The workflow is the same, except that the API call returns immediately, and +the callback will be called with the result from the SMW context. + +Workflow receiving an HCI event or command +------------------------------------------ + +HCI commands or events are not dispatched from SMW context. Instead, they are +queued to HCI rx_queue and will be dispatched from HCI rx worker +context (MSGRXWQ). This is done this way to allow a cmd or event handler +to also execute other commands (for example, handling the +NFC_HCI_EVT_TARGET_DISCOVERED event from PN544 requires to issue an +ANY_GET_PARAMETER to the reader A gate to get information on the target +that was discovered). + +Typically, such an event will be propagated to NFC Core from MSGRXWQ context. + +Error management +---------------- + +Errors that occur synchronously with the execution of an NFC Core request are +simply returned as the execution result of the request. These are easy. + +Errors that occur asynchronously (e.g. in a background protocol handling thread) +must be reported such that upper layers don't stay ignorant that something +went wrong below and know that expected events will probably never happen. +Handling of these errors is done as follows: + +- driver (pn544) fails to deliver an incoming frame: it stores the error such + that any subsequent call to the driver will result in this error. Then it + calls the standard nfc_shdlc_recv_frame() with a NULL argument to report the + problem above. shdlc stores a EREMOTEIO sticky status, which will trigger + SMW to report above in turn. + +- SMW is basically a background thread to handle incoming and outgoing shdlc + frames. This thread will also check the shdlc sticky status and report to HCI + when it discovers it is not able to run anymore because of an unrecoverable + error that happened within shdlc or below. If the problem occurs during shdlc + connection, the error is reported through the connect completion. + +- HCI: if an internal HCI error happens (frame is lost), or HCI is reported an + error from a lower layer, HCI will either complete the currently executing + command with that error, or notify NFC Core directly if no command is + executing. + +- NFC Core: when NFC Core is notified of an error from below and polling is + active, it will send a tag discovered event with an empty tag list to the user + space to let it know that the poll operation will never be able to detect a + tag. If polling is not active and the error was sticky, lower levels will + return it at next invocation. diff --git a/Documentation/driver-api/nfc/nfc-pn544.rst b/Documentation/driver-api/nfc/nfc-pn544.rst new file mode 100644 index 0000000000..6b2d8aae0c --- /dev/null +++ b/Documentation/driver-api/nfc/nfc-pn544.rst @@ -0,0 +1,34 @@ +============================================================================ +Kernel driver for the NXP Semiconductors PN544 Near Field Communication chip +============================================================================ + + +General +------- + +The PN544 is an integrated transmission module for contactless +communication. The driver goes under drives/nfc/ and is compiled as a +module named "pn544". + +Host Interfaces: I2C, SPI and HSU, this driver supports currently only I2C. + +Protocols +--------- + +In the normal (HCI) mode and in the firmware update mode read and +write functions behave a bit differently because the message formats +or the protocols are different. + +In the normal (HCI) mode the protocol used is derived from the ETSI +HCI specification. The firmware is updated using a specific protocol, +which is different from HCI. + +HCI messages consist of an eight bit header and the message body. The +header contains the message length. Maximum size for an HCI message is +33. In HCI mode sent messages are tested for a correct +checksum. Firmware update messages have the length in the second (MSB) +and third (LSB) bytes of the message. The maximum FW message length is +1024 bytes. + +For the ETSI HCI specification see +http://www.etsi.org/WebSite/Technologies/ProtocolSpecification.aspx diff --git a/Documentation/driver-api/ntb.rst b/Documentation/driver-api/ntb.rst new file mode 100644 index 0000000000..e991d92b8b --- /dev/null +++ b/Documentation/driver-api/ntb.rst @@ -0,0 +1,263 @@ +=========== +NTB Drivers +=========== + +NTB (Non-Transparent Bridge) is a type of PCI-Express bridge chip that connects +the separate memory systems of two or more computers to the same PCI-Express +fabric. Existing NTB hardware supports a common feature set: doorbell +registers and memory translation windows, as well as non common features like +scratchpad and message registers. Scratchpad registers are read-and-writable +registers that are accessible from either side of the device, so that peers can +exchange a small amount of information at a fixed address. Message registers can +be utilized for the same purpose. Additionally they are provided with +special status bits to make sure the information isn't rewritten by another +peer. Doorbell registers provide a way for peers to send interrupt events. +Memory windows allow translated read and write access to the peer memory. + +NTB Core Driver (ntb) +===================== + +The NTB core driver defines an api wrapping the common feature set, and allows +clients interested in NTB features to discover NTB the devices supported by +hardware drivers. The term "client" is used here to mean an upper layer +component making use of the NTB api. The term "driver," or "hardware driver," +is used here to mean a driver for a specific vendor and model of NTB hardware. + +NTB Client Drivers +================== + +NTB client drivers should register with the NTB core driver. After +registering, the client probe and remove functions will be called appropriately +as ntb hardware, or hardware drivers, are inserted and removed. The +registration uses the Linux Device framework, so it should feel familiar to +anyone who has written a pci driver. + +NTB Typical client driver implementation +---------------------------------------- + +Primary purpose of NTB is to share some peace of memory between at least two +systems. So the NTB device features like Scratchpad/Message registers are +mainly used to perform the proper memory window initialization. Typically +there are two types of memory window interfaces supported by the NTB API: +inbound translation configured on the local ntb port and outbound translation +configured by the peer, on the peer ntb port. The first type is +depicted on the next figure:: + + Inbound translation: + + Memory: Local NTB Port: Peer NTB Port: Peer MMIO: + ____________ + | dma-mapped |-ntb_mw_set_trans(addr) | + | memory | _v____________ | ______________ + | (addr) |<======| MW xlat addr |<====| MW base addr |<== memory-mapped IO + |------------| |--------------| | |--------------| + +So typical scenario of the first type memory window initialization looks: +1) allocate a memory region, 2) put translated address to NTB config, +3) somehow notify a peer device of performed initialization, 4) peer device +maps corresponding outbound memory window so to have access to the shared +memory region. + +The second type of interface, that implies the shared windows being +initialized by a peer device, is depicted on the figure:: + + Outbound translation: + + Memory: Local NTB Port: Peer NTB Port: Peer MMIO: + ____________ ______________ + | dma-mapped | | | MW base addr |<== memory-mapped IO + | memory | | |--------------| + | (addr) |<===================| MW xlat addr |<-ntb_peer_mw_set_trans(addr) + |------------| | |--------------| + +Typical scenario of the second type interface initialization would be: +1) allocate a memory region, 2) somehow deliver a translated address to a peer +device, 3) peer puts the translated address to NTB config, 4) peer device maps +outbound memory window so to have access to the shared memory region. + +As one can see the described scenarios can be combined in one portable +algorithm. + + Local device: + 1) Allocate memory for a shared window + 2) Initialize memory window by translated address of the allocated region + (it may fail if local memory window initialization is unsupported) + 3) Send the translated address and memory window index to a peer device + + Peer device: + 1) Initialize memory window with retrieved address of the allocated + by another device memory region (it may fail if peer memory window + initialization is unsupported) + 2) Map outbound memory window + +In accordance with this scenario, the NTB Memory Window API can be used as +follows: + + Local device: + 1) ntb_mw_count(pidx) - retrieve number of memory ranges, which can + be allocated for memory windows between local device and peer device + of port with specified index. + 2) ntb_get_align(pidx, midx) - retrieve parameters restricting the + shared memory region alignment and size. Then memory can be properly + allocated. + 3) Allocate physically contiguous memory region in compliance with + restrictions retrieved in 2). + 4) ntb_mw_set_trans(pidx, midx) - try to set translation address of + the memory window with specified index for the defined peer device + (it may fail if local translated address setting is not supported) + 5) Send translated base address (usually together with memory window + number) to the peer device using, for instance, scratchpad or message + registers. + + Peer device: + 1) ntb_peer_mw_set_trans(pidx, midx) - try to set received from other + device (related to pidx) translated address for specified memory + window. It may fail if retrieved address, for instance, exceeds + maximum possible address or isn't properly aligned. + 2) ntb_peer_mw_get_addr(widx) - retrieve MMIO address to map the memory + window so to have an access to the shared memory. + +Also it is worth to note, that method ntb_mw_count(pidx) should return the +same value as ntb_peer_mw_count() on the peer with port index - pidx. + +NTB Transport Client (ntb\_transport) and NTB Netdev (ntb\_netdev) +------------------------------------------------------------------ + +The primary client for NTB is the Transport client, used in tandem with NTB +Netdev. These drivers function together to create a logical link to the peer, +across the ntb, to exchange packets of network data. The Transport client +establishes a logical link to the peer, and creates queue pairs to exchange +messages and data. The NTB Netdev then creates an ethernet device using a +Transport queue pair. Network data is copied between socket buffers and the +Transport queue pair buffer. The Transport client may be used for other things +besides Netdev, however no other applications have yet been written. + +NTB Ping Pong Test Client (ntb\_pingpong) +----------------------------------------- + +The Ping Pong test client serves as a demonstration to exercise the doorbell +and scratchpad registers of NTB hardware, and as an example simple NTB client. +Ping Pong enables the link when started, waits for the NTB link to come up, and +then proceeds to read and write the doorbell scratchpad registers of the NTB. +The peers interrupt each other using a bit mask of doorbell bits, which is +shifted by one in each round, to test the behavior of multiple doorbell bits +and interrupt vectors. The Ping Pong driver also reads the first local +scratchpad, and writes the value plus one to the first peer scratchpad, each +round before writing the peer doorbell register. + +Module Parameters: + +* unsafe - Some hardware has known issues with scratchpad and doorbell + registers. By default, Ping Pong will not attempt to exercise such + hardware. You may override this behavior at your own risk by setting + unsafe=1. +* delay\_ms - Specify the delay between receiving a doorbell + interrupt event and setting the peer doorbell register for the next + round. +* init\_db - Specify the doorbell bits to start new series of rounds. A new + series begins once all the doorbell bits have been shifted out of + range. +* dyndbg - It is suggested to specify dyndbg=+p when loading this module, and + then to observe debugging output on the console. + +NTB Tool Test Client (ntb\_tool) +-------------------------------- + +The Tool test client serves for debugging, primarily, ntb hardware and drivers. +The Tool provides access through debugfs for reading, setting, and clearing the +NTB doorbell, and reading and writing scratchpads. + +The Tool does not currently have any module parameters. + +Debugfs Files: + +* *debugfs*/ntb\_tool/*hw*/ + A directory in debugfs will be created for each + NTB device probed by the tool. This directory is shortened to *hw* + below. +* *hw*/db + This file is used to read, set, and clear the local doorbell. Not + all operations may be supported by all hardware. To read the doorbell, + read the file. To set the doorbell, write `s` followed by the bits to + set (eg: `echo 's 0x0101' > db`). To clear the doorbell, write `c` + followed by the bits to clear. +* *hw*/mask + This file is used to read, set, and clear the local doorbell mask. + See *db* for details. +* *hw*/peer\_db + This file is used to read, set, and clear the peer doorbell. + See *db* for details. +* *hw*/peer\_mask + This file is used to read, set, and clear the peer doorbell + mask. See *db* for details. +* *hw*/spad + This file is used to read and write local scratchpads. To read + the values of all scratchpads, read the file. To write values, write a + series of pairs of scratchpad number and value + (eg: `echo '4 0x123 7 0xabc' > spad` + # to set scratchpads `4` and `7` to `0x123` and `0xabc`, respectively). +* *hw*/peer\_spad + This file is used to read and write peer scratchpads. See + *spad* for details. + +NTB MSI Test Client (ntb\_msi\_test) +------------------------------------ + +The MSI test client serves to test and debug the MSI library which +allows for passing MSI interrupts across NTB memory windows. The +test client is interacted with through the debugfs filesystem: + +* *debugfs*/ntb\_msi\_test/*hw*/ + A directory in debugfs will be created for each + NTB device probed by the msi test. This directory is shortened to *hw* + below. +* *hw*/port + This file describes the local port number +* *hw*/irq*_occurrences + One occurrences file exists for each interrupt and, when read, + returns the number of times the interrupt has been triggered. +* *hw*/peer*/port + This file describes the port number for each peer +* *hw*/peer*/count + This file describes the number of interrupts that can be + triggered on each peer +* *hw*/peer*/trigger + Writing an interrupt number (any number less than the value + specified in count) will trigger the interrupt on the + specified peer. That peer's interrupt's occurrence file + should be incremented. + +NTB Hardware Drivers +==================== + +NTB hardware drivers should register devices with the NTB core driver. After +registering, clients probe and remove functions will be called. + +NTB Intel Hardware Driver (ntb\_hw\_intel) +------------------------------------------ + +The Intel hardware driver supports NTB on Xeon and Atom CPUs. + +Module Parameters: + +* b2b\_mw\_idx + If the peer ntb is to be accessed via a memory window, then use + this memory window to access the peer ntb. A value of zero or positive + starts from the first mw idx, and a negative value starts from the last + mw idx. Both sides MUST set the same value here! The default value is + `-1`. +* b2b\_mw\_share + If the peer ntb is to be accessed via a memory window, and if + the memory window is large enough, still allow the client to use the + second half of the memory window for address translation to the peer. +* xeon\_b2b\_usd\_bar2\_addr64 + If using B2B topology on Xeon hardware, use + this 64 bit address on the bus between the NTB devices for the window + at BAR2, on the upstream side of the link. +* xeon\_b2b\_usd\_bar4\_addr64 - See *xeon\_b2b\_bar2\_addr64*. +* xeon\_b2b\_usd\_bar4\_addr32 - See *xeon\_b2b\_bar2\_addr64*. +* xeon\_b2b\_usd\_bar5\_addr32 - See *xeon\_b2b\_bar2\_addr64*. +* xeon\_b2b\_dsd\_bar2\_addr64 - See *xeon\_b2b\_bar2\_addr64*. +* xeon\_b2b\_dsd\_bar4\_addr64 - See *xeon\_b2b\_bar2\_addr64*. +* xeon\_b2b\_dsd\_bar4\_addr32 - See *xeon\_b2b\_bar2\_addr64*. +* xeon\_b2b\_dsd\_bar5\_addr32 - See *xeon\_b2b\_bar2\_addr64*. diff --git a/Documentation/driver-api/nvdimm/btt.rst b/Documentation/driver-api/nvdimm/btt.rst new file mode 100644 index 0000000000..107395c042 --- /dev/null +++ b/Documentation/driver-api/nvdimm/btt.rst @@ -0,0 +1,285 @@ +============================= +BTT - Block Translation Table +============================= + + +1. Introduction +=============== + +Persistent memory based storage is able to perform IO at byte (or more +accurately, cache line) granularity. However, we often want to expose such +storage as traditional block devices. The block drivers for persistent memory +will do exactly this. However, they do not provide any atomicity guarantees. +Traditional SSDs typically provide protection against torn sectors in hardware, +using stored energy in capacitors to complete in-flight block writes, or perhaps +in firmware. We don't have this luxury with persistent memory - if a write is in +progress, and we experience a power failure, the block will contain a mix of old +and new data. Applications may not be prepared to handle such a scenario. + +The Block Translation Table (BTT) provides atomic sector update semantics for +persistent memory devices, so that applications that rely on sector writes not +being torn can continue to do so. The BTT manifests itself as a stacked block +device, and reserves a portion of the underlying storage for its metadata. At +the heart of it, is an indirection table that re-maps all the blocks on the +volume. It can be thought of as an extremely simple file system that only +provides atomic sector updates. + + +2. Static Layout +================ + +The underlying storage on which a BTT can be laid out is not limited in any way. +The BTT, however, splits the available space into chunks of up to 512 GiB, +called "Arenas". + +Each arena follows the same layout for its metadata, and all references in an +arena are internal to it (with the exception of one field that points to the +next arena). The following depicts the "On-disk" metadata layout:: + + + Backing Store +-------> Arena + +---------------+ | +------------------+ + | | | | Arena info block | + | Arena 0 +---+ | 4K | + | 512G | +------------------+ + | | | | + +---------------+ | | + | | | | + | Arena 1 | | Data Blocks | + | 512G | | | + | | | | + +---------------+ | | + | . | | | + | . | | | + | . | | | + | | | | + | | | | + +---------------+ +------------------+ + | | + | BTT Map | + | | + | | + +------------------+ + | | + | BTT Flog | + | | + +------------------+ + | Info block copy | + | 4K | + +------------------+ + + +3. Theory of Operation +====================== + + +a. The BTT Map +-------------- + +The map is a simple lookup/indirection table that maps an LBA to an internal +block. Each map entry is 32 bits. The two most significant bits are special +flags, and the remaining form the internal block number. + +======== ============================================================= +Bit Description +======== ============================================================= +31 - 30 Error and Zero flags - Used in the following way:: + + == == ==================================================== + 31 30 Description + == == ==================================================== + 0 0 Initial state. Reads return zeroes; Premap = Postmap + 0 1 Zero state: Reads return zeroes + 1 0 Error state: Reads fail; Writes clear 'E' bit + 1 1 Normal Block – has valid postmap + == == ==================================================== + +29 - 0 Mappings to internal 'postmap' blocks +======== ============================================================= + + +Some of the terminology that will be subsequently used: + +============ ================================================================ +External LBA LBA as made visible to upper layers. +ABA Arena Block Address - Block offset/number within an arena +Premap ABA The block offset into an arena, which was decided upon by range + checking the External LBA +Postmap ABA The block number in the "Data Blocks" area obtained after + indirection from the map +nfree The number of free blocks that are maintained at any given time. + This is the number of concurrent writes that can happen to the + arena. +============ ================================================================ + + +For example, after adding a BTT, we surface a disk of 1024G. We get a read for +the external LBA at 768G. This falls into the second arena, and of the 512G +worth of blocks that this arena contributes, this block is at 256G. Thus, the +premap ABA is 256G. We now refer to the map, and find out the mapping for block +'X' (256G) points to block 'Y', say '64'. Thus the postmap ABA is 64. + + +b. The BTT Flog +--------------- + +The BTT provides sector atomicity by making every write an "allocating write", +i.e. Every write goes to a "free" block. A running list of free blocks is +maintained in the form of the BTT flog. 'Flog' is a combination of the words +"free list" and "log". The flog contains 'nfree' entries, and an entry contains: + +======== ===================================================================== +lba The premap ABA that is being written to +old_map The old postmap ABA - after 'this' write completes, this will be a + free block. +new_map The new postmap ABA. The map will up updated to reflect this + lba->postmap_aba mapping, but we log it here in case we have to + recover. +seq Sequence number to mark which of the 2 sections of this flog entry is + valid/newest. It cycles between 01->10->11->01 (binary) under normal + operation, with 00 indicating an uninitialized state. +lba' alternate lba entry +old_map' alternate old postmap entry +new_map' alternate new postmap entry +seq' alternate sequence number. +======== ===================================================================== + +Each of the above fields is 32-bit, making one entry 32 bytes. Entries are also +padded to 64 bytes to avoid cache line sharing or aliasing. Flog updates are +done such that for any entry being written, it: +a. overwrites the 'old' section in the entry based on sequence numbers +b. writes the 'new' section such that the sequence number is written last. + + +c. The concept of lanes +----------------------- + +While 'nfree' describes the number of concurrent IOs an arena can process +concurrently, 'nlanes' is the number of IOs the BTT device as a whole can +process:: + + nlanes = min(nfree, num_cpus) + +A lane number is obtained at the start of any IO, and is used for indexing into +all the on-disk and in-memory data structures for the duration of the IO. If +there are more CPUs than the max number of available lanes, than lanes are +protected by spinlocks. + + +d. In-memory data structure: Read Tracking Table (RTT) +------------------------------------------------------ + +Consider a case where we have two threads, one doing reads and the other, +writes. We can hit a condition where the writer thread grabs a free block to do +a new IO, but the (slow) reader thread is still reading from it. In other words, +the reader consulted a map entry, and started reading the corresponding block. A +writer started writing to the same external LBA, and finished the write updating +the map for that external LBA to point to its new postmap ABA. At this point the +internal, postmap block that the reader is (still) reading has been inserted +into the list of free blocks. If another write comes in for the same LBA, it can +grab this free block, and start writing to it, causing the reader to read +incorrect data. To prevent this, we introduce the RTT. + +The RTT is a simple, per arena table with 'nfree' entries. Every reader inserts +into rtt[lane_number], the postmap ABA it is reading, and clears it after the +read is complete. Every writer thread, after grabbing a free block, checks the +RTT for its presence. If the postmap free block is in the RTT, it waits till the +reader clears the RTT entry, and only then starts writing to it. + + +e. In-memory data structure: map locks +-------------------------------------- + +Consider a case where two writer threads are writing to the same LBA. There can +be a race in the following sequence of steps:: + + free[lane] = map[premap_aba] + map[premap_aba] = postmap_aba + +Both threads can update their respective free[lane] with the same old, freed +postmap_aba. This has made the layout inconsistent by losing a free entry, and +at the same time, duplicating another free entry for two lanes. + +To solve this, we could have a single map lock (per arena) that has to be taken +before performing the above sequence, but we feel that could be too contentious. +Instead we use an array of (nfree) map_locks that is indexed by +(premap_aba modulo nfree). + + +f. Reconstruction from the Flog +------------------------------- + +On startup, we analyze the BTT flog to create our list of free blocks. We walk +through all the entries, and for each lane, of the set of two possible +'sections', we always look at the most recent one only (based on the sequence +number). The reconstruction rules/steps are simple: + +- Read map[log_entry.lba]. +- If log_entry.new matches the map entry, then log_entry.old is free. +- If log_entry.new does not match the map entry, then log_entry.new is free. + (This case can only be caused by power-fails/unsafe shutdowns) + + +g. Summarizing - Read and Write flows +------------------------------------- + +Read: + +1. Convert external LBA to arena number + pre-map ABA +2. Get a lane (and take lane_lock) +3. Read map to get the entry for this pre-map ABA +4. Enter post-map ABA into RTT[lane] +5. If TRIM flag set in map, return zeroes, and end IO (go to step 8) +6. If ERROR flag set in map, end IO with EIO (go to step 8) +7. Read data from this block +8. Remove post-map ABA entry from RTT[lane] +9. Release lane (and lane_lock) + +Write: + +1. Convert external LBA to Arena number + pre-map ABA +2. Get a lane (and take lane_lock) +3. Use lane to index into in-memory free list and obtain a new block, next flog + index, next sequence number +4. Scan the RTT to check if free block is present, and spin/wait if it is. +5. Write data to this free block +6. Read map to get the existing post-map ABA entry for this pre-map ABA +7. Write flog entry: [premap_aba / old postmap_aba / new postmap_aba / seq_num] +8. Write new post-map ABA into map. +9. Write old post-map entry into the free list +10. Calculate next sequence number and write into the free list entry +11. Release lane (and lane_lock) + + +4. Error Handling +================= + +An arena would be in an error state if any of the metadata is corrupted +irrecoverably, either due to a bug or a media error. The following conditions +indicate an error: + +- Info block checksum does not match (and recovering from the copy also fails) +- All internal available blocks are not uniquely and entirely addressed by the + sum of mapped blocks and free blocks (from the BTT flog). +- Rebuilding free list from the flog reveals missing/duplicate/impossible + entries +- A map entry is out of bounds + +If any of these error conditions are encountered, the arena is put into a read +only state using a flag in the info block. + + +5. Usage +======== + +The BTT can be set up on any disk (namespace) exposed by the libnvdimm subsystem +(pmem, or blk mode). The easiest way to set up such a namespace is using the +'ndctl' utility [1]: + +For example, the ndctl command line to setup a btt with a 4k sector size is:: + + ndctl create-namespace -f -e namespace0.0 -m sector -l 4k + +See ndctl create-namespace --help for more options. + +[1]: https://github.com/pmem/ndctl diff --git a/Documentation/driver-api/nvdimm/firmware-activate.rst b/Documentation/driver-api/nvdimm/firmware-activate.rst new file mode 100644 index 0000000000..7ee7decbbd --- /dev/null +++ b/Documentation/driver-api/nvdimm/firmware-activate.rst @@ -0,0 +1,86 @@ +.. SPDX-License-Identifier: GPL-2.0 + +================================== +NVDIMM Runtime Firmware Activation +================================== + +Some persistent memory devices run a firmware locally on the device / +"DIMM" to perform tasks like media management, capacity provisioning, +and health monitoring. The process of updating that firmware typically +involves a reboot because it has implications for in-flight memory +transactions. However, reboots are disruptive and at least the Intel +persistent memory platform implementation, described by the Intel ACPI +DSM specification [1], has added support for activating firmware at +runtime. + +A native sysfs interface is implemented in libnvdimm to allow platform +to advertise and control their local runtime firmware activation +capability. + +The libnvdimm bus object, ndbusX, implements an ndbusX/firmware/activate +attribute that shows the state of the firmware activation as one of 'idle', +'armed', 'overflow', and 'busy'. + +- idle: + No devices are set / armed to activate firmware + +- armed: + At least one device is armed + +- busy: + In the busy state armed devices are in the process of transitioning + back to idle and completing an activation cycle. + +- overflow: + If the platform has a concept of incremental work needed to perform + the activation it could be the case that too many DIMMs are armed for + activation. In that scenario the potential for firmware activation to + timeout is indicated by the 'overflow' state. + +The 'ndbusX/firmware/activate' property can be written with a value of +either 'live', or 'quiesce'. A value of 'quiesce' triggers the kernel to +run firmware activation from within the equivalent of the hibernation +'freeze' state where drivers and applications are notified to stop their +modifications of system memory. A value of 'live' attempts +firmware activation without this hibernation cycle. The +'ndbusX/firmware/activate' property will be elided completely if no +firmware activation capability is detected. + +Another property 'ndbusX/firmware/capability' indicates a value of +'live' or 'quiesce', where 'live' indicates that the firmware +does not require or inflict any quiesce period on the system to update +firmware. A capability value of 'quiesce' indicates that firmware does +expect and injects a quiet period for the memory controller, but 'live' +may still be written to 'ndbusX/firmware/activate' as an override to +assume the risk of racing firmware update with in-flight device and +application activity. The 'ndbusX/firmware/capability' property will be +elided completely if no firmware activation capability is detected. + +The libnvdimm memory-device / DIMM object, nmemX, implements +'nmemX/firmware/activate' and 'nmemX/firmware/result' attributes to +communicate the per-device firmware activation state. Similar to the +'ndbusX/firmware/activate' attribute, the 'nmemX/firmware/activate' +attribute indicates 'idle', 'armed', or 'busy'. The state transitions +from 'armed' to 'idle' when the system is prepared to activate firmware, +firmware staged + state set to armed, and 'ndbusX/firmware/activate' is +triggered. After that activation event the nmemX/firmware/result +attribute reflects the state of the last activation as one of: + +- none: + No runtime activation triggered since the last time the device was reset + +- success: + The last runtime activation completed successfully. + +- fail: + The last runtime activation failed for device-specific reasons. + +- not_staged: + The last runtime activation failed due to a sequencing error of the + firmware image not being staged. + +- need_reset: + Runtime firmware activation failed, but the firmware can still be + activated via the legacy method of power-cycling the system. + +[1]: https://docs.pmem.io/persistent-memory/ diff --git a/Documentation/driver-api/nvdimm/index.rst b/Documentation/driver-api/nvdimm/index.rst new file mode 100644 index 0000000000..5863bd04f0 --- /dev/null +++ b/Documentation/driver-api/nvdimm/index.rst @@ -0,0 +1,13 @@ +.. SPDX-License-Identifier: GPL-2.0 + +=================================== +Non-Volatile Memory Device (NVDIMM) +=================================== + +.. toctree:: + :maxdepth: 1 + + nvdimm + btt + security + firmware-activate diff --git a/Documentation/driver-api/nvdimm/nvdimm.rst b/Documentation/driver-api/nvdimm/nvdimm.rst new file mode 100644 index 0000000000..ca16b5acbf --- /dev/null +++ b/Documentation/driver-api/nvdimm/nvdimm.rst @@ -0,0 +1,657 @@ +=============================== +LIBNVDIMM: Non-Volatile Devices +=============================== + +libnvdimm - kernel / libndctl - userspace helper library + +nvdimm@lists.linux.dev + +Version 13 + +.. contents: + + Glossary + Overview + Supporting Documents + Git Trees + LIBNVDIMM PMEM + PMEM-REGIONs, Atomic Sectors, and DAX + Example NVDIMM Platform + LIBNVDIMM Kernel Device Model and LIBNDCTL Userspace API + LIBNDCTL: Context + libndctl: instantiate a new library context example + LIBNVDIMM/LIBNDCTL: Bus + libnvdimm: control class device in /sys/class + libnvdimm: bus + libndctl: bus enumeration example + LIBNVDIMM/LIBNDCTL: DIMM (NMEM) + libnvdimm: DIMM (NMEM) + libndctl: DIMM enumeration example + LIBNVDIMM/LIBNDCTL: Region + libnvdimm: region + libndctl: region enumeration example + Why Not Encode the Region Type into the Region Name? + How Do I Determine the Major Type of a Region? + LIBNVDIMM/LIBNDCTL: Namespace + libnvdimm: namespace + libndctl: namespace enumeration example + libndctl: namespace creation example + Why the Term "namespace"? + LIBNVDIMM/LIBNDCTL: Block Translation Table "btt" + libnvdimm: btt layout + libndctl: btt creation example + Summary LIBNDCTL Diagram + + +Glossary +======== + +PMEM: + A system-physical-address range where writes are persistent. A + block device composed of PMEM is capable of DAX. A PMEM address range + may span an interleave of several DIMMs. + +DPA: + DIMM Physical Address, is a DIMM-relative offset. With one DIMM in + the system there would be a 1:1 system-physical-address:DPA association. + Once more DIMMs are added a memory controller interleave must be + decoded to determine the DPA associated with a given + system-physical-address. + +DAX: + File system extensions to bypass the page cache and block layer to + mmap persistent memory, from a PMEM block device, directly into a + process address space. + +DSM: + Device Specific Method: ACPI method to control specific + device - in this case the firmware. + +DCR: + NVDIMM Control Region Structure defined in ACPI 6 Section 5.2.25.5. + It defines a vendor-id, device-id, and interface format for a given DIMM. + +BTT: + Block Translation Table: Persistent memory is byte addressable. + Existing software may have an expectation that the power-fail-atomicity + of writes is at least one sector, 512 bytes. The BTT is an indirection + table with atomic update semantics to front a PMEM block device + driver and present arbitrary atomic sector sizes. + +LABEL: + Metadata stored on a DIMM device that partitions and identifies + (persistently names) capacity allocated to different PMEM namespaces. It + also indicates whether an address abstraction like a BTT is applied to + the namespace. Note that traditional partition tables, GPT/MBR, are + layered on top of a PMEM namespace, or an address abstraction like BTT + if present, but partition support is deprecated going forward. + + +Overview +======== + +The LIBNVDIMM subsystem provides support for PMEM described by platform +firmware or a device driver. On ACPI based systems the platform firmware +conveys persistent memory resource via the ACPI NFIT "NVDIMM Firmware +Interface Table" in ACPI 6. While the LIBNVDIMM subsystem implementation +is generic and supports pre-NFIT platforms, it was guided by the +superset of capabilities need to support this ACPI 6 definition for +NVDIMM resources. The original implementation supported the +block-window-aperture capability described in the NFIT, but that support +has since been abandoned and never shipped in a product. + +Supporting Documents +-------------------- + +ACPI 6: + https://www.uefi.org/sites/default/files/resources/ACPI_6.0.pdf +NVDIMM Namespace: + https://pmem.io/documents/NVDIMM_Namespace_Spec.pdf +DSM Interface Example: + https://pmem.io/documents/NVDIMM_DSM_Interface_Example.pdf +Driver Writer's Guide: + https://pmem.io/documents/NVDIMM_Driver_Writers_Guide.pdf + +Git Trees +--------- + +LIBNVDIMM: + https://git.kernel.org/cgit/linux/kernel/git/nvdimm/nvdimm.git +LIBNDCTL: + https://github.com/pmem/ndctl.git + + +LIBNVDIMM PMEM +============== + +Prior to the arrival of the NFIT, non-volatile memory was described to a +system in various ad-hoc ways. Usually only the bare minimum was +provided, namely, a single system-physical-address range where writes +are expected to be durable after a system power loss. Now, the NFIT +specification standardizes not only the description of PMEM, but also +platform message-passing entry points for control and configuration. + +PMEM (nd_pmem.ko): Drives a system-physical-address range. This range is +contiguous in system memory and may be interleaved (hardware memory controller +striped) across multiple DIMMs. When interleaved the platform may optionally +provide details of which DIMMs are participating in the interleave. + +It is worth noting that when the labeling capability is detected (a EFI +namespace label index block is found), then no block device is created +by default as userspace needs to do at least one allocation of DPA to +the PMEM range. In contrast ND_NAMESPACE_IO ranges, once registered, +can be immediately attached to nd_pmem. This latter mode is called +label-less or "legacy". + +PMEM-REGIONs, Atomic Sectors, and DAX +------------------------------------- + +For the cases where an application or filesystem still needs atomic sector +update guarantees it can register a BTT on a PMEM device or partition. See +LIBNVDIMM/NDCTL: Block Translation Table "btt" + + +Example NVDIMM Platform +======================= + +For the remainder of this document the following diagram will be +referenced for any example sysfs layouts:: + + + (a) (b) DIMM + +-------------------+--------+--------+--------+ + +------+ | pm0.0 | free | pm1.0 | free | 0 + | imc0 +--+- - - region0- - - +--------+ +--------+ + +--+---+ | pm0.0 | free | pm1.0 | free | 1 + | +-------------------+--------v v--------+ + +--+---+ | | + | cpu0 | region1 + +--+---+ | | + | +----------------------------^ ^--------+ + +--+---+ | free | pm1.0 | free | 2 + | imc1 +--+----------------------------| +--------+ + +------+ | free | pm1.0 | free | 3 + +----------------------------+--------+--------+ + +In this platform we have four DIMMs and two memory controllers in one +socket. Each PMEM interleave set is identified by a region device with +a dynamically assigned id. + + 1. The first portion of DIMM0 and DIMM1 are interleaved as REGION0. A + single PMEM namespace is created in the REGION0-SPA-range that spans most + of DIMM0 and DIMM1 with a user-specified name of "pm0.0". Some of that + interleaved system-physical-address range is left free for + another PMEM namespace to be defined. + + 2. In the last portion of DIMM0 and DIMM1 we have an interleaved + system-physical-address range, REGION1, that spans those two DIMMs as + well as DIMM2 and DIMM3. Some of REGION1 is allocated to a PMEM namespace + named "pm1.0". + + This bus is provided by the kernel under the device + /sys/devices/platform/nfit_test.0 when the nfit_test.ko module from + tools/testing/nvdimm is loaded. This module is a unit test for + LIBNVDIMM and the acpi_nfit.ko driver. + + +LIBNVDIMM Kernel Device Model and LIBNDCTL Userspace API +======================================================== + +What follows is a description of the LIBNVDIMM sysfs layout and a +corresponding object hierarchy diagram as viewed through the LIBNDCTL +API. The example sysfs paths and diagrams are relative to the Example +NVDIMM Platform which is also the LIBNVDIMM bus used in the LIBNDCTL unit +test. + +LIBNDCTL: Context +----------------- + +Every API call in the LIBNDCTL library requires a context that holds the +logging parameters and other library instance state. The library is +based on the libabc template: + + https://git.kernel.org/cgit/linux/kernel/git/kay/libabc.git + +LIBNDCTL: instantiate a new library context example +^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ + +:: + + struct ndctl_ctx *ctx; + + if (ndctl_new(&ctx) == 0) + return ctx; + else + return NULL; + +LIBNVDIMM/LIBNDCTL: Bus +----------------------- + +A bus has a 1:1 relationship with an NFIT. The current expectation for +ACPI based systems is that there is only ever one platform-global NFIT. +That said, it is trivial to register multiple NFITs, the specification +does not preclude it. The infrastructure supports multiple busses and +we use this capability to test multiple NFIT configurations in the unit +test. + +LIBNVDIMM: control class device in /sys/class +--------------------------------------------- + +This character device accepts DSM messages to be passed to DIMM +identified by its NFIT handle:: + + /sys/class/nd/ndctl0 + |-- dev + |-- device -> ../../../ndbus0 + |-- subsystem -> ../../../../../../../class/nd + + + +LIBNVDIMM: bus +-------------- + +:: + + struct nvdimm_bus *nvdimm_bus_register(struct device *parent, + struct nvdimm_bus_descriptor *nfit_desc); + +:: + + /sys/devices/platform/nfit_test.0/ndbus0 + |-- commands + |-- nd + |-- nfit + |-- nmem0 + |-- nmem1 + |-- nmem2 + |-- nmem3 + |-- power + |-- provider + |-- region0 + |-- region1 + |-- region2 + |-- region3 + |-- region4 + |-- region5 + |-- uevent + `-- wait_probe + +LIBNDCTL: bus enumeration example +^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ + +Find the bus handle that describes the bus from Example NVDIMM Platform:: + + static struct ndctl_bus *get_bus_by_provider(struct ndctl_ctx *ctx, + const char *provider) + { + struct ndctl_bus *bus; + + ndctl_bus_foreach(ctx, bus) + if (strcmp(provider, ndctl_bus_get_provider(bus)) == 0) + return bus; + + return NULL; + } + + bus = get_bus_by_provider(ctx, "nfit_test.0"); + + +LIBNVDIMM/LIBNDCTL: DIMM (NMEM) +------------------------------- + +The DIMM device provides a character device for sending commands to +hardware, and it is a container for LABELs. If the DIMM is defined by +NFIT then an optional 'nfit' attribute sub-directory is available to add +NFIT-specifics. + +Note that the kernel device name for "DIMMs" is "nmemX". The NFIT +describes these devices via "Memory Device to System Physical Address +Range Mapping Structure", and there is no requirement that they actually +be physical DIMMs, so we use a more generic name. + +LIBNVDIMM: DIMM (NMEM) +^^^^^^^^^^^^^^^^^^^^^^ + +:: + + struct nvdimm *nvdimm_create(struct nvdimm_bus *nvdimm_bus, void *provider_data, + const struct attribute_group **groups, unsigned long flags, + unsigned long *dsm_mask); + +:: + + /sys/devices/platform/nfit_test.0/ndbus0 + |-- nmem0 + | |-- available_slots + | |-- commands + | |-- dev + | |-- devtype + | |-- driver -> ../../../../../bus/nd/drivers/nvdimm + | |-- modalias + | |-- nfit + | | |-- device + | | |-- format + | | |-- handle + | | |-- phys_id + | | |-- rev_id + | | |-- serial + | | `-- vendor + | |-- state + | |-- subsystem -> ../../../../../bus/nd + | `-- uevent + |-- nmem1 + [..] + + +LIBNDCTL: DIMM enumeration example +^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ + +Note, in this example we are assuming NFIT-defined DIMMs which are +identified by an "nfit_handle" a 32-bit value where: + + - Bit 3:0 DIMM number within the memory channel + - Bit 7:4 memory channel number + - Bit 11:8 memory controller ID + - Bit 15:12 socket ID (within scope of a Node controller if node + controller is present) + - Bit 27:16 Node Controller ID + - Bit 31:28 Reserved + +:: + + static struct ndctl_dimm *get_dimm_by_handle(struct ndctl_bus *bus, + unsigned int handle) + { + struct ndctl_dimm *dimm; + + ndctl_dimm_foreach(bus, dimm) + if (ndctl_dimm_get_handle(dimm) == handle) + return dimm; + + return NULL; + } + + #define DIMM_HANDLE(n, s, i, c, d) \ + (((n & 0xfff) << 16) | ((s & 0xf) << 12) | ((i & 0xf) << 8) \ + | ((c & 0xf) << 4) | (d & 0xf)) + + dimm = get_dimm_by_handle(bus, DIMM_HANDLE(0, 0, 0, 0, 0)); + +LIBNVDIMM/LIBNDCTL: Region +-------------------------- + +A generic REGION device is registered for each PMEM interleave-set / +range. Per the example there are 2 PMEM regions on the "nfit_test.0" +bus. The primary role of regions are to be a container of "mappings". A +mapping is a tuple of <DIMM, DPA-start-offset, length>. + +LIBNVDIMM provides a built-in driver for REGION devices. This driver +is responsible for all parsing LABELs, if present, and then emitting NAMESPACE +devices for the nd_pmem driver to consume. + +In addition to the generic attributes of "mapping"s, "interleave_ways" +and "size" the REGION device also exports some convenience attributes. +"nstype" indicates the integer type of namespace-device this region +emits, "devtype" duplicates the DEVTYPE variable stored by udev at the +'add' event, "modalias" duplicates the MODALIAS variable stored by udev +at the 'add' event, and finally, the optional "spa_index" is provided in +the case where the region is defined by a SPA. + +LIBNVDIMM: region:: + + struct nd_region *nvdimm_pmem_region_create(struct nvdimm_bus *nvdimm_bus, + struct nd_region_desc *ndr_desc); + +:: + + /sys/devices/platform/nfit_test.0/ndbus0 + |-- region0 + | |-- available_size + | |-- btt0 + | |-- btt_seed + | |-- devtype + | |-- driver -> ../../../../../bus/nd/drivers/nd_region + | |-- init_namespaces + | |-- mapping0 + | |-- mapping1 + | |-- mappings + | |-- modalias + | |-- namespace0.0 + | |-- namespace_seed + | |-- numa_node + | |-- nfit + | | `-- spa_index + | |-- nstype + | |-- set_cookie + | |-- size + | |-- subsystem -> ../../../../../bus/nd + | `-- uevent + |-- region1 + [..] + +LIBNDCTL: region enumeration example +^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ + +Sample region retrieval routines based on NFIT-unique data like +"spa_index" (interleave set id). + +:: + + static struct ndctl_region *get_pmem_region_by_spa_index(struct ndctl_bus *bus, + unsigned int spa_index) + { + struct ndctl_region *region; + + ndctl_region_foreach(bus, region) { + if (ndctl_region_get_type(region) != ND_DEVICE_REGION_PMEM) + continue; + if (ndctl_region_get_spa_index(region) == spa_index) + return region; + } + return NULL; + } + + +LIBNVDIMM/LIBNDCTL: Namespace +----------------------------- + +A REGION, after resolving DPA aliasing and LABEL specified boundaries, surfaces +one or more "namespace" devices. The arrival of a "namespace" device currently +triggers the nd_pmem driver to load and register a disk/block device. + +LIBNVDIMM: namespace +^^^^^^^^^^^^^^^^^^^^ + +Here is a sample layout from the 2 major types of NAMESPACE where namespace0.0 +represents DIMM-info-backed PMEM (note that it has a 'uuid' attribute), and +namespace1.0 represents an anonymous PMEM namespace (note that has no 'uuid' +attribute due to not support a LABEL) + +:: + + /sys/devices/platform/nfit_test.0/ndbus0/region0/namespace0.0 + |-- alt_name + |-- devtype + |-- dpa_extents + |-- force_raw + |-- modalias + |-- numa_node + |-- resource + |-- size + |-- subsystem -> ../../../../../../bus/nd + |-- type + |-- uevent + `-- uuid + /sys/devices/platform/nfit_test.1/ndbus1/region1/namespace1.0 + |-- block + | `-- pmem0 + |-- devtype + |-- driver -> ../../../../../../bus/nd/drivers/pmem + |-- force_raw + |-- modalias + |-- numa_node + |-- resource + |-- size + |-- subsystem -> ../../../../../../bus/nd + |-- type + `-- uevent + +LIBNDCTL: namespace enumeration example +^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ +Namespaces are indexed relative to their parent region, example below. +These indexes are mostly static from boot to boot, but subsystem makes +no guarantees in this regard. For a static namespace identifier use its +'uuid' attribute. + +:: + + static struct ndctl_namespace + *get_namespace_by_id(struct ndctl_region *region, unsigned int id) + { + struct ndctl_namespace *ndns; + + ndctl_namespace_foreach(region, ndns) + if (ndctl_namespace_get_id(ndns) == id) + return ndns; + + return NULL; + } + +LIBNDCTL: namespace creation example +^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ + +Idle namespaces are automatically created by the kernel if a given +region has enough available capacity to create a new namespace. +Namespace instantiation involves finding an idle namespace and +configuring it. For the most part the setting of namespace attributes +can occur in any order, the only constraint is that 'uuid' must be set +before 'size'. This enables the kernel to track DPA allocations +internally with a static identifier:: + + static int configure_namespace(struct ndctl_region *region, + struct ndctl_namespace *ndns, + struct namespace_parameters *parameters) + { + char devname[50]; + + snprintf(devname, sizeof(devname), "namespace%d.%d", + ndctl_region_get_id(region), paramaters->id); + + ndctl_namespace_set_alt_name(ndns, devname); + /* 'uuid' must be set prior to setting size! */ + ndctl_namespace_set_uuid(ndns, paramaters->uuid); + ndctl_namespace_set_size(ndns, paramaters->size); + /* unlike pmem namespaces, blk namespaces have a sector size */ + if (parameters->lbasize) + ndctl_namespace_set_sector_size(ndns, parameters->lbasize); + ndctl_namespace_enable(ndns); + } + + +Why the Term "namespace"? +^^^^^^^^^^^^^^^^^^^^^^^^^ + + 1. Why not "volume" for instance? "volume" ran the risk of confusing + ND (libnvdimm subsystem) to a volume manager like device-mapper. + + 2. The term originated to describe the sub-devices that can be created + within a NVME controller (see the nvme specification: + https://www.nvmexpress.org/specifications/), and NFIT namespaces are + meant to parallel the capabilities and configurability of + NVME-namespaces. + + +LIBNVDIMM/LIBNDCTL: Block Translation Table "btt" +------------------------------------------------- + +A BTT (design document: https://pmem.io/2014/09/23/btt.html) is a +personality driver for a namespace that fronts entire namespace as an +'address abstraction'. + +LIBNVDIMM: btt layout +^^^^^^^^^^^^^^^^^^^^^ + +Every region will start out with at least one BTT device which is the +seed device. To activate it set the "namespace", "uuid", and +"sector_size" attributes and then bind the device to the nd_pmem or +nd_blk driver depending on the region type:: + + /sys/devices/platform/nfit_test.1/ndbus0/region0/btt0/ + |-- namespace + |-- delete + |-- devtype + |-- modalias + |-- numa_node + |-- sector_size + |-- subsystem -> ../../../../../bus/nd + |-- uevent + `-- uuid + +LIBNDCTL: btt creation example +^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ + +Similar to namespaces an idle BTT device is automatically created per +region. Each time this "seed" btt device is configured and enabled a new +seed is created. Creating a BTT configuration involves two steps of +finding and idle BTT and assigning it to consume a namespace. + +:: + + static struct ndctl_btt *get_idle_btt(struct ndctl_region *region) + { + struct ndctl_btt *btt; + + ndctl_btt_foreach(region, btt) + if (!ndctl_btt_is_enabled(btt) + && !ndctl_btt_is_configured(btt)) + return btt; + + return NULL; + } + + static int configure_btt(struct ndctl_region *region, + struct btt_parameters *parameters) + { + btt = get_idle_btt(region); + + ndctl_btt_set_uuid(btt, parameters->uuid); + ndctl_btt_set_sector_size(btt, parameters->sector_size); + ndctl_btt_set_namespace(btt, parameters->ndns); + /* turn off raw mode device */ + ndctl_namespace_disable(parameters->ndns); + /* turn on btt access */ + ndctl_btt_enable(btt); + } + +Once instantiated a new inactive btt seed device will appear underneath +the region. + +Once a "namespace" is removed from a BTT that instance of the BTT device +will be deleted or otherwise reset to default values. This deletion is +only at the device model level. In order to destroy a BTT the "info +block" needs to be destroyed. Note, that to destroy a BTT the media +needs to be written in raw mode. By default, the kernel will autodetect +the presence of a BTT and disable raw mode. This autodetect behavior +can be suppressed by enabling raw mode for the namespace via the +ndctl_namespace_set_raw_mode() API. + + +Summary LIBNDCTL Diagram +------------------------ + +For the given example above, here is the view of the objects as seen by the +LIBNDCTL API:: + + +---+ + |CTX| + +-+-+ + | + +-------+ | + | DIMM0 <-+ | +---------+ +--------------+ +---------------+ + +-------+ | | +-> REGION0 +---> NAMESPACE0.0 +--> PMEM8 "pm0.0" | + | DIMM1 <-+ +-v--+ | +---------+ +--------------+ +---------------+ + +-------+ +-+BUS0+-| +---------+ +--------------+ +----------------------+ + | DIMM2 <-+ +----+ +-> REGION1 +---> NAMESPACE1.0 +--> PMEM6 "pm1.0" | BTT1 | + +-------+ | | +---------+ +--------------+ +---------------+------+ + | DIMM3 <-+ + +-------+ diff --git a/Documentation/driver-api/nvdimm/security.rst b/Documentation/driver-api/nvdimm/security.rst new file mode 100644 index 0000000000..eb3d35e6a9 --- /dev/null +++ b/Documentation/driver-api/nvdimm/security.rst @@ -0,0 +1,143 @@ +=============== +NVDIMM Security +=============== + +1. Introduction +--------------- + +With the introduction of Intel Device Specific Methods (DSM) v1.8 +specification [1], security DSMs are introduced. The spec added the following +security DSMs: "get security state", "set passphrase", "disable passphrase", +"unlock unit", "freeze lock", "secure erase", and "overwrite". A security_ops +data structure has been added to struct dimm in order to support the security +operations and generic APIs are exposed to allow vendor neutral operations. + +2. Sysfs Interface +------------------ +The "security" sysfs attribute is provided in the nvdimm sysfs directory. For +example: +/sys/devices/LNXSYSTM:00/LNXSYBUS:00/ACPI0012:00/ndbus0/nmem0/security + +The "show" attribute of that attribute will display the security state for +that DIMM. The following states are available: disabled, unlocked, locked, +frozen, and overwrite. If security is not supported, the sysfs attribute +will not be visible. + +The "store" attribute takes several commands when it is being written to +in order to support some of the security functionalities: +update <old_keyid> <new_keyid> - enable or update passphrase. +disable <keyid> - disable enabled security and remove key. +freeze - freeze changing of security states. +erase <keyid> - delete existing user encryption key. +overwrite <keyid> - wipe the entire nvdimm. +master_update <keyid> <new_keyid> - enable or update master passphrase. +master_erase <keyid> - delete existing user encryption key. + +3. Key Management +----------------- + +The key is associated to the payload by the DIMM id. For example: +# cat /sys/devices/LNXSYSTM:00/LNXSYBUS:00/ACPI0012:00/ndbus0/nmem0/nfit/id +8089-a2-1740-00000133 +The DIMM id would be provided along with the key payload (passphrase) to +the kernel. + +The security keys are managed on the basis of a single key per DIMM. The +key "passphrase" is expected to be 32bytes long. This is similar to the ATA +security specification [2]. A key is initially acquired via the request_key() +kernel API call during nvdimm unlock. It is up to the user to make sure that +all the keys are in the kernel user keyring for unlock. + +A nvdimm encrypted-key of format enc32 has the description format of: +nvdimm:<bus-provider-specific-unique-id> + +See file ``Documentation/security/keys/trusted-encrypted.rst`` for creating +encrypted-keys of enc32 format. TPM usage with a master trusted key is +preferred for sealing the encrypted-keys. + +4. Unlocking +------------ +When the DIMMs are being enumerated by the kernel, the kernel will attempt to +retrieve the key from the kernel user keyring. This is the only time +a locked DIMM can be unlocked. Once unlocked, the DIMM will remain unlocked +until reboot. Typically an entity (i.e. shell script) will inject all the +relevant encrypted-keys into the kernel user keyring during the initramfs phase. +This provides the unlock function access to all the related keys that contain +the passphrase for the respective nvdimms. It is also recommended that the +keys are injected before libnvdimm is loaded by modprobe. + +5. Update +--------- +When doing an update, it is expected that the existing key is removed from +the kernel user keyring and reinjected as different (old) key. It's irrelevant +what the key description is for the old key since we are only interested in the +keyid when doing the update operation. It is also expected that the new key +is injected with the description format described from earlier in this +document. The update command written to the sysfs attribute will be with +the format: +update <old keyid> <new keyid> + +If there is no old keyid due to a security enabling, then a 0 should be +passed in. + +6. Freeze +--------- +The freeze operation does not require any keys. The security config can be +frozen by a user with root privilege. + +7. Disable +---------- +The security disable command format is: +disable <keyid> + +An key with the current passphrase payload that is tied to the nvdimm should be +in the kernel user keyring. + +8. Secure Erase +--------------- +The command format for doing a secure erase is: +erase <keyid> + +An key with the current passphrase payload that is tied to the nvdimm should be +in the kernel user keyring. + +9. Overwrite +------------ +The command format for doing an overwrite is: +overwrite <keyid> + +Overwrite can be done without a key if security is not enabled. A key serial +of 0 can be passed in to indicate no key. + +The sysfs attribute "security" can be polled to wait on overwrite completion. +Overwrite can last tens of minutes or more depending on nvdimm size. + +An encrypted-key with the current user passphrase that is tied to the nvdimm +should be injected and its keyid should be passed in via sysfs. + +10. Master Update +----------------- +The command format for doing a master update is: +update <old keyid> <new keyid> + +The operating mechanism for master update is identical to update except the +master passphrase key is passed to the kernel. The master passphrase key +is just another encrypted-key. + +This command is only available when security is disabled. + +11. Master Erase +---------------- +The command format for doing a master erase is: +master_erase <current keyid> + +This command has the same operating mechanism as erase except the master +passphrase key is passed to the kernel. The master passphrase key is just +another encrypted-key. + +This command is only available when the master security is enabled, indicated +by the extended security status. + +[1]: https://pmem.io/documents/NVDIMM_DSM_Interface-V1.8.pdf + +[2]: http://www.t13.org/documents/UploadedDocuments/docs2006/e05179r4-ACS-SecurityClarifications.pdf diff --git a/Documentation/driver-api/nvmem.rst b/Documentation/driver-api/nvmem.rst new file mode 100644 index 0000000000..de221e91c8 --- /dev/null +++ b/Documentation/driver-api/nvmem.rst @@ -0,0 +1,202 @@ +.. SPDX-License-Identifier: GPL-2.0 + +=============== +NVMEM Subsystem +=============== + + Srinivas Kandagatla <srinivas.kandagatla@linaro.org> + +This document explains the NVMEM Framework along with the APIs provided, +and how to use it. + +1. Introduction +=============== +*NVMEM* is the abbreviation for Non Volatile Memory layer. It is used to +retrieve configuration of SOC or Device specific data from non volatile +memories like eeprom, efuses and so on. + +Before this framework existed, NVMEM drivers like eeprom were stored in +drivers/misc, where they all had to duplicate pretty much the same code to +register a sysfs file, allow in-kernel users to access the content of the +devices they were driving, etc. + +This was also a problem as far as other in-kernel users were involved, since +the solutions used were pretty much different from one driver to another, there +was a rather big abstraction leak. + +This framework aims at solve these problems. It also introduces DT +representation for consumer devices to go get the data they require (MAC +Addresses, SoC/Revision ID, part numbers, and so on) from the NVMEMs. + +NVMEM Providers ++++++++++++++++ + +NVMEM provider refers to an entity that implements methods to initialize, read +and write the non-volatile memory. + +2. Registering/Unregistering the NVMEM provider +=============================================== + +A NVMEM provider can register with NVMEM core by supplying relevant +nvmem configuration to nvmem_register(), on success core would return a valid +nvmem_device pointer. + +nvmem_unregister(nvmem) is used to unregister a previously registered provider. + +For example, a simple nvram case:: + + static int brcm_nvram_probe(struct platform_device *pdev) + { + struct nvmem_config config = { + .name = "brcm-nvram", + .reg_read = brcm_nvram_read, + }; + ... + config.dev = &pdev->dev; + config.priv = priv; + config.size = resource_size(res); + + devm_nvmem_register(&config); + } + +Users of board files can define and register nvmem cells using the +nvmem_cell_table struct:: + + static struct nvmem_cell_info foo_nvmem_cells[] = { + { + .name = "macaddr", + .offset = 0x7f00, + .bytes = ETH_ALEN, + } + }; + + static struct nvmem_cell_table foo_nvmem_cell_table = { + .nvmem_name = "i2c-eeprom", + .cells = foo_nvmem_cells, + .ncells = ARRAY_SIZE(foo_nvmem_cells), + }; + + nvmem_add_cell_table(&foo_nvmem_cell_table); + +Additionally it is possible to create nvmem cell lookup entries and register +them with the nvmem framework from machine code as shown in the example below:: + + static struct nvmem_cell_lookup foo_nvmem_lookup = { + .nvmem_name = "i2c-eeprom", + .cell_name = "macaddr", + .dev_id = "foo_mac.0", + .con_id = "mac-address", + }; + + nvmem_add_cell_lookups(&foo_nvmem_lookup, 1); + +NVMEM Consumers ++++++++++++++++ + +NVMEM consumers are the entities which make use of the NVMEM provider to +read from and to NVMEM. + +3. NVMEM cell based consumer APIs +================================= + +NVMEM cells are the data entries/fields in the NVMEM. +The NVMEM framework provides 3 APIs to read/write NVMEM cells:: + + struct nvmem_cell *nvmem_cell_get(struct device *dev, const char *name); + struct nvmem_cell *devm_nvmem_cell_get(struct device *dev, const char *name); + + void nvmem_cell_put(struct nvmem_cell *cell); + void devm_nvmem_cell_put(struct device *dev, struct nvmem_cell *cell); + + void *nvmem_cell_read(struct nvmem_cell *cell, ssize_t *len); + int nvmem_cell_write(struct nvmem_cell *cell, void *buf, ssize_t len); + +`*nvmem_cell_get()` apis will get a reference to nvmem cell for a given id, +and nvmem_cell_read/write() can then read or write to the cell. +Once the usage of the cell is finished the consumer should call +`*nvmem_cell_put()` to free all the allocation memory for the cell. + +4. Direct NVMEM device based consumer APIs +========================================== + +In some instances it is necessary to directly read/write the NVMEM. +To facilitate such consumers NVMEM framework provides below apis:: + + struct nvmem_device *nvmem_device_get(struct device *dev, const char *name); + struct nvmem_device *devm_nvmem_device_get(struct device *dev, + const char *name); + struct nvmem_device *nvmem_device_find(void *data, + int (*match)(struct device *dev, const void *data)); + void nvmem_device_put(struct nvmem_device *nvmem); + int nvmem_device_read(struct nvmem_device *nvmem, unsigned int offset, + size_t bytes, void *buf); + int nvmem_device_write(struct nvmem_device *nvmem, unsigned int offset, + size_t bytes, void *buf); + int nvmem_device_cell_read(struct nvmem_device *nvmem, + struct nvmem_cell_info *info, void *buf); + int nvmem_device_cell_write(struct nvmem_device *nvmem, + struct nvmem_cell_info *info, void *buf); + +Before the consumers can read/write NVMEM directly, it should get hold +of nvmem_controller from one of the `*nvmem_device_get()` api. + +The difference between these apis and cell based apis is that these apis always +take nvmem_device as parameter. + +5. Releasing a reference to the NVMEM +===================================== + +When a consumer no longer needs the NVMEM, it has to release the reference +to the NVMEM it has obtained using the APIs mentioned in the above section. +The NVMEM framework provides 2 APIs to release a reference to the NVMEM:: + + void nvmem_cell_put(struct nvmem_cell *cell); + void devm_nvmem_cell_put(struct device *dev, struct nvmem_cell *cell); + void nvmem_device_put(struct nvmem_device *nvmem); + void devm_nvmem_device_put(struct device *dev, struct nvmem_device *nvmem); + +Both these APIs are used to release a reference to the NVMEM and +devm_nvmem_cell_put and devm_nvmem_device_put destroys the devres associated +with this NVMEM. + +Userspace ++++++++++ + +6. Userspace binary interface +============================== + +Userspace can read/write the raw NVMEM file located at:: + + /sys/bus/nvmem/devices/*/nvmem + +ex:: + + hexdump /sys/bus/nvmem/devices/qfprom0/nvmem + + 0000000 0000 0000 0000 0000 0000 0000 0000 0000 + * + 00000a0 db10 2240 0000 e000 0c00 0c00 0000 0c00 + 0000000 0000 0000 0000 0000 0000 0000 0000 0000 + ... + * + 0001000 + +7. DeviceTree Binding +===================== + +See Documentation/devicetree/bindings/nvmem/nvmem.txt + +8. NVMEM layouts +================ + +NVMEM layouts are yet another mechanism to create cells. With the device +tree binding it is possible to specify simple cells by using an offset +and a length. Sometimes, the cells doesn't have a static offset, but +the content is still well defined, e.g. tag-length-values. In this case, +the NVMEM device content has to be first parsed and the cells need to +be added accordingly. Layouts let you read the content of the NVMEM device +and let you add cells dynamically. + +Another use case for layouts is the post processing of cells. With layouts, +it is possible to associate a custom post processing hook to a cell. It +even possible to add this hook to cells not created by the layout itself. diff --git a/Documentation/driver-api/parport-lowlevel.rst b/Documentation/driver-api/parport-lowlevel.rst new file mode 100644 index 0000000000..0633d70ffd --- /dev/null +++ b/Documentation/driver-api/parport-lowlevel.rst @@ -0,0 +1,1832 @@ +=============================== +PARPORT interface documentation +=============================== + +:Time-stamp: <2000-02-24 13:30:20 twaugh> + +Described here are the following functions: + +Global functions:: + parport_register_driver + parport_unregister_driver + parport_enumerate + parport_register_device + parport_unregister_device + parport_claim + parport_claim_or_block + parport_release + parport_yield + parport_yield_blocking + parport_wait_peripheral + parport_poll_peripheral + parport_wait_event + parport_negotiate + parport_read + parport_write + parport_open + parport_close + parport_device_id + parport_device_coords + parport_find_class + parport_find_device + parport_set_timeout + +Port functions (can be overridden by low-level drivers): + + SPP:: + port->ops->read_data + port->ops->write_data + port->ops->read_status + port->ops->read_control + port->ops->write_control + port->ops->frob_control + port->ops->enable_irq + port->ops->disable_irq + port->ops->data_forward + port->ops->data_reverse + + EPP:: + port->ops->epp_write_data + port->ops->epp_read_data + port->ops->epp_write_addr + port->ops->epp_read_addr + + ECP:: + port->ops->ecp_write_data + port->ops->ecp_read_data + port->ops->ecp_write_addr + + Other:: + port->ops->nibble_read_data + port->ops->byte_read_data + port->ops->compat_write_data + +The parport subsystem comprises ``parport`` (the core port-sharing +code), and a variety of low-level drivers that actually do the port +accesses. Each low-level driver handles a particular style of port +(PC, Amiga, and so on). + +The parport interface to the device driver author can be broken down +into global functions and port functions. + +The global functions are mostly for communicating between the device +driver and the parport subsystem: acquiring a list of available ports, +claiming a port for exclusive use, and so on. They also include +``generic`` functions for doing standard things that will work on any +IEEE 1284-capable architecture. + +The port functions are provided by the low-level drivers, although the +core parport module provides generic ``defaults`` for some routines. +The port functions can be split into three groups: SPP, EPP, and ECP. + +SPP (Standard Parallel Port) functions modify so-called ``SPP`` +registers: data, status, and control. The hardware may not actually +have registers exactly like that, but the PC does and this interface is +modelled after common PC implementations. Other low-level drivers may +be able to emulate most of the functionality. + +EPP (Enhanced Parallel Port) functions are provided for reading and +writing in IEEE 1284 EPP mode, and ECP (Extended Capabilities Port) +functions are used for IEEE 1284 ECP mode. (What about BECP? Does +anyone care?) + +Hardware assistance for EPP and/or ECP transfers may or may not be +available, and if it is available it may or may not be used. If +hardware is not used, the transfer will be software-driven. In order +to cope with peripherals that only tenuously support IEEE 1284, a +low-level driver specific function is provided, for altering 'fudge +factors'. + +Global functions +================ + +parport_register_driver - register a device driver with parport +--------------------------------------------------------------- + +SYNOPSIS +^^^^^^^^ + +:: + + #include <linux/parport.h> + + struct parport_driver { + const char *name; + void (*attach) (struct parport *); + void (*detach) (struct parport *); + struct parport_driver *next; + }; + int parport_register_driver (struct parport_driver *driver); + +DESCRIPTION +^^^^^^^^^^^ + +In order to be notified about parallel ports when they are detected, +parport_register_driver should be called. Your driver will +immediately be notified of all ports that have already been detected, +and of each new port as low-level drivers are loaded. + +A ``struct parport_driver`` contains the textual name of your driver, +a pointer to a function to handle new ports, and a pointer to a +function to handle ports going away due to a low-level driver +unloading. Ports will only be detached if they are not being used +(i.e. there are no devices registered on them). + +The visible parts of the ``struct parport *`` argument given to +attach/detach are:: + + struct parport + { + struct parport *next; /* next parport in list */ + const char *name; /* port's name */ + unsigned int modes; /* bitfield of hardware modes */ + struct parport_device_info probe_info; + /* IEEE1284 info */ + int number; /* parport index */ + struct parport_operations *ops; + ... + }; + +There are other members of the structure, but they should not be +touched. + +The ``modes`` member summarises the capabilities of the underlying +hardware. It consists of flags which may be bitwise-ored together: + + ============================= =============================================== + PARPORT_MODE_PCSPP IBM PC registers are available, + i.e. functions that act on data, + control and status registers are + probably writing directly to the + hardware. + PARPORT_MODE_TRISTATE The data drivers may be turned off. + This allows the data lines to be used + for reverse (peripheral to host) + transfers. + PARPORT_MODE_COMPAT The hardware can assist with + compatibility-mode (printer) + transfers, i.e. compat_write_block. + PARPORT_MODE_EPP The hardware can assist with EPP + transfers. + PARPORT_MODE_ECP The hardware can assist with ECP + transfers. + PARPORT_MODE_DMA The hardware can use DMA, so you might + want to pass ISA DMA-able memory + (i.e. memory allocated using the + GFP_DMA flag with kmalloc) to the + low-level driver in order to take + advantage of it. + ============================= =============================================== + +There may be other flags in ``modes`` as well. + +The contents of ``modes`` is advisory only. For example, if the +hardware is capable of DMA, and PARPORT_MODE_DMA is in ``modes``, it +doesn't necessarily mean that DMA will always be used when possible. +Similarly, hardware that is capable of assisting ECP transfers won't +necessarily be used. + +RETURN VALUE +^^^^^^^^^^^^ + +Zero on success, otherwise an error code. + +ERRORS +^^^^^^ + +None. (Can it fail? Why return int?) + +EXAMPLE +^^^^^^^ + +:: + + static void lp_attach (struct parport *port) + { + ... + private = kmalloc (...); + dev[count++] = parport_register_device (...); + ... + } + + static void lp_detach (struct parport *port) + { + ... + } + + static struct parport_driver lp_driver = { + "lp", + lp_attach, + lp_detach, + NULL /* always put NULL here */ + }; + + int lp_init (void) + { + ... + if (parport_register_driver (&lp_driver)) { + /* Failed; nothing we can do. */ + return -EIO; + } + ... + } + + +SEE ALSO +^^^^^^^^ + +parport_unregister_driver, parport_register_device, parport_enumerate + + + +parport_unregister_driver - tell parport to forget about this driver +-------------------------------------------------------------------- + +SYNOPSIS +^^^^^^^^ + +:: + + #include <linux/parport.h> + + struct parport_driver { + const char *name; + void (*attach) (struct parport *); + void (*detach) (struct parport *); + struct parport_driver *next; + }; + void parport_unregister_driver (struct parport_driver *driver); + +DESCRIPTION +^^^^^^^^^^^ + +This tells parport not to notify the device driver of new ports or of +ports going away. Registered devices belonging to that driver are NOT +unregistered: parport_unregister_device must be used for each one. + +EXAMPLE +^^^^^^^ + +:: + + void cleanup_module (void) + { + ... + /* Stop notifications. */ + parport_unregister_driver (&lp_driver); + + /* Unregister devices. */ + for (i = 0; i < NUM_DEVS; i++) + parport_unregister_device (dev[i]); + ... + } + +SEE ALSO +^^^^^^^^ + +parport_register_driver, parport_enumerate + + + +parport_enumerate - retrieve a list of parallel ports (DEPRECATED) +------------------------------------------------------------------ + +SYNOPSIS +^^^^^^^^ + +:: + + #include <linux/parport.h> + + struct parport *parport_enumerate (void); + +DESCRIPTION +^^^^^^^^^^^ + +Retrieve the first of a list of valid parallel ports for this machine. +Successive parallel ports can be found using the ``struct parport +*next`` element of the ``struct parport *`` that is returned. If ``next`` +is NULL, there are no more parallel ports in the list. The number of +ports in the list will not exceed PARPORT_MAX. + +RETURN VALUE +^^^^^^^^^^^^ + +A ``struct parport *`` describing a valid parallel port for the machine, +or NULL if there are none. + +ERRORS +^^^^^^ + +This function can return NULL to indicate that there are no parallel +ports to use. + +EXAMPLE +^^^^^^^ + +:: + + int detect_device (void) + { + struct parport *port; + + for (port = parport_enumerate (); + port != NULL; + port = port->next) { + /* Try to detect a device on the port... */ + ... + } + } + + ... + } + +NOTES +^^^^^ + +parport_enumerate is deprecated; parport_register_driver should be +used instead. + +SEE ALSO +^^^^^^^^ + +parport_register_driver, parport_unregister_driver + + + +parport_register_device - register to use a port +------------------------------------------------ + +SYNOPSIS +^^^^^^^^ + +:: + + #include <linux/parport.h> + + typedef int (*preempt_func) (void *handle); + typedef void (*wakeup_func) (void *handle); + typedef int (*irq_func) (int irq, void *handle, struct pt_regs *); + + struct pardevice *parport_register_device(struct parport *port, + const char *name, + preempt_func preempt, + wakeup_func wakeup, + irq_func irq, + int flags, + void *handle); + +DESCRIPTION +^^^^^^^^^^^ + +Use this function to register your device driver on a parallel port +(``port``). Once you have done that, you will be able to use +parport_claim and parport_release in order to use the port. + +The (``name``) argument is the name of the device that appears in /proc +filesystem. The string must be valid for the whole lifetime of the +device (until parport_unregister_device is called). + +This function will register three callbacks into your driver: +``preempt``, ``wakeup`` and ``irq``. Each of these may be NULL in order to +indicate that you do not want a callback. + +When the ``preempt`` function is called, it is because another driver +wishes to use the parallel port. The ``preempt`` function should return +non-zero if the parallel port cannot be released yet -- if zero is +returned, the port is lost to another driver and the port must be +re-claimed before use. + +The ``wakeup`` function is called once another driver has released the +port and no other driver has yet claimed it. You can claim the +parallel port from within the ``wakeup`` function (in which case the +claim is guaranteed to succeed), or choose not to if you don't need it +now. + +If an interrupt occurs on the parallel port your driver has claimed, +the ``irq`` function will be called. (Write something about shared +interrupts here.) + +The ``handle`` is a pointer to driver-specific data, and is passed to +the callback functions. + +``flags`` may be a bitwise combination of the following flags: + + ===================== ================================================= + Flag Meaning + ===================== ================================================= + PARPORT_DEV_EXCL The device cannot share the parallel port at all. + Use this only when absolutely necessary. + ===================== ================================================= + +The typedefs are not actually defined -- they are only shown in order +to make the function prototype more readable. + +The visible parts of the returned ``struct pardevice`` are:: + + struct pardevice { + struct parport *port; /* Associated port */ + void *private; /* Device driver's 'handle' */ + ... + }; + +RETURN VALUE +^^^^^^^^^^^^ + +A ``struct pardevice *``: a handle to the registered parallel port +device that can be used for parport_claim, parport_release, etc. + +ERRORS +^^^^^^ + +A return value of NULL indicates that there was a problem registering +a device on that port. + +EXAMPLE +^^^^^^^ + +:: + + static int preempt (void *handle) + { + if (busy_right_now) + return 1; + + must_reclaim_port = 1; + return 0; + } + + static void wakeup (void *handle) + { + struct toaster *private = handle; + struct pardevice *dev = private->dev; + if (!dev) return; /* avoid races */ + + if (want_port) + parport_claim (dev); + } + + static int toaster_detect (struct toaster *private, struct parport *port) + { + private->dev = parport_register_device (port, "toaster", preempt, + wakeup, NULL, 0, + private); + if (!private->dev) + /* Couldn't register with parport. */ + return -EIO; + + must_reclaim_port = 0; + busy_right_now = 1; + parport_claim_or_block (private->dev); + ... + /* Don't need the port while the toaster warms up. */ + busy_right_now = 0; + ... + busy_right_now = 1; + if (must_reclaim_port) { + parport_claim_or_block (private->dev); + must_reclaim_port = 0; + } + ... + } + +SEE ALSO +^^^^^^^^ + +parport_unregister_device, parport_claim + + + +parport_unregister_device - finish using a port +----------------------------------------------- + +SYNPOPSIS + +:: + + #include <linux/parport.h> + + void parport_unregister_device (struct pardevice *dev); + +DESCRIPTION +^^^^^^^^^^^ + +This function is the opposite of parport_register_device. After using +parport_unregister_device, ``dev`` is no longer a valid device handle. + +You should not unregister a device that is currently claimed, although +if you do it will be released automatically. + +EXAMPLE +^^^^^^^ + +:: + + ... + kfree (dev->private); /* before we lose the pointer */ + parport_unregister_device (dev); + ... + +SEE ALSO +^^^^^^^^ + + +parport_unregister_driver + +parport_claim, parport_claim_or_block - claim the parallel port for a device +---------------------------------------------------------------------------- + +SYNOPSIS +^^^^^^^^ + +:: + + #include <linux/parport.h> + + int parport_claim (struct pardevice *dev); + int parport_claim_or_block (struct pardevice *dev); + +DESCRIPTION +^^^^^^^^^^^ + +These functions attempt to gain control of the parallel port on which +``dev`` is registered. ``parport_claim`` does not block, but +``parport_claim_or_block`` may do. (Put something here about blocking +interruptibly or non-interruptibly.) + +You should not try to claim a port that you have already claimed. + +RETURN VALUE +^^^^^^^^^^^^ + +A return value of zero indicates that the port was successfully +claimed, and the caller now has possession of the parallel port. + +If ``parport_claim_or_block`` blocks before returning successfully, the +return value is positive. + +ERRORS +^^^^^^ + +========== ========================================================== + -EAGAIN The port is unavailable at the moment, but another attempt + to claim it may succeed. +========== ========================================================== + +SEE ALSO +^^^^^^^^ + + +parport_release + +parport_release - release the parallel port +------------------------------------------- + +SYNOPSIS +^^^^^^^^ + +:: + + #include <linux/parport.h> + + void parport_release (struct pardevice *dev); + +DESCRIPTION +^^^^^^^^^^^ + +Once a parallel port device has been claimed, it can be released using +``parport_release``. It cannot fail, but you should not release a +device that you do not have possession of. + +EXAMPLE +^^^^^^^ + +:: + + static size_t write (struct pardevice *dev, const void *buf, + size_t len) + { + ... + written = dev->port->ops->write_ecp_data (dev->port, buf, + len); + parport_release (dev); + ... + } + + +SEE ALSO +^^^^^^^^ + +change_mode, parport_claim, parport_claim_or_block, parport_yield + + + +parport_yield, parport_yield_blocking - temporarily release a parallel port +--------------------------------------------------------------------------- + +SYNOPSIS +^^^^^^^^ + +:: + + #include <linux/parport.h> + + int parport_yield (struct pardevice *dev) + int parport_yield_blocking (struct pardevice *dev); + +DESCRIPTION +^^^^^^^^^^^ + +When a driver has control of a parallel port, it may allow another +driver to temporarily ``borrow`` it. ``parport_yield`` does not block; +``parport_yield_blocking`` may do. + +RETURN VALUE +^^^^^^^^^^^^ + +A return value of zero indicates that the caller still owns the port +and the call did not block. + +A positive return value from ``parport_yield_blocking`` indicates that +the caller still owns the port and the call blocked. + +A return value of -EAGAIN indicates that the caller no longer owns the +port, and it must be re-claimed before use. + +ERRORS +^^^^^^ + +========= ========================================================== + -EAGAIN Ownership of the parallel port was given away. +========= ========================================================== + +SEE ALSO +^^^^^^^^ + +parport_release + + + +parport_wait_peripheral - wait for status lines, up to 35ms +----------------------------------------------------------- + +SYNOPSIS +^^^^^^^^ + +:: + + #include <linux/parport.h> + + int parport_wait_peripheral (struct parport *port, + unsigned char mask, + unsigned char val); + +DESCRIPTION +^^^^^^^^^^^ + +Wait for the status lines in mask to match the values in val. + +RETURN VALUE +^^^^^^^^^^^^ + +======== ========================================================== + -EINTR a signal is pending + 0 the status lines in mask have values in val + 1 timed out while waiting (35ms elapsed) +======== ========================================================== + +SEE ALSO +^^^^^^^^ + +parport_poll_peripheral + + + +parport_poll_peripheral - wait for status lines, in usec +-------------------------------------------------------- + +SYNOPSIS +^^^^^^^^ + +:: + + #include <linux/parport.h> + + int parport_poll_peripheral (struct parport *port, + unsigned char mask, + unsigned char val, + int usec); + +DESCRIPTION +^^^^^^^^^^^ + +Wait for the status lines in mask to match the values in val. + +RETURN VALUE +^^^^^^^^^^^^ + +======== ========================================================== + -EINTR a signal is pending + 0 the status lines in mask have values in val + 1 timed out while waiting (usec microseconds have elapsed) +======== ========================================================== + +SEE ALSO +^^^^^^^^ + +parport_wait_peripheral + + + +parport_wait_event - wait for an event on a port +------------------------------------------------ + +SYNOPSIS +^^^^^^^^ + +:: + + #include <linux/parport.h> + + int parport_wait_event (struct parport *port, signed long timeout) + +DESCRIPTION +^^^^^^^^^^^ + +Wait for an event (e.g. interrupt) on a port. The timeout is in +jiffies. + +RETURN VALUE +^^^^^^^^^^^^ + +======= ========================================================== + 0 success + <0 error (exit as soon as possible) + >0 timed out +======= ========================================================== + +parport_negotiate - perform IEEE 1284 negotiation +------------------------------------------------- + +SYNOPSIS +^^^^^^^^ + +:: + + #include <linux/parport.h> + + int parport_negotiate (struct parport *, int mode); + +DESCRIPTION +^^^^^^^^^^^ + +Perform IEEE 1284 negotiation. + +RETURN VALUE +^^^^^^^^^^^^ + +======= ========================================================== + 0 handshake OK; IEEE 1284 peripheral and mode available + -1 handshake failed; peripheral not compliant (or none present) + 1 handshake OK; IEEE 1284 peripheral present but mode not + available +======= ========================================================== + +SEE ALSO +^^^^^^^^ + +parport_read, parport_write + + + +parport_read - read data from device +------------------------------------ + +SYNOPSIS +^^^^^^^^ + +:: + + #include <linux/parport.h> + + ssize_t parport_read (struct parport *, void *buf, size_t len); + +DESCRIPTION +^^^^^^^^^^^ + +Read data from device in current IEEE 1284 transfer mode. This only +works for modes that support reverse data transfer. + +RETURN VALUE +^^^^^^^^^^^^ + +If negative, an error code; otherwise the number of bytes transferred. + +SEE ALSO +^^^^^^^^ + +parport_write, parport_negotiate + + + +parport_write - write data to device +------------------------------------ + +SYNOPSIS +^^^^^^^^ + +:: + + #include <linux/parport.h> + + ssize_t parport_write (struct parport *, const void *buf, size_t len); + +DESCRIPTION +^^^^^^^^^^^ + +Write data to device in current IEEE 1284 transfer mode. This only +works for modes that support forward data transfer. + +RETURN VALUE +^^^^^^^^^^^^ + +If negative, an error code; otherwise the number of bytes transferred. + +SEE ALSO +^^^^^^^^ + +parport_read, parport_negotiate + + + +parport_open - register device for particular device number +----------------------------------------------------------- + +SYNOPSIS +^^^^^^^^ + +:: + + #include <linux/parport.h> + + struct pardevice *parport_open (int devnum, const char *name, + int (*pf) (void *), + void (*kf) (void *), + void (*irqf) (int, void *, + struct pt_regs *), + int flags, void *handle); + +DESCRIPTION +^^^^^^^^^^^ + +This is like parport_register_device but takes a device number instead +of a pointer to a struct parport. + +RETURN VALUE +^^^^^^^^^^^^ + +See parport_register_device. If no device is associated with devnum, +NULL is returned. + +SEE ALSO +^^^^^^^^ + +parport_register_device + + + +parport_close - unregister device for particular device number +-------------------------------------------------------------- + +SYNOPSIS +^^^^^^^^ + +:: + + #include <linux/parport.h> + + void parport_close (struct pardevice *dev); + +DESCRIPTION +^^^^^^^^^^^ + +This is the equivalent of parport_unregister_device for parport_open. + +SEE ALSO +^^^^^^^^ + +parport_unregister_device, parport_open + + + +parport_device_id - obtain IEEE 1284 Device ID +---------------------------------------------- + +SYNOPSIS +^^^^^^^^ + +:: + + #include <linux/parport.h> + + ssize_t parport_device_id (int devnum, char *buffer, size_t len); + +DESCRIPTION +^^^^^^^^^^^ + +Obtains the IEEE 1284 Device ID associated with a given device. + +RETURN VALUE +^^^^^^^^^^^^ + +If negative, an error code; otherwise, the number of bytes of buffer +that contain the device ID. The format of the device ID is as +follows:: + + [length][ID] + +The first two bytes indicate the inclusive length of the entire Device +ID, and are in big-endian order. The ID is a sequence of pairs of the +form:: + + key:value; + +NOTES +^^^^^ + +Many devices have ill-formed IEEE 1284 Device IDs. + +SEE ALSO +^^^^^^^^ + +parport_find_class, parport_find_device + + + +parport_device_coords - convert device number to device coordinates +------------------------------------------------------------------- + +SYNOPSIS +^^^^^^^^ + +:: + + #include <linux/parport.h> + + int parport_device_coords (int devnum, int *parport, int *mux, + int *daisy); + +DESCRIPTION +^^^^^^^^^^^ + +Convert between device number (zero-based) and device coordinates +(port, multiplexor, daisy chain address). + +RETURN VALUE +^^^^^^^^^^^^ + +Zero on success, in which case the coordinates are (``*parport``, ``*mux``, +``*daisy``). + +SEE ALSO +^^^^^^^^ + +parport_open, parport_device_id + + + +parport_find_class - find a device by its class +----------------------------------------------- + +SYNOPSIS +^^^^^^^^ + +:: + + #include <linux/parport.h> + + typedef enum { + PARPORT_CLASS_LEGACY = 0, /* Non-IEEE1284 device */ + PARPORT_CLASS_PRINTER, + PARPORT_CLASS_MODEM, + PARPORT_CLASS_NET, + PARPORT_CLASS_HDC, /* Hard disk controller */ + PARPORT_CLASS_PCMCIA, + PARPORT_CLASS_MEDIA, /* Multimedia device */ + PARPORT_CLASS_FDC, /* Floppy disk controller */ + PARPORT_CLASS_PORTS, + PARPORT_CLASS_SCANNER, + PARPORT_CLASS_DIGCAM, + PARPORT_CLASS_OTHER, /* Anything else */ + PARPORT_CLASS_UNSPEC, /* No CLS field in ID */ + PARPORT_CLASS_SCSIADAPTER + } parport_device_class; + + int parport_find_class (parport_device_class cls, int from); + +DESCRIPTION +^^^^^^^^^^^ + +Find a device by class. The search starts from device number from+1. + +RETURN VALUE +^^^^^^^^^^^^ + +The device number of the next device in that class, or -1 if no such +device exists. + +NOTES +^^^^^ + +Example usage:: + + int devnum = -1; + while ((devnum = parport_find_class (PARPORT_CLASS_DIGCAM, devnum)) != -1) { + struct pardevice *dev = parport_open (devnum, ...); + ... + } + +SEE ALSO +^^^^^^^^ + +parport_find_device, parport_open, parport_device_id + + + +parport_find_device - find a device by its class +------------------------------------------------ + +SYNOPSIS +^^^^^^^^ + +:: + + #include <linux/parport.h> + + int parport_find_device (const char *mfg, const char *mdl, int from); + +DESCRIPTION +^^^^^^^^^^^ + +Find a device by vendor and model. The search starts from device +number from+1. + +RETURN VALUE +^^^^^^^^^^^^ + +The device number of the next device matching the specifications, or +-1 if no such device exists. + +NOTES +^^^^^ + +Example usage:: + + int devnum = -1; + while ((devnum = parport_find_device ("IOMEGA", "ZIP+", devnum)) != -1) { + struct pardevice *dev = parport_open (devnum, ...); + ... + } + +SEE ALSO +^^^^^^^^ + +parport_find_class, parport_open, parport_device_id + + + +parport_set_timeout - set the inactivity timeout +------------------------------------------------ + +SYNOPSIS +^^^^^^^^ + +:: + + #include <linux/parport.h> + + long parport_set_timeout (struct pardevice *dev, long inactivity); + +DESCRIPTION +^^^^^^^^^^^ + +Set the inactivity timeout, in jiffies, for a registered device. The +previous timeout is returned. + +RETURN VALUE +^^^^^^^^^^^^ + +The previous timeout, in jiffies. + +NOTES +^^^^^ + +Some of the port->ops functions for a parport may take time, owing to +delays at the peripheral. After the peripheral has not responded for +``inactivity`` jiffies, a timeout will occur and the blocking function +will return. + +A timeout of 0 jiffies is a special case: the function must do as much +as it can without blocking or leaving the hardware in an unknown +state. If port operations are performed from within an interrupt +handler, for instance, a timeout of 0 jiffies should be used. + +Once set for a registered device, the timeout will remain at the set +value until set again. + +SEE ALSO +^^^^^^^^ + +port->ops->xxx_read/write_yyy + + + + +PORT FUNCTIONS +============== + +The functions in the port->ops structure (struct parport_operations) +are provided by the low-level driver responsible for that port. + +port->ops->read_data - read the data register +--------------------------------------------- + +SYNOPSIS +^^^^^^^^ + +:: + + #include <linux/parport.h> + + struct parport_operations { + ... + unsigned char (*read_data) (struct parport *port); + ... + }; + +DESCRIPTION +^^^^^^^^^^^ + +If port->modes contains the PARPORT_MODE_TRISTATE flag and the +PARPORT_CONTROL_DIRECTION bit in the control register is set, this +returns the value on the data pins. If port->modes contains the +PARPORT_MODE_TRISTATE flag and the PARPORT_CONTROL_DIRECTION bit is +not set, the return value _may_ be the last value written to the data +register. Otherwise the return value is undefined. + +SEE ALSO +^^^^^^^^ + +write_data, read_status, write_control + + + +port->ops->write_data - write the data register +----------------------------------------------- + +SYNOPSIS +^^^^^^^^ + +:: + + #include <linux/parport.h> + + struct parport_operations { + ... + void (*write_data) (struct parport *port, unsigned char d); + ... + }; + +DESCRIPTION +^^^^^^^^^^^ + +Writes to the data register. May have side-effects (a STROBE pulse, +for instance). + +SEE ALSO +^^^^^^^^ + +read_data, read_status, write_control + + + +port->ops->read_status - read the status register +------------------------------------------------- + +SYNOPSIS +^^^^^^^^ + +:: + + #include <linux/parport.h> + + struct parport_operations { + ... + unsigned char (*read_status) (struct parport *port); + ... + }; + +DESCRIPTION +^^^^^^^^^^^ + +Reads from the status register. This is a bitmask: + +- PARPORT_STATUS_ERROR (printer fault, "nFault") +- PARPORT_STATUS_SELECT (on-line, "Select") +- PARPORT_STATUS_PAPEROUT (no paper, "PError") +- PARPORT_STATUS_ACK (handshake, "nAck") +- PARPORT_STATUS_BUSY (busy, "Busy") + +There may be other bits set. + +SEE ALSO +^^^^^^^^ + +read_data, write_data, write_control + + + +port->ops->read_control - read the control register +--------------------------------------------------- + +SYNOPSIS +^^^^^^^^ + +:: + + #include <linux/parport.h> + + struct parport_operations { + ... + unsigned char (*read_control) (struct parport *port); + ... + }; + +DESCRIPTION +^^^^^^^^^^^ + +Returns the last value written to the control register (either from +write_control or frob_control). No port access is performed. + +SEE ALSO +^^^^^^^^ + +read_data, write_data, read_status, write_control + + + +port->ops->write_control - write the control register +----------------------------------------------------- + +SYNOPSIS +^^^^^^^^ + +:: + + #include <linux/parport.h> + + struct parport_operations { + ... + void (*write_control) (struct parport *port, unsigned char s); + ... + }; + +DESCRIPTION +^^^^^^^^^^^ + +Writes to the control register. This is a bitmask:: + + _______ + - PARPORT_CONTROL_STROBE (nStrobe) + _______ + - PARPORT_CONTROL_AUTOFD (nAutoFd) + _____ + - PARPORT_CONTROL_INIT (nInit) + _________ + - PARPORT_CONTROL_SELECT (nSelectIn) + +SEE ALSO +^^^^^^^^ + +read_data, write_data, read_status, frob_control + + + +port->ops->frob_control - write control register bits +----------------------------------------------------- + +SYNOPSIS +^^^^^^^^ + +:: + + #include <linux/parport.h> + + struct parport_operations { + ... + unsigned char (*frob_control) (struct parport *port, + unsigned char mask, + unsigned char val); + ... + }; + +DESCRIPTION +^^^^^^^^^^^ + +This is equivalent to reading from the control register, masking out +the bits in mask, exclusive-or'ing with the bits in val, and writing +the result to the control register. + +As some ports don't allow reads from the control port, a software copy +of its contents is maintained, so frob_control is in fact only one +port access. + +SEE ALSO +^^^^^^^^ + +read_data, write_data, read_status, write_control + + + +port->ops->enable_irq - enable interrupt generation +--------------------------------------------------- + +SYNOPSIS +^^^^^^^^ + +:: + + #include <linux/parport.h> + + struct parport_operations { + ... + void (*enable_irq) (struct parport *port); + ... + }; + +DESCRIPTION +^^^^^^^^^^^ + +The parallel port hardware is instructed to generate interrupts at +appropriate moments, although those moments are +architecture-specific. For the PC architecture, interrupts are +commonly generated on the rising edge of nAck. + +SEE ALSO +^^^^^^^^ + +disable_irq + + + +port->ops->disable_irq - disable interrupt generation +----------------------------------------------------- + +SYNOPSIS +^^^^^^^^ + +:: + + #include <linux/parport.h> + + struct parport_operations { + ... + void (*disable_irq) (struct parport *port); + ... + }; + +DESCRIPTION +^^^^^^^^^^^ + +The parallel port hardware is instructed not to generate interrupts. +The interrupt itself is not masked. + +SEE ALSO +^^^^^^^^ + +enable_irq + + + +port->ops->data_forward - enable data drivers +--------------------------------------------- + +SYNOPSIS +^^^^^^^^ + +:: + + #include <linux/parport.h> + + struct parport_operations { + ... + void (*data_forward) (struct parport *port); + ... + }; + +DESCRIPTION +^^^^^^^^^^^ + +Enables the data line drivers, for 8-bit host-to-peripheral +communications. + +SEE ALSO +^^^^^^^^ + +data_reverse + + + +port->ops->data_reverse - tristate the buffer +--------------------------------------------- + +SYNOPSIS +^^^^^^^^ + +:: + + #include <linux/parport.h> + + struct parport_operations { + ... + void (*data_reverse) (struct parport *port); + ... + }; + +DESCRIPTION +^^^^^^^^^^^ + +Places the data bus in a high impedance state, if port->modes has the +PARPORT_MODE_TRISTATE bit set. + +SEE ALSO +^^^^^^^^ + +data_forward + + + +port->ops->epp_write_data - write EPP data +------------------------------------------ + +SYNOPSIS +^^^^^^^^ + +:: + + #include <linux/parport.h> + + struct parport_operations { + ... + size_t (*epp_write_data) (struct parport *port, const void *buf, + size_t len, int flags); + ... + }; + +DESCRIPTION +^^^^^^^^^^^ + +Writes data in EPP mode, and returns the number of bytes written. + +The ``flags`` parameter may be one or more of the following, +bitwise-or'ed together: + +======================= ================================================= +PARPORT_EPP_FAST Use fast transfers. Some chips provide 16-bit and + 32-bit registers. However, if a transfer + times out, the return value may be unreliable. +======================= ================================================= + +SEE ALSO +^^^^^^^^ + +epp_read_data, epp_write_addr, epp_read_addr + + + +port->ops->epp_read_data - read EPP data +---------------------------------------- + +SYNOPSIS +^^^^^^^^ + +:: + + #include <linux/parport.h> + + struct parport_operations { + ... + size_t (*epp_read_data) (struct parport *port, void *buf, + size_t len, int flags); + ... + }; + +DESCRIPTION +^^^^^^^^^^^ + +Reads data in EPP mode, and returns the number of bytes read. + +The ``flags`` parameter may be one or more of the following, +bitwise-or'ed together: + +======================= ================================================= +PARPORT_EPP_FAST Use fast transfers. Some chips provide 16-bit and + 32-bit registers. However, if a transfer + times out, the return value may be unreliable. +======================= ================================================= + +SEE ALSO +^^^^^^^^ + +epp_write_data, epp_write_addr, epp_read_addr + + + +port->ops->epp_write_addr - write EPP address +--------------------------------------------- + +SYNOPSIS +^^^^^^^^ + +:: + + #include <linux/parport.h> + + struct parport_operations { + ... + size_t (*epp_write_addr) (struct parport *port, + const void *buf, size_t len, int flags); + ... + }; + +DESCRIPTION +^^^^^^^^^^^ + +Writes EPP addresses (8 bits each), and returns the number written. + +The ``flags`` parameter may be one or more of the following, +bitwise-or'ed together: + +======================= ================================================= +PARPORT_EPP_FAST Use fast transfers. Some chips provide 16-bit and + 32-bit registers. However, if a transfer + times out, the return value may be unreliable. +======================= ================================================= + +(Does PARPORT_EPP_FAST make sense for this function?) + +SEE ALSO +^^^^^^^^ + +epp_write_data, epp_read_data, epp_read_addr + + + +port->ops->epp_read_addr - read EPP address +------------------------------------------- + +SYNOPSIS +^^^^^^^^ + +:: + + #include <linux/parport.h> + + struct parport_operations { + ... + size_t (*epp_read_addr) (struct parport *port, void *buf, + size_t len, int flags); + ... + }; + +DESCRIPTION +^^^^^^^^^^^ + +Reads EPP addresses (8 bits each), and returns the number read. + +The ``flags`` parameter may be one or more of the following, +bitwise-or'ed together: + +======================= ================================================= +PARPORT_EPP_FAST Use fast transfers. Some chips provide 16-bit and + 32-bit registers. However, if a transfer + times out, the return value may be unreliable. +======================= ================================================= + +(Does PARPORT_EPP_FAST make sense for this function?) + +SEE ALSO +^^^^^^^^ + +epp_write_data, epp_read_data, epp_write_addr + + + +port->ops->ecp_write_data - write a block of ECP data +----------------------------------------------------- + +SYNOPSIS +^^^^^^^^ + +:: + + #include <linux/parport.h> + + struct parport_operations { + ... + size_t (*ecp_write_data) (struct parport *port, + const void *buf, size_t len, int flags); + ... + }; + +DESCRIPTION +^^^^^^^^^^^ + +Writes a block of ECP data. The ``flags`` parameter is ignored. + +RETURN VALUE +^^^^^^^^^^^^ + +The number of bytes written. + +SEE ALSO +^^^^^^^^ + +ecp_read_data, ecp_write_addr + + + +port->ops->ecp_read_data - read a block of ECP data +--------------------------------------------------- + +SYNOPSIS +^^^^^^^^ + +:: + + #include <linux/parport.h> + + struct parport_operations { + ... + size_t (*ecp_read_data) (struct parport *port, + void *buf, size_t len, int flags); + ... + }; + +DESCRIPTION +^^^^^^^^^^^ + +Reads a block of ECP data. The ``flags`` parameter is ignored. + +RETURN VALUE +^^^^^^^^^^^^ + +The number of bytes read. NB. There may be more unread data in a +FIFO. Is there a way of stunning the FIFO to prevent this? + +SEE ALSO +^^^^^^^^ + +ecp_write_block, ecp_write_addr + + + +port->ops->ecp_write_addr - write a block of ECP addresses +---------------------------------------------------------- + +SYNOPSIS +^^^^^^^^ + +:: + + #include <linux/parport.h> + + struct parport_operations { + ... + size_t (*ecp_write_addr) (struct parport *port, + const void *buf, size_t len, int flags); + ... + }; + +DESCRIPTION +^^^^^^^^^^^ + +Writes a block of ECP addresses. The ``flags`` parameter is ignored. + +RETURN VALUE +^^^^^^^^^^^^ + +The number of bytes written. + +NOTES +^^^^^ + +This may use a FIFO, and if so shall not return until the FIFO is empty. + +SEE ALSO +^^^^^^^^ + +ecp_read_data, ecp_write_data + + + +port->ops->nibble_read_data - read a block of data in nibble mode +----------------------------------------------------------------- + +SYNOPSIS +^^^^^^^^ + +:: + + #include <linux/parport.h> + + struct parport_operations { + ... + size_t (*nibble_read_data) (struct parport *port, + void *buf, size_t len, int flags); + ... + }; + +DESCRIPTION +^^^^^^^^^^^ + +Reads a block of data in nibble mode. The ``flags`` parameter is ignored. + +RETURN VALUE +^^^^^^^^^^^^ + +The number of whole bytes read. + +SEE ALSO +^^^^^^^^ + +byte_read_data, compat_write_data + + + +port->ops->byte_read_data - read a block of data in byte mode +------------------------------------------------------------- + +SYNOPSIS +^^^^^^^^ + +:: + + #include <linux/parport.h> + + struct parport_operations { + ... + size_t (*byte_read_data) (struct parport *port, + void *buf, size_t len, int flags); + ... + }; + +DESCRIPTION +^^^^^^^^^^^ + +Reads a block of data in byte mode. The ``flags`` parameter is ignored. + +RETURN VALUE +^^^^^^^^^^^^ + +The number of bytes read. + +SEE ALSO +^^^^^^^^ + +nibble_read_data, compat_write_data + + + +port->ops->compat_write_data - write a block of data in compatibility mode +-------------------------------------------------------------------------- + +SYNOPSIS +^^^^^^^^ + +:: + + #include <linux/parport.h> + + struct parport_operations { + ... + size_t (*compat_write_data) (struct parport *port, + const void *buf, size_t len, int flags); + ... + }; + +DESCRIPTION +^^^^^^^^^^^ + +Writes a block of data in compatibility mode. The ``flags`` parameter +is ignored. + +RETURN VALUE +^^^^^^^^^^^^ + +The number of bytes written. + +SEE ALSO +^^^^^^^^ + +nibble_read_data, byte_read_data diff --git a/Documentation/driver-api/pci/index.rst b/Documentation/driver-api/pci/index.rst new file mode 100644 index 0000000000..c6cf1fef61 --- /dev/null +++ b/Documentation/driver-api/pci/index.rst @@ -0,0 +1,22 @@ +.. SPDX-License-Identifier: GPL-2.0 + +============================================ +The Linux PCI driver implementer's API guide +============================================ + +.. class:: toc-title + + Table of contents + +.. toctree:: + :maxdepth: 2 + + pci + p2pdma + +.. only:: subproject and html + + Indices + ======= + + * :ref:`genindex` diff --git a/Documentation/driver-api/pci/p2pdma.rst b/Documentation/driver-api/pci/p2pdma.rst new file mode 100644 index 0000000000..d0b241628c --- /dev/null +++ b/Documentation/driver-api/pci/p2pdma.rst @@ -0,0 +1,131 @@ +.. SPDX-License-Identifier: GPL-2.0 + +============================ +PCI Peer-to-Peer DMA Support +============================ + +The PCI bus has pretty decent support for performing DMA transfers +between two devices on the bus. This type of transaction is henceforth +called Peer-to-Peer (or P2P). However, there are a number of issues that +make P2P transactions tricky to do in a perfectly safe way. + +One of the biggest issues is that PCI doesn't require forwarding +transactions between hierarchy domains, and in PCIe, each Root Port +defines a separate hierarchy domain. To make things worse, there is no +simple way to determine if a given Root Complex supports this or not. +(See PCIe r4.0, sec 1.3.1). Therefore, as of this writing, the kernel +only supports doing P2P when the endpoints involved are all behind the +same PCI bridge, as such devices are all in the same PCI hierarchy +domain, and the spec guarantees that all transactions within the +hierarchy will be routable, but it does not require routing +between hierarchies. + +The second issue is that to make use of existing interfaces in Linux, +memory that is used for P2P transactions needs to be backed by struct +pages. However, PCI BARs are not typically cache coherent so there are +a few corner case gotchas with these pages so developers need to +be careful about what they do with them. + + +Driver Writer's Guide +===================== + +In a given P2P implementation there may be three or more different +types of kernel drivers in play: + +* Provider - A driver which provides or publishes P2P resources like + memory or doorbell registers to other drivers. +* Client - A driver which makes use of a resource by setting up a + DMA transaction to or from it. +* Orchestrator - A driver which orchestrates the flow of data between + clients and providers. + +In many cases there could be overlap between these three types (i.e., +it may be typical for a driver to be both a provider and a client). + +For example, in the NVMe Target Copy Offload implementation: + +* The NVMe PCI driver is both a client, provider and orchestrator + in that it exposes any CMB (Controller Memory Buffer) as a P2P memory + resource (provider), it accepts P2P memory pages as buffers in requests + to be used directly (client) and it can also make use of the CMB as + submission queue entries (orchestrator). +* The RDMA driver is a client in this arrangement so that an RNIC + can DMA directly to the memory exposed by the NVMe device. +* The NVMe Target driver (nvmet) can orchestrate the data from the RNIC + to the P2P memory (CMB) and then to the NVMe device (and vice versa). + +This is currently the only arrangement supported by the kernel but +one could imagine slight tweaks to this that would allow for the same +functionality. For example, if a specific RNIC added a BAR with some +memory behind it, its driver could add support as a P2P provider and +then the NVMe Target could use the RNIC's memory instead of the CMB +in cases where the NVMe cards in use do not have CMB support. + + +Provider Drivers +---------------- + +A provider simply needs to register a BAR (or a portion of a BAR) +as a P2P DMA resource using :c:func:`pci_p2pdma_add_resource()`. +This will register struct pages for all the specified memory. + +After that it may optionally publish all of its resources as +P2P memory using :c:func:`pci_p2pmem_publish()`. This will allow +any orchestrator drivers to find and use the memory. When marked in +this way, the resource must be regular memory with no side effects. + +For the time being this is fairly rudimentary in that all resources +are typically going to be P2P memory. Future work will likely expand +this to include other types of resources like doorbells. + + +Client Drivers +-------------- + +A client driver only has to use the mapping API :c:func:`dma_map_sg()` +and :c:func:`dma_unmap_sg()` functions as usual, and the implementation +will do the right thing for the P2P capable memory. + + +Orchestrator Drivers +-------------------- + +The first task an orchestrator driver must do is compile a list of +all client devices that will be involved in a given transaction. For +example, the NVMe Target driver creates a list including the namespace +block device and the RNIC in use. If the orchestrator has access to +a specific P2P provider to use it may check compatibility using +:c:func:`pci_p2pdma_distance()` otherwise it may find a memory provider +that's compatible with all clients using :c:func:`pci_p2pmem_find()`. +If more than one provider is supported, the one nearest to all the clients will +be chosen first. If more than one provider is an equal distance away, the +one returned will be chosen at random (it is not an arbitrary but +truly random). This function returns the PCI device to use for the provider +with a reference taken and therefore when it's no longer needed it should be +returned with pci_dev_put(). + +Once a provider is selected, the orchestrator can then use +:c:func:`pci_alloc_p2pmem()` and :c:func:`pci_free_p2pmem()` to +allocate P2P memory from the provider. :c:func:`pci_p2pmem_alloc_sgl()` +and :c:func:`pci_p2pmem_free_sgl()` are convenience functions for +allocating scatter-gather lists with P2P memory. + +Struct Page Caveats +------------------- + +Driver writers should be very careful about not passing these special +struct pages to code that isn't prepared for it. At this time, the kernel +interfaces do not have any checks for ensuring this. This obviously +precludes passing these pages to userspace. + +P2P memory is also technically IO memory but should never have any side +effects behind it. Thus, the order of loads and stores should not be important +and ioreadX(), iowriteX() and friends should not be necessary. + + +P2P DMA Support Library +======================= + +.. kernel-doc:: drivers/pci/p2pdma.c + :export: diff --git a/Documentation/driver-api/pci/pci.rst b/Documentation/driver-api/pci/pci.rst new file mode 100644 index 0000000000..4843cfad4f --- /dev/null +++ b/Documentation/driver-api/pci/pci.rst @@ -0,0 +1,47 @@ +PCI Support Library +------------------- + +.. kernel-doc:: drivers/pci/pci.c + :export: + +.. kernel-doc:: drivers/pci/pci-driver.c + :export: + +.. kernel-doc:: drivers/pci/remove.c + :export: + +.. kernel-doc:: drivers/pci/search.c + :export: + +.. kernel-doc:: drivers/pci/msi/msi.c + :export: + +.. kernel-doc:: drivers/pci/bus.c + :export: + +.. kernel-doc:: drivers/pci/access.c + :export: + +.. kernel-doc:: drivers/pci/irq.c + :export: + +.. kernel-doc:: drivers/pci/probe.c + :export: + +.. kernel-doc:: drivers/pci/slot.c + :export: + +.. kernel-doc:: drivers/pci/rom.c + :export: + +.. kernel-doc:: drivers/pci/iov.c + :export: + +.. kernel-doc:: drivers/pci/pci-sysfs.c + :internal: + +PCI Hotplug Support Library +--------------------------- + +.. kernel-doc:: drivers/pci/hotplug/pci_hotplug_core.c + :export: diff --git a/Documentation/driver-api/phy/index.rst b/Documentation/driver-api/phy/index.rst new file mode 100644 index 0000000000..69ba1216de --- /dev/null +++ b/Documentation/driver-api/phy/index.rst @@ -0,0 +1,18 @@ +.. SPDX-License-Identifier: GPL-2.0 + +===================== +Generic PHY Framework +===================== + +.. toctree:: + + phy + samsung-usb2 + +.. only:: subproject and html + + Indices + ======= + + * :ref:`genindex` + diff --git a/Documentation/driver-api/phy/phy.rst b/Documentation/driver-api/phy/phy.rst new file mode 100644 index 0000000000..81785c084f --- /dev/null +++ b/Documentation/driver-api/phy/phy.rst @@ -0,0 +1,224 @@ +============= +PHY subsystem +============= + +:Author: Kishon Vijay Abraham I <kishon@ti.com> + +This document explains the Generic PHY Framework along with the APIs provided, +and how-to-use. + +Introduction +============ + +*PHY* is the abbreviation for physical layer. It is used to connect a device +to the physical medium e.g., the USB controller has a PHY to provide functions +such as serialization, de-serialization, encoding, decoding and is responsible +for obtaining the required data transmission rate. Note that some USB +controllers have PHY functionality embedded into it and others use an external +PHY. Other peripherals that use PHY include Wireless LAN, Ethernet, +SATA etc. + +The intention of creating this framework is to bring the PHY drivers spread +all over the Linux kernel to drivers/phy to increase code re-use and for +better code maintainability. + +This framework will be of use only to devices that use external PHY (PHY +functionality is not embedded within the controller). + +Registering/Unregistering the PHY provider +========================================== + +PHY provider refers to an entity that implements one or more PHY instances. +For the simple case where the PHY provider implements only a single instance of +the PHY, the framework provides its own implementation of of_xlate in +of_phy_simple_xlate. If the PHY provider implements multiple instances, it +should provide its own implementation of of_xlate. of_xlate is used only for +dt boot case. + +:: + + #define of_phy_provider_register(dev, xlate) \ + __of_phy_provider_register((dev), NULL, THIS_MODULE, (xlate)) + + #define devm_of_phy_provider_register(dev, xlate) \ + __devm_of_phy_provider_register((dev), NULL, THIS_MODULE, + (xlate)) + +of_phy_provider_register and devm_of_phy_provider_register macros can be used to +register the phy_provider and it takes device and of_xlate as +arguments. For the dt boot case, all PHY providers should use one of the above +2 macros to register the PHY provider. + +Often the device tree nodes associated with a PHY provider will contain a set +of children that each represent a single PHY. Some bindings may nest the child +nodes within extra levels for context and extensibility, in which case the low +level of_phy_provider_register_full() and devm_of_phy_provider_register_full() +macros can be used to override the node containing the children. + +:: + + #define of_phy_provider_register_full(dev, children, xlate) \ + __of_phy_provider_register(dev, children, THIS_MODULE, xlate) + + #define devm_of_phy_provider_register_full(dev, children, xlate) \ + __devm_of_phy_provider_register_full(dev, children, + THIS_MODULE, xlate) + + void devm_of_phy_provider_unregister(struct device *dev, + struct phy_provider *phy_provider); + void of_phy_provider_unregister(struct phy_provider *phy_provider); + +devm_of_phy_provider_unregister and of_phy_provider_unregister can be used to +unregister the PHY. + +Creating the PHY +================ + +The PHY driver should create the PHY in order for other peripheral controllers +to make use of it. The PHY framework provides 2 APIs to create the PHY. + +:: + + struct phy *phy_create(struct device *dev, struct device_node *node, + const struct phy_ops *ops); + struct phy *devm_phy_create(struct device *dev, + struct device_node *node, + const struct phy_ops *ops); + +The PHY drivers can use one of the above 2 APIs to create the PHY by passing +the device pointer and phy ops. +phy_ops is a set of function pointers for performing PHY operations such as +init, exit, power_on and power_off. + +Inorder to dereference the private data (in phy_ops), the phy provider driver +can use phy_set_drvdata() after creating the PHY and use phy_get_drvdata() in +phy_ops to get back the private data. + +Getting a reference to the PHY +============================== + +Before the controller can make use of the PHY, it has to get a reference to +it. This framework provides the following APIs to get a reference to the PHY. + +:: + + struct phy *phy_get(struct device *dev, const char *string); + struct phy *devm_phy_get(struct device *dev, const char *string); + struct phy *devm_phy_optional_get(struct device *dev, + const char *string); + struct phy *devm_of_phy_get(struct device *dev, struct device_node *np, + const char *con_id); + struct phy *devm_of_phy_optional_get(struct device *dev, + struct device_node *np, + const char *con_id); + struct phy *devm_of_phy_get_by_index(struct device *dev, + struct device_node *np, + int index); + +phy_get, devm_phy_get and devm_phy_optional_get can be used to get the PHY. +In the case of dt boot, the string arguments +should contain the phy name as given in the dt data and in the case of +non-dt boot, it should contain the label of the PHY. The two +devm_phy_get associates the device with the PHY using devres on +successful PHY get. On driver detach, release function is invoked on +the devres data and devres data is freed. +The _optional_get variants should be used when the phy is optional. These +functions will never return -ENODEV, but instead return NULL when +the phy cannot be found. +Some generic drivers, such as ehci, may use multiple phys. In this case, +devm_of_phy_get or devm_of_phy_get_by_index can be used to get a phy +reference based on name or index. + +It should be noted that NULL is a valid phy reference. All phy +consumer calls on the NULL phy become NOPs. That is the release calls, +the phy_init() and phy_exit() calls, and phy_power_on() and +phy_power_off() calls are all NOP when applied to a NULL phy. The NULL +phy is useful in devices for handling optional phy devices. + +Order of API calls +================== + +The general order of calls should be:: + + [devm_][of_]phy_get() + phy_init() + phy_power_on() + [phy_set_mode[_ext]()] + ... + phy_power_off() + phy_exit() + [[of_]phy_put()] + +Some PHY drivers may not implement :c:func:`phy_init` or :c:func:`phy_power_on`, +but controllers should always call these functions to be compatible with other +PHYs. Some PHYs may require :c:func:`phy_set_mode <phy_set_mode_ext>`, while +others may use a default mode (typically configured via devicetree or other +firmware). For compatibility, you should always call this function if you know +what mode you will be using. Generally, this function should be called after +:c:func:`phy_power_on`, although some PHY drivers may allow it at any time. + +Releasing a reference to the PHY +================================ + +When the controller no longer needs the PHY, it has to release the reference +to the PHY it has obtained using the APIs mentioned in the above section. The +PHY framework provides 2 APIs to release a reference to the PHY. + +:: + + void phy_put(struct phy *phy); + void devm_phy_put(struct device *dev, struct phy *phy); + +Both these APIs are used to release a reference to the PHY and devm_phy_put +destroys the devres associated with this PHY. + +Destroying the PHY +================== + +When the driver that created the PHY is unloaded, it should destroy the PHY it +created using one of the following 2 APIs:: + + void phy_destroy(struct phy *phy); + void devm_phy_destroy(struct device *dev, struct phy *phy); + +Both these APIs destroy the PHY and devm_phy_destroy destroys the devres +associated with this PHY. + +PM Runtime +========== + +This subsystem is pm runtime enabled. So while creating the PHY, +pm_runtime_enable of the phy device created by this subsystem is called and +while destroying the PHY, pm_runtime_disable is called. Note that the phy +device created by this subsystem will be a child of the device that calls +phy_create (PHY provider device). + +So pm_runtime_get_sync of the phy_device created by this subsystem will invoke +pm_runtime_get_sync of PHY provider device because of parent-child relationship. +It should also be noted that phy_power_on and phy_power_off performs +phy_pm_runtime_get_sync and phy_pm_runtime_put respectively. +There are exported APIs like phy_pm_runtime_get, phy_pm_runtime_get_sync, +phy_pm_runtime_put, phy_pm_runtime_put_sync, phy_pm_runtime_allow and +phy_pm_runtime_forbid for performing PM operations. + +PHY Mappings +============ + +In order to get reference to a PHY without help from DeviceTree, the framework +offers lookups which can be compared to clkdev that allow clk structures to be +bound to devices. A lookup can be made during runtime when a handle to the +struct phy already exists. + +The framework offers the following API for registering and unregistering the +lookups:: + + int phy_create_lookup(struct phy *phy, const char *con_id, + const char *dev_id); + void phy_remove_lookup(struct phy *phy, const char *con_id, + const char *dev_id); + +DeviceTree Binding +================== + +The documentation for PHY dt binding can be found @ +Documentation/devicetree/bindings/phy/phy-bindings.txt diff --git a/Documentation/driver-api/phy/samsung-usb2.rst b/Documentation/driver-api/phy/samsung-usb2.rst new file mode 100644 index 0000000000..c48c8b9797 --- /dev/null +++ b/Documentation/driver-api/phy/samsung-usb2.rst @@ -0,0 +1,137 @@ +==================================== +Samsung USB 2.0 PHY adaptation layer +==================================== + +1. Description +-------------- + +The architecture of the USB 2.0 PHY module in Samsung SoCs is similar +among many SoCs. In spite of the similarities it proved difficult to +create a one driver that would fit all these PHY controllers. Often +the differences were minor and were found in particular bits of the +registers of the PHY. In some rare cases the order of register writes or +the PHY powering up process had to be altered. This adaptation layer is +a compromise between having separate drivers and having a single driver +with added support for many special cases. + +2. Files description +-------------------- + +- phy-samsung-usb2.c + This is the main file of the adaptation layer. This file contains + the probe function and provides two callbacks to the Generic PHY + Framework. This two callbacks are used to power on and power off the + phy. They carry out the common work that has to be done on all version + of the PHY module. Depending on which SoC was chosen they execute SoC + specific callbacks. The specific SoC version is selected by choosing + the appropriate compatible string. In addition, this file contains + struct of_device_id definitions for particular SoCs. + +- phy-samsung-usb2.h + This is the include file. It declares the structures used by this + driver. In addition it should contain extern declarations for + structures that describe particular SoCs. + +3. Supporting SoCs +------------------ + +To support a new SoC a new file should be added to the drivers/phy +directory. Each SoC's configuration is stored in an instance of the +struct samsung_usb2_phy_config:: + + struct samsung_usb2_phy_config { + const struct samsung_usb2_common_phy *phys; + int (*rate_to_clk)(unsigned long, u32 *); + unsigned int num_phys; + bool has_mode_switch; + }; + +The num_phys is the number of phys handled by the driver. `*phys` is an +array that contains the configuration for each phy. The has_mode_switch +property is a boolean flag that determines whether the SoC has USB host +and device on a single pair of pins. If so, a special register has to +be modified to change the internal routing of these pins between a USB +device or host module. + +For example the configuration for Exynos 4210 is following:: + + const struct samsung_usb2_phy_config exynos4210_usb2_phy_config = { + .has_mode_switch = 0, + .num_phys = EXYNOS4210_NUM_PHYS, + .phys = exynos4210_phys, + .rate_to_clk = exynos4210_rate_to_clk, + } + +- `int (*rate_to_clk)(unsigned long, u32 *)` + + The rate_to_clk callback is to convert the rate of the clock + used as the reference clock for the PHY module to the value + that should be written in the hardware register. + +The exynos4210_phys configuration array is as follows:: + + static const struct samsung_usb2_common_phy exynos4210_phys[] = { + { + .label = "device", + .id = EXYNOS4210_DEVICE, + .power_on = exynos4210_power_on, + .power_off = exynos4210_power_off, + }, + { + .label = "host", + .id = EXYNOS4210_HOST, + .power_on = exynos4210_power_on, + .power_off = exynos4210_power_off, + }, + { + .label = "hsic0", + .id = EXYNOS4210_HSIC0, + .power_on = exynos4210_power_on, + .power_off = exynos4210_power_off, + }, + { + .label = "hsic1", + .id = EXYNOS4210_HSIC1, + .power_on = exynos4210_power_on, + .power_off = exynos4210_power_off, + }, + {}, + }; + +- `int (*power_on)(struct samsung_usb2_phy_instance *);` + `int (*power_off)(struct samsung_usb2_phy_instance *);` + + These two callbacks are used to power on and power off the phy + by modifying appropriate registers. + +Final change to the driver is adding appropriate compatible value to the +phy-samsung-usb2.c file. In case of Exynos 4210 the following lines were +added to the struct of_device_id samsung_usb2_phy_of_match[] array:: + + #ifdef CONFIG_PHY_EXYNOS4210_USB2 + { + .compatible = "samsung,exynos4210-usb2-phy", + .data = &exynos4210_usb2_phy_config, + }, + #endif + +To add further flexibility to the driver the Kconfig file enables to +include support for selected SoCs in the compiled driver. The Kconfig +entry for Exynos 4210 is following:: + + config PHY_EXYNOS4210_USB2 + bool "Support for Exynos 4210" + depends on PHY_SAMSUNG_USB2 + depends on CPU_EXYNOS4210 + help + Enable USB PHY support for Exynos 4210. This option requires that + Samsung USB 2.0 PHY driver is enabled and means that support for this + particular SoC is compiled in the driver. In case of Exynos 4210 four + phys are available - device, host, HSCI0 and HSCI1. + +The newly created file that supports the new SoC has to be also added to the +Makefile. In case of Exynos 4210 the added line is following:: + + obj-$(CONFIG_PHY_EXYNOS4210_USB2) += phy-exynos4210-usb2.o + +After completing these steps the support for the new SoC should be ready. diff --git a/Documentation/driver-api/pin-control.rst b/Documentation/driver-api/pin-control.rst new file mode 100644 index 0000000000..4639912dc9 --- /dev/null +++ b/Documentation/driver-api/pin-control.rst @@ -0,0 +1,1455 @@ +=============================== +PINCTRL (PIN CONTROL) subsystem +=============================== + +This document outlines the pin control subsystem in Linux + +This subsystem deals with: + +- Enumerating and naming controllable pins + +- Multiplexing of pins, pads, fingers (etc) see below for details + +- Configuration of pins, pads, fingers (etc), such as software-controlled + biasing and driving mode specific pins, such as pull-up, pull-down, open drain, + load capacitance etc. + +Top-level interface +=================== + +Definitions: + +- A PIN CONTROLLER is a piece of hardware, usually a set of registers, that + can control PINs. It may be able to multiplex, bias, set load capacitance, + set drive strength, etc. for individual pins or groups of pins. + +- PINS are equal to pads, fingers, balls or whatever packaging input or + output line you want to control and these are denoted by unsigned integers + in the range 0..maxpin. This numberspace is local to each PIN CONTROLLER, so + there may be several such number spaces in a system. This pin space may + be sparse - i.e. there may be gaps in the space with numbers where no + pin exists. + +When a PIN CONTROLLER is instantiated, it will register a descriptor to the +pin control framework, and this descriptor contains an array of pin descriptors +describing the pins handled by this specific pin controller. + +Here is an example of a PGA (Pin Grid Array) chip seen from underneath:: + + A B C D E F G H + + 8 o o o o o o o o + + 7 o o o o o o o o + + 6 o o o o o o o o + + 5 o o o o o o o o + + 4 o o o o o o o o + + 3 o o o o o o o o + + 2 o o o o o o o o + + 1 o o o o o o o o + +To register a pin controller and name all the pins on this package we can do +this in our driver: + +.. code-block:: c + + #include <linux/pinctrl/pinctrl.h> + + const struct pinctrl_pin_desc foo_pins[] = { + PINCTRL_PIN(0, "A8"), + PINCTRL_PIN(1, "B8"), + PINCTRL_PIN(2, "C8"), + ... + PINCTRL_PIN(61, "F1"), + PINCTRL_PIN(62, "G1"), + PINCTRL_PIN(63, "H1"), + }; + + static struct pinctrl_desc foo_desc = { + .name = "foo", + .pins = foo_pins, + .npins = ARRAY_SIZE(foo_pins), + .owner = THIS_MODULE, + }; + + int __init foo_init(void) + { + int error; + + struct pinctrl_dev *pctl; + + error = pinctrl_register_and_init(&foo_desc, <PARENT>, NULL, &pctl); + if (error) + return error; + + return pinctrl_enable(pctl); + } + +To enable the pinctrl subsystem and the subgroups for PINMUX and PINCONF and +selected drivers, you need to select them from your machine's Kconfig entry, +since these are so tightly integrated with the machines they are used on. +See ``arch/arm/mach-ux500/Kconfig`` for an example. + +Pins usually have fancier names than this. You can find these in the datasheet +for your chip. Notice that the core pinctrl.h file provides a fancy macro +called ``PINCTRL_PIN()`` to create the struct entries. As you can see the pins are +enumerated from 0 in the upper left corner to 63 in the lower right corner. +This enumeration was arbitrarily chosen, in practice you need to think +through your numbering system so that it matches the layout of registers +and such things in your driver, or the code may become complicated. You must +also consider matching of offsets to the GPIO ranges that may be handled by +the pin controller. + +For a padding with 467 pads, as opposed to actual pins, the enumeration will +be like this, walking around the edge of the chip, which seems to be industry +standard too (all these pads had names, too):: + + + 0 ..... 104 + 466 105 + . . + . . + 358 224 + 357 .... 225 + + +Pin groups +========== + +Many controllers need to deal with groups of pins, so the pin controller +subsystem has a mechanism for enumerating groups of pins and retrieving the +actual enumerated pins that are part of a certain group. + +For example, say that we have a group of pins dealing with an SPI interface +on { 0, 8, 16, 24 }, and a group of pins dealing with an I2C interface on pins +on { 24, 25 }. + +These two groups are presented to the pin control subsystem by implementing +some generic ``pinctrl_ops`` like this: + +.. code-block:: c + + #include <linux/pinctrl/pinctrl.h> + + static const unsigned int spi0_pins[] = { 0, 8, 16, 24 }; + static const unsigned int i2c0_pins[] = { 24, 25 }; + + static const struct pingroup foo_groups[] = { + PINCTRL_PINGROUP("spi0_grp", spi0_pins, ARRAY_SIZE(spi0_pins)), + PINCTRL_PINGROUP("i2c0_grp", i2c0_pins, ARRAY_SIZE(i2c0_pins)), + }; + + static int foo_get_groups_count(struct pinctrl_dev *pctldev) + { + return ARRAY_SIZE(foo_groups); + } + + static const char *foo_get_group_name(struct pinctrl_dev *pctldev, + unsigned int selector) + { + return foo_groups[selector].name; + } + + static int foo_get_group_pins(struct pinctrl_dev *pctldev, + unsigned int selector, + const unsigned int **pins, + unsigned int *npins) + { + *pins = foo_groups[selector].pins; + *npins = foo_groups[selector].npins; + return 0; + } + + static struct pinctrl_ops foo_pctrl_ops = { + .get_groups_count = foo_get_groups_count, + .get_group_name = foo_get_group_name, + .get_group_pins = foo_get_group_pins, + }; + + static struct pinctrl_desc foo_desc = { + ... + .pctlops = &foo_pctrl_ops, + }; + +The pin control subsystem will call the ``.get_groups_count()`` function to +determine the total number of legal selectors, then it will call the other functions +to retrieve the name and pins of the group. Maintaining the data structure of +the groups is up to the driver, this is just a simple example - in practice you +may need more entries in your group structure, for example specific register +ranges associated with each group and so on. + + +Pin configuration +================= + +Pins can sometimes be software-configured in various ways, mostly related +to their electronic properties when used as inputs or outputs. For example you +may be able to make an output pin high impedance (Hi-Z), or "tristate" meaning it is +effectively disconnected. You may be able to connect an input pin to VDD or GND +using a certain resistor value - pull up and pull down - so that the pin has a +stable value when nothing is driving the rail it is connected to, or when it's +unconnected. + +Pin configuration can be programmed by adding configuration entries into the +mapping table; see section `Board/machine configuration`_ below. + +The format and meaning of the configuration parameter, PLATFORM_X_PULL_UP +above, is entirely defined by the pin controller driver. + +The pin configuration driver implements callbacks for changing pin +configuration in the pin controller ops like this: + +.. code-block:: c + + #include <linux/pinctrl/pinconf.h> + #include <linux/pinctrl/pinctrl.h> + + #include "platform_x_pindefs.h" + + static int foo_pin_config_get(struct pinctrl_dev *pctldev, + unsigned int offset, + unsigned long *config) + { + struct my_conftype conf; + + /* ... Find setting for pin @ offset ... */ + + *config = (unsigned long) conf; + } + + static int foo_pin_config_set(struct pinctrl_dev *pctldev, + unsigned int offset, + unsigned long config) + { + struct my_conftype *conf = (struct my_conftype *) config; + + switch (conf) { + case PLATFORM_X_PULL_UP: + ... + break; + } + } + + static int foo_pin_config_group_get(struct pinctrl_dev *pctldev, + unsigned selector, + unsigned long *config) + { + ... + } + + static int foo_pin_config_group_set(struct pinctrl_dev *pctldev, + unsigned selector, + unsigned long config) + { + ... + } + + static struct pinconf_ops foo_pconf_ops = { + .pin_config_get = foo_pin_config_get, + .pin_config_set = foo_pin_config_set, + .pin_config_group_get = foo_pin_config_group_get, + .pin_config_group_set = foo_pin_config_group_set, + }; + + /* Pin config operations are handled by some pin controller */ + static struct pinctrl_desc foo_desc = { + ... + .confops = &foo_pconf_ops, + }; + +Interaction with the GPIO subsystem +=================================== + +The GPIO drivers may want to perform operations of various types on the same +physical pins that are also registered as pin controller pins. + +First and foremost, the two subsystems can be used as completely orthogonal, +see the section named `Pin control requests from drivers`_ and +`Drivers needing both pin control and GPIOs`_ below for details. But in some +situations a cross-subsystem mapping between pins and GPIOs is needed. + +Since the pin controller subsystem has its pinspace local to the pin controller +we need a mapping so that the pin control subsystem can figure out which pin +controller handles control of a certain GPIO pin. Since a single pin controller +may be muxing several GPIO ranges (typically SoCs that have one set of pins, +but internally several GPIO silicon blocks, each modelled as a struct +gpio_chip) any number of GPIO ranges can be added to a pin controller instance +like this: + +.. code-block:: c + + #include <linux/gpio/driver.h> + + #include <linux/pinctrl/pinctrl.h> + + struct gpio_chip chip_a; + struct gpio_chip chip_b; + + static struct pinctrl_gpio_range gpio_range_a = { + .name = "chip a", + .id = 0, + .base = 32, + .pin_base = 32, + .npins = 16, + .gc = &chip_a, + }; + + static struct pinctrl_gpio_range gpio_range_b = { + .name = "chip b", + .id = 0, + .base = 48, + .pin_base = 64, + .npins = 8, + .gc = &chip_b; + }; + + int __init foo_init(void) + { + struct pinctrl_dev *pctl; + ... + pinctrl_add_gpio_range(pctl, &gpio_range_a); + pinctrl_add_gpio_range(pctl, &gpio_range_b); + ... + } + +So this complex system has one pin controller handling two different +GPIO chips. "chip a" has 16 pins and "chip b" has 8 pins. The "chip a" and +"chip b" have different ``pin_base``, which means a start pin number of the +GPIO range. + +The GPIO range of "chip a" starts from the GPIO base of 32 and actual +pin range also starts from 32. However "chip b" has different starting +offset for the GPIO range and pin range. The GPIO range of "chip b" starts +from GPIO number 48, while the pin range of "chip b" starts from 64. + +We can convert a gpio number to actual pin number using this ``pin_base``. +They are mapped in the global GPIO pin space at: + +chip a: + - GPIO range : [32 .. 47] + - pin range : [32 .. 47] +chip b: + - GPIO range : [48 .. 55] + - pin range : [64 .. 71] + +The above examples assume the mapping between the GPIOs and pins is +linear. If the mapping is sparse or haphazard, an array of arbitrary pin +numbers can be encoded in the range like this: + +.. code-block:: c + + static const unsigned int range_pins[] = { 14, 1, 22, 17, 10, 8, 6, 2 }; + + static struct pinctrl_gpio_range gpio_range = { + .name = "chip", + .id = 0, + .base = 32, + .pins = &range_pins, + .npins = ARRAY_SIZE(range_pins), + .gc = &chip, + }; + +In this case the ``pin_base`` property will be ignored. If the name of a pin +group is known, the pins and npins elements of the above structure can be +initialised using the function ``pinctrl_get_group_pins()``, e.g. for pin +group "foo": + +.. code-block:: c + + pinctrl_get_group_pins(pctl, "foo", &gpio_range.pins, &gpio_range.npins); + +When GPIO-specific functions in the pin control subsystem are called, these +ranges will be used to look up the appropriate pin controller by inspecting +and matching the pin to the pin ranges across all controllers. When a +pin controller handling the matching range is found, GPIO-specific functions +will be called on that specific pin controller. + +For all functionalities dealing with pin biasing, pin muxing etc, the pin +controller subsystem will look up the corresponding pin number from the passed +in gpio number, and use the range's internals to retrieve a pin number. After +that, the subsystem passes it on to the pin control driver, so the driver +will get a pin number into its handled number range. Further it is also passed +the range ID value, so that the pin controller knows which range it should +deal with. + +Calling ``pinctrl_add_gpio_range()`` from pinctrl driver is DEPRECATED. Please see +section 2.1 of ``Documentation/devicetree/bindings/gpio/gpio.txt`` on how to bind +pinctrl and gpio drivers. + + +PINMUX interfaces +================= + +These calls use the pinmux_* naming prefix. No other calls should use that +prefix. + + +What is pinmuxing? +================== + +PINMUX, also known as padmux, ballmux, alternate functions or mission modes +is a way for chip vendors producing some kind of electrical packages to use +a certain physical pin (ball, pad, finger, etc) for multiple mutually exclusive +functions, depending on the application. By "application" in this context +we usually mean a way of soldering or wiring the package into an electronic +system, even though the framework makes it possible to also change the function +at runtime. + +Here is an example of a PGA (Pin Grid Array) chip seen from underneath:: + + A B C D E F G H + +---+ + 8 | o | o o o o o o o + | | + 7 | o | o o o o o o o + | | + 6 | o | o o o o o o o + +---+---+ + 5 | o | o | o o o o o o + +---+---+ +---+ + 4 o o o o o o | o | o + | | + 3 o o o o o o | o | o + | | + 2 o o o o o o | o | o + +-------+-------+-------+---+---+ + 1 | o o | o o | o o | o | o | + +-------+-------+-------+---+---+ + +This is not tetris. The game to think of is chess. Not all PGA/BGA packages +are chessboard-like, big ones have "holes" in some arrangement according to +different design patterns, but we're using this as a simple example. Of the +pins you see some will be taken by things like a few VCC and GND to feed power +to the chip, and quite a few will be taken by large ports like an external +memory interface. The remaining pins will often be subject to pin multiplexing. + +The example 8x8 PGA package above will have pin numbers 0 through 63 assigned +to its physical pins. It will name the pins { A1, A2, A3 ... H6, H7, H8 } using +pinctrl_register_pins() and a suitable data set as shown earlier. + +In this 8x8 BGA package the pins { A8, A7, A6, A5 } can be used as an SPI port +(these are four pins: CLK, RXD, TXD, FRM). In that case, pin B5 can be used as +some general-purpose GPIO pin. However, in another setting, pins { A5, B5 } can +be used as an I2C port (these are just two pins: SCL, SDA). Needless to say, +we cannot use the SPI port and I2C port at the same time. However in the inside +of the package the silicon performing the SPI logic can alternatively be routed +out on pins { G4, G3, G2, G1 }. + +On the bottom row at { A1, B1, C1, D1, E1, F1, G1, H1 } we have something +special - it's an external MMC bus that can be 2, 4 or 8 bits wide, and it will +consume 2, 4 or 8 pins respectively, so either { A1, B1 } are taken or +{ A1, B1, C1, D1 } or all of them. If we use all 8 bits, we cannot use the SPI +port on pins { G4, G3, G2, G1 } of course. + +This way the silicon blocks present inside the chip can be multiplexed "muxed" +out on different pin ranges. Often contemporary SoC (systems on chip) will +contain several I2C, SPI, SDIO/MMC, etc silicon blocks that can be routed to +different pins by pinmux settings. + +Since general-purpose I/O pins (GPIO) are typically always in shortage, it is +common to be able to use almost any pin as a GPIO pin if it is not currently +in use by some other I/O port. + + +Pinmux conventions +================== + +The purpose of the pinmux functionality in the pin controller subsystem is to +abstract and provide pinmux settings to the devices you choose to instantiate +in your machine configuration. It is inspired by the clk, GPIO and regulator +subsystems, so devices will request their mux setting, but it's also possible +to request a single pin for e.g. GPIO. + +The conventions are: + +- FUNCTIONS can be switched in and out by a driver residing with the pin + control subsystem in the ``drivers/pinctrl`` directory of the kernel. The + pin control driver knows the possible functions. In the example above you can + identify three pinmux functions, one for spi, one for i2c and one for mmc. + +- FUNCTIONS are assumed to be enumerable from zero in a one-dimensional array. + In this case the array could be something like: { spi0, i2c0, mmc0 } + for the three available functions. + +- FUNCTIONS have PIN GROUPS as defined on the generic level - so a certain + function is *always* associated with a certain set of pin groups, could + be just a single one, but could also be many. In the example above the + function i2c is associated with the pins { A5, B5 }, enumerated as + { 24, 25 } in the controller pin space. + + The Function spi is associated with pin groups { A8, A7, A6, A5 } + and { G4, G3, G2, G1 }, which are enumerated as { 0, 8, 16, 24 } and + { 38, 46, 54, 62 } respectively. + + Group names must be unique per pin controller, no two groups on the same + controller may have the same name. + +- The combination of a FUNCTION and a PIN GROUP determine a certain function + for a certain set of pins. The knowledge of the functions and pin groups + and their machine-specific particulars are kept inside the pinmux driver, + from the outside only the enumerators are known, and the driver core can + request: + + - The name of a function with a certain selector (>= 0) + - A list of groups associated with a certain function + - That a certain group in that list to be activated for a certain function + + As already described above, pin groups are in turn self-descriptive, so + the core will retrieve the actual pin range in a certain group from the + driver. + +- FUNCTIONS and GROUPS on a certain PIN CONTROLLER are MAPPED to a certain + device by the board file, device tree or similar machine setup configuration + mechanism, similar to how regulators are connected to devices, usually by + name. Defining a pin controller, function and group thus uniquely identify + the set of pins to be used by a certain device. (If only one possible group + of pins is available for the function, no group name need to be supplied - + the core will simply select the first and only group available.) + + In the example case we can define that this particular machine shall + use device spi0 with pinmux function fspi0 group gspi0 and i2c0 on function + fi2c0 group gi2c0, on the primary pin controller, we get mappings + like these: + + .. code-block:: c + + { + {"map-spi0", spi0, pinctrl0, fspi0, gspi0}, + {"map-i2c0", i2c0, pinctrl0, fi2c0, gi2c0}, + } + + Every map must be assigned a state name, pin controller, device and + function. The group is not compulsory - if it is omitted the first group + presented by the driver as applicable for the function will be selected, + which is useful for simple cases. + + It is possible to map several groups to the same combination of device, + pin controller and function. This is for cases where a certain function on + a certain pin controller may use different sets of pins in different + configurations. + +- PINS for a certain FUNCTION using a certain PIN GROUP on a certain + PIN CONTROLLER are provided on a first-come first-serve basis, so if some + other device mux setting or GPIO pin request has already taken your physical + pin, you will be denied the use of it. To get (activate) a new setting, the + old one has to be put (deactivated) first. + +Sometimes the documentation and hardware registers will be oriented around +pads (or "fingers") rather than pins - these are the soldering surfaces on the +silicon inside the package, and may or may not match the actual number of +pins/balls underneath the capsule. Pick some enumeration that makes sense to +you. Define enumerators only for the pins you can control if that makes sense. + +Assumptions: + +We assume that the number of possible function maps to pin groups is limited by +the hardware. I.e. we assume that there is no system where any function can be +mapped to any pin, like in a phone exchange. So the available pin groups for +a certain function will be limited to a few choices (say up to eight or so), +not hundreds or any amount of choices. This is the characteristic we have found +by inspecting available pinmux hardware, and a necessary assumption since we +expect pinmux drivers to present *all* possible function vs pin group mappings +to the subsystem. + + +Pinmux drivers +============== + +The pinmux core takes care of preventing conflicts on pins and calling +the pin controller driver to execute different settings. + +It is the responsibility of the pinmux driver to impose further restrictions +(say for example infer electronic limitations due to load, etc.) to determine +whether or not the requested function can actually be allowed, and in case it +is possible to perform the requested mux setting, poke the hardware so that +this happens. + +Pinmux drivers are required to supply a few callback functions, some are +optional. Usually the ``.set_mux()`` function is implemented, writing values into +some certain registers to activate a certain mux setting for a certain pin. + +A simple driver for the above example will work by setting bits 0, 1, 2, 3, 4, or 5 +into some register named MUX to select a certain function with a certain +group of pins would work something like this: + +.. code-block:: c + + #include <linux/pinctrl/pinctrl.h> + #include <linux/pinctrl/pinmux.h> + + static const unsigned int spi0_0_pins[] = { 0, 8, 16, 24 }; + static const unsigned int spi0_1_pins[] = { 38, 46, 54, 62 }; + static const unsigned int i2c0_pins[] = { 24, 25 }; + static const unsigned int mmc0_1_pins[] = { 56, 57 }; + static const unsigned int mmc0_2_pins[] = { 58, 59 }; + static const unsigned int mmc0_3_pins[] = { 60, 61, 62, 63 }; + + static const struct pingroup foo_groups[] = { + PINCTRL_PINGROUP("spi0_0_grp", spi0_0_pins, ARRAY_SIZE(spi0_0_pins)), + PINCTRL_PINGROUP("spi0_1_grp", spi0_1_pins, ARRAY_SIZE(spi0_1_pins)), + PINCTRL_PINGROUP("i2c0_grp", i2c0_pins, ARRAY_SIZE(i2c0_pins)), + PINCTRL_PINGROUP("mmc0_1_grp", mmc0_1_pins, ARRAY_SIZE(mmc0_1_pins)), + PINCTRL_PINGROUP("mmc0_2_grp", mmc0_2_pins, ARRAY_SIZE(mmc0_2_pins)), + PINCTRL_PINGROUP("mmc0_3_grp", mmc0_3_pins, ARRAY_SIZE(mmc0_3_pins)), + }; + + static int foo_get_groups_count(struct pinctrl_dev *pctldev) + { + return ARRAY_SIZE(foo_groups); + } + + static const char *foo_get_group_name(struct pinctrl_dev *pctldev, + unsigned int selector) + { + return foo_groups[selector].name; + } + + static int foo_get_group_pins(struct pinctrl_dev *pctldev, unsigned int selector, + const unsigned int **pins, + unsigned int *npins) + { + *pins = foo_groups[selector].pins; + *npins = foo_groups[selector].npins; + return 0; + } + + static struct pinctrl_ops foo_pctrl_ops = { + .get_groups_count = foo_get_groups_count, + .get_group_name = foo_get_group_name, + .get_group_pins = foo_get_group_pins, + }; + + static const char * const spi0_groups[] = { "spi0_0_grp", "spi0_1_grp" }; + static const char * const i2c0_groups[] = { "i2c0_grp" }; + static const char * const mmc0_groups[] = { "mmc0_1_grp", "mmc0_2_grp", "mmc0_3_grp" }; + + static const struct pinfunction foo_functions[] = { + PINCTRL_PINFUNCTION("spi0", spi0_groups, ARRAY_SIZE(spi0_groups)), + PINCTRL_PINFUNCTION("i2c0", i2c0_groups, ARRAY_SIZE(i2c0_groups)), + PINCTRL_PINFUNCTION("mmc0", mmc0_groups, ARRAY_SIZE(mmc0_groups)), + }; + + static int foo_get_functions_count(struct pinctrl_dev *pctldev) + { + return ARRAY_SIZE(foo_functions); + } + + static const char *foo_get_fname(struct pinctrl_dev *pctldev, unsigned int selector) + { + return foo_functions[selector].name; + } + + static int foo_get_groups(struct pinctrl_dev *pctldev, unsigned int selector, + const char * const **groups, + unsigned int * const ngroups) + { + *groups = foo_functions[selector].groups; + *ngroups = foo_functions[selector].ngroups; + return 0; + } + + static int foo_set_mux(struct pinctrl_dev *pctldev, unsigned int selector, + unsigned int group) + { + u8 regbit = BIT(group); + + writeb((readb(MUX) | regbit), MUX); + return 0; + } + + static struct pinmux_ops foo_pmxops = { + .get_functions_count = foo_get_functions_count, + .get_function_name = foo_get_fname, + .get_function_groups = foo_get_groups, + .set_mux = foo_set_mux, + .strict = true, + }; + + /* Pinmux operations are handled by some pin controller */ + static struct pinctrl_desc foo_desc = { + ... + .pctlops = &foo_pctrl_ops, + .pmxops = &foo_pmxops, + }; + +In the example activating muxing 0 and 2 at the same time setting bits +0 and 2, uses pin 24 in common so they would collide. All the same for +the muxes 1 and 5, which have pin 62 in common. + +The beauty of the pinmux subsystem is that since it keeps track of all +pins and who is using them, it will already have denied an impossible +request like that, so the driver does not need to worry about such +things - when it gets a selector passed in, the pinmux subsystem makes +sure no other device or GPIO assignment is already using the selected +pins. Thus bits 0 and 2, or 1 and 5 in the control register will never +be set at the same time. + +All the above functions are mandatory to implement for a pinmux driver. + + +Pin control interaction with the GPIO subsystem +=============================================== + +Note that the following implies that the use case is to use a certain pin +from the Linux kernel using the API in ``<linux/gpio/consumer.h>`` with gpiod_get() +and similar functions. There are cases where you may be using something +that your datasheet calls "GPIO mode", but actually is just an electrical +configuration for a certain device. See the section below named +`GPIO mode pitfalls`_ for more details on this scenario. + +The public pinmux API contains two functions named ``pinctrl_gpio_request()`` +and ``pinctrl_gpio_free()``. These two functions shall *ONLY* be called from +gpiolib-based drivers as part of their ``.request()`` and ``.free()`` semantics. +Likewise the ``pinctrl_gpio_direction_input()`` / ``pinctrl_gpio_direction_output()`` +shall only be called from within respective ``.direction_input()`` / +``.direction_output()`` gpiolib implementation. + +NOTE that platforms and individual drivers shall *NOT* request GPIO pins to be +controlled e.g. muxed in. Instead, implement a proper gpiolib driver and have +that driver request proper muxing and other control for its pins. + +The function list could become long, especially if you can convert every +individual pin into a GPIO pin independent of any other pins, and then try +the approach to define every pin as a function. + +In this case, the function array would become 64 entries for each GPIO +setting and then the device functions. + +For this reason there are two functions a pin control driver can implement +to enable only GPIO on an individual pin: ``.gpio_request_enable()`` and +``.gpio_disable_free()``. + +This function will pass in the affected GPIO range identified by the pin +controller core, so you know which GPIO pins are being affected by the request +operation. + +If your driver needs to have an indication from the framework of whether the +GPIO pin shall be used for input or output you can implement the +``.gpio_set_direction()`` function. As described this shall be called from the +gpiolib driver and the affected GPIO range, pin offset and desired direction +will be passed along to this function. + +Alternatively to using these special functions, it is fully allowed to use +named functions for each GPIO pin, the ``pinctrl_gpio_request()`` will attempt to +obtain the function "gpioN" where "N" is the global GPIO pin number if no +special GPIO-handler is registered. + + +GPIO mode pitfalls +================== + +Due to the naming conventions used by hardware engineers, where "GPIO" +is taken to mean different things than what the kernel does, the developer +may be confused by a datasheet talking about a pin being possible to set +into "GPIO mode". It appears that what hardware engineers mean with +"GPIO mode" is not necessarily the use case that is implied in the kernel +interface ``<linux/gpio/consumer.h>``: a pin that you grab from kernel code and then +either listen for input or drive high/low to assert/deassert some +external line. + +Rather hardware engineers think that "GPIO mode" means that you can +software-control a few electrical properties of the pin that you would +not be able to control if the pin was in some other mode, such as muxed in +for a device. + +The GPIO portions of a pin and its relation to a certain pin controller +configuration and muxing logic can be constructed in several ways. Here +are two examples. + +Example **(A)**:: + + pin config + logic regs + | +- SPI + Physical pins --- pad --- pinmux -+- I2C + | +- mmc + | +- GPIO + pin + multiplex + logic regs + +Here some electrical properties of the pin can be configured no matter +whether the pin is used for GPIO or not. If you multiplex a GPIO onto a +pin, you can also drive it high/low from "GPIO" registers. +Alternatively, the pin can be controlled by a certain peripheral, while +still applying desired pin config properties. GPIO functionality is thus +orthogonal to any other device using the pin. + +In this arrangement the registers for the GPIO portions of the pin controller, +or the registers for the GPIO hardware module are likely to reside in a +separate memory range only intended for GPIO driving, and the register +range dealing with pin config and pin multiplexing get placed into a +different memory range and a separate section of the data sheet. + +A flag "strict" in struct pinmux_ops is available to check and deny +simultaneous access to the same pin from GPIO and pin multiplexing +consumers on hardware of this type. The pinctrl driver should set this flag +accordingly. + +Example **(B)**:: + + pin config + logic regs + | +- SPI + Physical pins --- pad --- pinmux -+- I2C + | | +- mmc + | | + GPIO pin + multiplex + logic regs + +In this arrangement, the GPIO functionality can always be enabled, such that +e.g. a GPIO input can be used to "spy" on the SPI/I2C/MMC signal while it is +pulsed out. It is likely possible to disrupt the traffic on the pin by doing +wrong things on the GPIO block, as it is never really disconnected. It is +possible that the GPIO, pin config and pin multiplex registers are placed into +the same memory range and the same section of the data sheet, although that +need not be the case. + +In some pin controllers, although the physical pins are designed in the same +way as (B), the GPIO function still can't be enabled at the same time as the +peripheral functions. So again the "strict" flag should be set, denying +simultaneous activation by GPIO and other muxed in devices. + +From a kernel point of view, however, these are different aspects of the +hardware and shall be put into different subsystems: + +- Registers (or fields within registers) that control electrical + properties of the pin such as biasing and drive strength should be + exposed through the pinctrl subsystem, as "pin configuration" settings. + +- Registers (or fields within registers) that control muxing of signals + from various other HW blocks (e.g. I2C, MMC, or GPIO) onto pins should + be exposed through the pinctrl subsystem, as mux functions. + +- Registers (or fields within registers) that control GPIO functionality + such as setting a GPIO's output value, reading a GPIO's input value, or + setting GPIO pin direction should be exposed through the GPIO subsystem, + and if they also support interrupt capabilities, through the irqchip + abstraction. + +Depending on the exact HW register design, some functions exposed by the +GPIO subsystem may call into the pinctrl subsystem in order to +coordinate register settings across HW modules. In particular, this may +be needed for HW with separate GPIO and pin controller HW modules, where +e.g. GPIO direction is determined by a register in the pin controller HW +module rather than the GPIO HW module. + +Electrical properties of the pin such as biasing and drive strength +may be placed at some pin-specific register in all cases or as part +of the GPIO register in case (B) especially. This doesn't mean that such +properties necessarily pertain to what the Linux kernel calls "GPIO". + +Example: a pin is usually muxed in to be used as a UART TX line. But during +system sleep, we need to put this pin into "GPIO mode" and ground it. + +If you make a 1-to-1 map to the GPIO subsystem for this pin, you may start +to think that you need to come up with something really complex, that the +pin shall be used for UART TX and GPIO at the same time, that you will grab +a pin control handle and set it to a certain state to enable UART TX to be +muxed in, then twist it over to GPIO mode and use gpiod_direction_output() +to drive it low during sleep, then mux it over to UART TX again when you +wake up and maybe even gpiod_get() / gpiod_put() as part of this cycle. This +all gets very complicated. + +The solution is to not think that what the datasheet calls "GPIO mode" +has to be handled by the ``<linux/gpio/consumer.h>`` interface. Instead view this as +a certain pin config setting. Look in e.g. ``<linux/pinctrl/pinconf-generic.h>`` +and you find this in the documentation: + + PIN_CONFIG_OUTPUT: + this will configure the pin in output, use argument + 1 to indicate high level, argument 0 to indicate low level. + +So it is perfectly possible to push a pin into "GPIO mode" and drive the +line low as part of the usual pin control map. So for example your UART +driver may look like this: + +.. code-block:: c + + #include <linux/pinctrl/consumer.h> + + struct pinctrl *pinctrl; + struct pinctrl_state *pins_default; + struct pinctrl_state *pins_sleep; + + pins_default = pinctrl_lookup_state(uap->pinctrl, PINCTRL_STATE_DEFAULT); + pins_sleep = pinctrl_lookup_state(uap->pinctrl, PINCTRL_STATE_SLEEP); + + /* Normal mode */ + retval = pinctrl_select_state(pinctrl, pins_default); + + /* Sleep mode */ + retval = pinctrl_select_state(pinctrl, pins_sleep); + +And your machine configuration may look like this: + +.. code-block:: c + + static unsigned long uart_default_mode[] = { + PIN_CONF_PACKED(PIN_CONFIG_DRIVE_PUSH_PULL, 0), + }; + + static unsigned long uart_sleep_mode[] = { + PIN_CONF_PACKED(PIN_CONFIG_OUTPUT, 0), + }; + + static struct pinctrl_map pinmap[] __initdata = { + PIN_MAP_MUX_GROUP("uart", PINCTRL_STATE_DEFAULT, "pinctrl-foo", + "u0_group", "u0"), + PIN_MAP_CONFIGS_PIN("uart", PINCTRL_STATE_DEFAULT, "pinctrl-foo", + "UART_TX_PIN", uart_default_mode), + PIN_MAP_MUX_GROUP("uart", PINCTRL_STATE_SLEEP, "pinctrl-foo", + "u0_group", "gpio-mode"), + PIN_MAP_CONFIGS_PIN("uart", PINCTRL_STATE_SLEEP, "pinctrl-foo", + "UART_TX_PIN", uart_sleep_mode), + }; + + foo_init(void) + { + pinctrl_register_mappings(pinmap, ARRAY_SIZE(pinmap)); + } + +Here the pins we want to control are in the "u0_group" and there is some +function called "u0" that can be enabled on this group of pins, and then +everything is UART business as usual. But there is also some function +named "gpio-mode" that can be mapped onto the same pins to move them into +GPIO mode. + +This will give the desired effect without any bogus interaction with the +GPIO subsystem. It is just an electrical configuration used by that device +when going to sleep, it might imply that the pin is set into something the +datasheet calls "GPIO mode", but that is not the point: it is still used +by that UART device to control the pins that pertain to that very UART +driver, putting them into modes needed by the UART. GPIO in the Linux +kernel sense are just some 1-bit line, and is a different use case. + +How the registers are poked to attain the push or pull, and output low +configuration and the muxing of the "u0" or "gpio-mode" group onto these +pins is a question for the driver. + +Some datasheets will be more helpful and refer to the "GPIO mode" as +"low power mode" rather than anything to do with GPIO. This often means +the same thing electrically speaking, but in this latter case the +software engineers will usually quickly identify that this is some +specific muxing or configuration rather than anything related to the GPIO +API. + + +Board/machine configuration +=========================== + +Boards and machines define how a certain complete running system is put +together, including how GPIOs and devices are muxed, how regulators are +constrained and how the clock tree looks. Of course pinmux settings are also +part of this. + +A pin controller configuration for a machine looks pretty much like a simple +regulator configuration, so for the example array above we want to enable i2c +and spi on the second function mapping: + +.. code-block:: c + + #include <linux/pinctrl/machine.h> + + static const struct pinctrl_map mapping[] __initconst = { + { + .dev_name = "foo-spi.0", + .name = PINCTRL_STATE_DEFAULT, + .type = PIN_MAP_TYPE_MUX_GROUP, + .ctrl_dev_name = "pinctrl-foo", + .data.mux.function = "spi0", + }, + { + .dev_name = "foo-i2c.0", + .name = PINCTRL_STATE_DEFAULT, + .type = PIN_MAP_TYPE_MUX_GROUP, + .ctrl_dev_name = "pinctrl-foo", + .data.mux.function = "i2c0", + }, + { + .dev_name = "foo-mmc.0", + .name = PINCTRL_STATE_DEFAULT, + .type = PIN_MAP_TYPE_MUX_GROUP, + .ctrl_dev_name = "pinctrl-foo", + .data.mux.function = "mmc0", + }, + }; + +The dev_name here matches to the unique device name that can be used to look +up the device struct (just like with clockdev or regulators). The function name +must match a function provided by the pinmux driver handling this pin range. + +As you can see we may have several pin controllers on the system and thus +we need to specify which one of them contains the functions we wish to map. + +You register this pinmux mapping to the pinmux subsystem by simply: + +.. code-block:: c + + ret = pinctrl_register_mappings(mapping, ARRAY_SIZE(mapping)); + +Since the above construct is pretty common there is a helper macro to make +it even more compact which assumes you want to use pinctrl-foo and position +0 for mapping, for example: + +.. code-block:: c + + static struct pinctrl_map mapping[] __initdata = { + PIN_MAP_MUX_GROUP("foo-i2c.o", PINCTRL_STATE_DEFAULT, + "pinctrl-foo", NULL, "i2c0"), + }; + +The mapping table may also contain pin configuration entries. It's common for +each pin/group to have a number of configuration entries that affect it, so +the table entries for configuration reference an array of config parameters +and values. An example using the convenience macros is shown below: + +.. code-block:: c + + static unsigned long i2c_grp_configs[] = { + FOO_PIN_DRIVEN, + FOO_PIN_PULLUP, + }; + + static unsigned long i2c_pin_configs[] = { + FOO_OPEN_COLLECTOR, + FOO_SLEW_RATE_SLOW, + }; + + static struct pinctrl_map mapping[] __initdata = { + PIN_MAP_MUX_GROUP("foo-i2c.0", PINCTRL_STATE_DEFAULT, + "pinctrl-foo", "i2c0", "i2c0"), + PIN_MAP_CONFIGS_GROUP("foo-i2c.0", PINCTRL_STATE_DEFAULT, + "pinctrl-foo", "i2c0", i2c_grp_configs), + PIN_MAP_CONFIGS_PIN("foo-i2c.0", PINCTRL_STATE_DEFAULT, + "pinctrl-foo", "i2c0scl", i2c_pin_configs), + PIN_MAP_CONFIGS_PIN("foo-i2c.0", PINCTRL_STATE_DEFAULT, + "pinctrl-foo", "i2c0sda", i2c_pin_configs), + }; + +Finally, some devices expect the mapping table to contain certain specific +named states. When running on hardware that doesn't need any pin controller +configuration, the mapping table must still contain those named states, in +order to explicitly indicate that the states were provided and intended to +be empty. Table entry macro ``PIN_MAP_DUMMY_STATE()`` serves the purpose of defining +a named state without causing any pin controller to be programmed: + +.. code-block:: c + + static struct pinctrl_map mapping[] __initdata = { + PIN_MAP_DUMMY_STATE("foo-i2c.0", PINCTRL_STATE_DEFAULT), + }; + + +Complex mappings +================ + +As it is possible to map a function to different groups of pins an optional +.group can be specified like this: + +.. code-block:: c + + ... + { + .dev_name = "foo-spi.0", + .name = "spi0-pos-A", + .type = PIN_MAP_TYPE_MUX_GROUP, + .ctrl_dev_name = "pinctrl-foo", + .function = "spi0", + .group = "spi0_0_grp", + }, + { + .dev_name = "foo-spi.0", + .name = "spi0-pos-B", + .type = PIN_MAP_TYPE_MUX_GROUP, + .ctrl_dev_name = "pinctrl-foo", + .function = "spi0", + .group = "spi0_1_grp", + }, + ... + +This example mapping is used to switch between two positions for spi0 at +runtime, as described further below under the heading `Runtime pinmuxing`_. + +Further it is possible for one named state to affect the muxing of several +groups of pins, say for example in the mmc0 example above, where you can +additively expand the mmc0 bus from 2 to 4 to 8 pins. If we want to use all +three groups for a total of 2 + 2 + 4 = 8 pins (for an 8-bit MMC bus as is the +case), we define a mapping like this: + +.. code-block:: c + + ... + { + .dev_name = "foo-mmc.0", + .name = "2bit" + .type = PIN_MAP_TYPE_MUX_GROUP, + .ctrl_dev_name = "pinctrl-foo", + .function = "mmc0", + .group = "mmc0_1_grp", + }, + { + .dev_name = "foo-mmc.0", + .name = "4bit" + .type = PIN_MAP_TYPE_MUX_GROUP, + .ctrl_dev_name = "pinctrl-foo", + .function = "mmc0", + .group = "mmc0_1_grp", + }, + { + .dev_name = "foo-mmc.0", + .name = "4bit" + .type = PIN_MAP_TYPE_MUX_GROUP, + .ctrl_dev_name = "pinctrl-foo", + .function = "mmc0", + .group = "mmc0_2_grp", + }, + { + .dev_name = "foo-mmc.0", + .name = "8bit" + .type = PIN_MAP_TYPE_MUX_GROUP, + .ctrl_dev_name = "pinctrl-foo", + .function = "mmc0", + .group = "mmc0_1_grp", + }, + { + .dev_name = "foo-mmc.0", + .name = "8bit" + .type = PIN_MAP_TYPE_MUX_GROUP, + .ctrl_dev_name = "pinctrl-foo", + .function = "mmc0", + .group = "mmc0_2_grp", + }, + { + .dev_name = "foo-mmc.0", + .name = "8bit" + .type = PIN_MAP_TYPE_MUX_GROUP, + .ctrl_dev_name = "pinctrl-foo", + .function = "mmc0", + .group = "mmc0_3_grp", + }, + ... + +The result of grabbing this mapping from the device with something like +this (see next paragraph): + +.. code-block:: c + + p = devm_pinctrl_get(dev); + s = pinctrl_lookup_state(p, "8bit"); + ret = pinctrl_select_state(p, s); + +or more simply: + +.. code-block:: c + + p = devm_pinctrl_get_select(dev, "8bit"); + +Will be that you activate all the three bottom records in the mapping at +once. Since they share the same name, pin controller device, function and +device, and since we allow multiple groups to match to a single device, they +all get selected, and they all get enabled and disable simultaneously by the +pinmux core. + + +Pin control requests from drivers +================================= + +When a device driver is about to probe the device core will automatically +attempt to issue ``pinctrl_get_select_default()`` on these devices. +This way driver writers do not need to add any of the boilerplate code +of the type found below. However when doing fine-grained state selection +and not using the "default" state, you may have to do some device driver +handling of the pinctrl handles and states. + +So if you just want to put the pins for a certain device into the default +state and be done with it, there is nothing you need to do besides +providing the proper mapping table. The device core will take care of +the rest. + +Generally it is discouraged to let individual drivers get and enable pin +control. So if possible, handle the pin control in platform code or some other +place where you have access to all the affected struct device * pointers. In +some cases where a driver needs to e.g. switch between different mux mappings +at runtime this is not possible. + +A typical case is if a driver needs to switch bias of pins from normal +operation and going to sleep, moving from the ``PINCTRL_STATE_DEFAULT`` to +``PINCTRL_STATE_SLEEP`` at runtime, re-biasing or even re-muxing pins to save +current in sleep mode. + +A driver may request a certain control state to be activated, usually just the +default state like this: + +.. code-block:: c + + #include <linux/pinctrl/consumer.h> + + struct foo_state { + struct pinctrl *p; + struct pinctrl_state *s; + ... + }; + + foo_probe() + { + /* Allocate a state holder named "foo" etc */ + struct foo_state *foo = ...; + + foo->p = devm_pinctrl_get(&device); + if (IS_ERR(foo->p)) { + /* FIXME: clean up "foo" here */ + return PTR_ERR(foo->p); + } + + foo->s = pinctrl_lookup_state(foo->p, PINCTRL_STATE_DEFAULT); + if (IS_ERR(foo->s)) { + /* FIXME: clean up "foo" here */ + return PTR_ERR(foo->s); + } + + ret = pinctrl_select_state(foo->p, foo->s); + if (ret < 0) { + /* FIXME: clean up "foo" here */ + return ret; + } + } + +This get/lookup/select/put sequence can just as well be handled by bus drivers +if you don't want each and every driver to handle it and you know the +arrangement on your bus. + +The semantics of the pinctrl APIs are: + +- ``pinctrl_get()`` is called in process context to obtain a handle to all pinctrl + information for a given client device. It will allocate a struct from the + kernel memory to hold the pinmux state. All mapping table parsing or similar + slow operations take place within this API. + +- ``devm_pinctrl_get()`` is a variant of pinctrl_get() that causes ``pinctrl_put()`` + to be called automatically on the retrieved pointer when the associated + device is removed. It is recommended to use this function over plain + ``pinctrl_get()``. + +- ``pinctrl_lookup_state()`` is called in process context to obtain a handle to a + specific state for a client device. This operation may be slow, too. + +- ``pinctrl_select_state()`` programs pin controller hardware according to the + definition of the state as given by the mapping table. In theory, this is a + fast-path operation, since it only involved blasting some register settings + into hardware. However, note that some pin controllers may have their + registers on a slow/IRQ-based bus, so client devices should not assume they + can call ``pinctrl_select_state()`` from non-blocking contexts. + +- ``pinctrl_put()`` frees all information associated with a pinctrl handle. + +- ``devm_pinctrl_put()`` is a variant of ``pinctrl_put()`` that may be used to + explicitly destroy a pinctrl object returned by ``devm_pinctrl_get()``. + However, use of this function will be rare, due to the automatic cleanup + that will occur even without calling it. + + ``pinctrl_get()`` must be paired with a plain ``pinctrl_put()``. + ``pinctrl_get()`` may not be paired with ``devm_pinctrl_put()``. + ``devm_pinctrl_get()`` can optionally be paired with ``devm_pinctrl_put()``. + ``devm_pinctrl_get()`` may not be paired with plain ``pinctrl_put()``. + +Usually the pin control core handled the get/put pair and call out to the +device drivers bookkeeping operations, like checking available functions and +the associated pins, whereas ``pinctrl_select_state()`` pass on to the pin controller +driver which takes care of activating and/or deactivating the mux setting by +quickly poking some registers. + +The pins are allocated for your device when you issue the ``devm_pinctrl_get()`` +call, after this you should be able to see this in the debugfs listing of all +pins. + +NOTE: the pinctrl system will return ``-EPROBE_DEFER`` if it cannot find the +requested pinctrl handles, for example if the pinctrl driver has not yet +registered. Thus make sure that the error path in your driver gracefully +cleans up and is ready to retry the probing later in the startup process. + + +Drivers needing both pin control and GPIOs +========================================== + +Again, it is discouraged to let drivers lookup and select pin control states +themselves, but again sometimes this is unavoidable. + +So say that your driver is fetching its resources like this: + +.. code-block:: c + + #include <linux/pinctrl/consumer.h> + #include <linux/gpio/consumer.h> + + struct pinctrl *pinctrl; + struct gpio_desc *gpio; + + pinctrl = devm_pinctrl_get_select_default(&dev); + gpio = devm_gpiod_get(&dev, "foo"); + +Here we first request a certain pin state and then request GPIO "foo" to be +used. If you're using the subsystems orthogonally like this, you should +nominally always get your pinctrl handle and select the desired pinctrl +state BEFORE requesting the GPIO. This is a semantic convention to avoid +situations that can be electrically unpleasant, you will certainly want to +mux in and bias pins in a certain way before the GPIO subsystems starts to +deal with them. + +The above can be hidden: using the device core, the pinctrl core may be +setting up the config and muxing for the pins right before the device is +probing, nevertheless orthogonal to the GPIO subsystem. + +But there are also situations where it makes sense for the GPIO subsystem +to communicate directly with the pinctrl subsystem, using the latter as a +back-end. This is when the GPIO driver may call out to the functions +described in the section `Pin control interaction with the GPIO subsystem`_ +above. This only involves per-pin multiplexing, and will be completely +hidden behind the gpiod_*() function namespace. In this case, the driver +need not interact with the pin control subsystem at all. + +If a pin control driver and a GPIO driver is dealing with the same pins +and the use cases involve multiplexing, you MUST implement the pin controller +as a back-end for the GPIO driver like this, unless your hardware design +is such that the GPIO controller can override the pin controller's +multiplexing state through hardware without the need to interact with the +pin control system. + + +System pin control hogging +========================== + +Pin control map entries can be hogged by the core when the pin controller +is registered. This means that the core will attempt to call ``pinctrl_get()``, +``pinctrl_lookup_state()`` and ``pinctrl_select_state()`` on it immediately after +the pin control device has been registered. + +This occurs for mapping table entries where the client device name is equal +to the pin controller device name, and the state name is ``PINCTRL_STATE_DEFAULT``: + +.. code-block:: c + + { + .dev_name = "pinctrl-foo", + .name = PINCTRL_STATE_DEFAULT, + .type = PIN_MAP_TYPE_MUX_GROUP, + .ctrl_dev_name = "pinctrl-foo", + .function = "power_func", + }, + +Since it may be common to request the core to hog a few always-applicable +mux settings on the primary pin controller, there is a convenience macro for +this: + +.. code-block:: c + + PIN_MAP_MUX_GROUP_HOG_DEFAULT("pinctrl-foo", NULL /* group */, + "power_func") + +This gives the exact same result as the above construction. + + +Runtime pinmuxing +================= + +It is possible to mux a certain function in and out at runtime, say to move +an SPI port from one set of pins to another set of pins. Say for example for +spi0 in the example above, we expose two different groups of pins for the same +function, but with different named in the mapping as described under +"Advanced mapping" above. So that for an SPI device, we have two states named +"pos-A" and "pos-B". + +This snippet first initializes a state object for both groups (in foo_probe()), +then muxes the function in the pins defined by group A, and finally muxes it in +on the pins defined by group B: + +.. code-block:: c + + #include <linux/pinctrl/consumer.h> + + struct pinctrl *p; + struct pinctrl_state *s1, *s2; + + foo_probe() + { + /* Setup */ + p = devm_pinctrl_get(&device); + if (IS_ERR(p)) + ... + + s1 = pinctrl_lookup_state(p, "pos-A"); + if (IS_ERR(s1)) + ... + + s2 = pinctrl_lookup_state(p, "pos-B"); + if (IS_ERR(s2)) + ... + } + + foo_switch() + { + /* Enable on position A */ + ret = pinctrl_select_state(p, s1); + if (ret < 0) + ... + + ... + + /* Enable on position B */ + ret = pinctrl_select_state(p, s2); + if (ret < 0) + ... + + ... + } + +The above has to be done from process context. The reservation of the pins +will be done when the state is activated, so in effect one specific pin +can be used by different functions at different times on a running system. + + +Debugfs files +============= + +These files are created in ``/sys/kernel/debug/pinctrl``: + +- ``pinctrl-devices``: prints each pin controller device along with columns to + indicate support for pinmux and pinconf + +- ``pinctrl-handles``: prints each configured pin controller handle and the + corresponding pinmux maps + +- ``pinctrl-maps``: prints all pinctrl maps + +A sub-directory is created inside of ``/sys/kernel/debug/pinctrl`` for each pin +controller device containing these files: + +- ``pins``: prints a line for each pin registered on the pin controller. The + pinctrl driver may add additional information such as register contents. + +- ``gpio-ranges``: prints ranges that map gpio lines to pins on the controller + +- ``pingroups``: prints all pin groups registered on the pin controller + +- ``pinconf-pins``: prints pin config settings for each pin + +- ``pinconf-groups``: prints pin config settings per pin group + +- ``pinmux-functions``: prints each pin function along with the pin groups that + map to the pin function + +- ``pinmux-pins``: iterates through all pins and prints mux owner, gpio owner + and if the pin is a hog + +- ``pinmux-select``: write to this file to activate a pin function for a group: + + .. code-block:: sh + + echo "<group-name function-name>" > pinmux-select diff --git a/Documentation/driver-api/pldmfw/driver-ops.rst b/Documentation/driver-api/pldmfw/driver-ops.rst new file mode 100644 index 0000000000..f0654783d3 --- /dev/null +++ b/Documentation/driver-api/pldmfw/driver-ops.rst @@ -0,0 +1,56 @@ +.. SPDX-License-Identifier: GPL-2.0-only + +========================= +Driver-specific callbacks +========================= + +The ``pldmfw`` module relies on the device driver for implementing device +specific behavior using the following operations. + +``.match_record`` +----------------- + +The ``.match_record`` operation is used to determine whether a given PLDM +record matches the device being updated. This requires comparing the record +descriptors in the record with information from the device. Many record +descriptors are defined by the PLDM standard, but it is also allowed for +devices to implement their own descriptors. + +The ``.match_record`` operation should return true if a given record matches +the device. + +``.send_package_data`` +---------------------- + +The ``.send_package_data`` operation is used to send the device-specific +package data in a record to the device firmware. If the matching record +provides package data, ``pldmfw`` will call the ``.send_package_data`` +function with a pointer to the package data and with the package data +length. The device driver should send this data to firmware. + +``.send_component_table`` +------------------------- + +The ``.send_component_table`` operation is used to forward component +information to the device. It is called once for each applicable component, +that is, for each component indicated by the matching record. The +device driver should send the component information to the device firmware, +and wait for a response. The provided transfer flag indicates whether this +is the first, last, or a middle component, and is expected to be forwarded +to firmware as part of the component table information. The driver should an +error in the case when the firmware indicates that the component cannot be +updated, or return zero if the component can be updated. + +``.flash_component`` +-------------------- + +The ``.flash_component`` operation is used to inform the device driver to +flash a given component. The driver must perform any steps necessary to send +the component data to the device. + +``.finalize_update`` +-------------------- + +The ``.finalize_update`` operation is used by the ``pldmfw`` library in +order to allow the device driver to perform any remaining device specific +logic needed to finish the update. diff --git a/Documentation/driver-api/pldmfw/file-format.rst b/Documentation/driver-api/pldmfw/file-format.rst new file mode 100644 index 0000000000..b7a9cebe09 --- /dev/null +++ b/Documentation/driver-api/pldmfw/file-format.rst @@ -0,0 +1,203 @@ +.. SPDX-License-Identifier: GPL-2.0-only + +================================== +PLDM Firmware file format overview +================================== + +A PLDM firmware package is a binary file which contains a header that +describes the contents of the firmware package. This includes an initial +package header, one or more firmware records, and one or more components +describing the actual flash contents to program. + +This diagram provides an overview of the file format:: + + overall file layout + +----------------------+ + | | + | Package Header | + | | + +----------------------+ + | | + | Device Records | + | | + +----------------------+ + | | + | Component Info | + | | + +----------------------+ + | | + | Package Header CRC | + | | + +----------------------+ + | | + | Component Image 1 | + | | + +----------------------+ + | | + | Component Image 2 | + | | + +----------------------+ + | | + | ... | + | | + +----------------------+ + | | + | Component Image N | + | | + +----------------------+ + +Package Header +============== + +The package header begins with the UUID of the PLDM file format, and +contains information about the version of the format that the file uses. It +also includes the total header size, a release date, the size of the +component bitmap, and an overall package version. + +The following diagram provides an overview of the package header:: + + header layout + +-------------------------+ + | PLDM UUID | + +-------------------------+ + | Format Revision | + +-------------------------+ + | Header Size | + +-------------------------+ + | Release Date | + +-------------------------+ + | Component Bitmap Length | + +-------------------------+ + | Package Version Info | + +-------------------------+ + +Device Records +============== + +The device firmware records area starts with a count indicating the total +number of records in the file, followed by each record. A single device +record describes what device matches this record. All valid PLDM firmware +files must contain at least one record, but optionally may contain more than +one record if they support multiple devices. + +Each record will identify the device it supports via TLVs that describe the +device, such as the PCI device and vendor information. It will also indicate +which set of components that are used by this device. It is possible that +only subset of provided components will be used by a given record. A record +may also optionally contain device-specific package data that will be used +by the device firmware during the update process. + +The following diagram provides an overview of the device record area:: + + area layout + +---------------+ + | | + | Record Count | + | | + +---------------+ + | | + | Record 1 | + | | + +---------------+ + | | + | Record 2 | + | | + +---------------+ + | | + | ... | + | | + +---------------+ + | | + | Record N | + | | + +---------------+ + + record layout + +-----------------------+ + | Record Length | + +-----------------------+ + | Descriptor Count | + +-----------------------+ + | Option Flags | + +-----------------------+ + | Version Settings | + +-----------------------+ + | Package Data Length | + +-----------------------+ + | Applicable Components | + +-----------------------+ + | Version String | + +-----------------------+ + | Descriptor TLVs | + +-----------------------+ + | Package Data | + +-----------------------+ + +Component Info +============== + +The component information area begins with a count of the number of +components. Following this count is a description for each component. The +component information points to the location in the file where the component +data is stored, and includes version data used to identify the version of +the component. + +The following diagram provides an overview of the component area:: + + area layout + +-----------------+ + | | + | Component Count | + | | + +-----------------+ + | | + | Component 1 | + | | + +-----------------+ + | | + | Component 2 | + | | + +-----------------+ + | | + | ... | + | | + +-----------------+ + | | + | Component N | + | | + +-----------------+ + + component layout + +------------------------+ + | Classification | + +------------------------+ + | Component Identifier | + +------------------------+ + | Comparison Stamp | + +------------------------+ + | Component Options | + +------------------------+ + | Activation Method | + +------------------------+ + | Location Offset | + +------------------------+ + | Component Size | + +------------------------+ + | Component Version Info | + +------------------------+ + | Package Data | + +------------------------+ + + +Package Header CRC +================== + +Following the component information is a short 4-byte CRC calculated over +the contents of all of the header information. + +Component Images +================ + +The component images follow the package header information in the PLDM +firmware file. Each of these is simply a binary chunk with its start and +size defined by the matching component structure in the component info area. diff --git a/Documentation/driver-api/pldmfw/index.rst b/Documentation/driver-api/pldmfw/index.rst new file mode 100644 index 0000000000..fd871b83f3 --- /dev/null +++ b/Documentation/driver-api/pldmfw/index.rst @@ -0,0 +1,72 @@ +.. SPDX-License-Identifier: GPL-2.0-only + +================================== +PLDM Firmware Flash Update Library +================================== + +``pldmfw`` implements functionality for updating the flash on a device using +the PLDM for Firmware Update standard +<https://www.dmtf.org/documents/pmci/pldm-firmware-update-specification-100>. + +.. toctree:: + :maxdepth: 1 + + file-format + driver-ops + +================================== +Overview of the ``pldmfw`` library +================================== + +The ``pldmfw`` library is intended to be used by device drivers for +implementing device flash update based on firmware files following the PLDM +firmware file format. + +It is implemented using an ops table that allows device drivers to provide +the underlying device specific functionality. + +``pldmfw`` implements logic to parse the packed binary format of the PLDM +firmware file into data structures, and then uses the provided function +operations to determine if the firmware file is a match for the device. If +so, it sends the record and component data to the firmware using the device +specific implementations provided by device drivers. Once the device +firmware indicates that the update may be performed, the firmware data is +sent to the device for programming. + +Parsing the PLDM file +===================== + +The PLDM file format uses packed binary data, with most multi-byte fields +stored in the Little Endian format. Several pieces of data are variable +length, including version strings and the number of records and components. +Due to this, it is not straight forward to index the record, record +descriptors, or components. + +To avoid proliferating access to the packed binary data, the ``pldmfw`` +library parses and extracts this data into simpler structures for ease of +access. + +In order to safely process the firmware file, care is taken to avoid +unaligned access of multi-byte fields, and to properly convert from Little +Endian to CPU host format. Additionally the records, descriptors, and +components are stored in linked lists. + +Performing a flash update +========================= + +To perform a flash update, the ``pldmfw`` module performs the following +steps + +1. Parse the firmware file for record and component information +2. Scan through the records and determine if the device matches any record + in the file. The first matched record will be used. +3. If the matching record provides package data, send this package data to + the device. +4. For each component that the record indicates, send the component data to + the device. For each component, the firmware may respond with an + indication of whether the update is suitable or not. If any component is + not suitable, the update is canceled. +5. For each component, send the binary data to the device firmware for + updating. +6. After all components are programmed, perform any final device-specific + actions to finalize the update. diff --git a/Documentation/driver-api/pm/cpuidle.rst b/Documentation/driver-api/pm/cpuidle.rst new file mode 100644 index 0000000000..d477208604 --- /dev/null +++ b/Documentation/driver-api/pm/cpuidle.rst @@ -0,0 +1,279 @@ +.. SPDX-License-Identifier: GPL-2.0 +.. include:: <isonum.txt> + +======================== +CPU Idle Time Management +======================== + +:Copyright: |copy| 2019 Intel Corporation + +:Author: Rafael J. Wysocki <rafael.j.wysocki@intel.com> + + +CPU Idle Time Management Subsystem +================================== + +Every time one of the logical CPUs in the system (the entities that appear to +fetch and execute instructions: hardware threads, if present, or processor +cores) is idle after an interrupt or equivalent wakeup event, which means that +there are no tasks to run on it except for the special "idle" task associated +with it, there is an opportunity to save energy for the processor that it +belongs to. That can be done by making the idle logical CPU stop fetching +instructions from memory and putting some of the processor's functional units +depended on by it into an idle state in which they will draw less power. + +However, there may be multiple different idle states that can be used in such a +situation in principle, so it may be necessary to find the most suitable one +(from the kernel perspective) and ask the processor to use (or "enter") that +particular idle state. That is the role of the CPU idle time management +subsystem in the kernel, called ``CPUIdle``. + +The design of ``CPUIdle`` is modular and based on the code duplication avoidance +principle, so the generic code that in principle need not depend on the hardware +or platform design details in it is separate from the code that interacts with +the hardware. It generally is divided into three categories of functional +units: *governors* responsible for selecting idle states to ask the processor +to enter, *drivers* that pass the governors' decisions on to the hardware and +the *core* providing a common framework for them. + + +CPU Idle Time Governors +======================= + +A CPU idle time (``CPUIdle``) governor is a bundle of policy code invoked when +one of the logical CPUs in the system turns out to be idle. Its role is to +select an idle state to ask the processor to enter in order to save some energy. + +``CPUIdle`` governors are generic and each of them can be used on any hardware +platform that the Linux kernel can run on. For this reason, data structures +operated on by them cannot depend on any hardware architecture or platform +design details as well. + +The governor itself is represented by a struct cpuidle_governor object +containing four callback pointers, :c:member:`enable`, :c:member:`disable`, +:c:member:`select`, :c:member:`reflect`, a :c:member:`rating` field described +below, and a name (string) used for identifying it. + +For the governor to be available at all, that object needs to be registered +with the ``CPUIdle`` core by calling :c:func:`cpuidle_register_governor()` with +a pointer to it passed as the argument. If successful, that causes the core to +add the governor to the global list of available governors and, if it is the +only one in the list (that is, the list was empty before) or the value of its +:c:member:`rating` field is greater than the value of that field for the +governor currently in use, or the name of the new governor was passed to the +kernel as the value of the ``cpuidle.governor=`` command line parameter, the new +governor will be used from that point on (there can be only one ``CPUIdle`` +governor in use at a time). Also, user space can choose the ``CPUIdle`` +governor to use at run time via ``sysfs``. + +Once registered, ``CPUIdle`` governors cannot be unregistered, so it is not +practical to put them into loadable kernel modules. + +The interface between ``CPUIdle`` governors and the core consists of four +callbacks: + +:c:member:`enable` + :: + + int (*enable) (struct cpuidle_driver *drv, struct cpuidle_device *dev); + + The role of this callback is to prepare the governor for handling the + (logical) CPU represented by the struct cpuidle_device object pointed + to by the ``dev`` argument. The struct cpuidle_driver object pointed + to by the ``drv`` argument represents the ``CPUIdle`` driver to be used + with that CPU (among other things, it should contain the list of + struct cpuidle_state objects representing idle states that the + processor holding the given CPU can be asked to enter). + + It may fail, in which case it is expected to return a negative error + code, and that causes the kernel to run the architecture-specific + default code for idle CPUs on the CPU in question instead of ``CPUIdle`` + until the ``->enable()`` governor callback is invoked for that CPU + again. + +:c:member:`disable` + :: + + void (*disable) (struct cpuidle_driver *drv, struct cpuidle_device *dev); + + Called to make the governor stop handling the (logical) CPU represented + by the struct cpuidle_device object pointed to by the ``dev`` + argument. + + It is expected to reverse any changes made by the ``->enable()`` + callback when it was last invoked for the target CPU, free all memory + allocated by that callback and so on. + +:c:member:`select` + :: + + int (*select) (struct cpuidle_driver *drv, struct cpuidle_device *dev, + bool *stop_tick); + + Called to select an idle state for the processor holding the (logical) + CPU represented by the struct cpuidle_device object pointed to by the + ``dev`` argument. + + The list of idle states to take into consideration is represented by the + :c:member:`states` array of struct cpuidle_state objects held by the + struct cpuidle_driver object pointed to by the ``drv`` argument (which + represents the ``CPUIdle`` driver to be used with the CPU at hand). The + value returned by this callback is interpreted as an index into that + array (unless it is a negative error code). + + The ``stop_tick`` argument is used to indicate whether or not to stop + the scheduler tick before asking the processor to enter the selected + idle state. When the ``bool`` variable pointed to by it (which is set + to ``true`` before invoking this callback) is cleared to ``false``, the + processor will be asked to enter the selected idle state without + stopping the scheduler tick on the given CPU (if the tick has been + stopped on that CPU already, however, it will not be restarted before + asking the processor to enter the idle state). + + This callback is mandatory (i.e. the :c:member:`select` callback pointer + in struct cpuidle_governor must not be ``NULL`` for the registration + of the governor to succeed). + +:c:member:`reflect` + :: + + void (*reflect) (struct cpuidle_device *dev, int index); + + Called to allow the governor to evaluate the accuracy of the idle state + selection made by the ``->select()`` callback (when it was invoked last + time) and possibly use the result of that to improve the accuracy of + idle state selections in the future. + +In addition, ``CPUIdle`` governors are required to take power management +quality of service (PM QoS) constraints on the processor wakeup latency into +account when selecting idle states. In order to obtain the current effective +PM QoS wakeup latency constraint for a given CPU, a ``CPUIdle`` governor is +expected to pass the number of the CPU to +:c:func:`cpuidle_governor_latency_req()`. Then, the governor's ``->select()`` +callback must not return the index of an indle state whose +:c:member:`exit_latency` value is greater than the number returned by that +function. + + +CPU Idle Time Management Drivers +================================ + +CPU idle time management (``CPUIdle``) drivers provide an interface between the +other parts of ``CPUIdle`` and the hardware. + +First of all, a ``CPUIdle`` driver has to populate the :c:member:`states` array +of struct cpuidle_state objects included in the struct cpuidle_driver object +representing it. Going forward this array will represent the list of available +idle states that the processor hardware can be asked to enter shared by all of +the logical CPUs handled by the given driver. + +The entries in the :c:member:`states` array are expected to be sorted by the +value of the :c:member:`target_residency` field in struct cpuidle_state in +the ascending order (that is, index 0 should correspond to the idle state with +the minimum value of :c:member:`target_residency`). [Since the +:c:member:`target_residency` value is expected to reflect the "depth" of the +idle state represented by the struct cpuidle_state object holding it, this +sorting order should be the same as the ascending sorting order by the idle +state "depth".] + +Three fields in struct cpuidle_state are used by the existing ``CPUIdle`` +governors for computations related to idle state selection: + +:c:member:`target_residency` + Minimum time to spend in this idle state including the time needed to + enter it (which may be substantial) to save more energy than could + be saved by staying in a shallower idle state for the same amount of + time, in microseconds. + +:c:member:`exit_latency` + Maximum time it will take a CPU asking the processor to enter this idle + state to start executing the first instruction after a wakeup from it, + in microseconds. + +:c:member:`flags` + Flags representing idle state properties. Currently, governors only use + the ``CPUIDLE_FLAG_POLLING`` flag which is set if the given object + does not represent a real idle state, but an interface to a software + "loop" that can be used in order to avoid asking the processor to enter + any idle state at all. [There are other flags used by the ``CPUIdle`` + core in special situations.] + +The :c:member:`enter` callback pointer in struct cpuidle_state, which must not +be ``NULL``, points to the routine to execute in order to ask the processor to +enter this particular idle state: + +:: + + void (*enter) (struct cpuidle_device *dev, struct cpuidle_driver *drv, + int index); + +The first two arguments of it point to the struct cpuidle_device object +representing the logical CPU running this callback and the +struct cpuidle_driver object representing the driver itself, respectively, +and the last one is an index of the struct cpuidle_state entry in the driver's +:c:member:`states` array representing the idle state to ask the processor to +enter. + +The analogous ``->enter_s2idle()`` callback in struct cpuidle_state is used +only for implementing the suspend-to-idle system-wide power management feature. +The difference between in and ``->enter()`` is that it must not re-enable +interrupts at any point (even temporarily) or attempt to change the states of +clock event devices, which the ``->enter()`` callback may do sometimes. + +Once the :c:member:`states` array has been populated, the number of valid +entries in it has to be stored in the :c:member:`state_count` field of the +struct cpuidle_driver object representing the driver. Moreover, if any +entries in the :c:member:`states` array represent "coupled" idle states (that +is, idle states that can only be asked for if multiple related logical CPUs are +idle), the :c:member:`safe_state_index` field in struct cpuidle_driver needs +to be the index of an idle state that is not "coupled" (that is, one that can be +asked for if only one logical CPU is idle). + +In addition to that, if the given ``CPUIdle`` driver is only going to handle a +subset of logical CPUs in the system, the :c:member:`cpumask` field in its +struct cpuidle_driver object must point to the set (mask) of CPUs that will be +handled by it. + +A ``CPUIdle`` driver can only be used after it has been registered. If there +are no "coupled" idle state entries in the driver's :c:member:`states` array, +that can be accomplished by passing the driver's struct cpuidle_driver object +to :c:func:`cpuidle_register_driver()`. Otherwise, :c:func:`cpuidle_register()` +should be used for this purpose. + +However, it also is necessary to register struct cpuidle_device objects for +all of the logical CPUs to be handled by the given ``CPUIdle`` driver with the +help of :c:func:`cpuidle_register_device()` after the driver has been registered +and :c:func:`cpuidle_register_driver()`, unlike :c:func:`cpuidle_register()`, +does not do that automatically. For this reason, the drivers that use +:c:func:`cpuidle_register_driver()` to register themselves must also take care +of registering the struct cpuidle_device objects as needed, so it is generally +recommended to use :c:func:`cpuidle_register()` for ``CPUIdle`` driver +registration in all cases. + +The registration of a struct cpuidle_device object causes the ``CPUIdle`` +``sysfs`` interface to be created and the governor's ``->enable()`` callback to +be invoked for the logical CPU represented by it, so it must take place after +registering the driver that will handle the CPU in question. + +``CPUIdle`` drivers and struct cpuidle_device objects can be unregistered +when they are not necessary any more which allows some resources associated with +them to be released. Due to dependencies between them, all of the +struct cpuidle_device objects representing CPUs handled by the given +``CPUIdle`` driver must be unregistered, with the help of +:c:func:`cpuidle_unregister_device()`, before calling +:c:func:`cpuidle_unregister_driver()` to unregister the driver. Alternatively, +:c:func:`cpuidle_unregister()` can be called to unregister a ``CPUIdle`` driver +along with all of the struct cpuidle_device objects representing CPUs handled +by it. + +``CPUIdle`` drivers can respond to runtime system configuration changes that +lead to modifications of the list of available processor idle states (which can +happen, for example, when the system's power source is switched from AC to +battery or the other way around). Upon a notification of such a change, +a ``CPUIdle`` driver is expected to call :c:func:`cpuidle_pause_and_lock()` to +turn ``CPUIdle`` off temporarily and then :c:func:`cpuidle_disable_device()` for +all of the struct cpuidle_device objects representing CPUs affected by that +change. Next, it can update its :c:member:`states` array in accordance with +the new configuration of the system, call :c:func:`cpuidle_enable_device()` for +all of the relevant struct cpuidle_device objects and invoke +:c:func:`cpuidle_resume_and_unlock()` to allow ``CPUIdle`` to be used again. diff --git a/Documentation/driver-api/pm/devices.rst b/Documentation/driver-api/pm/devices.rst new file mode 100644 index 0000000000..d448cb57df --- /dev/null +++ b/Documentation/driver-api/pm/devices.rst @@ -0,0 +1,880 @@ +.. SPDX-License-Identifier: GPL-2.0 +.. include:: <isonum.txt> + +.. _driverapi_pm_devices: + +============================== +Device Power Management Basics +============================== + +:Copyright: |copy| 2010-2011 Rafael J. Wysocki <rjw@sisk.pl>, Novell Inc. +:Copyright: |copy| 2010 Alan Stern <stern@rowland.harvard.edu> +:Copyright: |copy| 2016 Intel Corporation + +:Author: Rafael J. Wysocki <rafael.j.wysocki@intel.com> + + +Most of the code in Linux is device drivers, so most of the Linux power +management (PM) code is also driver-specific. Most drivers will do very +little; others, especially for platforms with small batteries (like cell +phones), will do a lot. + +This writeup gives an overview of how drivers interact with system-wide +power management goals, emphasizing the models and interfaces that are +shared by everything that hooks up to the driver model core. Read it as +background for the domain-specific work you'd do with any specific driver. + + +Two Models for Device Power Management +====================================== + +Drivers will use one or both of these models to put devices into low-power +states: + + System Sleep model: + + Drivers can enter low-power states as part of entering system-wide + low-power states like "suspend" (also known as "suspend-to-RAM"), or + (mostly for systems with disks) "hibernation" (also known as + "suspend-to-disk"). + + This is something that device, bus, and class drivers collaborate on + by implementing various role-specific suspend and resume methods to + cleanly power down hardware and software subsystems, then reactivate + them without loss of data. + + Some drivers can manage hardware wakeup events, which make the system + leave the low-power state. This feature may be enabled or disabled + using the relevant :file:`/sys/devices/.../power/wakeup` file (for + Ethernet drivers the ioctl interface used by ethtool may also be used + for this purpose); enabling it may cost some power usage, but let the + whole system enter low-power states more often. + + Runtime Power Management model: + + Devices may also be put into low-power states while the system is + running, independently of other power management activity in principle. + However, devices are not generally independent of each other (for + example, a parent device cannot be suspended unless all of its child + devices have been suspended). Moreover, depending on the bus type the + device is on, it may be necessary to carry out some bus-specific + operations on the device for this purpose. Devices put into low power + states at run time may require special handling during system-wide power + transitions (suspend or hibernation). + + For these reasons not only the device driver itself, but also the + appropriate subsystem (bus type, device type or device class) driver and + the PM core are involved in runtime power management. As in the system + sleep power management case, they need to collaborate by implementing + various role-specific suspend and resume methods, so that the hardware + is cleanly powered down and reactivated without data or service loss. + +There's not a lot to be said about those low-power states except that they are +very system-specific, and often device-specific. Also, that if enough devices +have been put into low-power states (at runtime), the effect may be very similar +to entering some system-wide low-power state (system sleep) ... and that +synergies exist, so that several drivers using runtime PM might put the system +into a state where even deeper power saving options are available. + +Most suspended devices will have quiesced all I/O: no more DMA or IRQs (except +for wakeup events), no more data read or written, and requests from upstream +drivers are no longer accepted. A given bus or platform may have different +requirements though. + +Examples of hardware wakeup events include an alarm from a real time clock, +network wake-on-LAN packets, keyboard or mouse activity, and media insertion +or removal (for PCMCIA, MMC/SD, USB, and so on). + +Interfaces for Entering System Sleep States +=========================================== + +There are programming interfaces provided for subsystems (bus type, device type, +device class) and device drivers to allow them to participate in the power +management of devices they are concerned with. These interfaces cover both +system sleep and runtime power management. + + +Device Power Management Operations +---------------------------------- + +Device power management operations, at the subsystem level as well as at the +device driver level, are implemented by defining and populating objects of type +struct dev_pm_ops defined in :file:`include/linux/pm.h`. The roles of the +methods included in it will be explained in what follows. For now, it should be +sufficient to remember that the last three methods are specific to runtime power +management while the remaining ones are used during system-wide power +transitions. + +There also is a deprecated "old" or "legacy" interface for power management +operations available at least for some subsystems. This approach does not use +struct dev_pm_ops objects and it is suitable only for implementing system +sleep power management methods in a limited way. Therefore it is not described +in this document, so please refer directly to the source code for more +information about it. + + +Subsystem-Level Methods +----------------------- + +The core methods to suspend and resume devices reside in +struct dev_pm_ops pointed to by the :c:member:`ops` member of +struct dev_pm_domain, or by the :c:member:`pm` member of struct bus_type, +struct device_type and struct class. They are mostly of interest to the +people writing infrastructure for platforms and buses, like PCI or USB, or +device type and device class drivers. They also are relevant to the writers of +device drivers whose subsystems (PM domains, device types, device classes and +bus types) don't provide all power management methods. + +Bus drivers implement these methods as appropriate for the hardware and the +drivers using it; PCI works differently from USB, and so on. Not many people +write subsystem-level drivers; most driver code is a "device driver" that builds +on top of bus-specific framework code. + +For more information on these driver calls, see the description later; +they are called in phases for every device, respecting the parent-child +sequencing in the driver model tree. + + +:file:`/sys/devices/.../power/wakeup` files +------------------------------------------- + +All device objects in the driver model contain fields that control the handling +of system wakeup events (hardware signals that can force the system out of a +sleep state). These fields are initialized by bus or device driver code using +:c:func:`device_set_wakeup_capable()` and :c:func:`device_set_wakeup_enable()`, +defined in :file:`include/linux/pm_wakeup.h`. + +The :c:member:`power.can_wakeup` flag just records whether the device (and its +driver) can physically support wakeup events. The +:c:func:`device_set_wakeup_capable()` routine affects this flag. The +:c:member:`power.wakeup` field is a pointer to an object of type +struct wakeup_source used for controlling whether or not the device should use +its system wakeup mechanism and for notifying the PM core of system wakeup +events signaled by the device. This object is only present for wakeup-capable +devices (i.e. devices whose :c:member:`can_wakeup` flags are set) and is created +(or removed) by :c:func:`device_set_wakeup_capable()`. + +Whether or not a device is capable of issuing wakeup events is a hardware +matter, and the kernel is responsible for keeping track of it. By contrast, +whether or not a wakeup-capable device should issue wakeup events is a policy +decision, and it is managed by user space through a sysfs attribute: the +:file:`power/wakeup` file. User space can write the "enabled" or "disabled" +strings to it to indicate whether or not, respectively, the device is supposed +to signal system wakeup. This file is only present if the +:c:member:`power.wakeup` object exists for the given device and is created (or +removed) along with that object, by :c:func:`device_set_wakeup_capable()`. +Reads from the file will return the corresponding string. + +The initial value in the :file:`power/wakeup` file is "disabled" for the +majority of devices; the major exceptions are power buttons, keyboards, and +Ethernet adapters whose WoL (wake-on-LAN) feature has been set up with ethtool. +It should also default to "enabled" for devices that don't generate wakeup +requests on their own but merely forward wakeup requests from one bus to another +(like PCI Express ports). + +The :c:func:`device_may_wakeup()` routine returns true only if the +:c:member:`power.wakeup` object exists and the corresponding :file:`power/wakeup` +file contains the "enabled" string. This information is used by subsystems, +like the PCI bus type code, to see whether or not to enable the devices' wakeup +mechanisms. If device wakeup mechanisms are enabled or disabled directly by +drivers, they also should use :c:func:`device_may_wakeup()` to decide what to do +during a system sleep transition. Device drivers, however, are not expected to +call :c:func:`device_set_wakeup_enable()` directly in any case. + +It ought to be noted that system wakeup is conceptually different from "remote +wakeup" used by runtime power management, although it may be supported by the +same physical mechanism. Remote wakeup is a feature allowing devices in +low-power states to trigger specific interrupts to signal conditions in which +they should be put into the full-power state. Those interrupts may or may not +be used to signal system wakeup events, depending on the hardware design. On +some systems it is impossible to trigger them from system sleep states. In any +case, remote wakeup should always be enabled for runtime power management for +all devices and drivers that support it. + + +:file:`/sys/devices/.../power/control` files +-------------------------------------------- + +Each device in the driver model has a flag to control whether it is subject to +runtime power management. This flag, :c:member:`runtime_auto`, is initialized +by the bus type (or generally subsystem) code using :c:func:`pm_runtime_allow()` +or :c:func:`pm_runtime_forbid()`; the default is to allow runtime power +management. + +The setting can be adjusted by user space by writing either "on" or "auto" to +the device's :file:`power/control` sysfs file. Writing "auto" calls +:c:func:`pm_runtime_allow()`, setting the flag and allowing the device to be +runtime power-managed by its driver. Writing "on" calls +:c:func:`pm_runtime_forbid()`, clearing the flag, returning the device to full +power if it was in a low-power state, and preventing the +device from being runtime power-managed. User space can check the current value +of the :c:member:`runtime_auto` flag by reading that file. + +The device's :c:member:`runtime_auto` flag has no effect on the handling of +system-wide power transitions. In particular, the device can (and in the +majority of cases should and will) be put into a low-power state during a +system-wide transition to a sleep state even though its :c:member:`runtime_auto` +flag is clear. + +For more information about the runtime power management framework, refer to +Documentation/power/runtime_pm.rst. + + +Calling Drivers to Enter and Leave System Sleep States +====================================================== + +When the system goes into a sleep state, each device's driver is asked to +suspend the device by putting it into a state compatible with the target +system state. That's usually some version of "off", but the details are +system-specific. Also, wakeup-enabled devices will usually stay partly +functional in order to wake the system. + +When the system leaves that low-power state, the device's driver is asked to +resume it by returning it to full power. The suspend and resume operations +always go together, and both are multi-phase operations. + +For simple drivers, suspend might quiesce the device using class code +and then turn its hardware as "off" as possible during suspend_noirq. The +matching resume calls would then completely reinitialize the hardware +before reactivating its class I/O queues. + +More power-aware drivers might prepare the devices for triggering system wakeup +events. + + +Call Sequence Guarantees +------------------------ + +To ensure that bridges and similar links needing to talk to a device are +available when the device is suspended or resumed, the device hierarchy is +walked in a bottom-up order to suspend devices. A top-down order is +used to resume those devices. + +The ordering of the device hierarchy is defined by the order in which devices +get registered: a child can never be registered, probed or resumed before +its parent; and can't be removed or suspended after that parent. + +The policy is that the device hierarchy should match hardware bus topology. +[Or at least the control bus, for devices which use multiple busses.] +In particular, this means that a device registration may fail if the parent of +the device is suspending (i.e. has been chosen by the PM core as the next +device to suspend) or has already suspended, as well as after all of the other +devices have been suspended. Device drivers must be prepared to cope with such +situations. + + +System Power Management Phases +------------------------------ + +Suspending or resuming the system is done in several phases. Different phases +are used for suspend-to-idle, shallow (standby), and deep ("suspend-to-RAM") +sleep states and the hibernation state ("suspend-to-disk"). Each phase involves +executing callbacks for every device before the next phase begins. Not all +buses or classes support all these callbacks and not all drivers use all the +callbacks. The various phases always run after tasks have been frozen and +before they are unfrozen. Furthermore, the ``*_noirq`` phases run at a time +when IRQ handlers have been disabled (except for those marked with the +IRQF_NO_SUSPEND flag). + +All phases use PM domain, bus, type, class or driver callbacks (that is, methods +defined in ``dev->pm_domain->ops``, ``dev->bus->pm``, ``dev->type->pm``, +``dev->class->pm`` or ``dev->driver->pm``). These callbacks are regarded by the +PM core as mutually exclusive. Moreover, PM domain callbacks always take +precedence over all of the other callbacks and, for example, type callbacks take +precedence over bus, class and driver callbacks. To be precise, the following +rules are used to determine which callback to execute in the given phase: + + 1. If ``dev->pm_domain`` is present, the PM core will choose the callback + provided by ``dev->pm_domain->ops`` for execution. + + 2. Otherwise, if both ``dev->type`` and ``dev->type->pm`` are present, the + callback provided by ``dev->type->pm`` will be chosen for execution. + + 3. Otherwise, if both ``dev->class`` and ``dev->class->pm`` are present, + the callback provided by ``dev->class->pm`` will be chosen for + execution. + + 4. Otherwise, if both ``dev->bus`` and ``dev->bus->pm`` are present, the + callback provided by ``dev->bus->pm`` will be chosen for execution. + +This allows PM domains and device types to override callbacks provided by bus +types or device classes if necessary. + +The PM domain, type, class and bus callbacks may in turn invoke device- or +driver-specific methods stored in ``dev->driver->pm``, but they don't have to do +that. + +If the subsystem callback chosen for execution is not present, the PM core will +execute the corresponding method from the ``dev->driver->pm`` set instead if +there is one. + + +Entering System Suspend +----------------------- + +When the system goes into the freeze, standby or memory sleep state, +the phases are: ``prepare``, ``suspend``, ``suspend_late``, ``suspend_noirq``. + + 1. The ``prepare`` phase is meant to prevent races by preventing new + devices from being registered; the PM core would never know that all the + children of a device had been suspended if new children could be + registered at will. [By contrast, from the PM core's perspective, + devices may be unregistered at any time.] Unlike the other + suspend-related phases, during the ``prepare`` phase the device + hierarchy is traversed top-down. + + After the ``->prepare`` callback method returns, no new children may be + registered below the device. The method may also prepare the device or + driver in some way for the upcoming system power transition, but it + should not put the device into a low-power state. Moreover, if the + device supports runtime power management, the ``->prepare`` callback + method must not update its state in case it is necessary to resume it + from runtime suspend later on. + + For devices supporting runtime power management, the return value of the + prepare callback can be used to indicate to the PM core that it may + safely leave the device in runtime suspend (if runtime-suspended + already), provided that all of the device's descendants are also left in + runtime suspend. Namely, if the prepare callback returns a positive + number and that happens for all of the descendants of the device too, + and all of them (including the device itself) are runtime-suspended, the + PM core will skip the ``suspend``, ``suspend_late`` and + ``suspend_noirq`` phases as well as all of the corresponding phases of + the subsequent device resume for all of these devices. In that case, + the ``->complete`` callback will be the next one invoked after the + ``->prepare`` callback and is entirely responsible for putting the + device into a consistent state as appropriate. + + Note that this direct-complete procedure applies even if the device is + disabled for runtime PM; only the runtime-PM status matters. It follows + that if a device has system-sleep callbacks but does not support runtime + PM, then its prepare callback must never return a positive value. This + is because all such devices are initially set to runtime-suspended with + runtime PM disabled. + + This feature also can be controlled by device drivers by using the + ``DPM_FLAG_NO_DIRECT_COMPLETE`` and ``DPM_FLAG_SMART_PREPARE`` driver + power management flags. [Typically, they are set at the time the driver + is probed against the device in question by passing them to the + :c:func:`dev_pm_set_driver_flags` helper function.] If the first of + these flags is set, the PM core will not apply the direct-complete + procedure described above to the given device and, consequenty, to any + of its ancestors. The second flag, when set, informs the middle layer + code (bus types, device types, PM domains, classes) that it should take + the return value of the ``->prepare`` callback provided by the driver + into account and it may only return a positive value from its own + ``->prepare`` callback if the driver's one also has returned a positive + value. + + 2. The ``->suspend`` methods should quiesce the device to stop it from + performing I/O. They also may save the device registers and put it into + the appropriate low-power state, depending on the bus type the device is + on, and they may enable wakeup events. + + However, for devices supporting runtime power management, the + ``->suspend`` methods provided by subsystems (bus types and PM domains + in particular) must follow an additional rule regarding what can be done + to the devices before their drivers' ``->suspend`` methods are called. + Namely, they may resume the devices from runtime suspend by + calling :c:func:`pm_runtime_resume` for them, if that is necessary, but + they must not update the state of the devices in any other way at that + time (in case the drivers need to resume the devices from runtime + suspend in their ``->suspend`` methods). In fact, the PM core prevents + subsystems or drivers from putting devices into runtime suspend at + these times by calling :c:func:`pm_runtime_get_noresume` before issuing + the ``->prepare`` callback (and calling :c:func:`pm_runtime_put` after + issuing the ``->complete`` callback). + + 3. For a number of devices it is convenient to split suspend into the + "quiesce device" and "save device state" phases, in which cases + ``suspend_late`` is meant to do the latter. It is always executed after + runtime power management has been disabled for the device in question. + + 4. The ``suspend_noirq`` phase occurs after IRQ handlers have been disabled, + which means that the driver's interrupt handler will not be called while + the callback method is running. The ``->suspend_noirq`` methods should + save the values of the device's registers that weren't saved previously + and finally put the device into the appropriate low-power state. + + The majority of subsystems and device drivers need not implement this + callback. However, bus types allowing devices to share interrupt + vectors, like PCI, generally need it; otherwise a driver might encounter + an error during the suspend phase by fielding a shared interrupt + generated by some other device after its own device had been set to low + power. + +At the end of these phases, drivers should have stopped all I/O transactions +(DMA, IRQs), saved enough state that they can re-initialize or restore previous +state (as needed by the hardware), and placed the device into a low-power state. +On many platforms they will gate off one or more clock sources; sometimes they +will also switch off power supplies or reduce voltages. [Drivers supporting +runtime PM may already have performed some or all of these steps.] + +If :c:func:`device_may_wakeup()` returns ``true``, the device should be +prepared for generating hardware wakeup signals to trigger a system wakeup event +when the system is in the sleep state. For example, :c:func:`enable_irq_wake()` +might identify GPIO signals hooked up to a switch or other external hardware, +and :c:func:`pci_enable_wake()` does something similar for the PCI PME signal. + +If any of these callbacks returns an error, the system won't enter the desired +low-power state. Instead, the PM core will unwind its actions by resuming all +the devices that were suspended. + + +Leaving System Suspend +---------------------- + +When resuming from freeze, standby or memory sleep, the phases are: +``resume_noirq``, ``resume_early``, ``resume``, ``complete``. + + 1. The ``->resume_noirq`` callback methods should perform any actions + needed before the driver's interrupt handlers are invoked. This + generally means undoing the actions of the ``suspend_noirq`` phase. If + the bus type permits devices to share interrupt vectors, like PCI, the + method should bring the device and its driver into a state in which the + driver can recognize if the device is the source of incoming interrupts, + if any, and handle them correctly. + + For example, the PCI bus type's ``->pm.resume_noirq()`` puts the device + into the full-power state (D0 in the PCI terminology) and restores the + standard configuration registers of the device. Then it calls the + device driver's ``->pm.resume_noirq()`` method to perform device-specific + actions. + + 2. The ``->resume_early`` methods should prepare devices for the execution + of the resume methods. This generally involves undoing the actions of + the preceding ``suspend_late`` phase. + + 3. The ``->resume`` methods should bring the device back to its operating + state, so that it can perform normal I/O. This generally involves + undoing the actions of the ``suspend`` phase. + + 4. The ``complete`` phase should undo the actions of the ``prepare`` phase. + For this reason, unlike the other resume-related phases, during the + ``complete`` phase the device hierarchy is traversed bottom-up. + + Note, however, that new children may be registered below the device as + soon as the ``->resume`` callbacks occur; it's not necessary to wait + until the ``complete`` phase runs. + + Moreover, if the preceding ``->prepare`` callback returned a positive + number, the device may have been left in runtime suspend throughout the + whole system suspend and resume (its ``->suspend``, ``->suspend_late``, + ``->suspend_noirq``, ``->resume_noirq``, + ``->resume_early``, and ``->resume`` callbacks may have been + skipped). In that case, the ``->complete`` callback is entirely + responsible for putting the device into a consistent state after system + suspend if necessary. [For example, it may need to queue up a runtime + resume request for the device for this purpose.] To check if that is + the case, the ``->complete`` callback can consult the device's + ``power.direct_complete`` flag. If that flag is set when the + ``->complete`` callback is being run then the direct-complete mechanism + was used, and special actions may be required to make the device work + correctly afterward. + +At the end of these phases, drivers should be as functional as they were before +suspending: I/O can be performed using DMA and IRQs, and the relevant clocks are +gated on. + +However, the details here may again be platform-specific. For example, +some systems support multiple "run" states, and the mode in effect at +the end of resume might not be the one which preceded suspension. +That means availability of certain clocks or power supplies changed, +which could easily affect how a driver works. + +Drivers need to be able to handle hardware which has been reset since all of the +suspend methods were called, for example by complete reinitialization. +This may be the hardest part, and the one most protected by NDA'd documents +and chip errata. It's simplest if the hardware state hasn't changed since +the suspend was carried out, but that can only be guaranteed if the target +system sleep entered was suspend-to-idle. For the other system sleep states +that may not be the case (and usually isn't for ACPI-defined system sleep +states, like S3). + +Drivers must also be prepared to notice that the device has been removed +while the system was powered down, whenever that's physically possible. +PCMCIA, MMC, USB, Firewire, SCSI, and even IDE are common examples of busses +where common Linux platforms will see such removal. Details of how drivers +will notice and handle such removals are currently bus-specific, and often +involve a separate thread. + +These callbacks may return an error value, but the PM core will ignore such +errors since there's nothing it can do about them other than printing them in +the system log. + + +Entering Hibernation +-------------------- + +Hibernating the system is more complicated than putting it into sleep states, +because it involves creating and saving a system image. Therefore there are +more phases for hibernation, with a different set of callbacks. These phases +always run after tasks have been frozen and enough memory has been freed. + +The general procedure for hibernation is to quiesce all devices ("freeze"), +create an image of the system memory while everything is stable, reactivate all +devices ("thaw"), write the image to permanent storage, and finally shut down +the system ("power off"). The phases used to accomplish this are: ``prepare``, +``freeze``, ``freeze_late``, ``freeze_noirq``, ``thaw_noirq``, ``thaw_early``, +``thaw``, ``complete``, ``prepare``, ``poweroff``, ``poweroff_late``, +``poweroff_noirq``. + + 1. The ``prepare`` phase is discussed in the "Entering System Suspend" + section above. + + 2. The ``->freeze`` methods should quiesce the device so that it doesn't + generate IRQs or DMA, and they may need to save the values of device + registers. However the device does not have to be put in a low-power + state, and to save time it's best not to do so. Also, the device should + not be prepared to generate wakeup events. + + 3. The ``freeze_late`` phase is analogous to the ``suspend_late`` phase + described earlier, except that the device should not be put into a + low-power state and should not be allowed to generate wakeup events. + + 4. The ``freeze_noirq`` phase is analogous to the ``suspend_noirq`` phase + discussed earlier, except again that the device should not be put into + a low-power state and should not be allowed to generate wakeup events. + +At this point the system image is created. All devices should be inactive and +the contents of memory should remain undisturbed while this happens, so that the +image forms an atomic snapshot of the system state. + + 5. The ``thaw_noirq`` phase is analogous to the ``resume_noirq`` phase + discussed earlier. The main difference is that its methods can assume + the device is in the same state as at the end of the ``freeze_noirq`` + phase. + + 6. The ``thaw_early`` phase is analogous to the ``resume_early`` phase + described above. Its methods should undo the actions of the preceding + ``freeze_late``, if necessary. + + 7. The ``thaw`` phase is analogous to the ``resume`` phase discussed + earlier. Its methods should bring the device back to an operating + state, so that it can be used for saving the image if necessary. + + 8. The ``complete`` phase is discussed in the "Leaving System Suspend" + section above. + +At this point the system image is saved, and the devices then need to be +prepared for the upcoming system shutdown. This is much like suspending them +before putting the system into the suspend-to-idle, shallow or deep sleep state, +and the phases are similar. + + 9. The ``prepare`` phase is discussed above. + + 10. The ``poweroff`` phase is analogous to the ``suspend`` phase. + + 11. The ``poweroff_late`` phase is analogous to the ``suspend_late`` phase. + + 12. The ``poweroff_noirq`` phase is analogous to the ``suspend_noirq`` phase. + +The ``->poweroff``, ``->poweroff_late`` and ``->poweroff_noirq`` callbacks +should do essentially the same things as the ``->suspend``, ``->suspend_late`` +and ``->suspend_noirq`` callbacks, respectively. A notable difference is +that they need not store the device register values, because the registers +should already have been stored during the ``freeze``, ``freeze_late`` or +``freeze_noirq`` phases. Also, on many machines the firmware will power-down +the entire system, so it is not necessary for the callback to put the device in +a low-power state. + + +Leaving Hibernation +------------------- + +Resuming from hibernation is, again, more complicated than resuming from a sleep +state in which the contents of main memory are preserved, because it requires +a system image to be loaded into memory and the pre-hibernation memory contents +to be restored before control can be passed back to the image kernel. + +Although in principle the image might be loaded into memory and the +pre-hibernation memory contents restored by the boot loader, in practice this +can't be done because boot loaders aren't smart enough and there is no +established protocol for passing the necessary information. So instead, the +boot loader loads a fresh instance of the kernel, called "the restore kernel", +into memory and passes control to it in the usual way. Then the restore kernel +reads the system image, restores the pre-hibernation memory contents, and passes +control to the image kernel. Thus two different kernel instances are involved +in resuming from hibernation. In fact, the restore kernel may be completely +different from the image kernel: a different configuration and even a different +version. This has important consequences for device drivers and their +subsystems. + +To be able to load the system image into memory, the restore kernel needs to +include at least a subset of device drivers allowing it to access the storage +medium containing the image, although it doesn't need to include all of the +drivers present in the image kernel. After the image has been loaded, the +devices managed by the boot kernel need to be prepared for passing control back +to the image kernel. This is very similar to the initial steps involved in +creating a system image, and it is accomplished in the same way, using +``prepare``, ``freeze``, and ``freeze_noirq`` phases. However, the devices +affected by these phases are only those having drivers in the restore kernel; +other devices will still be in whatever state the boot loader left them. + +Should the restoration of the pre-hibernation memory contents fail, the restore +kernel would go through the "thawing" procedure described above, using the +``thaw_noirq``, ``thaw_early``, ``thaw``, and ``complete`` phases, and then +continue running normally. This happens only rarely. Most often the +pre-hibernation memory contents are restored successfully and control is passed +to the image kernel, which then becomes responsible for bringing the system back +to the working state. + +To achieve this, the image kernel must restore the devices' pre-hibernation +functionality. The operation is much like waking up from a sleep state (with +the memory contents preserved), although it involves different phases: +``restore_noirq``, ``restore_early``, ``restore``, ``complete``. + + 1. The ``restore_noirq`` phase is analogous to the ``resume_noirq`` phase. + + 2. The ``restore_early`` phase is analogous to the ``resume_early`` phase. + + 3. The ``restore`` phase is analogous to the ``resume`` phase. + + 4. The ``complete`` phase is discussed above. + +The main difference from ``resume[_early|_noirq]`` is that +``restore[_early|_noirq]`` must assume the device has been accessed and +reconfigured by the boot loader or the restore kernel. Consequently, the state +of the device may be different from the state remembered from the ``freeze``, +``freeze_late`` and ``freeze_noirq`` phases. The device may even need to be +reset and completely re-initialized. In many cases this difference doesn't +matter, so the ``->resume[_early|_noirq]`` and ``->restore[_early|_norq]`` +method pointers can be set to the same routines. Nevertheless, different +callback pointers are used in case there is a situation where it actually does +matter. + + +Power Management Notifiers +========================== + +There are some operations that cannot be carried out by the power management +callbacks discussed above, because the callbacks occur too late or too early. +To handle these cases, subsystems and device drivers may register power +management notifiers that are called before tasks are frozen and after they have +been thawed. Generally speaking, the PM notifiers are suitable for performing +actions that either require user space to be available, or at least won't +interfere with user space. + +For details refer to Documentation/driver-api/pm/notifiers.rst. + + +Device Low-Power (suspend) States +================================= + +Device low-power states aren't standard. One device might only handle +"on" and "off", while another might support a dozen different versions of +"on" (how many engines are active?), plus a state that gets back to "on" +faster than from a full "off". + +Some buses define rules about what different suspend states mean. PCI +gives one example: after the suspend sequence completes, a non-legacy +PCI device may not perform DMA or issue IRQs, and any wakeup events it +issues would be issued through the PME# bus signal. Plus, there are +several PCI-standard device states, some of which are optional. + +In contrast, integrated system-on-chip processors often use IRQs as the +wakeup event sources (so drivers would call :c:func:`enable_irq_wake`) and +might be able to treat DMA completion as a wakeup event (sometimes DMA can stay +active too, it'd only be the CPU and some peripherals that sleep). + +Some details here may be platform-specific. Systems may have devices that +can be fully active in certain sleep states, such as an LCD display that's +refreshed using DMA while most of the system is sleeping lightly ... and +its frame buffer might even be updated by a DSP or other non-Linux CPU while +the Linux control processor stays idle. + +Moreover, the specific actions taken may depend on the target system state. +One target system state might allow a given device to be very operational; +another might require a hard shut down with re-initialization on resume. +And two different target systems might use the same device in different +ways; the aforementioned LCD might be active in one product's "standby", +but a different product using the same SOC might work differently. + + +Device Power Management Domains +=============================== + +Sometimes devices share reference clocks or other power resources. In those +cases it generally is not possible to put devices into low-power states +individually. Instead, a set of devices sharing a power resource can be put +into a low-power state together at the same time by turning off the shared +power resource. Of course, they also need to be put into the full-power state +together, by turning the shared power resource on. A set of devices with this +property is often referred to as a power domain. A power domain may also be +nested inside another power domain. The nested domain is referred to as the +sub-domain of the parent domain. + +Support for power domains is provided through the :c:member:`pm_domain` field of +struct device. This field is a pointer to an object of type +struct dev_pm_domain, defined in :file:`include/linux/pm.h`, providing a set +of power management callbacks analogous to the subsystem-level and device driver +callbacks that are executed for the given device during all power transitions, +instead of the respective subsystem-level callbacks. Specifically, if a +device's :c:member:`pm_domain` pointer is not NULL, the ``->suspend()`` callback +from the object pointed to by it will be executed instead of its subsystem's +(e.g. bus type's) ``->suspend()`` callback and analogously for all of the +remaining callbacks. In other words, power management domain callbacks, if +defined for the given device, always take precedence over the callbacks provided +by the device's subsystem (e.g. bus type). + +The support for device power management domains is only relevant to platforms +needing to use the same device driver power management callbacks in many +different power domain configurations and wanting to avoid incorporating the +support for power domains into subsystem-level callbacks, for example by +modifying the platform bus type. Other platforms need not implement it or take +it into account in any way. + +Devices may be defined as IRQ-safe which indicates to the PM core that their +runtime PM callbacks may be invoked with disabled interrupts (see +Documentation/power/runtime_pm.rst for more information). If an +IRQ-safe device belongs to a PM domain, the runtime PM of the domain will be +disallowed, unless the domain itself is defined as IRQ-safe. However, it +makes sense to define a PM domain as IRQ-safe only if all the devices in it +are IRQ-safe. Moreover, if an IRQ-safe domain has a parent domain, the runtime +PM of the parent is only allowed if the parent itself is IRQ-safe too with the +additional restriction that all child domains of an IRQ-safe parent must also +be IRQ-safe. + + +Runtime Power Management +======================== + +Many devices are able to dynamically power down while the system is still +running. This feature is useful for devices that are not being used, and +can offer significant power savings on a running system. These devices +often support a range of runtime power states, which might use names such +as "off", "sleep", "idle", "active", and so on. Those states will in some +cases (like PCI) be partially constrained by the bus the device uses, and will +usually include hardware states that are also used in system sleep states. + +A system-wide power transition can be started while some devices are in low +power states due to runtime power management. The system sleep PM callbacks +should recognize such situations and react to them appropriately, but the +necessary actions are subsystem-specific. + +In some cases the decision may be made at the subsystem level while in other +cases the device driver may be left to decide. In some cases it may be +desirable to leave a suspended device in that state during a system-wide power +transition, but in other cases the device must be put back into the full-power +state temporarily, for example so that its system wakeup capability can be +disabled. This all depends on the hardware and the design of the subsystem and +device driver in question. + +If it is necessary to resume a device from runtime suspend during a system-wide +transition into a sleep state, that can be done by calling +:c:func:`pm_runtime_resume` from the ``->suspend`` callback (or the ``->freeze`` +or ``->poweroff`` callback for transitions related to hibernation) of either the +device's driver or its subsystem (for example, a bus type or a PM domain). +However, subsystems must not otherwise change the runtime status of devices +from their ``->prepare`` and ``->suspend`` callbacks (or equivalent) *before* +invoking device drivers' ``->suspend`` callbacks (or equivalent). + +.. _smart_suspend_flag: + +The ``DPM_FLAG_SMART_SUSPEND`` Driver Flag +------------------------------------------ + +Some bus types and PM domains have a policy to resume all devices from runtime +suspend upfront in their ``->suspend`` callbacks, but that may not be really +necessary if the device's driver can cope with runtime-suspended devices. +The driver can indicate this by setting ``DPM_FLAG_SMART_SUSPEND`` in +:c:member:`power.driver_flags` at probe time, with the assistance of the +:c:func:`dev_pm_set_driver_flags` helper routine. + +Setting that flag causes the PM core and middle-layer code +(bus types, PM domains etc.) to skip the ``->suspend_late`` and +``->suspend_noirq`` callbacks provided by the driver if the device remains in +runtime suspend throughout those phases of the system-wide suspend (and +similarly for the "freeze" and "poweroff" parts of system hibernation). +[Otherwise the same driver +callback might be executed twice in a row for the same device, which would not +be valid in general.] If the middle-layer system-wide PM callbacks are present +for the device then they are responsible for skipping these driver callbacks; +if not then the PM core skips them. The subsystem callback routines can +determine whether they need to skip the driver callbacks by testing the return +value from the :c:func:`dev_pm_skip_suspend` helper function. + +In addition, with ``DPM_FLAG_SMART_SUSPEND`` set, the driver's ``->thaw_noirq`` +and ``->thaw_early`` callbacks are skipped in hibernation if the device remained +in runtime suspend throughout the preceding "freeze" transition. Again, if the +middle-layer callbacks are present for the device, they are responsible for +doing this, otherwise the PM core takes care of it. + + +The ``DPM_FLAG_MAY_SKIP_RESUME`` Driver Flag +-------------------------------------------- + +During system-wide resume from a sleep state it's easiest to put devices into +the full-power state, as explained in Documentation/power/runtime_pm.rst. +[Refer to that document for more information regarding this particular issue as +well as for information on the device runtime power management framework in +general.] However, it often is desirable to leave devices in suspend after +system transitions to the working state, especially if those devices had been in +runtime suspend before the preceding system-wide suspend (or analogous) +transition. + +To that end, device drivers can use the ``DPM_FLAG_MAY_SKIP_RESUME`` flag to +indicate to the PM core and middle-layer code that they allow their "noirq" and +"early" resume callbacks to be skipped if the device can be left in suspend +after system-wide PM transitions to the working state. Whether or not that is +the case generally depends on the state of the device before the given system +suspend-resume cycle and on the type of the system transition under way. +In particular, the "thaw" and "restore" transitions related to hibernation are +not affected by ``DPM_FLAG_MAY_SKIP_RESUME`` at all. [All callbacks are +issued during the "restore" transition regardless of the flag settings, +and whether or not any driver callbacks +are skipped during the "thaw" transition depends whether or not the +``DPM_FLAG_SMART_SUSPEND`` flag is set (see `above <smart_suspend_flag_>`_). +In addition, a device is not allowed to remain in runtime suspend if any of its +children will be returned to full power.] + +The ``DPM_FLAG_MAY_SKIP_RESUME`` flag is taken into account in combination with +the :c:member:`power.may_skip_resume` status bit set by the PM core during the +"suspend" phase of suspend-type transitions. If the driver or the middle layer +has a reason to prevent the driver's "noirq" and "early" resume callbacks from +being skipped during the subsequent system resume transition, it should +clear :c:member:`power.may_skip_resume` in its ``->suspend``, ``->suspend_late`` +or ``->suspend_noirq`` callback. [Note that the drivers setting +``DPM_FLAG_SMART_SUSPEND`` need to clear :c:member:`power.may_skip_resume` in +their ``->suspend`` callback in case the other two are skipped.] + +Setting the :c:member:`power.may_skip_resume` status bit along with the +``DPM_FLAG_MAY_SKIP_RESUME`` flag is necessary, but generally not sufficient, +for the driver's "noirq" and "early" resume callbacks to be skipped. Whether or +not they should be skipped can be determined by evaluating the +:c:func:`dev_pm_skip_resume` helper function. + +If that function returns ``true``, the driver's "noirq" and "early" resume +callbacks should be skipped and the device's runtime PM status will be set to +"suspended" by the PM core. Otherwise, if the device was runtime-suspended +during the preceding system-wide suspend transition and its +``DPM_FLAG_SMART_SUSPEND`` is set, its runtime PM status will be set to +"active" by the PM core. [Hence, the drivers that do not set +``DPM_FLAG_SMART_SUSPEND`` should not expect the runtime PM status of their +devices to be changed from "suspended" to "active" by the PM core during +system-wide resume-type transitions.] + +If the ``DPM_FLAG_MAY_SKIP_RESUME`` flag is not set for a device, but +``DPM_FLAG_SMART_SUSPEND`` is set and the driver's "late" and "noirq" suspend +callbacks are skipped, its system-wide "noirq" and "early" resume callbacks, if +present, are invoked as usual and the device's runtime PM status is set to +"active" by the PM core before enabling runtime PM for it. In that case, the +driver must be prepared to cope with the invocation of its system-wide resume +callbacks back-to-back with its ``->runtime_suspend`` one (without the +intervening ``->runtime_resume`` and system-wide suspend callbacks) and the +final state of the device must reflect the "active" runtime PM status in that +case. [Note that this is not a problem at all if the driver's +``->suspend_late`` callback pointer points to the same function as its +``->runtime_suspend`` one and its ``->resume_early`` callback pointer points to +the same function as the ``->runtime_resume`` one, while none of the other +system-wide suspend-resume callbacks of the driver are present, for example.] + +Likewise, if ``DPM_FLAG_MAY_SKIP_RESUME`` is set for a device, its driver's +system-wide "noirq" and "early" resume callbacks may be skipped while its "late" +and "noirq" suspend callbacks may have been executed (in principle, regardless +of whether or not ``DPM_FLAG_SMART_SUSPEND`` is set). In that case, the driver +needs to be able to cope with the invocation of its ``->runtime_resume`` +callback back-to-back with its "late" and "noirq" suspend ones. [For instance, +that is not a concern if the driver sets both ``DPM_FLAG_SMART_SUSPEND`` and +``DPM_FLAG_MAY_SKIP_RESUME`` and uses the same pair of suspend/resume callback +functions for runtime PM and system-wide suspend/resume.] diff --git a/Documentation/driver-api/pm/index.rst b/Documentation/driver-api/pm/index.rst new file mode 100644 index 0000000000..c2a9ef8d11 --- /dev/null +++ b/Documentation/driver-api/pm/index.rst @@ -0,0 +1,19 @@ +.. SPDX-License-Identifier: GPL-2.0 + +=============================== +CPU and Device Power Management +=============================== + +.. toctree:: + + cpuidle + devices + notifiers + types + +.. only:: subproject and html + + Indices + ======= + + * :ref:`genindex` diff --git a/Documentation/driver-api/pm/notifiers.rst b/Documentation/driver-api/pm/notifiers.rst new file mode 100644 index 0000000000..186435c43b --- /dev/null +++ b/Documentation/driver-api/pm/notifiers.rst @@ -0,0 +1,74 @@ +.. SPDX-License-Identifier: GPL-2.0 +.. include:: <isonum.txt> + +============================= +Suspend/Hibernation Notifiers +============================= + +:Copyright: |copy| 2016 Intel Corporation + +:Author: Rafael J. Wysocki <rafael.j.wysocki@intel.com> + + +There are some operations that subsystems or drivers may want to carry out +before hibernation/suspend or after restore/resume, but they require the system +to be fully functional, so the drivers' and subsystems' ``->suspend()`` and +``->resume()`` or even ``->prepare()`` and ``->complete()`` callbacks are not +suitable for this purpose. + +For example, device drivers may want to upload firmware to their devices after +resume/restore, but they cannot do it by calling :c:func:`request_firmware()` +from their ``->resume()`` or ``->complete()`` callback routines (user land +processes are frozen at these points). The solution may be to load the firmware +into memory before processes are frozen and upload it from there in the +``->resume()`` routine. A suspend/hibernation notifier may be used for that. + +Subsystems or drivers having such needs can register suspend notifiers that +will be called upon the following events by the PM core: + +``PM_HIBERNATION_PREPARE`` + The system is going to hibernate, tasks will be frozen immediately. This + is different from ``PM_SUSPEND_PREPARE`` below, because in this case + additional work is done between the notifiers and the invocation of PM + callbacks for the "freeze" transition. + +``PM_POST_HIBERNATION`` + The system memory state has been restored from a hibernation image or an + error occurred during hibernation. Device restore callbacks have been + executed and tasks have been thawed. + +``PM_RESTORE_PREPARE`` + The system is going to restore a hibernation image. If all goes well, + the restored image kernel will issue a ``PM_POST_HIBERNATION`` + notification. + +``PM_POST_RESTORE`` + An error occurred during restore from hibernation. Device restore + callbacks have been executed and tasks have been thawed. + +``PM_SUSPEND_PREPARE`` + The system is preparing for suspend. + +``PM_POST_SUSPEND`` + The system has just resumed or an error occurred during suspend. Device + resume callbacks have been executed and tasks have been thawed. + +It is generally assumed that whatever the notifiers do for +``PM_HIBERNATION_PREPARE``, should be undone for ``PM_POST_HIBERNATION``. +Analogously, operations carried out for ``PM_SUSPEND_PREPARE`` should be +reversed for ``PM_POST_SUSPEND``. + +Moreover, if one of the notifiers fails for the ``PM_HIBERNATION_PREPARE`` or +``PM_SUSPEND_PREPARE`` event, the notifiers that have already succeeded for that +event will be called for ``PM_POST_HIBERNATION`` or ``PM_POST_SUSPEND``, +respectively. + +The hibernation and suspend notifiers are called with :c:data:`pm_mutex` held. +They are defined in the usual way, but their last argument is meaningless (it is +always NULL). + +To register and/or unregister a suspend notifier use +:c:func:`register_pm_notifier()` and :c:func:`unregister_pm_notifier()`, +respectively (both defined in :file:`include/linux/suspend.h`). If you don't +need to unregister the notifier, you can also use the :c:func:`pm_notifier()` +macro defined in :file:`include/linux/suspend.h`. diff --git a/Documentation/driver-api/pm/types.rst b/Documentation/driver-api/pm/types.rst new file mode 100644 index 0000000000..73a231caf7 --- /dev/null +++ b/Documentation/driver-api/pm/types.rst @@ -0,0 +1,7 @@ +.. SPDX-License-Identifier: GPL-2.0 + +================================== +Device Power Management Data Types +================================== + +.. kernel-doc:: include/linux/pm.h diff --git a/Documentation/driver-api/pps.rst b/Documentation/driver-api/pps.rst new file mode 100644 index 0000000000..2d6b99766e --- /dev/null +++ b/Documentation/driver-api/pps.rst @@ -0,0 +1,242 @@ +.. SPDX-License-Identifier: GPL-2.0 + +====================== +PPS - Pulse Per Second +====================== + +Copyright (C) 2007 Rodolfo Giometti <giometti@enneenne.com> + +This program is free software; you can redistribute it and/or modify +it under the terms of the GNU General Public License as published by +the Free Software Foundation; either version 2 of the License, or +(at your option) any later version. + +This program is distributed in the hope that it will be useful, +but WITHOUT ANY WARRANTY; without even the implied warranty of +MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +GNU General Public License for more details. + + + +Overview +-------- + +LinuxPPS provides a programming interface (API) to define in the +system several PPS sources. + +PPS means "pulse per second" and a PPS source is just a device which +provides a high precision signal each second so that an application +can use it to adjust system clock time. + +A PPS source can be connected to a serial port (usually to the Data +Carrier Detect pin) or to a parallel port (ACK-pin) or to a special +CPU's GPIOs (this is the common case in embedded systems) but in each +case when a new pulse arrives the system must apply to it a timestamp +and record it for userland. + +Common use is the combination of the NTPD as userland program, with a +GPS receiver as PPS source, to obtain a wallclock-time with +sub-millisecond synchronisation to UTC. + + +RFC considerations +------------------ + +While implementing a PPS API as RFC 2783 defines and using an embedded +CPU GPIO-Pin as physical link to the signal, I encountered a deeper +problem: + + At startup it needs a file descriptor as argument for the function + time_pps_create(). + +This implies that the source has a /dev/... entry. This assumption is +OK for the serial and parallel port, where you can do something +useful besides(!) the gathering of timestamps as it is the central +task for a PPS API. But this assumption does not work for a single +purpose GPIO line. In this case even basic file-related functionality +(like read() and write()) makes no sense at all and should not be a +precondition for the use of a PPS API. + +The problem can be simply solved if you consider that a PPS source is +not always connected with a GPS data source. + +So your programs should check if the GPS data source (the serial port +for instance) is a PPS source too, and if not they should provide the +possibility to open another device as PPS source. + +In LinuxPPS the PPS sources are simply char devices usually mapped +into files /dev/pps0, /dev/pps1, etc. + + +PPS with USB to serial devices +------------------------------ + +It is possible to grab the PPS from an USB to serial device. However, +you should take into account the latencies and jitter introduced by +the USB stack. Users have reported clock instability around +-1ms when +synchronized with PPS through USB. With USB 2.0, jitter may decrease +down to the order of 125 microseconds. + +This may be suitable for time server synchronization with NTP because +of its undersampling and algorithms. + +If your device doesn't report PPS, you can check that the feature is +supported by its driver. Most of the time, you only need to add a call +to usb_serial_handle_dcd_change after checking the DCD status (see +ch341 and pl2303 examples). + + +Coding example +-------------- + +To register a PPS source into the kernel you should define a struct +pps_source_info as follows:: + + static struct pps_source_info pps_ktimer_info = { + .name = "ktimer", + .path = "", + .mode = PPS_CAPTUREASSERT | PPS_OFFSETASSERT | + PPS_ECHOASSERT | + PPS_CANWAIT | PPS_TSFMT_TSPEC, + .echo = pps_ktimer_echo, + .owner = THIS_MODULE, + }; + +and then calling the function pps_register_source() in your +initialization routine as follows:: + + source = pps_register_source(&pps_ktimer_info, + PPS_CAPTUREASSERT | PPS_OFFSETASSERT); + +The pps_register_source() prototype is:: + + int pps_register_source(struct pps_source_info *info, int default_params) + +where "info" is a pointer to a structure that describes a particular +PPS source, "default_params" tells the system what the initial default +parameters for the device should be (it is obvious that these parameters +must be a subset of ones defined in the struct +pps_source_info which describe the capabilities of the driver). + +Once you have registered a new PPS source into the system you can +signal an assert event (for example in the interrupt handler routine) +just using:: + + pps_event(source, &ts, PPS_CAPTUREASSERT, ptr) + +where "ts" is the event's timestamp. + +The same function may also run the defined echo function +(pps_ktimer_echo(), passing to it the "ptr" pointer) if the user +asked for that... etc.. + +Please see the file drivers/pps/clients/pps-ktimer.c for example code. + + +SYSFS support +------------- + +If the SYSFS filesystem is enabled in the kernel it provides a new class:: + + $ ls /sys/class/pps/ + pps0/ pps1/ pps2/ + +Every directory is the ID of a PPS sources defined in the system and +inside you find several files:: + + $ ls -F /sys/class/pps/pps0/ + assert dev mode path subsystem@ + clear echo name power/ uevent + + +Inside each "assert" and "clear" file you can find the timestamp and a +sequence number:: + + $ cat /sys/class/pps/pps0/assert + 1170026870.983207967#8 + +Where before the "#" is the timestamp in seconds; after it is the +sequence number. Other files are: + + * echo: reports if the PPS source has an echo function or not; + + * mode: reports available PPS functioning modes; + + * name: reports the PPS source's name; + + * path: reports the PPS source's device path, that is the device the + PPS source is connected to (if it exists). + + +Testing the PPS support +----------------------- + +In order to test the PPS support even without specific hardware you can use +the pps-ktimer driver (see the client subsection in the PPS configuration menu) +and the userland tools available in your distribution's pps-tools package, +http://linuxpps.org , or https://github.com/redlab-i/pps-tools. + +Once you have enabled the compilation of pps-ktimer just modprobe it (if +not statically compiled):: + + # modprobe pps-ktimer + +and the run ppstest as follow:: + + $ ./ppstest /dev/pps1 + trying PPS source "/dev/pps1" + found PPS source "/dev/pps1" + ok, found 1 source(s), now start fetching data... + source 0 - assert 1186592699.388832443, sequence: 364 - clear 0.000000000, sequence: 0 + source 0 - assert 1186592700.388931295, sequence: 365 - clear 0.000000000, sequence: 0 + source 0 - assert 1186592701.389032765, sequence: 366 - clear 0.000000000, sequence: 0 + +Please note that to compile userland programs, you need the file timepps.h. +This is available in the pps-tools repository mentioned above. + + +Generators +---------- + +Sometimes one needs to be able not only to catch PPS signals but to produce +them also. For example, running a distributed simulation, which requires +computers' clock to be synchronized very tightly. One way to do this is to +invent some complicated hardware solutions but it may be neither necessary +nor affordable. The cheap way is to load a PPS generator on one of the +computers (master) and PPS clients on others (slaves), and use very simple +cables to deliver signals using parallel ports, for example. + +Parallel port cable pinout:: + + pin name master slave + 1 STROBE *------ * + 2 D0 * | * + 3 D1 * | * + 4 D2 * | * + 5 D3 * | * + 6 D4 * | * + 7 D5 * | * + 8 D6 * | * + 9 D7 * | * + 10 ACK * ------* + 11 BUSY * * + 12 PE * * + 13 SEL * * + 14 AUTOFD * * + 15 ERROR * * + 16 INIT * * + 17 SELIN * * + 18-25 GND *-----------* + +Please note that parallel port interrupt occurs only on high->low transition, +so it is used for PPS assert edge. PPS clear edge can be determined only +using polling in the interrupt handler which actually can be done way more +precisely because interrupt handling delays can be quite big and random. So +current parport PPS generator implementation (pps_gen_parport module) is +geared towards using the clear edge for time synchronization. + +Clear edge polling is done with disabled interrupts so it's better to select +delay between assert and clear edge as small as possible to reduce system +latencies. But if it is too small slave won't be able to capture clear edge +transition. The default of 30us should be good enough in most situations. +The delay can be selected using 'delay' pps_gen_parport module parameter. diff --git a/Documentation/driver-api/ptp.rst b/Documentation/driver-api/ptp.rst new file mode 100644 index 0000000000..5e033c3b11 --- /dev/null +++ b/Documentation/driver-api/ptp.rst @@ -0,0 +1,137 @@ +.. SPDX-License-Identifier: GPL-2.0 + +=========================================== +PTP hardware clock infrastructure for Linux +=========================================== + + This patch set introduces support for IEEE 1588 PTP clocks in + Linux. Together with the SO_TIMESTAMPING socket options, this + presents a standardized method for developing PTP user space + programs, synchronizing Linux with external clocks, and using the + ancillary features of PTP hardware clocks. + + A new class driver exports a kernel interface for specific clock + drivers and a user space interface. The infrastructure supports a + complete set of PTP hardware clock functionality. + + + Basic clock operations + - Set time + - Get time + - Shift the clock by a given offset atomically + - Adjust clock frequency + + + Ancillary clock features + - Time stamp external events + - Period output signals configurable from user space + - Low Pass Filter (LPF) access from user space + - Synchronization of the Linux system time via the PPS subsystem + +PTP hardware clock kernel API +============================= + + A PTP clock driver registers itself with the class driver. The + class driver handles all of the dealings with user space. The + author of a clock driver need only implement the details of + programming the clock hardware. The clock driver notifies the class + driver of asynchronous events (alarms and external time stamps) via + a simple message passing interface. + + The class driver supports multiple PTP clock drivers. In normal use + cases, only one PTP clock is needed. However, for testing and + development, it can be useful to have more than one clock in a + single system, in order to allow performance comparisons. + +PTP hardware clock user space API +================================= + + The class driver also creates a character device for each + registered clock. User space can use an open file descriptor from + the character device as a POSIX clock id and may call + clock_gettime, clock_settime, and clock_adjtime. These calls + implement the basic clock operations. + + User space programs may control the clock using standardized + ioctls. A program may query, enable, configure, and disable the + ancillary clock features. User space can receive time stamped + events via blocking read() and poll(). + +Writing clock drivers +===================== + + Clock drivers include include/linux/ptp_clock_kernel.h and register + themselves by presenting a 'struct ptp_clock_info' to the + registration method. Clock drivers must implement all of the + functions in the interface. If a clock does not offer a particular + ancillary feature, then the driver should just return -EOPNOTSUPP + from those functions. + + Drivers must ensure that all of the methods in interface are + reentrant. Since most hardware implementations treat the time value + as a 64 bit integer accessed as two 32 bit registers, drivers + should use spin_lock_irqsave/spin_unlock_irqrestore to protect + against concurrent access. This locking cannot be accomplished in + class driver, since the lock may also be needed by the clock + driver's interrupt service routine. + +PTP hardware clock requirements for '.adjphase' +----------------------------------------------- + + The 'struct ptp_clock_info' interface has a '.adjphase' function. + This function has a set of requirements from the PHC in order to be + implemented. + + * The PHC implements a servo algorithm internally that is used to + correct the offset passed in the '.adjphase' call. + * When other PTP adjustment functions are called, the PHC servo + algorithm is disabled. + + **NOTE:** '.adjphase' is not a simple time adjustment functionality + that 'jumps' the PHC clock time based on the provided offset. It + should correct the offset provided using an internal algorithm. + +Supported hardware +================== + + * Freescale eTSEC gianfar + + - 2 Time stamp external triggers, programmable polarity (opt. interrupt) + - 2 Alarm registers (optional interrupt) + - 3 Periodic signals (optional interrupt) + + * National DP83640 + + - 6 GPIOs programmable as inputs or outputs + - 6 GPIOs with dedicated functions (LED/JTAG/clock) can also be + used as general inputs or outputs + - GPIO inputs can time stamp external triggers + - GPIO outputs can produce periodic signals + - 1 interrupt pin + + * Intel IXP465 + + - Auxiliary Slave/Master Mode Snapshot (optional interrupt) + - Target Time (optional interrupt) + + * Renesas (IDT) ClockMatrix™ + + - Up to 4 independent PHC channels + - Integrated low pass filter (LPF), access via .adjPhase (compliant to ITU-T G.8273.2) + - Programmable output periodic signals + - Programmable inputs can time stamp external triggers + - Driver and/or hardware configuration through firmware (idtcm.bin) + - LPF settings (bandwidth, phase limiting, automatic holdover, physical layer assist (per ITU-T G.8273.2)) + - Programmable output PTP clocks, any frequency up to 1GHz (to other PHY/MAC time stampers, refclk to ASSPs/SoCs/FPGAs) + - Lock to GNSS input, automatic switching between GNSS and user-space PHC control (optional) + + * NVIDIA Mellanox + + - GPIO + - Certain variants of ConnectX-6 Dx and later products support one + GPIO which can time stamp external triggers and one GPIO to produce + periodic signals. + - Certain variants of ConnectX-5 and older products support one GPIO, + configured to either time stamp external triggers or produce + periodic signals. + - PHC instances + - All ConnectX devices have a free-running counter + - ConnectX-6 Dx and later devices have a UTC format counter diff --git a/Documentation/driver-api/pwm.rst b/Documentation/driver-api/pwm.rst new file mode 100644 index 0000000000..3fdc95f7a1 --- /dev/null +++ b/Documentation/driver-api/pwm.rst @@ -0,0 +1,176 @@ +====================================== +Pulse Width Modulation (PWM) interface +====================================== + +This provides an overview about the Linux PWM interface + +PWMs are commonly used for controlling LEDs, fans or vibrators in +cell phones. PWMs with a fixed purpose have no need implementing +the Linux PWM API (although they could). However, PWMs are often +found as discrete devices on SoCs which have no fixed purpose. It's +up to the board designer to connect them to LEDs or fans. To provide +this kind of flexibility the generic PWM API exists. + +Identifying PWMs +---------------- + +Users of the legacy PWM API use unique IDs to refer to PWM devices. + +Instead of referring to a PWM device via its unique ID, board setup code +should instead register a static mapping that can be used to match PWM +consumers to providers, as given in the following example:: + + static struct pwm_lookup board_pwm_lookup[] = { + PWM_LOOKUP("tegra-pwm", 0, "pwm-backlight", NULL, + 50000, PWM_POLARITY_NORMAL), + }; + + static void __init board_init(void) + { + ... + pwm_add_table(board_pwm_lookup, ARRAY_SIZE(board_pwm_lookup)); + ... + } + +Using PWMs +---------- + +Consumers use the pwm_get() function and pass to it the consumer device or a +consumer name. pwm_put() is used to free the PWM device. Managed variants of +the getter, devm_pwm_get() and devm_fwnode_pwm_get(), also exist. + +After being requested, a PWM has to be configured using:: + + int pwm_apply_state(struct pwm_device *pwm, struct pwm_state *state); + +This API controls both the PWM period/duty_cycle config and the +enable/disable state. + +As a consumer, don't rely on the output's state for a disabled PWM. If it's +easily possible, drivers are supposed to emit the inactive state, but some +drivers cannot. If you rely on getting the inactive state, use .duty_cycle=0, +.enabled=true. + +There is also a usage_power setting: If set, the PWM driver is only required to +maintain the power output but has more freedom regarding signal form. +If supported by the driver, the signal can be optimized, for example to improve +EMI by phase shifting the individual channels of a chip. + +The pwm_config(), pwm_enable() and pwm_disable() functions are just wrappers +around pwm_apply_state() and should not be used if the user wants to change +several parameter at once. For example, if you see pwm_config() and +pwm_{enable,disable}() calls in the same function, this probably means you +should switch to pwm_apply_state(). + +The PWM user API also allows one to query the PWM state that was passed to the +last invocation of pwm_apply_state() using pwm_get_state(). Note this is +different to what the driver has actually implemented if the request cannot be +satisfied exactly with the hardware in use. There is currently no way for +consumers to get the actually implemented settings. + +In addition to the PWM state, the PWM API also exposes PWM arguments, which +are the reference PWM config one should use on this PWM. +PWM arguments are usually platform-specific and allows the PWM user to only +care about dutycycle relatively to the full period (like, duty = 50% of the +period). struct pwm_args contains 2 fields (period and polarity) and should +be used to set the initial PWM config (usually done in the probe function +of the PWM user). PWM arguments are retrieved with pwm_get_args(). + +All consumers should really be reconfiguring the PWM upon resume as +appropriate. This is the only way to ensure that everything is resumed in +the proper order. + +Using PWMs with the sysfs interface +----------------------------------- + +If CONFIG_SYSFS is enabled in your kernel configuration a simple sysfs +interface is provided to use the PWMs from userspace. It is exposed at +/sys/class/pwm/. Each probed PWM controller/chip will be exported as +pwmchipN, where N is the base of the PWM chip. Inside the directory you +will find: + + npwm + The number of PWM channels this chip supports (read-only). + + export + Exports a PWM channel for use with sysfs (write-only). + + unexport + Unexports a PWM channel from sysfs (write-only). + +The PWM channels are numbered using a per-chip index from 0 to npwm-1. + +When a PWM channel is exported a pwmX directory will be created in the +pwmchipN directory it is associated with, where X is the number of the +channel that was exported. The following properties will then be available: + + period + The total period of the PWM signal (read/write). + Value is in nanoseconds and is the sum of the active and inactive + time of the PWM. + + duty_cycle + The active time of the PWM signal (read/write). + Value is in nanoseconds and must be less than the period. + + polarity + Changes the polarity of the PWM signal (read/write). + Writes to this property only work if the PWM chip supports changing + the polarity. The polarity can only be changed if the PWM is not + enabled. Value is the string "normal" or "inversed". + + enable + Enable/disable the PWM signal (read/write). + + - 0 - disabled + - 1 - enabled + +Implementing a PWM driver +------------------------- + +Currently there are two ways to implement pwm drivers. Traditionally +there only has been the barebone API meaning that each driver has +to implement the pwm_*() functions itself. This means that it's impossible +to have multiple PWM drivers in the system. For this reason it's mandatory +for new drivers to use the generic PWM framework. + +A new PWM controller/chip can be added using pwmchip_add() and removed +again with pwmchip_remove(). pwmchip_add() takes a filled in struct +pwm_chip as argument which provides a description of the PWM chip, the +number of PWM devices provided by the chip and the chip-specific +implementation of the supported PWM operations to the framework. + +When implementing polarity support in a PWM driver, make sure to respect the +signal conventions in the PWM framework. By definition, normal polarity +characterizes a signal starts high for the duration of the duty cycle and +goes low for the remainder of the period. Conversely, a signal with inversed +polarity starts low for the duration of the duty cycle and goes high for the +remainder of the period. + +Drivers are encouraged to implement ->apply() instead of the legacy +->enable(), ->disable() and ->config() methods. Doing that should provide +atomicity in the PWM config workflow, which is required when the PWM controls +a critical device (like a regulator). + +The implementation of ->get_state() (a method used to retrieve initial PWM +state) is also encouraged for the same reason: letting the PWM user know +about the current PWM state would allow him to avoid glitches. + +Drivers should not implement any power management. In other words, +consumers should implement it as described in the "Using PWMs" section. + +Locking +------- + +The PWM core list manipulations are protected by a mutex, so pwm_get() +and pwm_put() may not be called from an atomic context. Currently the +PWM core does not enforce any locking to pwm_enable(), pwm_disable() and +pwm_config(), so the calling context is currently driver specific. This +is an issue derived from the former barebone API and should be fixed soon. + +Helpers +------- + +Currently a PWM can only be configured with period_ns and duty_ns. For several +use cases freq_hz and duty_percent might be better. Instead of calculating +this in your driver please consider adding appropriate helpers to the framework. diff --git a/Documentation/driver-api/rapidio/index.rst b/Documentation/driver-api/rapidio/index.rst new file mode 100644 index 0000000000..a41b4242d1 --- /dev/null +++ b/Documentation/driver-api/rapidio/index.rst @@ -0,0 +1,15 @@ +.. SPDX-License-Identifier: GPL-2.0 + +=========================== +The Linux RapidIO Subsystem +=========================== + +.. toctree:: + :maxdepth: 1 + + rapidio + sysfs + + tsi721 + mport_cdev + rio_cm diff --git a/Documentation/driver-api/rapidio/mport_cdev.rst b/Documentation/driver-api/rapidio/mport_cdev.rst new file mode 100644 index 0000000000..df77a7f7be --- /dev/null +++ b/Documentation/driver-api/rapidio/mport_cdev.rst @@ -0,0 +1,110 @@ +================================================================== +RapidIO subsystem mport character device driver (rio_mport_cdev.c) +================================================================== + +1. Overview +=========== + +This device driver is the result of collaboration within the RapidIO.org +Software Task Group (STG) between Texas Instruments, Freescale, +Prodrive Technologies, Nokia Networks, BAE and IDT. Additional input was +received from other members of RapidIO.org. The objective was to create a +character mode driver interface which exposes the capabilities of RapidIO +devices directly to applications, in a manner that allows the numerous and +varied RapidIO implementations to interoperate. + +This driver (MPORT_CDEV) provides access to basic RapidIO subsystem operations +for user-space applications. Most of RapidIO operations are supported through +'ioctl' system calls. + +When loaded this device driver creates filesystem nodes named rio_mportX in /dev +directory for each registered RapidIO mport device. 'X' in the node name matches +to unique port ID assigned to each local mport device. + +Using available set of ioctl commands user-space applications can perform +following RapidIO bus and subsystem operations: + +- Reads and writes from/to configuration registers of mport devices + (RIO_MPORT_MAINT_READ_LOCAL/RIO_MPORT_MAINT_WRITE_LOCAL) +- Reads and writes from/to configuration registers of remote RapidIO devices. + This operations are defined as RapidIO Maintenance reads/writes in RIO spec. + (RIO_MPORT_MAINT_READ_REMOTE/RIO_MPORT_MAINT_WRITE_REMOTE) +- Set RapidIO Destination ID for mport devices (RIO_MPORT_MAINT_HDID_SET) +- Set RapidIO Component Tag for mport devices (RIO_MPORT_MAINT_COMPTAG_SET) +- Query logical index of mport devices (RIO_MPORT_MAINT_PORT_IDX_GET) +- Query capabilities and RapidIO link configuration of mport devices + (RIO_MPORT_GET_PROPERTIES) +- Enable/Disable reporting of RapidIO doorbell events to user-space applications + (RIO_ENABLE_DOORBELL_RANGE/RIO_DISABLE_DOORBELL_RANGE) +- Enable/Disable reporting of RIO port-write events to user-space applications + (RIO_ENABLE_PORTWRITE_RANGE/RIO_DISABLE_PORTWRITE_RANGE) +- Query/Control type of events reported through this driver: doorbells, + port-writes or both (RIO_SET_EVENT_MASK/RIO_GET_EVENT_MASK) +- Configure/Map mport's outbound requests window(s) for specific size, + RapidIO destination ID, hopcount and request type + (RIO_MAP_OUTBOUND/RIO_UNMAP_OUTBOUND) +- Configure/Map mport's inbound requests window(s) for specific size, + RapidIO base address and local memory base address + (RIO_MAP_INBOUND/RIO_UNMAP_INBOUND) +- Allocate/Free contiguous DMA coherent memory buffer for DMA data transfers + to/from remote RapidIO devices (RIO_ALLOC_DMA/RIO_FREE_DMA) +- Initiate DMA data transfers to/from remote RapidIO devices (RIO_TRANSFER). + Supports blocking, asynchronous and posted (a.k.a 'fire-and-forget') data + transfer modes. +- Check/Wait for completion of asynchronous DMA data transfer + (RIO_WAIT_FOR_ASYNC) +- Manage device objects supported by RapidIO subsystem (RIO_DEV_ADD/RIO_DEV_DEL). + This allows implementation of various RapidIO fabric enumeration algorithms + as user-space applications while using remaining functionality provided by + kernel RapidIO subsystem. + +2. Hardware Compatibility +========================= + +This device driver uses standard interfaces defined by kernel RapidIO subsystem +and therefore it can be used with any mport device driver registered by RapidIO +subsystem with limitations set by available mport implementation. + +At this moment the most common limitation is availability of RapidIO-specific +DMA engine framework for specific mport device. Users should verify available +functionality of their platform when planning to use this driver: + +- IDT Tsi721 PCIe-to-RapidIO bridge device and its mport device driver are fully + compatible with this driver. +- Freescale SoCs 'fsl_rio' mport driver does not have implementation for RapidIO + specific DMA engine support and therefore DMA data transfers mport_cdev driver + are not available. + +3. Module parameters +==================== + +- 'dma_timeout' + - DMA transfer completion timeout (in msec, default value 3000). + This parameter set a maximum completion wait time for SYNC mode DMA + transfer requests and for RIO_WAIT_FOR_ASYNC ioctl requests. + +- 'dbg_level' + - This parameter allows to control amount of debug information + generated by this device driver. This parameter is formed by set of + bit masks that correspond to the specific functional blocks. + For mask definitions see 'drivers/rapidio/devices/rio_mport_cdev.c' + This parameter can be changed dynamically. + Use CONFIG_RAPIDIO_DEBUG=y to enable debug output at the top level. + +4. Known problems +================= + + None. + +5. User-space Applications and API +================================== + +API library and applications that use this device driver are available from +RapidIO.org. + +6. TODO List +============ + +- Add support for sending/receiving "raw" RapidIO messaging packets. +- Add memory mapped DMA data transfers as an option when RapidIO-specific DMA + is not available. diff --git a/Documentation/driver-api/rapidio/rapidio.rst b/Documentation/driver-api/rapidio/rapidio.rst new file mode 100644 index 0000000000..74c552ad3e --- /dev/null +++ b/Documentation/driver-api/rapidio/rapidio.rst @@ -0,0 +1,362 @@ +============ +Introduction +============ + +The RapidIO standard is a packet-based fabric interconnect standard designed for +use in embedded systems. Development of the RapidIO standard is directed by the +RapidIO Trade Association (RTA). The current version of the RapidIO specification +is publicly available for download from the RTA web-site [1]. + +This document describes the basics of the Linux RapidIO subsystem and provides +information on its major components. + +1 Overview +========== + +Because the RapidIO subsystem follows the Linux device model it is integrated +into the kernel similarly to other buses by defining RapidIO-specific device and +bus types and registering them within the device model. + +The Linux RapidIO subsystem is architecture independent and therefore defines +architecture-specific interfaces that provide support for common RapidIO +subsystem operations. + +2. Core Components +================== + +A typical RapidIO network is a combination of endpoints and switches. +Each of these components is represented in the subsystem by an associated data +structure. The core logical components of the RapidIO subsystem are defined +in include/linux/rio.h file. + +2.1 Master Port +--------------- + +A master port (or mport) is a RapidIO interface controller that is local to the +processor executing the Linux code. A master port generates and receives RapidIO +packets (transactions). In the RapidIO subsystem each master port is represented +by a rio_mport data structure. This structure contains master port specific +resources such as mailboxes and doorbells. The rio_mport also includes a unique +host device ID that is valid when a master port is configured as an enumerating +host. + +RapidIO master ports are serviced by subsystem specific mport device drivers +that provide functionality defined for this subsystem. To provide a hardware +independent interface for RapidIO subsystem operations, rio_mport structure +includes rio_ops data structure which contains pointers to hardware specific +implementations of RapidIO functions. + +2.2 Device +---------- + +A RapidIO device is any endpoint (other than mport) or switch in the network. +All devices are presented in the RapidIO subsystem by corresponding rio_dev data +structure. Devices form one global device list and per-network device lists +(depending on number of available mports and networks). + +2.3 Switch +---------- + +A RapidIO switch is a special class of device that routes packets between its +ports towards their final destination. The packet destination port within a +switch is defined by an internal routing table. A switch is presented in the +RapidIO subsystem by rio_dev data structure expanded by additional rio_switch +data structure, which contains switch specific information such as copy of the +routing table and pointers to switch specific functions. + +The RapidIO subsystem defines the format and initialization method for subsystem +specific switch drivers that are designed to provide hardware-specific +implementation of common switch management routines. + +2.4 Network +----------- + +A RapidIO network is a combination of interconnected endpoint and switch devices. +Each RapidIO network known to the system is represented by corresponding rio_net +data structure. This structure includes lists of all devices and local master +ports that form the same network. It also contains a pointer to the default +master port that is used to communicate with devices within the network. + +2.5 Device Drivers +------------------ + +RapidIO device-specific drivers follow Linux Kernel Driver Model and are +intended to support specific RapidIO devices attached to the RapidIO network. + +2.6 Subsystem Interfaces +------------------------ + +RapidIO interconnect specification defines features that may be used to provide +one or more common service layers for all participating RapidIO devices. These +common services may act separately from device-specific drivers or be used by +device-specific drivers. Example of such service provider is the RIONET driver +which implements Ethernet-over-RapidIO interface. Because only one driver can be +registered for a device, all common RapidIO services have to be registered as +subsystem interfaces. This allows to have multiple common services attached to +the same device without blocking attachment of a device-specific driver. + +3. Subsystem Initialization +=========================== + +In order to initialize the RapidIO subsystem, a platform must initialize and +register at least one master port within the RapidIO network. To register mport +within the subsystem controller driver's initialization code calls function +rio_register_mport() for each available master port. + +After all active master ports are registered with a RapidIO subsystem, +an enumeration and/or discovery routine may be called automatically or +by user-space command. + +RapidIO subsystem can be configured to be built as a statically linked or +modular component of the kernel (see details below). + +4. Enumeration and Discovery +============================ + +4.1 Overview +------------ + +RapidIO subsystem configuration options allow users to build enumeration and +discovery methods as statically linked components or loadable modules. +An enumeration/discovery method implementation and available input parameters +define how any given method can be attached to available RapidIO mports: +simply to all available mports OR individually to the specified mport device. + +Depending on selected enumeration/discovery build configuration, there are +several methods to initiate an enumeration and/or discovery process: + + (a) Statically linked enumeration and discovery process can be started + automatically during kernel initialization time using corresponding module + parameters. This was the original method used since introduction of RapidIO + subsystem. Now this method relies on enumerator module parameter which is + 'rio-scan.scan' for existing basic enumeration/discovery method. + When automatic start of enumeration/discovery is used a user has to ensure + that all discovering endpoints are started before the enumerating endpoint + and are waiting for enumeration to be completed. + Configuration option CONFIG_RAPIDIO_DISC_TIMEOUT defines time that discovering + endpoint waits for enumeration to be completed. If the specified timeout + expires the discovery process is terminated without obtaining RapidIO network + information. NOTE: a timed out discovery process may be restarted later using + a user-space command as it is described below (if the given endpoint was + enumerated successfully). + + (b) Statically linked enumeration and discovery process can be started by + a command from user space. This initiation method provides more flexibility + for a system startup compared to the option (a) above. After all participating + endpoints have been successfully booted, an enumeration process shall be + started first by issuing a user-space command, after an enumeration is + completed a discovery process can be started on all remaining endpoints. + + (c) Modular enumeration and discovery process can be started by a command from + user space. After an enumeration/discovery module is loaded, a network scan + process can be started by issuing a user-space command. + Similar to the option (b) above, an enumerator has to be started first. + + (d) Modular enumeration and discovery process can be started by a module + initialization routine. In this case an enumerating module shall be loaded + first. + +When a network scan process is started it calls an enumeration or discovery +routine depending on the configured role of a master port: host or agent. + +Enumeration is performed by a master port if it is configured as a host port by +assigning a host destination ID greater than or equal to zero. The host +destination ID can be assigned to a master port using various methods depending +on RapidIO subsystem build configuration: + + (a) For a statically linked RapidIO subsystem core use command line parameter + "rapidio.hdid=" with a list of destination ID assignments in order of mport + device registration. For example, in a system with two RapidIO controllers + the command line parameter "rapidio.hdid=-1,7" will result in assignment of + the host destination ID=7 to the second RapidIO controller, while the first + one will be assigned destination ID=-1. + + (b) If the RapidIO subsystem core is built as a loadable module, in addition + to the method shown above, the host destination ID(s) can be specified using + traditional methods of passing module parameter "hdid=" during its loading: + + - from command line: "modprobe rapidio hdid=-1,7", or + - from modprobe configuration file using configuration command "options", + like in this example: "options rapidio hdid=-1,7". An example of modprobe + configuration file is provided in the section below. + +NOTES: + (i) if "hdid=" parameter is omitted all available mport will be assigned + destination ID = -1; + + (ii) the "hdid=" parameter in systems with multiple mports can have + destination ID assignments omitted from the end of list (default = -1). + +If the host device ID for a specific master port is set to -1, the discovery +process will be performed for it. + +The enumeration and discovery routines use RapidIO maintenance transactions +to access the configuration space of devices. + +NOTE: If RapidIO switch-specific device drivers are built as loadable modules +they must be loaded before enumeration/discovery process starts. +This requirement is cased by the fact that enumeration/discovery methods invoke +vendor-specific callbacks on early stages. + +4.2 Automatic Start of Enumeration and Discovery +------------------------------------------------ + +Automatic enumeration/discovery start method is applicable only to built-in +enumeration/discovery RapidIO configuration selection. To enable automatic +enumeration/discovery start by existing basic enumerator method set use boot +command line parameter "rio-scan.scan=1". + +This configuration requires synchronized start of all RapidIO endpoints that +form a network which will be enumerated/discovered. Discovering endpoints have +to be started before an enumeration starts to ensure that all RapidIO +controllers have been initialized and are ready to be discovered. Configuration +parameter CONFIG_RAPIDIO_DISC_TIMEOUT defines time (in seconds) which +a discovering endpoint will wait for enumeration to be completed. + +When automatic enumeration/discovery start is selected, basic method's +initialization routine calls rio_init_mports() to perform enumeration or +discovery for all known mport devices. + +Depending on RapidIO network size and configuration this automatic +enumeration/discovery start method may be difficult to use due to the +requirement for synchronized start of all endpoints. + +4.3 User-space Start of Enumeration and Discovery +------------------------------------------------- + +User-space start of enumeration and discovery can be used with built-in and +modular build configurations. For user-space controlled start RapidIO subsystem +creates the sysfs write-only attribute file '/sys/bus/rapidio/scan'. To initiate +an enumeration or discovery process on specific mport device, a user needs to +write mport_ID (not RapidIO destination ID) into that file. The mport_ID is a +sequential number (0 ... RIO_MAX_MPORTS) assigned during mport device +registration. For example for machine with single RapidIO controller, mport_ID +for that controller always will be 0. + +To initiate RapidIO enumeration/discovery on all available mports a user may +write '-1' (or RIO_MPORT_ANY) into the scan attribute file. + +4.4 Basic Enumeration Method +---------------------------- + +This is an original enumeration/discovery method which is available since +first release of RapidIO subsystem code. The enumeration process is +implemented according to the enumeration algorithm outlined in the RapidIO +Interconnect Specification: Annex I [1]. + +This method can be configured as statically linked or loadable module. +The method's single parameter "scan" allows to trigger the enumeration/discovery +process from module initialization routine. + +This enumeration/discovery method can be started only once and does not support +unloading if it is built as a module. + +The enumeration process traverses the network using a recursive depth-first +algorithm. When a new device is found, the enumerator takes ownership of that +device by writing into the Host Device ID Lock CSR. It does this to ensure that +the enumerator has exclusive right to enumerate the device. If device ownership +is successfully acquired, the enumerator allocates a new rio_dev structure and +initializes it according to device capabilities. + +If the device is an endpoint, a unique device ID is assigned to it and its value +is written into the device's Base Device ID CSR. + +If the device is a switch, the enumerator allocates an additional rio_switch +structure to store switch specific information. Then the switch's vendor ID and +device ID are queried against a table of known RapidIO switches. Each switch +table entry contains a pointer to a switch-specific initialization routine that +initializes pointers to the rest of switch specific operations, and performs +hardware initialization if necessary. A RapidIO switch does not have a unique +device ID; it relies on hopcount and routing for device ID of an attached +endpoint if access to its configuration registers is required. If a switch (or +chain of switches) does not have any endpoint (except enumerator) attached to +it, a fake device ID will be assigned to configure a route to that switch. +In the case of a chain of switches without endpoint, one fake device ID is used +to configure a route through the entire chain and switches are differentiated by +their hopcount value. + +For both endpoints and switches the enumerator writes a unique component tag +into device's Component Tag CSR. That unique value is used by the error +management notification mechanism to identify a device that is reporting an +error management event. + +Enumeration beyond a switch is completed by iterating over each active egress +port of that switch. For each active link, a route to a default device ID +(0xFF for 8-bit systems and 0xFFFF for 16-bit systems) is temporarily written +into the routing table. The algorithm recurs by calling itself with hopcount + 1 +and the default device ID in order to access the device on the active port. + +After the host has completed enumeration of the entire network it releases +devices by clearing device ID locks (calls rio_clear_locks()). For each endpoint +in the system, it sets the Discovered bit in the Port General Control CSR +to indicate that enumeration is completed and agents are allowed to execute +passive discovery of the network. + +The discovery process is performed by agents and is similar to the enumeration +process that is described above. However, the discovery process is performed +without changes to the existing routing because agents only gather information +about RapidIO network structure and are building an internal map of discovered +devices. This way each Linux-based component of the RapidIO subsystem has +a complete view of the network. The discovery process can be performed +simultaneously by several agents. After initializing its RapidIO master port +each agent waits for enumeration completion by the host for the configured wait +time period. If this wait time period expires before enumeration is completed, +an agent skips RapidIO discovery and continues with remaining kernel +initialization. + +4.5 Adding New Enumeration/Discovery Method +------------------------------------------- + +RapidIO subsystem code organization allows addition of new enumeration/discovery +methods as new configuration options without significant impact to the core +RapidIO code. + +A new enumeration/discovery method has to be attached to one or more mport +devices before an enumeration/discovery process can be started. Normally, +method's module initialization routine calls rio_register_scan() to attach +an enumerator to a specified mport device (or devices). The basic enumerator +implementation demonstrates this process. + +4.6 Using Loadable RapidIO Switch Drivers +----------------------------------------- + +In the case when RapidIO switch drivers are built as loadable modules a user +must ensure that they are loaded before the enumeration/discovery starts. +This process can be automated by specifying pre- or post- dependencies in the +RapidIO-specific modprobe configuration file as shown in the example below. + +File /etc/modprobe.d/rapidio.conf:: + + # Configure RapidIO subsystem modules + + # Set enumerator host destination ID (overrides kernel command line option) + options rapidio hdid=-1,2 + + # Load RapidIO switch drivers immediately after rapidio core module was loaded + softdep rapidio post: idt_gen2 idtcps tsi57x + + # OR : + + # Load RapidIO switch drivers just before rio-scan enumerator module is loaded + softdep rio-scan pre: idt_gen2 idtcps tsi57x + + -------------------------- + +NOTE: + In the example above, one of "softdep" commands must be removed or + commented out to keep required module loading sequence. + +5. References +============= + +[1] RapidIO Trade Association. RapidIO Interconnect Specifications. + http://www.rapidio.org. + +[2] Rapidio TA. Technology Comparisons. + http://www.rapidio.org/education/technology_comparisons/ + +[3] RapidIO support for Linux. + https://lwn.net/Articles/139118/ + +[4] Matt Porter. RapidIO for Linux. Ottawa Linux Symposium, 2005 + https://www.kernel.org/doc/ols/2005/ols2005v2-pages-43-56.pdf diff --git a/Documentation/driver-api/rapidio/rio_cm.rst b/Documentation/driver-api/rapidio/rio_cm.rst new file mode 100644 index 0000000000..5294430a7a --- /dev/null +++ b/Documentation/driver-api/rapidio/rio_cm.rst @@ -0,0 +1,135 @@ +========================================================================== +RapidIO subsystem Channelized Messaging character device driver (rio_cm.c) +========================================================================== + + +1. Overview +=========== + +This device driver is the result of collaboration within the RapidIO.org +Software Task Group (STG) between Texas Instruments, Prodrive Technologies, +Nokia Networks, BAE and IDT. Additional input was received from other members +of RapidIO.org. + +The objective was to create a character mode driver interface which exposes +messaging capabilities of RapidIO endpoint devices (mports) directly +to applications, in a manner that allows the numerous and varied RapidIO +implementations to interoperate. + +This driver (RIO_CM) provides to user-space applications shared access to +RapidIO mailbox messaging resources. + +RapidIO specification (Part 2) defines that endpoint devices may have up to four +messaging mailboxes in case of multi-packet message (up to 4KB) and +up to 64 mailboxes if single-packet messages (up to 256 B) are used. In addition +to protocol definition limitations, a particular hardware implementation can +have reduced number of messaging mailboxes. RapidIO aware applications must +therefore share the messaging resources of a RapidIO endpoint. + +Main purpose of this device driver is to provide RapidIO mailbox messaging +capability to large number of user-space processes by introducing socket-like +operations using a single messaging mailbox. This allows applications to +use the limited RapidIO messaging hardware resources efficiently. + +Most of device driver's operations are supported through 'ioctl' system calls. + +When loaded this device driver creates a single file system node named rio_cm +in /dev directory common for all registered RapidIO mport devices. + +Following ioctl commands are available to user-space applications: + +- RIO_CM_MPORT_GET_LIST: + Returns to caller list of local mport devices that + support messaging operations (number of entries up to RIO_MAX_MPORTS). + Each list entry is combination of mport's index in the system and RapidIO + destination ID assigned to the port. +- RIO_CM_EP_GET_LIST_SIZE: + Returns number of messaging capable remote endpoints + in a RapidIO network associated with the specified mport device. +- RIO_CM_EP_GET_LIST: + Returns list of RapidIO destination IDs for messaging + capable remote endpoints (peers) available in a RapidIO network associated + with the specified mport device. +- RIO_CM_CHAN_CREATE: + Creates RapidIO message exchange channel data structure + with channel ID assigned automatically or as requested by a caller. +- RIO_CM_CHAN_BIND: + Binds the specified channel data structure to the specified + mport device. +- RIO_CM_CHAN_LISTEN: + Enables listening for connection requests on the specified + channel. +- RIO_CM_CHAN_ACCEPT: + Accepts a connection request from peer on the specified + channel. If wait timeout for this request is specified by a caller it is + a blocking call. If timeout set to 0 this is non-blocking call - ioctl + handler checks for a pending connection request and if one is not available + exits with -EGAIN error status immediately. +- RIO_CM_CHAN_CONNECT: + Sends a connection request to a remote peer/channel. +- RIO_CM_CHAN_SEND: + Sends a data message through the specified channel. + The handler for this request assumes that message buffer specified by + a caller includes the reserved space for a packet header required by + this driver. +- RIO_CM_CHAN_RECEIVE: + Receives a data message through a connected channel. + If the channel does not have an incoming message ready to return this ioctl + handler will wait for new message until timeout specified by a caller + expires. If timeout value is set to 0, ioctl handler uses a default value + defined by MAX_SCHEDULE_TIMEOUT. +- RIO_CM_CHAN_CLOSE: + Closes a specified channel and frees associated buffers. + If the specified channel is in the CONNECTED state, sends close notification + to the remote peer. + +The ioctl command codes and corresponding data structures intended for use by +user-space applications are defined in 'include/uapi/linux/rio_cm_cdev.h'. + +2. Hardware Compatibility +========================= + +This device driver uses standard interfaces defined by kernel RapidIO subsystem +and therefore it can be used with any mport device driver registered by RapidIO +subsystem with limitations set by available mport HW implementation of messaging +mailboxes. + +3. Module parameters +==================== + +- 'dbg_level' + - This parameter allows to control amount of debug information + generated by this device driver. This parameter is formed by set of + bit masks that correspond to the specific functional block. + For mask definitions see 'drivers/rapidio/devices/rio_cm.c' + This parameter can be changed dynamically. + Use CONFIG_RAPIDIO_DEBUG=y to enable debug output at the top level. + +- 'cmbox' + - Number of RapidIO mailbox to use (default value is 1). + This parameter allows to set messaging mailbox number that will be used + within entire RapidIO network. It can be used when default mailbox is + used by other device drivers or is not supported by some nodes in the + RapidIO network. + +- 'chstart' + - Start channel number for dynamic assignment. Default value - 256. + Allows to exclude channel numbers below this parameter from dynamic + allocation to avoid conflicts with software components that use + reserved predefined channel numbers. + +4. Known problems +================= + + None. + +5. User-space Applications and API Library +========================================== + +Messaging API library and applications that use this device driver are available +from RapidIO.org. + +6. TODO List +============ + +- Add support for system notification messages (reserved channel 0). diff --git a/Documentation/driver-api/rapidio/sysfs.rst b/Documentation/driver-api/rapidio/sysfs.rst new file mode 100644 index 0000000000..540f726834 --- /dev/null +++ b/Documentation/driver-api/rapidio/sysfs.rst @@ -0,0 +1,7 @@ +============= +Sysfs entries +============= + +The RapidIO sysfs files have moved to: +Documentation/ABI/testing/sysfs-bus-rapidio and +Documentation/ABI/testing/sysfs-class-rapidio diff --git a/Documentation/driver-api/rapidio/tsi721.rst b/Documentation/driver-api/rapidio/tsi721.rst new file mode 100644 index 0000000000..42aea438cd --- /dev/null +++ b/Documentation/driver-api/rapidio/tsi721.rst @@ -0,0 +1,112 @@ +========================================================================= +RapidIO subsystem mport driver for IDT Tsi721 PCI Express-to-SRIO bridge. +========================================================================= + +1. Overview +=========== + +This driver implements all currently defined RapidIO mport callback functions. +It supports maintenance read and write operations, inbound and outbound RapidIO +doorbells, inbound maintenance port-writes and RapidIO messaging. + +To generate SRIO maintenance transactions this driver uses one of Tsi721 DMA +channels. This mechanism provides access to larger range of hop counts and +destination IDs without need for changes in outbound window translation. + +RapidIO messaging support uses dedicated messaging channels for each mailbox. +For inbound messages this driver uses destination ID matching to forward messages +into the corresponding message queue. Messaging callbacks are implemented to be +fully compatible with RIONET driver (Ethernet over RapidIO messaging services). + +1. Module parameters: + +- 'dbg_level' + - This parameter allows to control amount of debug information + generated by this device driver. This parameter is formed by set of + This parameter can be changed bit masks that correspond to the specific + functional block. + For mask definitions see 'drivers/rapidio/devices/tsi721.h' + This parameter can be changed dynamically. + Use CONFIG_RAPIDIO_DEBUG=y to enable debug output at the top level. + +- 'dma_desc_per_channel' + - This parameter defines number of hardware buffer + descriptors allocated for each registered Tsi721 DMA channel. + Its default value is 128. + +- 'dma_txqueue_sz' + - DMA transactions queue size. Defines number of pending + transaction requests that can be accepted by each DMA channel. + Default value is 16. + +- 'dma_sel' + - DMA channel selection mask. Bitmask that defines which hardware + DMA channels (0 ... 6) will be registered with DmaEngine core. + If bit is set to 1, the corresponding DMA channel will be registered. + DMA channels not selected by this mask will not be used by this device + driver. Default value is 0x7f (use all channels). + +- 'pcie_mrrs' + - override value for PCIe Maximum Read Request Size (MRRS). + This parameter gives an ability to override MRRS value set during PCIe + configuration process. Tsi721 supports read request sizes up to 4096B. + Value for this parameter must be set as defined by PCIe specification: + 0 = 128B, 1 = 256B, 2 = 512B, 3 = 1024B, 4 = 2048B and 5 = 4096B. + Default value is '-1' (= keep platform setting). + +- 'mbox_sel' + - RIO messaging MBOX selection mask. This is a bitmask that defines + messaging MBOXes are managed by this device driver. Mask bits 0 - 3 + correspond to MBOX0 - MBOX3. MBOX is under driver's control if the + corresponding bit is set to '1'. Default value is 0x0f (= all). + +2. Known problems +================= + + None. + +3. DMA Engine Support +===================== + +Tsi721 mport driver supports DMA data transfers between local system memory and +remote RapidIO devices. This functionality is implemented according to SLAVE +mode API defined by common Linux kernel DMA Engine framework. + +Depending on system requirements RapidIO DMA operations can be included/excluded +by setting CONFIG_RAPIDIO_DMA_ENGINE option. Tsi721 miniport driver uses seven +out of eight available BDMA channels to support DMA data transfers. +One BDMA channel is reserved for generation of maintenance read/write requests. + +If Tsi721 mport driver have been built with RAPIDIO_DMA_ENGINE support included, +this driver will accept DMA-specific module parameter: + + "dma_desc_per_channel" + - defines number of hardware buffer descriptors used by + each BDMA channel of Tsi721 (by default - 128). + +4. Version History + + ===== ==================================================================== + 1.1.0 DMA operations re-worked to support data scatter/gather lists larger + than hardware buffer descriptors ring. + 1.0.0 Initial driver release. + ===== ==================================================================== + +5. License +=========== + + Copyright(c) 2011 Integrated Device Technology, Inc. All rights reserved. + + This program is free software; you can redistribute it and/or modify it + under the terms of the GNU General Public License as published by the Free + Software Foundation; either version 2 of the License, or (at your option) + any later version. + + This program is distributed in the hope that it will be useful, but WITHOUT + ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or + FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for + more details. + + You should have received a copy of the GNU General Public License along with + this program; if not, write to the Free Software Foundation, Inc., + 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA. diff --git a/Documentation/driver-api/regulator.rst b/Documentation/driver-api/regulator.rst new file mode 100644 index 0000000000..b43c78eb24 --- /dev/null +++ b/Documentation/driver-api/regulator.rst @@ -0,0 +1,170 @@ +.. Copyright 2007-2008 Wolfson Microelectronics + +.. This documentation is free software; you can redistribute +.. it and/or modify it under the terms of the GNU General Public +.. License version 2 as published by the Free Software Foundation. + +================================= +Voltage and current regulator API +================================= + +:Author: Liam Girdwood +:Author: Mark Brown + +Introduction +============ + +This framework is designed to provide a standard kernel interface to +control voltage and current regulators. + +The intention is to allow systems to dynamically control regulator power +output in order to save power and prolong battery life. This applies to +both voltage regulators (where voltage output is controllable) and +current sinks (where current limit is controllable). + +Note that additional (and currently more complete) documentation is +available in the Linux kernel source under +``Documentation/power/regulator``. + +Glossary +-------- + +The regulator API uses a number of terms which may not be familiar: + +Regulator + + Electronic device that supplies power to other devices. Most regulators + can enable and disable their output and some can also control their + output voltage or current. + +Consumer + + Electronic device which consumes power provided by a regulator. These + may either be static, requiring only a fixed supply, or dynamic, + requiring active management of the regulator at runtime. + +Power Domain + + The electronic circuit supplied by a given regulator, including the + regulator and all consumer devices. The configuration of the regulator + is shared between all the components in the circuit. + +Power Management Integrated Circuit (PMIC) + + An IC which contains numerous regulators and often also other + subsystems. In an embedded system the primary PMIC is often equivalent + to a combination of the PSU and southbridge in a desktop system. + +Consumer driver interface +========================= + +This offers a similar API to the kernel clock framework. Consumer +drivers use `get <#API-regulator-get>`__ and +`put <#API-regulator-put>`__ operations to acquire and release +regulators. Functions are provided to `enable <#API-regulator-enable>`__ +and `disable <#API-regulator-disable>`__ the regulator and to get and +set the runtime parameters of the regulator. + +When requesting regulators consumers use symbolic names for their +supplies, such as "Vcc", which are mapped into actual regulator devices +by the machine interface. + +A stub version of this API is provided when the regulator framework is +not in use in order to minimise the need to use ifdefs. + +Enabling and disabling +---------------------- + +The regulator API provides reference counted enabling and disabling of +regulators. Consumer devices use the :c:func:`regulator_enable()` and +:c:func:`regulator_disable()` functions to enable and disable +regulators. Calls to the two functions must be balanced. + +Note that since multiple consumers may be using a regulator and machine +constraints may not allow the regulator to be disabled there is no +guarantee that calling :c:func:`regulator_disable()` will actually +cause the supply provided by the regulator to be disabled. Consumer +drivers should assume that the regulator may be enabled at all times. + +Configuration +------------- + +Some consumer devices may need to be able to dynamically configure their +supplies. For example, MMC drivers may need to select the correct +operating voltage for their cards. This may be done while the regulator +is enabled or disabled. + +The :c:func:`regulator_set_voltage()` and +:c:func:`regulator_set_current_limit()` functions provide the primary +interface for this. Both take ranges of voltages and currents, supporting +drivers that do not require a specific value (eg, CPU frequency scaling +normally permits the CPU to use a wider range of supply voltages at lower +frequencies but does not require that the supply voltage be lowered). Where +an exact value is required both minimum and maximum values should be +identical. + +Callbacks +--------- + +Callbacks may also be registered for events such as regulation failures. + +Regulator driver interface +========================== + +Drivers for regulator chips register the regulators with the regulator +core, providing operations structures to the core. A notifier interface +allows error conditions to be reported to the core. + +Registration should be triggered by explicit setup done by the platform, +supplying a struct regulator_init_data for the regulator +containing constraint and supply information. + +Machine interface +================= + +This interface provides a way to define how regulators are connected to +consumers on a given system and what the valid operating parameters are +for the system. + +Supplies +-------- + +Regulator supplies are specified using struct +:c:type:`regulator_consumer_supply`. This is done at driver registration +time as part of the machine constraints. + +Constraints +----------- + +As well as defining the connections the machine interface also provides +constraints defining the operations that clients are allowed to perform +and the parameters that may be set. This is required since generally +regulator devices will offer more flexibility than it is safe to use on +a given system, for example supporting higher supply voltages than the +consumers are rated for. + +This is done at driver registration time` by providing a +struct regulation_constraints. + +The constraints may also specify an initial configuration for the +regulator in the constraints, which is particularly useful for use with +static consumers. + +API reference +============= + +Due to limitations of the kernel documentation framework and the +existing layout of the source code the entire regulator API is +documented here. + +.. kernel-doc:: include/linux/regulator/consumer.h + :internal: + +.. kernel-doc:: include/linux/regulator/machine.h + :internal: + +.. kernel-doc:: include/linux/regulator/driver.h + :internal: + +.. kernel-doc:: drivers/regulator/core.c + :export: diff --git a/Documentation/driver-api/reset.rst b/Documentation/driver-api/reset.rst new file mode 100644 index 0000000000..84e03d7039 --- /dev/null +++ b/Documentation/driver-api/reset.rst @@ -0,0 +1,221 @@ +.. SPDX-License-Identifier: GPL-2.0-only + +==================== +Reset controller API +==================== + +Introduction +============ + +Reset controllers are central units that control the reset signals to multiple +peripherals. +The reset controller API is split into two parts: +the `consumer driver interface <#consumer-driver-interface>`__ (`API reference +<#reset-consumer-api>`__), which allows peripheral drivers to request control +over their reset input signals, and the `reset controller driver interface +<#reset-controller-driver-interface>`__ (`API reference +<#reset-controller-driver-api>`__), which is used by drivers for reset +controller devices to register their reset controls to provide them to the +consumers. + +While some reset controller hardware units also implement system restart +functionality, restart handlers are out of scope for the reset controller API. + +Glossary +-------- + +The reset controller API uses these terms with a specific meaning: + +Reset line + + Physical reset line carrying a reset signal from a reset controller + hardware unit to a peripheral module. + +Reset control + + Control method that determines the state of one or multiple reset lines. + Most commonly this is a single bit in reset controller register space that + either allows direct control over the physical state of the reset line, or + is self-clearing and can be used to trigger a predetermined pulse on the + reset line. + In more complicated reset controls, a single trigger action can launch a + carefully timed sequence of pulses on multiple reset lines. + +Reset controller + + A hardware module that provides a number of reset controls to control a + number of reset lines. + +Reset consumer + + Peripheral module or external IC that is put into reset by the signal on a + reset line. + +Consumer driver interface +========================= + +This interface provides an API that is similar to the kernel clock framework. +Consumer drivers use get and put operations to acquire and release reset +controls. +Functions are provided to assert and deassert the controlled reset lines, +trigger reset pulses, or to query reset line status. + +When requesting reset controls, consumers can use symbolic names for their +reset inputs, which are mapped to an actual reset control on an existing reset +controller device by the core. + +A stub version of this API is provided when the reset controller framework is +not in use in order to minimize the need to use ifdefs. + +Shared and exclusive resets +--------------------------- + +The reset controller API provides either reference counted deassertion and +assertion or direct, exclusive control. +The distinction between shared and exclusive reset controls is made at the time +the reset control is requested, either via devm_reset_control_get_shared() or +via devm_reset_control_get_exclusive(). +This choice determines the behavior of the API calls made with the reset +control. + +Shared resets behave similarly to clocks in the kernel clock framework. +They provide reference counted deassertion, where only the first deassert, +which increments the deassertion reference count to one, and the last assert +which decrements the deassertion reference count back to zero, have a physical +effect on the reset line. + +Exclusive resets on the other hand guarantee direct control. +That is, an assert causes the reset line to be asserted immediately, and a +deassert causes the reset line to be deasserted immediately. + +Assertion and deassertion +------------------------- + +Consumer drivers use the reset_control_assert() and reset_control_deassert() +functions to assert and deassert reset lines. +For shared reset controls, calls to the two functions must be balanced. + +Note that since multiple consumers may be using a shared reset control, there +is no guarantee that calling reset_control_assert() on a shared reset control +will actually cause the reset line to be asserted. +Consumer drivers using shared reset controls should assume that the reset line +may be kept deasserted at all times. +The API only guarantees that the reset line can not be asserted as long as any +consumer has requested it to be deasserted. + +Triggering +---------- + +Consumer drivers use reset_control_reset() to trigger a reset pulse on a +self-deasserting reset control. +In general, these resets can not be shared between multiple consumers, since +requesting a pulse from any consumer driver will reset all connected +peripherals. + +The reset controller API allows requesting self-deasserting reset controls as +shared, but for those only the first trigger request causes an actual pulse to +be issued on the reset line. +All further calls to this function have no effect until all consumers have +called reset_control_rearm(). +For shared reset controls, calls to the two functions must be balanced. +This allows devices that only require an initial reset at any point before the +driver is probed or resumed to share a pulsed reset line. + +Querying +-------- + +Only some reset controllers support querying the current status of a reset +line, via reset_control_status(). +If supported, this function returns a positive non-zero value if the given +reset line is asserted. +The reset_control_status() function does not accept a +`reset control array <#reset-control-arrays>`__ handle as its input parameter. + +Optional resets +--------------- + +Often peripherals require a reset line on some platforms but not on others. +For this, reset controls can be requested as optional using +devm_reset_control_get_optional_exclusive() or +devm_reset_control_get_optional_shared(). +These functions return a NULL pointer instead of an error when the requested +reset control is not specified in the device tree. +Passing a NULL pointer to the reset_control functions causes them to return +quietly without an error. + +Reset control arrays +-------------------- + +Some drivers need to assert a bunch of reset lines in no particular order. +devm_reset_control_array_get() returns an opaque reset control handle that can +be used to assert, deassert, or trigger all specified reset controls at once. +The reset control API does not guarantee the order in which the individual +controls therein are handled. + +Reset controller driver interface +================================= + +Drivers for reset controller modules provide the functionality necessary to +assert or deassert reset signals, to trigger a reset pulse on a reset line, or +to query its current state. +All functions are optional. + +Initialization +-------------- + +Drivers fill a struct :c:type:`reset_controller_dev` and register it with +reset_controller_register() in their probe function. +The actual functionality is implemented in callback functions via a struct +:c:type:`reset_control_ops`. + +API reference +============= + +The reset controller API is documented here in two parts: +the `reset consumer API <#reset-consumer-api>`__ and the `reset controller +driver API <#reset-controller-driver-api>`__. + +Reset consumer API +------------------ + +Reset consumers can control a reset line using an opaque reset control handle, +which can be obtained from devm_reset_control_get_exclusive() or +devm_reset_control_get_shared(). +Given the reset control, consumers can call reset_control_assert() and +reset_control_deassert(), trigger a reset pulse using reset_control_reset(), or +query the reset line status using reset_control_status(). + +.. kernel-doc:: include/linux/reset.h + :internal: + +.. kernel-doc:: drivers/reset/core.c + :functions: reset_control_reset + reset_control_assert + reset_control_deassert + reset_control_status + reset_control_acquire + reset_control_release + reset_control_rearm + reset_control_put + of_reset_control_get_count + of_reset_control_array_get + devm_reset_control_array_get + reset_control_get_count + +Reset controller driver API +--------------------------- + +Reset controller drivers are supposed to implement the necessary functions in +a static constant structure :c:type:`reset_control_ops`, allocate and fill out +a struct :c:type:`reset_controller_dev`, and register it using +devm_reset_controller_register(). + +.. kernel-doc:: include/linux/reset-controller.h + :internal: + +.. kernel-doc:: drivers/reset/core.c + :functions: of_reset_simple_xlate + reset_controller_register + reset_controller_unregister + devm_reset_controller_register + reset_controller_add_lookup diff --git a/Documentation/driver-api/rfkill.rst b/Documentation/driver-api/rfkill.rst new file mode 100644 index 0000000000..7d3684e81d --- /dev/null +++ b/Documentation/driver-api/rfkill.rst @@ -0,0 +1,132 @@ +=============================== +rfkill - RF kill switch support +=============================== + + +.. contents:: + :depth: 2 + +Introduction +============ + +The rfkill subsystem provides a generic interface for disabling any radio +transmitter in the system. When a transmitter is blocked, it shall not +radiate any power. + +The subsystem also provides the ability to react on button presses and +disable all transmitters of a certain type (or all). This is intended for +situations where transmitters need to be turned off, for example on +aircraft. + +The rfkill subsystem has a concept of "hard" and "soft" block, which +differ little in their meaning (block == transmitters off) but rather in +whether they can be changed or not: + + - hard block + read-only radio block that cannot be overridden by software + + - soft block + writable radio block (need not be readable) that is set by + the system software. + +The rfkill subsystem has two parameters, rfkill.default_state and +rfkill.master_switch_mode, which are documented in +admin-guide/kernel-parameters.rst. + + +Implementation details +====================== + +The rfkill subsystem is composed of three main components: + + * the rfkill core, + * the deprecated rfkill-input module (an input layer handler, being + replaced by userspace policy code) and + * the rfkill drivers. + +The rfkill core provides API for kernel drivers to register their radio +transmitter with the kernel, methods for turning it on and off, and letting +the system know about hardware-disabled states that may be implemented on +the device. + +The rfkill core code also notifies userspace of state changes, and provides +ways for userspace to query the current states. See the "Userspace support" +section below. + +When the device is hard-blocked (either by a call to rfkill_set_hw_state() +or from query_hw_block), set_block() will be invoked for additional software +block, but drivers can ignore the method call since they can use the return +value of the function rfkill_set_hw_state() to sync the software state +instead of keeping track of calls to set_block(). In fact, drivers should +use the return value of rfkill_set_hw_state() unless the hardware actually +keeps track of soft and hard block separately. + + +Kernel API +========== + +Drivers for radio transmitters normally implement an rfkill driver. + +Platform drivers might implement input devices if the rfkill button is just +that, a button. If that button influences the hardware then you need to +implement an rfkill driver instead. This also applies if the platform provides +a way to turn on/off the transmitter(s). + +For some platforms, it is possible that the hardware state changes during +suspend/hibernation, in which case it will be necessary to update the rfkill +core with the current state at resume time. + +To create an rfkill driver, driver's Kconfig needs to have:: + + depends on RFKILL || !RFKILL + +to ensure the driver cannot be built-in when rfkill is modular. The !RFKILL +case allows the driver to be built when rfkill is not configured, in which +case all rfkill API can still be used but will be provided by static inlines +which compile to almost nothing. + +Calling rfkill_set_hw_state() when a state change happens is required from +rfkill drivers that control devices that can be hard-blocked unless they also +assign the poll_hw_block() callback (then the rfkill core will poll the +device). Don't do this unless you cannot get the event in any other way. + +rfkill provides per-switch LED triggers, which can be used to drive LEDs +according to the switch state (LED_FULL when blocked, LED_OFF otherwise). + + +Userspace support +================= + +The recommended userspace interface to use is /dev/rfkill, which is a misc +character device that allows userspace to obtain and set the state of rfkill +devices and sets of devices. It also notifies userspace about device addition +and removal. The API is a simple read/write API that is defined in +linux/rfkill.h, with one ioctl that allows turning off the deprecated input +handler in the kernel for the transition period. + +Except for the one ioctl, communication with the kernel is done via read() +and write() of instances of 'struct rfkill_event'. In this structure, the +soft and hard block are properly separated (unlike sysfs, see below) and +userspace is able to get a consistent snapshot of all rfkill devices in the +system. Also, it is possible to switch all rfkill drivers (or all drivers of +a specified type) into a state which also updates the default state for +hotplugged devices. + +After an application opens /dev/rfkill, it can read the current state of all +devices. Changes can be obtained by either polling the descriptor for +hotplug or state change events or by listening for uevents emitted by the +rfkill core framework. + +Additionally, each rfkill device is registered in sysfs and emits uevents. + +rfkill devices issue uevents (with an action of "change"), with the following +environment variables set:: + + RFKILL_NAME + RFKILL_STATE + RFKILL_TYPE + +The content of these variables corresponds to the "name", "state" and +"type" sysfs files explained above. + +For further details consult Documentation/ABI/stable/sysfs-class-rfkill. diff --git a/Documentation/driver-api/s390-drivers.rst b/Documentation/driver-api/s390-drivers.rst new file mode 100644 index 0000000000..8c0845c4ee --- /dev/null +++ b/Documentation/driver-api/s390-drivers.rst @@ -0,0 +1,135 @@ +=================================== +Writing s390 channel device drivers +=================================== + +:Author: Cornelia Huck + +Introduction +============ + +This document describes the interfaces available for device drivers that +drive s390 based channel attached I/O devices. This includes interfaces +for interaction with the hardware and interfaces for interacting with +the common driver core. Those interfaces are provided by the s390 common +I/O layer. + +The document assumes a familarity with the technical terms associated +with the s390 channel I/O architecture. For a description of this +architecture, please refer to the "z/Architecture: Principles of +Operation", IBM publication no. SA22-7832. + +While most I/O devices on a s390 system are typically driven through the +channel I/O mechanism described here, there are various other methods +(like the diag interface). These are out of the scope of this document. + +The s390 common I/O layer also provides access to some devices that are +not strictly considered I/O devices. They are considered here as well, +although they are not the focus of this document. + +Some additional information can also be found in the kernel source under +Documentation/arch/s390/driver-model.rst. + +The css bus +=========== + +The css bus contains the subchannels available on the system. They fall +into several categories: + +* Standard I/O subchannels, for use by the system. They have a child + device on the ccw bus and are described below. +* I/O subchannels bound to the vfio-ccw driver. See + Documentation/arch/s390/vfio-ccw.rst. +* Message subchannels. No Linux driver currently exists. +* CHSC subchannels (at most one). The chsc subchannel driver can be used + to send asynchronous chsc commands. +* eADM subchannels. Used for talking to storage class memory. + +The ccw bus +=========== + +The ccw bus typically contains the majority of devices available to a +s390 system. Named after the channel command word (ccw), the basic +command structure used to address its devices, the ccw bus contains +so-called channel attached devices. They are addressed via I/O +subchannels, visible on the css bus. A device driver for +channel-attached devices, however, will never interact with the +subchannel directly, but only via the I/O device on the ccw bus, the ccw +device. + +I/O functions for channel-attached devices +------------------------------------------ + +Some hardware structures have been translated into C structures for use +by the common I/O layer and device drivers. For more information on the +hardware structures represented here, please consult the Principles of +Operation. + +.. kernel-doc:: arch/s390/include/asm/cio.h + :internal: + +ccw devices +----------- + +Devices that want to initiate channel I/O need to attach to the ccw bus. +Interaction with the driver core is done via the common I/O layer, which +provides the abstractions of ccw devices and ccw device drivers. + +The functions that initiate or terminate channel I/O all act upon a ccw +device structure. Device drivers must not bypass those functions or +strange side effects may happen. + +.. kernel-doc:: arch/s390/include/asm/ccwdev.h + :internal: + +.. kernel-doc:: drivers/s390/cio/device.c + :export: + +.. kernel-doc:: drivers/s390/cio/device_ops.c + :export: + +The channel-measurement facility +-------------------------------- + +The channel-measurement facility provides a means to collect measurement +data which is made available by the channel subsystem for each channel +attached device. + +.. kernel-doc:: arch/s390/include/uapi/asm/cmb.h + :internal: + +.. kernel-doc:: drivers/s390/cio/cmf.c + :export: + +The ccwgroup bus +================ + +The ccwgroup bus only contains artificial devices, created by the user. +Many networking devices (e.g. qeth) are in fact composed of several ccw +devices (like read, write and data channel for qeth). The ccwgroup bus +provides a mechanism to create a meta-device which contains those ccw +devices as slave devices and can be associated with the netdevice. + +ccw group devices +----------------- + +.. kernel-doc:: arch/s390/include/asm/ccwgroup.h + :internal: + +.. kernel-doc:: drivers/s390/cio/ccwgroup.c + :export: + +Generic interfaces +================== + +The following section contains interfaces in use not only by drivers +dealing with ccw devices, but drivers for various other s390 hardware +as well. + +Adapter interrupts +------------------ + +The common I/O layer provides helper functions for dealing with adapter +interrupts and interrupt vectors. + +.. kernel-doc:: drivers/s390/cio/airq.c + :export: diff --git a/Documentation/driver-api/scsi.rst b/Documentation/driver-api/scsi.rst new file mode 100644 index 0000000000..64b231d125 --- /dev/null +++ b/Documentation/driver-api/scsi.rst @@ -0,0 +1,338 @@ +===================== +SCSI Interfaces Guide +===================== + +:Author: James Bottomley +:Author: Rob Landley + +Introduction +============ + +Protocol vs bus +--------------- + +Once upon a time, the Small Computer Systems Interface defined both a +parallel I/O bus and a data protocol to connect a wide variety of +peripherals (disk drives, tape drives, modems, printers, scanners, +optical drives, test equipment, and medical devices) to a host computer. + +Although the old parallel (fast/wide/ultra) SCSI bus has largely fallen +out of use, the SCSI command set is more widely used than ever to +communicate with devices over a number of different busses. + +The `SCSI protocol <http://www.t10.org/scsi-3.htm>`__ is a big-endian +peer-to-peer packet based protocol. SCSI commands are 6, 10, 12, or 16 +bytes long, often followed by an associated data payload. + +SCSI commands can be transported over just about any kind of bus, and +are the default protocol for storage devices attached to USB, SATA, SAS, +Fibre Channel, FireWire, and ATAPI devices. SCSI packets are also +commonly exchanged over Infiniband, +`I2O <http://i2o.shadowconnect.com/faq.php>`__, TCP/IP +(`iSCSI <https://en.wikipedia.org/wiki/ISCSI>`__), even `Parallel +ports <http://cyberelk.net/tim/parport/parscsi.html>`__. + +Design of the Linux SCSI subsystem +---------------------------------- + +The SCSI subsystem uses a three layer design, with upper, mid, and low +layers. Every operation involving the SCSI subsystem (such as reading a +sector from a disk) uses one driver at each of the 3 levels: one upper +layer driver, one lower layer driver, and the SCSI midlayer. + +The SCSI upper layer provides the interface between userspace and the +kernel, in the form of block and char device nodes for I/O and ioctl(). +The SCSI lower layer contains drivers for specific hardware devices. + +In between is the SCSI mid-layer, analogous to a network routing layer +such as the IPv4 stack. The SCSI mid-layer routes a packet based data +protocol between the upper layer's /dev nodes and the corresponding +devices in the lower layer. It manages command queues, provides error +handling and power management functions, and responds to ioctl() +requests. + +SCSI upper layer +================ + +The upper layer supports the user-kernel interface by providing device +nodes. + +sd (SCSI Disk) +-------------- + +sd (sd_mod.o) + +sr (SCSI CD-ROM) +---------------- + +sr (sr_mod.o) + +st (SCSI Tape) +-------------- + +st (st.o) + +sg (SCSI Generic) +----------------- + +sg (sg.o) + +ch (SCSI Media Changer) +----------------------- + +ch (ch.c) + +SCSI mid layer +============== + +SCSI midlayer implementation +---------------------------- + +include/scsi/scsi_device.h +~~~~~~~~~~~~~~~~~~~~~~~~~~~ + +.. kernel-doc:: include/scsi/scsi_device.h + :internal: + +drivers/scsi/scsi.c +~~~~~~~~~~~~~~~~~~~ + +Main file for the SCSI midlayer. + +.. kernel-doc:: drivers/scsi/scsi.c + :export: + +drivers/scsi/scsicam.c +~~~~~~~~~~~~~~~~~~~~~~ + +`SCSI Common Access +Method <http://www.t10.org/ftp/t10/drafts/cam/cam-r12b.pdf>`__ support +functions, for use with HDIO_GETGEO, etc. + +.. kernel-doc:: drivers/scsi/scsicam.c + :export: + +drivers/scsi/scsi_error.c +~~~~~~~~~~~~~~~~~~~~~~~~~~ + +Common SCSI error/timeout handling routines. + +.. kernel-doc:: drivers/scsi/scsi_error.c + :export: + +drivers/scsi/scsi_devinfo.c +~~~~~~~~~~~~~~~~~~~~~~~~~~~~ + +Manage scsi_dev_info_list, which tracks blacklisted and whitelisted +devices. + +.. kernel-doc:: drivers/scsi/scsi_devinfo.c + :internal: + +drivers/scsi/scsi_ioctl.c +~~~~~~~~~~~~~~~~~~~~~~~~~~ + +Handle ioctl() calls for SCSI devices. + +.. kernel-doc:: drivers/scsi/scsi_ioctl.c + :export: + +drivers/scsi/scsi_lib.c +~~~~~~~~~~~~~~~~~~~~~~~~ + +SCSI queuing library. + +.. kernel-doc:: drivers/scsi/scsi_lib.c + :export: + +drivers/scsi/scsi_lib_dma.c +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ + +SCSI library functions depending on DMA (map and unmap scatter-gather +lists). + +.. kernel-doc:: drivers/scsi/scsi_lib_dma.c + :export: + +drivers/scsi/scsi_proc.c +~~~~~~~~~~~~~~~~~~~~~~~~~ + +The functions in this file provide an interface between the PROC file +system and the SCSI device drivers It is mainly used for debugging, +statistics and to pass information directly to the lowlevel driver. I.E. +plumbing to manage /proc/scsi/\* + +.. kernel-doc:: drivers/scsi/scsi_proc.c + :internal: + +drivers/scsi/scsi_netlink.c +~~~~~~~~~~~~~~~~~~~~~~~~~~~~ + +Infrastructure to provide async events from transports to userspace via +netlink, using a single NETLINK_SCSITRANSPORT protocol for all +transports. See `the original patch +submission <http://marc.info/?l=linux-scsi&m=115507374832500&w=2>`__ for +more details. + +.. kernel-doc:: drivers/scsi/scsi_netlink.c + :internal: + +drivers/scsi/scsi_scan.c +~~~~~~~~~~~~~~~~~~~~~~~~~ + +Scan a host to determine which (if any) devices are attached. The +general scanning/probing algorithm is as follows, exceptions are made to +it depending on device specific flags, compilation options, and global +variable (boot or module load time) settings. A specific LUN is scanned +via an INQUIRY command; if the LUN has a device attached, a scsi_device +is allocated and setup for it. For every id of every channel on the +given host, start by scanning LUN 0. Skip hosts that don't respond at +all to a scan of LUN 0. Otherwise, if LUN 0 has a device attached, +allocate and setup a scsi_device for it. If target is SCSI-3 or up, +issue a REPORT LUN, and scan all of the LUNs returned by the REPORT LUN; +else, sequentially scan LUNs up until some maximum is reached, or a LUN +is seen that cannot have a device attached to it. + +.. kernel-doc:: drivers/scsi/scsi_scan.c + :internal: + +drivers/scsi/scsi_sysctl.c +~~~~~~~~~~~~~~~~~~~~~~~~~~~ + +Set up the sysctl entry: "/dev/scsi/logging_level" +(DEV_SCSI_LOGGING_LEVEL) which sets/returns scsi_logging_level. + +drivers/scsi/scsi_sysfs.c +~~~~~~~~~~~~~~~~~~~~~~~~~~ + +SCSI sysfs interface routines. + +.. kernel-doc:: drivers/scsi/scsi_sysfs.c + :export: + +drivers/scsi/hosts.c +~~~~~~~~~~~~~~~~~~~~ + +mid to lowlevel SCSI driver interface + +.. kernel-doc:: drivers/scsi/hosts.c + :export: + +drivers/scsi/scsi_common.c +~~~~~~~~~~~~~~~~~~~~~~~~~~ + +general support functions + +.. kernel-doc:: drivers/scsi/scsi_common.c + :export: + +Transport classes +----------------- + +Transport classes are service libraries for drivers in the SCSI lower +layer, which expose transport attributes in sysfs. + +Fibre Channel transport +~~~~~~~~~~~~~~~~~~~~~~~ + +The file drivers/scsi/scsi_transport_fc.c defines transport attributes +for Fibre Channel. + +.. kernel-doc:: drivers/scsi/scsi_transport_fc.c + :export: + +iSCSI transport class +~~~~~~~~~~~~~~~~~~~~~ + +The file drivers/scsi/scsi_transport_iscsi.c defines transport +attributes for the iSCSI class, which sends SCSI packets over TCP/IP +connections. + +.. kernel-doc:: drivers/scsi/scsi_transport_iscsi.c + :export: + +Serial Attached SCSI (SAS) transport class +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ + +The file drivers/scsi/scsi_transport_sas.c defines transport +attributes for Serial Attached SCSI, a variant of SATA aimed at large +high-end systems. + +The SAS transport class contains common code to deal with SAS HBAs, an +aproximated representation of SAS topologies in the driver model, and +various sysfs attributes to expose these topologies and management +interfaces to userspace. + +In addition to the basic SCSI core objects this transport class +introduces two additional intermediate objects: The SAS PHY as +represented by struct sas_phy defines an "outgoing" PHY on a SAS HBA or +Expander, and the SAS remote PHY represented by struct sas_rphy defines +an "incoming" PHY on a SAS Expander or end device. Note that this is +purely a software concept, the underlying hardware for a PHY and a +remote PHY is the exactly the same. + +There is no concept of a SAS port in this code, users can see what PHYs +form a wide port based on the port_identifier attribute, which is the +same for all PHYs in a port. + +.. kernel-doc:: drivers/scsi/scsi_transport_sas.c + :export: + +SATA transport class +~~~~~~~~~~~~~~~~~~~~ + +The SATA transport is handled by libata, which has its own book of +documentation in this directory. + +Parallel SCSI (SPI) transport class +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ + +The file drivers/scsi/scsi_transport_spi.c defines transport +attributes for traditional (fast/wide/ultra) SCSI busses. + +.. kernel-doc:: drivers/scsi/scsi_transport_spi.c + :export: + +SCSI RDMA (SRP) transport class +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ + +The file drivers/scsi/scsi_transport_srp.c defines transport +attributes for SCSI over Remote Direct Memory Access. + +.. kernel-doc:: drivers/scsi/scsi_transport_srp.c + :export: + +SCSI lower layer +================ + +Host Bus Adapter transport types +-------------------------------- + +Many modern device controllers use the SCSI command set as a protocol to +communicate with their devices through many different types of physical +connections. + +In SCSI language a bus capable of carrying SCSI commands is called a +"transport", and a controller connecting to such a bus is called a "host +bus adapter" (HBA). + +Debug transport +~~~~~~~~~~~~~~~ + +The file drivers/scsi/scsi_debug.c simulates a host adapter with a +variable number of disks (or disk like devices) attached, sharing a +common amount of RAM. Does a lot of checking to make sure that we are +not getting blocks mixed up, and panics the kernel if anything out of +the ordinary is seen. + +To be more realistic, the simulated devices have the transport +attributes of SAS disks. + +For documentation see http://sg.danny.cz/sg/sdebug26.html + +todo +~~~~ + +Parallel (fast/wide/ultra) SCSI, USB, SATA, SAS, Fibre Channel, +FireWire, ATAPI devices, Infiniband, I2O, Parallel ports, +netlink... diff --git a/Documentation/driver-api/serial/driver.rst b/Documentation/driver-api/serial/driver.rst new file mode 100644 index 0000000000..84b43061c1 --- /dev/null +++ b/Documentation/driver-api/serial/driver.rst @@ -0,0 +1,106 @@ +==================== +Low Level Serial API +==================== + + +This document is meant as a brief overview of some aspects of the new serial +driver. It is not complete, any questions you have should be directed to +<rmk@arm.linux.org.uk> + +The reference implementation is contained within amba-pl011.c. + + + +Low Level Serial Hardware Driver +-------------------------------- + +The low level serial hardware driver is responsible for supplying port +information (defined by uart_port) and a set of control methods (defined +by uart_ops) to the core serial driver. The low level driver is also +responsible for handling interrupts for the port, and providing any +console support. + + +Console Support +--------------- + +The serial core provides a few helper functions. This includes identifying +the correct port structure (via uart_get_console()) and decoding command line +arguments (uart_parse_options()). + +There is also a helper function (uart_console_write()) which performs a +character by character write, translating newlines to CRLF sequences. +Driver writers are recommended to use this function rather than implementing +their own version. + + +Locking +------- + +It is the responsibility of the low level hardware driver to perform the +necessary locking using port->lock. There are some exceptions (which +are described in the struct uart_ops listing below.) + +There are two locks. A per-port spinlock, and an overall semaphore. + +From the core driver perspective, the port->lock locks the following +data:: + + port->mctrl + port->icount + port->state->xmit.head (circ_buf->head) + port->state->xmit.tail (circ_buf->tail) + +The low level driver is free to use this lock to provide any additional +locking. + +The port_sem semaphore is used to protect against ports being added/ +removed or reconfigured at inappropriate times. Since v2.6.27, this +semaphore has been the 'mutex' member of the tty_port struct, and +commonly referred to as the port mutex. + + +uart_ops +-------- + +.. kernel-doc:: include/linux/serial_core.h + :identifiers: uart_ops + +Other functions +--------------- + +.. kernel-doc:: drivers/tty/serial/serial_core.c + :identifiers: uart_update_timeout uart_get_baud_rate uart_get_divisor + uart_match_port uart_write_wakeup uart_register_driver + uart_unregister_driver uart_suspend_port uart_resume_port + uart_add_one_port uart_remove_one_port uart_console_write + uart_parse_earlycon uart_parse_options uart_set_options + uart_get_lsr_info uart_handle_dcd_change uart_handle_cts_change + uart_try_toggle_sysrq uart_get_console + +.. kernel-doc:: include/linux/serial_core.h + :identifiers: uart_port_tx_limited uart_port_tx + +Other notes +----------- + +It is intended some day to drop the 'unused' entries from uart_port, and +allow low level drivers to register their own individual uart_port's with +the core. This will allow drivers to use uart_port as a pointer to a +structure containing both the uart_port entry with their own extensions, +thus:: + + struct my_port { + struct uart_port port; + int my_stuff; + }; + +Modem control lines via GPIO +---------------------------- + +Some helpers are provided in order to set/get modem control lines via GPIO. + +.. kernel-doc:: drivers/tty/serial/serial_mctrl_gpio.c + :identifiers: mctrl_gpio_init mctrl_gpio_free mctrl_gpio_to_gpiod + mctrl_gpio_set mctrl_gpio_get mctrl_gpio_enable_ms + mctrl_gpio_disable_ms diff --git a/Documentation/driver-api/serial/index.rst b/Documentation/driver-api/serial/index.rst new file mode 100644 index 0000000000..03a55b987a --- /dev/null +++ b/Documentation/driver-api/serial/index.rst @@ -0,0 +1,27 @@ +.. SPDX-License-Identifier: GPL-2.0 + +========================== +Support for Serial devices +========================== + +.. toctree:: + :maxdepth: 1 + + + driver + +Serial drivers +============== + +.. toctree:: + :maxdepth: 1 + + serial-iso7816 + serial-rs485 + +.. only:: subproject and html + + Indices + ======= + + * :ref:`genindex` diff --git a/Documentation/driver-api/serial/serial-iso7816.rst b/Documentation/driver-api/serial/serial-iso7816.rst new file mode 100644 index 0000000000..d990143de0 --- /dev/null +++ b/Documentation/driver-api/serial/serial-iso7816.rst @@ -0,0 +1,90 @@ +============================= +ISO7816 Serial Communications +============================= + +1. Introduction +=============== + + ISO/IEC7816 is a series of standards specifying integrated circuit cards (ICC) + also known as smart cards. + +2. Hardware-related considerations +================================== + + Some CPUs/UARTs (e.g., Microchip AT91) contain a built-in mode capable of + handling communication with a smart card. + + For these microcontrollers, the Linux driver should be made capable of + working in both modes, and proper ioctls (see later) should be made + available at user-level to allow switching from one mode to the other, and + vice versa. + +3. Data Structures Already Available in the Kernel +================================================== + + The Linux kernel provides the serial_iso7816 structure (see [1]) to handle + ISO7816 communications. This data structure is used to set and configure + ISO7816 parameters in ioctls. + + Any driver for devices capable of working both as RS232 and ISO7816 should + implement the iso7816_config callback in the uart_port structure. The + serial_core calls iso7816_config to do the device specific part in response + to TIOCGISO7816 and TIOCSISO7816 ioctls (see below). The iso7816_config + callback receives a pointer to struct serial_iso7816. + +4. Usage from user-level +======================== + + From user-level, ISO7816 configuration can be get/set using the previous + ioctls. For instance, to set ISO7816 you can use the following code:: + + #include <linux/serial.h> + + /* Include definition for ISO7816 ioctls: TIOCSISO7816 and TIOCGISO7816 */ + #include <sys/ioctl.h> + + /* Open your specific device (e.g., /dev/mydevice): */ + int fd = open ("/dev/mydevice", O_RDWR); + if (fd < 0) { + /* Error handling. See errno. */ + } + + struct serial_iso7816 iso7816conf; + + /* Reserved fields as to be zeroed */ + memset(&iso7816conf, 0, sizeof(iso7816conf)); + + /* Enable ISO7816 mode: */ + iso7816conf.flags |= SER_ISO7816_ENABLED; + + /* Select the protocol: */ + /* T=0 */ + iso7816conf.flags |= SER_ISO7816_T(0); + /* or T=1 */ + iso7816conf.flags |= SER_ISO7816_T(1); + + /* Set the guard time: */ + iso7816conf.tg = 2; + + /* Set the clock frequency*/ + iso7816conf.clk = 3571200; + + /* Set transmission factors: */ + iso7816conf.sc_fi = 372; + iso7816conf.sc_di = 1; + + if (ioctl(fd_usart, TIOCSISO7816, &iso7816conf) < 0) { + /* Error handling. See errno. */ + } + + /* Use read() and write() syscalls here... */ + + /* Close the device when finished: */ + if (close (fd) < 0) { + /* Error handling. See errno. */ + } + +5. References +============= + + [1] include/uapi/linux/serial.h diff --git a/Documentation/driver-api/serial/serial-rs485.rst b/Documentation/driver-api/serial/serial-rs485.rst new file mode 100644 index 0000000000..dce061ef76 --- /dev/null +++ b/Documentation/driver-api/serial/serial-rs485.rst @@ -0,0 +1,135 @@ +=========================== +RS485 Serial Communications +=========================== + +1. Introduction +=============== + + EIA-485, also known as TIA/EIA-485 or RS-485, is a standard defining the + electrical characteristics of drivers and receivers for use in balanced + digital multipoint systems. + This standard is widely used for communications in industrial automation + because it can be used effectively over long distances and in electrically + noisy environments. + +2. Hardware-related Considerations +================================== + + Some CPUs/UARTs (e.g., Atmel AT91 or 16C950 UART) contain a built-in + half-duplex mode capable of automatically controlling line direction by + toggling RTS or DTR signals. That can be used to control external + half-duplex hardware like an RS485 transceiver or any RS232-connected + half-duplex devices like some modems. + + For these microcontrollers, the Linux driver should be made capable of + working in both modes, and proper ioctls (see later) should be made + available at user-level to allow switching from one mode to the other, and + vice versa. + +3. Data Structures Already Available in the Kernel +================================================== + + The Linux kernel provides the struct serial_rs485 to handle RS485 + communications. This data structure is used to set and configure RS485 + parameters in the platform data and in ioctls. + + The device tree can also provide RS485 boot time parameters + [#DT-bindings]_. The serial core fills the struct serial_rs485 from the + values given by the device tree when the driver calls + uart_get_rs485_mode(). + + Any driver for devices capable of working both as RS232 and RS485 should + implement the ``rs485_config`` callback and provide ``rs485_supported`` + in the ``struct uart_port``. The serial core calls ``rs485_config`` to do + the device specific part in response to TIOCSRS485 ioctl (see below). The + ``rs485_config`` callback receives a pointer to a sanitizated struct + serial_rs485. The struct serial_rs485 userspace provides is sanitized + before calling ``rs485_config`` using ``rs485_supported`` that indicates + what RS485 features the driver supports for the ``struct uart_port``. + TIOCGRS485 ioctl can be used to read back the struct serial_rs485 + matching to the current configuration. + +.. kernel-doc:: include/uapi/linux/serial.h + :identifiers: serial_rs485 uart_get_rs485_mode + +4. Usage from user-level +======================== + + From user-level, RS485 configuration can be get/set using the previous + ioctls. For instance, to set RS485 you can use the following code:: + + #include <linux/serial.h> + + /* Include definition for RS485 ioctls: TIOCGRS485 and TIOCSRS485 */ + #include <sys/ioctl.h> + + /* Open your specific device (e.g., /dev/mydevice): */ + int fd = open ("/dev/mydevice", O_RDWR); + if (fd < 0) { + /* Error handling. See errno. */ + } + + struct serial_rs485 rs485conf; + + /* Enable RS485 mode: */ + rs485conf.flags |= SER_RS485_ENABLED; + + /* Set logical level for RTS pin equal to 1 when sending: */ + rs485conf.flags |= SER_RS485_RTS_ON_SEND; + /* or, set logical level for RTS pin equal to 0 when sending: */ + rs485conf.flags &= ~(SER_RS485_RTS_ON_SEND); + + /* Set logical level for RTS pin equal to 1 after sending: */ + rs485conf.flags |= SER_RS485_RTS_AFTER_SEND; + /* or, set logical level for RTS pin equal to 0 after sending: */ + rs485conf.flags &= ~(SER_RS485_RTS_AFTER_SEND); + + /* Set rts delay before send, if needed: */ + rs485conf.delay_rts_before_send = ...; + + /* Set rts delay after send, if needed: */ + rs485conf.delay_rts_after_send = ...; + + /* Set this flag if you want to receive data even while sending data */ + rs485conf.flags |= SER_RS485_RX_DURING_TX; + + if (ioctl (fd, TIOCSRS485, &rs485conf) < 0) { + /* Error handling. See errno. */ + } + + /* Use read() and write() syscalls here... */ + + /* Close the device when finished: */ + if (close (fd) < 0) { + /* Error handling. See errno. */ + } + +5. Multipoint Addressing +======================== + + The Linux kernel provides addressing mode for multipoint RS-485 serial + communications line. The addressing mode is enabled with + ``SER_RS485_ADDRB`` flag in struct serial_rs485. The struct serial_rs485 + has two additional flags and fields for enabling receive and destination + addresses. + + Address mode flags: + - ``SER_RS485_ADDRB``: Enabled addressing mode (sets also ADDRB in termios). + - ``SER_RS485_ADDR_RECV``: Receive (filter) address enabled. + - ``SER_RS485_ADDR_DEST``: Set destination address. + + Address fields (enabled with corresponding ``SER_RS485_ADDR_*`` flag): + - ``addr_recv``: Receive address. + - ``addr_dest``: Destination address. + + Once a receive address is set, the communication can occur only with the + particular device and other peers are filtered out. It is left up to the + receiver side to enforce the filtering. Receive address will be cleared + if ``SER_RS485_ADDR_RECV`` is not set. + + Note: not all devices supporting RS485 support multipoint addressing. + +6. References +============= + +.. [#DT-bindings] Documentation/devicetree/bindings/serial/rs485.txt diff --git a/Documentation/driver-api/slimbus.rst b/Documentation/driver-api/slimbus.rst new file mode 100644 index 0000000000..410eec79b2 --- /dev/null +++ b/Documentation/driver-api/slimbus.rst @@ -0,0 +1,132 @@ +============================ +Linux kernel SLIMbus support +============================ + +Overview +======== + +What is SLIMbus? +---------------- +SLIMbus (Serial Low Power Interchip Media Bus) is a specification developed by +MIPI (Mobile Industry Processor Interface) alliance. The bus uses master/slave +configuration, and is a 2-wire multi-drop implementation (clock, and data). + +Currently, SLIMbus is used to interface between application processors of SoCs +(System-on-Chip) and peripheral components (typically codec). SLIMbus uses +Time-Division-Multiplexing to accommodate multiple data channels, and +a control channel. + +The control channel is used for various control functions such as bus +management, configuration and status updates. These messages can be unicast (e.g. +reading/writing device specific values), or multicast (e.g. data channel +reconfiguration sequence is a broadcast message announced to all devices) + +A data channel is used for data-transfer between 2 SLIMbus devices. Data +channel uses dedicated ports on the device. + +Hardware description: +--------------------- +SLIMbus specification has different types of device classifications based on +their capabilities. +A manager device is responsible for enumeration, configuration, and dynamic +channel allocation. Every bus has 1 active manager. + +A generic device is a device providing application functionality (e.g. codec). + +Framer device is responsible for clocking the bus, and transmitting frame-sync +and framing information on the bus. + +Each SLIMbus component has an interface device for monitoring physical layer. + +Typically each SoC contains SLIMbus component having 1 manager, 1 framer device, +1 generic device (for data channel support), and 1 interface device. +External peripheral SLIMbus component usually has 1 generic device (for +functionality/data channel support), and an associated interface device. +The generic device's registers are mapped as 'value elements' so that they can +be written/read using SLIMbus control channel exchanging control/status type of +information. +In case there are multiple framer devices on the same bus, manager device is +responsible to select the active-framer for clocking the bus. + +Per specification, SLIMbus uses "clock gears" to do power management based on +current frequency and bandwidth requirements. There are 10 clock gears and each +gear changes the SLIMbus frequency to be twice its previous gear. + +Each device has a 6-byte enumeration-address and the manager assigns every +device with a 1-byte logical address after the devices report presence on the +bus. + +Software description: +--------------------- +There are 2 types of SLIMbus drivers: + +slim_controller represents a 'controller' for SLIMbus. This driver should +implement duties needed by the SoC (manager device, associated +interface device for monitoring the layers and reporting errors, default +framer device). + +slim_device represents the 'generic device/component' for SLIMbus, and a +slim_driver should implement driver for that slim_device. + +Device notifications to the driver: +----------------------------------- +Since SLIMbus devices have mechanisms for reporting their presence, the +framework allows drivers to bind when corresponding devices report their +presence on the bus. +However, it is possible that the driver needs to be probed +first so that it can enable corresponding SLIMbus device (e.g. power it up and/or +take it out of reset). To support that behavior, the framework allows drivers +to probe first as well (e.g. using standard DeviceTree compatibility field). +This creates the necessity for the driver to know when the device is functional +(i.e. reported present). device_up callback is used for that reason when the +device reports present and is assigned a logical address by the controller. + +Similarly, SLIMbus devices 'report absent' when they go down. A 'device_down' +callback notifies the driver when the device reports absent and its logical +address assignment is invalidated by the controller. + +Another notification "boot_device" is used to notify the slim_driver when +controller resets the bus. This notification allows the driver to take necessary +steps to boot the device so that it's functional after the bus has been reset. + +Driver and Controller APIs: +--------------------------- +.. kernel-doc:: include/linux/slimbus.h + :internal: + +.. kernel-doc:: drivers/slimbus/slimbus.h + :internal: + +.. kernel-doc:: drivers/slimbus/core.c + :export: + +Clock-pause: +------------ +SLIMbus mandates that a reconfiguration sequence (known as clock-pause) be +broadcast to all active devices on the bus before the bus can enter low-power +mode. Controller uses this sequence when it decides to enter low-power mode so +that corresponding clocks and/or power-rails can be turned off to save power. +Clock-pause is exited by waking up framer device (if controller driver initiates +exiting low power mode), or by toggling the data line (if a slave device wants +to initiate it). + +Clock-pause APIs: +~~~~~~~~~~~~~~~~~ +.. kernel-doc:: drivers/slimbus/sched.c + :export: + +Messaging: +---------- +The framework supports regmap and read/write apis to exchange control-information +with a SLIMbus device. APIs can be synchronous or asynchronous. +The header file <linux/slimbus.h> has more documentation about messaging APIs. + +Messaging APIs: +~~~~~~~~~~~~~~~ +.. kernel-doc:: drivers/slimbus/messaging.c + :export: + +Streaming APIs: +~~~~~~~~~~~~~~~ +.. kernel-doc:: drivers/slimbus/stream.c + :export: diff --git a/Documentation/driver-api/sm501.rst b/Documentation/driver-api/sm501.rst new file mode 100644 index 0000000000..882507453b --- /dev/null +++ b/Documentation/driver-api/sm501.rst @@ -0,0 +1,74 @@ +.. include:: <isonum.txt> + +============ +SM501 Driver +============ + +:Copyright: |copy| 2006, 2007 Simtec Electronics + +The Silicon Motion SM501 multimedia companion chip is a multifunction device +which may provide numerous interfaces including USB host controller USB gadget, +asynchronous serial ports, audio functions, and a dual display video interface. +The device may be connected by PCI or local bus with varying functions enabled. + +Core +---- + +The core driver in drivers/mfd provides common services for the +drivers which manage the specific hardware blocks. These services +include locking for common registers, clock control and resource +management. + +The core registers drivers for both PCI and generic bus based +chips via the platform device and driver system. + +On detection of a device, the core initialises the chip (which may +be specified by the platform data) and then exports the selected +peripheral set as platform devices for the specific drivers. + +The core re-uses the platform device system as the platform device +system provides enough features to support the drivers without the +need to create a new bus-type and the associated code to go with it. + + +Resources +--------- + +Each peripheral has a view of the device which is implicitly narrowed to +the specific set of resources that peripheral requires in order to +function correctly. + +The centralised memory allocation allows the driver to ensure that the +maximum possible resource allocation can be made to the video subsystem +as this is by-far the most resource-sensitive of the on-chip functions. + +The primary issue with memory allocation is that of moving the video +buffers once a display mode is chosen. Indeed when a video mode change +occurs the memory footprint of the video subsystem changes. + +Since video memory is difficult to move without changing the display +(unless sufficient contiguous memory can be provided for the old and new +modes simultaneously) the video driver fully utilises the memory area +given to it by aligning fb0 to the start of the area and fb1 to the end +of it. Any memory left over in the middle is used for the acceleration +functions, which are transient and thus their location is less critical +as it can be moved. + + +Configuration +------------- + +The platform device driver uses a set of platform data to pass +configurations through to the core and the subsidiary drivers +so that there can be support for more than one system carrying +an SM501 built into a single kernel image. + +The PCI driver assumes that the PCI card behaves as per the Silicon +Motion reference design. + +There is an errata (AB-5) affecting the selection of the +of the M1XCLK and M1CLK frequencies. These two clocks +must be sourced from the same PLL, although they can then +be divided down individually. If this is not set, then SM501 may +lock and hang the whole system. The driver will refuse to +attach if the PLL selection is different. diff --git a/Documentation/driver-api/soundwire/error_handling.rst b/Documentation/driver-api/soundwire/error_handling.rst new file mode 100644 index 0000000000..aa3a0a23a0 --- /dev/null +++ b/Documentation/driver-api/soundwire/error_handling.rst @@ -0,0 +1,65 @@ +======================== +SoundWire Error Handling +======================== + +The SoundWire PHY was designed with care and errors on the bus are going to +be very unlikely, and if they happen it should be limited to single bit +errors. Examples of this design can be found in the synchronization +mechanism (sync loss after two errors) and short CRCs used for the Bulk +Register Access. + +The errors can be detected with multiple mechanisms: + +1. Bus clash or parity errors: This mechanism relies on low-level detectors + that are independent of the payload and usages, and they cover both control + and audio data. The current implementation only logs such errors. + Improvements could be invalidating an entire programming sequence and + restarting from a known position. In the case of such errors outside of a + control/command sequence, there is no concealment or recovery for audio + data enabled by the SoundWire protocol, the location of the error will also + impact its audibility (most-significant bits will be more impacted in PCM), + and after a number of such errors are detected the bus might be reset. Note + that bus clashes due to programming errors (two streams using the same bit + slots) or electrical issues during the transmit/receive transition cannot + be distinguished, although a recurring bus clash when audio is enabled is a + indication of a bus allocation issue. The interrupt mechanism can also help + identify Slaves which detected a Bus Clash or a Parity Error, but they may + not be responsible for the errors so resetting them individually is not a + viable recovery strategy. + +2. Command status: Each command is associated with a status, which only + covers transmission of the data between devices. The ACK status indicates + that the command was received and will be executed by the end of the + current frame. A NAK indicates that the command was in error and will not + be applied. In case of a bad programming (command sent to non-existent + Slave or to a non-implemented register) or electrical issue, no response + signals the command was ignored. Some Master implementations allow for a + command to be retransmitted several times. If the retransmission fails, + backtracking and restarting the entire programming sequence might be a + solution. Alternatively some implementations might directly issue a bus + reset and re-enumerate all devices. + +3. Timeouts: In a number of cases such as ChannelPrepare or + ClockStopPrepare, the bus driver is supposed to poll a register field until + it transitions to a NotFinished value of zero. The MIPI SoundWire spec 1.1 + does not define timeouts but the MIPI SoundWire DisCo document adds + recommendation on timeouts. If such configurations do not complete, the + driver will return a -ETIMEOUT. Such timeouts are symptoms of a faulty + Slave device and are likely impossible to recover from. + +Errors during global reconfiguration sequences are extremely difficult to +handle: + +1. BankSwitch: An error during the last command issuing a BankSwitch is + difficult to backtrack from. Retransmitting the Bank Switch command may be + possible in a single segment setup, but this can lead to synchronization + problems when enabling multiple bus segments (a command with side effects + such as frame reconfiguration would be handled at different times). A global + hard-reset might be the best solution. + +Note that SoundWire does not provide a mechanism to detect illegal values +written in valid registers. In a number of cases the standard even mentions +that the Slave might behave in implementation-defined ways. The bus +implementation does not provide a recovery mechanism for such errors, Slave +or Master driver implementers are responsible for writing valid values in +valid registers and implement additional range checking if needed. diff --git a/Documentation/driver-api/soundwire/index.rst b/Documentation/driver-api/soundwire/index.rst new file mode 100644 index 0000000000..234911a0db --- /dev/null +++ b/Documentation/driver-api/soundwire/index.rst @@ -0,0 +1,18 @@ +======================= +SoundWire Documentation +======================= + +.. toctree:: + :maxdepth: 1 + + summary + stream + error_handling + locking + +.. only:: subproject and html + + Indices + ======= + + * :ref:`genindex` diff --git a/Documentation/driver-api/soundwire/locking.rst b/Documentation/driver-api/soundwire/locking.rst new file mode 100644 index 0000000000..3a7ffb3d87 --- /dev/null +++ b/Documentation/driver-api/soundwire/locking.rst @@ -0,0 +1,108 @@ +================= +SoundWire Locking +================= + +This document explains locking mechanism of the SoundWire Bus. Bus uses +following locks in order to avoid race conditions in Bus operations on +shared resources. + + - Bus lock + + - Message lock + +Bus lock +======== + +SoundWire Bus lock is a mutex and is part of Bus data structure +(sdw_bus) which is used for every Bus instance. This lock is used to +serialize each of the following operations(s) within SoundWire Bus instance. + + - Addition and removal of Slave(s), changing Slave status. + + - Prepare, Enable, Disable and De-prepare stream operations. + + - Access of Stream data structure. + +Message lock +============ + +SoundWire message transfer lock. This mutex is part of +Bus data structure (sdw_bus). This lock is used to serialize the message +transfers (read/write) within a SoundWire Bus instance. + +Below examples show how locks are acquired. + +Example 1 +--------- + +Message transfer. + + 1. For every message transfer + + a. Acquire Message lock. + + b. Transfer message (Read/Write) to Slave1 or broadcast message on + Bus in case of bank switch. + + c. Release Message lock + + :: + + +----------+ +---------+ + | | | | + | Bus | | Master | + | | | Driver | + | | | | + +----+-----+ +----+----+ + | | + | bus->ops->xfer_msg() | + <-------------------------------+ a. Acquire Message lock + | | b. Transfer message + | | + +-------------------------------> c. Release Message lock + | return success/error | d. Return success/error + | | + + + + +Example 2 +--------- + +Prepare operation. + + 1. Acquire lock for Bus instance associated with Master 1. + + 2. For every message transfer in Prepare operation + + a. Acquire Message lock. + + b. Transfer message (Read/Write) to Slave1 or broadcast message on + Bus in case of bank switch. + + c. Release Message lock. + + 3. Release lock for Bus instance associated with Master 1 :: + + +----------+ +---------+ + | | | | + | Bus | | Master | + | | | Driver | + | | | | + +----+-----+ +----+----+ + | | + | sdw_prepare_stream() | + <-------------------------------+ 1. Acquire bus lock + | | 2. Perform stream prepare + | | + | | + | bus->ops->xfer_msg() | + <-------------------------------+ a. Acquire Message lock + | | b. Transfer message + | | + +-------------------------------> c. Release Message lock + | return success/error | d. Return success/error + | | + | | + | return success/error | 3. Release bus lock + +-------------------------------> 4. Return success/error + | | + + + diff --git a/Documentation/driver-api/soundwire/stream.rst b/Documentation/driver-api/soundwire/stream.rst new file mode 100644 index 0000000000..b432a2de45 --- /dev/null +++ b/Documentation/driver-api/soundwire/stream.rst @@ -0,0 +1,527 @@ +========================= +Audio Stream in SoundWire +========================= + +An audio stream is a logical or virtual connection created between + + (1) System memory buffer(s) and Codec(s) + + (2) DSP memory buffer(s) and Codec(s) + + (3) FIFO(s) and Codec(s) + + (4) Codec(s) and Codec(s) + +which is typically driven by a DMA(s) channel through the data link. An +audio stream contains one or more channels of data. All channels within +stream must have same sample rate and same sample size. + +Assume a stream with two channels (Left & Right) is opened using SoundWire +interface. Below are some ways a stream can be represented in SoundWire. + +Stream Sample in memory (System memory, DSP memory or FIFOs) :: + + ------------------------- + | L | R | L | R | L | R | + ------------------------- + +Example 1: Stereo Stream with L and R channels is rendered from Master to +Slave. Both Master and Slave is using single port. :: + + +---------------+ Clock Signal +---------------+ + | Master +----------------------------------+ Slave | + | Interface | | Interface | + | | | 1 | + | | Data Signal | | + | L + R +----------------------------------+ L + R | + | (Data) | Data Direction | (Data) | + +---------------+ +-----------------------> +---------------+ + + +Example 2: Stereo Stream with L and R channels is captured from Slave to +Master. Both Master and Slave is using single port. :: + + + +---------------+ Clock Signal +---------------+ + | Master +----------------------------------+ Slave | + | Interface | | Interface | + | | | 1 | + | | Data Signal | | + | L + R +----------------------------------+ L + R | + | (Data) | Data Direction | (Data) | + +---------------+ <-----------------------+ +---------------+ + + +Example 3: Stereo Stream with L and R channels is rendered by Master. Each +of the L and R channel is received by two different Slaves. Master and both +Slaves are using single port. :: + + +---------------+ Clock Signal +---------------+ + | Master +---------+------------------------+ Slave | + | Interface | | | Interface | + | | | | 1 | + | | | Data Signal | | + | L + R +---+------------------------------+ L | + | (Data) | | | Data Direction | (Data) | + +---------------+ | | +-------------> +---------------+ + | | + | | + | | +---------------+ + | +----------------------> | Slave | + | | Interface | + | | 2 | + | | | + +----------------------------> | R | + | (Data) | + +---------------+ + +Example 4: Stereo Stream with L and R channels is rendered by +Master. Both of the L and R channels are received by two different +Slaves. Master and both Slaves are using single port handling +L+R. Each Slave device processes the L + R data locally, typically +based on static configuration or dynamic orientation, and may drive +one or more speakers. :: + + +---------------+ Clock Signal +---------------+ + | Master +---------+------------------------+ Slave | + | Interface | | | Interface | + | | | | 1 | + | | | Data Signal | | + | L + R +---+------------------------------+ L + R | + | (Data) | | | Data Direction | (Data) | + +---------------+ | | +-------------> +---------------+ + | | + | | + | | +---------------+ + | +----------------------> | Slave | + | | Interface | + | | 2 | + | | | + +----------------------------> | L + R | + | (Data) | + +---------------+ + +Example 5: Stereo Stream with L and R channel is rendered by two different +Ports of the Master and is received by only single Port of the Slave +interface. :: + + +--------------------+ + | | + | +--------------+ +----------------+ + | | || | | + | | Data Port || L Channel | | + | | 1 |------------+ | | + | | L Channel || | +-----+----+ | + | | (Data) || | L + R Channel || Data | | + | Master +----------+ | +---+---------> || Port | | + | Interface | | || 1 | | + | +--------------+ | || | | + | | || | +----------+ | + | | Data Port |------------+ | | + | | 2 || R Channel | Slave | + | | R Channel || | Interface | + | | (Data) || | 1 | + | +--------------+ Clock Signal | L + R | + | +---------------------------> | (Data) | + +--------------------+ | | + +----------------+ + +Example 6: Stereo Stream with L and R channel is rendered by 2 Masters, each +rendering one channel, and is received by two different Slaves, each +receiving one channel. Both Masters and both Slaves are using single port. :: + + +---------------+ Clock Signal +---------------+ + | Master +----------------------------------+ Slave | + | Interface | | Interface | + | 1 | | 1 | + | | Data Signal | | + | L +----------------------------------+ L | + | (Data) | Data Direction | (Data) | + +---------------+ +-----------------------> +---------------+ + + +---------------+ Clock Signal +---------------+ + | Master +----------------------------------+ Slave | + | Interface | | Interface | + | 2 | | 2 | + | | Data Signal | | + | R +----------------------------------+ R | + | (Data) | Data Direction | (Data) | + +---------------+ +-----------------------> +---------------+ + +Example 7: Stereo Stream with L and R channel is rendered by 2 +Masters, each rendering both channels. Each Slave receives L + R. This +is the same application as Example 4 but with Slaves placed on +separate links. :: + + +---------------+ Clock Signal +---------------+ + | Master +----------------------------------+ Slave | + | Interface | | Interface | + | 1 | | 1 | + | | Data Signal | | + | L + R +----------------------------------+ L + R | + | (Data) | Data Direction | (Data) | + +---------------+ +-----------------------> +---------------+ + + +---------------+ Clock Signal +---------------+ + | Master +----------------------------------+ Slave | + | Interface | | Interface | + | 2 | | 2 | + | | Data Signal | | + | L + R +----------------------------------+ L + R | + | (Data) | Data Direction | (Data) | + +---------------+ +-----------------------> +---------------+ + +Example 8: 4-channel Stream is rendered by 2 Masters, each rendering a +2 channels. Each Slave receives 2 channels. :: + + +---------------+ Clock Signal +---------------+ + | Master +----------------------------------+ Slave | + | Interface | | Interface | + | 1 | | 1 | + | | Data Signal | | + | L1 + R1 +----------------------------------+ L1 + R1 | + | (Data) | Data Direction | (Data) | + +---------------+ +-----------------------> +---------------+ + + +---------------+ Clock Signal +---------------+ + | Master +----------------------------------+ Slave | + | Interface | | Interface | + | 2 | | 2 | + | | Data Signal | | + | L2 + R2 +----------------------------------+ L2 + R2 | + | (Data) | Data Direction | (Data) | + +---------------+ +-----------------------> +---------------+ + +Note1: In multi-link cases like above, to lock, one would acquire a global +lock and then go on locking bus instances. But, in this case the caller +framework(ASoC DPCM) guarantees that stream operations on a card are +always serialized. So, there is no race condition and hence no need for +global lock. + +Note2: A Slave device may be configured to receive all channels +transmitted on a link for a given Stream (Example 4) or just a subset +of the data (Example 3). The configuration of the Slave device is not +handled by a SoundWire subsystem API, but instead by the +snd_soc_dai_set_tdm_slot() API. The platform or machine driver will +typically configure which of the slots are used. For Example 4, the +same slots would be used by all Devices, while for Example 3 the Slave +Device1 would use e.g. Slot 0 and Slave device2 slot 1. + +Note3: Multiple Sink ports can extract the same information for the +same bitSlots in the SoundWire frame, however multiple Source ports +shall be configured with different bitSlot configurations. This is the +same limitation as with I2S/PCM TDM usages. + +SoundWire Stream Management flow +================================ + +Stream definitions +------------------ + + (1) Current stream: This is classified as the stream on which operation has + to be performed like prepare, enable, disable, de-prepare etc. + + (2) Active stream: This is classified as the stream which is already active + on Bus other than current stream. There can be multiple active streams + on the Bus. + +SoundWire Bus manages stream operations for each stream getting +rendered/captured on the SoundWire Bus. This section explains Bus operations +done for each of the stream allocated/released on Bus. Following are the +stream states maintained by the Bus for each of the audio stream. + + +SoundWire stream states +----------------------- + +Below shows the SoundWire stream states and state transition diagram. :: + + +-----------+ +------------+ +----------+ +----------+ + | ALLOCATED +---->| CONFIGURED +---->| PREPARED +---->| ENABLED | + | STATE | | STATE | | STATE | | STATE | + +-----------+ +------------+ +---+--+---+ +----+-----+ + ^ ^ ^ + | | | + __| |___________ | + | | | + v | v + +----------+ +-----+------+ +-+--+-----+ + | RELEASED |<----------+ DEPREPARED |<-------+ DISABLED | + | STATE | | STATE | | STATE | + +----------+ +------------+ +----------+ + +NOTE: State transitions between ``SDW_STREAM_ENABLED`` and +``SDW_STREAM_DISABLED`` are only relevant when then INFO_PAUSE flag is +supported at the ALSA/ASoC level. Likewise the transition between +``SDW_DISABLED_STATE`` and ``SDW_PREPARED_STATE`` depends on the +INFO_RESUME flag. + +NOTE2: The framework implements basic state transition checks, but +does not e.g. check if a transition from DISABLED to ENABLED is valid +on a specific platform. Such tests need to be added at the ALSA/ASoC +level. + +Stream State Operations +----------------------- + +Below section explains the operations done by the Bus on Master(s) and +Slave(s) as part of stream state transitions. + +SDW_STREAM_ALLOCATED +~~~~~~~~~~~~~~~~~~~~ + +Allocation state for stream. This is the entry state +of the stream. Operations performed before entering in this state: + + (1) A stream runtime is allocated for the stream. This stream + runtime is used as a reference for all the operations performed + on the stream. + + (2) The resources required for holding stream runtime information are + allocated and initialized. This holds all stream related information + such as stream type (PCM/PDM) and parameters, Master and Slave + interface associated with the stream, stream state etc. + +After all above operations are successful, stream state is set to +``SDW_STREAM_ALLOCATED``. + +Bus implements below API for allocate a stream which needs to be called once +per stream. From ASoC DPCM framework, this stream state maybe linked to +.startup() operation. + +.. code-block:: c + + int sdw_alloc_stream(char * stream_name); + +The SoundWire core provides a sdw_startup_stream() helper function, +typically called during a dailink .startup() callback, which performs +stream allocation and sets the stream pointer for all DAIs +connected to a stream. + +SDW_STREAM_CONFIGURED +~~~~~~~~~~~~~~~~~~~~~ + +Configuration state of stream. Operations performed before entering in +this state: + + (1) The resources allocated for stream information in SDW_STREAM_ALLOCATED + state are updated here. This includes stream parameters, Master(s) + and Slave(s) runtime information associated with current stream. + + (2) All the Master(s) and Slave(s) associated with current stream provide + the port information to Bus which includes port numbers allocated by + Master(s) and Slave(s) for current stream and their channel mask. + +After all above operations are successful, stream state is set to +``SDW_STREAM_CONFIGURED``. + +Bus implements below APIs for CONFIG state which needs to be called by +the respective Master(s) and Slave(s) associated with stream. These APIs can +only be invoked once by respective Master(s) and Slave(s). From ASoC DPCM +framework, this stream state is linked to .hw_params() operation. + +.. code-block:: c + + int sdw_stream_add_master(struct sdw_bus * bus, + struct sdw_stream_config * stream_config, + struct sdw_ports_config * ports_config, + struct sdw_stream_runtime * stream); + + int sdw_stream_add_slave(struct sdw_slave * slave, + struct sdw_stream_config * stream_config, + struct sdw_ports_config * ports_config, + struct sdw_stream_runtime * stream); + + +SDW_STREAM_PREPARED +~~~~~~~~~~~~~~~~~~~ + +Prepare state of stream. Operations performed before entering in this state: + + (0) Steps 1 and 2 are omitted in the case of a resume operation, + where the bus bandwidth is known. + + (1) Bus parameters such as bandwidth, frame shape, clock frequency, + are computed based on current stream as well as already active + stream(s) on Bus. Re-computation is required to accommodate current + stream on the Bus. + + (2) Transport and port parameters of all Master(s) and Slave(s) port(s) are + computed for the current as well as already active stream based on frame + shape and clock frequency computed in step 1. + + (3) Computed Bus and transport parameters are programmed in Master(s) and + Slave(s) registers. The banked registers programming is done on the + alternate bank (bank currently unused). Port(s) are enabled for the + already active stream(s) on the alternate bank (bank currently unused). + This is done in order to not disrupt already active stream(s). + + (4) Once all the values are programmed, Bus initiates switch to alternate + bank where all new values programmed gets into effect. + + (5) Ports of Master(s) and Slave(s) for current stream are prepared by + programming PrepareCtrl register. + +After all above operations are successful, stream state is set to +``SDW_STREAM_PREPARED``. + +Bus implements below API for PREPARE state which needs to be called +once per stream. From ASoC DPCM framework, this stream state is linked +to .prepare() operation. Since the .trigger() operations may not +follow the .prepare(), a direct transition from +``SDW_STREAM_PREPARED`` to ``SDW_STREAM_DEPREPARED`` is allowed. + +.. code-block:: c + + int sdw_prepare_stream(struct sdw_stream_runtime * stream); + + +SDW_STREAM_ENABLED +~~~~~~~~~~~~~~~~~~ + +Enable state of stream. The data port(s) are enabled upon entering this state. +Operations performed before entering in this state: + + (1) All the values computed in SDW_STREAM_PREPARED state are programmed + in alternate bank (bank currently unused). It includes programming of + already active stream(s) as well. + + (2) All the Master(s) and Slave(s) port(s) for the current stream are + enabled on alternate bank (bank currently unused) by programming + ChannelEn register. + + (3) Once all the values are programmed, Bus initiates switch to alternate + bank where all new values programmed gets into effect and port(s) + associated with current stream are enabled. + +After all above operations are successful, stream state is set to +``SDW_STREAM_ENABLED``. + +Bus implements below API for ENABLE state which needs to be called once per +stream. From ASoC DPCM framework, this stream state is linked to +.trigger() start operation. + +.. code-block:: c + + int sdw_enable_stream(struct sdw_stream_runtime * stream); + +SDW_STREAM_DISABLED +~~~~~~~~~~~~~~~~~~~ + +Disable state of stream. The data port(s) are disabled upon exiting this state. +Operations performed before entering in this state: + + (1) All the Master(s) and Slave(s) port(s) for the current stream are + disabled on alternate bank (bank currently unused) by programming + ChannelEn register. + + (2) All the current configuration of Bus and active stream(s) are programmed + into alternate bank (bank currently unused). + + (3) Once all the values are programmed, Bus initiates switch to alternate + bank where all new values programmed gets into effect and port(s) associated + with current stream are disabled. + +After all above operations are successful, stream state is set to +``SDW_STREAM_DISABLED``. + +Bus implements below API for DISABLED state which needs to be called once +per stream. From ASoC DPCM framework, this stream state is linked to +.trigger() stop operation. + +When the INFO_PAUSE flag is supported, a direct transition to +``SDW_STREAM_ENABLED`` is allowed. + +For resume operations where ASoC will use the .prepare() callback, the +stream can transition from ``SDW_STREAM_DISABLED`` to +``SDW_STREAM_PREPARED``, with all required settings restored but +without updating the bandwidth and bit allocation. + +.. code-block:: c + + int sdw_disable_stream(struct sdw_stream_runtime * stream); + + +SDW_STREAM_DEPREPARED +~~~~~~~~~~~~~~~~~~~~~ + +De-prepare state of stream. Operations performed before entering in this +state: + + (1) All the port(s) of Master(s) and Slave(s) for current stream are + de-prepared by programming PrepareCtrl register. + + (2) The payload bandwidth of current stream is reduced from the total + bandwidth requirement of bus and new parameters calculated and + applied by performing bank switch etc. + +After all above operations are successful, stream state is set to +``SDW_STREAM_DEPREPARED``. + +Bus implements below API for DEPREPARED state which needs to be called +once per stream. ALSA/ASoC do not have a concept of 'deprepare', and +the mapping from this stream state to ALSA/ASoC operation may be +implementation specific. + +When the INFO_PAUSE flag is supported, the stream state is linked to +the .hw_free() operation - the stream is not deprepared on a +TRIGGER_STOP. + +Other implementations may transition to the ``SDW_STREAM_DEPREPARED`` +state on TRIGGER_STOP, should they require a transition through the +``SDW_STREAM_PREPARED`` state. + +.. code-block:: c + + int sdw_deprepare_stream(struct sdw_stream_runtime * stream); + + +SDW_STREAM_RELEASED +~~~~~~~~~~~~~~~~~~~ + +Release state of stream. Operations performed before entering in this state: + + (1) Release port resources for all Master(s) and Slave(s) port(s) + associated with current stream. + + (2) Release Master(s) and Slave(s) runtime resources associated with + current stream. + + (3) Release stream runtime resources associated with current stream. + +After all above operations are successful, stream state is set to +``SDW_STREAM_RELEASED``. + +Bus implements below APIs for RELEASE state which needs to be called by +all the Master(s) and Slave(s) associated with stream. From ASoC DPCM +framework, this stream state is linked to .hw_free() operation. + +.. code-block:: c + + int sdw_stream_remove_master(struct sdw_bus * bus, + struct sdw_stream_runtime * stream); + int sdw_stream_remove_slave(struct sdw_slave * slave, + struct sdw_stream_runtime * stream); + + +The .shutdown() ASoC DPCM operation calls below Bus API to release +stream assigned as part of ALLOCATED state. + +In .shutdown() the data structure maintaining stream state are freed up. + +.. code-block:: c + + void sdw_release_stream(struct sdw_stream_runtime * stream); + +The SoundWire core provides a sdw_shutdown_stream() helper function, +typically called during a dailink .shutdown() callback, which clears +the stream pointer for all DAIS connected to a stream and releases the +memory allocated for the stream. + +Not Supported +============= + +1. A single port with multiple channels supported cannot be used between two + streams or across stream. For example a port with 4 channels cannot be used + to handle 2 independent stereo streams even though it's possible in theory + in SoundWire. diff --git a/Documentation/driver-api/soundwire/summary.rst b/Documentation/driver-api/soundwire/summary.rst new file mode 100644 index 0000000000..01dcb954f6 --- /dev/null +++ b/Documentation/driver-api/soundwire/summary.rst @@ -0,0 +1,208 @@ +=========================== +SoundWire Subsystem Summary +=========================== + +SoundWire is a new interface ratified in 2015 by the MIPI Alliance. +SoundWire is used for transporting data typically related to audio +functions. SoundWire interface is optimized to integrate audio devices in +mobile or mobile inspired systems. + +SoundWire is a 2-pin multi-drop interface with data and clock line. It +facilitates development of low cost, efficient, high performance systems. +Broad level key features of SoundWire interface include: + + (1) Transporting all of payload data channels, control information, and setup + commands over a single two-pin interface. + + (2) Lower clock frequency, and hence lower power consumption, by use of DDR + (Dual Data Rate) data transmission. + + (3) Clock scaling and optional multiple data lanes to give wide flexibility + in data rate to match system requirements. + + (4) Device status monitoring, including interrupt-style alerts to the Master. + +The SoundWire protocol supports up to eleven Slave interfaces. All the +interfaces share the common Bus containing data and clock line. Each of the +Slaves can support up to 14 Data Ports. 13 Data Ports are dedicated to audio +transport. Data Port0 is dedicated to transport of Bulk control information, +each of the audio Data Ports (1..14) can support up to 8 Channels in +transmit or receiving mode (typically fixed direction but configurable +direction is enabled by the specification). Bandwidth restrictions to +~19.2..24.576Mbits/s don't however allow for 11*13*8 channels to be +transmitted simultaneously. + +Below figure shows an example of connectivity between a SoundWire Master and +two Slave devices. :: + + +---------------+ +---------------+ + | | Clock Signal | | + | Master |-------+-------------------------------| Slave | + | Interface | | Data Signal | Interface 1 | + | |-------|-------+-----------------------| | + +---------------+ | | +---------------+ + | | + | | + | | + +--+-------+--+ + | | + | Slave | + | Interface 2 | + | | + +-------------+ + + +Terminology +=========== + +The MIPI SoundWire specification uses the term 'device' to refer to a Master +or Slave interface, which of course can be confusing. In this summary and +code we use the term interface only to refer to the hardware. We follow the +Linux device model by mapping each Slave interface connected on the bus as a +device managed by a specific driver. The Linux SoundWire subsystem provides +a framework to implement a SoundWire Slave driver with an API allowing +3rd-party vendors to enable implementation-defined functionality while +common setup/configuration tasks are handled by the bus. + +Bus: +Implements SoundWire Linux Bus which handles the SoundWire protocol. +Programs all the MIPI-defined Slave registers. Represents a SoundWire +Master. Multiple instances of Bus may be present in a system. + +Slave: +Registers as SoundWire Slave device (Linux Device). Multiple Slave devices +can register to a Bus instance. + +Slave driver: +Driver controlling the Slave device. MIPI-specified registers are controlled +directly by the Bus (and transmitted through the Master driver/interface). +Any implementation-defined Slave register is controlled by Slave driver. In +practice, it is expected that the Slave driver relies on regmap and does not +request direct register access. + +Programming interfaces (SoundWire Master interface Driver) +========================================================== + +SoundWire Bus supports programming interfaces for the SoundWire Master +implementation and SoundWire Slave devices. All the code uses the "sdw" +prefix commonly used by SoC designers and 3rd party vendors. + +Each of the SoundWire Master interfaces needs to be registered to the Bus. +Bus implements API to read standard Master MIPI properties and also provides +callback in Master ops for Master driver to implement its own functions that +provides capabilities information. DT support is not implemented at this +time but should be trivial to add since capabilities are enabled with the +``device_property_`` API. + +The Master interface along with the Master interface capabilities are +registered based on board file, DT or ACPI. + +Following is the Bus API to register the SoundWire Bus: + +.. code-block:: c + + int sdw_bus_master_add(struct sdw_bus *bus, + struct device *parent, + struct fwnode_handle) + { + sdw_master_device_add(bus, parent, fwnode); + + mutex_init(&bus->lock); + INIT_LIST_HEAD(&bus->slaves); + + /* Check ACPI for Slave devices */ + sdw_acpi_find_slaves(bus); + + /* Check DT for Slave devices */ + sdw_of_find_slaves(bus); + + return 0; + } + +This will initialize sdw_bus object for Master device. "sdw_master_ops" and +"sdw_master_port_ops" callback functions are provided to the Bus. + +"sdw_master_ops" is used by Bus to control the Bus in the hardware specific +way. It includes Bus control functions such as sending the SoundWire +read/write messages on Bus, setting up clock frequency & Stream +Synchronization Point (SSP). The "sdw_master_ops" structure abstracts the +hardware details of the Master from the Bus. + +"sdw_master_port_ops" is used by Bus to setup the Port parameters of the +Master interface Port. Master interface Port register map is not defined by +MIPI specification, so Bus calls the "sdw_master_port_ops" callback +function to do Port operations like "Port Prepare", "Port Transport params +set", "Port enable and disable". The implementation of the Master driver can +then perform hardware-specific configurations. + +Programming interfaces (SoundWire Slave Driver) +=============================================== + +The MIPI specification requires each Slave interface to expose a unique +48-bit identifier, stored in 6 read-only dev_id registers. This dev_id +identifier contains vendor and part information, as well as a field enabling +to differentiate between identical components. An additional class field is +currently unused. Slave driver is written for a specific vendor and part +identifier, Bus enumerates the Slave device based on these two ids. +Slave device and driver match is done based on these two ids . Probe +of the Slave driver is called by Bus on successful match between device and +driver id. A parent/child relationship is enforced between Master and Slave +devices (the logical representation is aligned with the physical +connectivity). + +The information on Master/Slave dependencies is stored in platform data, +board-file, ACPI or DT. The MIPI Software specification defines additional +link_id parameters for controllers that have multiple Master interfaces. The +dev_id registers are only unique in the scope of a link, and the link_id +unique in the scope of a controller. Both dev_id and link_id are not +necessarily unique at the system level but the parent/child information is +used to avoid ambiguity. + +.. code-block:: c + + static const struct sdw_device_id slave_id[] = { + SDW_SLAVE_ENTRY(0x025d, 0x700, 0), + {}, + }; + MODULE_DEVICE_TABLE(sdw, slave_id); + + static struct sdw_driver slave_sdw_driver = { + .driver = { + .name = "slave_xxx", + .pm = &slave_runtime_pm, + }, + .probe = slave_sdw_probe, + .remove = slave_sdw_remove, + .ops = &slave_slave_ops, + .id_table = slave_id, + }; + + +For capabilities, Bus implements API to read standard Slave MIPI properties +and also provides callback in Slave ops for Slave driver to implement own +function that provides capabilities information. Bus needs to know a set of +Slave capabilities to program Slave registers and to control the Bus +reconfigurations. + +Future enhancements to be done +============================== + + (1) Bulk Register Access (BRA) transfers. + + + (2) Multiple data lane support. + +Links +===== + +SoundWire MIPI specification 1.1 is available at: +https://members.mipi.org/wg/All-Members/document/70290 + +SoundWire MIPI DisCo (Discovery and Configuration) specification is +available at: +https://www.mipi.org/specifications/mipi-disco-soundwire + +(publicly accessible with registration or directly accessible to MIPI +members) + +MIPI Alliance Manufacturer ID Page: mid.mipi.org diff --git a/Documentation/driver-api/spi.rst b/Documentation/driver-api/spi.rst new file mode 100644 index 0000000000..f288870450 --- /dev/null +++ b/Documentation/driver-api/spi.rst @@ -0,0 +1,53 @@ +Serial Peripheral Interface (SPI) +================================= + +SPI is the "Serial Peripheral Interface", widely used with embedded +systems because it is a simple and efficient interface: basically a +multiplexed shift register. Its three signal wires hold a clock (SCK, +often in the range of 1-20 MHz), a "Master Out, Slave In" (MOSI) data +line, and a "Master In, Slave Out" (MISO) data line. SPI is a full +duplex protocol; for each bit shifted out the MOSI line (one per clock) +another is shifted in on the MISO line. Those bits are assembled into +words of various sizes on the way to and from system memory. An +additional chipselect line is usually active-low (nCS); four signals are +normally used for each peripheral, plus sometimes an interrupt. + +The SPI bus facilities listed here provide a generalized interface to +declare SPI busses and devices, manage them according to the standard +Linux driver model, and perform input/output operations. At this time, +only "master" side interfaces are supported, where Linux talks to SPI +peripherals and does not implement such a peripheral itself. (Interfaces +to support implementing SPI slaves would necessarily look different.) + +The programming interface is structured around two kinds of driver, and +two kinds of device. A "Controller Driver" abstracts the controller +hardware, which may be as simple as a set of GPIO pins or as complex as +a pair of FIFOs connected to dual DMA engines on the other side of the +SPI shift register (maximizing throughput). Such drivers bridge between +whatever bus they sit on (often the platform bus) and SPI, and expose +the SPI side of their device as a :c:type:`struct spi_controller +<spi_controller>`. SPI devices are children of that master, +represented as a :c:type:`struct spi_device <spi_device>` and +manufactured from :c:type:`struct spi_board_info +<spi_board_info>` descriptors which are usually provided by +board-specific initialization code. A :c:type:`struct spi_driver +<spi_driver>` is called a "Protocol Driver", and is bound to a +spi_device using normal driver model calls. + +The I/O model is a set of queued messages. Protocol drivers submit one +or more :c:type:`struct spi_message <spi_message>` objects, +which are processed and completed asynchronously. (There are synchronous +wrappers, however.) Messages are built from one or more +:c:type:`struct spi_transfer <spi_transfer>` objects, each of +which wraps a full duplex SPI transfer. A variety of protocol tweaking +options are needed, because different chips adopt very different +policies for how they use the bits transferred with SPI. + +.. kernel-doc:: include/linux/spi/spi.h + :internal: + +.. kernel-doc:: drivers/spi/spi.c + :functions: spi_register_board_info + +.. kernel-doc:: drivers/spi/spi.c + :export: diff --git a/Documentation/driver-api/surface_aggregator/client-api.rst b/Documentation/driver-api/surface_aggregator/client-api.rst new file mode 100644 index 0000000000..8e0b000d0e --- /dev/null +++ b/Documentation/driver-api/surface_aggregator/client-api.rst @@ -0,0 +1,38 @@ +.. SPDX-License-Identifier: GPL-2.0+ + +=============================== +Client Driver API Documentation +=============================== + +.. contents:: + :depth: 2 + + +Serial Hub Communication +======================== + +.. kernel-doc:: include/linux/surface_aggregator/serial_hub.h + +.. kernel-doc:: drivers/platform/surface/aggregator/ssh_packet_layer.c + :export: + + +Controller and Core Interface +============================= + +.. kernel-doc:: include/linux/surface_aggregator/controller.h + +.. kernel-doc:: drivers/platform/surface/aggregator/controller.c + :export: + +.. kernel-doc:: drivers/platform/surface/aggregator/core.c + :export: + + +Client Bus and Client Device API +================================ + +.. kernel-doc:: include/linux/surface_aggregator/device.h + +.. kernel-doc:: drivers/platform/surface/aggregator/bus.c + :export: diff --git a/Documentation/driver-api/surface_aggregator/client.rst b/Documentation/driver-api/surface_aggregator/client.rst new file mode 100644 index 0000000000..e100ab0a24 --- /dev/null +++ b/Documentation/driver-api/surface_aggregator/client.rst @@ -0,0 +1,397 @@ +.. SPDX-License-Identifier: GPL-2.0+ + +.. |ssam_controller| replace:: :c:type:`struct ssam_controller <ssam_controller>` +.. |ssam_device| replace:: :c:type:`struct ssam_device <ssam_device>` +.. |ssam_device_driver| replace:: :c:type:`struct ssam_device_driver <ssam_device_driver>` +.. |ssam_client_bind| replace:: :c:func:`ssam_client_bind` +.. |ssam_client_link| replace:: :c:func:`ssam_client_link` +.. |ssam_get_controller| replace:: :c:func:`ssam_get_controller` +.. |ssam_controller_get| replace:: :c:func:`ssam_controller_get` +.. |ssam_controller_put| replace:: :c:func:`ssam_controller_put` +.. |ssam_device_alloc| replace:: :c:func:`ssam_device_alloc` +.. |ssam_device_add| replace:: :c:func:`ssam_device_add` +.. |ssam_device_remove| replace:: :c:func:`ssam_device_remove` +.. |ssam_device_driver_register| replace:: :c:func:`ssam_device_driver_register` +.. |ssam_device_driver_unregister| replace:: :c:func:`ssam_device_driver_unregister` +.. |module_ssam_device_driver| replace:: :c:func:`module_ssam_device_driver` +.. |SSAM_DEVICE| replace:: :c:func:`SSAM_DEVICE` +.. |ssam_notifier_register| replace:: :c:func:`ssam_notifier_register` +.. |ssam_notifier_unregister| replace:: :c:func:`ssam_notifier_unregister` +.. |ssam_device_notifier_register| replace:: :c:func:`ssam_device_notifier_register` +.. |ssam_device_notifier_unregister| replace:: :c:func:`ssam_device_notifier_unregister` +.. |ssam_request_do_sync| replace:: :c:func:`ssam_request_do_sync` +.. |ssam_event_mask| replace:: :c:type:`enum ssam_event_mask <ssam_event_mask>` + + +====================== +Writing Client Drivers +====================== + +For the API documentation, refer to: + +.. toctree:: + :maxdepth: 2 + + client-api + + +Overview +======== + +Client drivers can be set up in two main ways, depending on how the +corresponding device is made available to the system. We specifically +differentiate between devices that are presented to the system via one of +the conventional ways, e.g. as platform devices via ACPI, and devices that +are non-discoverable and instead need to be explicitly provided by some +other mechanism, as discussed further below. + + +Non-SSAM Client Drivers +======================= + +All communication with the SAM EC is handled via the |ssam_controller| +representing that EC to the kernel. Drivers targeting a non-SSAM device (and +thus not being a |ssam_device_driver|) need to explicitly establish a +connection/relation to that controller. This can be done via the +|ssam_client_bind| function. Said function returns a reference to the SSAM +controller, but, more importantly, also establishes a device link between +client device and controller (this can also be done separate via +|ssam_client_link|). It is important to do this, as it, first, guarantees +that the returned controller is valid for use in the client driver for as +long as this driver is bound to its device, i.e. that the driver gets +unbound before the controller ever becomes invalid, and, second, as it +ensures correct suspend/resume ordering. This setup should be done in the +driver's probe function, and may be used to defer probing in case the SSAM +subsystem is not ready yet, for example: + +.. code-block:: c + + static int client_driver_probe(struct platform_device *pdev) + { + struct ssam_controller *ctrl; + + ctrl = ssam_client_bind(&pdev->dev); + if (IS_ERR(ctrl)) + return PTR_ERR(ctrl) == -ENODEV ? -EPROBE_DEFER : PTR_ERR(ctrl); + + // ... + + return 0; + } + +The controller may be separately obtained via |ssam_get_controller| and its +lifetime be guaranteed via |ssam_controller_get| and |ssam_controller_put|. +Note that none of these functions, however, guarantee that the controller +will not be shut down or suspended. These functions essentially only operate +on the reference, i.e. only guarantee a bare minimum of accessibility +without any guarantees at all on practical operability. + + +Adding SSAM Devices +=================== + +If a device does not already exist/is not already provided via conventional +means, it should be provided as |ssam_device| via the SSAM client device +hub. New devices can be added to this hub by entering their UID into the +corresponding registry. SSAM devices can also be manually allocated via +|ssam_device_alloc|, subsequently to which they have to be added via +|ssam_device_add| and eventually removed via |ssam_device_remove|. By +default, the parent of the device is set to the controller device provided +for allocation, however this may be changed before the device is added. Note +that, when changing the parent device, care must be taken to ensure that the +controller lifetime and suspend/resume ordering guarantees, in the default +setup provided through the parent-child relation, are preserved. If +necessary, by use of |ssam_client_link| as is done for non-SSAM client +drivers and described in more detail above. + +A client device must always be removed by the party which added the +respective device before the controller shuts down. Such removal can be +guaranteed by linking the driver providing the SSAM device to the controller +via |ssam_client_link|, causing it to unbind before the controller driver +unbinds. Client devices registered with the controller as parent are +automatically removed when the controller shuts down, but this should not be +relied upon, especially as this does not extend to client devices with a +different parent. + + +SSAM Client Drivers +=================== + +SSAM client device drivers are, in essence, no different than other device +driver types. They are represented via |ssam_device_driver| and bind to a +|ssam_device| via its UID (:c:type:`struct ssam_device.uid <ssam_device>`) +member and the match table +(:c:type:`struct ssam_device_driver.match_table <ssam_device_driver>`), +which should be set when declaring the driver struct instance. Refer to the +|SSAM_DEVICE| macro documentation for more details on how to define members +of the driver's match table. + +The UID for SSAM client devices consists of a ``domain``, a ``category``, +a ``target``, an ``instance``, and a ``function``. The ``domain`` is used +differentiate between physical SAM devices +(:c:type:`SSAM_DOMAIN_SERIALHUB <ssam_device_domain>`), i.e. devices that can +be accessed via the Surface Serial Hub, and virtual ones +(:c:type:`SSAM_DOMAIN_VIRTUAL <ssam_device_domain>`), such as client-device +hubs, that have no real representation on the SAM EC and are solely used on +the kernel/driver-side. For physical devices, ``category`` represents the +target category, ``target`` the target ID, and ``instance`` the instance ID +used to access the physical SAM device. In addition, ``function`` references +a specific device functionality, but has no meaning to the SAM EC. The +(default) name of a client device is generated based on its UID. + +A driver instance can be registered via |ssam_device_driver_register| and +unregistered via |ssam_device_driver_unregister|. For convenience, the +|module_ssam_device_driver| macro may be used to define module init- and +exit-functions registering the driver. + +The controller associated with a SSAM client device can be found in its +:c:type:`struct ssam_device.ctrl <ssam_device>` member. This reference is +guaranteed to be valid for at least as long as the client driver is bound, +but should also be valid for as long as the client device exists. Note, +however, that access outside of the bound client driver must ensure that the +controller device is not suspended while making any requests or +(un-)registering event notifiers (and thus should generally be avoided). This +is guaranteed when the controller is accessed from inside the bound client +driver. + + +Making Synchronous Requests +=========================== + +Synchronous requests are (currently) the main form of host-initiated +communication with the EC. There are a couple of ways to define and execute +such requests, however, most of them boil down to something similar as shown +in the example below. This example defines a write-read request, meaning +that the caller provides an argument to the SAM EC and receives a response. +The caller needs to know the (maximum) length of the response payload and +provide a buffer for it. + +Care must be taken to ensure that any command payload data passed to the SAM +EC is provided in little-endian format and, similarly, any response payload +data received from it is converted from little-endian to host endianness. + +.. code-block:: c + + int perform_request(struct ssam_controller *ctrl, u32 arg, u32 *ret) + { + struct ssam_request rqst; + struct ssam_response resp; + int status; + + /* Convert request argument to little-endian. */ + __le32 arg_le = cpu_to_le32(arg); + __le32 ret_le = cpu_to_le32(0); + + /* + * Initialize request specification. Replace this with your values. + * The rqst.payload field may be NULL if rqst.length is zero, + * indicating that the request does not have any argument. + * + * Note: The request parameters used here are not valid, i.e. + * they do not correspond to an actual SAM/EC request. + */ + rqst.target_category = SSAM_SSH_TC_SAM; + rqst.target_id = SSAM_SSH_TID_SAM; + rqst.command_id = 0x02; + rqst.instance_id = 0x03; + rqst.flags = SSAM_REQUEST_HAS_RESPONSE; + rqst.length = sizeof(arg_le); + rqst.payload = (u8 *)&arg_le; + + /* Initialize request response. */ + resp.capacity = sizeof(ret_le); + resp.length = 0; + resp.pointer = (u8 *)&ret_le; + + /* + * Perform actual request. The response pointer may be null in case + * the request does not have any response. This must be consistent + * with the SSAM_REQUEST_HAS_RESPONSE flag set in the specification + * above. + */ + status = ssam_request_do_sync(ctrl, &rqst, &resp); + + /* + * Alternatively use + * + * ssam_request_do_sync_onstack(ctrl, &rqst, &resp, sizeof(arg_le)); + * + * to perform the request, allocating the message buffer directly + * on the stack as opposed to allocation via kzalloc(). + */ + + /* + * Convert request response back to native format. Note that in the + * error case, this value is not touched by the SSAM core, i.e. + * 'ret_le' will be zero as specified in its initialization. + */ + *ret = le32_to_cpu(ret_le); + + return status; + } + +Note that |ssam_request_do_sync| in its essence is a wrapper over lower-level +request primitives, which may also be used to perform requests. Refer to its +implementation and documentation for more details. + +An arguably more user-friendly way of defining such functions is by using +one of the generator macros, for example via: + +.. code-block:: c + + SSAM_DEFINE_SYNC_REQUEST_W(__ssam_tmp_perf_mode_set, __le32, { + .target_category = SSAM_SSH_TC_TMP, + .target_id = SSAM_SSH_TID_SAM, + .command_id = 0x03, + .instance_id = 0x00, + }); + +This example defines a function + +.. code-block:: c + + static int __ssam_tmp_perf_mode_set(struct ssam_controller *ctrl, const __le32 *arg); + +executing the specified request, with the controller passed in when calling +said function. In this example, the argument is provided via the ``arg`` +pointer. Note that the generated function allocates the message buffer on +the stack. Thus, if the argument provided via the request is large, these +kinds of macros should be avoided. Also note that, in contrast to the +previous non-macro example, this function does not do any endianness +conversion, which has to be handled by the caller. Apart from those +differences the function generated by the macro is similar to the one +provided in the non-macro example above. + +The full list of such function-generating macros is + +- :c:func:`SSAM_DEFINE_SYNC_REQUEST_N` for requests without return value and + without argument. +- :c:func:`SSAM_DEFINE_SYNC_REQUEST_R` for requests with return value but no + argument. +- :c:func:`SSAM_DEFINE_SYNC_REQUEST_W` for requests without return value but + with argument. + +Refer to their respective documentation for more details. For each one of +these macros, a special variant is provided, which targets request types +applicable to multiple instances of the same device type: + +- :c:func:`SSAM_DEFINE_SYNC_REQUEST_MD_N` +- :c:func:`SSAM_DEFINE_SYNC_REQUEST_MD_R` +- :c:func:`SSAM_DEFINE_SYNC_REQUEST_MD_W` + +The difference of those macros to the previously mentioned versions is, that +the device target and instance IDs are not fixed for the generated function, +but instead have to be provided by the caller of said function. + +Additionally, variants for direct use with client devices, i.e. +|ssam_device|, are also provided. These can, for example, be used as +follows: + +.. code-block:: c + + SSAM_DEFINE_SYNC_REQUEST_CL_R(ssam_bat_get_sta, __le32, { + .target_category = SSAM_SSH_TC_BAT, + .command_id = 0x01, + }); + +This invocation of the macro defines a function + +.. code-block:: c + + static int ssam_bat_get_sta(struct ssam_device *sdev, __le32 *ret); + +executing the specified request, using the device IDs and controller given +in the client device. The full list of such macros for client devices is: + +- :c:func:`SSAM_DEFINE_SYNC_REQUEST_CL_N` +- :c:func:`SSAM_DEFINE_SYNC_REQUEST_CL_R` +- :c:func:`SSAM_DEFINE_SYNC_REQUEST_CL_W` + + +Handling Events +=============== + +To receive events from the SAM EC, an event notifier must be registered for +the desired event via |ssam_notifier_register|. The notifier must be +unregistered via |ssam_notifier_unregister| once it is not required any +more. For |ssam_device| type clients, the |ssam_device_notifier_register| and +|ssam_device_notifier_unregister| wrappers should be preferred as they properly +handle hot-removal of client devices. + +Event notifiers are registered by providing (at minimum) a callback to call +in case an event has been received, the registry specifying how the event +should be enabled, an event ID specifying for which target category and, +optionally and depending on the registry used, for which instance ID events +should be enabled, and finally, flags describing how the EC will send these +events. If the specific registry does not enable events by instance ID, the +instance ID must be set to zero. Additionally, a priority for the respective +notifier may be specified, which determines its order in relation to any +other notifier registered for the same target category. + +By default, event notifiers will receive all events for the specific target +category, regardless of the instance ID specified when registering the +notifier. The core may be instructed to only call a notifier if the target +ID or instance ID (or both) of the event match the ones implied by the +notifier IDs (in case of target ID, the target ID of the registry), by +providing an event mask (see |ssam_event_mask|). + +In general, the target ID of the registry is also the target ID of the +enabled event (with the notable exception being keyboard input events on the +Surface Laptop 1 and 2, which are enabled via a registry with target ID 1, +but provide events with target ID 2). + +A full example for registering an event notifier and handling received +events is provided below: + +.. code-block:: c + + u32 notifier_callback(struct ssam_event_notifier *nf, + const struct ssam_event *event) + { + int status = ... + + /* Handle the event here ... */ + + /* Convert return value and indicate that we handled the event. */ + return ssam_notifier_from_errno(status) | SSAM_NOTIF_HANDLED; + } + + int setup_notifier(struct ssam_device *sdev, + struct ssam_event_notifier *nf) + { + /* Set priority wrt. other handlers of same target category. */ + nf->base.priority = 1; + + /* Set event/notifier callback. */ + nf->base.fn = notifier_callback; + + /* Specify event registry, i.e. how events get enabled/disabled. */ + nf->event.reg = SSAM_EVENT_REGISTRY_KIP; + + /* Specify which event to enable/disable */ + nf->event.id.target_category = sdev->uid.category; + nf->event.id.instance = sdev->uid.instance; + + /* + * Specify for which events the notifier callback gets executed. + * This essentially tells the core if it can skip notifiers that + * don't have target or instance IDs matching those of the event. + */ + nf->event.mask = SSAM_EVENT_MASK_STRICT; + + /* Specify event flags. */ + nf->event.flags = SSAM_EVENT_SEQUENCED; + + return ssam_notifier_register(sdev->ctrl, nf); + } + +Multiple event notifiers can be registered for the same event. The event +handler core takes care of enabling and disabling events when notifiers are +registered and unregistered, by keeping track of how many notifiers for a +specific event (combination of registry, event target category, and event +instance ID) are currently registered. This means that a specific event will +be enabled when the first notifier for it is being registered and disabled +when the last notifier for it is being unregistered. Note that the event +flags are therefore only used on the first registered notifier, however, one +should take care that notifiers for a specific event are always registered +with the same flag and it is considered a bug to do otherwise. diff --git a/Documentation/driver-api/surface_aggregator/clients/cdev.rst b/Documentation/driver-api/surface_aggregator/clients/cdev.rst new file mode 100644 index 0000000000..0134a841a0 --- /dev/null +++ b/Documentation/driver-api/surface_aggregator/clients/cdev.rst @@ -0,0 +1,204 @@ +.. SPDX-License-Identifier: GPL-2.0+ + +.. |ssam_cdev_request| replace:: :c:type:`struct ssam_cdev_request <ssam_cdev_request>` +.. |ssam_cdev_request_flags| replace:: :c:type:`enum ssam_cdev_request_flags <ssam_cdev_request_flags>` +.. |ssam_cdev_event| replace:: :c:type:`struct ssam_cdev_event <ssam_cdev_event>` + +============================== +User-Space EC Interface (cdev) +============================== + +The ``surface_aggregator_cdev`` module provides a misc-device for the SSAM +controller to allow for a (more or less) direct connection from user-space to +the SAM EC. It is intended to be used for development and debugging, and +therefore should not be used or relied upon in any other way. Note that this +module is not loaded automatically, but instead must be loaded manually. + +The provided interface is accessible through the ``/dev/surface/aggregator`` +device-file. All functionality of this interface is provided via IOCTLs. +These IOCTLs and their respective input/output parameter structs are defined in +``include/uapi/linux/surface_aggregator/cdev.h``. + +A small python library and scripts for accessing this interface can be found +at https://github.com/linux-surface/surface-aggregator-module/tree/master/scripts/ssam. + +.. contents:: + + +Receiving Events +================ + +Events can be received by reading from the device-file. The are represented by +the |ssam_cdev_event| datatype. + +Before events are available to be read, however, the desired notifiers must be +registered via the ``SSAM_CDEV_NOTIF_REGISTER`` IOCTL. Notifiers are, in +essence, callbacks, called when the EC sends an event. They are, in this +interface, associated with a specific target category and device-file-instance. +They forward any event of this category to the buffer of the corresponding +instance, from which it can then be read. + +Notifiers themselves do not enable events on the EC. Thus, it may additionally +be necessary to enable events via the ``SSAM_CDEV_EVENT_ENABLE`` IOCTL. While +notifiers work per-client (i.e. per-device-file-instance), events are enabled +globally, for the EC and all of its clients (regardless of userspace or +non-userspace). The ``SSAM_CDEV_EVENT_ENABLE`` and ``SSAM_CDEV_EVENT_DISABLE`` +IOCTLs take care of reference counting the events, such that an event is +enabled as long as there is a client that has requested it. + +Note that enabled events are not automatically disabled once the client +instance is closed. Therefore any client process (or group of processes) should +balance their event enable calls with the corresponding event disable calls. It +is, however, perfectly valid to enable and disable events on different client +instances. For example, it is valid to set up notifiers and read events on +client instance ``A``, enable those events on instance ``B`` (note that these +will also be received by A since events are enabled/disabled globally), and +after no more events are desired, disable the previously enabled events via +instance ``C``. + + +Controller IOCTLs +================= + +The following IOCTLs are provided: + +.. flat-table:: Controller IOCTLs + :widths: 1 1 1 1 4 + :header-rows: 1 + + * - Type + - Number + - Direction + - Name + - Description + + * - ``0xA5`` + - ``1`` + - ``WR`` + - ``REQUEST`` + - Perform synchronous SAM request. + + * - ``0xA5`` + - ``2`` + - ``W`` + - ``NOTIF_REGISTER`` + - Register event notifier. + + * - ``0xA5`` + - ``3`` + - ``W`` + - ``NOTIF_UNREGISTER`` + - Unregister event notifier. + + * - ``0xA5`` + - ``4`` + - ``W`` + - ``EVENT_ENABLE`` + - Enable event source. + + * - ``0xA5`` + - ``5`` + - ``W`` + - ``EVENT_DISABLE`` + - Disable event source. + + +``SSAM_CDEV_REQUEST`` +--------------------- + +Defined as ``_IOWR(0xA5, 1, struct ssam_cdev_request)``. + +Executes a synchronous SAM request. The request specification is passed in +as argument of type |ssam_cdev_request|, which is then written to/modified +by the IOCTL to return status and result of the request. + +Request payload data must be allocated separately and is passed in via the +``payload.data`` and ``payload.length`` members. If a response is required, +the response buffer must be allocated by the caller and passed in via the +``response.data`` member. The ``response.length`` member must be set to the +capacity of this buffer, or if no response is required, zero. Upon +completion of the request, the call will write the response to the response +buffer (if its capacity allows it) and overwrite the length field with the +actual size of the response, in bytes. + +Additionally, if the request has a response, this must be indicated via the +request flags, as is done with in-kernel requests. Request flags can be set +via the ``flags`` member and the values correspond to the values found in +|ssam_cdev_request_flags|. + +Finally, the status of the request itself is returned in the ``status`` +member (a negative errno value indicating failure). Note that failure +indication of the IOCTL is separated from failure indication of the request: +The IOCTL returns a negative status code if anything failed during setup of +the request (``-EFAULT``) or if the provided argument or any of its fields +are invalid (``-EINVAL``). In this case, the status value of the request +argument may be set, providing more detail on what went wrong (e.g. +``-ENOMEM`` for out-of-memory), but this value may also be zero. The IOCTL +will return with a zero status code in case the request has been set up, +submitted, and completed (i.e. handed back to user-space) successfully from +inside the IOCTL, but the request ``status`` member may still be negative in +case the actual execution of the request failed after it has been submitted. + +A full definition of the argument struct is provided below. + +``SSAM_CDEV_NOTIF_REGISTER`` +---------------------------- + +Defined as ``_IOW(0xA5, 2, struct ssam_cdev_notifier_desc)``. + +Register a notifier for the event target category specified in the given +notifier description with the specified priority. Notifiers registration is +required to receive events, but does not enable events themselves. After a +notifier for a specific target category has been registered, all events of that +category will be forwarded to the userspace client and can then be read from +the device file instance. Note that events may have to be enabled, e.g. via the +``SSAM_CDEV_EVENT_ENABLE`` IOCTL, before the EC will send them. + +Only one notifier can be registered per target category and client instance. If +a notifier has already been registered, this IOCTL will fail with ``-EEXIST``. + +Notifiers will automatically be removed when the device file instance is +closed. + +``SSAM_CDEV_NOTIF_UNREGISTER`` +------------------------------ + +Defined as ``_IOW(0xA5, 3, struct ssam_cdev_notifier_desc)``. + +Unregisters the notifier associated with the specified target category. The +priority field will be ignored by this IOCTL. If no notifier has been +registered for this client instance and the given category, this IOCTL will +fail with ``-ENOENT``. + +``SSAM_CDEV_EVENT_ENABLE`` +-------------------------- + +Defined as ``_IOW(0xA5, 4, struct ssam_cdev_event_desc)``. + +Enable the event associated with the given event descriptor. + +Note that this call will not register a notifier itself, it will only enable +events on the controller. If you want to receive events by reading from the +device file, you will need to register the corresponding notifier(s) on that +instance. + +Events are not automatically disabled when the device file is closed. This must +be done manually, via a call to the ``SSAM_CDEV_EVENT_DISABLE`` IOCTL. + +``SSAM_CDEV_EVENT_DISABLE`` +--------------------------- + +Defined as ``_IOW(0xA5, 5, struct ssam_cdev_event_desc)``. + +Disable the event associated with the given event descriptor. + +Note that this will not unregister any notifiers. Events may still be received +and forwarded to user-space after this call. The only safe way of stopping +events from being received is unregistering all previously registered +notifiers. + + +Structures and Enums +==================== + +.. kernel-doc:: include/uapi/linux/surface_aggregator/cdev.h diff --git a/Documentation/driver-api/surface_aggregator/clients/dtx.rst b/Documentation/driver-api/surface_aggregator/clients/dtx.rst new file mode 100644 index 0000000000..e7e7c20007 --- /dev/null +++ b/Documentation/driver-api/surface_aggregator/clients/dtx.rst @@ -0,0 +1,718 @@ +.. SPDX-License-Identifier: GPL-2.0+ + +.. |__u16| replace:: :c:type:`__u16 <__u16>` +.. |sdtx_event| replace:: :c:type:`struct sdtx_event <sdtx_event>` +.. |sdtx_event_code| replace:: :c:type:`enum sdtx_event_code <sdtx_event_code>` +.. |sdtx_base_info| replace:: :c:type:`struct sdtx_base_info <sdtx_base_info>` +.. |sdtx_device_mode| replace:: :c:type:`struct sdtx_device_mode <sdtx_device_mode>` + +====================================================== +User-Space DTX (Clipboard Detachment System) Interface +====================================================== + +The ``surface_dtx`` driver is responsible for proper clipboard detachment +and re-attachment handling. To this end, it provides the ``/dev/surface/dtx`` +device file, through which it can interface with a user-space daemon. This +daemon is then ultimately responsible for determining and taking necessary +actions, such as unmounting devices attached to the base, +unloading/reloading the graphics-driver, user-notifications, etc. + +There are two basic communication principles used in this driver: Commands +(in other parts of the documentation also referred to as requests) and +events. Commands are sent to the EC and may have a different implications in +different contexts. Events are sent by the EC upon some internal state +change. Commands are always driver-initiated, whereas events are always +initiated by the EC. + +.. contents:: + +Nomenclature +============ + +* **Clipboard:** + The detachable upper part of the Surface Book, housing the screen and CPU. + +* **Base:** + The lower part of the Surface Book from which the clipboard can be + detached, optionally (model dependent) housing the discrete GPU (dGPU). + +* **Latch:** + The mechanism keeping the clipboard attached to the base in normal + operation and allowing it to be detached when requested. + +* **Silently ignored commands:** + The command is accepted by the EC as a valid command and acknowledged + (following the standard communication protocol), but the EC does not act + upon it, i.e. ignores it.e upper part of the + + +Detachment Process +================== + +Warning: This part of the documentation is based on reverse engineering and +testing and thus may contain errors or be incomplete. + +Latch States +------------ + +The latch mechanism has two major states: *open* and *closed*. In the +*closed* state (default), the clipboard is secured to the base, whereas in +the *open* state, the clipboard can be removed by a user. + +The latch can additionally be locked and, correspondingly, unlocked, which +can influence the detachment procedure. Specifically, this locking mechanism +is intended to prevent the dGPU, positioned in the base of the device, from +being hot-unplugged while in use. More details can be found in the +documentation for the detachment procedure below. By default, the latch is +unlocked. + +Detachment Procedure +-------------------- + +Note that the detachment process is governed fully by the EC. The +``surface_dtx`` driver only relays events from the EC to user-space and +commands from user-space to the EC, i.e. it does not influence this process. + +The detachment process is started with the user pressing the *detach* button +on the base of the device or executing the ``SDTX_IOCTL_LATCH_REQUEST`` IOCTL. +Following that: + +1. The EC turns on the indicator led on the detach-button, sends a + *detach-request* event (``SDTX_EVENT_REQUEST``), and awaits further + instructions/commands. In case the latch is unlocked, the led will flash + green. If the latch has been locked, the led will be solid red + +2. The event is, via the ``surface_dtx`` driver, relayed to user-space, where + an appropriate user-space daemon can handle it and send instructions back + to the EC via IOCTLs provided by this driver. + +3. The EC waits for instructions from user-space and acts according to them. + If the EC does not receive any instructions in a given period, it will + time out and continue as follows: + + - If the latch is unlocked, the EC will open the latch and the clipboard + can be detached from the base. This is the exact behavior as without + this driver or any user-space daemon. See the ``SDTX_IOCTL_LATCH_CONFIRM`` + description below for more details on the follow-up behavior of the EC. + + - If the latch is locked, the EC will *not* open the latch, meaning the + clipboard cannot be detached from the base. Furthermore, the EC sends + an cancel event (``SDTX_EVENT_CANCEL``) detailing this with the cancel + reason ``SDTX_DETACH_TIMEDOUT`` (see :ref:`events` for details). + +Valid responses by a user-space daemon to a detachment request event are: + +- Execute ``SDTX_IOCTL_LATCH_REQUEST``. This will immediately abort the + detachment process. Furthermore, the EC will send a detach-request event, + similar to the user pressing the detach-button to cancel said process (see + below). + +- Execute ``SDTX_IOCTL_LATCH_CONFIRM``. This will cause the EC to open the + latch, after which the user can separate clipboard and base. + + As this changes the latch state, a *latch-status* event + (``SDTX_EVENT_LATCH_STATUS``) will be sent once the latch has been opened + successfully. If the EC fails to open the latch, e.g. due to hardware + error or low battery, a latch-cancel event (``SDTX_EVENT_CANCEL``) will be + sent with the cancel reason indicating the specific failure. + + If the latch is currently locked, the latch will automatically be + unlocked before it is opened. + +- Execute ``SDTX_IOCTL_LATCH_HEARTBEAT``. This will reset the internal timeout. + No other actions will be performed, i.e. the detachment process will neither + be completed nor canceled, and the EC will still be waiting for further + responses. + +- Execute ``SDTX_IOCTL_LATCH_CANCEL``. This will abort the detachment process, + similar to ``SDTX_IOCTL_LATCH_REQUEST``, described above, or the button + press, described below. A *generic request* event (``SDTX_EVENT_REQUEST``) + is send in response to this. In contrast to those, however, this command + does not trigger a new detachment process if none is currently in + progress. + +- Do nothing. The detachment process eventually times out as described in + point 3. + +See :ref:`ioctls` for more details on these responses. + +It is important to note that, if the user presses the detach button at any +point when a detachment operation is in progress (i.e. after the EC has sent +the initial *detach-request* event (``SDTX_EVENT_REQUEST``) and before it +received the corresponding response concluding the process), the detachment +process is canceled on the EC-level and an identical event is being sent. +Thus a *detach-request* event, by itself, does not signal the start of the +detachment process. + +The detachment process may further be canceled by the EC due to hardware +failures or a low clipboard battery. This is done via a cancel event +(``SDTX_EVENT_CANCEL``) with the corresponding cancel reason. + + +User-Space Interface Documentation +================================== + +Error Codes and Status Values +----------------------------- + +Error and status codes are divided into different categories, which can be +used to determine if the status code is an error, and, if it is, the +severity and type of that error. The current categories are: + +.. flat-table:: Overview of Status/Error Categories. + :widths: 2 1 3 + :header-rows: 1 + + * - Name + - Value + - Short Description + + * - ``STATUS`` + - ``0x0000`` + - Non-error status codes. + + * - ``RUNTIME_ERROR`` + - ``0x1000`` + - Non-critical runtime errors. + + * - ``HARDWARE_ERROR`` + - ``0x2000`` + - Critical hardware failures. + + * - ``UNKNOWN`` + - ``0xF000`` + - Unknown error codes. + +Other categories are reserved for future use. The ``SDTX_CATEGORY()`` macro +can be used to determine the category of any status value. The +``SDTX_SUCCESS()`` macro can be used to check if the status value is a +success value (``SDTX_CATEGORY_STATUS``) or if it indicates a failure. + +Unknown status or error codes sent by the EC are assigned to the ``UNKNOWN`` +category by the driver and may be implemented via their own code in the +future. + +Currently used error codes are: + +.. flat-table:: Overview of Error Codes. + :widths: 2 1 1 3 + :header-rows: 1 + + * - Name + - Category + - Value + - Short Description + + * - ``SDTX_DETACH_NOT_FEASIBLE`` + - ``RUNTIME`` + - ``0x1001`` + - Detachment not feasible due to low clipboard battery. + + * - ``SDTX_DETACH_TIMEDOUT`` + - ``RUNTIME`` + - ``0x1002`` + - Detachment process timed out while the latch was locked. + + * - ``SDTX_ERR_FAILED_TO_OPEN`` + - ``HARDWARE`` + - ``0x2001`` + - Failed to open latch. + + * - ``SDTX_ERR_FAILED_TO_REMAIN_OPEN`` + - ``HARDWARE`` + - ``0x2002`` + - Failed to keep latch open. + + * - ``SDTX_ERR_FAILED_TO_CLOSE`` + - ``HARDWARE`` + - ``0x2003`` + - Failed to close latch. + +Other error codes are reserved for future use. Non-error status codes may +overlap and are generally only unique within their use-case: + +.. flat-table:: Latch Status Codes. + :widths: 2 1 1 3 + :header-rows: 1 + + * - Name + - Category + - Value + - Short Description + + * - ``SDTX_LATCH_CLOSED`` + - ``STATUS`` + - ``0x0000`` + - Latch is closed/has been closed. + + * - ``SDTX_LATCH_OPENED`` + - ``STATUS`` + - ``0x0001`` + - Latch is open/has been opened. + +.. flat-table:: Base State Codes. + :widths: 2 1 1 3 + :header-rows: 1 + + * - Name + - Category + - Value + - Short Description + + * - ``SDTX_BASE_DETACHED`` + - ``STATUS`` + - ``0x0000`` + - Base has been detached/is not present. + + * - ``SDTX_BASE_ATTACHED`` + - ``STATUS`` + - ``0x0001`` + - Base has been attached/is present. + +Again, other codes are reserved for future use. + +.. _events: + +Events +------ + +Events can be received by reading from the device file. They are disabled by +default and have to be enabled by executing ``SDTX_IOCTL_EVENTS_ENABLE`` +first. All events follow the layout prescribed by |sdtx_event|. Specific +event types can be identified by their event code, described in +|sdtx_event_code|. Note that other event codes are reserved for future use, +thus an event parser must be able to handle any unknown/unsupported event +types gracefully, by relying on the payload length given in the event header. + +Currently provided event types are: + +.. flat-table:: Overview of DTX events. + :widths: 2 1 1 3 + :header-rows: 1 + + * - Name + - Code + - Payload + - Short Description + + * - ``SDTX_EVENT_REQUEST`` + - ``1`` + - ``0`` bytes + - Detachment process initiated/aborted. + + * - ``SDTX_EVENT_CANCEL`` + - ``2`` + - ``2`` bytes + - EC canceled detachment process. + + * - ``SDTX_EVENT_BASE_CONNECTION`` + - ``3`` + - ``4`` bytes + - Base connection state changed. + + * - ``SDTX_EVENT_LATCH_STATUS`` + - ``4`` + - ``2`` bytes + - Latch status changed. + + * - ``SDTX_EVENT_DEVICE_MODE`` + - ``5`` + - ``2`` bytes + - Device mode changed. + +Individual events in more detail: + +``SDTX_EVENT_REQUEST`` +^^^^^^^^^^^^^^^^^^^^^^ + +Sent when a detachment process is started or, if in progress, aborted by the +user, either via a detach button press or a detach request +(``SDTX_IOCTL_LATCH_REQUEST``) being sent from user-space. + +Does not have any payload. + +``SDTX_EVENT_CANCEL`` +^^^^^^^^^^^^^^^^^^^^^ + +Sent when a detachment process is canceled by the EC due to unfulfilled +preconditions (e.g. clipboard battery too low to detach) or hardware +failure. The reason for cancellation is given in the event payload detailed +below and can be one of + +* ``SDTX_DETACH_TIMEDOUT``: Detachment timed out while the latch was locked. + The latch has neither been opened nor unlocked. + +* ``SDTX_DETACH_NOT_FEASIBLE``: Detachment not feasible due to low clipboard + battery. + +* ``SDTX_ERR_FAILED_TO_OPEN``: Could not open the latch (hardware failure). + +* ``SDTX_ERR_FAILED_TO_REMAIN_OPEN``: Could not keep the latch open (hardware + failure). + +* ``SDTX_ERR_FAILED_TO_CLOSE``: Could not close the latch (hardware failure). + +Other error codes in this context are reserved for future use. + +These codes can be classified via the ``SDTX_CATEGORY()`` macro to discern +between critical hardware errors (``SDTX_CATEGORY_HARDWARE_ERROR``) or +runtime errors (``SDTX_CATEGORY_RUNTIME_ERROR``), the latter of which may +happen during normal operation if certain preconditions for detachment are +not given. + +.. flat-table:: Detachment Cancel Event Payload + :widths: 1 1 4 + :header-rows: 1 + + * - Field + - Type + - Description + + * - ``reason`` + - |__u16| + - Reason for cancellation. + +``SDTX_EVENT_BASE_CONNECTION`` +^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ + +Sent when the base connection state has changed, i.e. when the base has been +attached, detached, or detachment has become infeasible due to low clipboard +battery. The new state and, if a base is connected, ID of the base is +provided as payload of type |sdtx_base_info| with its layout presented +below: + +.. flat-table:: Base-Connection-Change Event Payload + :widths: 1 1 4 + :header-rows: 1 + + * - Field + - Type + - Description + + * - ``state`` + - |__u16| + - Base connection state. + + * - ``base_id`` + - |__u16| + - Type of base connected (zero if none). + +Possible values for ``state`` are: + +* ``SDTX_BASE_DETACHED``, +* ``SDTX_BASE_ATTACHED``, and +* ``SDTX_DETACH_NOT_FEASIBLE``. + +Other values are reserved for future use. + +``SDTX_EVENT_LATCH_STATUS`` +^^^^^^^^^^^^^^^^^^^^^^^^^^^ + +Sent when the latch status has changed, i.e. when the latch has been opened, +closed, or an error occurred. The current status is provided as payload: + +.. flat-table:: Latch-Status-Change Event Payload + :widths: 1 1 4 + :header-rows: 1 + + * - Field + - Type + - Description + + * - ``status`` + - |__u16| + - Latch status. + +Possible values for ``status`` are: + +* ``SDTX_LATCH_CLOSED``, +* ``SDTX_LATCH_OPENED``, +* ``SDTX_ERR_FAILED_TO_OPEN``, +* ``SDTX_ERR_FAILED_TO_REMAIN_OPEN``, and +* ``SDTX_ERR_FAILED_TO_CLOSE``. + +Other values are reserved for future use. + +``SDTX_EVENT_DEVICE_MODE`` +^^^^^^^^^^^^^^^^^^^^^^^^^^ + +Sent when the device mode has changed. The new device mode is provided as +payload: + +.. flat-table:: Device-Mode-Change Event Payload + :widths: 1 1 4 + :header-rows: 1 + + * - Field + - Type + - Description + + * - ``mode`` + - |__u16| + - Device operation mode. + +Possible values for ``mode`` are: + +* ``SDTX_DEVICE_MODE_TABLET``, +* ``SDTX_DEVICE_MODE_LAPTOP``, and +* ``SDTX_DEVICE_MODE_STUDIO``. + +Other values are reserved for future use. + +.. _ioctls: + +IOCTLs +------ + +The following IOCTLs are provided: + +.. flat-table:: Overview of DTX IOCTLs + :widths: 1 1 1 1 4 + :header-rows: 1 + + * - Type + - Number + - Direction + - Name + - Description + + * - ``0xA5`` + - ``0x21`` + - ``-`` + - ``EVENTS_ENABLE`` + - Enable events for the current file descriptor. + + * - ``0xA5`` + - ``0x22`` + - ``-`` + - ``EVENTS_DISABLE`` + - Disable events for the current file descriptor. + + * - ``0xA5`` + - ``0x23`` + - ``-`` + - ``LATCH_LOCK`` + - Lock the latch. + + * - ``0xA5`` + - ``0x24`` + - ``-`` + - ``LATCH_UNLOCK`` + - Unlock the latch. + + * - ``0xA5`` + - ``0x25`` + - ``-`` + - ``LATCH_REQUEST`` + - Request clipboard detachment. + + * - ``0xA5`` + - ``0x26`` + - ``-`` + - ``LATCH_CONFIRM`` + - Confirm clipboard detachment request. + + * - ``0xA5`` + - ``0x27`` + - ``-`` + - ``LATCH_HEARTBEAT`` + - Send heartbeat signal to EC. + + * - ``0xA5`` + - ``0x28`` + - ``-`` + - ``LATCH_CANCEL`` + - Cancel detachment process. + + * - ``0xA5`` + - ``0x29`` + - ``R`` + - ``GET_BASE_INFO`` + - Get current base/connection information. + + * - ``0xA5`` + - ``0x2A`` + - ``R`` + - ``GET_DEVICE_MODE`` + - Get current device operation mode. + + * - ``0xA5`` + - ``0x2B`` + - ``R`` + - ``GET_LATCH_STATUS`` + - Get current device latch status. + +``SDTX_IOCTL_EVENTS_ENABLE`` +^^^^^^^^^^^^^^^^^^^^^^^^^^^^ + +Defined as ``_IO(0xA5, 0x22)``. + +Enable events for the current file descriptor. Events can be obtained by +reading from the device, if enabled. Events are disabled by default. + +``SDTX_IOCTL_EVENTS_DISABLE`` +^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ + +Defined as ``_IO(0xA5, 0x22)``. + +Disable events for the current file descriptor. Events can be obtained by +reading from the device, if enabled. Events are disabled by default. + +``SDTX_IOCTL_LATCH_LOCK`` +^^^^^^^^^^^^^^^^^^^^^^^^^ + +Defined as ``_IO(0xA5, 0x23)``. + +Locks the latch, causing the detachment procedure to abort without opening +the latch on timeout. The latch is unlocked by default. This command will be +silently ignored if the latch is already locked. + +``SDTX_IOCTL_LATCH_UNLOCK`` +^^^^^^^^^^^^^^^^^^^^^^^^^^^ + +Defined as ``_IO(0xA5, 0x24)``. + +Unlocks the latch, causing the detachment procedure to open the latch on +timeout. The latch is unlocked by default. This command will not open the +latch when sent during an ongoing detachment process. It will be silently +ignored if the latch is already unlocked. + +``SDTX_IOCTL_LATCH_REQUEST`` +^^^^^^^^^^^^^^^^^^^^^^^^^^^^ + +Defined as ``_IO(0xA5, 0x25)``. + +Generic latch request. Behavior depends on the context: If no +detachment-process is active, detachment is requested. Otherwise the +currently active detachment-process will be aborted. + +If a detachment process is canceled by this operation, a generic detachment +request event (``SDTX_EVENT_REQUEST``) will be sent. + +This essentially behaves the same as a detachment button press. + +``SDTX_IOCTL_LATCH_CONFIRM`` +^^^^^^^^^^^^^^^^^^^^^^^^^^^^ + +Defined as ``_IO(0xA5, 0x26)``. + +Acknowledges and confirms a latch request. If sent during an ongoing +detachment process, this command causes the latch to be opened immediately. +The latch will also be opened if it has been locked. In this case, the latch +lock is reset to the unlocked state. + +This command will be silently ignored if there is currently no detachment +procedure in progress. + +``SDTX_IOCTL_LATCH_HEARTBEAT`` +^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ + +Defined as ``_IO(0xA5, 0x27)``. + +Sends a heartbeat, essentially resetting the detachment timeout. This +command can be used to keep the detachment process alive while work required +for the detachment to succeed is still in progress. + +This command will be silently ignored if there is currently no detachment +procedure in progress. + +``SDTX_IOCTL_LATCH_CANCEL`` +^^^^^^^^^^^^^^^^^^^^^^^^^^^ + +Defined as ``_IO(0xA5, 0x28)``. + +Cancels detachment in progress (if any). If a detachment process is canceled +by this operation, a generic detachment request event +(``SDTX_EVENT_REQUEST``) will be sent. + +This command will be silently ignored if there is currently no detachment +procedure in progress. + +``SDTX_IOCTL_GET_BASE_INFO`` +^^^^^^^^^^^^^^^^^^^^^^^^^^^^ + +Defined as ``_IOR(0xA5, 0x29, struct sdtx_base_info)``. + +Get the current base connection state (i.e. attached/detached) and the type +of the base connected to the clipboard. This is command essentially provides +a way to query the information provided by the base connection change event +(``SDTX_EVENT_BASE_CONNECTION``). + +Possible values for ``struct sdtx_base_info.state`` are: + +* ``SDTX_BASE_DETACHED``, +* ``SDTX_BASE_ATTACHED``, and +* ``SDTX_DETACH_NOT_FEASIBLE``. + +Other values are reserved for future use. + +``SDTX_IOCTL_GET_DEVICE_MODE`` +^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ + +Defined as ``_IOR(0xA5, 0x2A, __u16)``. + +Returns the device operation mode, indicating if and how the base is +attached to the clipboard. This is command essentially provides a way to +query the information provided by the device mode change event +(``SDTX_EVENT_DEVICE_MODE``). + +Returned values are: + +* ``SDTX_DEVICE_MODE_LAPTOP`` +* ``SDTX_DEVICE_MODE_TABLET`` +* ``SDTX_DEVICE_MODE_STUDIO`` + +See |sdtx_device_mode| for details. Other values are reserved for future +use. + + +``SDTX_IOCTL_GET_LATCH_STATUS`` +^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ + +Defined as ``_IOR(0xA5, 0x2B, __u16)``. + +Get the current latch status or (presumably) the last error encountered when +trying to open/close the latch. This is command essentially provides a way +to query the information provided by the latch status change event +(``SDTX_EVENT_LATCH_STATUS``). + +Returned values are: + +* ``SDTX_LATCH_CLOSED``, +* ``SDTX_LATCH_OPENED``, +* ``SDTX_ERR_FAILED_TO_OPEN``, +* ``SDTX_ERR_FAILED_TO_REMAIN_OPEN``, and +* ``SDTX_ERR_FAILED_TO_CLOSE``. + +Other values are reserved for future use. + +A Note on Base IDs +------------------ + +Base types/IDs provided via ``SDTX_EVENT_BASE_CONNECTION`` or +``SDTX_IOCTL_GET_BASE_INFO`` are directly forwarded from the EC in the lower +byte of the combined |__u16| value, with the driver storing the EC type from +which this ID comes in the high byte (without this, base IDs over different +types of ECs may be overlapping). + +The ``SDTX_DEVICE_TYPE()`` macro can be used to determine the EC device +type. This can be one of + +* ``SDTX_DEVICE_TYPE_HID``, for Surface Aggregator Module over HID, and + +* ``SDTX_DEVICE_TYPE_SSH``, for Surface Aggregator Module over Surface Serial + Hub. + +Note that currently only the ``SSH`` type EC is supported, however ``HID`` +type is reserved for future use. + +Structures and Enums +-------------------- + +.. kernel-doc:: include/uapi/linux/surface_aggregator/dtx.h + +API Users +========= + +A user-space daemon utilizing this API can be found at +https://github.com/linux-surface/surface-dtx-daemon. diff --git a/Documentation/driver-api/surface_aggregator/clients/index.rst b/Documentation/driver-api/surface_aggregator/clients/index.rst new file mode 100644 index 0000000000..30160513af --- /dev/null +++ b/Documentation/driver-api/surface_aggregator/clients/index.rst @@ -0,0 +1,23 @@ +.. SPDX-License-Identifier: GPL-2.0+ + +=========================== +Client Driver Documentation +=========================== + +This is the documentation for client drivers themselves. Refer to +Documentation/driver-api/surface_aggregator/client.rst for documentation +on how to write client drivers. + +.. toctree:: + :maxdepth: 1 + + cdev + dtx + san + +.. only:: subproject and html + + Indices + ======= + + * :ref:`genindex` diff --git a/Documentation/driver-api/surface_aggregator/clients/san.rst b/Documentation/driver-api/surface_aggregator/clients/san.rst new file mode 100644 index 0000000000..38c2580e77 --- /dev/null +++ b/Documentation/driver-api/surface_aggregator/clients/san.rst @@ -0,0 +1,44 @@ +.. SPDX-License-Identifier: GPL-2.0+ + +.. |san_client_link| replace:: :c:func:`san_client_link` +.. |san_dgpu_notifier_register| replace:: :c:func:`san_dgpu_notifier_register` +.. |san_dgpu_notifier_unregister| replace:: :c:func:`san_dgpu_notifier_unregister` + +=================== +Surface ACPI Notify +=================== + +The Surface ACPI Notify (SAN) device provides the bridge between ACPI and +SAM controller. Specifically, ACPI code can execute requests and handle +battery and thermal events via this interface. In addition to this, events +relating to the discrete GPU (dGPU) of the Surface Book 2 can be sent from +ACPI code (note: the Surface Book 3 uses a different method for this). The +only currently known event sent via this interface is a dGPU power-on +notification. While this driver handles the former part internally, it only +relays the dGPU events to any other driver interested via its public API and +does not handle them. + +The public interface of this driver is split into two parts: Client +registration and notifier-block registration. + +A client to the SAN interface can be linked as consumer to the SAN device +via |san_client_link|. This can be used to ensure that the a client +receiving dGPU events does not miss any events due to the SAN interface not +being set up as this forces the client driver to unbind once the SAN driver +is unbound. + +Notifier-blocks can be registered by any device for as long as the module is +loaded, regardless of being linked as client or not. Registration is done +with |san_dgpu_notifier_register|. If the notifier is not needed any more, it +should be unregistered via |san_dgpu_notifier_unregister|. + +Consult the API documentation below for more details. + + +API Documentation +================= + +.. kernel-doc:: include/linux/surface_acpi_notify.h + +.. kernel-doc:: drivers/platform/surface/surface_acpi_notify.c + :export: diff --git a/Documentation/driver-api/surface_aggregator/index.rst b/Documentation/driver-api/surface_aggregator/index.rst new file mode 100644 index 0000000000..6f3e109490 --- /dev/null +++ b/Documentation/driver-api/surface_aggregator/index.rst @@ -0,0 +1,21 @@ +.. SPDX-License-Identifier: GPL-2.0+ + +======================================= +Surface System Aggregator Module (SSAM) +======================================= + +.. toctree:: + :maxdepth: 2 + + overview + client + clients/index + ssh + internal + +.. only:: subproject and html + + Indices + ======= + + * :ref:`genindex` diff --git a/Documentation/driver-api/surface_aggregator/internal-api.rst b/Documentation/driver-api/surface_aggregator/internal-api.rst new file mode 100644 index 0000000000..639a67b5a3 --- /dev/null +++ b/Documentation/driver-api/surface_aggregator/internal-api.rst @@ -0,0 +1,67 @@ +.. SPDX-License-Identifier: GPL-2.0+ + +========================== +Internal API Documentation +========================== + +.. contents:: + :depth: 2 + + +Packet Transport Layer +====================== + +.. kernel-doc:: drivers/platform/surface/aggregator/ssh_parser.h + :internal: + +.. kernel-doc:: drivers/platform/surface/aggregator/ssh_parser.c + :internal: + +.. kernel-doc:: drivers/platform/surface/aggregator/ssh_msgb.h + :internal: + +.. kernel-doc:: drivers/platform/surface/aggregator/ssh_packet_layer.h + :internal: + +.. kernel-doc:: drivers/platform/surface/aggregator/ssh_packet_layer.c + :internal: + + +Request Transport Layer +======================= + +.. kernel-doc:: drivers/platform/surface/aggregator/ssh_request_layer.h + :internal: + +.. kernel-doc:: drivers/platform/surface/aggregator/ssh_request_layer.c + :internal: + + +Controller +========== + +.. kernel-doc:: drivers/platform/surface/aggregator/controller.h + :internal: + +.. kernel-doc:: drivers/platform/surface/aggregator/controller.c + :internal: + + +Client Device Bus +================= + +.. kernel-doc:: drivers/platform/surface/aggregator/bus.c + :internal: + + +Core +==== + +.. kernel-doc:: drivers/platform/surface/aggregator/core.c + :internal: + + +Trace Helpers +============= + +.. kernel-doc:: drivers/platform/surface/aggregator/trace.h diff --git a/Documentation/driver-api/surface_aggregator/internal.rst b/Documentation/driver-api/surface_aggregator/internal.rst new file mode 100644 index 0000000000..8c7c80c9f4 --- /dev/null +++ b/Documentation/driver-api/surface_aggregator/internal.rst @@ -0,0 +1,578 @@ +.. SPDX-License-Identifier: GPL-2.0+ + +.. |ssh_ptl| replace:: :c:type:`struct ssh_ptl <ssh_ptl>` +.. |ssh_ptl_submit| replace:: :c:func:`ssh_ptl_submit` +.. |ssh_ptl_cancel| replace:: :c:func:`ssh_ptl_cancel` +.. |ssh_ptl_shutdown| replace:: :c:func:`ssh_ptl_shutdown` +.. |ssh_ptl_rx_rcvbuf| replace:: :c:func:`ssh_ptl_rx_rcvbuf` +.. |ssh_rtl| replace:: :c:type:`struct ssh_rtl <ssh_rtl>` +.. |ssh_rtl_submit| replace:: :c:func:`ssh_rtl_submit` +.. |ssh_rtl_cancel| replace:: :c:func:`ssh_rtl_cancel` +.. |ssh_rtl_shutdown| replace:: :c:func:`ssh_rtl_shutdown` +.. |ssh_packet| replace:: :c:type:`struct ssh_packet <ssh_packet>` +.. |ssh_packet_get| replace:: :c:func:`ssh_packet_get` +.. |ssh_packet_put| replace:: :c:func:`ssh_packet_put` +.. |ssh_packet_ops| replace:: :c:type:`struct ssh_packet_ops <ssh_packet_ops>` +.. |ssh_packet_base_priority| replace:: :c:type:`enum ssh_packet_base_priority <ssh_packet_base_priority>` +.. |ssh_packet_flags| replace:: :c:type:`enum ssh_packet_flags <ssh_packet_flags>` +.. |SSH_PACKET_PRIORITY| replace:: :c:func:`SSH_PACKET_PRIORITY` +.. |ssh_frame| replace:: :c:type:`struct ssh_frame <ssh_frame>` +.. |ssh_command| replace:: :c:type:`struct ssh_command <ssh_command>` +.. |ssh_request| replace:: :c:type:`struct ssh_request <ssh_request>` +.. |ssh_request_get| replace:: :c:func:`ssh_request_get` +.. |ssh_request_put| replace:: :c:func:`ssh_request_put` +.. |ssh_request_ops| replace:: :c:type:`struct ssh_request_ops <ssh_request_ops>` +.. |ssh_request_init| replace:: :c:func:`ssh_request_init` +.. |ssh_request_flags| replace:: :c:type:`enum ssh_request_flags <ssh_request_flags>` +.. |ssam_controller| replace:: :c:type:`struct ssam_controller <ssam_controller>` +.. |ssam_device| replace:: :c:type:`struct ssam_device <ssam_device>` +.. |ssam_device_driver| replace:: :c:type:`struct ssam_device_driver <ssam_device_driver>` +.. |ssam_client_bind| replace:: :c:func:`ssam_client_bind` +.. |ssam_client_link| replace:: :c:func:`ssam_client_link` +.. |ssam_request_sync| replace:: :c:type:`struct ssam_request_sync <ssam_request_sync>` +.. |ssam_event_registry| replace:: :c:type:`struct ssam_event_registry <ssam_event_registry>` +.. |ssam_event_id| replace:: :c:type:`struct ssam_event_id <ssam_event_id>` +.. |ssam_nf| replace:: :c:type:`struct ssam_nf <ssam_nf>` +.. |ssam_nf_refcount_inc| replace:: :c:func:`ssam_nf_refcount_inc` +.. |ssam_nf_refcount_dec| replace:: :c:func:`ssam_nf_refcount_dec` +.. |ssam_notifier_register| replace:: :c:func:`ssam_notifier_register` +.. |ssam_notifier_unregister| replace:: :c:func:`ssam_notifier_unregister` +.. |ssam_cplt| replace:: :c:type:`struct ssam_cplt <ssam_cplt>` +.. |ssam_event_queue| replace:: :c:type:`struct ssam_event_queue <ssam_event_queue>` +.. |ssam_request_sync_submit| replace:: :c:func:`ssam_request_sync_submit` + +===================== +Core Driver Internals +===================== + +Architectural overview of the Surface System Aggregator Module (SSAM) core +and Surface Serial Hub (SSH) driver. For the API documentation, refer to: + +.. toctree:: + :maxdepth: 2 + + internal-api + + +Overview +======== + +The SSAM core implementation is structured in layers, somewhat following the +SSH protocol structure: + +Lower-level packet transport is implemented in the *packet transport layer +(PTL)*, directly building on top of the serial device (serdev) +infrastructure of the kernel. As the name indicates, this layer deals with +the packet transport logic and handles things like packet validation, packet +acknowledgment (ACKing), packet (retransmission) timeouts, and relaying +packet payloads to higher-level layers. + +Above this sits the *request transport layer (RTL)*. This layer is centered +around command-type packet payloads, i.e. requests (sent from host to EC), +responses of the EC to those requests, and events (sent from EC to host). +It, specifically, distinguishes events from request responses, matches +responses to their corresponding requests, and implements request timeouts. + +The *controller* layer is building on top of this and essentially decides +how request responses and, especially, events are dealt with. It provides an +event notifier system, handles event activation/deactivation, provides a +workqueue for event and asynchronous request completion, and also manages +the message counters required for building command messages (``SEQ``, +``RQID``). This layer basically provides a fundamental interface to the SAM +EC for use in other kernel drivers. + +While the controller layer already provides an interface for other kernel +drivers, the client *bus* extends this interface to provide support for +native SSAM devices, i.e. devices that are not defined in ACPI and not +implemented as platform devices, via |ssam_device| and |ssam_device_driver| +simplify management of client devices and client drivers. + +Refer to Documentation/driver-api/surface_aggregator/client.rst for +documentation regarding the client device/driver API and interface options +for other kernel drivers. It is recommended to familiarize oneself with +that chapter and the Documentation/driver-api/surface_aggregator/ssh.rst +before continuing with the architectural overview below. + + +Packet Transport Layer +====================== + +The packet transport layer is represented via |ssh_ptl| and is structured +around the following key concepts: + +Packets +------- + +Packets are the fundamental transmission unit of the SSH protocol. They are +managed by the packet transport layer, which is essentially the lowest layer +of the driver and is built upon by other components of the SSAM core. +Packets to be transmitted by the SSAM core are represented via |ssh_packet| +(in contrast, packets received by the core do not have any specific +structure and are managed entirely via the raw |ssh_frame|). + +This structure contains the required fields to manage the packet inside the +transport layer, as well as a reference to the buffer containing the data to +be transmitted (i.e. the message wrapped in |ssh_frame|). Most notably, it +contains an internal reference count, which is used for managing its +lifetime (accessible via |ssh_packet_get| and |ssh_packet_put|). When this +counter reaches zero, the ``release()`` callback provided to the packet via +its |ssh_packet_ops| reference is executed, which may then deallocate the +packet or its enclosing structure (e.g. |ssh_request|). + +In addition to the ``release`` callback, the |ssh_packet_ops| reference also +provides a ``complete()`` callback, which is run once the packet has been +completed and provides the status of this completion, i.e. zero on success +or a negative errno value in case of an error. Once the packet has been +submitted to the packet transport layer, the ``complete()`` callback is +always guaranteed to be executed before the ``release()`` callback, i.e. the +packet will always be completed, either successfully, with an error, or due +to cancellation, before it will be released. + +The state of a packet is managed via its ``state`` flags +(|ssh_packet_flags|), which also contains the packet type. In particular, +the following bits are noteworthy: + +* ``SSH_PACKET_SF_LOCKED_BIT``: This bit is set when completion, either + through error or success, is imminent. It indicates that no further + references of the packet should be taken and any existing references + should be dropped as soon as possible. The process setting this bit is + responsible for removing any references to this packet from the packet + queue and pending set. + +* ``SSH_PACKET_SF_COMPLETED_BIT``: This bit is set by the process running the + ``complete()`` callback and is used to ensure that this callback only runs + once. + +* ``SSH_PACKET_SF_QUEUED_BIT``: This bit is set when the packet is queued on + the packet queue and cleared when it is dequeued. + +* ``SSH_PACKET_SF_PENDING_BIT``: This bit is set when the packet is added to + the pending set and cleared when it is removed from it. + +Packet Queue +------------ + +The packet queue is the first of the two fundamental collections in the +packet transport layer. It is a priority queue, with priority of the +respective packets based on the packet type (major) and number of tries +(minor). See |SSH_PACKET_PRIORITY| for more details on the priority value. + +All packets to be transmitted by the transport layer must be submitted to +this queue via |ssh_ptl_submit|. Note that this includes control packets +sent by the transport layer itself. Internally, data packets can be +re-submitted to this queue due to timeouts or NAK packets sent by the EC. + +Pending Set +----------- + +The pending set is the second of the two fundamental collections in the +packet transport layer. It stores references to packets that have already +been transmitted, but wait for acknowledgment (e.g. the corresponding ACK +packet) by the EC. + +Note that a packet may both be pending and queued if it has been +re-submitted due to a packet acknowledgment timeout or NAK. On such a +re-submission, packets are not removed from the pending set. + +Transmitter Thread +------------------ + +The transmitter thread is responsible for most of the actual work regarding +packet transmission. In each iteration, it (waits for and) checks if the +next packet on the queue (if any) can be transmitted and, if so, removes it +from the queue and increments its counter for the number of transmission +attempts, i.e. tries. If the packet is sequenced, i.e. requires an ACK by +the EC, the packet is added to the pending set. Next, the packet's data is +submitted to the serdev subsystem. In case of an error or timeout during +this submission, the packet is completed by the transmitter thread with the +status value of the callback set accordingly. In case the packet is +unsequenced, i.e. does not require an ACK by the EC, the packet is completed +with success on the transmitter thread. + +Transmission of sequenced packets is limited by the number of concurrently +pending packets, i.e. a limit on how many packets may be waiting for an ACK +from the EC in parallel. This limit is currently set to one (see +Documentation/driver-api/surface_aggregator/ssh.rst for the reasoning behind +this). Control packets (i.e. ACK and NAK) can always be transmitted. + +Receiver Thread +--------------- + +Any data received from the EC is put into a FIFO buffer for further +processing. This processing happens on the receiver thread. The receiver +thread parses and validates the received message into its |ssh_frame| and +corresponding payload. It prepares and submits the necessary ACK (and on +validation error or invalid data NAK) packets for the received messages. + +This thread also handles further processing, such as matching ACK messages +to the corresponding pending packet (via sequence ID) and completing it, as +well as initiating re-submission of all currently pending packets on +receival of a NAK message (re-submission in case of a NAK is similar to +re-submission due to timeout, see below for more details on that). Note that +the successful completion of a sequenced packet will always run on the +receiver thread (whereas any failure-indicating completion will run on the +process where the failure occurred). + +Any payload data is forwarded via a callback to the next upper layer, i.e. +the request transport layer. + +Timeout Reaper +-------------- + +The packet acknowledgment timeout is a per-packet timeout for sequenced +packets, started when the respective packet begins (re-)transmission (i.e. +this timeout is armed once per transmission attempt on the transmitter +thread). It is used to trigger re-submission or, when the number of tries +has been exceeded, cancellation of the packet in question. + +This timeout is handled via a dedicated reaper task, which is essentially a +work item (re-)scheduled to run when the next packet is set to time out. The +work item then checks the set of pending packets for any packets that have +exceeded the timeout and, if there are any remaining packets, re-schedules +itself to the next appropriate point in time. + +If a timeout has been detected by the reaper, the packet will either be +re-submitted if it still has some remaining tries left, or completed with +``-ETIMEDOUT`` as status if not. Note that re-submission, in this case and +triggered by receival of a NAK, means that the packet is added to the queue +with a now incremented number of tries, yielding a higher priority. The +timeout for the packet will be disabled until the next transmission attempt +and the packet remains on the pending set. + +Note that due to transmission and packet acknowledgment timeouts, the packet +transport layer is always guaranteed to make progress, if only through +timing out packets, and will never fully block. + +Concurrency and Locking +----------------------- + +There are two main locks in the packet transport layer: One guarding access +to the packet queue and one guarding access to the pending set. These +collections may only be accessed and modified under the respective lock. If +access to both collections is needed, the pending lock must be acquired +before the queue lock to avoid deadlocks. + +In addition to guarding the collections, after initial packet submission +certain packet fields may only be accessed under one of the locks. +Specifically, the packet priority must only be accessed while holding the +queue lock and the packet timestamp must only be accessed while holding the +pending lock. + +Other parts of the packet transport layer are guarded independently. State +flags are managed by atomic bit operations and, if necessary, memory +barriers. Modifications to the timeout reaper work item and expiration date +are guarded by their own lock. + +The reference of the packet to the packet transport layer (``ptl``) is +somewhat special. It is either set when the upper layer request is submitted +or, if there is none, when the packet is first submitted. After it is set, +it will not change its value. Functions that may run concurrently with +submission, i.e. cancellation, can not rely on the ``ptl`` reference to be +set. Access to it in these functions is guarded by ``READ_ONCE()``, whereas +setting ``ptl`` is equally guarded with ``WRITE_ONCE()`` for symmetry. + +Some packet fields may be read outside of the respective locks guarding +them, specifically priority and state for tracing. In those cases, proper +access is ensured by employing ``WRITE_ONCE()`` and ``READ_ONCE()``. Such +read-only access is only allowed when stale values are not critical. + +With respect to the interface for higher layers, packet submission +(|ssh_ptl_submit|), packet cancellation (|ssh_ptl_cancel|), data receival +(|ssh_ptl_rx_rcvbuf|), and layer shutdown (|ssh_ptl_shutdown|) may always be +executed concurrently with respect to each other. Note that packet +submission may not run concurrently with itself for the same packet. +Equally, shutdown and data receival may also not run concurrently with +themselves (but may run concurrently with each other). + + +Request Transport Layer +======================= + +The request transport layer is represented via |ssh_rtl| and builds on top +of the packet transport layer. It deals with requests, i.e. SSH packets sent +by the host containing a |ssh_command| as frame payload. This layer +separates responses to requests from events, which are also sent by the EC +via a |ssh_command| payload. While responses are handled in this layer, +events are relayed to the next upper layer, i.e. the controller layer, via +the corresponding callback. The request transport layer is structured around +the following key concepts: + +Request +------- + +Requests are packets with a command-type payload, sent from host to EC to +query data from or trigger an action on it (or both simultaneously). They +are represented by |ssh_request|, wrapping the underlying |ssh_packet| +storing its message data (i.e. SSH frame with command payload). Note that +all top-level representations, e.g. |ssam_request_sync| are built upon this +struct. + +As |ssh_request| extends |ssh_packet|, its lifetime is also managed by the +reference counter inside the packet struct (which can be accessed via +|ssh_request_get| and |ssh_request_put|). Once the counter reaches zero, the +``release()`` callback of the |ssh_request_ops| reference of the request is +called. + +Requests can have an optional response that is equally sent via a SSH +message with command-type payload (from EC to host). The party constructing +the request must know if a response is expected and mark this in the request +flags provided to |ssh_request_init|, so that the request transport layer +can wait for this response. + +Similar to |ssh_packet|, |ssh_request| also has a ``complete()`` callback +provided via its request ops reference and is guaranteed to be completed +before it is released once it has been submitted to the request transport +layer via |ssh_rtl_submit|. For a request without a response, successful +completion will occur once the underlying packet has been successfully +transmitted by the packet transport layer (i.e. from within the packet +completion callback). For a request with response, successful completion +will occur once the response has been received and matched to the request +via its request ID (which happens on the packet layer's data-received +callback running on the receiver thread). If the request is completed with +an error, the status value will be set to the corresponding (negative) errno +value. + +The state of a request is again managed via its ``state`` flags +(|ssh_request_flags|), which also encode the request type. In particular, +the following bits are noteworthy: + +* ``SSH_REQUEST_SF_LOCKED_BIT``: This bit is set when completion, either + through error or success, is imminent. It indicates that no further + references of the request should be taken and any existing references + should be dropped as soon as possible. The process setting this bit is + responsible for removing any references to this request from the request + queue and pending set. + +* ``SSH_REQUEST_SF_COMPLETED_BIT``: This bit is set by the process running the + ``complete()`` callback and is used to ensure that this callback only runs + once. + +* ``SSH_REQUEST_SF_QUEUED_BIT``: This bit is set when the request is queued on + the request queue and cleared when it is dequeued. + +* ``SSH_REQUEST_SF_PENDING_BIT``: This bit is set when the request is added to + the pending set and cleared when it is removed from it. + +Request Queue +------------- + +The request queue is the first of the two fundamental collections in the +request transport layer. In contrast to the packet queue of the packet +transport layer, it is not a priority queue and the simple first come first +serve principle applies. + +All requests to be transmitted by the request transport layer must be +submitted to this queue via |ssh_rtl_submit|. Once submitted, requests may +not be re-submitted, and will not be re-submitted automatically on timeout. +Instead, the request is completed with a timeout error. If desired, the +caller can create and submit a new request for another try, but it must not +submit the same request again. + +Pending Set +----------- + +The pending set is the second of the two fundamental collections in the +request transport layer. This collection stores references to all pending +requests, i.e. requests awaiting a response from the EC (similar to what the +pending set of the packet transport layer does for packets). + +Transmitter Task +---------------- + +The transmitter task is scheduled when a new request is available for +transmission. It checks if the next request on the request queue can be +transmitted and, if so, submits its underlying packet to the packet +transport layer. This check ensures that only a limited number of +requests can be pending, i.e. waiting for a response, at the same time. If +the request requires a response, the request is added to the pending set +before its packet is submitted. + +Packet Completion Callback +-------------------------- + +The packet completion callback is executed once the underlying packet of a +request has been completed. In case of an error completion, the +corresponding request is completed with the error value provided in this +callback. + +On successful packet completion, further processing depends on the request. +If the request expects a response, it is marked as transmitted and the +request timeout is started. If the request does not expect a response, it is +completed with success. + +Data-Received Callback +---------------------- + +The data received callback notifies the request transport layer of data +being received by the underlying packet transport layer via a data-type +frame. In general, this is expected to be a command-type payload. + +If the request ID of the command is one of the request IDs reserved for +events (one to ``SSH_NUM_EVENTS``, inclusively), it is forwarded to the +event callback registered in the request transport layer. If the request ID +indicates a response to a request, the respective request is looked up in +the pending set and, if found and marked as transmitted, completed with +success. + +Timeout Reaper +-------------- + +The request-response-timeout is a per-request timeout for requests expecting +a response. It is used to ensure that a request does not wait indefinitely +on a response from the EC and is started after the underlying packet has +been successfully completed. + +This timeout is, similar to the packet acknowledgment timeout on the packet +transport layer, handled via a dedicated reaper task. This task is +essentially a work-item (re-)scheduled to run when the next request is set +to time out. The work item then scans the set of pending requests for any +requests that have timed out and completes them with ``-ETIMEDOUT`` as +status. Requests will not be re-submitted automatically. Instead, the issuer +of the request must construct and submit a new request, if so desired. + +Note that this timeout, in combination with packet transmission and +acknowledgment timeouts, guarantees that the request layer will always make +progress, even if only through timing out packets, and never fully block. + +Concurrency and Locking +----------------------- + +Similar to the packet transport layer, there are two main locks in the +request transport layer: One guarding access to the request queue and one +guarding access to the pending set. These collections may only be accessed +and modified under the respective lock. + +Other parts of the request transport layer are guarded independently. State +flags are (again) managed by atomic bit operations and, if necessary, memory +barriers. Modifications to the timeout reaper work item and expiration date +are guarded by their own lock. + +Some request fields may be read outside of the respective locks guarding +them, specifically the state for tracing. In those cases, proper access is +ensured by employing ``WRITE_ONCE()`` and ``READ_ONCE()``. Such read-only +access is only allowed when stale values are not critical. + +With respect to the interface for higher layers, request submission +(|ssh_rtl_submit|), request cancellation (|ssh_rtl_cancel|), and layer +shutdown (|ssh_rtl_shutdown|) may always be executed concurrently with +respect to each other. Note that request submission may not run concurrently +with itself for the same request (and also may only be called once per +request). Equally, shutdown may also not run concurrently with itself. + + +Controller Layer +================ + +The controller layer extends on the request transport layer to provide an +easy-to-use interface for client drivers. It is represented by +|ssam_controller| and the SSH driver. While the lower level transport layers +take care of transmitting and handling packets and requests, the controller +layer takes on more of a management role. Specifically, it handles device +initialization, power management, and event handling, including event +delivery and registration via the (event) completion system (|ssam_cplt|). + +Event Registration +------------------ + +In general, an event (or rather a class of events) has to be explicitly +requested by the host before the EC will send it (HID input events seem to +be the exception). This is done via an event-enable request (similarly, +events should be disabled via an event-disable request once no longer +desired). + +The specific request used to enable (or disable) an event is given via an +event registry, i.e. the governing authority of this event (so to speak), +represented by |ssam_event_registry|. As parameters to this request, the +target category and, depending on the event registry, instance ID of the +event to be enabled must be provided. This (optional) instance ID must be +zero if the registry does not use it. Together, target category and instance +ID form the event ID, represented by |ssam_event_id|. In short, both, event +registry and event ID, are required to uniquely identify a respective class +of events. + +Note that a further *request ID* parameter must be provided for the +enable-event request. This parameter does not influence the class of events +being enabled, but instead is set as the request ID (RQID) on each event of +this class sent by the EC. It is used to identify events (as a limited +number of request IDs is reserved for use in events only, specifically one +to ``SSH_NUM_EVENTS`` inclusively) and also map events to their specific +class. Currently, the controller always sets this parameter to the target +category specified in |ssam_event_id|. + +As multiple client drivers may rely on the same (or overlapping) classes of +events and enable/disable calls are strictly binary (i.e. on/off), the +controller has to manage access to these events. It does so via reference +counting, storing the counter inside an RB-tree based mapping with event +registry and ID as key (there is no known list of valid event registry and +event ID combinations). See |ssam_nf|, |ssam_nf_refcount_inc|, and +|ssam_nf_refcount_dec| for details. + +This management is done together with notifier registration (described in +the next section) via the top-level |ssam_notifier_register| and +|ssam_notifier_unregister| functions. + +Event Delivery +-------------- + +To receive events, a client driver has to register an event notifier via +|ssam_notifier_register|. This increments the reference counter for that +specific class of events (as detailed in the previous section), enables the +class on the EC (if it has not been enabled already), and installs the +provided notifier callback. + +Notifier callbacks are stored in lists, with one (RCU) list per target +category (provided via the event ID; NB: there is a fixed known number of +target categories). There is no known association from the combination of +event registry and event ID to the command data (target ID, target category, +command ID, and instance ID) that can be provided by an event class, apart +from target category and instance ID given via the event ID. + +Note that due to the way notifiers are (or rather have to be) stored, client +drivers may receive events that they have not requested and need to account +for them. Specifically, they will, by default, receive all events from the +same target category. To simplify dealing with this, filtering of events by +target ID (provided via the event registry) and instance ID (provided via +the event ID) can be requested when registering a notifier. This filtering +is applied when iterating over the notifiers at the time they are executed. + +All notifier callbacks are executed on a dedicated workqueue, the so-called +completion workqueue. After an event has been received via the callback +installed in the request layer (running on the receiver thread of the packet +transport layer), it will be put on its respective event queue +(|ssam_event_queue|). From this event queue the completion work item of that +queue (running on the completion workqueue) will pick up the event and +execute the notifier callback. This is done to avoid blocking on the +receiver thread. + +There is one event queue per combination of target ID and target category. +This is done to ensure that notifier callbacks are executed in sequence for +events of the same target ID and target category. Callbacks can be executed +in parallel for events with a different combination of target ID and target +category. + +Concurrency and Locking +----------------------- + +Most of the concurrency related safety guarantees of the controller are +provided by the lower-level request transport layer. In addition to this, +event (un-)registration is guarded by its own lock. + +Access to the controller state is guarded by the state lock. This lock is a +read/write semaphore. The reader part can be used to ensure that the state +does not change while functions depending on the state to stay the same +(e.g. |ssam_notifier_register|, |ssam_notifier_unregister|, +|ssam_request_sync_submit|, and derivatives) are executed and this guarantee +is not already provided otherwise (e.g. through |ssam_client_bind| or +|ssam_client_link|). The writer part guards any transitions that will change +the state, i.e. initialization, destruction, suspension, and resumption. + +The controller state may be accessed (read-only) outside the state lock for +smoke-testing against invalid API usage (e.g. in |ssam_request_sync_submit|). +Note that such checks are not supposed to (and will not) protect against all +invalid usages, but rather aim to help catch them. In those cases, proper +variable access is ensured by employing ``WRITE_ONCE()`` and ``READ_ONCE()``. + +Assuming any preconditions on the state not changing have been satisfied, +all non-initialization and non-shutdown functions may run concurrently with +each other. This includes |ssam_notifier_register|, |ssam_notifier_unregister|, +|ssam_request_sync_submit|, as well as all functions building on top of those. diff --git a/Documentation/driver-api/surface_aggregator/overview.rst b/Documentation/driver-api/surface_aggregator/overview.rst new file mode 100644 index 0000000000..26415e1ab7 --- /dev/null +++ b/Documentation/driver-api/surface_aggregator/overview.rst @@ -0,0 +1,79 @@ +.. SPDX-License-Identifier: GPL-2.0+ + +======== +Overview +======== + +The Surface/System Aggregator Module (SAM, SSAM) is an (arguably *the*) +embedded controller (EC) on Microsoft Surface devices. It has been originally +introduced on 4th generation devices (Surface Pro 4, Surface Book 1), but +its responsibilities and feature-set have since been expanded significantly +with the following generations. + + +Features and Integration +======================== + +Not much is currently known about SAM on 4th generation devices (Surface Pro +4, Surface Book 1), due to the use of a different communication interface +between host and EC (as detailed below). On 5th (Surface Pro 2017, Surface +Book 2, Surface Laptop 1) and later generation devices, SAM is responsible +for providing battery information (both current status and static values, +such as maximum capacity etc.), as well as an assortment of temperature +sensors (e.g. skin temperature) and cooling/performance-mode setting to the +host. On the Surface Book 2, specifically, it additionally provides an +interface for properly handling clipboard detachment (i.e. separating the +display part from the keyboard part of the device), on the Surface Laptop 1 +and 2 it is required for keyboard HID input. This HID subsystem has been +restructured for 7th generation devices and on those, specifically Surface +Laptop 3 and Surface Book 3, is responsible for all major HID input (i.e. +keyboard and touchpad). + +While features have not changed much on a coarse level since the 5th +generation, internal interfaces have undergone some rather large changes. On +5th and 6th generation devices, both battery and temperature information is +exposed to ACPI via a shim driver (referred to as Surface ACPI Notify, or +SAN), translating ACPI generic serial bus write-/read-accesses to SAM +requests. On 7th generation devices, this additional layer is gone and these +devices require a driver hooking directly into the SAM interface. Equally, +on newer generations, less devices are declared in ACPI, making them a bit +harder to discover and requiring us to hard-code a sort of device registry. +Due to this, a SSAM bus and subsystem with client devices +(:c:type:`struct ssam_device <ssam_device>`) has been implemented. + + +Communication +============= + +The type of communication interface between host and EC depends on the +generation of the Surface device. On 4th generation devices, host and EC +communicate via HID, specifically using a HID-over-I2C device, whereas on +5th and later generations, communication takes place via a USART serial +device. In accordance to the drivers found on other operating systems, we +refer to the serial device and its driver as Surface Serial Hub (SSH). When +needed, we differentiate between both types of SAM by referring to them as +SAM-over-SSH and SAM-over-HID. + +Currently, this subsystem only supports SAM-over-SSH. The SSH communication +interface is described in more detail below. The HID interface has not been +reverse engineered yet and it is, at the moment, unclear how many (and +which) concepts of the SSH interface detailed below can be transferred to +it. + +Surface Serial Hub +------------------ + +As already elaborated above, the Surface Serial Hub (SSH) is the +communication interface for SAM on 5th- and all later-generation Surface +devices. On the highest level, communication can be separated into two main +types: Requests, messages sent from host to EC that may trigger a direct +response from the EC (explicitly associated with the request), and events +(sometimes also referred to as notifications), sent from EC to host without +being a direct response to a previous request. We may also refer to requests +without response as commands. In general, events need to be enabled via one +of multiple dedicated requests before they are sent by the EC. + +See Documentation/driver-api/surface_aggregator/ssh.rst for a +more technical protocol documentation and +Documentation/driver-api/surface_aggregator/internal.rst for an +overview of the internal driver architecture. diff --git a/Documentation/driver-api/surface_aggregator/ssh.rst b/Documentation/driver-api/surface_aggregator/ssh.rst new file mode 100644 index 0000000000..b955b67383 --- /dev/null +++ b/Documentation/driver-api/surface_aggregator/ssh.rst @@ -0,0 +1,346 @@ +.. SPDX-License-Identifier: GPL-2.0+ + +.. |u8| replace:: :c:type:`u8 <u8>` +.. |u16| replace:: :c:type:`u16 <u16>` +.. |TYPE| replace:: ``TYPE`` +.. |LEN| replace:: ``LEN`` +.. |SEQ| replace:: ``SEQ`` +.. |SYN| replace:: ``SYN`` +.. |NAK| replace:: ``NAK`` +.. |ACK| replace:: ``ACK`` +.. |DATA| replace:: ``DATA`` +.. |DATA_SEQ| replace:: ``DATA_SEQ`` +.. |DATA_NSQ| replace:: ``DATA_NSQ`` +.. |TC| replace:: ``TC`` +.. |TID| replace:: ``TID`` +.. |SID| replace:: ``SID`` +.. |IID| replace:: ``IID`` +.. |RQID| replace:: ``RQID`` +.. |CID| replace:: ``CID`` + +=========================== +Surface Serial Hub Protocol +=========================== + +The Surface Serial Hub (SSH) is the central communication interface for the +embedded Surface Aggregator Module controller (SAM or EC), found on newer +Surface generations. We will refer to this protocol and interface as +SAM-over-SSH, as opposed to SAM-over-HID for the older generations. + +On Surface devices with SAM-over-SSH, SAM is connected to the host via UART +and defined in ACPI as device with ID ``MSHW0084``. On these devices, +significant functionality is provided via SAM, including access to battery +and power information and events, thermal read-outs and events, and many +more. For Surface Laptops, keyboard input is handled via HID directed +through SAM, on the Surface Laptop 3 and Surface Book 3 this also includes +touchpad input. + +Note that the standard disclaimer for this subsystem also applies to this +document: All of this has been reverse-engineered and may thus be erroneous +and/or incomplete. + +All CRCs used in the following are two-byte ``crc_ccitt_false(0xffff, ...)``. +All multi-byte values are little-endian, there is no implicit padding between +values. + + +SSH Packet Protocol: Definitions +================================ + +The fundamental communication unit of the SSH protocol is a frame +(:c:type:`struct ssh_frame <ssh_frame>`). A frame consists of the following +fields, packed together and in order: + +.. flat-table:: SSH Frame + :widths: 1 1 4 + :header-rows: 1 + + * - Field + - Type + - Description + + * - |TYPE| + - |u8| + - Type identifier of the frame. + + * - |LEN| + - |u16| + - Length of the payload associated with the frame. + + * - |SEQ| + - |u8| + - Sequence ID (see explanation below). + +Each frame structure is followed by a CRC over this structure. The CRC over +the frame structure (|TYPE|, |LEN|, and |SEQ| fields) is placed directly +after the frame structure and before the payload. The payload is followed by +its own CRC (over all payload bytes). If the payload is not present (i.e. +the frame has ``LEN=0``), the CRC of the payload is still present and will +evaluate to ``0xffff``. The |LEN| field does not include any of the CRCs, it +equals the number of bytes between the CRC of the frame and the CRC of the +payload. + +Additionally, the following fixed two-byte sequences are used: + +.. flat-table:: SSH Byte Sequences + :widths: 1 1 4 + :header-rows: 1 + + * - Name + - Value + - Description + + * - |SYN| + - ``[0xAA, 0x55]`` + - Synchronization bytes. + +A message consists of |SYN|, followed by the frame (|TYPE|, |LEN|, |SEQ| and +CRC) and, if specified in the frame (i.e. ``LEN > 0``), payload bytes, +followed finally, regardless if the payload is present, the payload CRC. The +messages corresponding to an exchange are, in part, identified by having the +same sequence ID (|SEQ|), stored inside the frame (more on this in the next +section). The sequence ID is a wrapping counter. + +A frame can have the following types +(:c:type:`enum ssh_frame_type <ssh_frame_type>`): + +.. flat-table:: SSH Frame Types + :widths: 1 1 4 + :header-rows: 1 + + * - Name + - Value + - Short Description + + * - |NAK| + - ``0x04`` + - Sent on error in previously received message. + + * - |ACK| + - ``0x40`` + - Sent to acknowledge receival of |DATA| frame. + + * - |DATA_SEQ| + - ``0x80`` + - Sent to transfer data. Sequenced. + + * - |DATA_NSQ| + - ``0x00`` + - Same as |DATA_SEQ|, but does not need to be ACKed. + +Both |NAK|- and |ACK|-type frames are used to control flow of messages and +thus do not carry a payload. |DATA_SEQ|- and |DATA_NSQ|-type frames on the +other hand must carry a payload. The flow sequence and interaction of +different frame types will be described in more depth in the next section. + + +SSH Packet Protocol: Flow Sequence +================================== + +Each exchange begins with |SYN|, followed by a |DATA_SEQ|- or +|DATA_NSQ|-type frame, followed by its CRC, payload, and payload CRC. In +case of a |DATA_NSQ|-type frame, the exchange is then finished. In case of a +|DATA_SEQ|-type frame, the receiving party has to acknowledge receival of +the frame by responding with a message containing an |ACK|-type frame with +the same sequence ID of the |DATA| frame. In other words, the sequence ID of +the |ACK| frame specifies the |DATA| frame to be acknowledged. In case of an +error, e.g. an invalid CRC, the receiving party responds with a message +containing an |NAK|-type frame. As the sequence ID of the previous data +frame, for which an error is indicated via the |NAK| frame, cannot be relied +upon, the sequence ID of the |NAK| frame should not be used and is set to +zero. After receival of an |NAK| frame, the sending party should re-send all +outstanding (non-ACKed) messages. + +Sequence IDs are not synchronized between the two parties, meaning that they +are managed independently for each party. Identifying the messages +corresponding to a single exchange thus relies on the sequence ID as well as +the type of the message, and the context. Specifically, the sequence ID is +used to associate an ``ACK`` with its ``DATA_SEQ``-type frame, but not +``DATA_SEQ``- or ``DATA_NSQ``-type frames with other ``DATA``- type frames. + +An example exchange might look like this: + +:: + + tx: -- SYN FRAME(D) CRC(F) PAYLOAD CRC(P) ----------------------------- + rx: ------------------------------------- SYN FRAME(A) CRC(F) CRC(P) -- + +where both frames have the same sequence ID (``SEQ``). Here, ``FRAME(D)`` +indicates a |DATA_SEQ|-type frame, ``FRAME(A)`` an ``ACK``-type frame, +``CRC(F)`` the CRC over the previous frame, ``CRC(P)`` the CRC over the +previous payload. In case of an error, the exchange would look like this: + +:: + + tx: -- SYN FRAME(D) CRC(F) PAYLOAD CRC(P) ----------------------------- + rx: ------------------------------------- SYN FRAME(N) CRC(F) CRC(P) -- + +upon which the sender should re-send the message. ``FRAME(N)`` indicates an +|NAK|-type frame. Note that the sequence ID of the |NAK|-type frame is fixed +to zero. For |DATA_NSQ|-type frames, both exchanges are the same: + +:: + + tx: -- SYN FRAME(DATA_NSQ) CRC(F) PAYLOAD CRC(P) ---------------------- + rx: ------------------------------------------------------------------- + +Here, an error can be detected, but not corrected or indicated to the +sending party. These exchanges are symmetric, i.e. switching ``rx`` and +``tx`` results again in a valid exchange. Currently, no longer exchanges are +known. + + +Commands: Requests, Responses, and Events +========================================= + +Commands are sent as payload inside a data frame. Currently, this is the +only known payload type of |DATA| frames, with a payload-type value of +``0x80`` (:c:type:`SSH_PLD_TYPE_CMD <ssh_payload_type>`). + +The command-type payload (:c:type:`struct ssh_command <ssh_command>`) +consists of an eight-byte command structure, followed by optional and +variable length command data. The length of this optional data is derived +from the frame payload length given in the corresponding frame, i.e. it is +``frame.len - sizeof(struct ssh_command)``. The command struct contains the +following fields, packed together and in order: + +.. flat-table:: SSH Command + :widths: 1 1 4 + :header-rows: 1 + + * - Field + - Type + - Description + + * - |TYPE| + - |u8| + - Type of the payload. For commands always ``0x80``. + + * - |TC| + - |u8| + - Target category. + + * - |TID| + - |u8| + - Target ID for commands/messages. + + * - |SID| + - |u8| + - Source ID for commands/messages. + + * - |IID| + - |u8| + - Instance ID. + + * - |RQID| + - |u16| + - Request ID. + + * - |CID| + - |u8| + - Command ID. + +The command struct and data, in general, does not contain any failure +detection mechanism (e.g. CRCs), this is solely done on the frame level. + +Command-type payloads are used by the host to send commands and requests to +the EC as well as by the EC to send responses and events back to the host. +We differentiate between requests (sent by the host), responses (sent by the +EC in response to a request), and events (sent by the EC without a preceding +request). + +Commands and events are uniquely identified by their target category +(``TC``) and command ID (``CID``). The target category specifies a general +category for the command (e.g. system in general, vs. battery and AC, vs. +temperature, and so on), while the command ID specifies the command inside +that category. Only the combination of |TC| + |CID| is unique. Additionally, +commands have an instance ID (``IID``), which is used to differentiate +between different sub-devices. For example ``TC=3`` ``CID=1`` is a +request to get the temperature on a thermal sensor, where |IID| specifies +the respective sensor. If the instance ID is not used, it should be set to +zero. If instance IDs are used, they, in general, start with a value of one, +whereas zero may be used for instance independent queries, if applicable. A +response to a request should have the same target category, command ID, and +instance ID as the corresponding request. + +Responses are matched to their corresponding request via the request ID +(``RQID``) field. This is a 16 bit wrapping counter similar to the sequence +ID on the frames. Note that the sequence ID of the frames for a +request-response pair does not match. Only the request ID has to match. +Frame-protocol wise these are two separate exchanges, and may even be +separated, e.g. by an event being sent after the request but before the +response. Not all commands produce a response, and this is not detectable by +|TC| + |CID|. It is the responsibility of the issuing party to wait for a +response (or signal this to the communication framework, as is done in +SAN/ACPI via the ``SNC`` flag). + +Events are identified by unique and reserved request IDs. These IDs should +not be used by the host when sending a new request. They are used on the +host to, first, detect events and, second, match them with a registered +event handler. Request IDs for events are chosen by the host and directed to +the EC when setting up and enabling an event source (via the +enable-event-source request). The EC then uses the specified request ID for +events sent from the respective source. Note that an event should still be +identified by its target category, command ID, and, if applicable, instance +ID, as a single event source can send multiple different event types. In +general, however, a single target category should map to a single reserved +event request ID. + +Furthermore, requests, responses, and events have an associated target ID +(``TID``) and source ID (``SID``). These two fields indicate where a message +originates from (``SID``) and what the intended target of the message is +(``TID``). Note that a response to a specific request therefore has the source +and target IDs swapped when compared to the original request (i.e. the request +target is the response source and the request source is the response target). +See (:c:type:`enum ssh_request_id <ssh_request_id>`) for possible values of +both. + +Note that, even though requests and events should be uniquely identifiable by +target category and command ID alone, the EC may require specific target ID and +instance ID values to accept a command. A command that is accepted for +``TID=1``, for example, may not be accepted for ``TID=2`` and vice versa. While +this may not always hold in reality, you can think of different target/source +IDs indicating different physical ECs with potentially different feature sets. + + +Limitations and Observations +============================ + +The protocol can, in theory, handle up to ``U8_MAX`` frames in parallel, +with up to ``U16_MAX`` pending requests (neglecting request IDs reserved for +events). In practice, however, this is more limited. From our testing +(although via a python and thus a user-space program), it seems that the EC +can handle up to four requests (mostly) reliably in parallel at a certain +time. With five or more requests in parallel, consistent discarding of +commands (ACKed frame but no command response) has been observed. For five +simultaneous commands, this reproducibly resulted in one command being +dropped and four commands being handled. + +However, it has also been noted that, even with three requests in parallel, +occasional frame drops happen. Apart from this, with a limit of three +pending requests, no dropped commands (i.e. command being dropped but frame +carrying command being ACKed) have been observed. In any case, frames (and +possibly also commands) should be re-sent by the host if a certain timeout +is exceeded. This is done by the EC for frames with a timeout of one second, +up to two re-tries (i.e. three transmissions in total). The limit of +re-tries also applies to received NAKs, and, in a worst case scenario, can +lead to entire messages being dropped. + +While this also seems to work fine for pending data frames as long as no +transmission failures occur, implementation and handling of these seems to +depend on the assumption that there is only one non-acknowledged data frame. +In particular, the detection of repeated frames relies on the last sequence +number. This means that, if a frame that has been successfully received by +the EC is sent again, e.g. due to the host not receiving an |ACK|, the EC +will only detect this if it has the sequence ID of the last frame received +by the EC. As an example: Sending two frames with ``SEQ=0`` and ``SEQ=1`` +followed by a repetition of ``SEQ=0`` will not detect the second ``SEQ=0`` +frame as such, and thus execute the command in this frame each time it has +been received, i.e. twice in this example. Sending ``SEQ=0``, ``SEQ=1`` and +then repeating ``SEQ=1`` will detect the second ``SEQ=1`` as repetition of +the first one and ignore it, thus executing the contained command only once. + +In conclusion, this suggests a limit of at most one pending un-ACKed frame +(per party, effectively leading to synchronous communication regarding +frames) and at most three pending commands. The limit to synchronous frame +transfers seems to be consistent with behavior observed on Windows. diff --git a/Documentation/driver-api/switchtec.rst b/Documentation/driver-api/switchtec.rst new file mode 100644 index 0000000000..7611fdc53e --- /dev/null +++ b/Documentation/driver-api/switchtec.rst @@ -0,0 +1,102 @@ +======================== +Linux Switchtec Support +======================== + +Microsemi's "Switchtec" line of PCI switch devices is already +supported by the kernel with standard PCI switch drivers. However, the +Switchtec device advertises a special management endpoint which +enables some additional functionality. This includes: + +* Packet and Byte Counters +* Firmware Upgrades +* Event and Error logs +* Querying port link status +* Custom user firmware commands + +The switchtec kernel module implements this functionality. + + +Interface +========= + +The primary means of communicating with the Switchtec management firmware is +through the Memory-mapped Remote Procedure Call (MRPC) interface. +Commands are submitted to the interface with a 4-byte command +identifier and up to 1KB of command specific data. The firmware will +respond with a 4-byte return code and up to 1KB of command-specific +data. The interface only processes a single command at a time. + + +Userspace Interface +=================== + +The MRPC interface will be exposed to userspace through a simple char +device: /dev/switchtec#, one for each management endpoint in the system. + +The char device has the following semantics: + +* A write must consist of at least 4 bytes and no more than 1028 bytes. + The first 4 bytes will be interpreted as the Command ID and the + remainder will be used as the input data. A write will send the + command to the firmware to begin processing. + +* Each write must be followed by exactly one read. Any double write will + produce an error and any read that doesn't follow a write will + produce an error. + +* A read will block until the firmware completes the command and return + the 4-byte Command Return Value plus up to 1024 bytes of output + data. (The length will be specified by the size parameter of the read + call -- reading less than 4 bytes will produce an error.) + +* The poll call will also be supported for userspace applications that + need to do other things while waiting for the command to complete. + +The following IOCTLs are also supported by the device: + +* SWITCHTEC_IOCTL_FLASH_INFO - Retrieve firmware length and number + of partitions in the device. + +* SWITCHTEC_IOCTL_FLASH_PART_INFO - Retrieve address and lengeth for + any specified partition in flash. + +* SWITCHTEC_IOCTL_EVENT_SUMMARY - Read a structure of bitmaps + indicating all uncleared events. + +* SWITCHTEC_IOCTL_EVENT_CTL - Get the current count, clear and set flags + for any event. This ioctl takes in a switchtec_ioctl_event_ctl struct + with the event_id, index and flags set (index being the partition or PFF + number for non-global events). It returns whether the event has + occurred, the number of times and any event specific data. The flags + can be used to clear the count or enable and disable actions to + happen when the event occurs. + By using the SWITCHTEC_IOCTL_EVENT_FLAG_EN_POLL flag, + you can set an event to trigger a poll command to return with + POLLPRI. In this way, userspace can wait for events to occur. + +* SWITCHTEC_IOCTL_PFF_TO_PORT and SWITCHTEC_IOCTL_PORT_TO_PFF convert + between PCI Function Framework number (used by the event system) + and Switchtec Logic Port ID and Partition number (which is more + user friendly). + + +Non-Transparent Bridge (NTB) Driver +=================================== + +An NTB hardware driver is provided for the Switchtec hardware in +ntb_hw_switchtec. Currently, it only supports switches configured with +exactly 2 NT partitions and zero or more non-NT partitions. It also requires +the following configuration settings: + +* Both NT partitions must be able to access each other's GAS spaces. + Thus, the bits in the GAS Access Vector under Management Settings + must be set to support this. +* Kernel configuration MUST include support for NTB (CONFIG_NTB needs + to be set) + +NT EP BAR 2 will be dynamically configured as a Direct Window, and +the configuration file does not need to configure it explicitly. + +Please refer to Documentation/driver-api/ntb.rst in Linux source tree for an overall +understanding of the Linux NTB stack. ntb_hw_switchtec works as an NTB +Hardware Driver in this stack. diff --git a/Documentation/driver-api/sync_file.rst b/Documentation/driver-api/sync_file.rst new file mode 100644 index 0000000000..496fb2c3b3 --- /dev/null +++ b/Documentation/driver-api/sync_file.rst @@ -0,0 +1,86 @@ +=================== +Sync File API Guide +=================== + +:Author: Gustavo Padovan <gustavo at padovan dot org> + +This document serves as a guide for device drivers writers on what the +sync_file API is, and how drivers can support it. Sync file is the carrier of +the fences(struct dma_fence) that are needed to synchronize between drivers or +across process boundaries. + +The sync_file API is meant to be used to send and receive fence information +to/from userspace. It enables userspace to do explicit fencing, where instead +of attaching a fence to the buffer a producer driver (such as a GPU or V4L +driver) sends the fence related to the buffer to userspace via a sync_file. + +The sync_file then can be sent to the consumer (DRM driver for example), that +will not use the buffer for anything before the fence(s) signals, i.e., the +driver that issued the fence is not using/processing the buffer anymore, so it +signals that the buffer is ready to use. And vice-versa for the consumer -> +producer part of the cycle. + +Sync files allows userspace awareness on buffer sharing synchronization between +drivers. + +Sync file was originally added in the Android kernel but current Linux Desktop +can benefit a lot from it. + +in-fences and out-fences +------------------------ + +Sync files can go either to or from userspace. When a sync_file is sent from +the driver to userspace we call the fences it contains 'out-fences'. They are +related to a buffer that the driver is processing or is going to process, so +the driver creates an out-fence to be able to notify, through +dma_fence_signal(), when it has finished using (or processing) that buffer. +Out-fences are fences that the driver creates. + +On the other hand if the driver receives fence(s) through a sync_file from +userspace we call these fence(s) 'in-fences'. Receiving in-fences means that +we need to wait for the fence(s) to signal before using any buffer related to +the in-fences. + +Creating Sync Files +------------------- + +When a driver needs to send an out-fence userspace it creates a sync_file. + +Interface:: + + struct sync_file *sync_file_create(struct dma_fence *fence); + +The caller pass the out-fence and gets back the sync_file. That is just the +first step, next it needs to install an fd on sync_file->file. So it gets an +fd:: + + fd = get_unused_fd_flags(O_CLOEXEC); + +and installs it on sync_file->file:: + + fd_install(fd, sync_file->file); + +The sync_file fd now can be sent to userspace. + +If the creation process fail, or the sync_file needs to be released by any +other reason fput(sync_file->file) should be used. + +Receiving Sync Files from Userspace +----------------------------------- + +When userspace needs to send an in-fence to the driver it passes file descriptor +of the Sync File to the kernel. The kernel can then retrieve the fences +from it. + +Interface:: + + struct dma_fence *sync_file_get_fence(int fd); + + +The returned reference is owned by the caller and must be disposed of +afterwards using dma_fence_put(). In case of error, a NULL is returned instead. + +References: + +1. struct sync_file in include/linux/sync_file.h +2. All interfaces mentioned above defined in include/linux/sync_file.h diff --git a/Documentation/driver-api/target.rst b/Documentation/driver-api/target.rst new file mode 100644 index 0000000000..c70ca25171 --- /dev/null +++ b/Documentation/driver-api/target.rst @@ -0,0 +1,52 @@ +================================= +target and iSCSI Interfaces Guide +================================= + +Introduction and Overview +========================= + +TBD + +Target core device interfaces +============================= + +This section is blank because no kerneldoc comments have been added to +drivers/target/target_core_device.c. + +Target core transport interfaces +================================ + +.. kernel-doc:: drivers/target/target_core_transport.c + :export: + +Target-supported userspace I/O +============================== + +.. kernel-doc:: drivers/target/target_core_user.c + :doc: Userspace I/O + +.. kernel-doc:: include/uapi/linux/target_core_user.h + :doc: Ring Design + +iSCSI helper functions +====================== + +.. kernel-doc:: drivers/scsi/libiscsi.c + :export: + + +iSCSI boot information +====================== + +.. kernel-doc:: drivers/scsi/iscsi_boot_sysfs.c + :export: + +iSCSI TCP interfaces +==================== + +.. kernel-doc:: drivers/scsi/iscsi_tcp.c + :internal: + +.. kernel-doc:: drivers/scsi/libiscsi_tcp.c + :export: + diff --git a/Documentation/driver-api/thermal/cpu-cooling-api.rst b/Documentation/driver-api/thermal/cpu-cooling-api.rst new file mode 100644 index 0000000000..645d914c45 --- /dev/null +++ b/Documentation/driver-api/thermal/cpu-cooling-api.rst @@ -0,0 +1,107 @@ +======================= +CPU cooling APIs How To +======================= + +Written by Amit Daniel Kachhap <amit.kachhap@linaro.org> + +Updated: 6 Jan 2015 + +Copyright (c) 2012 Samsung Electronics Co., Ltd(http://www.samsung.com) + +0. Introduction +=============== + +The generic cpu cooling(freq clipping) provides registration/unregistration APIs +to the caller. The binding of the cooling devices to the trip point is left for +the user. The registration APIs returns the cooling device pointer. + +1. cpu cooling APIs +=================== + +1.1 cpufreq registration/unregistration APIs +-------------------------------------------- + + :: + + struct thermal_cooling_device + *cpufreq_cooling_register(struct cpumask *clip_cpus) + + This interface function registers the cpufreq cooling device with the name + "thermal-cpufreq-%x". This api can support multiple instances of cpufreq + cooling devices. + + clip_cpus: + cpumask of cpus where the frequency constraints will happen. + + :: + + struct thermal_cooling_device + *of_cpufreq_cooling_register(struct cpufreq_policy *policy) + + This interface function registers the cpufreq cooling device with + the name "thermal-cpufreq-%x" linking it with a device tree node, in + order to bind it via the thermal DT code. This api can support multiple + instances of cpufreq cooling devices. + + policy: + CPUFreq policy. + + + :: + + void cpufreq_cooling_unregister(struct thermal_cooling_device *cdev) + + This interface function unregisters the "thermal-cpufreq-%x" cooling device. + + cdev: Cooling device pointer which has to be unregistered. + +2. Power models +=============== + +The power API registration functions provide a simple power model for +CPUs. The current power is calculated as dynamic power (static power isn't +supported currently). This power model requires that the operating-points of +the CPUs are registered using the kernel's opp library and the +`cpufreq_frequency_table` is assigned to the `struct device` of the +cpu. If you are using CONFIG_CPUFREQ_DT then the +`cpufreq_frequency_table` should already be assigned to the cpu +device. + +The dynamic power consumption of a processor depends on many factors. +For a given processor implementation the primary factors are: + +- The time the processor spends running, consuming dynamic power, as + compared to the time in idle states where dynamic consumption is + negligible. Herein we refer to this as 'utilisation'. +- The voltage and frequency levels as a result of DVFS. The DVFS + level is a dominant factor governing power consumption. +- In running time the 'execution' behaviour (instruction types, memory + access patterns and so forth) causes, in most cases, a second order + variation. In pathological cases this variation can be significant, + but typically it is of a much lesser impact than the factors above. + +A high level dynamic power consumption model may then be represented as:: + + Pdyn = f(run) * Voltage^2 * Frequency * Utilisation + +f(run) here represents the described execution behaviour and its +result has a units of Watts/Hz/Volt^2 (this often expressed in +mW/MHz/uVolt^2) + +The detailed behaviour for f(run) could be modelled on-line. However, +in practice, such an on-line model has dependencies on a number of +implementation specific processor support and characterisation +factors. Therefore, in initial implementation that contribution is +represented as a constant coefficient. This is a simplification +consistent with the relative contribution to overall power variation. + +In this simplified representation our model becomes:: + + Pdyn = Capacitance * Voltage^2 * Frequency * Utilisation + +Where `capacitance` is a constant that represents an indicative +running time dynamic power coefficient in fundamental units of +mW/MHz/uVolt^2. Typical values for mobile CPUs might lie in range +from 100 to 500. For reference, the approximate values for the SoC in +ARM's Juno Development Platform are 530 for the Cortex-A57 cluster and +140 for the Cortex-A53 cluster. diff --git a/Documentation/driver-api/thermal/cpu-idle-cooling.rst b/Documentation/driver-api/thermal/cpu-idle-cooling.rst new file mode 100644 index 0000000000..c2a7ca6768 --- /dev/null +++ b/Documentation/driver-api/thermal/cpu-idle-cooling.rst @@ -0,0 +1,199 @@ +.. SPDX-License-Identifier: GPL-2.0 + +================ +CPU Idle Cooling +================ + +Situation: +---------- + +Under certain circumstances a SoC can reach a critical temperature +limit and is unable to stabilize the temperature around a temperature +control. When the SoC has to stabilize the temperature, the kernel can +act on a cooling device to mitigate the dissipated power. When the +critical temperature is reached, a decision must be taken to reduce +the temperature, that, in turn impacts performance. + +Another situation is when the silicon temperature continues to +increase even after the dynamic leakage is reduced to its minimum by +clock gating the component. This runaway phenomenon can continue due +to the static leakage. The only solution is to power down the +component, thus dropping the dynamic and static leakage that will +allow the component to cool down. + +Last but not least, the system can ask for a specific power budget but +because of the OPP density, we can only choose an OPP with a power +budget lower than the requested one and under-utilize the CPU, thus +losing performance. In other words, one OPP under-utilizes the CPU +with a power less than the requested power budget and the next OPP +exceeds the power budget. An intermediate OPP could have been used if +it were present. + +Solutions: +---------- + +If we can remove the static and the dynamic leakage for a specific +duration in a controlled period, the SoC temperature will +decrease. Acting on the idle state duration or the idle cycle +injection period, we can mitigate the temperature by modulating the +power budget. + +The Operating Performance Point (OPP) density has a great influence on +the control precision of cpufreq, however different vendors have a +plethora of OPP density, and some have large power gap between OPPs, +that will result in loss of performance during thermal control and +loss of power in other scenarios. + +At a specific OPP, we can assume that injecting idle cycle on all CPUs +belong to the same cluster, with a duration greater than the cluster +idle state target residency, we lead to dropping the static and the +dynamic leakage for this period (modulo the energy needed to enter +this state). So the sustainable power with idle cycles has a linear +relation with the OPP’s sustainable power and can be computed with a +coefficient similar to:: + + Power(IdleCycle) = Coef x Power(OPP) + +Idle Injection: +--------------- + +The base concept of the idle injection is to force the CPU to go to an +idle state for a specified time each control cycle, it provides +another way to control CPU power and heat in addition to +cpufreq. Ideally, if all CPUs belonging to the same cluster, inject +their idle cycles synchronously, the cluster can reach its power down +state with a minimum power consumption and reduce the static leakage +to almost zero. However, these idle cycles injection will add extra +latencies as the CPUs will have to wakeup from a deep sleep state. + +We use a fixed duration of idle injection that gives an acceptable +performance penalty and a fixed latency. Mitigation can be increased +or decreased by modulating the duty cycle of the idle injection. + +:: + + ^ + | + | + |------- ------- + |_______|_______________________|_______|___________ + + <------> + idle <----------------------> + running + + <-----------------------------> + duty cycle 25% + + +The implementation of the cooling device bases the number of states on +the duty cycle percentage. When no mitigation is happening the cooling +device state is zero, meaning the duty cycle is 0%. + +When the mitigation begins, depending on the governor's policy, a +starting state is selected. With a fixed idle duration and the duty +cycle (aka the cooling device state), the running duration can be +computed. + +The governor will change the cooling device state thus the duty cycle +and this variation will modulate the cooling effect. + +:: + + ^ + | + | + |------- ------- + |_______|_______________|_______|___________ + + <------> + idle <--------------> + running + + <---------------------> + duty cycle 33% + + + ^ + | + | + |------- ------- + |_______|_______|_______|___________ + + <------> + idle <------> + running + + <-------------> + duty cycle 50% + +The idle injection duration value must comply with the constraints: + +- It is less than or equal to the latency we tolerate when the + mitigation begins. It is platform dependent and will depend on the + user experience, reactivity vs performance trade off we want. This + value should be specified. + +- It is greater than the idle state’s target residency we want to go + for thermal mitigation, otherwise we end up consuming more energy. + +Power considerations +-------------------- + +When we reach the thermal trip point, we have to sustain a specified +power for a specific temperature but at this time we consume:: + + Power = Capacitance x Voltage^2 x Frequency x Utilisation + +... which is more than the sustainable power (or there is something +wrong in the system setup). The ‘Capacitance’ and ‘Utilisation’ are a +fixed value, ‘Voltage’ and the ‘Frequency’ are fixed artificially +because we don’t want to change the OPP. We can group the +‘Capacitance’ and the ‘Utilisation’ into a single term which is the +‘Dynamic Power Coefficient (Cdyn)’ Simplifying the above, we have:: + + Pdyn = Cdyn x Voltage^2 x Frequency + +The power allocator governor will ask us somehow to reduce our power +in order to target the sustainable power defined in the device +tree. So with the idle injection mechanism, we want an average power +(Ptarget) resulting in an amount of time running at full power on a +specific OPP and idle another amount of time. That could be put in a +equation:: + + P(opp)target = ((Trunning x (P(opp)running) + (Tidle x P(opp)idle)) / + (Trunning + Tidle) + + ... + + Tidle = Trunning x ((P(opp)running / P(opp)target) - 1) + +At this point if we know the running period for the CPU, that gives us +the idle injection we need. Alternatively if we have the idle +injection duration, we can compute the running duration with:: + + Trunning = Tidle / ((P(opp)running / P(opp)target) - 1) + +Practically, if the running power is less than the targeted power, we +end up with a negative time value, so obviously the equation usage is +bound to a power reduction, hence a higher OPP is needed to have the +running power greater than the targeted power. + +However, in this demonstration we ignore three aspects: + + * The static leakage is not defined here, we can introduce it in the + equation but assuming it will be zero most of the time as it is + difficult to get the values from the SoC vendors + + * The idle state wake up latency (or entry + exit latency) is not + taken into account, it must be added in the equation in order to + rigorously compute the idle injection + + * The injected idle duration must be greater than the idle state + target residency, otherwise we end up consuming more energy and + potentially invert the mitigation effect + +So the final equation is:: + + Trunning = (Tidle - Twakeup ) x + (((P(opp)dyn + P(opp)static ) - P(opp)target) / P(opp)target ) diff --git a/Documentation/driver-api/thermal/exynos_thermal.rst b/Documentation/driver-api/thermal/exynos_thermal.rst new file mode 100644 index 0000000000..764df4ab58 --- /dev/null +++ b/Documentation/driver-api/thermal/exynos_thermal.rst @@ -0,0 +1,90 @@ +======================== +Kernel driver exynos_tmu +======================== + +Supported chips: + +* ARM Samsung Exynos4, Exynos5 series of SoC + + Datasheet: Not publicly available + +Authors: Donggeun Kim <dg77.kim@samsung.com> +Authors: Amit Daniel <amit.daniel@samsung.com> + +TMU controller Description: +--------------------------- + +This driver allows to read temperature inside Samsung Exynos4/5 series of SoC. + +The chip only exposes the measured 8-bit temperature code value +through a register. +Temperature can be taken from the temperature code. +There are three equations converting from temperature to temperature code. + +The three equations are: + 1. Two point trimming:: + + Tc = (T - 25) * (TI2 - TI1) / (85 - 25) + TI1 + + 2. One point trimming:: + + Tc = T + TI1 - 25 + + 3. No trimming:: + + Tc = T + 50 + + Tc: + Temperature code, T: Temperature, + TI1: + Trimming info for 25 degree Celsius (stored at TRIMINFO register) + Temperature code measured at 25 degree Celsius which is unchanged + TI2: + Trimming info for 85 degree Celsius (stored at TRIMINFO register) + Temperature code measured at 85 degree Celsius which is unchanged + +TMU(Thermal Management Unit) in Exynos4/5 generates interrupt +when temperature exceeds pre-defined levels. +The maximum number of configurable threshold is five. +The threshold levels are defined as follows:: + + Level_0: current temperature > trigger_level_0 + threshold + Level_1: current temperature > trigger_level_1 + threshold + Level_2: current temperature > trigger_level_2 + threshold + Level_3: current temperature > trigger_level_3 + threshold + +The threshold and each trigger_level are set +through the corresponding registers. + +When an interrupt occurs, this driver notify kernel thermal framework +with the function exynos_report_trigger. +Although an interrupt condition for level_0 can be set, +it can be used to synchronize the cooling action. + +TMU driver description: +----------------------- + +The exynos thermal driver is structured as:: + + Kernel Core thermal framework + (thermal_core.c, step_wise.c, cpufreq_cooling.c) + ^ + | + | + TMU configuration data -----> TMU Driver <----> Exynos Core thermal wrapper + (exynos_tmu_data.c) (exynos_tmu.c) (exynos_thermal_common.c) + (exynos_tmu_data.h) (exynos_tmu.h) (exynos_thermal_common.h) + +a) TMU configuration data: + This consist of TMU register offsets/bitfields + described through structure exynos_tmu_registers. Also several + other platform data (struct exynos_tmu_platform_data) members + are used to configure the TMU. +b) TMU driver: + This component initialises the TMU controller and sets different + thresholds. It invokes core thermal implementation with the call + exynos_report_trigger. +c) Exynos Core thermal wrapper: + This provides 3 wrapper function to use the + Kernel core thermal framework. They are exynos_unregister_thermal, + exynos_register_thermal and exynos_report_trigger. diff --git a/Documentation/driver-api/thermal/exynos_thermal_emulation.rst b/Documentation/driver-api/thermal/exynos_thermal_emulation.rst new file mode 100644 index 0000000000..c21d10838b --- /dev/null +++ b/Documentation/driver-api/thermal/exynos_thermal_emulation.rst @@ -0,0 +1,61 @@ +===================== +Exynos Emulation Mode +===================== + +Copyright (C) 2012 Samsung Electronics + +Written by Jonghwa Lee <jonghwa3.lee@samsung.com> + +Description +----------- + +Exynos 4x12 (4212, 4412) and 5 series provide emulation mode for thermal +management unit. Thermal emulation mode supports software debug for +TMU's operation. User can set temperature manually with software code +and TMU will read current temperature from user value not from sensor's +value. + +Enabling CONFIG_THERMAL_EMULATION option will make this support +available. When it's enabled, sysfs node will be created as +/sys/devices/virtual/thermal/thermal_zone'zone id'/emul_temp. + +The sysfs node, 'emul_node', will contain value 0 for the initial state. +When you input any temperature you want to update to sysfs node, it +automatically enable emulation mode and current temperature will be +changed into it. + +(Exynos also supports user changeable delay time which would be used to +delay of changing temperature. However, this node only uses same delay +of real sensing time, 938us.) + +Exynos emulation mode requires synchronous of value changing and +enabling. It means when you want to update the any value of delay or +next temperature, then you have to enable emulation mode at the same +time. (Or you have to keep the mode enabling.) If you don't, it fails to +change the value to updated one and just use last succeessful value +repeatedly. That's why this node gives users the right to change +termerpature only. Just one interface makes it more simply to use. + +Disabling emulation mode only requires writing value 0 to sysfs node. + +:: + + + TEMP 120 | + | + 100 | + | + 80 | + | +----------- + 60 | | | + | +-------------| | + 40 | | | | + | | | | + 20 | | | +---------- + | | | | | + 0 |______________|_____________|__________|__________|_________ + A A A A TIME + |<----->| |<----->| |<----->| | + | 938us | | | | | | + emulation : 0 50 | 70 | 20 | 0 + current temp: sensor 50 70 20 sensor diff --git a/Documentation/driver-api/thermal/index.rst b/Documentation/driver-api/thermal/index.rst new file mode 100644 index 0000000000..a886028014 --- /dev/null +++ b/Documentation/driver-api/thermal/index.rst @@ -0,0 +1,19 @@ +.. SPDX-License-Identifier: GPL-2.0 + +======= +Thermal +======= + +.. toctree:: + :maxdepth: 1 + + cpu-cooling-api + cpu-idle-cooling + sysfs-api + power_allocator + + exynos_thermal + exynos_thermal_emulation + nouveau_thermal + x86_pkg_temperature_thermal + intel_dptf diff --git a/Documentation/driver-api/thermal/intel_dptf.rst b/Documentation/driver-api/thermal/intel_dptf.rst new file mode 100644 index 0000000000..9ab4316322 --- /dev/null +++ b/Documentation/driver-api/thermal/intel_dptf.rst @@ -0,0 +1,317 @@ +.. SPDX-License-Identifier: GPL-2.0 + +=============================================================== +Intel(R) Dynamic Platform and Thermal Framework Sysfs Interface +=============================================================== + +:Copyright: © 2022 Intel Corporation + +:Author: Srinivas Pandruvada <srinivas.pandruvada@linux.intel.com> + +Introduction +------------ + +Intel(R) Dynamic Platform and Thermal Framework (DPTF) is a platform +level hardware/software solution for power and thermal management. + +As a container for multiple power/thermal technologies, DPTF provides +a coordinated approach for different policies to effect the hardware +state of a system. + +Since it is a platform level framework, this has several components. +Some parts of the technology is implemented in the firmware and uses +ACPI and PCI devices to expose various features for monitoring and +control. Linux has a set of kernel drivers exposing hardware interface +to user space. This allows user space thermal solutions like +"Linux Thermal Daemon" to read platform specific thermal and power +tables to deliver adequate performance while keeping the system under +thermal limits. + +DPTF ACPI Drivers interface +---------------------------- + +:file:`/sys/bus/platform/devices/<N>/uuids`, where <N> +=INT3400|INTC1040|INTC1041|INTC10A0 + +``available_uuids`` (RO) + A set of UUIDs strings presenting available policies + which should be notified to the firmware when the + user space can support those policies. + + UUID strings: + + "42A441D6-AE6A-462b-A84B-4A8CE79027D3" : Passive 1 + + "3A95C389-E4B8-4629-A526-C52C88626BAE" : Active + + "97C68AE7-15FA-499c-B8C9-5DA81D606E0A" : Critical + + "63BE270F-1C11-48FD-A6F7-3AF253FF3E2D" : Adaptive performance + + "5349962F-71E6-431D-9AE8-0A635B710AEE" : Emergency call + + "9E04115A-AE87-4D1C-9500-0F3E340BFE75" : Passive 2 + + "F5A35014-C209-46A4-993A-EB56DE7530A1" : Power Boss + + "6ED722A7-9240-48A5-B479-31EEF723D7CF" : Virtual Sensor + + "16CAF1B7-DD38-40ED-B1C1-1B8A1913D531" : Cooling mode + + "BE84BABF-C4D4-403D-B495-3128FD44dAC1" : HDC + +``current_uuid`` (RW) + User space can write strings from available UUIDs, one at a + time. + +:file:`/sys/bus/platform/devices/<N>/`, where <N> +=INT3400|INTC1040|INTC1041|INTC10A0 + +``imok`` (WO) + User space daemon write 1 to respond to firmware event + for sending keep alive notification. User space receives + THERMAL_EVENT_KEEP_ALIVE kobject uevent notification when + firmware calls for user space to respond with imok ACPI + method. + +``odvp*`` (RO) + Firmware thermal status variable values. Thermal tables + calls for different processing based on these variable + values. + +``data_vault`` (RO) + Binary thermal table. Refer to + https:/github.com/intel/thermal_daemon for decoding + thermal table. + +``production_mode`` (RO) + When different from zero, manufacturer locked thermal configuration + from further changes. + +ACPI Thermal Relationship table interface +------------------------------------------ + +:file:`/dev/acpi_thermal_rel` + + This device provides IOCTL interface to read standard ACPI + thermal relationship tables via ACPI methods _TRT and _ART. + These IOCTLs are defined in + drivers/thermal/intel/int340x_thermal/acpi_thermal_rel.h + + IOCTLs: + + ACPI_THERMAL_GET_TRT_LEN: Get length of TRT table + + ACPI_THERMAL_GET_ART_LEN: Get length of ART table + + ACPI_THERMAL_GET_TRT_COUNT: Number of records in TRT table + + ACPI_THERMAL_GET_ART_COUNT: Number of records in ART table + + ACPI_THERMAL_GET_TRT: Read binary TRT table, length to read is + provided via argument to ioctl(). + + ACPI_THERMAL_GET_ART: Read binary ART table, length to read is + provided via argument to ioctl(). + +DPTF ACPI Sensor drivers +------------------------- + +DPTF Sensor drivers are presented as standard thermal sysfs thermal_zone. + + +DPTF ACPI Cooling drivers +-------------------------- + +DPTF cooling drivers are presented as standard thermal sysfs cooling_device. + + +DPTF Processor thermal PCI Driver interface +-------------------------------------------- + +:file:`/sys/bus/pci/devices/0000\:00\:04.0/power_limits/` + +Refer to Documentation/power/powercap/powercap.rst for powercap +ABI. + +``power_limit_0_max_uw`` (RO) + Maximum powercap sysfs constraint_0_power_limit_uw for Intel RAPL + +``power_limit_0_step_uw`` (RO) + Power limit increment/decrements for Intel RAPL constraint 0 power limit + +``power_limit_0_min_uw`` (RO) + Minimum powercap sysfs constraint_0_power_limit_uw for Intel RAPL + +``power_limit_0_tmin_us`` (RO) + Minimum powercap sysfs constraint_0_time_window_us for Intel RAPL + +``power_limit_0_tmax_us`` (RO) + Maximum powercap sysfs constraint_0_time_window_us for Intel RAPL + +``power_limit_1_max_uw`` (RO) + Maximum powercap sysfs constraint_1_power_limit_uw for Intel RAPL + +``power_limit_1_step_uw`` (RO) + Power limit increment/decrements for Intel RAPL constraint 1 power limit + +``power_limit_1_min_uw`` (RO) + Minimum powercap sysfs constraint_1_power_limit_uw for Intel RAPL + +``power_limit_1_tmin_us`` (RO) + Minimum powercap sysfs constraint_1_time_window_us for Intel RAPL + +``power_limit_1_tmax_us`` (RO) + Maximum powercap sysfs constraint_1_time_window_us for Intel RAPL + +:file:`/sys/bus/pci/devices/0000\:00\:04.0/` + +``tcc_offset_degree_celsius`` (RW) + TCC offset from the critical temperature where hardware will throttle + CPU. + +:file:`/sys/bus/pci/devices/0000\:00\:04.0/workload_request` + +``workload_available_types`` (RO) + Available workload types. User space can specify one of the workload type + it is currently executing via workload_type. For example: idle, bursty, + sustained etc. + +``workload_type`` (RW) + User space can specify any one of the available workload type using + this interface. + +DPTF Processor thermal RFIM interface +-------------------------------------------- + +RFIM interface allows adjustment of FIVR (Fully Integrated Voltage Regulator), +DDR (Double Data Rate) and DLVR (Digital Linear Voltage Regulator) +frequencies to avoid RF interference with WiFi and 5G. + +Switching voltage regulators (VR) generate radiated EMI or RFI at the +fundamental frequency and its harmonics. Some harmonics may interfere +with very sensitive wireless receivers such as Wi-Fi and cellular that +are integrated into host systems like notebook PCs. One of mitigation +methods is requesting SOC integrated VR (IVR) switching frequency to a +small % and shift away the switching noise harmonic interference from +radio channels. OEM or ODMs can use the driver to control SOC IVR +operation within the range where it does not impact IVR performance. + +Some products use DLVR instead of FIVR as switching voltage regulator. +In this case attributes of DLVR must be adjusted instead of FIVR. + +While shifting the frequencies additional clock noise can be introduced, +which is compensated by adjusting Spread spectrum percent. This helps +to reduce the clock noise to meet regulatory compliance. This spreading +% increases bandwidth of signal transmission and hence reduces the +effects of interference, noise and signal fading. + +DRAM devices of DDR IO interface and their power plane can generate EMI +at the data rates. Similar to IVR control mechanism, Intel offers a +mechanism by which DDR data rates can be changed if several conditions +are met: there is strong RFI interference because of DDR; CPU power +management has no other restriction in changing DDR data rates; +PC ODMs enable this feature (real time DDR RFI Mitigation referred to as +DDR-RFIM) for Wi-Fi from BIOS. + + +FIVR attributes + +:file:`/sys/bus/pci/devices/0000\:00\:04.0/fivr/` + +``vco_ref_code_lo`` (RW) + The VCO reference code is an 11-bit field and controls the FIVR + switching frequency. This is the 3-bit LSB field. + +``vco_ref_code_hi`` (RW) + The VCO reference code is an 11-bit field and controls the FIVR + switching frequency. This is the 8-bit MSB field. + +``spread_spectrum_pct`` (RW) + Set the FIVR spread spectrum clocking percentage + +``spread_spectrum_clk_enable`` (RW) + Enable/disable of the FIVR spread spectrum clocking feature + +``rfi_vco_ref_code`` (RW) + This field is a read only status register which reflects the + current FIVR switching frequency + +``fivr_fffc_rev`` (RW) + This field indicated the revision of the FIVR HW. + + +DVFS attributes + +:file:`/sys/bus/pci/devices/0000\:00\:04.0/dvfs/` + +``rfi_restriction_run_busy`` (RW) + Request the restriction of specific DDR data rate and set this + value 1. Self reset to 0 after operation. + +``rfi_restriction_err_code`` (RW) + 0 :Request is accepted, 1:Feature disabled, + 2: the request restricts more points than it is allowed + +``rfi_restriction_data_rate_Delta`` (RW) + Restricted DDR data rate for RFI protection: Lower Limit + +``rfi_restriction_data_rate_Base`` (RW) + Restricted DDR data rate for RFI protection: Upper Limit + +``ddr_data_rate_point_0`` (RO) + DDR data rate selection 1st point + +``ddr_data_rate_point_1`` (RO) + DDR data rate selection 2nd point + +``ddr_data_rate_point_2`` (RO) + DDR data rate selection 3rd point + +``ddr_data_rate_point_3`` (RO) + DDR data rate selection 4th point + +``rfi_disable (RW)`` + Disable DDR rate change feature + +DLVR attributes + +:file:`/sys/bus/pci/devices/0000\:00\:04.0/dlvr/` + +``dlvr_hardware_rev`` (RO) + DLVR hardware revision. + +``dlvr_freq_mhz`` (RO) + Current DLVR PLL frequency in MHz. + +``dlvr_freq_select`` (RW) + Sets DLVR PLL clock frequency. Once set, and enabled via + dlvr_rfim_enable, the dlvr_freq_mhz will show the current + DLVR PLL frequency. + +``dlvr_pll_busy`` (RO) + PLL can't accept frequency change when set. + +``dlvr_rfim_enable`` (RW) + 0: Disable RF frequency hopping, 1: Enable RF frequency hopping. + +``dlvr_spread_spectrum_pct`` (RW) + Sets DLVR spread spectrum percent value. + +``dlvr_control_mode`` (RW) + Specifies how frequencies are spread using spread spectrum. + 0: Down spread, + 1: Spread in the Center. + +``dlvr_control_lock`` (RW) + 1: future writes are ignored. + +DPTF Power supply and Battery Interface +---------------------------------------- + +Refer to Documentation/ABI/testing/sysfs-platform-dptf + +DPTF Fan Control +---------------------------------------- + +Refer to Documentation/admin-guide/acpi/fan_performance_states.rst diff --git a/Documentation/driver-api/thermal/nouveau_thermal.rst b/Documentation/driver-api/thermal/nouveau_thermal.rst new file mode 100644 index 0000000000..aa10db6df3 --- /dev/null +++ b/Documentation/driver-api/thermal/nouveau_thermal.rst @@ -0,0 +1,96 @@ +===================== +Kernel driver nouveau +===================== + +Supported chips: + +* NV43+ + +Authors: Martin Peres (mupuf) <martin.peres@free.fr> + +Description +----------- + +This driver allows to read the GPU core temperature, drive the GPU fan and +set temperature alarms. + +Currently, due to the absence of in-kernel API to access HWMON drivers, Nouveau +cannot access any of the i2c external monitoring chips it may find. If you +have one of those, temperature and/or fan management through Nouveau's HWMON +interface is likely not to work. This document may then not cover your situation +entirely. + +Temperature management +---------------------- + +Temperature is exposed under as a read-only HWMON attribute temp1_input. + +In order to protect the GPU from overheating, Nouveau supports 4 configurable +temperature thresholds: + + * Fan_boost: + Fan speed is set to 100% when reaching this temperature; + * Downclock: + The GPU will be downclocked to reduce its power dissipation; + * Critical: + The GPU is put on hold to further lower power dissipation; + * Shutdown: + Shut the computer down to protect your GPU. + +WARNING: + Some of these thresholds may not be used by Nouveau depending + on your chipset. + +The default value for these thresholds comes from the GPU's vbios. These +thresholds can be configured thanks to the following HWMON attributes: + + * Fan_boost: temp1_auto_point1_temp and temp1_auto_point1_temp_hyst; + * Downclock: temp1_max and temp1_max_hyst; + * Critical: temp1_crit and temp1_crit_hyst; + * Shutdown: temp1_emergency and temp1_emergency_hyst. + +NOTE: Remember that the values are stored as milli degrees Celsius. Don't forget +to multiply! + +Fan management +-------------- + +Not all cards have a drivable fan. If you do, then the following HWMON +attributes should be available: + + * pwm1_enable: + Current fan management mode (NONE, MANUAL or AUTO); + * pwm1: + Current PWM value (power percentage); + * pwm1_min: + The minimum PWM speed allowed; + * pwm1_max: + The maximum PWM speed allowed (bypassed when hitting Fan_boost); + +You may also have the following attribute: + + * fan1_input: + Speed in RPM of your fan. + +Your fan can be driven in different modes: + + * 0: The fan is left untouched; + * 1: The fan can be driven in manual (use pwm1 to change the speed); + * 2; The fan is driven automatically depending on the temperature. + +NOTE: + Be sure to use the manual mode if you want to drive the fan speed manually + +NOTE2: + When operating in manual mode outside the vbios-defined + [PWM_min, PWM_max] range, the reported fan speed (RPM) may not be accurate + depending on your hardware. + +Bug reports +----------- + +Thermal management on Nouveau is new and may not work on all cards. If you have +inquiries, please ping mupuf on IRC (#nouveau, OFTC). + +Bug reports should be filled on Freedesktop's bug tracker. Please follow +https://nouveau.freedesktop.org/wiki/Bugs diff --git a/Documentation/driver-api/thermal/power_allocator.rst b/Documentation/driver-api/thermal/power_allocator.rst new file mode 100644 index 0000000000..aa5f66552d --- /dev/null +++ b/Documentation/driver-api/thermal/power_allocator.rst @@ -0,0 +1,281 @@ +================================= +Power allocator governor tunables +================================= + +Trip points +----------- + +The governor works optimally with the following two passive trip points: + +1. "switch on" trip point: temperature above which the governor + control loop starts operating. This is the first passive trip + point of the thermal zone. + +2. "desired temperature" trip point: it should be higher than the + "switch on" trip point. This the target temperature the governor + is controlling for. This is the last passive trip point of the + thermal zone. + +PID Controller +-------------- + +The power allocator governor implements a +Proportional-Integral-Derivative controller (PID controller) with +temperature as the control input and power as the controlled output: + + P_max = k_p * e + k_i * err_integral + k_d * diff_err + sustainable_power + +where + - e = desired_temperature - current_temperature + - err_integral is the sum of previous errors + - diff_err = e - previous_error + +It is similar to the one depicted below:: + + k_d + | + current_temp | + | v + | +----------+ +---+ + | +----->| diff_err |-->| X |------+ + | | +----------+ +---+ | + | | | tdp actor + | | k_i | | get_requested_power() + | | | | | | | + | | | | | | | ... + v | v v v v v + +---+ | +-------+ +---+ +---+ +---+ +----------+ + | S |-----+----->| sum e |----->| X |--->| S |-->| S |-->|power | + +---+ | +-------+ +---+ +---+ +---+ |allocation| + ^ | ^ +----------+ + | | | | | + | | +---+ | | | + | +------->| X |-------------------+ v v + | +---+ granted performance + desired_temperature ^ + | + | + k_po/k_pu + +Sustainable power +----------------- + +An estimate of the sustainable dissipatable power (in mW) should be +provided while registering the thermal zone. This estimates the +sustained power that can be dissipated at the desired control +temperature. This is the maximum sustained power for allocation at +the desired maximum temperature. The actual sustained power can vary +for a number of reasons. The closed loop controller will take care of +variations such as environmental conditions, and some factors related +to the speed-grade of the silicon. `sustainable_power` is therefore +simply an estimate, and may be tuned to affect the aggressiveness of +the thermal ramp. For reference, the sustainable power of a 4" phone +is typically 2000mW, while on a 10" tablet is around 4500mW (may vary +depending on screen size). It is possible to have the power value +expressed in an abstract scale. The sustained power should be aligned +to the scale used by the related cooling devices. + +If you are using device tree, do add it as a property of the +thermal-zone. For example:: + + thermal-zones { + soc_thermal { + polling-delay = <1000>; + polling-delay-passive = <100>; + sustainable-power = <2500>; + ... + +Instead, if the thermal zone is registered from the platform code, pass a +`thermal_zone_params` that has a `sustainable_power`. If no +`thermal_zone_params` were being passed, then something like below +will suffice:: + + static const struct thermal_zone_params tz_params = { + .sustainable_power = 3500, + }; + +and then pass `tz_params` as the 5th parameter to +`thermal_zone_device_register()` + +k_po and k_pu +------------- + +The implementation of the PID controller in the power allocator +thermal governor allows the configuration of two proportional term +constants: `k_po` and `k_pu`. `k_po` is the proportional term +constant during temperature overshoot periods (current temperature is +above "desired temperature" trip point). Conversely, `k_pu` is the +proportional term constant during temperature undershoot periods +(current temperature below "desired temperature" trip point). + +These controls are intended as the primary mechanism for configuring +the permitted thermal "ramp" of the system. For instance, a lower +`k_pu` value will provide a slower ramp, at the cost of capping +available capacity at a low temperature. On the other hand, a high +value of `k_pu` will result in the governor granting very high power +while temperature is low, and may lead to temperature overshooting. + +The default value for `k_pu` is:: + + 2 * sustainable_power / (desired_temperature - switch_on_temp) + +This means that at `switch_on_temp` the output of the controller's +proportional term will be 2 * `sustainable_power`. The default value +for `k_po` is:: + + sustainable_power / (desired_temperature - switch_on_temp) + +Focusing on the proportional and feed forward values of the PID +controller equation we have:: + + P_max = k_p * e + sustainable_power + +The proportional term is proportional to the difference between the +desired temperature and the current one. When the current temperature +is the desired one, then the proportional component is zero and +`P_max` = `sustainable_power`. That is, the system should operate in +thermal equilibrium under constant load. `sustainable_power` is only +an estimate, which is the reason for closed-loop control such as this. + +Expanding `k_pu` we get:: + + P_max = 2 * sustainable_power * (T_set - T) / (T_set - T_on) + + sustainable_power + +where: + + - T_set is the desired temperature + - T is the current temperature + - T_on is the switch on temperature + +When the current temperature is the switch_on temperature, the above +formula becomes:: + + P_max = 2 * sustainable_power * (T_set - T_on) / (T_set - T_on) + + sustainable_power = 2 * sustainable_power + sustainable_power = + 3 * sustainable_power + +Therefore, the proportional term alone linearly decreases power from +3 * `sustainable_power` to `sustainable_power` as the temperature +rises from the switch on temperature to the desired temperature. + +k_i and integral_cutoff +----------------------- + +`k_i` configures the PID loop's integral term constant. This term +allows the PID controller to compensate for long term drift and for +the quantized nature of the output control: cooling devices can't set +the exact power that the governor requests. When the temperature +error is below `integral_cutoff`, errors are accumulated in the +integral term. This term is then multiplied by `k_i` and the result +added to the output of the controller. Typically `k_i` is set low (1 +or 2) and `integral_cutoff` is 0. + +k_d +--- + +`k_d` configures the PID loop's derivative term constant. It's +recommended to leave it as the default: 0. + +Cooling device power API +======================== + +Cooling devices controlled by this governor must supply the additional +"power" API in their `cooling_device_ops`. It consists on three ops: + +1. :: + + int get_requested_power(struct thermal_cooling_device *cdev, + struct thermal_zone_device *tz, u32 *power); + + +@cdev: + The `struct thermal_cooling_device` pointer +@tz: + thermal zone in which we are currently operating +@power: + pointer in which to store the calculated power + +`get_requested_power()` calculates the power requested by the device +in milliwatts and stores it in @power . It should return 0 on +success, -E* on failure. This is currently used by the power +allocator governor to calculate how much power to give to each cooling +device. + +2. :: + + int state2power(struct thermal_cooling_device *cdev, struct + thermal_zone_device *tz, unsigned long state, + u32 *power); + +@cdev: + The `struct thermal_cooling_device` pointer +@tz: + thermal zone in which we are currently operating +@state: + A cooling device state +@power: + pointer in which to store the equivalent power + +Convert cooling device state @state into power consumption in +milliwatts and store it in @power. It should return 0 on success, -E* +on failure. This is currently used by thermal core to calculate the +maximum power that an actor can consume. + +3. :: + + int power2state(struct thermal_cooling_device *cdev, u32 power, + unsigned long *state); + +@cdev: + The `struct thermal_cooling_device` pointer +@power: + power in milliwatts +@state: + pointer in which to store the resulting state + +Calculate a cooling device state that would make the device consume at +most @power mW and store it in @state. It should return 0 on success, +-E* on failure. This is currently used by the thermal core to convert +a given power set by the power allocator governor to a state that the +cooling device can set. It is a function because this conversion may +depend on external factors that may change so this function should the +best conversion given "current circumstances". + +Cooling device weights +---------------------- + +Weights are a mechanism to bias the allocation among cooling +devices. They express the relative power efficiency of different +cooling devices. Higher weight can be used to express higher power +efficiency. Weighting is relative such that if each cooling device +has a weight of one they are considered equal. This is particularly +useful in heterogeneous systems where two cooling devices may perform +the same kind of compute, but with different efficiency. For example, +a system with two different types of processors. + +If the thermal zone is registered using +`thermal_zone_device_register()` (i.e., platform code), then weights +are passed as part of the thermal zone's `thermal_bind_parameters`. +If the platform is registered using device tree, then they are passed +as the `contribution` property of each map in the `cooling-maps` node. + +Limitations of the power allocator governor +=========================================== + +The power allocator governor's PID controller works best if there is a +periodic tick. If you have a driver that calls +`thermal_zone_device_update()` (or anything that ends up calling the +governor's `throttle()` function) repetitively, the governor response +won't be very good. Note that this is not particular to this +governor, step-wise will also misbehave if you call its throttle() +faster than the normal thermal framework tick (due to interrupts for +example) as it will overreact. + +Energy Model requirements +========================= + +Another important thing is the consistent scale of the power values +provided by the cooling devices. All of the cooling devices in a single +thermal zone should have power values reported either in milli-Watts +or scaled to the same 'abstract scale'. diff --git a/Documentation/driver-api/thermal/sysfs-api.rst b/Documentation/driver-api/thermal/sysfs-api.rst new file mode 100644 index 0000000000..6c1175c6af --- /dev/null +++ b/Documentation/driver-api/thermal/sysfs-api.rst @@ -0,0 +1,495 @@ +=================================== +Generic Thermal Sysfs driver How To +=================================== + +Written by Sujith Thomas <sujith.thomas@intel.com>, Zhang Rui <rui.zhang@intel.com> + +Updated: 2 January 2008 + +Copyright (c) 2008 Intel Corporation + + +0. Introduction +=============== + +The generic thermal sysfs provides a set of interfaces for thermal zone +devices (sensors) and thermal cooling devices (fan, processor...) to register +with the thermal management solution and to be a part of it. + +This how-to focuses on enabling new thermal zone and cooling devices to +participate in thermal management. +This solution is platform independent and any type of thermal zone devices +and cooling devices should be able to make use of the infrastructure. + +The main task of the thermal sysfs driver is to expose thermal zone attributes +as well as cooling device attributes to the user space. +An intelligent thermal management application can make decisions based on +inputs from thermal zone attributes (the current temperature and trip point +temperature) and throttle appropriate devices. + +- `[0-*]` denotes any positive number starting from 0 +- `[1-*]` denotes any positive number starting from 1 + +1. thermal sysfs driver interface functions +=========================================== + +1.1 thermal zone device interface +--------------------------------- + + :: + + struct thermal_zone_device + *thermal_zone_device_register(char *type, + int trips, int mask, void *devdata, + struct thermal_zone_device_ops *ops, + const struct thermal_zone_params *tzp, + int passive_delay, int polling_delay)) + + This interface function adds a new thermal zone device (sensor) to + /sys/class/thermal folder as `thermal_zone[0-*]`. It tries to bind all the + thermal cooling devices registered at the same time. + + type: + the thermal zone type. + trips: + the total number of trip points this thermal zone supports. + mask: + Bit string: If 'n'th bit is set, then trip point 'n' is writable. + devdata: + device private data + ops: + thermal zone device call-backs. + + .bind: + bind the thermal zone device with a thermal cooling device. + .unbind: + unbind the thermal zone device with a thermal cooling device. + .get_temp: + get the current temperature of the thermal zone. + .set_trips: + set the trip points window. Whenever the current temperature + is updated, the trip points immediately below and above the + current temperature are found. + .get_mode: + get the current mode (enabled/disabled) of the thermal zone. + + - "enabled" means the kernel thermal management is + enabled. + - "disabled" will prevent kernel thermal driver action + upon trip points so that user applications can take + charge of thermal management. + .set_mode: + set the mode (enabled/disabled) of the thermal zone. + .get_trip_type: + get the type of certain trip point. + .get_trip_temp: + get the temperature above which the certain trip point + will be fired. + .set_emul_temp: + set the emulation temperature which helps in debugging + different threshold temperature points. + tzp: + thermal zone platform parameters. + passive_delay: + number of milliseconds to wait between polls when + performing passive cooling. + polling_delay: + number of milliseconds to wait between polls when checking + whether trip points have been crossed (0 for interrupt driven systems). + + :: + + void thermal_zone_device_unregister(struct thermal_zone_device *tz) + + This interface function removes the thermal zone device. + It deletes the corresponding entry from /sys/class/thermal folder and + unbinds all the thermal cooling devices it uses. + + :: + + struct thermal_zone_device + *thermal_zone_of_sensor_register(struct device *dev, int sensor_id, + void *data, + const struct thermal_zone_of_device_ops *ops) + + This interface adds a new sensor to a DT thermal zone. + This function will search the list of thermal zones described in + device tree and look for the zone that refer to the sensor device + pointed by dev->of_node as temperature providers. For the zone + pointing to the sensor node, the sensor will be added to the DT + thermal zone device. + + The parameters for this interface are: + + dev: + Device node of sensor containing valid node pointer in + dev->of_node. + sensor_id: + a sensor identifier, in case the sensor IP has more + than one sensors + data: + a private pointer (owned by the caller) that will be + passed back, when a temperature reading is needed. + ops: + `struct thermal_zone_of_device_ops *`. + + ============== ======================================= + get_temp a pointer to a function that reads the + sensor temperature. This is mandatory + callback provided by sensor driver. + set_trips a pointer to a function that sets a + temperature window. When this window is + left the driver must inform the thermal + core via thermal_zone_device_update. + get_trend a pointer to a function that reads the + sensor temperature trend. + set_emul_temp a pointer to a function that sets + sensor emulated temperature. + ============== ======================================= + + The thermal zone temperature is provided by the get_temp() function + pointer of thermal_zone_of_device_ops. When called, it will + have the private pointer @data back. + + It returns error pointer if fails otherwise valid thermal zone device + handle. Caller should check the return handle with IS_ERR() for finding + whether success or not. + + :: + + void thermal_zone_of_sensor_unregister(struct device *dev, + struct thermal_zone_device *tzd) + + This interface unregisters a sensor from a DT thermal zone which was + successfully added by interface thermal_zone_of_sensor_register(). + This function removes the sensor callbacks and private data from the + thermal zone device registered with thermal_zone_of_sensor_register() + interface. It will also silent the zone by remove the .get_temp() and + get_trend() thermal zone device callbacks. + + :: + + struct thermal_zone_device + *devm_thermal_zone_of_sensor_register(struct device *dev, + int sensor_id, + void *data, + const struct thermal_zone_of_device_ops *ops) + + This interface is resource managed version of + thermal_zone_of_sensor_register(). + + All details of thermal_zone_of_sensor_register() described in + section 1.1.3 is applicable here. + + The benefit of using this interface to register sensor is that it + is not require to explicitly call thermal_zone_of_sensor_unregister() + in error path or during driver unbinding as this is done by driver + resource manager. + + :: + + void devm_thermal_zone_of_sensor_unregister(struct device *dev, + struct thermal_zone_device *tzd) + + This interface is resource managed version of + thermal_zone_of_sensor_unregister(). + All details of thermal_zone_of_sensor_unregister() described in + section 1.1.4 is applicable here. + Normally this function will not need to be called and the resource + management code will ensure that the resource is freed. + + :: + + int thermal_zone_get_slope(struct thermal_zone_device *tz) + + This interface is used to read the slope attribute value + for the thermal zone device, which might be useful for platform + drivers for temperature calculations. + + :: + + int thermal_zone_get_offset(struct thermal_zone_device *tz) + + This interface is used to read the offset attribute value + for the thermal zone device, which might be useful for platform + drivers for temperature calculations. + +1.2 thermal cooling device interface +------------------------------------ + + + :: + + struct thermal_cooling_device + *thermal_cooling_device_register(char *name, + void *devdata, struct thermal_cooling_device_ops *) + + This interface function adds a new thermal cooling device (fan/processor/...) + to /sys/class/thermal/ folder as `cooling_device[0-*]`. It tries to bind itself + to all the thermal zone devices registered at the same time. + + name: + the cooling device name. + devdata: + device private data. + ops: + thermal cooling devices call-backs. + + .get_max_state: + get the Maximum throttle state of the cooling device. + .get_cur_state: + get the Currently requested throttle state of the + cooling device. + .set_cur_state: + set the Current throttle state of the cooling device. + + :: + + void thermal_cooling_device_unregister(struct thermal_cooling_device *cdev) + + This interface function removes the thermal cooling device. + It deletes the corresponding entry from /sys/class/thermal folder and + unbinds itself from all the thermal zone devices using it. + +1.3 interface for binding a thermal zone device with a thermal cooling device +----------------------------------------------------------------------------- + + :: + + int thermal_zone_bind_cooling_device(struct thermal_zone_device *tz, + int trip, struct thermal_cooling_device *cdev, + unsigned long upper, unsigned long lower, unsigned int weight); + + This interface function binds a thermal cooling device to a particular trip + point of a thermal zone device. + + This function is usually called in the thermal zone device .bind callback. + + tz: + the thermal zone device + cdev: + thermal cooling device + trip: + indicates which trip point in this thermal zone the cooling device + is associated with. + upper: + the Maximum cooling state for this trip point. + THERMAL_NO_LIMIT means no upper limit, + and the cooling device can be in max_state. + lower: + the Minimum cooling state can be used for this trip point. + THERMAL_NO_LIMIT means no lower limit, + and the cooling device can be in cooling state 0. + weight: + the influence of this cooling device in this thermal + zone. See 1.4.1 below for more information. + + :: + + int thermal_zone_unbind_cooling_device(struct thermal_zone_device *tz, + int trip, struct thermal_cooling_device *cdev); + + This interface function unbinds a thermal cooling device from a particular + trip point of a thermal zone device. This function is usually called in + the thermal zone device .unbind callback. + + tz: + the thermal zone device + cdev: + thermal cooling device + trip: + indicates which trip point in this thermal zone the cooling device + is associated with. + +1.4 Thermal Zone Parameters +--------------------------- + + :: + + struct thermal_zone_params + + This structure defines the platform level parameters for a thermal zone. + This data, for each thermal zone should come from the platform layer. + This is an optional feature where some platforms can choose not to + provide this data. + + .governor_name: + Name of the thermal governor used for this zone + .no_hwmon: + a boolean to indicate if the thermal to hwmon sysfs interface + is required. when no_hwmon == false, a hwmon sysfs interface + will be created. when no_hwmon == true, nothing will be done. + In case the thermal_zone_params is NULL, the hwmon interface + will be created (for backward compatibility). + +2. sysfs attributes structure +============================= + +== ================ +RO read only value +WO write only value +RW read/write value +== ================ + +Thermal sysfs attributes will be represented under /sys/class/thermal. +Hwmon sysfs I/F extension is also available under /sys/class/hwmon +if hwmon is compiled in or built as a module. + +Thermal zone device sys I/F, created once it's registered:: + + /sys/class/thermal/thermal_zone[0-*]: + |---type: Type of the thermal zone + |---temp: Current temperature + |---mode: Working mode of the thermal zone + |---policy: Thermal governor used for this zone + |---available_policies: Available thermal governors for this zone + |---trip_point_[0-*]_temp: Trip point temperature + |---trip_point_[0-*]_type: Trip point type + |---trip_point_[0-*]_hyst: Hysteresis value for this trip point + |---emul_temp: Emulated temperature set node + |---sustainable_power: Sustainable dissipatable power + |---k_po: Proportional term during temperature overshoot + |---k_pu: Proportional term during temperature undershoot + |---k_i: PID's integral term in the power allocator gov + |---k_d: PID's derivative term in the power allocator + |---integral_cutoff: Offset above which errors are accumulated + |---slope: Slope constant applied as linear extrapolation + |---offset: Offset constant applied as linear extrapolation + +Thermal cooling device sys I/F, created once it's registered:: + + /sys/class/thermal/cooling_device[0-*]: + |---type: Type of the cooling device(processor/fan/...) + |---max_state: Maximum cooling state of the cooling device + |---cur_state: Current cooling state of the cooling device + |---stats: Directory containing cooling device's statistics + |---stats/reset: Writing any value resets the statistics + |---stats/time_in_state_ms: Time (msec) spent in various cooling states + |---stats/total_trans: Total number of times cooling state is changed + |---stats/trans_table: Cooling state transition table + + +Then next two dynamic attributes are created/removed in pairs. They represent +the relationship between a thermal zone and its associated cooling device. +They are created/removed for each successful execution of +thermal_zone_bind_cooling_device/thermal_zone_unbind_cooling_device. + +:: + + /sys/class/thermal/thermal_zone[0-*]: + |---cdev[0-*]: [0-*]th cooling device in current thermal zone + |---cdev[0-*]_trip_point: Trip point that cdev[0-*] is associated with + |---cdev[0-*]_weight: Influence of the cooling device in + this thermal zone + +Besides the thermal zone device sysfs I/F and cooling device sysfs I/F, +the generic thermal driver also creates a hwmon sysfs I/F for each _type_ +of thermal zone device. E.g. the generic thermal driver registers one hwmon +class device and build the associated hwmon sysfs I/F for all the registered +ACPI thermal zones. + +Please read Documentation/ABI/testing/sysfs-class-thermal for thermal +zone and cooling device attribute details. + +:: + + /sys/class/hwmon/hwmon[0-*]: + |---name: The type of the thermal zone devices + |---temp[1-*]_input: The current temperature of thermal zone [1-*] + |---temp[1-*]_critical: The critical trip point of thermal zone [1-*] + +Please read Documentation/hwmon/sysfs-interface.rst for additional information. + +3. A simple implementation +========================== + +ACPI thermal zone may support multiple trip points like critical, hot, +passive, active. If an ACPI thermal zone supports critical, passive, +active[0] and active[1] at the same time, it may register itself as a +thermal_zone_device (thermal_zone1) with 4 trip points in all. +It has one processor and one fan, which are both registered as +thermal_cooling_device. Both are considered to have the same +effectiveness in cooling the thermal zone. + +If the processor is listed in _PSL method, and the fan is listed in _AL0 +method, the sys I/F structure will be built like this:: + + /sys/class/thermal: + |thermal_zone1: + |---type: acpitz + |---temp: 37000 + |---mode: enabled + |---policy: step_wise + |---available_policies: step_wise fair_share + |---trip_point_0_temp: 100000 + |---trip_point_0_type: critical + |---trip_point_1_temp: 80000 + |---trip_point_1_type: passive + |---trip_point_2_temp: 70000 + |---trip_point_2_type: active0 + |---trip_point_3_temp: 60000 + |---trip_point_3_type: active1 + |---cdev0: --->/sys/class/thermal/cooling_device0 + |---cdev0_trip_point: 1 /* cdev0 can be used for passive */ + |---cdev0_weight: 1024 + |---cdev1: --->/sys/class/thermal/cooling_device3 + |---cdev1_trip_point: 2 /* cdev1 can be used for active[0]*/ + |---cdev1_weight: 1024 + + |cooling_device0: + |---type: Processor + |---max_state: 8 + |---cur_state: 0 + + |cooling_device3: + |---type: Fan + |---max_state: 2 + |---cur_state: 0 + + /sys/class/hwmon: + |hwmon0: + |---name: acpitz + |---temp1_input: 37000 + |---temp1_crit: 100000 + +4. Export Symbol APIs +===================== + +4.1. get_tz_trend +----------------- + +This function returns the trend of a thermal zone, i.e the rate of change +of temperature of the thermal zone. Ideally, the thermal sensor drivers +are supposed to implement the callback. If they don't, the thermal +framework calculated the trend by comparing the previous and the current +temperature values. + +4.2. get_thermal_instance +------------------------- + +This function returns the thermal_instance corresponding to a given +{thermal_zone, cooling_device, trip_point} combination. Returns NULL +if such an instance does not exist. + +4.3. thermal_cdev_update +------------------------ + +This function serves as an arbitrator to set the state of a cooling +device. It sets the cooling device to the deepest cooling state if +possible. + +5. thermal_emergency_poweroff +============================= + +On an event of critical trip temperature crossing the thermal framework +shuts down the system by calling hw_protection_shutdown(). The +hw_protection_shutdown() first attempts to perform an orderly shutdown +but accepts a delay after which it proceeds doing a forced power-off +or as last resort an emergency_restart. + +The delay should be carefully profiled so as to give adequate time for +orderly poweroff. + +If the delay is set to 0 emergency poweroff will not be supported. So a +carefully profiled non-zero positive value is a must for emergency +poweroff to be triggered. diff --git a/Documentation/driver-api/thermal/x86_pkg_temperature_thermal.rst b/Documentation/driver-api/thermal/x86_pkg_temperature_thermal.rst new file mode 100644 index 0000000000..2ac42ccd23 --- /dev/null +++ b/Documentation/driver-api/thermal/x86_pkg_temperature_thermal.rst @@ -0,0 +1,55 @@ +=================================== +Kernel driver: x86_pkg_temp_thermal +=================================== + +Supported chips: + +* x86: with package level thermal management + +(Verify using: CPUID.06H:EAX[bit 6] =1) + +Authors: Srinivas Pandruvada <srinivas.pandruvada@linux.intel.com> + +Reference +--------- + +Intel® 64 and IA-32 Architectures Software Developer’s Manual (Jan, 2013): +Chapter 14.6: PACKAGE LEVEL THERMAL MANAGEMENT + +Description +----------- + +This driver register CPU digital temperature package level sensor as a thermal +zone with maximum two user mode configurable trip points. Number of trip points +depends on the capability of the package. Once the trip point is violated, +user mode can receive notification via thermal notification mechanism and can +take any action to control temperature. + + +Threshold management +-------------------- +Each package will register as a thermal zone under /sys/class/thermal. + +Example:: + + /sys/class/thermal/thermal_zone1 + +This contains two trip points: + +- trip_point_0_temp +- trip_point_1_temp + +User can set any temperature between 0 to TJ-Max temperature. Temperature units +are in milli-degree Celsius. Refer to "Documentation/driver-api/thermal/sysfs-api.rst" for +thermal sys-fs details. + +Any value other than 0 in these trip points, can trigger thermal notifications. +Setting 0, stops sending thermal notifications. + +Thermal notifications: +To get kobject-uevent notifications, set the thermal zone +policy to "user_space". + +For example:: + + echo -n "user_space" > policy diff --git a/Documentation/driver-api/tty/index.rst b/Documentation/driver-api/tty/index.rst new file mode 100644 index 0000000000..2d32606a42 --- /dev/null +++ b/Documentation/driver-api/tty/index.rst @@ -0,0 +1,73 @@ +.. SPDX-License-Identifier: GPL-2.0 + +=== +TTY +=== + +Teletypewriter (TTY) layer takes care of all those serial devices. Including +the virtual ones like pseudoterminal (PTY). + +TTY structures +============== + +There are several major TTY structures. Every TTY device in a system has a +corresponding struct tty_port. These devices are maintained by a TTY driver +which is struct tty_driver. This structure describes the driver but also +contains a reference to operations which could be performed on the TTYs. It is +struct tty_operations. Then, upon open, a struct tty_struct is allocated and +lives until the final close. During this time, several callbacks from struct +tty_operations are invoked by the TTY layer. + +Every character received by the kernel (both from devices and users) is passed +through a preselected :doc:`tty_ldisc` (in +short ldisc; in C, struct tty_ldisc_ops). Its task is to transform characters +as defined by a particular ldisc or by user too. The default one is n_tty, +implementing echoes, signal handling, jobs control, special characters +processing, and more. The transformed characters are passed further to +user/device, depending on the source. + +In-detail description of the named TTY structures is in separate documents: + +.. toctree:: + :maxdepth: 2 + + tty_driver + tty_port + tty_struct + tty_ldisc + tty_buffer + tty_internals + +Writing TTY Driver +================== + +Before one starts writing a TTY driver, they must consider +:doc:`Serial <../serial/driver>` and :doc:`USB Serial <../../usb/usb-serial>` +layers first. Drivers for serial devices can often use one of these specific +layers to implement a serial driver. Only special devices should be handled +directly by the TTY Layer. If you are about to write such a driver, read on. + +A *typical* sequence a TTY driver performs is as follows: + +#. Allocate and register a TTY driver (module init) +#. Create and register TTY devices as they are probed (probe function) +#. Handle TTY operations and events like interrupts (TTY core invokes the + former, the device the latter) +#. Remove devices as they are going away (remove function) +#. Unregister and free the TTY driver (module exit) + +Steps regarding driver, i.e. 1., 3., and 5. are described in detail in +:doc:`tty_driver`. For the other two (devices handling), look into +:doc:`tty_port`. + +Other Documentation +=================== + +Miscellaneous documentation can be further found in these documents: + +.. toctree:: + :maxdepth: 2 + + moxa-smartio + n_gsm + n_tty diff --git a/Documentation/driver-api/tty/moxa-smartio.rst b/Documentation/driver-api/tty/moxa-smartio.rst new file mode 100644 index 0000000000..af25bc5cc3 --- /dev/null +++ b/Documentation/driver-api/tty/moxa-smartio.rst @@ -0,0 +1,197 @@ +============================================================= +MOXA Smartio/Industio Family Device Driver Installation Guide +============================================================= + +Copyright (C) 2008, Moxa Inc. +Copyright (C) 2021, Jiri Slaby + +.. Content + + 1. Introduction + 2. System Requirement + 3. Installation + 3.1 Hardware installation + 3.2 Device naming convention + 4. Utilities + 5. Setserial + 6. Troubleshooting + +1. Introduction +^^^^^^^^^^^^^^^ + + The Smartio/Industio/UPCI family Linux driver supports following multiport + boards: + + - 2 ports multiport board + CP-102U, CP-102UL, CP-102UF + CP-132U-I, CP-132UL, + CP-132, CP-132I, CP132S, CP-132IS, + (CP-102, CP-102S) + + - 4 ports multiport board + CP-104EL, + CP-104UL, CP-104JU, + CP-134U, CP-134U-I, + C104H/PCI, C104HS/PCI, + CP-114, CP-114I, CP-114S, CP-114IS, CP-114UL, + (C114HI, CT-114I), + POS-104UL, + CB-114, + CB-134I + + - 8 ports multiport board + CP-118EL, CP-168EL, + CP-118U, CP-168U, + C168H/PCI, + CB-108 + + If a compatibility problem occurs, please contact Moxa at + support@moxa.com.tw. + + In addition to device driver, useful utilities are also provided in this + version. They are: + + - msdiag + Diagnostic program for displaying installed Moxa + Smartio/Industio boards. + - msmon + Monitor program to observe data count and line status signals. + - msterm A simple terminal program which is useful in testing serial + ports. + + All the drivers and utilities are published in form of source code under + GNU General Public License in this version. Please refer to GNU General + Public License announcement in each source code file for more detail. + + In Moxa's Web sites, you may always find the latest driver at + https://www.moxa.com/. + + This version of driver can be installed as a Loadable Module (Module driver) + or built-in into kernel (Static driver). Before you install the driver, + please refer to hardware installation procedure in the User's Manual. + + We assume the user should be familiar with following documents: + + - Serial-HOWTO + - Kernel-HOWTO + +2. System Requirement +^^^^^^^^^^^^^^^^^^^^^ + + - Maximum 4 boards can be installed in combination + +3. Installation +^^^^^^^^^^^^^^^ + +3.1 Hardware installation +========================= + +PCI/UPCI board +-------------- + + You may need to adjust IRQ usage in BIOS to avoid IRQ conflict with other + ISA devices. Please refer to hardware installation procedure in User's + Manual in advance. + +PCI IRQ Sharing +--------------- + + Each port within the same multiport board shares the same IRQ. Up to + 4 Moxa Smartio/Industio PCI Family multiport boards can be installed + together on one system and they can share the same IRQ. + + + +3.2 Device naming convention +============================ + + The device node is named "ttyMxx". + +Device naming when more than 2 boards installed +----------------------------------------------- + + Naming convention for each Smartio/Industio multiport board is + pre-defined as below. + + ============ =============== + Board Num. Device node + 1st board ttyM0 - ttyM7 + 2nd board ttyM8 - ttyM15 + 3rd board ttyM16 - ttyM23 + 4th board ttyM24 - ttyM31 + ============ =============== + +4. Utilities +^^^^^^^^^^^^ + + There are 3 utilities contained in this driver. They are msdiag, msmon and + msterm. These 3 utilities are released in form of source code. They should + be compiled into executable file and copied into /usr/bin. + +msdiag - Diagnostic +=================== + + This utility provides the function to display what Moxa Smartio/Industio + board was found by the driver in the system. + +msmon - Port Monitoring +======================= + + This utility gives the user a quick view about all the MOXA ports' + activities. One can easily learn each port's total received/transmitted + (Rx/Tx) character count since the time when the monitoring is started. + + Rx/Tx throughputs per second are also reported in interval basis (e.g. + the last 5 seconds) and in average basis (since the time the monitoring + is started). You can reset all ports' count by <HOME> key. <+> <-> + (plus/minus) keys to change the displaying time interval. Press <ENTER> + on the port, that cursor stay, to view the port's communication + parameters, signal status, and input/output queue. + +msterm - Terminal Emulation +=========================== + + This utility provides data sending and receiving ability of all tty ports, + especially for MOXA ports. It is quite useful for testing simple + application, for example, sending AT command to a modem connected to the + port or used as a terminal for login purpose. Note that this is only a + dumb terminal emulation without handling full screen operation. + +5. Setserial +^^^^^^^^^^^^ + + Supported Setserial parameters are listed as below. + + ============== ============================================================= + uart set UART type(16450 --> disable FIFO, 16550A --> enable FIFO) + close_delay set the amount of time (in 1/100 of a second) that DTR + should be kept low while being closed. + closing_wait set the amount of time (in 1/100 of a second) that the + serial port should wait for data to be drained while + being closed, before the receiver is disabled. + spd_hi Use 57.6kb when the application requests 38.4kb. + spd_vhi Use 115.2kb when the application requests 38.4kb. + spd_shi Use 230.4kb when the application requests 38.4kb. + spd_warp Use 460.8kb when the application requests 38.4kb. + spd_normal Use 38.4kb when the application requests 38.4kb. + spd_cust Use the custom divisor to set the speed when the + application requests 38.4kb. + divisor This option sets the custom division. + baud_base This option sets the base baud rate. + ============== ============================================================= + +6. Troubleshooting +^^^^^^^^^^^^^^^^^^ + + The boot time error messages and solutions are stated as clearly as + possible. If all the possible solutions fail, please contact our technical + support team to get more help. + + + Error msg: + More than 4 Moxa Smartio/Industio family boards found. Fifth board + and after are ignored. + + Solution: + To avoid this problem, please unplug fifth and after board, because Moxa + driver supports up to 4 boards. diff --git a/Documentation/driver-api/tty/n_gsm.rst b/Documentation/driver-api/tty/n_gsm.rst new file mode 100644 index 0000000000..120317ec99 --- /dev/null +++ b/Documentation/driver-api/tty/n_gsm.rst @@ -0,0 +1,192 @@ +============================== +GSM 0710 tty multiplexor HOWTO +============================== + +.. contents:: :local: + +This line discipline implements the GSM 07.10 multiplexing protocol +detailed in the following 3GPP document: + + https://www.3gpp.org/ftp/Specs/archive/07_series/07.10/0710-720.zip + +This document give some hints on how to use this driver with GPRS and 3G +modems connected to a physical serial port. + +How to use it +============= + +Config Initiator +---------------- + +#. Initialize the modem in 0710 mux mode (usually ``AT+CMUX=`` command) through + its serial port. Depending on the modem used, you can pass more or less + parameters to this command. + +#. Switch the serial line to using the n_gsm line discipline by using + ``TIOCSETD`` ioctl. + +#. Configure the mux using ``GSMIOC_GETCONF_EXT``/``GSMIOC_SETCONF_EXT`` ioctl if needed. + +#. Configure the mux using ``GSMIOC_GETCONF``/``GSMIOC_SETCONF`` ioctl. + +#. Configure DLCs using ``GSMIOC_GETCONF_DLCI``/``GSMIOC_SETCONF_DLCI`` ioctl for non-defaults. + +#. Obtain base gsmtty number for the used serial port. + + Major parts of the initialization program + (a good starting point is util-linux-ng/sys-utils/ldattach.c):: + + #include <stdio.h> + #include <stdint.h> + #include <linux/gsmmux.h> + #include <linux/tty.h> + + #define DEFAULT_SPEED B115200 + #define SERIAL_PORT /dev/ttyS0 + + int ldisc = N_GSM0710; + struct gsm_config c; + struct gsm_config_ext ce; + struct gsm_dlci_config dc; + struct termios configuration; + uint32_t first; + + /* open the serial port connected to the modem */ + fd = open(SERIAL_PORT, O_RDWR | O_NOCTTY | O_NDELAY); + + /* configure the serial port : speed, flow control ... */ + + /* send the AT commands to switch the modem to CMUX mode + and check that it's successful (should return OK) */ + write(fd, "AT+CMUX=0\r", 10); + + /* experience showed that some modems need some time before + being able to answer to the first MUX packet so a delay + may be needed here in some case */ + sleep(3); + + /* use n_gsm line discipline */ + ioctl(fd, TIOCSETD, &ldisc); + + /* get n_gsm extended configuration */ + ioctl(fd, GSMIOC_GETCONF_EXT, &ce); + /* use keep-alive once every 5s for modem connection supervision */ + ce.keep_alive = 500; + /* set the new extended configuration */ + ioctl(fd, GSMIOC_SETCONF_EXT, &ce); + /* get n_gsm configuration */ + ioctl(fd, GSMIOC_GETCONF, &c); + /* we are initiator and need encoding 0 (basic) */ + c.initiator = 1; + c.encapsulation = 0; + /* our modem defaults to a maximum size of 127 bytes */ + c.mru = 127; + c.mtu = 127; + /* set the new configuration */ + ioctl(fd, GSMIOC_SETCONF, &c); + /* get DLC 1 configuration */ + dc.channel = 1; + ioctl(fd, GSMIOC_GETCONF_DLCI, &dc); + /* the first user channel gets a higher priority */ + dc.priority = 1; + /* set the new DLC 1 specific configuration */ + ioctl(fd, GSMIOC_SETCONF_DLCI, &dc); + /* get first gsmtty device node */ + ioctl(fd, GSMIOC_GETFIRST, &first); + printf("first muxed line: /dev/gsmtty%i\n", first); + + /* and wait for ever to keep the line discipline enabled */ + daemon(0,0); + pause(); + +#. Use these devices as plain serial ports. + + For example, it's possible: + + - to use *gnokii* to send / receive SMS on ``ttygsm1`` + - to use *ppp* to establish a datalink on ``ttygsm2`` + +#. First close all virtual ports before closing the physical port. + + Note that after closing the physical port the modem is still in multiplexing + mode. This may prevent a successful re-opening of the port later. To avoid + this situation either reset the modem if your hardware allows that or send + a disconnect command frame manually before initializing the multiplexing mode + for the second time. The byte sequence for the disconnect command frame is:: + + 0xf9, 0x03, 0xef, 0x03, 0xc3, 0x16, 0xf9 + +Config Requester +---------------- + +#. Receive ``AT+CMUX=`` command through its serial port, initialize mux mode + config. + +#. Switch the serial line to using the *n_gsm* line discipline by using + ``TIOCSETD`` ioctl. + +#. Configure the mux using ``GSMIOC_GETCONF_EXT``/``GSMIOC_SETCONF_EXT`` + ioctl if needed. + +#. Configure the mux using ``GSMIOC_GETCONF``/``GSMIOC_SETCONF`` ioctl. + +#. Configure DLCs using ``GSMIOC_GETCONF_DLCI``/``GSMIOC_SETCONF_DLCI`` ioctl for non-defaults. + +#. Obtain base gsmtty number for the used serial port:: + + #include <stdio.h> + #include <stdint.h> + #include <linux/gsmmux.h> + #include <linux/tty.h> + #define DEFAULT_SPEED B115200 + #define SERIAL_PORT /dev/ttyS0 + + int ldisc = N_GSM0710; + struct gsm_config c; + struct gsm_config_ext ce; + struct gsm_dlci_config dc; + struct termios configuration; + uint32_t first; + + /* open the serial port */ + fd = open(SERIAL_PORT, O_RDWR | O_NOCTTY | O_NDELAY); + + /* configure the serial port : speed, flow control ... */ + + /* get serial data and check "AT+CMUX=command" parameter ... */ + + /* use n_gsm line discipline */ + ioctl(fd, TIOCSETD, &ldisc); + + /* get n_gsm extended configuration */ + ioctl(fd, GSMIOC_GETCONF_EXT, &ce); + /* use keep-alive once every 5s for peer connection supervision */ + ce.keep_alive = 500; + /* set the new extended configuration */ + ioctl(fd, GSMIOC_SETCONF_EXT, &ce); + /* get n_gsm configuration */ + ioctl(fd, GSMIOC_GETCONF, &c); + /* we are requester and need encoding 0 (basic) */ + c.initiator = 0; + c.encapsulation = 0; + /* our modem defaults to a maximum size of 127 bytes */ + c.mru = 127; + c.mtu = 127; + /* set the new configuration */ + ioctl(fd, GSMIOC_SETCONF, &c); + /* get DLC 1 configuration */ + dc.channel = 1; + ioctl(fd, GSMIOC_GETCONF_DLCI, &dc); + /* the first user channel gets a higher priority */ + dc.priority = 1; + /* set the new DLC 1 specific configuration */ + ioctl(fd, GSMIOC_SETCONF_DLCI, &dc); + /* get first gsmtty device node */ + ioctl(fd, GSMIOC_GETFIRST, &first); + printf("first muxed line: /dev/gsmtty%i\n", first); + + /* and wait for ever to keep the line discipline enabled */ + daemon(0,0); + pause(); + +11-03-08 - Eric Bénard - <eric@eukrea.com> diff --git a/Documentation/driver-api/tty/n_tty.rst b/Documentation/driver-api/tty/n_tty.rst new file mode 100644 index 0000000000..15b70faee7 --- /dev/null +++ b/Documentation/driver-api/tty/n_tty.rst @@ -0,0 +1,22 @@ +.. SPDX-License-Identifier: GPL-2.0 + +===== +N_TTY +===== + +.. contents:: :local: + +The default (and fallback) :doc:`TTY line discipline <tty_ldisc>`. It tries to +handle characters as per POSIX. + +External Functions +================== + +.. kernel-doc:: drivers/tty/n_tty.c + :export: + +Internal Functions +================== + +.. kernel-doc:: drivers/tty/n_tty.c + :internal: diff --git a/Documentation/driver-api/tty/tty_buffer.rst b/Documentation/driver-api/tty/tty_buffer.rst new file mode 100644 index 0000000000..4b5ca1776d --- /dev/null +++ b/Documentation/driver-api/tty/tty_buffer.rst @@ -0,0 +1,49 @@ +.. SPDX-License-Identifier: GPL-2.0 + +========== +TTY Buffer +========== + +.. contents:: :local: + +Here, we document functions for taking care of tty buffer and their flipping. +Drivers are supposed to fill the buffer by one of those functions below and +then flip the buffer, so that the data are passed to :doc:`line discipline +<tty_ldisc>` for further processing. + +Flip Buffer Management +====================== + +.. kernel-doc:: drivers/tty/tty_buffer.c + :identifiers: tty_prepare_flip_string + tty_flip_buffer_push tty_ldisc_receive_buf + +.. kernel-doc:: include/linux/tty_flip.h + :identifiers: tty_insert_flip_string_fixed_flag tty_insert_flip_string_flags + tty_insert_flip_char + +---- + +Other Functions +=============== + +.. kernel-doc:: drivers/tty/tty_buffer.c + :identifiers: tty_buffer_space_avail tty_buffer_set_limit + +---- + +Buffer Locking +============== + +These are used only in special circumstances. Avoid them. + +.. kernel-doc:: drivers/tty/tty_buffer.c + :identifiers: tty_buffer_lock_exclusive tty_buffer_unlock_exclusive + +---- + +Internal Functions +================== + +.. kernel-doc:: drivers/tty/tty_buffer.c + :internal: diff --git a/Documentation/driver-api/tty/tty_driver.rst b/Documentation/driver-api/tty/tty_driver.rst new file mode 100644 index 0000000000..cc529f8634 --- /dev/null +++ b/Documentation/driver-api/tty/tty_driver.rst @@ -0,0 +1,128 @@ +.. SPDX-License-Identifier: GPL-2.0 + +============================= +TTY Driver and TTY Operations +============================= + +.. contents:: :local: + +Allocation +========== + +The first thing a driver needs to do is to allocate a struct tty_driver. This +is done by tty_alloc_driver() (or __tty_alloc_driver()). Next, the newly +allocated structure is filled with information. See `TTY Driver Reference`_ at +the end of this document on what actually shall be filled in. + +The allocation routines expect a number of devices the driver can handle at +most and flags. Flags are those starting ``TTY_DRIVER_`` listed and described +in `TTY Driver Flags`_ below. + +When the driver is about to be freed, tty_driver_kref_put() is called on that. +It will decrements the reference count and if it reaches zero, the driver is +freed. + +For reference, both allocation and deallocation functions are explained here in +detail: + +.. kernel-doc:: drivers/tty/tty_io.c + :identifiers: __tty_alloc_driver tty_driver_kref_put + +TTY Driver Flags +---------------- + +Here comes the documentation of flags accepted by tty_alloc_driver() (or +__tty_alloc_driver()): + +.. kernel-doc:: include/linux/tty_driver.h + :doc: TTY Driver Flags + +---- + +Registration +============ + +When a struct tty_driver is allocated and filled in, it can be registered using +tty_register_driver(). It is recommended to pass ``TTY_DRIVER_DYNAMIC_DEV`` in +flags of tty_alloc_driver(). If it is not passed, *all* devices are also +registered during tty_register_driver() and the following paragraph of +registering devices can be skipped for such drivers. However, the struct +tty_port part in `Registering Devices`_ is still relevant there. + +.. kernel-doc:: drivers/tty/tty_io.c + :identifiers: tty_register_driver tty_unregister_driver + +Registering Devices +------------------- + +Every TTY device shall be backed by a struct tty_port. Usually, TTY drivers +embed tty_port into device's private structures. Further details about handling +tty_port can be found in :doc:`tty_port`. The driver is also recommended to use +tty_port's reference counting by tty_port_get() and tty_port_put(). The final +put is supposed to free the tty_port including the device's private struct. + +Unless ``TTY_DRIVER_DYNAMIC_DEV`` was passed as flags to tty_alloc_driver(), +TTY driver is supposed to register every device discovered in the system +(the latter is preferred). This is performed by tty_register_device(). Or by +tty_register_device_attr() if the driver wants to expose some information +through struct attribute_group. Both of them register ``index``'th device and +upon return, the device can be opened. There are also preferred tty_port +variants described in `Linking Devices to Ports`_ later. It is up to driver to +manage free indices and choosing the right one. The TTY layer only refuses to +register more devices than passed to tty_alloc_driver(). + +When the device is opened, the TTY layer allocates struct tty_struct and starts +calling operations from :c:member:`tty_driver.ops`, see `TTY Operations +Reference`_. + +The registration routines are documented as follows: + +.. kernel-doc:: drivers/tty/tty_io.c + :identifiers: tty_register_device tty_register_device_attr + tty_unregister_device + +---- + +Linking Devices to Ports +------------------------ +As stated earlier, every TTY device shall have a struct tty_port assigned to +it. It must be known to the TTY layer at :c:member:`tty_driver.ops.install()` +at latest. There are few helpers to *link* the two. Ideally, the driver uses +tty_port_register_device() or tty_port_register_device_attr() instead of +tty_register_device() and tty_register_device_attr() at the registration time. +This way, the driver needs not care about linking later on. + +If that is not possible, the driver still can link the tty_port to a specific +index *before* the actual registration by tty_port_link_device(). If it still +does not fit, tty_port_install() can be used from the +:c:member:`tty_driver.ops.install` hook as a last resort. The last one is +dedicated mostly for in-memory devices like PTY where tty_ports are allocated +on demand. + +The linking routines are documented here: + +.. kernel-doc:: drivers/tty/tty_port.c + :identifiers: tty_port_link_device tty_port_register_device + tty_port_register_device_attr + +---- + +TTY Driver Reference +==================== + +All members of struct tty_driver are documented here. The required members are +noted at the end. struct tty_operations are documented next. + +.. kernel-doc:: include/linux/tty_driver.h + :identifiers: tty_driver + +---- + +TTY Operations Reference +======================== + +When a TTY is registered, these driver hooks can be invoked by the TTY layer: + +.. kernel-doc:: include/linux/tty_driver.h + :identifiers: tty_operations + diff --git a/Documentation/driver-api/tty/tty_internals.rst b/Documentation/driver-api/tty/tty_internals.rst new file mode 100644 index 0000000000..d0d4158203 --- /dev/null +++ b/Documentation/driver-api/tty/tty_internals.rst @@ -0,0 +1,31 @@ +.. SPDX-License-Identifier: GPL-2.0 + +============= +TTY Internals +============= + +.. contents:: :local: + +Kopen +===== + +These functions serve for opening a TTY from the kernelspace: + +.. kernel-doc:: drivers/tty/tty_io.c + :identifiers: tty_kopen_exclusive tty_kopen_shared tty_kclose + +---- + +Exported Internal Functions +=========================== + +.. kernel-doc:: drivers/tty/tty_io.c + :identifiers: tty_release_struct tty_dev_name_to_number tty_get_icount + +---- + +Internal Functions +================== + +.. kernel-doc:: drivers/tty/tty_io.c + :internal: diff --git a/Documentation/driver-api/tty/tty_ldisc.rst b/Documentation/driver-api/tty/tty_ldisc.rst new file mode 100644 index 0000000000..5144751be8 --- /dev/null +++ b/Documentation/driver-api/tty/tty_ldisc.rst @@ -0,0 +1,85 @@ +.. SPDX-License-Identifier: GPL-2.0 + +=================== +TTY Line Discipline +=================== + +.. contents:: :local: + +TTY line discipline process all incoming and outgoing character from/to a tty +device. The default line discipline is :doc:`N_TTY <n_tty>`. It is also a +fallback if establishing any other discipline for a tty fails. If even N_TTY +fails, N_NULL takes over. That never fails, but also does not process any +characters -- it throws them away. + +Registration +============ + +Line disciplines are registered with tty_register_ldisc() passing the ldisc +structure. At the point of registration the discipline must be ready to use and +it is possible it will get used before the call returns success. If the call +returns an error then it won’t get called. Do not re-use ldisc numbers as they +are part of the userspace ABI and writing over an existing ldisc will cause +demons to eat your computer. You must not re-register over the top of the line +discipline even with the same data or your computer again will be eaten by +demons. In order to remove a line discipline call tty_unregister_ldisc(). + +Heed this warning: the reference count field of the registered copies of the +tty_ldisc structure in the ldisc table counts the number of lines using this +discipline. The reference count of the tty_ldisc structure within a tty counts +the number of active users of the ldisc at this instant. In effect it counts +the number of threads of execution within an ldisc method (plus those about to +enter and exit although this detail matters not). + +.. kernel-doc:: drivers/tty/tty_ldisc.c + :identifiers: tty_register_ldisc tty_unregister_ldisc + +Other Functions +=============== + +.. kernel-doc:: drivers/tty/tty_ldisc.c + :identifiers: tty_set_ldisc tty_ldisc_flush + +Line Discipline Operations Reference +==================================== + +.. kernel-doc:: include/linux/tty_ldisc.h + :identifiers: tty_ldisc_ops + +Driver Access +============= + +Line discipline methods can call the methods of the underlying hardware driver. +These are documented as a part of struct tty_operations. + +TTY Flags +========= + +Line discipline methods have access to :c:member:`tty_struct.flags` field. See +:doc:`tty_struct`. + +Locking +======= + +Callers to the line discipline functions from the tty layer are required to +take line discipline locks. The same is true of calls from the driver side +but not yet enforced. + +.. kernel-doc:: drivers/tty/tty_ldisc.c + :identifiers: tty_ldisc_ref_wait tty_ldisc_ref tty_ldisc_deref + +While these functions are slightly slower than the old code they should have +minimal impact as most receive logic uses the flip buffers and they only +need to take a reference when they push bits up through the driver. + +A caution: The :c:member:`tty_ldisc_ops.open()`, +:c:member:`tty_ldisc_ops.close()` and :c:member:`tty_driver.set_ldisc()` +functions are called with the ldisc unavailable. Thus tty_ldisc_ref() will fail +in this situation if used within these functions. Ldisc and driver code +calling its own functions must be careful in this case. + +Internal Functions +================== + +.. kernel-doc:: drivers/tty/tty_ldisc.c + :internal: diff --git a/Documentation/driver-api/tty/tty_port.rst b/Documentation/driver-api/tty/tty_port.rst new file mode 100644 index 0000000000..5cb90e954f --- /dev/null +++ b/Documentation/driver-api/tty/tty_port.rst @@ -0,0 +1,70 @@ +.. SPDX-License-Identifier: GPL-2.0 + +======== +TTY Port +======== + +.. contents:: :local: + +The TTY drivers are advised to use struct tty_port helpers as much as possible. +If the drivers implement :c:member:`tty_port.ops.activate()` and +:c:member:`tty_port.ops.shutdown()`, they can use tty_port_open(), +tty_port_close(), and tty_port_hangup() in respective +:c:member:`tty_struct.ops` hooks. + +The reference and details are contained in the `TTY Port Reference`_ and `TTY +Port Operations Reference`_ sections at the bottom. + +TTY Port Functions +================== + +Init & Destroy +-------------- + +.. kernel-doc:: drivers/tty/tty_port.c + :identifiers: tty_port_init tty_port_destroy + tty_port_get tty_port_put + +Open/Close/Hangup Helpers +------------------------- + +.. kernel-doc:: drivers/tty/tty_port.c + :identifiers: tty_port_install tty_port_open tty_port_block_til_ready + tty_port_close tty_port_close_start tty_port_close_end tty_port_hangup + tty_port_shutdown + +TTY Refcounting +--------------- + +.. kernel-doc:: drivers/tty/tty_port.c + :identifiers: tty_port_tty_get tty_port_tty_set + +TTY Helpers +----------- + +.. kernel-doc:: drivers/tty/tty_port.c + :identifiers: tty_port_tty_hangup tty_port_tty_wakeup + + +Modem Signals +------------- + +.. kernel-doc:: drivers/tty/tty_port.c + :identifiers: tty_port_carrier_raised tty_port_raise_dtr_rts + tty_port_lower_dtr_rts + +---- + +TTY Port Reference +================== + +.. kernel-doc:: include/linux/tty_port.h + :identifiers: tty_port + +---- + +TTY Port Operations Reference +============================= + +.. kernel-doc:: include/linux/tty_port.h + :identifiers: tty_port_operations diff --git a/Documentation/driver-api/tty/tty_struct.rst b/Documentation/driver-api/tty/tty_struct.rst new file mode 100644 index 0000000000..c72f5a4293 --- /dev/null +++ b/Documentation/driver-api/tty/tty_struct.rst @@ -0,0 +1,81 @@ +.. SPDX-License-Identifier: GPL-2.0 + +========== +TTY Struct +========== + +.. contents:: :local: + +struct tty_struct is allocated by the TTY layer upon the first open of the TTY +device and released after the last close. The TTY layer passes this structure +to most of struct tty_operation's hooks. Members of tty_struct are documented +in `TTY Struct Reference`_ at the bottom. + +Initialization +============== + +.. kernel-doc:: drivers/tty/tty_io.c + :identifiers: tty_init_termios + +Name +==== + +.. kernel-doc:: drivers/tty/tty_io.c + :identifiers: tty_name + +Reference counting +================== + +.. kernel-doc:: include/linux/tty.h + :identifiers: tty_kref_get + +.. kernel-doc:: drivers/tty/tty_io.c + :identifiers: tty_kref_put + +Install +======= + +.. kernel-doc:: drivers/tty/tty_io.c + :identifiers: tty_standard_install + +Read & Write +============ + +.. kernel-doc:: drivers/tty/tty_io.c + :identifiers: tty_put_char + +Start & Stop +============ + +.. kernel-doc:: drivers/tty/tty_io.c + :identifiers: start_tty stop_tty + +Wakeup +====== + +.. kernel-doc:: drivers/tty/tty_io.c + :identifiers: tty_wakeup + +Hangup +====== + +.. kernel-doc:: drivers/tty/tty_io.c + :identifiers: tty_hangup tty_vhangup tty_hung_up_p + +Misc +==== + +.. kernel-doc:: drivers/tty/tty_io.c + :identifiers: tty_do_resize + +TTY Struct Flags +================ + +.. kernel-doc:: include/linux/tty.h + :doc: TTY Struct Flags + +TTY Struct Reference +==================== + +.. kernel-doc:: include/linux/tty.h + :identifiers: tty_struct diff --git a/Documentation/driver-api/uio-howto.rst b/Documentation/driver-api/uio-howto.rst new file mode 100644 index 0000000000..907ffa3b38 --- /dev/null +++ b/Documentation/driver-api/uio-howto.rst @@ -0,0 +1,730 @@ +======================= +The Userspace I/O HOWTO +======================= + +:Author: Hans-Jürgen Koch Linux developer, Linutronix +:Date: 2006-12-11 + +About this document +=================== + +Translations +------------ + +If you know of any translations for this document, or you are interested +in translating it, please email me hjk@hansjkoch.de. + +Preface +------- + +For many types of devices, creating a Linux kernel driver is overkill. +All that is really needed is some way to handle an interrupt and provide +access to the memory space of the device. The logic of controlling the +device does not necessarily have to be within the kernel, as the device +does not need to take advantage of any of other resources that the +kernel provides. One such common class of devices that are like this are +for industrial I/O cards. + +To address this situation, the userspace I/O system (UIO) was designed. +For typical industrial I/O cards, only a very small kernel module is +needed. The main part of the driver will run in user space. This +simplifies development and reduces the risk of serious bugs within a +kernel module. + +Please note that UIO is not an universal driver interface. Devices that +are already handled well by other kernel subsystems (like networking or +serial or USB) are no candidates for an UIO driver. Hardware that is +ideally suited for an UIO driver fulfills all of the following: + +- The device has memory that can be mapped. The device can be + controlled completely by writing to this memory. + +- The device usually generates interrupts. + +- The device does not fit into one of the standard kernel subsystems. + +Acknowledgments +--------------- + +I'd like to thank Thomas Gleixner and Benedikt Spranger of Linutronix, +who have not only written most of the UIO code, but also helped greatly +writing this HOWTO by giving me all kinds of background information. + +Feedback +-------- + +Find something wrong with this document? (Or perhaps something right?) I +would love to hear from you. Please email me at hjk@hansjkoch.de. + +About UIO +========= + +If you use UIO for your card's driver, here's what you get: + +- only one small kernel module to write and maintain. + +- develop the main part of your driver in user space, with all the + tools and libraries you're used to. + +- bugs in your driver won't crash the kernel. + +- updates of your driver can take place without recompiling the kernel. + +How UIO works +------------- + +Each UIO device is accessed through a device file and several sysfs +attribute files. The device file will be called ``/dev/uio0`` for the +first device, and ``/dev/uio1``, ``/dev/uio2`` and so on for subsequent +devices. + +``/dev/uioX`` is used to access the address space of the card. Just use +:c:func:`mmap()` to access registers or RAM locations of your card. + +Interrupts are handled by reading from ``/dev/uioX``. A blocking +:c:func:`read()` from ``/dev/uioX`` will return as soon as an +interrupt occurs. You can also use :c:func:`select()` on +``/dev/uioX`` to wait for an interrupt. The integer value read from +``/dev/uioX`` represents the total interrupt count. You can use this +number to figure out if you missed some interrupts. + +For some hardware that has more than one interrupt source internally, +but not separate IRQ mask and status registers, there might be +situations where userspace cannot determine what the interrupt source +was if the kernel handler disables them by writing to the chip's IRQ +register. In such a case, the kernel has to disable the IRQ completely +to leave the chip's register untouched. Now the userspace part can +determine the cause of the interrupt, but it cannot re-enable +interrupts. Another cornercase is chips where re-enabling interrupts is +a read-modify-write operation to a combined IRQ status/acknowledge +register. This would be racy if a new interrupt occurred simultaneously. + +To address these problems, UIO also implements a write() function. It is +normally not used and can be ignored for hardware that has only a single +interrupt source or has separate IRQ mask and status registers. If you +need it, however, a write to ``/dev/uioX`` will call the +:c:func:`irqcontrol()` function implemented by the driver. You have +to write a 32-bit value that is usually either 0 or 1 to disable or +enable interrupts. If a driver does not implement +:c:func:`irqcontrol()`, :c:func:`write()` will return with +``-ENOSYS``. + +To handle interrupts properly, your custom kernel module can provide its +own interrupt handler. It will automatically be called by the built-in +handler. + +For cards that don't generate interrupts but need to be polled, there is +the possibility to set up a timer that triggers the interrupt handler at +configurable time intervals. This interrupt simulation is done by +calling :c:func:`uio_event_notify()` from the timer's event +handler. + +Each driver provides attributes that are used to read or write +variables. These attributes are accessible through sysfs files. A custom +kernel driver module can add its own attributes to the device owned by +the uio driver, but not added to the UIO device itself at this time. +This might change in the future if it would be found to be useful. + +The following standard attributes are provided by the UIO framework: + +- ``name``: The name of your device. It is recommended to use the name + of your kernel module for this. + +- ``version``: A version string defined by your driver. This allows the + user space part of your driver to deal with different versions of the + kernel module. + +- ``event``: The total number of interrupts handled by the driver since + the last time the device node was read. + +These attributes appear under the ``/sys/class/uio/uioX`` directory. +Please note that this directory might be a symlink, and not a real +directory. Any userspace code that accesses it must be able to handle +this. + +Each UIO device can make one or more memory regions available for memory +mapping. This is necessary because some industrial I/O cards require +access to more than one PCI memory region in a driver. + +Each mapping has its own directory in sysfs, the first mapping appears +as ``/sys/class/uio/uioX/maps/map0/``. Subsequent mappings create +directories ``map1/``, ``map2/``, and so on. These directories will only +appear if the size of the mapping is not 0. + +Each ``mapX/`` directory contains four read-only files that show +attributes of the memory: + +- ``name``: A string identifier for this mapping. This is optional, the + string can be empty. Drivers can set this to make it easier for + userspace to find the correct mapping. + +- ``addr``: The address of memory that can be mapped. + +- ``size``: The size, in bytes, of the memory pointed to by addr. + +- ``offset``: The offset, in bytes, that has to be added to the pointer + returned by :c:func:`mmap()` to get to the actual device memory. + This is important if the device's memory is not page aligned. + Remember that pointers returned by :c:func:`mmap()` are always + page aligned, so it is good style to always add this offset. + +From userspace, the different mappings are distinguished by adjusting +the ``offset`` parameter of the :c:func:`mmap()` call. To map the +memory of mapping N, you have to use N times the page size as your +offset:: + + offset = N * getpagesize(); + +Sometimes there is hardware with memory-like regions that can not be +mapped with the technique described here, but there are still ways to +access them from userspace. The most common example are x86 ioports. On +x86 systems, userspace can access these ioports using +:c:func:`ioperm()`, :c:func:`iopl()`, :c:func:`inb()`, +:c:func:`outb()`, and similar functions. + +Since these ioport regions can not be mapped, they will not appear under +``/sys/class/uio/uioX/maps/`` like the normal memory described above. +Without information about the port regions a hardware has to offer, it +becomes difficult for the userspace part of the driver to find out which +ports belong to which UIO device. + +To address this situation, the new directory +``/sys/class/uio/uioX/portio/`` was added. It only exists if the driver +wants to pass information about one or more port regions to userspace. +If that is the case, subdirectories named ``port0``, ``port1``, and so +on, will appear underneath ``/sys/class/uio/uioX/portio/``. + +Each ``portX/`` directory contains four read-only files that show name, +start, size, and type of the port region: + +- ``name``: A string identifier for this port region. The string is + optional and can be empty. Drivers can set it to make it easier for + userspace to find a certain port region. + +- ``start``: The first port of this region. + +- ``size``: The number of ports in this region. + +- ``porttype``: A string describing the type of port. + +Writing your own kernel module +============================== + +Please have a look at ``uio_cif.c`` as an example. The following +paragraphs explain the different sections of this file. + +struct uio_info +--------------- + +This structure tells the framework the details of your driver, Some of +the members are required, others are optional. + +- ``const char *name``: Required. The name of your driver as it will + appear in sysfs. I recommend using the name of your module for this. + +- ``const char *version``: Required. This string appears in + ``/sys/class/uio/uioX/version``. + +- ``struct uio_mem mem[ MAX_UIO_MAPS ]``: Required if you have memory + that can be mapped with :c:func:`mmap()`. For each mapping you + need to fill one of the ``uio_mem`` structures. See the description + below for details. + +- ``struct uio_port port[ MAX_UIO_PORTS_REGIONS ]``: Required if you + want to pass information about ioports to userspace. For each port + region you need to fill one of the ``uio_port`` structures. See the + description below for details. + +- ``long irq``: Required. If your hardware generates an interrupt, it's + your modules task to determine the irq number during initialization. + If you don't have a hardware generated interrupt but want to trigger + the interrupt handler in some other way, set ``irq`` to + ``UIO_IRQ_CUSTOM``. If you had no interrupt at all, you could set + ``irq`` to ``UIO_IRQ_NONE``, though this rarely makes sense. + +- ``unsigned long irq_flags``: Required if you've set ``irq`` to a + hardware interrupt number. The flags given here will be used in the + call to :c:func:`request_irq()`. + +- ``int (*mmap)(struct uio_info *info, struct vm_area_struct *vma)``: + Optional. If you need a special :c:func:`mmap()` + function, you can set it here. If this pointer is not NULL, your + :c:func:`mmap()` will be called instead of the built-in one. + +- ``int (*open)(struct uio_info *info, struct inode *inode)``: + Optional. You might want to have your own :c:func:`open()`, + e.g. to enable interrupts only when your device is actually used. + +- ``int (*release)(struct uio_info *info, struct inode *inode)``: + Optional. If you define your own :c:func:`open()`, you will + probably also want a custom :c:func:`release()` function. + +- ``int (*irqcontrol)(struct uio_info *info, s32 irq_on)``: + Optional. If you need to be able to enable or disable interrupts + from userspace by writing to ``/dev/uioX``, you can implement this + function. The parameter ``irq_on`` will be 0 to disable interrupts + and 1 to enable them. + +Usually, your device will have one or more memory regions that can be +mapped to user space. For each region, you have to set up a +``struct uio_mem`` in the ``mem[]`` array. Here's a description of the +fields of ``struct uio_mem``: + +- ``const char *name``: Optional. Set this to help identify the memory + region, it will show up in the corresponding sysfs node. + +- ``int memtype``: Required if the mapping is used. Set this to + ``UIO_MEM_PHYS`` if you have physical memory on your card to be + mapped. Use ``UIO_MEM_LOGICAL`` for logical memory (e.g. allocated + with :c:func:`__get_free_pages()` but not kmalloc()). There's also + ``UIO_MEM_VIRTUAL`` for virtual memory. + +- ``phys_addr_t addr``: Required if the mapping is used. Fill in the + address of your memory block. This address is the one that appears in + sysfs. + +- ``resource_size_t size``: Fill in the size of the memory block that + ``addr`` points to. If ``size`` is zero, the mapping is considered + unused. Note that you *must* initialize ``size`` with zero for all + unused mappings. + +- ``void *internal_addr``: If you have to access this memory region + from within your kernel module, you will want to map it internally by + using something like :c:func:`ioremap()`. Addresses returned by + this function cannot be mapped to user space, so you must not store + it in ``addr``. Use ``internal_addr`` instead to remember such an + address. + +Please do not touch the ``map`` element of ``struct uio_mem``! It is +used by the UIO framework to set up sysfs files for this mapping. Simply +leave it alone. + +Sometimes, your device can have one or more port regions which can not +be mapped to userspace. But if there are other possibilities for +userspace to access these ports, it makes sense to make information +about the ports available in sysfs. For each region, you have to set up +a ``struct uio_port`` in the ``port[]`` array. Here's a description of +the fields of ``struct uio_port``: + +- ``char *porttype``: Required. Set this to one of the predefined + constants. Use ``UIO_PORT_X86`` for the ioports found in x86 + architectures. + +- ``unsigned long start``: Required if the port region is used. Fill in + the number of the first port of this region. + +- ``unsigned long size``: Fill in the number of ports in this region. + If ``size`` is zero, the region is considered unused. Note that you + *must* initialize ``size`` with zero for all unused regions. + +Please do not touch the ``portio`` element of ``struct uio_port``! It is +used internally by the UIO framework to set up sysfs files for this +region. Simply leave it alone. + +Adding an interrupt handler +--------------------------- + +What you need to do in your interrupt handler depends on your hardware +and on how you want to handle it. You should try to keep the amount of +code in your kernel interrupt handler low. If your hardware requires no +action that you *have* to perform after each interrupt, then your +handler can be empty. + +If, on the other hand, your hardware *needs* some action to be performed +after each interrupt, then you *must* do it in your kernel module. Note +that you cannot rely on the userspace part of your driver. Your +userspace program can terminate at any time, possibly leaving your +hardware in a state where proper interrupt handling is still required. + +There might also be applications where you want to read data from your +hardware at each interrupt and buffer it in a piece of kernel memory +you've allocated for that purpose. With this technique you could avoid +loss of data if your userspace program misses an interrupt. + +A note on shared interrupts: Your driver should support interrupt +sharing whenever this is possible. It is possible if and only if your +driver can detect whether your hardware has triggered the interrupt or +not. This is usually done by looking at an interrupt status register. If +your driver sees that the IRQ bit is actually set, it will perform its +actions, and the handler returns IRQ_HANDLED. If the driver detects +that it was not your hardware that caused the interrupt, it will do +nothing and return IRQ_NONE, allowing the kernel to call the next +possible interrupt handler. + +If you decide not to support shared interrupts, your card won't work in +computers with no free interrupts. As this frequently happens on the PC +platform, you can save yourself a lot of trouble by supporting interrupt +sharing. + +Using uio_pdrv for platform devices +----------------------------------- + +In many cases, UIO drivers for platform devices can be handled in a +generic way. In the same place where you define your +``struct platform_device``, you simply also implement your interrupt +handler and fill your ``struct uio_info``. A pointer to this +``struct uio_info`` is then used as ``platform_data`` for your platform +device. + +You also need to set up an array of ``struct resource`` containing +addresses and sizes of your memory mappings. This information is passed +to the driver using the ``.resource`` and ``.num_resources`` elements of +``struct platform_device``. + +You now have to set the ``.name`` element of ``struct platform_device`` +to ``"uio_pdrv"`` to use the generic UIO platform device driver. This +driver will fill the ``mem[]`` array according to the resources given, +and register the device. + +The advantage of this approach is that you only have to edit a file you +need to edit anyway. You do not have to create an extra driver. + +Using uio_pdrv_genirq for platform devices +------------------------------------------ + +Especially in embedded devices, you frequently find chips where the irq +pin is tied to its own dedicated interrupt line. In such cases, where +you can be really sure the interrupt is not shared, we can take the +concept of ``uio_pdrv`` one step further and use a generic interrupt +handler. That's what ``uio_pdrv_genirq`` does. + +The setup for this driver is the same as described above for +``uio_pdrv``, except that you do not implement an interrupt handler. The +``.handler`` element of ``struct uio_info`` must remain ``NULL``. The +``.irq_flags`` element must not contain ``IRQF_SHARED``. + +You will set the ``.name`` element of ``struct platform_device`` to +``"uio_pdrv_genirq"`` to use this driver. + +The generic interrupt handler of ``uio_pdrv_genirq`` will simply disable +the interrupt line using :c:func:`disable_irq_nosync()`. After +doing its work, userspace can reenable the interrupt by writing +0x00000001 to the UIO device file. The driver already implements an +:c:func:`irq_control()` to make this possible, you must not +implement your own. + +Using ``uio_pdrv_genirq`` not only saves a few lines of interrupt +handler code. You also do not need to know anything about the chip's +internal registers to create the kernel part of the driver. All you need +to know is the irq number of the pin the chip is connected to. + +When used in a device-tree enabled system, the driver needs to be +probed with the ``"of_id"`` module parameter set to the ``"compatible"`` +string of the node the driver is supposed to handle. By default, the +node's name (without the unit address) is exposed as name for the +UIO device in userspace. To set a custom name, a property named +``"linux,uio-name"`` may be specified in the DT node. + +Using uio_dmem_genirq for platform devices +------------------------------------------ + +In addition to statically allocated memory ranges, they may also be a +desire to use dynamically allocated regions in a user space driver. In +particular, being able to access memory made available through the +dma-mapping API, may be particularly useful. The ``uio_dmem_genirq`` +driver provides a way to accomplish this. + +This driver is used in a similar manner to the ``"uio_pdrv_genirq"`` +driver with respect to interrupt configuration and handling. + +Set the ``.name`` element of ``struct platform_device`` to +``"uio_dmem_genirq"`` to use this driver. + +When using this driver, fill in the ``.platform_data`` element of +``struct platform_device``, which is of type +``struct uio_dmem_genirq_pdata`` and which contains the following +elements: + +- ``struct uio_info uioinfo``: The same structure used as the + ``uio_pdrv_genirq`` platform data + +- ``unsigned int *dynamic_region_sizes``: Pointer to list of sizes of + dynamic memory regions to be mapped into user space. + +- ``unsigned int num_dynamic_regions``: Number of elements in + ``dynamic_region_sizes`` array. + +The dynamic regions defined in the platform data will be appended to the +`` mem[] `` array after the platform device resources, which implies +that the total number of static and dynamic memory regions cannot exceed +``MAX_UIO_MAPS``. + +The dynamic memory regions will be allocated when the UIO device file, +``/dev/uioX`` is opened. Similar to static memory resources, the memory +region information for dynamic regions is then visible via sysfs at +``/sys/class/uio/uioX/maps/mapY/*``. The dynamic memory regions will be +freed when the UIO device file is closed. When no processes are holding +the device file open, the address returned to userspace is ~0. + +Writing a driver in userspace +============================= + +Once you have a working kernel module for your hardware, you can write +the userspace part of your driver. You don't need any special libraries, +your driver can be written in any reasonable language, you can use +floating point numbers and so on. In short, you can use all the tools +and libraries you'd normally use for writing a userspace application. + +Getting information about your UIO device +----------------------------------------- + +Information about all UIO devices is available in sysfs. The first thing +you should do in your driver is check ``name`` and ``version`` to make +sure you're talking to the right device and that its kernel driver has +the version you expect. + +You should also make sure that the memory mapping you need exists and +has the size you expect. + +There is a tool called ``lsuio`` that lists UIO devices and their +attributes. It is available here: + +http://www.osadl.org/projects/downloads/UIO/user/ + +With ``lsuio`` you can quickly check if your kernel module is loaded and +which attributes it exports. Have a look at the manpage for details. + +The source code of ``lsuio`` can serve as an example for getting +information about an UIO device. The file ``uio_helper.c`` contains a +lot of functions you could use in your userspace driver code. + +mmap() device memory +-------------------- + +After you made sure you've got the right device with the memory mappings +you need, all you have to do is to call :c:func:`mmap()` to map the +device's memory to userspace. + +The parameter ``offset`` of the :c:func:`mmap()` call has a special +meaning for UIO devices: It is used to select which mapping of your +device you want to map. To map the memory of mapping N, you have to use +N times the page size as your offset:: + + offset = N * getpagesize(); + +N starts from zero, so if you've got only one memory range to map, set +``offset = 0``. A drawback of this technique is that memory is always +mapped beginning with its start address. + +Waiting for interrupts +---------------------- + +After you successfully mapped your devices memory, you can access it +like an ordinary array. Usually, you will perform some initialization. +After that, your hardware starts working and will generate an interrupt +as soon as it's finished, has some data available, or needs your +attention because an error occurred. + +``/dev/uioX`` is a read-only file. A :c:func:`read()` will always +block until an interrupt occurs. There is only one legal value for the +``count`` parameter of :c:func:`read()`, and that is the size of a +signed 32 bit integer (4). Any other value for ``count`` causes +:c:func:`read()` to fail. The signed 32 bit integer read is the +interrupt count of your device. If the value is one more than the value +you read the last time, everything is OK. If the difference is greater +than one, you missed interrupts. + +You can also use :c:func:`select()` on ``/dev/uioX``. + +Generic PCI UIO driver +====================== + +The generic driver is a kernel module named uio_pci_generic. It can +work with any device compliant to PCI 2.3 (circa 2002) and any compliant +PCI Express device. Using this, you only need to write the userspace +driver, removing the need to write a hardware-specific kernel module. + +Making the driver recognize the device +-------------------------------------- + +Since the driver does not declare any device ids, it will not get loaded +automatically and will not automatically bind to any devices, you must +load it and allocate id to the driver yourself. For example:: + + modprobe uio_pci_generic + echo "8086 10f5" > /sys/bus/pci/drivers/uio_pci_generic/new_id + +If there already is a hardware specific kernel driver for your device, +the generic driver still won't bind to it, in this case if you want to +use the generic driver (why would you?) you'll have to manually unbind +the hardware specific driver and bind the generic driver, like this:: + + echo -n 0000:00:19.0 > /sys/bus/pci/drivers/e1000e/unbind + echo -n 0000:00:19.0 > /sys/bus/pci/drivers/uio_pci_generic/bind + +You can verify that the device has been bound to the driver by looking +for it in sysfs, for example like the following:: + + ls -l /sys/bus/pci/devices/0000:00:19.0/driver + +Which if successful should print:: + + .../0000:00:19.0/driver -> ../../../bus/pci/drivers/uio_pci_generic + +Note that the generic driver will not bind to old PCI 2.2 devices. If +binding the device failed, run the following command:: + + dmesg + +and look in the output for failure reasons. + +Things to know about uio_pci_generic +------------------------------------ + +Interrupts are handled using the Interrupt Disable bit in the PCI +command register and Interrupt Status bit in the PCI status register. +All devices compliant to PCI 2.3 (circa 2002) and all compliant PCI +Express devices should support these bits. uio_pci_generic detects +this support, and won't bind to devices which do not support the +Interrupt Disable Bit in the command register. + +On each interrupt, uio_pci_generic sets the Interrupt Disable bit. +This prevents the device from generating further interrupts until the +bit is cleared. The userspace driver should clear this bit before +blocking and waiting for more interrupts. + +Writing userspace driver using uio_pci_generic +------------------------------------------------ + +Userspace driver can use pci sysfs interface, or the libpci library that +wraps it, to talk to the device and to re-enable interrupts by writing +to the command register. + +Example code using uio_pci_generic +---------------------------------- + +Here is some sample userspace driver code using uio_pci_generic:: + + #include <stdlib.h> + #include <stdio.h> + #include <unistd.h> + #include <sys/types.h> + #include <sys/stat.h> + #include <fcntl.h> + #include <errno.h> + + int main() + { + int uiofd; + int configfd; + int err; + int i; + unsigned icount; + unsigned char command_high; + + uiofd = open("/dev/uio0", O_RDONLY); + if (uiofd < 0) { + perror("uio open:"); + return errno; + } + configfd = open("/sys/class/uio/uio0/device/config", O_RDWR); + if (configfd < 0) { + perror("config open:"); + return errno; + } + + /* Read and cache command value */ + err = pread(configfd, &command_high, 1, 5); + if (err != 1) { + perror("command config read:"); + return errno; + } + command_high &= ~0x4; + + for(i = 0;; ++i) { + /* Print out a message, for debugging. */ + if (i == 0) + fprintf(stderr, "Started uio test driver.\n"); + else + fprintf(stderr, "Interrupts: %d\n", icount); + + /****************************************/ + /* Here we got an interrupt from the + device. Do something to it. */ + /****************************************/ + + /* Re-enable interrupts. */ + err = pwrite(configfd, &command_high, 1, 5); + if (err != 1) { + perror("config write:"); + break; + } + + /* Wait for next interrupt. */ + err = read(uiofd, &icount, 4); + if (err != 4) { + perror("uio read:"); + break; + } + + } + return errno; + } + +Generic Hyper-V UIO driver +========================== + +The generic driver is a kernel module named uio_hv_generic. It +supports devices on the Hyper-V VMBus similar to uio_pci_generic on +PCI bus. + +Making the driver recognize the device +-------------------------------------- + +Since the driver does not declare any device GUID's, it will not get +loaded automatically and will not automatically bind to any devices, you +must load it and allocate id to the driver yourself. For example, to use +the network device class GUID:: + + modprobe uio_hv_generic + echo "f8615163-df3e-46c5-913f-f2d2f965ed0e" > /sys/bus/vmbus/drivers/uio_hv_generic/new_id + +If there already is a hardware specific kernel driver for the device, +the generic driver still won't bind to it, in this case if you want to +use the generic driver for a userspace library you'll have to manually unbind +the hardware specific driver and bind the generic driver, using the device specific GUID +like this:: + + echo -n ed963694-e847-4b2a-85af-bc9cfc11d6f3 > /sys/bus/vmbus/drivers/hv_netvsc/unbind + echo -n ed963694-e847-4b2a-85af-bc9cfc11d6f3 > /sys/bus/vmbus/drivers/uio_hv_generic/bind + +You can verify that the device has been bound to the driver by looking +for it in sysfs, for example like the following:: + + ls -l /sys/bus/vmbus/devices/ed963694-e847-4b2a-85af-bc9cfc11d6f3/driver + +Which if successful should print:: + + .../ed963694-e847-4b2a-85af-bc9cfc11d6f3/driver -> ../../../bus/vmbus/drivers/uio_hv_generic + +Things to know about uio_hv_generic +----------------------------------- + +On each interrupt, uio_hv_generic sets the Interrupt Disable bit. This +prevents the device from generating further interrupts until the bit is +cleared. The userspace driver should clear this bit before blocking and +waiting for more interrupts. + +When host rescinds a device, the interrupt file descriptor is marked down +and any reads of the interrupt file descriptor will return -EIO. Similar +to a closed socket or disconnected serial device. + +The vmbus device regions are mapped into uio device resources: + 0) Channel ring buffers: guest to host and host to guest + 1) Guest to host interrupt signalling pages + 2) Guest to host monitor page + 3) Network receive buffer region + 4) Network send buffer region + +If a subchannel is created by a request to host, then the uio_hv_generic +device driver will create a sysfs binary file for the per-channel ring buffer. +For example:: + + /sys/bus/vmbus/devices/3811fe4d-0fa0-4b62-981a-74fc1084c757/channels/21/ring + +Further information +=================== + +- `OSADL homepage. <http://www.osadl.org>`_ + +- `Linutronix homepage. <http://www.linutronix.de>`_ diff --git a/Documentation/driver-api/usb/URB.rst b/Documentation/driver-api/usb/URB.rst new file mode 100644 index 0000000000..a182c0f5e3 --- /dev/null +++ b/Documentation/driver-api/usb/URB.rst @@ -0,0 +1,290 @@ +.. _usb-urb: + +USB Request Block (URB) +~~~~~~~~~~~~~~~~~~~~~~~ + +:Revised: 2000-Dec-05 +:Again: 2002-Jul-06 +:Again: 2005-Sep-19 +:Again: 2017-Mar-29 + + +.. note:: + + The USB subsystem now has a substantial section at :ref:`usb-hostside-api` + section, generated from the current source code. + This particular documentation file isn't complete and may not be + updated to the last version; don't rely on it except for a quick + overview. + +Basic concept or 'What is an URB?' +================================== + +The basic idea of the new driver is message passing, the message itself is +called USB Request Block, or URB for short. + +- An URB consists of all relevant information to execute any USB transaction + and deliver the data and status back. + +- Execution of an URB is inherently an asynchronous operation, i.e. the + :c:func:`usb_submit_urb` call returns immediately after it has successfully + queued the requested action. + +- Transfers for one URB can be canceled with :c:func:`usb_unlink_urb` + at any time. + +- Each URB has a completion handler, which is called after the action + has been successfully completed or canceled. The URB also contains a + context-pointer for passing information to the completion handler. + +- Each endpoint for a device logically supports a queue of requests. + You can fill that queue, so that the USB hardware can still transfer + data to an endpoint while your driver handles completion of another. + This maximizes use of USB bandwidth, and supports seamless streaming + of data to (or from) devices when using periodic transfer modes. + + +The URB structure +================= + +Some of the fields in struct urb are:: + + struct urb + { + // (IN) device and pipe specify the endpoint queue + struct usb_device *dev; // pointer to associated USB device + unsigned int pipe; // endpoint information + + unsigned int transfer_flags; // URB_ISO_ASAP, URB_SHORT_NOT_OK, etc. + + // (IN) all urbs need completion routines + void *context; // context for completion routine + usb_complete_t complete; // pointer to completion routine + + // (OUT) status after each completion + int status; // returned status + + // (IN) buffer used for data transfers + void *transfer_buffer; // associated data buffer + u32 transfer_buffer_length; // data buffer length + int number_of_packets; // size of iso_frame_desc + + // (OUT) sometimes only part of CTRL/BULK/INTR transfer_buffer is used + u32 actual_length; // actual data buffer length + + // (IN) setup stage for CTRL (pass a struct usb_ctrlrequest) + unsigned char *setup_packet; // setup packet (control only) + + // Only for PERIODIC transfers (ISO, INTERRUPT) + // (IN/OUT) start_frame is set unless URB_ISO_ASAP isn't set + int start_frame; // start frame + int interval; // polling interval + + // ISO only: packets are only "best effort"; each can have errors + int error_count; // number of errors + struct usb_iso_packet_descriptor iso_frame_desc[0]; + }; + +Your driver must create the "pipe" value using values from the appropriate +endpoint descriptor in an interface that it's claimed. + + +How to get an URB? +================== + +URBs are allocated by calling :c:func:`usb_alloc_urb`:: + + struct urb *usb_alloc_urb(int isoframes, int mem_flags) + +Return value is a pointer to the allocated URB, 0 if allocation failed. +The parameter isoframes specifies the number of isochronous transfer frames +you want to schedule. For CTRL/BULK/INT, use 0. The mem_flags parameter +holds standard memory allocation flags, letting you control (among other +things) whether the underlying code may block or not. + +To free an URB, use :c:func:`usb_free_urb`:: + + void usb_free_urb(struct urb *urb) + +You may free an urb that you've submitted, but which hasn't yet been +returned to you in a completion callback. It will automatically be +deallocated when it is no longer in use. + + +What has to be filled in? +========================= + +Depending on the type of transaction, there are some inline functions +defined in ``linux/usb.h`` to simplify the initialization, such as +:c:func:`usb_fill_control_urb`, :c:func:`usb_fill_bulk_urb` and +:c:func:`usb_fill_int_urb`. In general, they need the usb device pointer, +the pipe (usual format from usb.h), the transfer buffer, the desired transfer +length, the completion handler, and its context. Take a look at the some +existing drivers to see how they're used. + +Flags: + +- For ISO there are two startup behaviors: Specified start_frame or ASAP. +- For ASAP set ``URB_ISO_ASAP`` in transfer_flags. + +If short packets should NOT be tolerated, set ``URB_SHORT_NOT_OK`` in +transfer_flags. + + +How to submit an URB? +===================== + +Just call :c:func:`usb_submit_urb`:: + + int usb_submit_urb(struct urb *urb, int mem_flags) + +The ``mem_flags`` parameter, such as ``GFP_ATOMIC``, controls memory +allocation, such as whether the lower levels may block when memory is tight. + +It immediately returns, either with status 0 (request queued) or some +error code, usually caused by the following: + +- Out of memory (``-ENOMEM``) +- Unplugged device (``-ENODEV``) +- Stalled endpoint (``-EPIPE``) +- Too many queued ISO transfers (``-EAGAIN``) +- Too many requested ISO frames (``-EFBIG``) +- Invalid INT interval (``-EINVAL``) +- More than one packet for INT (``-EINVAL``) + +After submission, ``urb->status`` is ``-EINPROGRESS``; however, you should +never look at that value except in your completion callback. + +For isochronous endpoints, your completion handlers should (re)submit +URBs to the same endpoint with the ``URB_ISO_ASAP`` flag, using +multi-buffering, to get seamless ISO streaming. + + +How to cancel an already running URB? +===================================== + +There are two ways to cancel an URB you've submitted but which hasn't +been returned to your driver yet. For an asynchronous cancel, call +:c:func:`usb_unlink_urb`:: + + int usb_unlink_urb(struct urb *urb) + +It removes the urb from the internal list and frees all allocated +HW descriptors. The status is changed to reflect unlinking. Note +that the URB will not normally have finished when :c:func:`usb_unlink_urb` +returns; you must still wait for the completion handler to be called. + +To cancel an URB synchronously, call :c:func:`usb_kill_urb`:: + + void usb_kill_urb(struct urb *urb) + +It does everything :c:func:`usb_unlink_urb` does, and in addition it waits +until after the URB has been returned and the completion handler +has finished. It also marks the URB as temporarily unusable, so +that if the completion handler or anyone else tries to resubmit it +they will get a ``-EPERM`` error. Thus you can be sure that when +:c:func:`usb_kill_urb` returns, the URB is totally idle. + +There is a lifetime issue to consider. An URB may complete at any +time, and the completion handler may free the URB. If this happens +while :c:func:`usb_unlink_urb` or :c:func:`usb_kill_urb` is running, it will +cause a memory-access violation. The driver is responsible for avoiding this, +which often means some sort of lock will be needed to prevent the URB +from being deallocated while it is still in use. + +On the other hand, since usb_unlink_urb may end up calling the +completion handler, the handler must not take any lock that is held +when usb_unlink_urb is invoked. The general solution to this problem +is to increment the URB's reference count while holding the lock, then +drop the lock and call usb_unlink_urb or usb_kill_urb, and then +decrement the URB's reference count. You increment the reference +count by calling :c:func`usb_get_urb`:: + + struct urb *usb_get_urb(struct urb *urb) + +(ignore the return value; it is the same as the argument) and +decrement the reference count by calling :c:func:`usb_free_urb`. Of course, +none of this is necessary if there's no danger of the URB being freed +by the completion handler. + + +What about the completion handler? +================================== + +The handler is of the following type:: + + typedef void (*usb_complete_t)(struct urb *) + +I.e., it gets the URB that caused the completion call. In the completion +handler, you should have a look at ``urb->status`` to detect any USB errors. +Since the context parameter is included in the URB, you can pass +information to the completion handler. + +Note that even when an error (or unlink) is reported, data may have been +transferred. That's because USB transfers are packetized; it might take +sixteen packets to transfer your 1KByte buffer, and ten of them might +have transferred successfully before the completion was called. + + +.. warning:: + + NEVER SLEEP IN A COMPLETION HANDLER. + + These are often called in atomic context. + +In the current kernel, completion handlers run with local interrupts +disabled, but in the future this will be changed, so don't assume that +local IRQs are always disabled inside completion handlers. + +How to do isochronous (ISO) transfers? +====================================== + +Besides the fields present on a bulk transfer, for ISO, you also +have to set ``urb->interval`` to say how often to make transfers; it's +often one per frame (which is once every microframe for highspeed devices). +The actual interval used will be a power of two that's no bigger than what +you specify. You can use the :c:func:`usb_fill_int_urb` macro to fill +most ISO transfer fields. + +For ISO transfers you also have to fill a :c:type:`usb_iso_packet_descriptor` +structure, allocated at the end of the URB by :c:func:`usb_alloc_urb`, for +each packet you want to schedule. + +The :c:func:`usb_submit_urb` call modifies ``urb->interval`` to the implemented +interval value that is less than or equal to the requested interval value. If +``URB_ISO_ASAP`` scheduling is used, ``urb->start_frame`` is also updated. + +For each entry you have to specify the data offset for this frame (base is +transfer_buffer), and the length you want to write/expect to read. +After completion, actual_length contains the actual transferred length and +status contains the resulting status for the ISO transfer for this frame. +It is allowed to specify a varying length from frame to frame (e.g. for +audio synchronisation/adaptive transfer rates). You can also use the length +0 to omit one or more frames (striping). + +For scheduling you can choose your own start frame or ``URB_ISO_ASAP``. As +explained earlier, if you always keep at least one URB queued and your +completion keeps (re)submitting a later URB, you'll get smooth ISO streaming +(if usb bandwidth utilization allows). + +If you specify your own start frame, make sure it's several frames in advance +of the current frame. You might want this model if you're synchronizing +ISO data with some other event stream. + + +How to start interrupt (INT) transfers? +======================================= + +Interrupt transfers, like isochronous transfers, are periodic, and happen +in intervals that are powers of two (1, 2, 4 etc) units. Units are frames +for full and low speed devices, and microframes for high speed ones. +You can use the :c:func:`usb_fill_int_urb` macro to fill INT transfer fields. + +The :c:func:`usb_submit_urb` call modifies ``urb->interval`` to the implemented +interval value that is less than or equal to the requested interval value. + +In Linux 2.6, unlike earlier versions, interrupt URBs are not automagically +restarted when they complete. They end when the completion handler is +called, just like other URBs. If you want an interrupt URB to be restarted, +your completion handler must resubmit it. +s diff --git a/Documentation/driver-api/usb/anchors.rst b/Documentation/driver-api/usb/anchors.rst new file mode 100644 index 0000000000..4b248e691b --- /dev/null +++ b/Documentation/driver-api/usb/anchors.rst @@ -0,0 +1,83 @@ +USB Anchors +~~~~~~~~~~~ + +What is anchor? +=============== + +A USB driver needs to support some callbacks requiring +a driver to cease all IO to an interface. To do so, a +driver has to keep track of the URBs it has submitted +to know they've all completed or to call usb_kill_urb +for them. The anchor is a data structure takes care of +keeping track of URBs and provides methods to deal with +multiple URBs. + +Allocation and Initialisation +============================= + +There's no API to allocate an anchor. It is simply declared +as struct usb_anchor. :c:func:`init_usb_anchor` must be called to +initialise the data structure. + +Deallocation +============ + +Once it has no more URBs associated with it, the anchor can be +freed with normal memory management operations. + +Association and disassociation of URBs with anchors +=================================================== + +An association of URBs to an anchor is made by an explicit +call to :c:func:`usb_anchor_urb`. The association is maintained until +an URB is finished by (successful) completion. Thus disassociation +is automatic. A function is provided to forcibly finish (kill) +all URBs associated with an anchor. +Furthermore, disassociation can be made with :c:func:`usb_unanchor_urb` + +Operations on multitudes of URBs +================================ + +:c:func:`usb_kill_anchored_urbs` +-------------------------------- + +This function kills all URBs associated with an anchor. The URBs +are called in the reverse temporal order they were submitted. +This way no data can be reordered. + +:c:func:`usb_unlink_anchored_urbs` +---------------------------------- + + +This function unlinks all URBs associated with an anchor. The URBs +are processed in the reverse temporal order they were submitted. +This is similar to :c:func:`usb_kill_anchored_urbs`, but it will not sleep. +Therefore no guarantee is made that the URBs have been unlinked when +the call returns. They may be unlinked later but will be unlinked in +finite time. + +:c:func:`usb_scuttle_anchored_urbs` +----------------------------------- + +All URBs of an anchor are unanchored en masse. + +:c:func:`usb_wait_anchor_empty_timeout` +--------------------------------------- + +This function waits for all URBs associated with an anchor to finish +or a timeout, whichever comes first. Its return value will tell you +whether the timeout was reached. + +:c:func:`usb_anchor_empty` +-------------------------- + +Returns true if no URBs are associated with an anchor. Locking +is the caller's responsibility. + +:c:func:`usb_get_from_anchor` +----------------------------- + +Returns the oldest anchored URB of an anchor. The URB is unanchored +and returned with a reference. As you may mix URBs to several +destinations in one anchor you have no guarantee the chronologically +first submitted URB is returned. diff --git a/Documentation/driver-api/usb/bulk-streams.rst b/Documentation/driver-api/usb/bulk-streams.rst new file mode 100644 index 0000000000..eeefe582f8 --- /dev/null +++ b/Documentation/driver-api/usb/bulk-streams.rst @@ -0,0 +1,83 @@ +USB bulk streams +~~~~~~~~~~~~~~~~ + +Background +========== + +Bulk endpoint streams were added in the USB 3.0 specification. Streams allow a +device driver to overload a bulk endpoint so that multiple transfers can be +queued at once. + +Streams are defined in sections 4.4.6.4 and 8.12.1.4 of the Universal Serial Bus +3.0 specification at https://www.usb.org/developers/docs/ The USB Attached SCSI +Protocol, which uses streams to queue multiple SCSI commands, can be found on +the T10 website (https://t10.org/). + + +Device-side implications +======================== + +Once a buffer has been queued to a stream ring, the device is notified (through +an out-of-band mechanism on another endpoint) that data is ready for that stream +ID. The device then tells the host which "stream" it wants to start. The host +can also initiate a transfer on a stream without the device asking, but the +device can refuse that transfer. Devices can switch between streams at any +time. + + +Driver implications +=================== + +:: + + int usb_alloc_streams(struct usb_interface *interface, + struct usb_host_endpoint **eps, unsigned int num_eps, + unsigned int num_streams, gfp_t mem_flags); + +Device drivers will call this API to request that the host controller driver +allocate memory so the driver can use up to num_streams stream IDs. They must +pass an array of usb_host_endpoints that need to be setup with similar stream +IDs. This is to ensure that a UASP driver will be able to use the same stream +ID for the bulk IN and OUT endpoints used in a Bi-directional command sequence. + +The return value is an error condition (if one of the endpoints doesn't support +streams, or the xHCI driver ran out of memory), or the number of streams the +host controller allocated for this endpoint. The xHCI host controller hardware +declares how many stream IDs it can support, and each bulk endpoint on a +SuperSpeed device will say how many stream IDs it can handle. Therefore, +drivers should be able to deal with being allocated less stream IDs than they +requested. + +Do NOT call this function if you have URBs enqueued for any of the endpoints +passed in as arguments. Do not call this function to request less than two +streams. + +Drivers will only be allowed to call this API once for the same endpoint +without calling usb_free_streams(). This is a simplification for the xHCI host +controller driver, and may change in the future. + + +Picking new Stream IDs to use +============================= + +Stream ID 0 is reserved, and should not be used to communicate with devices. If +usb_alloc_streams() returns with a value of N, you may use streams 1 though N. +To queue an URB for a specific stream, set the urb->stream_id value. If the +endpoint does not support streams, an error will be returned. + +Note that new API to choose the next stream ID will have to be added if the xHCI +driver supports secondary stream IDs. + + +Clean up +======== + +If a driver wishes to stop using streams to communicate with the device, it +should call:: + + void usb_free_streams(struct usb_interface *interface, + struct usb_host_endpoint **eps, unsigned int num_eps, + gfp_t mem_flags); + +All stream IDs will be deallocated when the driver releases the interface, to +ensure that drivers that don't support streams will be able to use the endpoint. diff --git a/Documentation/driver-api/usb/callbacks.rst b/Documentation/driver-api/usb/callbacks.rst new file mode 100644 index 0000000000..2b80cf54bc --- /dev/null +++ b/Documentation/driver-api/usb/callbacks.rst @@ -0,0 +1,157 @@ +USB core callbacks +~~~~~~~~~~~~~~~~~~ + +What callbacks will usbcore do? +=============================== + +Usbcore will call into a driver through callbacks defined in the driver +structure and through the completion handler of URBs a driver submits. +Only the former are in the scope of this document. These two kinds of +callbacks are completely independent of each other. Information on the +completion callback can be found in :ref:`usb-urb`. + +The callbacks defined in the driver structure are: + +1. Hotplugging callbacks: + + - @probe: + Called to see if the driver is willing to manage a particular + interface on a device. + + - @disconnect: + Called when the interface is no longer accessible, usually + because its device has been (or is being) disconnected or the + driver module is being unloaded. + +2. Odd backdoor through usbfs: + + - @ioctl: + Used for drivers that want to talk to userspace through + the "usbfs" filesystem. This lets devices provide ways to + expose information to user space regardless of where they + do (or don't) show up otherwise in the filesystem. + +3. Power management (PM) callbacks: + + - @suspend: + Called when the device is going to be suspended. + + - @resume: + Called when the device is being resumed. + + - @reset_resume: + Called when the suspended device has been reset instead + of being resumed. + +4. Device level operations: + + - @pre_reset: + Called when the device is about to be reset. + + - @post_reset: + Called after the device has been reset + +The ioctl interface (2) should be used only if you have a very good +reason. Sysfs is preferred these days. The PM callbacks are covered +separately in :ref:`usb-power-management`. + +Calling conventions +=================== + +All callbacks are mutually exclusive. There's no need for locking +against other USB callbacks. All callbacks are called from a task +context. You may sleep. However, it is important that all sleeps have a +small fixed upper limit in time. In particular you must not call out to +user space and await results. + +Hotplugging callbacks +===================== + +These callbacks are intended to associate and disassociate a driver with +an interface. A driver's bond to an interface is exclusive. + +The probe() callback +-------------------- + +:: + + int (*probe) (struct usb_interface *intf, + const struct usb_device_id *id); + +Accept or decline an interface. If you accept the device return 0, +otherwise -ENODEV or -ENXIO. Other error codes should be used only if a +genuine error occurred during initialisation which prevented a driver +from accepting a device that would else have been accepted. +You are strongly encouraged to use usbcore's facility, +usb_set_intfdata(), to associate a data structure with an interface, so +that you know which internal state and identity you associate with a +particular interface. The device will not be suspended and you may do IO +to the interface you are called for and endpoint 0 of the device. Device +initialisation that doesn't take too long is a good idea here. + +The disconnect() callback +------------------------- + +:: + + void (*disconnect) (struct usb_interface *intf); + +This callback is a signal to break any connection with an interface. +You are not allowed any IO to a device after returning from this +callback. You also may not do any other operation that may interfere +with another driver bound the interface, eg. a power management +operation. +If you are called due to a physical disconnection, all your URBs will be +killed by usbcore. Note that in this case disconnect will be called some +time after the physical disconnection. Thus your driver must be prepared +to deal with failing IO even prior to the callback. + +Device level callbacks +====================== + +pre_reset +--------- + +:: + + int (*pre_reset)(struct usb_interface *intf); + +A driver or user space is triggering a reset on the device which +contains the interface passed as an argument. Cease IO, wait for all +outstanding URBs to complete, and save any device state you need to +restore. No more URBs may be submitted until the post_reset method +is called. + +If you need to allocate memory here, use GFP_NOIO or GFP_ATOMIC, if you +are in atomic context. + +post_reset +---------- + +:: + + int (*post_reset)(struct usb_interface *intf); + +The reset has completed. Restore any saved device state and begin +using the device again. + +If you need to allocate memory here, use GFP_NOIO or GFP_ATOMIC, if you +are in atomic context. + +Call sequences +============== + +No callbacks other than probe will be invoked for an interface +that isn't bound to your driver. + +Probe will never be called for an interface bound to a driver. +Hence following a successful probe, disconnect will be called +before there is another probe for the same interface. + +Once your driver is bound to an interface, disconnect can be +called at any time except in between pre_reset and post_reset. +pre_reset is always followed by post_reset, even if the reset +failed or the device has been unplugged. + +suspend is always followed by one of: resume, reset_resume, or +disconnect. diff --git a/Documentation/driver-api/usb/dma.rst b/Documentation/driver-api/usb/dma.rst new file mode 100644 index 0000000000..d32c27e11b --- /dev/null +++ b/Documentation/driver-api/usb/dma.rst @@ -0,0 +1,136 @@ +USB DMA +~~~~~~~ + +In Linux 2.5 kernels (and later), USB device drivers have additional control +over how DMA may be used to perform I/O operations. The APIs are detailed +in the kernel usb programming guide (kerneldoc, from the source code). + +API overview +============ + +The big picture is that USB drivers can continue to ignore most DMA issues, +though they still must provide DMA-ready buffers (see +Documentation/core-api/dma-api-howto.rst). That's how they've worked through +the 2.4 (and earlier) kernels, or they can now be DMA-aware. + +DMA-aware usb drivers: + +- New calls enable DMA-aware drivers, letting them allocate dma buffers and + manage dma mappings for existing dma-ready buffers (see below). + +- URBs have an additional "transfer_dma" field, as well as a transfer_flags + bit saying if it's valid. (Control requests also have "setup_dma", but + drivers must not use it.) + +- "usbcore" will map this DMA address, if a DMA-aware driver didn't do + it first and set ``URB_NO_TRANSFER_DMA_MAP``. HCDs + don't manage dma mappings for URBs. + +- There's a new "generic DMA API", parts of which are usable by USB device + drivers. Never use dma_set_mask() on any USB interface or device; that + would potentially break all devices sharing that bus. + +Eliminating copies +================== + +It's good to avoid making CPUs copy data needlessly. The costs can add up, +and effects like cache-trashing can impose subtle penalties. + +- If you're doing lots of small data transfers from the same buffer all + the time, that can really burn up resources on systems which use an + IOMMU to manage the DMA mappings. It can cost MUCH more to set up and + tear down the IOMMU mappings with each request than perform the I/O! + + For those specific cases, USB has primitives to allocate less expensive + memory. They work like kmalloc and kfree versions that give you the right + kind of addresses to store in urb->transfer_buffer and urb->transfer_dma. + You'd also set ``URB_NO_TRANSFER_DMA_MAP`` in urb->transfer_flags:: + + void *usb_alloc_coherent (struct usb_device *dev, size_t size, + int mem_flags, dma_addr_t *dma); + + void usb_free_coherent (struct usb_device *dev, size_t size, + void *addr, dma_addr_t dma); + + Most drivers should **NOT** be using these primitives; they don't need + to use this type of memory ("dma-coherent"), and memory returned from + :c:func:`kmalloc` will work just fine. + + The memory buffer returned is "dma-coherent"; sometimes you might need to + force a consistent memory access ordering by using memory barriers. It's + not using a streaming DMA mapping, so it's good for small transfers on + systems where the I/O would otherwise thrash an IOMMU mapping. (See + Documentation/core-api/dma-api-howto.rst for definitions of "coherent" and + "streaming" DMA mappings.) + + Asking for 1/Nth of a page (as well as asking for N pages) is reasonably + space-efficient. + + On most systems the memory returned will be uncached, because the + semantics of dma-coherent memory require either bypassing CPU caches + or using cache hardware with bus-snooping support. While x86 hardware + has such bus-snooping, many other systems use software to flush cache + lines to prevent DMA conflicts. + +- Devices on some EHCI controllers could handle DMA to/from high memory. + + Unfortunately, the current Linux DMA infrastructure doesn't have a sane + way to expose these capabilities ... and in any case, HIGHMEM is mostly a + design wart specific to x86_32. So your best bet is to ensure you never + pass a highmem buffer into a USB driver. That's easy; it's the default + behavior. Just don't override it; e.g. with ``NETIF_F_HIGHDMA``. + + This may force your callers to do some bounce buffering, copying from + high memory to "normal" DMA memory. If you can come up with a good way + to fix this issue (for x86_32 machines with over 1 GByte of memory), + feel free to submit patches. + +Working with existing buffers +============================= + +Existing buffers aren't usable for DMA without first being mapped into the +DMA address space of the device. However, most buffers passed to your +driver can safely be used with such DMA mapping. (See the first section +of Documentation/core-api/dma-api-howto.rst, titled "What memory is DMA-able?") + +- When you're using scatterlists, you can map everything at once. On some + systems, this kicks in an IOMMU and turns the scatterlists into single + DMA transactions:: + + int usb_buffer_map_sg (struct usb_device *dev, unsigned pipe, + struct scatterlist *sg, int nents); + + void usb_buffer_dmasync_sg (struct usb_device *dev, unsigned pipe, + struct scatterlist *sg, int n_hw_ents); + + void usb_buffer_unmap_sg (struct usb_device *dev, unsigned pipe, + struct scatterlist *sg, int n_hw_ents); + + It's probably easier to use the new ``usb_sg_*()`` calls, which do the DMA + mapping and apply other tweaks to make scatterlist i/o be fast. + +- Some drivers may prefer to work with the model that they're mapping large + buffers, synchronizing their safe re-use. (If there's no re-use, then let + usbcore do the map/unmap.) Large periodic transfers make good examples + here, since it's cheaper to just synchronize the buffer than to unmap it + each time an urb completes and then re-map it on during resubmission. + + These calls all work with initialized urbs: ``urb->dev``, ``urb->pipe``, + ``urb->transfer_buffer``, and ``urb->transfer_buffer_length`` must all be + valid when these calls are used (``urb->setup_packet`` must be valid too + if urb is a control request):: + + struct urb *usb_buffer_map (struct urb *urb); + + void usb_buffer_dmasync (struct urb *urb); + + void usb_buffer_unmap (struct urb *urb); + + The calls manage ``urb->transfer_dma`` for you, and set + ``URB_NO_TRANSFER_DMA_MAP`` so that usbcore won't map or unmap the buffer. + They cannot be used for setup_packet buffers in control requests. + +Note that several of those interfaces are currently commented out, since +they don't have current users. See the source code. Other than the dmasync +calls (where the underlying DMA primitives have changed), most of them can +easily be commented back in if you want to use them. diff --git a/Documentation/driver-api/usb/dwc3.rst b/Documentation/driver-api/usb/dwc3.rst new file mode 100644 index 0000000000..e3d6a62099 --- /dev/null +++ b/Documentation/driver-api/usb/dwc3.rst @@ -0,0 +1,711 @@ +=============================================================== +Synopsys DesignWare Core SuperSpeed USB 3.0 Controller +=============================================================== + +:Author: Felipe Balbi <felipe.balbi@linux.intel.com> +:Date: April 2017 + +Introduction +============ + +The *Synopsys DesignWare Core SuperSpeed USB 3.0 Controller* +(hereinafter referred to as *DWC3*) is a USB SuperSpeed compliant +controller which can be configured in one of 4 ways: + + 1. Peripheral-only configuration + 2. Host-only configuration + 3. Dual-Role configuration + 4. Hub configuration + +Linux currently supports several versions of this controller. In all +likelihood, the version in your SoC is already supported. At the time +of this writing, known tested versions range from 2.02a to 3.10a. As a +rule of thumb, anything above 2.02a should work reliably well. + +Currently, we have many known users for this driver. In alphabetical +order: + + 1. Cavium + 2. Intel Corporation + 3. Qualcomm + 4. Rockchip + 5. ST + 6. Samsung + 7. Texas Instruments + 8. Xilinx + +Summary of Features +====================== + +For details about features supported by your version of DWC3, consult +your IP team and/or *Synopsys DesignWare Core SuperSpeed USB 3.0 +Controller Databook*. Following is a list of features supported by the +driver at the time of this writing: + + 1. Up to 16 bidirectional endpoints (including the control + pipe - ep0) + 2. Flexible endpoint configuration + 3. Simultaneous IN and OUT transfer support + 4. Scatter-list support + 5. Up to 256 TRBs [#trb]_ per endpoint + 6. Support for all transfer types (*Control*, *Bulk*, + *Interrupt*, and *Isochronous*) + 7. SuperSpeed Bulk Streams + 8. Link Power Management + 9. Trace Events for debugging + 10. DebugFS [#debugfs]_ interface + +These features have all been exercised with many of the **in-tree** +gadget drivers. We have verified both *ConfigFS* [#configfs]_ and +legacy gadget drivers. + +Driver Design +============== + +The DWC3 driver sits on the *drivers/usb/dwc3/* directory. All files +related to this driver are in this one directory. This makes it easy +for new-comers to read the code and understand how it behaves. + +Because of DWC3's configuration flexibility, the driver is a little +complex in some places but it should be rather straightforward to +understand. + +The biggest part of the driver refers to the Gadget API. + +Known Limitations +=================== + +Like any other HW, DWC3 has its own set of limitations. To avoid +constant questions about such problems, we decided to document them +here and have a single location to where we could point users. + +OUT Transfer Size Requirements +--------------------------------- + +According to Synopsys Databook, all OUT transfer TRBs [#trb]_ must +have their *size* field set to a value which is integer divisible by +the endpoint's *wMaxPacketSize*. This means that *e.g.* in order to +receive a Mass Storage *CBW* [#cbw]_, req->length must either be set +to a value that's divisible by *wMaxPacketSize* (1024 on SuperSpeed, +512 on HighSpeed, etc), or DWC3 driver must add a Chained TRB pointing +to a throw-away buffer for the remaining length. Without this, OUT +transfers will **NOT** start. + +Note that as of this writing, this won't be a problem because DWC3 is +fully capable of appending a chained TRB for the remaining length and +completely hide this detail from the gadget driver. It's still worth +mentioning because this seems to be the largest source of queries +about DWC3 and *non-working transfers*. + +TRB Ring Size Limitation +------------------------- + +We, currently, have a hard limit of 256 TRBs [#trb]_ per endpoint, +with the last TRB being a Link TRB [#link_trb]_ pointing back to the +first. This limit is arbitrary but it has the benefit of adding up to +exactly 4096 bytes, or 1 Page. + +DWC3 driver will try its best to cope with more than 255 requests and, +for the most part, it should work normally. However this is not +something that has been exercised very frequently. If you experience +any problems, see section **Reporting Bugs** below. + +Reporting Bugs +================ + +Whenever you encounter a problem with DWC3, first and foremost you +should make sure that: + + 1. You're running latest tag from `Linus' tree`_ + 2. You can reproduce the error without any out-of-tree changes + to DWC3 + 3. You have checked that it's not a fault on the host machine + +After all these are verified, then here's how to capture enough +information so we can be of any help to you. + +Required Information +--------------------- + +DWC3 relies exclusively on Trace Events for debugging. Everything is +exposed there, with some extra bits being exposed to DebugFS +[#debugfs]_. + +In order to capture DWC3's Trace Events you should run the following +commands **before** plugging the USB cable to a host machine: + +.. code-block:: sh + + # mkdir -p /d + # mkdir -p /t + # mount -t debugfs none /d + # mount -t tracefs none /t + # echo 81920 > /t/buffer_size_kb + # echo 1 > /t/events/dwc3/enable + +After this is done, you can connect your USB cable and reproduce the +problem. As soon as the fault is reproduced, make a copy of files +``trace`` and ``regdump``, like so: + +.. code-block:: sh + + # cp /t/trace /root/trace.txt + # cat /d/*dwc3*/regdump > /root/regdump.txt + +Make sure to compress ``trace.txt`` and ``regdump.txt`` in a tarball +and email it to `me`_ with `linux-usb`_ in Cc. If you want to be extra +sure that I'll help you, write your subject line in the following +format: + + **[BUG REPORT] usb: dwc3: Bug while doing XYZ** + +On the email body, make sure to detail what you doing, which gadget +driver you were using, how to reproduce the problem, what SoC you're +using, which OS (and its version) was running on the Host machine. + +With all this information, we should be able to understand what's +going on and be helpful to you. + +Debugging +=========== + +First and foremost a disclaimer:: + + DISCLAIMER: The information available on DebugFS and/or TraceFS can + change at any time at any Major Linux Kernel Release. If writing + scripts, do **NOT** assume information to be available in the + current format. + +With that out of the way, let's carry on. + +If you're willing to debug your own problem, you deserve a round of +applause :-) + +Anyway, there isn't much to say here other than Trace Events will be +really helpful in figuring out issues with DWC3. Also, access to +Synopsys Databook will be **really** valuable in this case. + +A USB Sniffer can be helpful at times but it's not entirely required, +there's a lot that can be understood without looking at the wire. + +Feel free to email `me`_ and Cc `linux-usb`_ if you need any help. + +``DebugFS`` +------------- + +``DebugFS`` is very good for gathering snapshots of what's going on +with DWC3 and/or any endpoint. + +On DWC3's ``DebugFS`` directory, you will find the following files and +directories: + +``ep[0..15]{in,out}/`` +``link_state`` +``regdump`` +``testmode`` + +``link_state`` +`````````````` + +When read, ``link_state`` will print out one of ``U0``, ``U1``, +``U2``, ``U3``, ``SS.Disabled``, ``RX.Detect``, ``SS.Inactive``, +``Polling``, ``Recovery``, ``Hot Reset``, ``Compliance``, +``Loopback``, ``Reset``, ``Resume`` or ``UNKNOWN link state``. + +This file can also be written to in order to force link to one of the +states above. + +``regdump`` +````````````` + +File name is self-explanatory. When read, ``regdump`` will print out a +register dump of DWC3. Note that this file can be grepped to find the +information you want. + +``testmode`` +`````````````` + +When read, ``testmode`` will print out a name of one of the specified +USB 2.0 Testmodes (``test_j``, ``test_k``, ``test_se0_nak``, +``test_packet``, ``test_force_enable``) or the string ``no test`` in +case no tests are currently being executed. + +In order to start any of these test modes, the same strings can be +written to the file and DWC3 will enter the requested test mode. + + +``ep[0..15]{in,out}`` +`````````````````````` + +For each endpoint we expose one directory following the naming +convention ``ep$num$dir`` *(ep0in, ep0out, ep1in, ...)*. Inside each +of these directories you will find the following files: + +``descriptor_fetch_queue`` +``event_queue`` +``rx_fifo_queue`` +``rx_info_queue`` +``rx_request_queue`` +``transfer_type`` +``trb_ring`` +``tx_fifo_queue`` +``tx_request_queue`` + +With access to Synopsys Databook, you can decode the information on +them. + +``transfer_type`` +~~~~~~~~~~~~~~~~~~ + +When read, ``transfer_type`` will print out one of ``control``, +``bulk``, ``interrupt`` or ``isochronous`` depending on what the +endpoint descriptor says. If the endpoint hasn't been enabled yet, it +will print ``--``. + +``trb_ring`` +~~~~~~~~~~~~~ + +When read, ``trb_ring`` will print out details about all TRBs on the +ring. It will also tell you where our enqueue and dequeue pointers are +located in the ring: + +.. code-block:: sh + + buffer_addr,size,type,ioc,isp_imi,csp,chn,lst,hwo + 000000002c754000,481,normal,1,0,1,0,0,0 + 000000002c75c000,481,normal,1,0,1,0,0,0 + 000000002c780000,481,normal,1,0,1,0,0,0 + 000000002c788000,481,normal,1,0,1,0,0,0 + 000000002c78c000,481,normal,1,0,1,0,0,0 + 000000002c754000,481,normal,1,0,1,0,0,0 + 000000002c75c000,481,normal,1,0,1,0,0,0 + 000000002c784000,481,normal,1,0,1,0,0,0 + 000000002c788000,481,normal,1,0,1,0,0,0 + 000000002c78c000,481,normal,1,0,1,0,0,0 + 000000002c790000,481,normal,1,0,1,0,0,0 + 000000002c758000,481,normal,1,0,1,0,0,0 + 000000002c780000,481,normal,1,0,1,0,0,0 + 000000002c788000,481,normal,1,0,1,0,0,0 + 000000002c790000,481,normal,1,0,1,0,0,0 + 000000002c758000,481,normal,1,0,1,0,0,0 + 000000002c780000,481,normal,1,0,1,0,0,0 + 000000002c784000,481,normal,1,0,1,0,0,0 + 000000002c788000,481,normal,1,0,1,0,0,0 + 000000002c78c000,481,normal,1,0,1,0,0,0 + 000000002c754000,481,normal,1,0,1,0,0,0 + 000000002c758000,481,normal,1,0,1,0,0,0 + 000000002c780000,481,normal,1,0,1,0,0,0 + 000000002c784000,481,normal,1,0,1,0,0,0 + 000000002c78c000,481,normal,1,0,1,0,0,0 + 000000002c790000,481,normal,1,0,1,0,0,0 + 000000002c758000,481,normal,1,0,1,0,0,0 + 000000002c780000,481,normal,1,0,1,0,0,0 + 000000002c788000,481,normal,1,0,1,0,0,0 + 000000002c790000,481,normal,1,0,1,0,0,0 + 000000002c758000,481,normal,1,0,1,0,0,0 + 000000002c780000,481,normal,1,0,1,0,0,0 + 000000002c788000,481,normal,1,0,1,0,0,0 + 000000002c790000,481,normal,1,0,1,0,0,0 + 000000002c758000,481,normal,1,0,1,0,0,0 + 000000002c780000,481,normal,1,0,1,0,0,0 + 000000002c788000,481,normal,1,0,1,0,0,0 + 000000002c790000,481,normal,1,0,1,0,0,0 + 000000002c758000,481,normal,1,0,1,0,0,0 + 000000002c780000,481,normal,1,0,1,0,0,0 + 000000002c788000,481,normal,1,0,1,0,0,0 + 000000002c790000,481,normal,1,0,1,0,0,0 + 000000002c758000,481,normal,1,0,1,0,0,0 + 000000002c780000,481,normal,1,0,1,0,0,0 + 000000002c788000,481,normal,1,0,1,0,0,0 + 000000002c790000,481,normal,1,0,1,0,0,0 + 000000002c758000,481,normal,1,0,1,0,0,0 + 000000002c780000,481,normal,1,0,1,0,0,0 + 000000002c788000,481,normal,1,0,1,0,0,0 + 000000002c790000,481,normal,1,0,1,0,0,0 + 000000002c758000,481,normal,1,0,1,0,0,0 + 000000002c780000,481,normal,1,0,1,0,0,0 + 000000002c788000,481,normal,1,0,1,0,0,0 + 000000002c790000,481,normal,1,0,1,0,0,0 + 000000002c758000,481,normal,1,0,1,0,0,0 + 000000002c780000,481,normal,1,0,1,0,0,0 + 000000002c78c000,481,normal,1,0,1,0,0,0 + 000000002c784000,481,normal,1,0,1,0,0,0 + 000000002c788000,481,normal,1,0,1,0,0,0 + 000000002c78c000,481,normal,1,0,1,0,0,0 + 000000002c754000,481,normal,1,0,1,0,0,0 + 000000002c758000,481,normal,1,0,1,0,0,0 + 000000002c780000,481,normal,1,0,1,0,0,0 + 000000002c788000,481,normal,1,0,1,0,0,0 + 000000002c790000,481,normal,1,0,1,0,0,0 + 000000002c758000,481,normal,1,0,1,0,0,0 + 000000002c780000,481,normal,1,0,1,0,0,0 + 000000002c758000,481,normal,1,0,1,0,0,0 + 000000002c780000,481,normal,1,0,1,0,0,0 + 000000002c78c000,481,normal,1,0,1,0,0,0 + 000000002c75c000,481,normal,1,0,1,0,0,0 + 000000002c78c000,481,normal,1,0,1,0,0,0 + 000000002c780000,481,normal,1,0,1,0,0,0 + 000000002c754000,481,normal,1,0,1,0,0,0 + 000000002c788000,481,normal,1,0,1,0,0,0 + 000000002c754000,481,normal,1,0,1,0,0,0 + 000000002c780000,481,normal,1,0,1,0,0,0 + 000000002c788000,481,normal,1,0,1,0,0,0 + 000000002c78c000,481,normal,1,0,1,0,0,0 + 000000002c790000,481,normal,1,0,1,0,0,0 + 000000002c754000,481,normal,1,0,1,0,0,0 + 000000002c758000,481,normal,1,0,1,0,0,0 + 000000002c75c000,481,normal,1,0,1,0,0,0 + 000000002c780000,481,normal,1,0,1,0,0,0 + 000000002c784000,481,normal,1,0,1,0,0,0 + 000000002c788000,481,normal,1,0,1,0,0,0 + 000000002c78c000,481,normal,1,0,1,0,0,0 + 000000002c790000,481,normal,1,0,1,0,0,0 + 000000002c754000,481,normal,1,0,1,0,0,0 + 000000002c758000,481,normal,1,0,1,0,0,0 + 000000002c75c000,512,normal,1,0,1,0,0,1 D + 0000000000000000,0,UNKNOWN,0,0,0,0,0,0 E + 0000000000000000,0,UNKNOWN,0,0,0,0,0,0 + 0000000000000000,0,UNKNOWN,0,0,0,0,0,0 + 0000000000000000,0,UNKNOWN,0,0,0,0,0,0 + 0000000000000000,0,UNKNOWN,0,0,0,0,0,0 + 0000000000000000,0,UNKNOWN,0,0,0,0,0,0 + 0000000000000000,0,UNKNOWN,0,0,0,0,0,0 + 0000000000000000,0,UNKNOWN,0,0,0,0,0,0 + 0000000000000000,0,UNKNOWN,0,0,0,0,0,0 + 0000000000000000,0,UNKNOWN,0,0,0,0,0,0 + 0000000000000000,0,UNKNOWN,0,0,0,0,0,0 + 0000000000000000,0,UNKNOWN,0,0,0,0,0,0 + 0000000000000000,0,UNKNOWN,0,0,0,0,0,0 + 0000000000000000,0,UNKNOWN,0,0,0,0,0,0 + 0000000000000000,0,UNKNOWN,0,0,0,0,0,0 + 0000000000000000,0,UNKNOWN,0,0,0,0,0,0 + 0000000000000000,0,UNKNOWN,0,0,0,0,0,0 + 0000000000000000,0,UNKNOWN,0,0,0,0,0,0 + 0000000000000000,0,UNKNOWN,0,0,0,0,0,0 + 0000000000000000,0,UNKNOWN,0,0,0,0,0,0 + 0000000000000000,0,UNKNOWN,0,0,0,0,0,0 + 0000000000000000,0,UNKNOWN,0,0,0,0,0,0 + 0000000000000000,0,UNKNOWN,0,0,0,0,0,0 + 0000000000000000,0,UNKNOWN,0,0,0,0,0,0 + 0000000000000000,0,UNKNOWN,0,0,0,0,0,0 + 0000000000000000,0,UNKNOWN,0,0,0,0,0,0 + 0000000000000000,0,UNKNOWN,0,0,0,0,0,0 + 0000000000000000,0,UNKNOWN,0,0,0,0,0,0 + 0000000000000000,0,UNKNOWN,0,0,0,0,0,0 + 0000000000000000,0,UNKNOWN,0,0,0,0,0,0 + 0000000000000000,0,UNKNOWN,0,0,0,0,0,0 + 0000000000000000,0,UNKNOWN,0,0,0,0,0,0 + 0000000000000000,0,UNKNOWN,0,0,0,0,0,0 + 0000000000000000,0,UNKNOWN,0,0,0,0,0,0 + 0000000000000000,0,UNKNOWN,0,0,0,0,0,0 + 0000000000000000,0,UNKNOWN,0,0,0,0,0,0 + 0000000000000000,0,UNKNOWN,0,0,0,0,0,0 + 0000000000000000,0,UNKNOWN,0,0,0,0,0,0 + 0000000000000000,0,UNKNOWN,0,0,0,0,0,0 + 0000000000000000,0,UNKNOWN,0,0,0,0,0,0 + 0000000000000000,0,UNKNOWN,0,0,0,0,0,0 + 0000000000000000,0,UNKNOWN,0,0,0,0,0,0 + 0000000000000000,0,UNKNOWN,0,0,0,0,0,0 + 0000000000000000,0,UNKNOWN,0,0,0,0,0,0 + 0000000000000000,0,UNKNOWN,0,0,0,0,0,0 + 0000000000000000,0,UNKNOWN,0,0,0,0,0,0 + 0000000000000000,0,UNKNOWN,0,0,0,0,0,0 + 0000000000000000,0,UNKNOWN,0,0,0,0,0,0 + 0000000000000000,0,UNKNOWN,0,0,0,0,0,0 + 0000000000000000,0,UNKNOWN,0,0,0,0,0,0 + 0000000000000000,0,UNKNOWN,0,0,0,0,0,0 + 0000000000000000,0,UNKNOWN,0,0,0,0,0,0 + 0000000000000000,0,UNKNOWN,0,0,0,0,0,0 + 0000000000000000,0,UNKNOWN,0,0,0,0,0,0 + 0000000000000000,0,UNKNOWN,0,0,0,0,0,0 + 0000000000000000,0,UNKNOWN,0,0,0,0,0,0 + 0000000000000000,0,UNKNOWN,0,0,0,0,0,0 + 0000000000000000,0,UNKNOWN,0,0,0,0,0,0 + 0000000000000000,0,UNKNOWN,0,0,0,0,0,0 + 0000000000000000,0,UNKNOWN,0,0,0,0,0,0 + 0000000000000000,0,UNKNOWN,0,0,0,0,0,0 + 0000000000000000,0,UNKNOWN,0,0,0,0,0,0 + 0000000000000000,0,UNKNOWN,0,0,0,0,0,0 + 0000000000000000,0,UNKNOWN,0,0,0,0,0,0 + 0000000000000000,0,UNKNOWN,0,0,0,0,0,0 + 0000000000000000,0,UNKNOWN,0,0,0,0,0,0 + 0000000000000000,0,UNKNOWN,0,0,0,0,0,0 + 0000000000000000,0,UNKNOWN,0,0,0,0,0,0 + 0000000000000000,0,UNKNOWN,0,0,0,0,0,0 + 0000000000000000,0,UNKNOWN,0,0,0,0,0,0 + 0000000000000000,0,UNKNOWN,0,0,0,0,0,0 + 0000000000000000,0,UNKNOWN,0,0,0,0,0,0 + 0000000000000000,0,UNKNOWN,0,0,0,0,0,0 + 0000000000000000,0,UNKNOWN,0,0,0,0,0,0 + 0000000000000000,0,UNKNOWN,0,0,0,0,0,0 + 0000000000000000,0,UNKNOWN,0,0,0,0,0,0 + 0000000000000000,0,UNKNOWN,0,0,0,0,0,0 + 0000000000000000,0,UNKNOWN,0,0,0,0,0,0 + 0000000000000000,0,UNKNOWN,0,0,0,0,0,0 + 0000000000000000,0,UNKNOWN,0,0,0,0,0,0 + 0000000000000000,0,UNKNOWN,0,0,0,0,0,0 + 0000000000000000,0,UNKNOWN,0,0,0,0,0,0 + 0000000000000000,0,UNKNOWN,0,0,0,0,0,0 + 0000000000000000,0,UNKNOWN,0,0,0,0,0,0 + 0000000000000000,0,UNKNOWN,0,0,0,0,0,0 + 0000000000000000,0,UNKNOWN,0,0,0,0,0,0 + 0000000000000000,0,UNKNOWN,0,0,0,0,0,0 + 0000000000000000,0,UNKNOWN,0,0,0,0,0,0 + 0000000000000000,0,UNKNOWN,0,0,0,0,0,0 + 0000000000000000,0,UNKNOWN,0,0,0,0,0,0 + 0000000000000000,0,UNKNOWN,0,0,0,0,0,0 + 0000000000000000,0,UNKNOWN,0,0,0,0,0,0 + 0000000000000000,0,UNKNOWN,0,0,0,0,0,0 + 0000000000000000,0,UNKNOWN,0,0,0,0,0,0 + 0000000000000000,0,UNKNOWN,0,0,0,0,0,0 + 0000000000000000,0,UNKNOWN,0,0,0,0,0,0 + 0000000000000000,0,UNKNOWN,0,0,0,0,0,0 + 0000000000000000,0,UNKNOWN,0,0,0,0,0,0 + 0000000000000000,0,UNKNOWN,0,0,0,0,0,0 + 0000000000000000,0,UNKNOWN,0,0,0,0,0,0 + 0000000000000000,0,UNKNOWN,0,0,0,0,0,0 + 0000000000000000,0,UNKNOWN,0,0,0,0,0,0 + 0000000000000000,0,UNKNOWN,0,0,0,0,0,0 + 0000000000000000,0,UNKNOWN,0,0,0,0,0,0 + 0000000000000000,0,UNKNOWN,0,0,0,0,0,0 + 0000000000000000,0,UNKNOWN,0,0,0,0,0,0 + 0000000000000000,0,UNKNOWN,0,0,0,0,0,0 + 0000000000000000,0,UNKNOWN,0,0,0,0,0,0 + 0000000000000000,0,UNKNOWN,0,0,0,0,0,0 + 0000000000000000,0,UNKNOWN,0,0,0,0,0,0 + 0000000000000000,0,UNKNOWN,0,0,0,0,0,0 + 0000000000000000,0,UNKNOWN,0,0,0,0,0,0 + 0000000000000000,0,UNKNOWN,0,0,0,0,0,0 + 0000000000000000,0,UNKNOWN,0,0,0,0,0,0 + 0000000000000000,0,UNKNOWN,0,0,0,0,0,0 + 0000000000000000,0,UNKNOWN,0,0,0,0,0,0 + 0000000000000000,0,UNKNOWN,0,0,0,0,0,0 + 0000000000000000,0,UNKNOWN,0,0,0,0,0,0 + 0000000000000000,0,UNKNOWN,0,0,0,0,0,0 + 0000000000000000,0,UNKNOWN,0,0,0,0,0,0 + 0000000000000000,0,UNKNOWN,0,0,0,0,0,0 + 0000000000000000,0,UNKNOWN,0,0,0,0,0,0 + 0000000000000000,0,UNKNOWN,0,0,0,0,0,0 + 0000000000000000,0,UNKNOWN,0,0,0,0,0,0 + 0000000000000000,0,UNKNOWN,0,0,0,0,0,0 + 0000000000000000,0,UNKNOWN,0,0,0,0,0,0 + 0000000000000000,0,UNKNOWN,0,0,0,0,0,0 + 0000000000000000,0,UNKNOWN,0,0,0,0,0,0 + 0000000000000000,0,UNKNOWN,0,0,0,0,0,0 + 0000000000000000,0,UNKNOWN,0,0,0,0,0,0 + 0000000000000000,0,UNKNOWN,0,0,0,0,0,0 + 0000000000000000,0,UNKNOWN,0,0,0,0,0,0 + 0000000000000000,0,UNKNOWN,0,0,0,0,0,0 + 0000000000000000,0,UNKNOWN,0,0,0,0,0,0 + 0000000000000000,0,UNKNOWN,0,0,0,0,0,0 + 0000000000000000,0,UNKNOWN,0,0,0,0,0,0 + 0000000000000000,0,UNKNOWN,0,0,0,0,0,0 + 0000000000000000,0,UNKNOWN,0,0,0,0,0,0 + 0000000000000000,0,UNKNOWN,0,0,0,0,0,0 + 0000000000000000,0,UNKNOWN,0,0,0,0,0,0 + 0000000000000000,0,UNKNOWN,0,0,0,0,0,0 + 0000000000000000,0,UNKNOWN,0,0,0,0,0,0 + 0000000000000000,0,UNKNOWN,0,0,0,0,0,0 + 0000000000000000,0,UNKNOWN,0,0,0,0,0,0 + 0000000000000000,0,UNKNOWN,0,0,0,0,0,0 + 0000000000000000,0,UNKNOWN,0,0,0,0,0,0 + 0000000000000000,0,UNKNOWN,0,0,0,0,0,0 + 0000000000000000,0,UNKNOWN,0,0,0,0,0,0 + 0000000000000000,0,UNKNOWN,0,0,0,0,0,0 + 0000000000000000,0,UNKNOWN,0,0,0,0,0,0 + 0000000000000000,0,UNKNOWN,0,0,0,0,0,0 + 0000000000000000,0,UNKNOWN,0,0,0,0,0,0 + 0000000000000000,0,UNKNOWN,0,0,0,0,0,0 + 0000000000000000,0,UNKNOWN,0,0,0,0,0,0 + 0000000000000000,0,UNKNOWN,0,0,0,0,0,0 + 0000000000000000,0,UNKNOWN,0,0,0,0,0,0 + 0000000000000000,0,UNKNOWN,0,0,0,0,0,0 + 0000000000000000,0,UNKNOWN,0,0,0,0,0,0 + 0000000000000000,0,UNKNOWN,0,0,0,0,0,0 + 0000000000000000,0,UNKNOWN,0,0,0,0,0,0 + 0000000000000000,0,UNKNOWN,0,0,0,0,0,0 + 0000000000000000,0,UNKNOWN,0,0,0,0,0,0 + 0000000000000000,0,UNKNOWN,0,0,0,0,0,0 + 0000000000000000,0,UNKNOWN,0,0,0,0,0,0 + 00000000381ab000,0,link,0,0,0,0,0,1 + + +Trace Events +------------- + +DWC3 also provides several trace events which help us gathering +information about the behavior of the driver during runtime. + +In order to use these events, you must enable ``CONFIG_FTRACE`` in +your kernel config. + +For details about how enable DWC3 events, see section **Reporting +Bugs**. + +The following subsections will give details about each Event Class and +each Event defined by DWC3. + +MMIO +``````` + +It is sometimes useful to look at every MMIO access when looking for +bugs. Because of that, DWC3 offers two Trace Events (one for +dwc3_readl() and one for dwc3_writel()). ``TP_printk`` follows:: + + TP_printk("addr %p value %08x", __entry->base + __entry->offset, + __entry->value) + +Interrupt Events +```````````````` + +Every IRQ event can be logged and decoded into a human readable +string. Because every event will be different, we don't give an +example other than the ``TP_printk`` format used:: + + TP_printk("event (%08x): %s", __entry->event, + dwc3_decode_event(__entry->event, __entry->ep0state)) + +Control Request +````````````````` + +Every USB Control Request can be logged to the trace buffer. The +output format is:: + + TP_printk("%s", dwc3_decode_ctrl(__entry->bRequestType, + __entry->bRequest, __entry->wValue, + __entry->wIndex, __entry->wLength) + ) + +Note that Standard Control Requests will be decoded into +human-readable strings with their respective arguments. Class and +Vendor requests will be printed out a sequence of 8 bytes in hex +format. + +Lifetime of a ``struct usb_request`` +``````````````````````````````````````` + +The entire lifetime of a ``struct usb_request`` can be tracked on the +trace buffer. We have one event for each of allocation, free, +queueing, dequeueing, and giveback. Output format is:: + + TP_printk("%s: req %p length %u/%u %s%s%s ==> %d", + __get_str(name), __entry->req, __entry->actual, __entry->length, + __entry->zero ? "Z" : "z", + __entry->short_not_ok ? "S" : "s", + __entry->no_interrupt ? "i" : "I", + __entry->status + ) + +Generic Commands +```````````````````` + +We can log and decode every Generic Command with its completion +code. Format is:: + + TP_printk("cmd '%s' [%x] param %08x --> status: %s", + dwc3_gadget_generic_cmd_string(__entry->cmd), + __entry->cmd, __entry->param, + dwc3_gadget_generic_cmd_status_string(__entry->status) + ) + +Endpoint Commands +```````````````````` + +Endpoints commands can also be logged together with completion +code. Format is:: + + TP_printk("%s: cmd '%s' [%d] params %08x %08x %08x --> status: %s", + __get_str(name), dwc3_gadget_ep_cmd_string(__entry->cmd), + __entry->cmd, __entry->param0, + __entry->param1, __entry->param2, + dwc3_ep_cmd_status_string(__entry->cmd_status) + ) + +Lifetime of a ``TRB`` +`````````````````````` + +A ``TRB`` Lifetime is simple. We are either preparing a ``TRB`` or +completing it. With these two events, we can see how a ``TRB`` changes +over time. Format is:: + + TP_printk("%s: %d/%d trb %p buf %08x%08x size %s%d ctrl %08x (%c%c%c%c:%c%c:%s)", + __get_str(name), __entry->queued, __entry->allocated, + __entry->trb, __entry->bph, __entry->bpl, + ({char *s; + int pcm = ((__entry->size >> 24) & 3) + 1; + switch (__entry->type) { + case USB_ENDPOINT_XFER_INT: + case USB_ENDPOINT_XFER_ISOC: + switch (pcm) { + case 1: + s = "1x "; + break; + case 2: + s = "2x "; + break; + case 3: + s = "3x "; + break; + } + default: + s = ""; + } s; }), + DWC3_TRB_SIZE_LENGTH(__entry->size), __entry->ctrl, + __entry->ctrl & DWC3_TRB_CTRL_HWO ? 'H' : 'h', + __entry->ctrl & DWC3_TRB_CTRL_LST ? 'L' : 'l', + __entry->ctrl & DWC3_TRB_CTRL_CHN ? 'C' : 'c', + __entry->ctrl & DWC3_TRB_CTRL_CSP ? 'S' : 's', + __entry->ctrl & DWC3_TRB_CTRL_ISP_IMI ? 'S' : 's', + __entry->ctrl & DWC3_TRB_CTRL_IOC ? 'C' : 'c', + dwc3_trb_type_string(DWC3_TRBCTL_TYPE(__entry->ctrl)) + ) + +Lifetime of an Endpoint +``````````````````````` + +And endpoint's lifetime is summarized with enable and disable +operations, both of which can be traced. Format is:: + + TP_printk("%s: mps %d/%d streams %d burst %d ring %d/%d flags %c:%c%c%c%c%c:%c:%c", + __get_str(name), __entry->maxpacket, + __entry->maxpacket_limit, __entry->max_streams, + __entry->maxburst, __entry->trb_enqueue, + __entry->trb_dequeue, + __entry->flags & DWC3_EP_ENABLED ? 'E' : 'e', + __entry->flags & DWC3_EP_STALL ? 'S' : 's', + __entry->flags & DWC3_EP_WEDGE ? 'W' : 'w', + __entry->flags & DWC3_EP_TRANSFER_STARTED ? 'B' : 'b', + __entry->flags & DWC3_EP_PENDING_REQUEST ? 'P' : 'p', + __entry->flags & DWC3_EP_END_TRANSFER_PENDING ? 'E' : 'e', + __entry->direction ? '<' : '>' + ) + + +Structures, Methods and Definitions +==================================== + +.. kernel-doc:: drivers/usb/dwc3/core.h + :doc: main data structures + :internal: + +.. kernel-doc:: drivers/usb/dwc3/gadget.h + :doc: gadget-only helpers + :internal: + +.. kernel-doc:: drivers/usb/dwc3/gadget.c + :doc: gadget-side implementation + :internal: + +.. kernel-doc:: drivers/usb/dwc3/core.c + :doc: core driver (probe, PM, etc) + :internal: + +.. [#trb] Transfer Request Block +.. [#link_trb] Transfer Request Block pointing to another Transfer + Request Block. +.. [#debugfs] The Debug File System +.. [#configfs] The Config File System +.. [#cbw] Command Block Wrapper +.. _Linus' tree: https://git.kernel.org/pub/scm/linux/kernel/git/torvalds/linux.git/ +.. _me: felipe.balbi@linux.intel.com +.. _linux-usb: linux-usb@vger.kernel.org diff --git a/Documentation/driver-api/usb/error-codes.rst b/Documentation/driver-api/usb/error-codes.rst new file mode 100644 index 0000000000..8f9790c2d6 --- /dev/null +++ b/Documentation/driver-api/usb/error-codes.rst @@ -0,0 +1,210 @@ +.. _usb-error-codes: + +USB Error codes +~~~~~~~~~~~~~~~ + +:Revised: 2004-Oct-21 + +This is the documentation of (hopefully) all possible error codes (and +their interpretation) that can be returned from usbcore. + +Some of them are returned by the Host Controller Drivers (HCDs), which +device drivers only see through usbcore. As a rule, all the HCDs should +behave the same except for transfer speed dependent behaviors and the +way certain faults are reported. + + +Error codes returned by :c:func:`usb_submit_urb` +================================================ + +Non-USB-specific: + + +=============== =============================================== +0 URB submission went fine + +``-ENOMEM`` no memory for allocation of internal structures +=============== =============================================== + +USB-specific: + +======================= ======================================================= +``-EBUSY`` The URB is already active. + +``-ENODEV`` specified USB-device or bus doesn't exist + +``-ENOENT`` specified interface or endpoint does not exist or + is not enabled + +``-ENXIO`` host controller driver does not support queuing of + this type of urb. (treat as a host controller bug.) + +``-EINVAL`` a) Invalid transfer type specified (or not supported) + b) Invalid or unsupported periodic transfer interval + c) ISO: attempted to change transfer interval + d) ISO: ``number_of_packets`` is < 0 + e) various other cases + +``-EXDEV`` ISO: ``URB_ISO_ASAP`` wasn't specified and all the + frames the URB would be scheduled in have already + expired. + +``-EFBIG`` Host controller driver can't schedule that many ISO + frames. + +``-EPIPE`` The pipe type specified in the URB doesn't match the + endpoint's actual type. + +``-EMSGSIZE`` (a) endpoint maxpacket size is zero; it is not usable + in the current interface altsetting. + (b) ISO packet is larger than the endpoint maxpacket. + (c) requested data transfer length is invalid: negative + or too large for the host controller. + +``-EBADR`` The wLength value in a control URB's setup packet does + not match the URB's transfer_buffer_length. + +``-ENOSPC`` This request would overcommit the usb bandwidth reserved + for periodic transfers (interrupt, isochronous). + +``-ESHUTDOWN`` The device or host controller has been disabled due to + some problem that could not be worked around. + +``-EPERM`` Submission failed because ``urb->reject`` was set. + +``-EHOSTUNREACH`` URB was rejected because the device is suspended. + +``-ENOEXEC`` A control URB doesn't contain a Setup packet. +======================= ======================================================= + +Error codes returned by ``in urb->status`` or in ``iso_frame_desc[n].status`` (for ISO) +======================================================================================= + +USB device drivers may only test urb status values in completion handlers. +This is because otherwise there would be a race between HCDs updating +these values on one CPU, and device drivers testing them on another CPU. + +A transfer's actual_length may be positive even when an error has been +reported. That's because transfers often involve several packets, so that +one or more packets could finish before an error stops further endpoint I/O. + +For isochronous URBs, the urb status value is non-zero only if the URB is +unlinked, the device is removed, the host controller is disabled, or the total +transferred length is less than the requested length and the +``URB_SHORT_NOT_OK`` flag is set. Completion handlers for isochronous URBs +should only see ``urb->status`` set to zero, ``-ENOENT``, ``-ECONNRESET``, +``-ESHUTDOWN``, or ``-EREMOTEIO``. Individual frame descriptor status fields +may report more status codes. + + +=============================== =============================================== +0 Transfer completed successfully + +``-ENOENT`` URB was synchronously unlinked by + :c:func:`usb_unlink_urb` + +``-EINPROGRESS`` URB still pending, no results yet + (That is, if drivers see this it's a bug.) + +``-EPROTO`` [#f1]_, [#f2]_ a) bitstuff error + b) no response packet received within the + prescribed bus turn-around time + c) unknown USB error + +``-EILSEQ`` [#f1]_, [#f2]_ a) CRC mismatch + b) no response packet received within the + prescribed bus turn-around time + c) unknown USB error + + Note that often the controller hardware does + not distinguish among cases a), b), and c), so + a driver cannot tell whether there was a + protocol error, a failure to respond (often + caused by device disconnect), or some other + fault. + +``-ETIME`` [#f2]_ No response packet received within the + prescribed bus turn-around time. This error + may instead be reported as + ``-EPROTO`` or ``-EILSEQ``. + +``-ETIMEDOUT`` Synchronous USB message functions use this code + to indicate timeout expired before the transfer + completed, and no other error was reported + by HC. + +``-EPIPE`` [#f2]_ Endpoint stalled. For non-control endpoints, + reset this status with + :c:func:`usb_clear_halt`. + +``-ECOMM`` During an IN transfer, the host controller + received data from an endpoint faster than it + could be written to system memory + +``-ENOSR`` During an OUT transfer, the host controller + could not retrieve data from system memory fast + enough to keep up with the USB data rate + +``-EOVERFLOW`` [#f1]_ The amount of data returned by the endpoint was + greater than either the max packet size of the + endpoint or the remaining buffer size. + "Babble". + +``-EREMOTEIO`` The data read from the endpoint did not fill + the specified buffer, and ``URB_SHORT_NOT_OK`` + was set in ``urb->transfer_flags``. + +``-ENODEV`` Device was removed. Often preceded by a burst + of other errors, since the hub driver doesn't + detect device removal events immediately. + +``-EXDEV`` ISO transfer only partially completed + (only set in ``iso_frame_desc[n].status``, + not ``urb->status``) + +``-EINVAL`` ISO madness, if this happens: Log off and + go home + +``-ECONNRESET`` URB was asynchronously unlinked by + :c:func:`usb_unlink_urb` + +``-ESHUTDOWN`` The device or host controller has been + disabled due to some problem that could not + be worked around, such as a physical + disconnect. +=============================== =============================================== + + +.. [#f1] + + Error codes like ``-EPROTO``, ``-EILSEQ`` and ``-EOVERFLOW`` normally + indicate hardware problems such as bad devices (including firmware) + or cables. + +.. [#f2] + + This is also one of several codes that different kinds of host + controller use to indicate a transfer has failed because of device + disconnect. In the interval before the hub driver starts disconnect + processing, devices may receive such fault reports for every request. + + + +Error codes returned by usbcore-functions +========================================= + +.. note:: expect also other submit and transfer status codes + +:c:func:`usb_register`: + +======================= =================================== +``-EINVAL`` error during registering new driver +======================= =================================== + +``usb_get_*/usb_set_*()``, +:c:func:`usb_control_msg`, +:c:func:`usb_bulk_msg()`: + +======================= ============================================== +``-ETIMEDOUT`` Timeout expired before the transfer completed. +======================= ============================================== diff --git a/Documentation/driver-api/usb/gadget.rst b/Documentation/driver-api/usb/gadget.rst new file mode 100644 index 0000000000..09396edd61 --- /dev/null +++ b/Documentation/driver-api/usb/gadget.rst @@ -0,0 +1,510 @@ +======================== +USB Gadget API for Linux +======================== + +:Author: David Brownell +:Date: 20 August 2004 + +Introduction +============ + +This document presents a Linux-USB "Gadget" kernel mode API, for use +within peripherals and other USB devices that embed Linux. It provides +an overview of the API structure, and shows how that fits into a system +development project. This is the first such API released on Linux to +address a number of important problems, including: + +- Supports USB 2.0, for high speed devices which can stream data at + several dozen megabytes per second. + +- Handles devices with dozens of endpoints just as well as ones with + just two fixed-function ones. Gadget drivers can be written so + they're easy to port to new hardware. + +- Flexible enough to expose more complex USB device capabilities such + as multiple configurations, multiple interfaces, composite devices, + and alternate interface settings. + +- USB "On-The-Go" (OTG) support, in conjunction with updates to the + Linux-USB host side. + +- Sharing data structures and API models with the Linux-USB host side + API. This helps the OTG support, and looks forward to more-symmetric + frameworks (where the same I/O model is used by both host and device + side drivers). + +- Minimalist, so it's easier to support new device controller hardware. + I/O processing doesn't imply large demands for memory or CPU + resources. + +Most Linux developers will not be able to use this API, since they have +USB ``host`` hardware in a PC, workstation, or server. Linux users with +embedded systems are more likely to have USB peripheral hardware. To +distinguish drivers running inside such hardware from the more familiar +Linux "USB device drivers", which are host side proxies for the real USB +devices, a different term is used: the drivers inside the peripherals +are "USB gadget drivers". In USB protocol interactions, the device +driver is the master (or "client driver") and the gadget driver is the +slave (or "function driver"). + +The gadget API resembles the host side Linux-USB API in that both use +queues of request objects to package I/O buffers, and those requests may +be submitted or canceled. They share common definitions for the standard +USB *Chapter 9* messages, structures, and constants. Also, both APIs +bind and unbind drivers to devices. The APIs differ in detail, since the +host side's current URB framework exposes a number of implementation +details and assumptions that are inappropriate for a gadget API. While +the model for control transfers and configuration management is +necessarily different (one side is a hardware-neutral master, the other +is a hardware-aware slave), the endpoint I/0 API used here should also +be usable for an overhead-reduced host side API. + +Structure of Gadget Drivers +=========================== + +A system running inside a USB peripheral normally has at least three +layers inside the kernel to handle USB protocol processing, and may have +additional layers in user space code. The ``gadget`` API is used by the +middle layer to interact with the lowest level (which directly handles +hardware). + +In Linux, from the bottom up, these layers are: + +*USB Controller Driver* + This is the lowest software level. It is the only layer that talks + to hardware, through registers, fifos, dma, irqs, and the like. The + ``<linux/usb/gadget.h>`` API abstracts the peripheral controller + endpoint hardware. That hardware is exposed through endpoint + objects, which accept streams of IN/OUT buffers, and through + callbacks that interact with gadget drivers. Since normal USB + devices only have one upstream port, they only have one of these + drivers. The controller driver can support any number of different + gadget drivers, but only one of them can be used at a time. + + Examples of such controller hardware include the PCI-based NetChip + 2280 USB 2.0 high speed controller, the SA-11x0 or PXA-25x UDC + (found within many PDAs), and a variety of other products. + +*Gadget Driver* + The lower boundary of this driver implements hardware-neutral USB + functions, using calls to the controller driver. Because such + hardware varies widely in capabilities and restrictions, and is used + in embedded environments where space is at a premium, the gadget + driver is often configured at compile time to work with endpoints + supported by one particular controller. Gadget drivers may be + portable to several different controllers, using conditional + compilation. (Recent kernels substantially simplify the work + involved in supporting new hardware, by *autoconfiguring* endpoints + automatically for many bulk-oriented drivers.) Gadget driver + responsibilities include: + + - handling setup requests (ep0 protocol responses) possibly + including class-specific functionality + + - returning configuration and string descriptors + + - (re)setting configurations and interface altsettings, including + enabling and configuring endpoints + + - handling life cycle events, such as managing bindings to + hardware, USB suspend/resume, remote wakeup, and disconnection + from the USB host. + + - managing IN and OUT transfers on all currently enabled endpoints + + Such drivers may be modules of proprietary code, although that + approach is discouraged in the Linux community. + +*Upper Level* + Most gadget drivers have an upper boundary that connects to some + Linux driver or framework in Linux. Through that boundary flows the + data which the gadget driver produces and/or consumes through + protocol transfers over USB. Examples include: + + - user mode code, using generic (gadgetfs) or application specific + files in ``/dev`` + + - networking subsystem (for network gadgets, like the CDC Ethernet + Model gadget driver) + + - data capture drivers, perhaps video4Linux or a scanner driver; or + test and measurement hardware. + + - input subsystem (for HID gadgets) + + - sound subsystem (for audio gadgets) + + - file system (for PTP gadgets) + + - block i/o subsystem (for usb-storage gadgets) + + - ... and more + +*Additional Layers* + Other layers may exist. These could include kernel layers, such as + network protocol stacks, as well as user mode applications building + on standard POSIX system call APIs such as ``open()``, ``close()``, + ``read()`` and ``write()``. On newer systems, POSIX Async I/O calls may + be an option. Such user mode code will not necessarily be subject to + the GNU General Public License (GPL). + +OTG-capable systems will also need to include a standard Linux-USB host +side stack, with ``usbcore``, one or more *Host Controller Drivers* +(HCDs), *USB Device Drivers* to support the OTG "Targeted Peripheral +List", and so forth. There will also be an *OTG Controller Driver*, +which is visible to gadget and device driver developers only indirectly. +That helps the host and device side USB controllers implement the two +new OTG protocols (HNP and SRP). Roles switch (host to peripheral, or +vice versa) using HNP during USB suspend processing, and SRP can be +viewed as a more battery-friendly kind of device wakeup protocol. + +Over time, reusable utilities are evolving to help make some gadget +driver tasks simpler. For example, building configuration descriptors +from vectors of descriptors for the configurations interfaces and +endpoints is now automated, and many drivers now use autoconfiguration +to choose hardware endpoints and initialize their descriptors. A +potential example of particular interest is code implementing standard +USB-IF protocols for HID, networking, storage, or audio classes. Some +developers are interested in KDB or KGDB hooks, to let target hardware +be remotely debugged. Most such USB protocol code doesn't need to be +hardware-specific, any more than network protocols like X11, HTTP, or +NFS are. Such gadget-side interface drivers should eventually be +combined, to implement composite devices. + +Kernel Mode Gadget API +====================== + +Gadget drivers declare themselves through a struct +:c:type:`usb_gadget_driver`, which is responsible for most parts of enumeration +for a struct usb_gadget. The response to a set_configuration usually +involves enabling one or more of the struct usb_ep objects exposed by +the gadget, and submitting one or more struct usb_request buffers to +transfer data. Understand those four data types, and their operations, +and you will understand how this API works. + +.. Note:: + + Other than the "Chapter 9" data types, most of the significant data + types and functions are described here. + + However, some relevant information is likely omitted from what you + are reading. One example of such information is endpoint + autoconfiguration. You'll have to read the header file, and use + example source code (such as that for "Gadget Zero"), to fully + understand the API. + + The part of the API implementing some basic driver capabilities is + specific to the version of the Linux kernel that's in use. The 2.6 + and upper kernel versions include a *driver model* framework that has + no analogue on earlier kernels; so those parts of the gadget API are + not fully portable. (They are implemented on 2.4 kernels, but in a + different way.) The driver model state is another part of this API that is + ignored by the kerneldoc tools. + +The core API does not expose every possible hardware feature, only the +most widely available ones. There are significant hardware features, +such as device-to-device DMA (without temporary storage in a memory +buffer) that would be added using hardware-specific APIs. + +This API allows drivers to use conditional compilation to handle +endpoint capabilities of different hardware, but doesn't require that. +Hardware tends to have arbitrary restrictions, relating to transfer +types, addressing, packet sizes, buffering, and availability. As a rule, +such differences only matter for "endpoint zero" logic that handles +device configuration and management. The API supports limited run-time +detection of capabilities, through naming conventions for endpoints. +Many drivers will be able to at least partially autoconfigure +themselves. In particular, driver init sections will often have endpoint +autoconfiguration logic that scans the hardware's list of endpoints to +find ones matching the driver requirements (relying on those +conventions), to eliminate some of the most common reasons for +conditional compilation. + +Like the Linux-USB host side API, this API exposes the "chunky" nature +of USB messages: I/O requests are in terms of one or more "packets", and +packet boundaries are visible to drivers. Compared to RS-232 serial +protocols, USB resembles synchronous protocols like HDLC (N bytes per +frame, multipoint addressing, host as the primary station and devices as +secondary stations) more than asynchronous ones (tty style: 8 data bits +per frame, no parity, one stop bit). So for example the controller +drivers won't buffer two single byte writes into a single two-byte USB +IN packet, although gadget drivers may do so when they implement +protocols where packet boundaries (and "short packets") are not +significant. + +Driver Life Cycle +----------------- + +Gadget drivers make endpoint I/O requests to hardware without needing to +know many details of the hardware, but driver setup/configuration code +needs to handle some differences. Use the API like this: + +1. Register a driver for the particular device side usb controller + hardware, such as the net2280 on PCI (USB 2.0), sa11x0 or pxa25x as + found in Linux PDAs, and so on. At this point the device is logically + in the USB ch9 initial state (``attached``), drawing no power and not + usable (since it does not yet support enumeration). Any host should + not see the device, since it's not activated the data line pullup + used by the host to detect a device, even if VBUS power is available. + +2. Register a gadget driver that implements some higher level device + function. That will then bind() to a :c:type:`usb_gadget`, which activates + the data line pullup sometime after detecting VBUS. + +3. The hardware driver can now start enumerating. The steps it handles + are to accept USB ``power`` and ``set_address`` requests. Other steps are + handled by the gadget driver. If the gadget driver module is unloaded + before the host starts to enumerate, steps before step 7 are skipped. + +4. The gadget driver's ``setup()`` call returns usb descriptors, based both + on what the bus interface hardware provides and on the functionality + being implemented. That can involve alternate settings or + configurations, unless the hardware prevents such operation. For OTG + devices, each configuration descriptor includes an OTG descriptor. + +5. The gadget driver handles the last step of enumeration, when the USB + host issues a ``set_configuration`` call. It enables all endpoints used + in that configuration, with all interfaces in their default settings. + That involves using a list of the hardware's endpoints, enabling each + endpoint according to its descriptor. It may also involve using + ``usb_gadget_vbus_draw`` to let more power be drawn from VBUS, as + allowed by that configuration. For OTG devices, setting a + configuration may also involve reporting HNP capabilities through a + user interface. + +6. Do real work and perform data transfers, possibly involving changes + to interface settings or switching to new configurations, until the + device is disconnect()ed from the host. Queue any number of transfer + requests to each endpoint. It may be suspended and resumed several + times before being disconnected. On disconnect, the drivers go back + to step 3 (above). + +7. When the gadget driver module is being unloaded, the driver unbind() + callback is issued. That lets the controller driver be unloaded. + +Drivers will normally be arranged so that just loading the gadget driver +module (or statically linking it into a Linux kernel) allows the +peripheral device to be enumerated, but some drivers will defer +enumeration until some higher level component (like a user mode daemon) +enables it. Note that at this lowest level there are no policies about +how ep0 configuration logic is implemented, except that it should obey +USB specifications. Such issues are in the domain of gadget drivers, +including knowing about implementation constraints imposed by some USB +controllers or understanding that composite devices might happen to be +built by integrating reusable components. + +Note that the lifecycle above can be slightly different for OTG devices. +Other than providing an additional OTG descriptor in each configuration, +only the HNP-related differences are particularly visible to driver +code. They involve reporting requirements during the ``SET_CONFIGURATION`` +request, and the option to invoke HNP during some suspend callbacks. +Also, SRP changes the semantics of ``usb_gadget_wakeup`` slightly. + +USB 2.0 Chapter 9 Types and Constants +------------------------------------- + +Gadget drivers rely on common USB structures and constants defined in +the :ref:`linux/usb/ch9.h <usb_chapter9>` header file, which is standard in +Linux 2.6+ kernels. These are the same types and constants used by host side +drivers (and usbcore). + +Core Objects and Methods +------------------------ + +These are declared in ``<linux/usb/gadget.h>``, and are used by gadget +drivers to interact with USB peripheral controller drivers. + +.. kernel-doc:: include/linux/usb/gadget.h + :internal: + +Optional Utilities +------------------ + +The core API is sufficient for writing a USB Gadget Driver, but some +optional utilities are provided to simplify common tasks. These +utilities include endpoint autoconfiguration. + +.. kernel-doc:: drivers/usb/gadget/usbstring.c + :export: + +.. kernel-doc:: drivers/usb/gadget/config.c + :export: + +Composite Device Framework +-------------------------- + +The core API is sufficient for writing drivers for composite USB devices +(with more than one function in a given configuration), and also +multi-configuration devices (also more than one function, but not +necessarily sharing a given configuration). There is however an optional +framework which makes it easier to reuse and combine functions. + +Devices using this framework provide a struct usb_composite_driver, +which in turn provides one or more struct usb_configuration +instances. Each such configuration includes at least one struct +:c:type:`usb_function`, which packages a user visible role such as "network +link" or "mass storage device". Management functions may also exist, +such as "Device Firmware Upgrade". + +.. kernel-doc:: include/linux/usb/composite.h + :internal: + +.. kernel-doc:: drivers/usb/gadget/composite.c + :export: + +Composite Device Functions +-------------------------- + +At this writing, a few of the current gadget drivers have been converted +to this framework. Near-term plans include converting all of them, +except for ``gadgetfs``. + +Peripheral Controller Drivers +============================= + +The first hardware supporting this API was the NetChip 2280 controller, +which supports USB 2.0 high speed and is based on PCI. This is the +``net2280`` driver module. The driver supports Linux kernel versions 2.4 +and 2.6; contact NetChip Technologies for development boards and product +information. + +Other hardware working in the ``gadget`` framework includes: Intel's PXA +25x and IXP42x series processors (``pxa2xx_udc``), Toshiba TC86c001 +"Goku-S" (``goku_udc``), Renesas SH7705/7727 (``sh_udc``), MediaQ 11xx +(``mq11xx_udc``), Hynix HMS30C7202 (``h7202_udc``), National 9303/4 +(``n9604_udc``), Texas Instruments OMAP (``omap_udc``), Sharp LH7A40x +(``lh7a40x_udc``), and more. Most of those are full speed controllers. + +At this writing, there are people at work on drivers in this framework +for several other USB device controllers, with plans to make many of +them be widely available. + +A partial USB simulator, the ``dummy_hcd`` driver, is available. It can +act like a net2280, a pxa25x, or an sa11x0 in terms of available +endpoints and device speeds; and it simulates control, bulk, and to some +extent interrupt transfers. That lets you develop some parts of a gadget +driver on a normal PC, without any special hardware, and perhaps with +the assistance of tools such as GDB running with User Mode Linux. At +least one person has expressed interest in adapting that approach, +hooking it up to a simulator for a microcontroller. Such simulators can +help debug subsystems where the runtime hardware is unfriendly to +software development, or is not yet available. + +Support for other controllers is expected to be developed and +contributed over time, as this driver framework evolves. + +Gadget Drivers +============== + +In addition to *Gadget Zero* (used primarily for testing and development +with drivers for usb controller hardware), other gadget drivers exist. + +There's an ``ethernet`` gadget driver, which implements one of the most +useful *Communications Device Class* (CDC) models. One of the standards +for cable modem interoperability even specifies the use of this ethernet +model as one of two mandatory options. Gadgets using this code look to a +USB host as if they're an Ethernet adapter. It provides access to a +network where the gadget's CPU is one host, which could easily be +bridging, routing, or firewalling access to other networks. Since some +hardware can't fully implement the CDC Ethernet requirements, this +driver also implements a "good parts only" subset of CDC Ethernet. (That +subset doesn't advertise itself as CDC Ethernet, to avoid creating +problems.) + +Support for Microsoft's ``RNDIS`` protocol has been contributed by +Pengutronix and Auerswald GmbH. This is like CDC Ethernet, but it runs +on more slightly USB hardware (but less than the CDC subset). However, +its main claim to fame is being able to connect directly to recent +versions of Windows, using drivers that Microsoft bundles and supports, +making it much simpler to network with Windows. + +There is also support for user mode gadget drivers, using ``gadgetfs``. +This provides a *User Mode API* that presents each endpoint as a single +file descriptor. I/O is done using normal ``read()`` and ``read()`` calls. +Familiar tools like GDB and pthreads can be used to develop and debug +user mode drivers, so that once a robust controller driver is available +many applications for it won't require new kernel mode software. Linux +2.6 *Async I/O (AIO)* support is available, so that user mode software +can stream data with only slightly more overhead than a kernel driver. + +There's a USB Mass Storage class driver, which provides a different +solution for interoperability with systems such as MS-Windows and MacOS. +That *Mass Storage* driver uses a file or block device as backing store +for a drive, like the ``loop`` driver. The USB host uses the BBB, CB, or +CBI versions of the mass storage class specification, using transparent +SCSI commands to access the data from the backing store. + +There's a "serial line" driver, useful for TTY style operation over USB. +The latest version of that driver supports CDC ACM style operation, like +a USB modem, and so on most hardware it can interoperate easily with +MS-Windows. One interesting use of that driver is in boot firmware (like +a BIOS), which can sometimes use that model with very small systems +without real serial lines. + +Support for other kinds of gadget is expected to be developed and +contributed over time, as this driver framework evolves. + +USB On-The-GO (OTG) +=================== + +USB OTG support on Linux 2.6 was initially developed by Texas +Instruments for `OMAP <http://www.omap.com>`__ 16xx and 17xx series +processors. Other OTG systems should work in similar ways, but the +hardware level details could be very different. + +Systems need specialized hardware support to implement OTG, notably +including a special *Mini-AB* jack and associated transceiver to support +*Dual-Role* operation: they can act either as a host, using the standard +Linux-USB host side driver stack, or as a peripheral, using this +``gadget`` framework. To do that, the system software relies on small +additions to those programming interfaces, and on a new internal +component (here called an "OTG Controller") affecting which driver stack +connects to the OTG port. In each role, the system can re-use the +existing pool of hardware-neutral drivers, layered on top of the +controller driver interfaces (:c:type:`usb_bus` or :c:type:`usb_gadget`). +Such drivers need at most minor changes, and most of the calls added to +support OTG can also benefit non-OTG products. + +- Gadget drivers test the ``is_otg`` flag, and use it to determine + whether or not to include an OTG descriptor in each of their + configurations. + +- Gadget drivers may need changes to support the two new OTG protocols, + exposed in new gadget attributes such as ``b_hnp_enable`` flag. HNP + support should be reported through a user interface (two LEDs could + suffice), and is triggered in some cases when the host suspends the + peripheral. SRP support can be user-initiated just like remote + wakeup, probably by pressing the same button. + +- On the host side, USB device drivers need to be taught to trigger HNP + at appropriate moments, using ``usb_suspend_device()``. That also + conserves battery power, which is useful even for non-OTG + configurations. + +- Also on the host side, a driver must support the OTG "Targeted + Peripheral List". That's just a whitelist, used to reject peripherals + not supported with a given Linux OTG host. *This whitelist is + product-specific; each product must modify* ``otg_whitelist.h`` *to + match its interoperability specification.* + + Non-OTG Linux hosts, like PCs and workstations, normally have some + solution for adding drivers, so that peripherals that aren't + recognized can eventually be supported. That approach is unreasonable + for consumer products that may never have their firmware upgraded, + and where it's usually unrealistic to expect traditional + PC/workstation/server kinds of support model to work. For example, + it's often impractical to change device firmware once the product has + been distributed, so driver bugs can't normally be fixed if they're + found after shipment. + +Additional changes are needed below those hardware-neutral :c:type:`usb_bus` +and :c:type:`usb_gadget` driver interfaces; those aren't discussed here in any +detail. Those affect the hardware-specific code for each USB Host or +Peripheral controller, and how the HCD initializes (since OTG can be +active only on a single port). They also involve what may be called an +*OTG Controller Driver*, managing the OTG transceiver and the OTG state +machine logic as well as much of the root hub behavior for the OTG port. +The OTG controller driver needs to activate and deactivate USB +controllers depending on the relevant device role. Some related changes +were needed inside usbcore, so that it can identify OTG-capable devices +and respond appropriately to HNP or SRP protocols. diff --git a/Documentation/driver-api/usb/hotplug.rst b/Documentation/driver-api/usb/hotplug.rst new file mode 100644 index 0000000000..c1e13107c5 --- /dev/null +++ b/Documentation/driver-api/usb/hotplug.rst @@ -0,0 +1,154 @@ +USB hotplugging +~~~~~~~~~~~~~~~ + +Linux Hotplugging +================= + + +In hotpluggable busses like USB (and Cardbus PCI), end-users plug devices +into the bus with power on. In most cases, users expect the devices to become +immediately usable. That means the system must do many things, including: + + - Find a driver that can handle the device. That may involve + loading a kernel module; newer drivers can use module-init-tools + to publish their device (and class) support to user utilities. + + - Bind a driver to that device. Bus frameworks do that using a + device driver's probe() routine. + + - Tell other subsystems to configure the new device. Print + queues may need to be enabled, networks brought up, disk + partitions mounted, and so on. In some cases these will + be driver-specific actions. + +This involves a mix of kernel mode and user mode actions. Making devices +be immediately usable means that any user mode actions can't wait for an +administrator to do them: the kernel must trigger them, either passively +(triggering some monitoring daemon to invoke a helper program) or +actively (calling such a user mode helper program directly). + +Those triggered actions must support a system's administrative policies; +such programs are called "policy agents" here. Typically they involve +shell scripts that dispatch to more familiar administration tools. + +Because some of those actions rely on information about drivers (metadata) +that is currently available only when the drivers are dynamically linked, +you get the best hotplugging when you configure a highly modular system. + +Kernel Hotplug Helper (``/sbin/hotplug``) +========================================= + +There is a kernel parameter: ``/proc/sys/kernel/hotplug``, which normally +holds the pathname ``/sbin/hotplug``. That parameter names a program +which the kernel may invoke at various times. + +The /sbin/hotplug program can be invoked by any subsystem as part of its +reaction to a configuration change, from a thread in that subsystem. +Only one parameter is required: the name of a subsystem being notified of +some kernel event. That name is used as the first key for further event +dispatch; any other argument and environment parameters are specified by +the subsystem making that invocation. + +Hotplug software and other resources is available at: + + http://linux-hotplug.sourceforge.net + +Mailing list information is also available at that site. + + +USB Policy Agent +================ + +The USB subsystem currently invokes ``/sbin/hotplug`` when USB devices +are added or removed from system. The invocation is done by the kernel +hub workqueue [hub_wq], or else as part of root hub initialization +(done by init, modprobe, kapmd, etc). Its single command line parameter +is the string "usb", and it passes these environment variables: + +========== ============================================ +ACTION ``add``, ``remove`` +PRODUCT USB vendor, product, and version codes (hex) +TYPE device class codes (decimal) +INTERFACE interface 0 class codes (decimal) +========== ============================================ + +If "usbdevfs" is configured, DEVICE and DEVFS are also passed. DEVICE is +the pathname of the device, and is useful for devices with multiple and/or +alternate interfaces that complicate driver selection. By design, USB +hotplugging is independent of ``usbdevfs``: you can do most essential parts +of USB device setup without using that filesystem, and without running a +user mode daemon to detect changes in system configuration. + +Currently available policy agent implementations can load drivers for +modules, and can invoke driver-specific setup scripts. The newest ones +leverage USB module-init-tools support. Later agents might unload drivers. + + +USB Modutils Support +==================== + +Current versions of module-init-tools will create a ``modules.usbmap`` file +which contains the entries from each driver's ``MODULE_DEVICE_TABLE``. Such +files can be used by various user mode policy agents to make sure all the +right driver modules get loaded, either at boot time or later. + +See ``linux/usb.h`` for full information about such table entries; or look +at existing drivers. Each table entry describes one or more criteria to +be used when matching a driver to a device or class of devices. The +specific criteria are identified by bits set in "match_flags", paired +with field values. You can construct the criteria directly, or with +macros such as these, and use driver_info to store more information:: + + USB_DEVICE (vendorId, productId) + ... matching devices with specified vendor and product ids + USB_DEVICE_VER (vendorId, productId, lo, hi) + ... like USB_DEVICE with lo <= productversion <= hi + USB_INTERFACE_INFO (class, subclass, protocol) + ... matching specified interface class info + USB_DEVICE_INFO (class, subclass, protocol) + ... matching specified device class info + +A short example, for a driver that supports several specific USB devices +and their quirks, might have a MODULE_DEVICE_TABLE like this:: + + static const struct usb_device_id mydriver_id_table[] = { + { USB_DEVICE (0x9999, 0xaaaa), driver_info: QUIRK_X }, + { USB_DEVICE (0xbbbb, 0x8888), driver_info: QUIRK_Y|QUIRK_Z }, + ... + { } /* end with an all-zeroes entry */ + }; + MODULE_DEVICE_TABLE(usb, mydriver_id_table); + +Most USB device drivers should pass these tables to the USB subsystem as +well as to the module management subsystem. Not all, though: some driver +frameworks connect using interfaces layered over USB, and so they won't +need such a struct usb_driver. + +Drivers that connect directly to the USB subsystem should be declared +something like this:: + + static struct usb_driver mydriver = { + .name = "mydriver", + .id_table = mydriver_id_table, + .probe = my_probe, + .disconnect = my_disconnect, + + /* + if using the usb chardev framework: + .minor = MY_USB_MINOR_START, + .fops = my_file_ops, + if exposing any operations through usbdevfs: + .ioctl = my_ioctl, + */ + }; + +When the USB subsystem knows about a driver's device ID table, it's used when +choosing drivers to probe(). The thread doing new device processing checks +drivers' device ID entries from the ``MODULE_DEVICE_TABLE`` against interface +and device descriptors for the device. It will only call ``probe()`` if there +is a match, and the third argument to ``probe()`` will be the entry that +matched. + +If you don't provide an ``id_table`` for your driver, then your driver may get +probed for each new device; the third parameter to ``probe()`` will be +``NULL``. diff --git a/Documentation/driver-api/usb/index.rst b/Documentation/driver-api/usb/index.rst new file mode 100644 index 0000000000..cfa8797ea6 --- /dev/null +++ b/Documentation/driver-api/usb/index.rst @@ -0,0 +1,30 @@ +============= +Linux USB API +============= + +.. toctree:: + + usb + gadget + anchors + bulk-streams + callbacks + dma + URB + power-management + hotplug + persist + error-codes + writing_usb_driver + dwc3 + writing_musb_glue_layer + typec + typec_bus + usb3-debug-port + +.. only:: subproject and html + + Indices + ======= + + * :ref:`genindex` diff --git a/Documentation/driver-api/usb/persist.rst b/Documentation/driver-api/usb/persist.rst new file mode 100644 index 0000000000..08cafc6292 --- /dev/null +++ b/Documentation/driver-api/usb/persist.rst @@ -0,0 +1,171 @@ +.. _usb-persist: + +USB device persistence during system suspend +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ + +:Author: Alan Stern <stern@rowland.harvard.edu> +:Date: September 2, 2006 (Updated February 25, 2008) + + +What is the problem? +==================== + +According to the USB specification, when a USB bus is suspended the +bus must continue to supply suspend current (around 1-5 mA). This +is so that devices can maintain their internal state and hubs can +detect connect-change events (devices being plugged in or unplugged). +The technical term is "power session". + +If a USB device's power session is interrupted then the system is +required to behave as though the device has been unplugged. It's a +conservative approach; in the absence of suspend current the computer +has no way to know what has actually happened. Perhaps the same +device is still attached or perhaps it was removed and a different +device plugged into the port. The system must assume the worst. + +By default, Linux behaves according to the spec. If a USB host +controller loses power during a system suspend, then when the system +wakes up all the devices attached to that controller are treated as +though they had disconnected. This is always safe and it is the +"officially correct" thing to do. + +For many sorts of devices this behavior doesn't matter in the least. +If the kernel wants to believe that your USB keyboard was unplugged +while the system was asleep and a new keyboard was plugged in when the +system woke up, who cares? It'll still work the same when you type on +it. + +Unfortunately problems _can_ arise, particularly with mass-storage +devices. The effect is exactly the same as if the device really had +been unplugged while the system was suspended. If you had a mounted +filesystem on the device, you're out of luck -- everything in that +filesystem is now inaccessible. This is especially annoying if your +root filesystem was located on the device, since your system will +instantly crash. + +Loss of power isn't the only mechanism to worry about. Anything that +interrupts a power session will have the same effect. For example, +even though suspend current may have been maintained while the system +was asleep, on many systems during the initial stages of wakeup the +firmware (i.e., the BIOS) resets the motherboard's USB host +controllers. Result: all the power sessions are destroyed and again +it's as though you had unplugged all the USB devices. Yes, it's +entirely the BIOS's fault, but that doesn't do _you_ any good unless +you can convince the BIOS supplier to fix the problem (lots of luck!). + +On many systems the USB host controllers will get reset after a +suspend-to-RAM. On almost all systems, no suspend current is +available during hibernation (also known as swsusp or suspend-to-disk). +You can check the kernel log after resuming to see if either of these +has happened; look for lines saying "root hub lost power or was reset". + +In practice, people are forced to unmount any filesystems on a USB +device before suspending. If the root filesystem is on a USB device, +the system can't be suspended at all. (All right, it _can_ be +suspended -- but it will crash as soon as it wakes up, which isn't +much better.) + + +What is the solution? +===================== + +The kernel includes a feature called USB-persist. It tries to work +around these issues by allowing the core USB device data structures to +persist across a power-session disruption. + +It works like this. If the kernel sees that a USB host controller is +not in the expected state during resume (i.e., if the controller was +reset or otherwise had lost power) then it applies a persistence check +to each of the USB devices below that controller for which the +"persist" attribute is set. It doesn't try to resume the device; that +can't work once the power session is gone. Instead it issues a USB +port reset and then re-enumerates the device. (This is exactly the +same thing that happens whenever a USB device is reset.) If the +re-enumeration shows that the device now attached to that port has the +same descriptors as before, including the Vendor and Product IDs, then +the kernel continues to use the same device structure. In effect, the +kernel treats the device as though it had merely been reset instead of +unplugged. + +The same thing happens if the host controller is in the expected state +but a USB device was unplugged and then replugged, or if a USB device +fails to carry out a normal resume. + +If no device is now attached to the port, or if the descriptors are +different from what the kernel remembers, then the treatment is what +you would expect. The kernel destroys the old device structure and +behaves as though the old device had been unplugged and a new device +plugged in. + +The end result is that the USB device remains available and usable. +Filesystem mounts and memory mappings are unaffected, and the world is +now a good and happy place. + +Note that the "USB-persist" feature will be applied only to those +devices for which it is enabled. You can enable the feature by doing +(as root):: + + echo 1 >/sys/bus/usb/devices/.../power/persist + +where the "..." should be filled in the with the device's ID. Disable +the feature by writing 0 instead of 1. For hubs the feature is +automatically and permanently enabled and the power/persist file +doesn't even exist, so you only have to worry about setting it for +devices where it really matters. + + +Is this the best solution? +========================== + +Perhaps not. Arguably, keeping track of mounted filesystems and +memory mappings across device disconnects should be handled by a +centralized Logical Volume Manager. Such a solution would allow you +to plug in a USB flash device, create a persistent volume associated +with it, unplug the flash device, plug it back in later, and still +have the same persistent volume associated with the device. As such +it would be more far-reaching than USB-persist. + +On the other hand, writing a persistent volume manager would be a big +job and using it would require significant input from the user. This +solution is much quicker and easier -- and it exists now, a giant +point in its favor! + +Furthermore, the USB-persist feature applies to _all_ USB devices, not +just mass-storage devices. It might turn out to be equally useful for +other device types, such as network interfaces. + + +WARNING: USB-persist can be dangerous!! +======================================= + +When recovering an interrupted power session the kernel does its best +to make sure the USB device hasn't been changed; that is, the same +device is still plugged into the port as before. But the checks +aren't guaranteed to be 100% accurate. + +If you replace one USB device with another of the same type (same +manufacturer, same IDs, and so on) there's an excellent chance the +kernel won't detect the change. The serial number string and other +descriptors are compared with the kernel's stored values, but this +might not help since manufacturers frequently omit serial numbers +entirely in their devices. + +Furthermore it's quite possible to leave a USB device exactly the same +while changing its media. If you replace the flash memory card in a +USB card reader while the system is asleep, the kernel will have no +way to know you did it. The kernel will assume that nothing has +happened and will continue to use the partition tables, inodes, and +memory mappings for the old card. + +If the kernel gets fooled in this way, it's almost certain to cause +data corruption and to crash your system. You'll have no one to blame +but yourself. + +For those devices with avoid_reset_quirk attribute being set, persist +maybe fail because they may morph after reset. + +YOU HAVE BEEN WARNED! USE AT YOUR OWN RISK! + +That having been said, most of the time there shouldn't be any trouble +at all. The USB-persist feature can be extremely useful. Make the +most of it. diff --git a/Documentation/driver-api/usb/power-management.rst b/Documentation/driver-api/usb/power-management.rst new file mode 100644 index 0000000000..2525c3622c --- /dev/null +++ b/Documentation/driver-api/usb/power-management.rst @@ -0,0 +1,798 @@ +.. _usb-power-management: + +Power Management for USB +~~~~~~~~~~~~~~~~~~~~~~~~ + +:Author: Alan Stern <stern@rowland.harvard.edu> +:Date: Last-updated: February 2014 + +.. + Contents: + --------- + * What is Power Management? + * What is Remote Wakeup? + * When is a USB device idle? + * Forms of dynamic PM + * The user interface for dynamic PM + * Changing the default idle-delay time + * Warnings + * The driver interface for Power Management + * The driver interface for autosuspend and autoresume + * Other parts of the driver interface + * Mutual exclusion + * Interaction between dynamic PM and system PM + * xHCI hardware link PM + * USB Port Power Control + * User Interface for Port Power Control + * Suggested Userspace Port Power Policy + + +What is Power Management? +------------------------- + +Power Management (PM) is the practice of saving energy by suspending +parts of a computer system when they aren't being used. While a +component is ``suspended`` it is in a nonfunctional low-power state; it +might even be turned off completely. A suspended component can be +``resumed`` (returned to a functional full-power state) when the kernel +needs to use it. (There also are forms of PM in which components are +placed in a less functional but still usable state instead of being +suspended; an example would be reducing the CPU's clock rate. This +document will not discuss those other forms.) + +When the parts being suspended include the CPU and most of the rest of +the system, we speak of it as a "system suspend". When a particular +device is turned off while the system as a whole remains running, we +call it a "dynamic suspend" (also known as a "runtime suspend" or +"selective suspend"). This document concentrates mostly on how +dynamic PM is implemented in the USB subsystem, although system PM is +covered to some extent (see ``Documentation/power/*.rst`` for more +information about system PM). + +System PM support is present only if the kernel was built with +``CONFIG_SUSPEND`` or ``CONFIG_HIBERNATION`` enabled. Dynamic PM support + +for USB is present whenever +the kernel was built with ``CONFIG_PM`` enabled. + +[Historically, dynamic PM support for USB was present only if the +kernel had been built with ``CONFIG_USB_SUSPEND`` enabled (which depended on +``CONFIG_PM_RUNTIME``). Starting with the 3.10 kernel release, dynamic PM +support for USB was present whenever the kernel was built with +``CONFIG_PM_RUNTIME`` enabled. The ``CONFIG_USB_SUSPEND`` option had been +eliminated.] + + +What is Remote Wakeup? +---------------------- + +When a device has been suspended, it generally doesn't resume until +the computer tells it to. Likewise, if the entire computer has been +suspended, it generally doesn't resume until the user tells it to, say +by pressing a power button or opening the cover. + +However some devices have the capability of resuming by themselves, or +asking the kernel to resume them, or even telling the entire computer +to resume. This capability goes by several names such as "Wake On +LAN"; we will refer to it generically as "remote wakeup". When a +device is enabled for remote wakeup and it is suspended, it may resume +itself (or send a request to be resumed) in response to some external +event. Examples include a suspended keyboard resuming when a key is +pressed, or a suspended USB hub resuming when a device is plugged in. + + +When is a USB device idle? +-------------------------- + +A device is idle whenever the kernel thinks it's not busy doing +anything important and thus is a candidate for being suspended. The +exact definition depends on the device's driver; drivers are allowed +to declare that a device isn't idle even when there's no actual +communication taking place. (For example, a hub isn't considered idle +unless all the devices plugged into that hub are already suspended.) +In addition, a device isn't considered idle so long as a program keeps +its usbfs file open, whether or not any I/O is going on. + +If a USB device has no driver, its usbfs file isn't open, and it isn't +being accessed through sysfs, then it definitely is idle. + + +Forms of dynamic PM +------------------- + +Dynamic suspends occur when the kernel decides to suspend an idle +device. This is called ``autosuspend`` for short. In general, a device +won't be autosuspended unless it has been idle for some minimum period +of time, the so-called idle-delay time. + +Of course, nothing the kernel does on its own initiative should +prevent the computer or its devices from working properly. If a +device has been autosuspended and a program tries to use it, the +kernel will automatically resume the device (autoresume). For the +same reason, an autosuspended device will usually have remote wakeup +enabled, if the device supports remote wakeup. + +It is worth mentioning that many USB drivers don't support +autosuspend. In fact, at the time of this writing (Linux 2.6.23) the +only drivers which do support it are the hub driver, kaweth, asix, +usblp, usblcd, and usb-skeleton (which doesn't count). If a +non-supporting driver is bound to a device, the device won't be +autosuspended. In effect, the kernel pretends the device is never +idle. + +We can categorize power management events in two broad classes: +external and internal. External events are those triggered by some +agent outside the USB stack: system suspend/resume (triggered by +userspace), manual dynamic resume (also triggered by userspace), and +remote wakeup (triggered by the device). Internal events are those +triggered within the USB stack: autosuspend and autoresume. Note that +all dynamic suspend events are internal; external agents are not +allowed to issue dynamic suspends. + + +The user interface for dynamic PM +--------------------------------- + +The user interface for controlling dynamic PM is located in the ``power/`` +subdirectory of each USB device's sysfs directory, that is, in +``/sys/bus/usb/devices/.../power/`` where "..." is the device's ID. The +relevant attribute files are: wakeup, control, and +``autosuspend_delay_ms``. (There may also be a file named ``level``; this +file was deprecated as of the 2.6.35 kernel and replaced by the +``control`` file. In 2.6.38 the ``autosuspend`` file will be deprecated +and replaced by the ``autosuspend_delay_ms`` file. The only difference +is that the newer file expresses the delay in milliseconds whereas the +older file uses seconds. Confusingly, both files are present in 2.6.37 +but only ``autosuspend`` works.) + + ``power/wakeup`` + + This file is empty if the device does not support + remote wakeup. Otherwise the file contains either the + word ``enabled`` or the word ``disabled``, and you can + write those words to the file. The setting determines + whether or not remote wakeup will be enabled when the + device is next suspended. (If the setting is changed + while the device is suspended, the change won't take + effect until the following suspend.) + + ``power/control`` + + This file contains one of two words: ``on`` or ``auto``. + You can write those words to the file to change the + device's setting. + + - ``on`` means that the device should be resumed and + autosuspend is not allowed. (Of course, system + suspends are still allowed.) + + - ``auto`` is the normal state in which the kernel is + allowed to autosuspend and autoresume the device. + + (In kernels up to 2.6.32, you could also specify + ``suspend``, meaning that the device should remain + suspended and autoresume was not allowed. This + setting is no longer supported.) + + ``power/autosuspend_delay_ms`` + + This file contains an integer value, which is the + number of milliseconds the device should remain idle + before the kernel will autosuspend it (the idle-delay + time). The default is 2000. 0 means to autosuspend + as soon as the device becomes idle, and negative + values mean never to autosuspend. You can write a + number to the file to change the autosuspend + idle-delay time. + +Writing ``-1`` to ``power/autosuspend_delay_ms`` and writing ``on`` to +``power/control`` do essentially the same thing -- they both prevent the +device from being autosuspended. Yes, this is a redundancy in the +API. + +(In 2.6.21 writing ``0`` to ``power/autosuspend`` would prevent the device +from being autosuspended; the behavior was changed in 2.6.22. The +``power/autosuspend`` attribute did not exist prior to 2.6.21, and the +``power/level`` attribute did not exist prior to 2.6.22. ``power/control`` +was added in 2.6.34, and ``power/autosuspend_delay_ms`` was added in +2.6.37 but did not become functional until 2.6.38.) + + +Changing the default idle-delay time +------------------------------------ + +The default autosuspend idle-delay time (in seconds) is controlled by +a module parameter in usbcore. You can specify the value when usbcore +is loaded. For example, to set it to 5 seconds instead of 2 you would +do:: + + modprobe usbcore autosuspend=5 + +Equivalently, you could add to a configuration file in /etc/modprobe.d +a line saying:: + + options usbcore autosuspend=5 + +Some distributions load the usbcore module very early during the boot +process, by means of a program or script running from an initramfs +image. To alter the parameter value you would have to rebuild that +image. + +If usbcore is compiled into the kernel rather than built as a loadable +module, you can add:: + + usbcore.autosuspend=5 + +to the kernel's boot command line. + +Finally, the parameter value can be changed while the system is +running. If you do:: + + echo 5 >/sys/module/usbcore/parameters/autosuspend + +then each new USB device will have its autosuspend idle-delay +initialized to 5. (The idle-delay values for already existing devices +will not be affected.) + +Setting the initial default idle-delay to -1 will prevent any +autosuspend of any USB device. This has the benefit of allowing you +then to enable autosuspend for selected devices. + + +Warnings +-------- + +The USB specification states that all USB devices must support power +management. Nevertheless, the sad fact is that many devices do not +support it very well. You can suspend them all right, but when you +try to resume them they disconnect themselves from the USB bus or +they stop working entirely. This seems to be especially prevalent +among printers and scanners, but plenty of other types of device have +the same deficiency. + +For this reason, by default the kernel disables autosuspend (the +``power/control`` attribute is initialized to ``on``) for all devices other +than hubs. Hubs, at least, appear to be reasonably well-behaved in +this regard. + +(In 2.6.21 and 2.6.22 this wasn't the case. Autosuspend was enabled +by default for almost all USB devices. A number of people experienced +problems as a result.) + +This means that non-hub devices won't be autosuspended unless the user +or a program explicitly enables it. As of this writing there aren't +any widespread programs which will do this; we hope that in the near +future device managers such as HAL will take on this added +responsibility. In the meantime you can always carry out the +necessary operations by hand or add them to a udev script. You can +also change the idle-delay time; 2 seconds is not the best choice for +every device. + +If a driver knows that its device has proper suspend/resume support, +it can enable autosuspend all by itself. For example, the video +driver for a laptop's webcam might do this (in recent kernels they +do), since these devices are rarely used and so should normally be +autosuspended. + +Sometimes it turns out that even when a device does work okay with +autosuspend there are still problems. For example, the usbhid driver, +which manages keyboards and mice, has autosuspend support. Tests with +a number of keyboards show that typing on a suspended keyboard, while +causing the keyboard to do a remote wakeup all right, will nonetheless +frequently result in lost keystrokes. Tests with mice show that some +of them will issue a remote-wakeup request in response to button +presses but not to motion, and some in response to neither. + +The kernel will not prevent you from enabling autosuspend on devices +that can't handle it. It is even possible in theory to damage a +device by suspending it at the wrong time. (Highly unlikely, but +possible.) Take care. + + +The driver interface for Power Management +----------------------------------------- + +The requirements for a USB driver to support external power management +are pretty modest; the driver need only define:: + + .suspend + .resume + .reset_resume + +methods in its :c:type:`usb_driver` structure, and the ``reset_resume`` method +is optional. The methods' jobs are quite simple: + + - The ``suspend`` method is called to warn the driver that the + device is going to be suspended. If the driver returns a + negative error code, the suspend will be aborted. Normally + the driver will return 0, in which case it must cancel all + outstanding URBs (:c:func:`usb_kill_urb`) and not submit any more. + + - The ``resume`` method is called to tell the driver that the + device has been resumed and the driver can return to normal + operation. URBs may once more be submitted. + + - The ``reset_resume`` method is called to tell the driver that + the device has been resumed and it also has been reset. + The driver should redo any necessary device initialization, + since the device has probably lost most or all of its state + (although the interfaces will be in the same altsettings as + before the suspend). + +If the device is disconnected or powered down while it is suspended, +the ``disconnect`` method will be called instead of the ``resume`` or +``reset_resume`` method. This is also quite likely to happen when +waking up from hibernation, as many systems do not maintain suspend +current to the USB host controllers during hibernation. (It's +possible to work around the hibernation-forces-disconnect problem by +using the USB Persist facility.) + +The ``reset_resume`` method is used by the USB Persist facility (see +:ref:`usb-persist`) and it can also be used under certain +circumstances when ``CONFIG_USB_PERSIST`` is not enabled. Currently, if a +device is reset during a resume and the driver does not have a +``reset_resume`` method, the driver won't receive any notification about +the resume. Later kernels will call the driver's ``disconnect`` method; +2.6.23 doesn't do this. + +USB drivers are bound to interfaces, so their ``suspend`` and ``resume`` +methods get called when the interfaces are suspended or resumed. In +principle one might want to suspend some interfaces on a device (i.e., +force the drivers for those interface to stop all activity) without +suspending the other interfaces. The USB core doesn't allow this; all +interfaces are suspended when the device itself is suspended and all +interfaces are resumed when the device is resumed. It isn't possible +to suspend or resume some but not all of a device's interfaces. The +closest you can come is to unbind the interfaces' drivers. + + +The driver interface for autosuspend and autoresume +--------------------------------------------------- + +To support autosuspend and autoresume, a driver should implement all +three of the methods listed above. In addition, a driver indicates +that it supports autosuspend by setting the ``.supports_autosuspend`` flag +in its usb_driver structure. It is then responsible for informing the +USB core whenever one of its interfaces becomes busy or idle. The +driver does so by calling these six functions:: + + int usb_autopm_get_interface(struct usb_interface *intf); + void usb_autopm_put_interface(struct usb_interface *intf); + int usb_autopm_get_interface_async(struct usb_interface *intf); + void usb_autopm_put_interface_async(struct usb_interface *intf); + void usb_autopm_get_interface_no_resume(struct usb_interface *intf); + void usb_autopm_put_interface_no_suspend(struct usb_interface *intf); + +The functions work by maintaining a usage counter in the +usb_interface's embedded device structure. When the counter is > 0 +then the interface is deemed to be busy, and the kernel will not +autosuspend the interface's device. When the usage counter is = 0 +then the interface is considered to be idle, and the kernel may +autosuspend the device. + +Drivers must be careful to balance their overall changes to the usage +counter. Unbalanced "get"s will remain in effect when a driver is +unbound from its interface, preventing the device from going into +runtime suspend should the interface be bound to a driver again. On +the other hand, drivers are allowed to achieve this balance by calling +the ``usb_autopm_*`` functions even after their ``disconnect`` routine +has returned -- say from within a work-queue routine -- provided they +retain an active reference to the interface (via ``usb_get_intf`` and +``usb_put_intf``). + +Drivers using the async routines are responsible for their own +synchronization and mutual exclusion. + + :c:func:`usb_autopm_get_interface` increments the usage counter and + does an autoresume if the device is suspended. If the + autoresume fails, the counter is decremented back. + + :c:func:`usb_autopm_put_interface` decrements the usage counter and + attempts an autosuspend if the new value is = 0. + + :c:func:`usb_autopm_get_interface_async` and + :c:func:`usb_autopm_put_interface_async` do almost the same things as + their non-async counterparts. The big difference is that they + use a workqueue to do the resume or suspend part of their + jobs. As a result they can be called in an atomic context, + such as an URB's completion handler, but when they return the + device will generally not yet be in the desired state. + + :c:func:`usb_autopm_get_interface_no_resume` and + :c:func:`usb_autopm_put_interface_no_suspend` merely increment or + decrement the usage counter; they do not attempt to carry out + an autoresume or an autosuspend. Hence they can be called in + an atomic context. + +The simplest usage pattern is that a driver calls +:c:func:`usb_autopm_get_interface` in its open routine and +:c:func:`usb_autopm_put_interface` in its close or release routine. But other +patterns are possible. + +The autosuspend attempts mentioned above will often fail for one +reason or another. For example, the ``power/control`` attribute might be +set to ``on``, or another interface in the same device might not be +idle. This is perfectly normal. If the reason for failure was that +the device hasn't been idle for long enough, a timer is scheduled to +carry out the operation automatically when the autosuspend idle-delay +has expired. + +Autoresume attempts also can fail, although failure would mean that +the device is no longer present or operating properly. Unlike +autosuspend, there's no idle-delay for an autoresume. + + +Other parts of the driver interface +----------------------------------- + +Drivers can enable autosuspend for their devices by calling:: + + usb_enable_autosuspend(struct usb_device *udev); + +in their :c:func:`probe` routine, if they know that the device is capable of +suspending and resuming correctly. This is exactly equivalent to +writing ``auto`` to the device's ``power/control`` attribute. Likewise, +drivers can disable autosuspend by calling:: + + usb_disable_autosuspend(struct usb_device *udev); + +This is exactly the same as writing ``on`` to the ``power/control`` attribute. + +Sometimes a driver needs to make sure that remote wakeup is enabled +during autosuspend. For example, there's not much point +autosuspending a keyboard if the user can't cause the keyboard to do a +remote wakeup by typing on it. If the driver sets +``intf->needs_remote_wakeup`` to 1, the kernel won't autosuspend the +device if remote wakeup isn't available. (If the device is already +autosuspended, though, setting this flag won't cause the kernel to +autoresume it. Normally a driver would set this flag in its ``probe`` +method, at which time the device is guaranteed not to be +autosuspended.) + +If a driver does its I/O asynchronously in interrupt context, it +should call :c:func:`usb_autopm_get_interface_async` before starting output and +:c:func:`usb_autopm_put_interface_async` when the output queue drains. When +it receives an input event, it should call:: + + usb_mark_last_busy(struct usb_device *udev); + +in the event handler. This tells the PM core that the device was just +busy and therefore the next autosuspend idle-delay expiration should +be pushed back. Many of the usb_autopm_* routines also make this call, +so drivers need to worry only when interrupt-driven input arrives. + +Asynchronous operation is always subject to races. For example, a +driver may call the :c:func:`usb_autopm_get_interface_async` routine at a time +when the core has just finished deciding the device has been idle for +long enough but not yet gotten around to calling the driver's ``suspend`` +method. The ``suspend`` method must be responsible for synchronizing with +the I/O request routine and the URB completion handler; it should +cause autosuspends to fail with -EBUSY if the driver needs to use the +device. + +External suspend calls should never be allowed to fail in this way, +only autosuspend calls. The driver can tell them apart by applying +the :c:func:`PMSG_IS_AUTO` macro to the message argument to the ``suspend`` +method; it will return True for internal PM events (autosuspend) and +False for external PM events. + + +Mutual exclusion +---------------- + +For external events -- but not necessarily for autosuspend or +autoresume -- the device semaphore (udev->dev.sem) will be held when a +``suspend`` or ``resume`` method is called. This implies that external +suspend/resume events are mutually exclusive with calls to ``probe``, +``disconnect``, ``pre_reset``, and ``post_reset``; the USB core guarantees that +this is true of autosuspend/autoresume events as well. + +If a driver wants to block all suspend/resume calls during some +critical section, the best way is to lock the device and call +:c:func:`usb_autopm_get_interface` (and do the reverse at the end of the +critical section). Holding the device semaphore will block all +external PM calls, and the :c:func:`usb_autopm_get_interface` will prevent any +internal PM calls, even if it fails. (Exercise: Why?) + + +Interaction between dynamic PM and system PM +-------------------------------------------- + +Dynamic power management and system power management can interact in +a couple of ways. + +Firstly, a device may already be autosuspended when a system suspend +occurs. Since system suspends are supposed to be as transparent as +possible, the device should remain suspended following the system +resume. But this theory may not work out well in practice; over time +the kernel's behavior in this regard has changed. As of 2.6.37 the +policy is to resume all devices during a system resume and let them +handle their own runtime suspends afterward. + +Secondly, a dynamic power-management event may occur as a system +suspend is underway. The window for this is short, since system +suspends don't take long (a few seconds usually), but it can happen. +For example, a suspended device may send a remote-wakeup signal while +the system is suspending. The remote wakeup may succeed, which would +cause the system suspend to abort. If the remote wakeup doesn't +succeed, it may still remain active and thus cause the system to +resume as soon as the system suspend is complete. Or the remote +wakeup may fail and get lost. Which outcome occurs depends on timing +and on the hardware and firmware design. + + +xHCI hardware link PM +--------------------- + +xHCI host controller provides hardware link power management to usb2.0 +(xHCI 1.0 feature) and usb3.0 devices which support link PM. By +enabling hardware LPM, the host can automatically put the device into +lower power state(L1 for usb2.0 devices, or U1/U2 for usb3.0 devices), +which state device can enter and resume very quickly. + +The user interface for controlling hardware LPM is located in the +``power/`` subdirectory of each USB device's sysfs directory, that is, in +``/sys/bus/usb/devices/.../power/`` where "..." is the device's ID. The +relevant attribute files are ``usb2_hardware_lpm`` and ``usb3_hardware_lpm``. + + ``power/usb2_hardware_lpm`` + + When a USB2 device which support LPM is plugged to a + xHCI host root hub which support software LPM, the + host will run a software LPM test for it; if the device + enters L1 state and resume successfully and the host + supports USB2 hardware LPM, this file will show up and + driver will enable hardware LPM for the device. You + can write y/Y/1 or n/N/0 to the file to enable/disable + USB2 hardware LPM manually. This is for test purpose mainly. + + ``power/usb3_hardware_lpm_u1`` + ``power/usb3_hardware_lpm_u2`` + + When a USB 3.0 lpm-capable device is plugged in to a + xHCI host which supports link PM, it will check if U1 + and U2 exit latencies have been set in the BOS + descriptor; if the check is passed and the host + supports USB3 hardware LPM, USB3 hardware LPM will be + enabled for the device and these files will be created. + The files hold a string value (enable or disable) + indicating whether or not USB3 hardware LPM U1 or U2 + is enabled for the device. + +USB Port Power Control +---------------------- + +In addition to suspending endpoint devices and enabling hardware +controlled link power management, the USB subsystem also has the +capability to disable power to ports under some conditions. Power is +controlled through ``Set/ClearPortFeature(PORT_POWER)`` requests to a hub. +In the case of a root or platform-internal hub the host controller +driver translates ``PORT_POWER`` requests into platform firmware (ACPI) +method calls to set the port power state. For more background see the +Linux Plumbers Conference 2012 slides [#f1]_ and video [#f2]_: + +Upon receiving a ``ClearPortFeature(PORT_POWER)`` request a USB port is +logically off, and may trigger the actual loss of VBUS to the port [#f3]_. +VBUS may be maintained in the case where a hub gangs multiple ports into +a shared power well causing power to remain until all ports in the gang +are turned off. VBUS may also be maintained by hub ports configured for +a charging application. In any event a logically off port will lose +connection with its device, not respond to hotplug events, and not +respond to remote wakeup events. + +.. warning:: + + turning off a port may result in the inability to hot add a device. + Please see "User Interface for Port Power Control" for details. + +As far as the effect on the device itself it is similar to what a device +goes through during system suspend, i.e. the power session is lost. Any +USB device or driver that misbehaves with system suspend will be +similarly affected by a port power cycle event. For this reason the +implementation shares the same device recovery path (and honors the same +quirks) as the system resume path for the hub. + +.. [#f1] + + http://dl.dropbox.com/u/96820575/sarah-sharp-lpt-port-power-off2-mini.pdf + +.. [#f2] + + http://linuxplumbers.ubicast.tv/videos/usb-port-power-off-kerneluserspace-api/ + +.. [#f3] + + USB 3.1 Section 10.12 + + wakeup note: if a device is configured to send wakeup events the port + power control implementation will block poweroff attempts on that + port. + + +User Interface for Port Power Control +------------------------------------- + +The port power control mechanism uses the PM runtime system. Poweroff is +requested by clearing the ``power/pm_qos_no_power_off`` flag of the port device +(defaults to 1). If the port is disconnected it will immediately receive a +``ClearPortFeature(PORT_POWER)`` request. Otherwise, it will honor the pm +runtime rules and require the attached child device and all descendants to be +suspended. This mechanism is dependent on the hub advertising port power +switching in its hub descriptor (wHubCharacteristics logical power switching +mode field). + +Note, some interface devices/drivers do not support autosuspend. Userspace may +need to unbind the interface drivers before the :c:type:`usb_device` will +suspend. An unbound interface device is suspended by default. When unbinding, +be careful to unbind interface drivers, not the driver of the parent usb +device. Also, leave hub interface drivers bound. If the driver for the usb +device (not interface) is unbound the kernel is no longer able to resume the +device. If a hub interface driver is unbound, control of its child ports is +lost and all attached child-devices will disconnect. A good rule of thumb is +that if the 'driver/module' link for a device points to +``/sys/module/usbcore`` then unbinding it will interfere with port power +control. + +Example of the relevant files for port power control. Note, in this example +these files are relative to a usb hub device (prefix):: + + prefix=/sys/devices/pci0000:00/0000:00:14.0/usb3/3-1 + + attached child device + + hub port device + | + hub interface device + | | + v v v + $prefix/3-1:1.0/3-1-port1/device + + $prefix/3-1:1.0/3-1-port1/power/pm_qos_no_power_off + $prefix/3-1:1.0/3-1-port1/device/power/control + $prefix/3-1:1.0/3-1-port1/device/3-1.1:<intf0>/driver/unbind + $prefix/3-1:1.0/3-1-port1/device/3-1.1:<intf1>/driver/unbind + ... + $prefix/3-1:1.0/3-1-port1/device/3-1.1:<intfN>/driver/unbind + +In addition to these files some ports may have a 'peer' link to a port on +another hub. The expectation is that all superspeed ports have a +hi-speed peer:: + + $prefix/3-1:1.0/3-1-port1/peer -> ../../../../usb2/2-1/2-1:1.0/2-1-port1 + ../../../../usb2/2-1/2-1:1.0/2-1-port1/peer -> ../../../../usb3/3-1/3-1:1.0/3-1-port1 + +Distinct from 'companion ports', or 'ehci/xhci shared switchover ports' +peer ports are simply the hi-speed and superspeed interface pins that +are combined into a single usb3 connector. Peer ports share the same +ancestor XHCI device. + +While a superspeed port is powered off a device may downgrade its +connection and attempt to connect to the hi-speed pins. The +implementation takes steps to prevent this: + +1. Port suspend is sequenced to guarantee that hi-speed ports are powered-off + before their superspeed peer is permitted to power-off. The implication is + that the setting ``pm_qos_no_power_off`` to zero on a superspeed port may + not cause the port to power-off until its highspeed peer has gone to its + runtime suspend state. Userspace must take care to order the suspensions + if it wants to guarantee that a superspeed port will power-off. + +2. Port resume is sequenced to force a superspeed port to power-on prior to its + highspeed peer. + +3. Port resume always triggers an attached child device to resume. After a + power session is lost the device may have been removed, or need reset. + Resuming the child device when the parent port regains power resolves those + states and clamps the maximum port power cycle frequency at the rate the + child device can suspend (autosuspend-delay) and resume (reset-resume + latency). + +Sysfs files relevant for port power control: + + ``<hubdev-portX>/power/pm_qos_no_power_off``: + This writable flag controls the state of an idle port. + Once all children and descendants have suspended the + port may suspend/poweroff provided that + pm_qos_no_power_off is '0'. If pm_qos_no_power_off is + '1' the port will remain active/powered regardless of + the stats of descendants. Defaults to 1. + + ``<hubdev-portX>/power/runtime_status``: + This file reflects whether the port is 'active' (power is on) + or 'suspended' (logically off). There is no indication to + userspace whether VBUS is still supplied. + + ``<hubdev-portX>/connect_type``: + An advisory read-only flag to userspace indicating the + location and connection type of the port. It returns + one of four values 'hotplug', 'hardwired', 'not used', + and 'unknown'. All values, besides unknown, are set by + platform firmware. + + ``hotplug`` indicates an externally connectable/visible + port on the platform. Typically userspace would choose + to keep such a port powered to handle new device + connection events. + + ``hardwired`` refers to a port that is not visible but + connectable. Examples are internal ports for USB + bluetooth that can be disconnected via an external + switch or a port with a hardwired USB camera. It is + expected to be safe to allow these ports to suspend + provided pm_qos_no_power_off is coordinated with any + switch that gates connections. Userspace must arrange + for the device to be connected prior to the port + powering off, or to activate the port prior to enabling + connection via a switch. + + ``not used`` refers to an internal port that is expected + to never have a device connected to it. These may be + empty internal ports, or ports that are not physically + exposed on a platform. Considered safe to be + powered-off at all times. + + ``unknown`` means platform firmware does not provide + information for this port. Most commonly refers to + external hub ports which should be considered 'hotplug' + for policy decisions. + + .. note:: + + - since we are relying on the BIOS to get this ACPI + information correct, the USB port descriptions may + be missing or wrong. + + - Take care in clearing ``pm_qos_no_power_off``. Once + power is off this port will + not respond to new connect events. + + Once a child device is attached additional constraints are + applied before the port is allowed to poweroff. + + ``<child>/power/control``: + Must be ``auto``, and the port will not + power down until ``<child>/power/runtime_status`` + reflects the 'suspended' state. Default + value is controlled by child device driver. + + ``<child>/power/persist``: + This defaults to ``1`` for most devices and indicates if + kernel can persist the device's configuration across a + power session loss (suspend / port-power event). When + this value is ``0`` (quirky devices), port poweroff is + disabled. + + ``<child>/driver/unbind``: + Wakeup capable devices will block port poweroff. At + this time the only mechanism to clear the usb-internal + wakeup-capability for an interface device is to unbind + its driver. + +Summary of poweroff pre-requisite settings relative to a port device:: + + echo 0 > power/pm_qos_no_power_off + echo 0 > peer/power/pm_qos_no_power_off # if it exists + echo auto > power/control # this is the default value + echo auto > <child>/power/control + echo 1 > <child>/power/persist # this is the default value + +Suggested Userspace Port Power Policy +------------------------------------- + +As noted above userspace needs to be careful and deliberate about what +ports are enabled for poweroff. + +The default configuration is that all ports start with +``power/pm_qos_no_power_off`` set to ``1`` causing ports to always remain +active. + +Given confidence in the platform firmware's description of the ports +(ACPI _PLD record for a port populates 'connect_type') userspace can +clear pm_qos_no_power_off for all 'not used' ports. The same can be +done for 'hardwired' ports provided poweroff is coordinated with any +connection switch for the port. + +A more aggressive userspace policy is to enable USB port power off for +all ports (set ``<hubdev-portX>/power/pm_qos_no_power_off`` to ``0``) when +some external factor indicates the user has stopped interacting with the +system. For example, a distro may want to enable power off all USB +ports when the screen blanks, and re-power them when the screen becomes +active. Smart phones and tablets may want to power off USB ports when +the user pushes the power button. diff --git a/Documentation/driver-api/usb/typec.rst b/Documentation/driver-api/usb/typec.rst new file mode 100644 index 0000000000..201163d8c1 --- /dev/null +++ b/Documentation/driver-api/usb/typec.rst @@ -0,0 +1,234 @@ +.. _typec: + +USB Type-C connector class +========================== + +Introduction +------------ + +The typec class is meant for describing the USB Type-C ports in a system to the +user space in unified fashion. The class is designed to provide nothing else +except the user space interface implementation in hope that it can be utilized +on as many platforms as possible. + +The platforms are expected to register every USB Type-C port they have with the +class. In a normal case the registration will be done by a USB Type-C or PD PHY +driver, but it may be a driver for firmware interface such as UCSI, driver for +USB PD controller or even driver for Thunderbolt3 controller. This document +considers the component registering the USB Type-C ports with the class as "port +driver". + +On top of showing the capabilities, the class also offer user space control over +the roles and alternate modes of ports, partners and cable plugs when the port +driver is capable of supporting those features. + +The class provides an API for the port drivers described in this document. The +attributes are described in Documentation/ABI/testing/sysfs-class-typec. + +User space interface +-------------------- +Every port will be presented as its own device under /sys/class/typec/. The +first port will be named "port0", the second "port1" and so on. + +When connected, the partner will be presented also as its own device under +/sys/class/typec/. The parent of the partner device will always be the port it +is attached to. The partner attached to port "port0" will be named +"port0-partner". Full path to the device would be +/sys/class/typec/port0/port0-partner/. + +The cable and the two plugs on it may also be optionally presented as their own +devices under /sys/class/typec/. The cable attached to the port "port0" port +will be named port0-cable and the plug on the SOP Prime end (see USB Power +Delivery Specification ch. 2.4) will be named "port0-plug0" and on the SOP +Double Prime end "port0-plug1". The parent of a cable will always be the port, +and the parent of the cable plugs will always be the cable. + +If the port, partner or cable plug supports Alternate Modes, every supported +Alternate Mode SVID will have their own device describing them. Note that the +Alternate Mode devices will not be attached to the typec class. The parent of an +alternate mode will be the device that supports it, so for example an alternate +mode of port0-partner will be presented under /sys/class/typec/port0-partner/. +Every mode that is supported will have its own group under the Alternate Mode +device named "mode<index>", for example /sys/class/typec/port0/<alternate +mode>/mode1/. The requests for entering/exiting a mode can be done with "active" +attribute file in that group. + +Driver API +---------- + +Registering the ports +~~~~~~~~~~~~~~~~~~~~~ + +The port drivers will describe every Type-C port they control with struct +typec_capability data structure, and register them with the following API: + +.. kernel-doc:: drivers/usb/typec/class.c + :functions: typec_register_port typec_unregister_port + +When registering the ports, the prefer_role member in struct typec_capability +deserves special notice. If the port that is being registered does not have +initial role preference, which means the port does not execute Try.SNK or +Try.SRC by default, the member must have value TYPEC_NO_PREFERRED_ROLE. +Otherwise if the port executes Try.SNK by default, the member must have value +TYPEC_DEVICE, and with Try.SRC the value must be TYPEC_HOST. + +Registering Partners +~~~~~~~~~~~~~~~~~~~~ + +After successful connection of a partner, the port driver needs to register the +partner with the class. Details about the partner need to be described in struct +typec_partner_desc. The class copies the details of the partner during +registration. The class offers the following API for registering/unregistering +partners. + +.. kernel-doc:: drivers/usb/typec/class.c + :functions: typec_register_partner typec_unregister_partner + +The class will provide a handle to struct typec_partner if the registration was +successful, or NULL. + +If the partner is USB Power Delivery capable, and the port driver is able to +show the result of Discover Identity command, the partner descriptor structure +should include handle to struct usb_pd_identity instance. The class will then +create a sysfs directory for the identity under the partner device. The result +of Discover Identity command can then be reported with the following API: + +.. kernel-doc:: drivers/usb/typec/class.c + :functions: typec_partner_set_identity + +Registering Cables +~~~~~~~~~~~~~~~~~~ + +After successful connection of a cable that supports USB Power Delivery +Structured VDM "Discover Identity", the port driver needs to register the cable +and one or two plugs, depending if there is CC Double Prime controller present +in the cable or not. So a cable capable of SOP Prime communication, but not SOP +Double Prime communication, should only have one plug registered. For more +information about SOP communication, please read chapter about it from the +latest USB Power Delivery specification. + +The plugs are represented as their own devices. The cable is registered first, +followed by registration of the cable plugs. The cable will be the parent device +for the plugs. Details about the cable need to be described in struct +typec_cable_desc and about a plug in struct typec_plug_desc. The class copies +the details during registration. The class offers the following API for +registering/unregistering cables and their plugs: + +.. kernel-doc:: drivers/usb/typec/class.c + :functions: typec_register_cable typec_unregister_cable typec_register_plug typec_unregister_plug + +The class will provide a handle to struct typec_cable and struct typec_plug if +the registration is successful, or NULL if it isn't. + +If the cable is USB Power Delivery capable, and the port driver is able to show +the result of Discover Identity command, the cable descriptor structure should +include handle to struct usb_pd_identity instance. The class will then create a +sysfs directory for the identity under the cable device. The result of Discover +Identity command can then be reported with the following API: + +.. kernel-doc:: drivers/usb/typec/class.c + :functions: typec_cable_set_identity + +Notifications +~~~~~~~~~~~~~ + +When the partner has executed a role change, or when the default roles change +during connection of a partner or cable, the port driver must use the following +APIs to report it to the class: + +.. kernel-doc:: drivers/usb/typec/class.c + :functions: typec_set_data_role typec_set_pwr_role typec_set_vconn_role typec_set_pwr_opmode + +Alternate Modes +~~~~~~~~~~~~~~~ + +USB Type-C ports, partners and cable plugs may support Alternate Modes. Each +Alternate Mode will have identifier called SVID, which is either a Standard ID +given by USB-IF or vendor ID, and each supported SVID can have 1 - 6 modes. The +class provides struct typec_mode_desc for describing individual mode of a SVID, +and struct typec_altmode_desc which is a container for all the supported modes. + +Ports that support Alternate Modes need to register each SVID they support with +the following API: + +.. kernel-doc:: drivers/usb/typec/class.c + :functions: typec_port_register_altmode + +If a partner or cable plug provides a list of SVIDs as response to USB Power +Delivery Structured VDM Discover SVIDs message, each SVID needs to be +registered. + +API for the partners: + +.. kernel-doc:: drivers/usb/typec/class.c + :functions: typec_partner_register_altmode + +API for the Cable Plugs: + +.. kernel-doc:: drivers/usb/typec/class.c + :functions: typec_plug_register_altmode + +So ports, partners and cable plugs will register the alternate modes with their +own functions, but the registration will always return a handle to struct +typec_altmode on success, or NULL. The unregistration will happen with the same +function: + +.. kernel-doc:: drivers/usb/typec/class.c + :functions: typec_unregister_altmode + +If a partner or cable plug enters or exits a mode, the port driver needs to +notify the class with the following API: + +.. kernel-doc:: drivers/usb/typec/class.c + :functions: typec_altmode_update_active + +Multiplexer/DeMultiplexer Switches +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ + +USB Type-C connectors may have one or more mux/demux switches behind them. Since +the plugs can be inserted right-side-up or upside-down, a switch is needed to +route the correct data pairs from the connector to the USB controllers. If +Alternate or Accessory Modes are supported, another switch is needed that can +route the pins on the connector to some other component besides USB. USB Type-C +Connector Class supplies an API for registering those switches. + +.. kernel-doc:: drivers/usb/typec/mux.c + :functions: typec_switch_register typec_switch_unregister typec_mux_register typec_mux_unregister + +In most cases the same physical mux will handle both the orientation and mode. +However, as the port drivers will be responsible for the orientation, and the +alternate mode drivers for the mode, the two are always separated into their +own logical components: "mux" for the mode and "switch" for the orientation. + +When a port is registered, USB Type-C Connector Class requests both the mux and +the switch for the port. The drivers can then use the following API for +controlling them: + +.. kernel-doc:: drivers/usb/typec/class.c + :functions: typec_set_orientation typec_set_mode + +If the connector is dual-role capable, there may also be a switch for the data +role. USB Type-C Connector Class does not supply separate API for them. The +port drivers can use USB Role Class API with those. + +Illustration of the muxes behind a connector that supports an alternate mode:: + + ------------------------ + | Connector | + ------------------------ + | | + ------------------------ + \ Orientation / + -------------------- + | + -------------------- + / Mode \ + ------------------------ + / \ + ------------------------ -------------------- + | Alt Mode | / USB Role \ + ------------------------ ------------------------ + / \ + ------------------------ ------------------------ + | USB Host | | USB Device | + ------------------------ ------------------------ diff --git a/Documentation/driver-api/usb/typec_bus.rst b/Documentation/driver-api/usb/typec_bus.rst new file mode 100644 index 0000000000..21c890ae17 --- /dev/null +++ b/Documentation/driver-api/usb/typec_bus.rst @@ -0,0 +1,122 @@ + +API for USB Type-C Alternate Mode drivers +========================================= + +Introduction +------------ + +Alternate modes require communication with the partner using Vendor Defined +Messages (VDM) as defined in USB Type-C and USB Power Delivery Specifications. +The communication is SVID (Standard or Vendor ID) specific, i.e. specific for +every alternate mode, so every alternate mode will need a custom driver. + +USB Type-C bus allows binding a driver to the discovered partner alternate +modes by using the SVID and the mode number. + +:ref:`USB Type-C Connector Class <typec>` provides a device for every alternate +mode a port supports, and separate device for every alternate mode the partner +supports. The drivers for the alternate modes are bound to the partner alternate +mode devices, and the port alternate mode devices must be handled by the port +drivers. + +When a new partner alternate mode device is registered, it is linked to the +alternate mode device of the port that the partner is attached to, that has +matching SVID and mode. Communication between the port driver and alternate mode +driver will happen using the same API. + +The port alternate mode devices are used as a proxy between the partner and the +alternate mode drivers, so the port drivers are only expected to pass the SVID +specific commands from the alternate mode drivers to the partner, and from the +partners to the alternate mode drivers. No direct SVID specific communication is +needed from the port drivers, but the port drivers need to provide the operation +callbacks for the port alternate mode devices, just like the alternate mode +drivers need to provide them for the partner alternate mode devices. + +Usage: +------ + +General +~~~~~~~ + +By default, the alternate mode drivers are responsible for entering the mode. +It is also possible to leave the decision about entering the mode to the user +space (See Documentation/ABI/testing/sysfs-class-typec). Port drivers should not +enter any modes on their own. + +``->vdm`` is the most important callback in the operation callbacks vector. It +will be used to deliver all the SVID specific commands from the partner to the +alternate mode driver, and vice versa in case of port drivers. The drivers send +the SVID specific commands to each other using :c:func:`typec_altmode_vdm()`. + +If the communication with the partner using the SVID specific commands results +in need to reconfigure the pins on the connector, the alternate mode driver +needs to notify the bus using :c:func:`typec_altmode_notify()`. The driver +passes the negotiated SVID specific pin configuration value to the function as +parameter. The bus driver will then configure the mux behind the connector using +that value as the state value for the mux. + +NOTE: The SVID specific pin configuration values must always start from +``TYPEC_STATE_MODAL``. USB Type-C specification defines two default states for +the connector: ``TYPEC_STATE_USB`` and ``TYPEC_STATE_SAFE``. These values are +reserved by the bus as the first possible values for the state. When the +alternate mode is entered, the bus will put the connector into +``TYPEC_STATE_SAFE`` before sending Enter or Exit Mode command as defined in USB +Type-C Specification, and also put the connector back to ``TYPEC_STATE_USB`` +after the mode has been exited. + +An example of working definitions for SVID specific pin configurations would +look like this:: + + enum { + ALTMODEX_CONF_A = TYPEC_STATE_MODAL, + ALTMODEX_CONF_B, + ... + }; + +Helper macro ``TYPEC_MODAL_STATE()`` can also be used:: + +#define ALTMODEX_CONF_A = TYPEC_MODAL_STATE(0); +#define ALTMODEX_CONF_B = TYPEC_MODAL_STATE(1); + +Cable plug alternate modes +~~~~~~~~~~~~~~~~~~~~~~~~~~ + +The alternate mode drivers are not bound to cable plug alternate mode devices, +only to the partner alternate mode devices. If the alternate mode supports, or +requires, a cable that responds to SOP Prime, and optionally SOP Double Prime +messages, the driver for that alternate mode must request handle to the cable +plug alternate modes using :c:func:`typec_altmode_get_plug()`, and take over +their control. + +Driver API +---------- + +Alternate mode structs +~~~~~~~~~~~~~~~~~~~~~~ + +.. kernel-doc:: include/linux/usb/typec_altmode.h + :functions: typec_altmode_driver typec_altmode_ops + +Alternate mode driver registering/unregistering +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ + +.. kernel-doc:: include/linux/usb/typec_altmode.h + :functions: typec_altmode_register_driver typec_altmode_unregister_driver + +Alternate mode driver operations +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ + +.. kernel-doc:: drivers/usb/typec/bus.c + :functions: typec_altmode_enter typec_altmode_exit typec_altmode_attention typec_altmode_vdm typec_altmode_notify + +API for the port drivers +~~~~~~~~~~~~~~~~~~~~~~~~ + +.. kernel-doc:: drivers/usb/typec/bus.c + :functions: typec_match_altmode + +Cable Plug operations +~~~~~~~~~~~~~~~~~~~~~ + +.. kernel-doc:: drivers/usb/typec/bus.c + :functions: typec_altmode_get_plug typec_altmode_put_plug diff --git a/Documentation/driver-api/usb/usb.rst b/Documentation/driver-api/usb/usb.rst new file mode 100644 index 0000000000..fb41768696 --- /dev/null +++ b/Documentation/driver-api/usb/usb.rst @@ -0,0 +1,1056 @@ +.. _usb-hostside-api: + +=========================== +The Linux-USB Host Side API +=========================== + +Introduction to USB on Linux +============================ + +A Universal Serial Bus (USB) is used to connect a host, such as a PC or +workstation, to a number of peripheral devices. USB uses a tree +structure, with the host as the root (the system's master), hubs as +interior nodes, and peripherals as leaves (and slaves). Modern PCs +support several such trees of USB devices, usually +a few USB 3.0 (5 GBit/s) or USB 3.1 (10 GBit/s) and some legacy +USB 2.0 (480 MBit/s) busses just in case. + +That master/slave asymmetry was designed-in for a number of reasons, one +being ease of use. It is not physically possible to mistake upstream and +downstream or it does not matter with a type C plug (or they are built into the +peripheral). Also, the host software doesn't need to deal with +distributed auto-configuration since the pre-designated master node +manages all that. + +Kernel developers added USB support to Linux early in the 2.2 kernel +series and have been developing it further since then. Besides support +for each new generation of USB, various host controllers gained support, +new drivers for peripherals have been added and advanced features for latency +measurement and improved power management introduced. + +Linux can run inside USB devices as well as on the hosts that control +the devices. But USB device drivers running inside those peripherals +don't do the same things as the ones running inside hosts, so they've +been given a different name: *gadget drivers*. This document does not +cover gadget drivers. + +USB Host-Side API Model +======================= + +Host-side drivers for USB devices talk to the "usbcore" APIs. There are +two. One is intended for *general-purpose* drivers (exposed through +driver frameworks), and the other is for drivers that are *part of the +core*. Such core drivers include the *hub* driver (which manages trees +of USB devices) and several different kinds of *host controller +drivers*, which control individual busses. + +The device model seen by USB drivers is relatively complex. + +- USB supports four kinds of data transfers (control, bulk, interrupt, + and isochronous). Two of them (control and bulk) use bandwidth as + it's available, while the other two (interrupt and isochronous) are + scheduled to provide guaranteed bandwidth. + +- The device description model includes one or more "configurations" + per device, only one of which is active at a time. Devices are supposed + to be capable of operating at lower than their top + speeds and may provide a BOS descriptor showing the lowest speed they + remain fully operational at. + +- From USB 3.0 on configurations have one or more "functions", which + provide a common functionality and are grouped together for purposes + of power management. + +- Configurations or functions have one or more "interfaces", each of which may have + "alternate settings". Interfaces may be standardized by USB "Class" + specifications, or may be specific to a vendor or device. + + USB device drivers actually bind to interfaces, not devices. Think of + them as "interface drivers", though you may not see many devices + where the distinction is important. *Most USB devices are simple, + with only one function, one configuration, one interface, and one alternate + setting.* + +- Interfaces have one or more "endpoints", each of which supports one + type and direction of data transfer such as "bulk out" or "interrupt + in". The entire configuration may have up to sixteen endpoints in + each direction, allocated as needed among all the interfaces. + +- Data transfer on USB is packetized; each endpoint has a maximum + packet size. Drivers must often be aware of conventions such as + flagging the end of bulk transfers using "short" (including zero + length) packets. + +- The Linux USB API supports synchronous calls for control and bulk + messages. It also supports asynchronous calls for all kinds of data + transfer, using request structures called "URBs" (USB Request + Blocks). + +Accordingly, the USB Core API exposed to device drivers covers quite a +lot of territory. You'll probably need to consult the USB 3.0 +specification, available online from www.usb.org at no cost, as well as +class or device specifications. + +The only host-side drivers that actually touch hardware (reading/writing +registers, handling IRQs, and so on) are the HCDs. In theory, all HCDs +provide the same functionality through the same API. In practice, that's +becoming more true, but there are still differences +that crop up especially with fault handling on the less common controllers. +Different controllers don't +necessarily report the same aspects of failures, and recovery from +faults (including software-induced ones like unlinking an URB) isn't yet +fully consistent. Device driver authors should make a point of doing +disconnect testing (while the device is active) with each different host +controller driver, to make sure drivers don't have bugs of their own as +well as to make sure they aren't relying on some HCD-specific behavior. + +.. _usb_chapter9: + +USB-Standard Types +================== + +In ``include/uapi/linux/usb/ch9.h`` you will find the USB data types defined +in chapter 9 of the USB specification. These data types are used throughout +USB, and in APIs including this host side API, gadget APIs, usb character +devices and debugfs interfaces. That file is itself included by +``include/linux/usb/ch9.h``, which also contains declarations of a few +utility routines for manipulating these data types; the implementations +are in ``drivers/usb/common/common.c``. + +.. kernel-doc:: drivers/usb/common/common.c + :export: + +In addition, some functions useful for creating debugging output are +defined in ``drivers/usb/common/debug.c``. + +.. _usb_header: + +Host-Side Data Types and Macros +=============================== + +The host side API exposes several layers to drivers, some of which are +more necessary than others. These support lifecycle models for host side +drivers and devices, and support passing buffers through usbcore to some +HCD that performs the I/O for the device driver. + +.. kernel-doc:: include/linux/usb.h + :internal: + +USB Core APIs +============= + +There are two basic I/O models in the USB API. The most elemental one is +asynchronous: drivers submit requests in the form of an URB, and the +URB's completion callback handles the next step. All USB transfer types +support that model, although there are special cases for control URBs +(which always have setup and status stages, but may not have a data +stage) and isochronous URBs (which allow large packets and include +per-packet fault reports). Built on top of that is synchronous API +support, where a driver calls a routine that allocates one or more URBs, +submits them, and waits until they complete. There are synchronous +wrappers for single-buffer control and bulk transfers (which are awkward +to use in some driver disconnect scenarios), and for scatterlist based +streaming i/o (bulk or interrupt). + +USB drivers need to provide buffers that can be used for DMA, although +they don't necessarily need to provide the DMA mapping themselves. There +are APIs to use used when allocating DMA buffers, which can prevent use +of bounce buffers on some systems. In some cases, drivers may be able to +rely on 64bit DMA to eliminate another kind of bounce buffer. + +.. kernel-doc:: drivers/usb/core/urb.c + :export: + +.. kernel-doc:: drivers/usb/core/message.c + :export: + +.. kernel-doc:: drivers/usb/core/file.c + :export: + +.. kernel-doc:: drivers/usb/core/driver.c + :export: + +.. kernel-doc:: drivers/usb/core/usb.c + :export: + +.. kernel-doc:: drivers/usb/core/hub.c + :export: + +Host Controller APIs +==================== + +These APIs are only for use by host controller drivers, most of which +implement standard register interfaces such as XHCI, EHCI, OHCI, or UHCI. UHCI +was one of the first interfaces, designed by Intel and also used by VIA; +it doesn't do much in hardware. OHCI was designed later, to have the +hardware do more work (bigger transfers, tracking protocol state, and so +on). EHCI was designed with USB 2.0; its design has features that +resemble OHCI (hardware does much more work) as well as UHCI (some parts +of ISO support, TD list processing). XHCI was designed with USB 3.0. It +continues to shift support for functionality into hardware. + +There are host controllers other than the "big three", although most PCI +based controllers (and a few non-PCI based ones) use one of those +interfaces. Not all host controllers use DMA; some use PIO, and there is +also a simulator and a virtual host controller to pipe USB over the network. + +The same basic APIs are available to drivers for all those controllers. +For historical reasons they are in two layers: :c:type:`struct +usb_bus <usb_bus>` is a rather thin layer that became available +in the 2.2 kernels, while :c:type:`struct usb_hcd <usb_hcd>` +is a more featureful layer +that lets HCDs share common code, to shrink driver size and +significantly reduce hcd-specific behaviors. + +.. kernel-doc:: drivers/usb/core/hcd.c + :export: + +.. kernel-doc:: drivers/usb/core/hcd-pci.c + :export: + +.. kernel-doc:: drivers/usb/core/buffer.c + :internal: + +The USB character device nodes +============================== + +This chapter presents the Linux character device nodes. You may prefer +to avoid writing new kernel code for your USB driver. User mode device +drivers are usually packaged as applications or libraries, and may use +character devices through some programming library that wraps it. +Such libraries include: + + - `libusb <http://libusb.sourceforge.net>`__ for C/C++, and + - `jUSB <http://jUSB.sourceforge.net>`__ for Java. + +Some old information about it can be seen at the "USB Device Filesystem" +section of the USB Guide. The latest copy of the USB Guide can be found +at http://www.linux-usb.org/ + +.. note:: + + - They were used to be implemented via *usbfs*, but this is not part of + the sysfs debug interface. + + - This particular documentation is incomplete, especially with respect + to the asynchronous mode. As of kernel 2.5.66 the code and this + (new) documentation need to be cross-reviewed. + +What files are in "devtmpfs"? +----------------------------- + +Conventionally mounted at ``/dev/bus/usb/``, usbfs features include: + +- ``/dev/bus/usb/BBB/DDD`` ... magic files exposing the each device's + configuration descriptors, and supporting a series of ioctls for + making device requests, including I/O to devices. (Purely for access + by programs.) + +Each bus is given a number (``BBB``) based on when it was enumerated; within +each bus, each device is given a similar number (``DDD``). Those ``BBB/DDD`` +paths are not "stable" identifiers; expect them to change even if you +always leave the devices plugged in to the same hub port. *Don't even +think of saving these in application configuration files.* Stable +identifiers are available, for user mode applications that want to use +them. HID and networking devices expose these stable IDs, so that for +example you can be sure that you told the right UPS to power down its +second server. Pleast note that it doesn't (yet) expose those IDs. + +/dev/bus/usb/BBB/DDD +-------------------- + +Use these files in one of these basic ways: + +- *They can be read,* producing first the device descriptor (18 bytes) and + then the descriptors for the current configuration. See the USB 2.0 spec + for details about those binary data formats. You'll need to convert most + multibyte values from little endian format to your native host byte + order, although a few of the fields in the device descriptor (both of + the BCD-encoded fields, and the vendor and product IDs) will be + byteswapped for you. Note that configuration descriptors include + descriptors for interfaces, altsettings, endpoints, and maybe additional + class descriptors. + +- *Perform USB operations* using *ioctl()* requests to make endpoint I/O + requests (synchronously or asynchronously) or manage the device. These + requests need the ``CAP_SYS_RAWIO`` capability, as well as filesystem + access permissions. Only one ioctl request can be made on one of these + device files at a time. This means that if you are synchronously reading + an endpoint from one thread, you won't be able to write to a different + endpoint from another thread until the read completes. This works for + *half duplex* protocols, but otherwise you'd use asynchronous i/o + requests. + +Each connected USB device has one file. The ``BBB`` indicates the bus +number. The ``DDD`` indicates the device address on that bus. Both +of these numbers are assigned sequentially, and can be reused, so +you can't rely on them for stable access to devices. For example, +it's relatively common for devices to re-enumerate while they are +still connected (perhaps someone jostled their power supply, hub, +or USB cable), so a device might be ``002/027`` when you first connect +it and ``002/048`` sometime later. + +These files can be read as binary data. The binary data consists +of first the device descriptor, then the descriptors for each +configuration of the device. Multi-byte fields in the device descriptor +are converted to host endianness by the kernel. The configuration +descriptors are in bus endian format! The configuration descriptor +are wTotalLength bytes apart. If a device returns less configuration +descriptor data than indicated by wTotalLength there will be a hole in +the file for the missing bytes. This information is also shown +in text form by the ``/sys/kernel/debug/usb/devices`` file, described later. + +These files may also be used to write user-level drivers for the USB +devices. You would open the ``/dev/bus/usb/BBB/DDD`` file read/write, +read its descriptors to make sure it's the device you expect, and then +bind to an interface (or perhaps several) using an ioctl call. You +would issue more ioctls to the device to communicate to it using +control, bulk, or other kinds of USB transfers. The IOCTLs are +listed in the ``<linux/usbdevice_fs.h>`` file, and at this writing the +source code (``linux/drivers/usb/core/devio.c``) is the primary reference +for how to access devices through those files. + +Note that since by default these ``BBB/DDD`` files are writable only by +root, only root can write such user mode drivers. You can selectively +grant read/write permissions to other users by using ``chmod``. Also, +usbfs mount options such as ``devmode=0666`` may be helpful. + + +Life Cycle of User Mode Drivers +------------------------------- + +Such a driver first needs to find a device file for a device it knows +how to handle. Maybe it was told about it because a ``/sbin/hotplug`` +event handling agent chose that driver to handle the new device. Or +maybe it's an application that scans all the ``/dev/bus/usb`` device files, +and ignores most devices. In either case, it should :c:func:`read()` +all the descriptors from the device file, and check them against what it +knows how to handle. It might just reject everything except a particular +vendor and product ID, or need a more complex policy. + +Never assume there will only be one such device on the system at a time! +If your code can't handle more than one device at a time, at least +detect when there's more than one, and have your users choose which +device to use. + +Once your user mode driver knows what device to use, it interacts with +it in either of two styles. The simple style is to make only control +requests; some devices don't need more complex interactions than those. +(An example might be software using vendor-specific control requests for +some initialization or configuration tasks, with a kernel driver for the +rest.) + +More likely, you need a more complex style driver: one using non-control +endpoints, reading or writing data and claiming exclusive use of an +interface. *Bulk* transfers are easiest to use, but only their sibling +*interrupt* transfers work with low speed devices. Both interrupt and +*isochronous* transfers offer service guarantees because their bandwidth +is reserved. Such "periodic" transfers are awkward to use through usbfs, +unless you're using the asynchronous calls. However, interrupt transfers +can also be used in a synchronous "one shot" style. + +Your user-mode driver should never need to worry about cleaning up +request state when the device is disconnected, although it should close +its open file descriptors as soon as it starts seeing the ENODEV errors. + +The ioctl() Requests +-------------------- + +To use these ioctls, you need to include the following headers in your +userspace program:: + + #include <linux/usb.h> + #include <linux/usbdevice_fs.h> + #include <asm/byteorder.h> + +The standard USB device model requests, from "Chapter 9" of the USB 2.0 +specification, are automatically included from the ``<linux/usb/ch9.h>`` +header. + +Unless noted otherwise, the ioctl requests described here will update +the modification time on the usbfs file to which they are applied +(unless they fail). A return of zero indicates success; otherwise, a +standard USB error code is returned (These are documented in +:ref:`usb-error-codes`). + +Each of these files multiplexes access to several I/O streams, one per +endpoint. Each device has one control endpoint (endpoint zero) which +supports a limited RPC style RPC access. Devices are configured by +hub_wq (in the kernel) setting a device-wide *configuration* that +affects things like power consumption and basic functionality. The +endpoints are part of USB *interfaces*, which may have *altsettings* +affecting things like which endpoints are available. Many devices only +have a single configuration and interface, so drivers for them will +ignore configurations and altsettings. + +Management/Status Requests +~~~~~~~~~~~~~~~~~~~~~~~~~~ + +A number of usbfs requests don't deal very directly with device I/O. +They mostly relate to device management and status. These are all +synchronous requests. + +USBDEVFS_CLAIMINTERFACE + This is used to force usbfs to claim a specific interface, which has + not previously been claimed by usbfs or any other kernel driver. The + ioctl parameter is an integer holding the number of the interface + (bInterfaceNumber from descriptor). + + Note that if your driver doesn't claim an interface before trying to + use one of its endpoints, and no other driver has bound to it, then + the interface is automatically claimed by usbfs. + + This claim will be released by a RELEASEINTERFACE ioctl, or by + closing the file descriptor. File modification time is not updated + by this request. + +USBDEVFS_CONNECTINFO + Says whether the device is lowspeed. The ioctl parameter points to a + structure like this:: + + struct usbdevfs_connectinfo { + unsigned int devnum; + unsigned char slow; + }; + + File modification time is not updated by this request. + + *You can't tell whether a "not slow" device is connected at high + speed (480 MBit/sec) or just full speed (12 MBit/sec).* You should + know the devnum value already, it's the DDD value of the device file + name. + +USBDEVFS_GET_SPEED + Returns the speed of the device. The speed is returned as a + nummerical value in accordance with enum usb_device_speed + + File modification time is not updated by this request. + +USBDEVFS_GETDRIVER + Returns the name of the kernel driver bound to a given interface (a + string). Parameter is a pointer to this structure, which is + modified:: + + struct usbdevfs_getdriver { + unsigned int interface; + char driver[USBDEVFS_MAXDRIVERNAME + 1]; + }; + + File modification time is not updated by this request. + +USBDEVFS_IOCTL + Passes a request from userspace through to a kernel driver that has + an ioctl entry in the *struct usb_driver* it registered:: + + struct usbdevfs_ioctl { + int ifno; + int ioctl_code; + void *data; + }; + + /* user mode call looks like this. + * 'request' becomes the driver->ioctl() 'code' parameter. + * the size of 'param' is encoded in 'request', and that data + * is copied to or from the driver->ioctl() 'buf' parameter. + */ + static int + usbdev_ioctl (int fd, int ifno, unsigned request, void *param) + { + struct usbdevfs_ioctl wrapper; + + wrapper.ifno = ifno; + wrapper.ioctl_code = request; + wrapper.data = param; + + return ioctl (fd, USBDEVFS_IOCTL, &wrapper); + } + + File modification time is not updated by this request. + + This request lets kernel drivers talk to user mode code through + filesystem operations even when they don't create a character or + block special device. It's also been used to do things like ask + devices what device special file should be used. Two pre-defined + ioctls are used to disconnect and reconnect kernel drivers, so that + user mode code can completely manage binding and configuration of + devices. + +USBDEVFS_RELEASEINTERFACE + This is used to release the claim usbfs made on interface, either + implicitly or because of a USBDEVFS_CLAIMINTERFACE call, before the + file descriptor is closed. The ioctl parameter is an integer holding + the number of the interface (bInterfaceNumber from descriptor); File + modification time is not updated by this request. + + .. warning:: + + *No security check is made to ensure that the task which made + the claim is the one which is releasing it. This means that user + mode driver may interfere other ones.* + +USBDEVFS_RESETEP + Resets the data toggle value for an endpoint (bulk or interrupt) to + DATA0. The ioctl parameter is an integer endpoint number (1 to 15, + as identified in the endpoint descriptor), with USB_DIR_IN added + if the device's endpoint sends data to the host. + + .. Warning:: + + *Avoid using this request. It should probably be removed.* Using + it typically means the device and driver will lose toggle + synchronization. If you really lost synchronization, you likely + need to completely handshake with the device, using a request + like CLEAR_HALT or SET_INTERFACE. + +USBDEVFS_DROP_PRIVILEGES + This is used to relinquish the ability to do certain operations + which are considered to be privileged on a usbfs file descriptor. + This includes claiming arbitrary interfaces, resetting a device on + which there are currently claimed interfaces from other users, and + issuing USBDEVFS_IOCTL calls. The ioctl parameter is a 32 bit mask + of interfaces the user is allowed to claim on this file descriptor. + You may issue this ioctl more than one time to narrow said mask. + +Synchronous I/O Support +~~~~~~~~~~~~~~~~~~~~~~~ + +Synchronous requests involve the kernel blocking until the user mode +request completes, either by finishing successfully or by reporting an +error. In most cases this is the simplest way to use usbfs, although as +noted above it does prevent performing I/O to more than one endpoint at +a time. + +USBDEVFS_BULK + Issues a bulk read or write request to the device. The ioctl + parameter is a pointer to this structure:: + + struct usbdevfs_bulktransfer { + unsigned int ep; + unsigned int len; + unsigned int timeout; /* in milliseconds */ + void *data; + }; + + The ``ep`` value identifies a bulk endpoint number (1 to 15, as + identified in an endpoint descriptor), masked with USB_DIR_IN when + referring to an endpoint which sends data to the host from the + device. The length of the data buffer is identified by ``len``; Recent + kernels support requests up to about 128KBytes. *FIXME say how read + length is returned, and how short reads are handled.*. + +USBDEVFS_CLEAR_HALT + Clears endpoint halt (stall) and resets the endpoint toggle. This is + only meaningful for bulk or interrupt endpoints. The ioctl parameter + is an integer endpoint number (1 to 15, as identified in an endpoint + descriptor), masked with USB_DIR_IN when referring to an endpoint + which sends data to the host from the device. + + Use this on bulk or interrupt endpoints which have stalled, + returning ``-EPIPE`` status to a data transfer request. Do not issue + the control request directly, since that could invalidate the host's + record of the data toggle. + +USBDEVFS_CONTROL + Issues a control request to the device. The ioctl parameter points + to a structure like this:: + + struct usbdevfs_ctrltransfer { + __u8 bRequestType; + __u8 bRequest; + __u16 wValue; + __u16 wIndex; + __u16 wLength; + __u32 timeout; /* in milliseconds */ + void *data; + }; + + The first eight bytes of this structure are the contents of the + SETUP packet to be sent to the device; see the USB 2.0 specification + for details. The bRequestType value is composed by combining a + ``USB_TYPE_*`` value, a ``USB_DIR_*`` value, and a ``USB_RECIP_*`` + value (from ``linux/usb.h``). If wLength is nonzero, it describes + the length of the data buffer, which is either written to the device + (USB_DIR_OUT) or read from the device (USB_DIR_IN). + + At this writing, you can't transfer more than 4 KBytes of data to or + from a device; usbfs has a limit, and some host controller drivers + have a limit. (That's not usually a problem.) *Also* there's no way + to say it's not OK to get a short read back from the device. + +USBDEVFS_RESET + Does a USB level device reset. The ioctl parameter is ignored. After + the reset, this rebinds all device interfaces. File modification + time is not updated by this request. + +.. warning:: + + *Avoid using this call* until some usbcore bugs get fixed, since + it does not fully synchronize device, interface, and driver (not + just usbfs) state. + +USBDEVFS_SETINTERFACE + Sets the alternate setting for an interface. The ioctl parameter is + a pointer to a structure like this:: + + struct usbdevfs_setinterface { + unsigned int interface; + unsigned int altsetting; + }; + + File modification time is not updated by this request. + + Those struct members are from some interface descriptor applying to + the current configuration. The interface number is the + bInterfaceNumber value, and the altsetting number is the + bAlternateSetting value. (This resets each endpoint in the + interface.) + +USBDEVFS_SETCONFIGURATION + Issues the :c:func:`usb_set_configuration()` call for the + device. The parameter is an integer holding the number of a + configuration (bConfigurationValue from descriptor). File + modification time is not updated by this request. + +.. warning:: + + *Avoid using this call* until some usbcore bugs get fixed, since + it does not fully synchronize device, interface, and driver (not + just usbfs) state. + +Asynchronous I/O Support +~~~~~~~~~~~~~~~~~~~~~~~~ + +As mentioned above, there are situations where it may be important to +initiate concurrent operations from user mode code. This is particularly +important for periodic transfers (interrupt and isochronous), but it can +be used for other kinds of USB requests too. In such cases, the +asynchronous requests described here are essential. Rather than +submitting one request and having the kernel block until it completes, +the blocking is separate. + +These requests are packaged into a structure that resembles the URB used +by kernel device drivers. (No POSIX Async I/O support here, sorry.) It +identifies the endpoint type (``USBDEVFS_URB_TYPE_*``), endpoint +(number, masked with USB_DIR_IN as appropriate), buffer and length, +and a user "context" value serving to uniquely identify each request. +(It's usually a pointer to per-request data.) Flags can modify requests +(not as many as supported for kernel drivers). + +Each request can specify a realtime signal number (between SIGRTMIN and +SIGRTMAX, inclusive) to request a signal be sent when the request +completes. + +When usbfs returns these urbs, the status value is updated, and the +buffer may have been modified. Except for isochronous transfers, the +actual_length is updated to say how many bytes were transferred; if the +USBDEVFS_URB_DISABLE_SPD flag is set ("short packets are not OK"), if +fewer bytes were read than were requested then you get an error report:: + + struct usbdevfs_iso_packet_desc { + unsigned int length; + unsigned int actual_length; + unsigned int status; + }; + + struct usbdevfs_urb { + unsigned char type; + unsigned char endpoint; + int status; + unsigned int flags; + void *buffer; + int buffer_length; + int actual_length; + int start_frame; + int number_of_packets; + int error_count; + unsigned int signr; + void *usercontext; + struct usbdevfs_iso_packet_desc iso_frame_desc[]; + }; + +For these asynchronous requests, the file modification time reflects +when the request was initiated. This contrasts with their use with the +synchronous requests, where it reflects when requests complete. + +USBDEVFS_DISCARDURB + *TBS* File modification time is not updated by this request. + +USBDEVFS_DISCSIGNAL + *TBS* File modification time is not updated by this request. + +USBDEVFS_REAPURB + *TBS* File modification time is not updated by this request. + +USBDEVFS_REAPURBNDELAY + *TBS* File modification time is not updated by this request. + +USBDEVFS_SUBMITURB + *TBS* + +The USB devices +=============== + +The USB devices are now exported via debugfs: + +- ``/sys/kernel/debug/usb/devices`` ... a text file showing each of the USB + devices on known to the kernel, and their configuration descriptors. + You can also poll() this to learn about new devices. + +/sys/kernel/debug/usb/devices +----------------------------- + +This file is handy for status viewing tools in user mode, which can scan +the text format and ignore most of it. More detailed device status +(including class and vendor status) is available from device-specific +files. For information about the current format of this file, see below. + +This file, in combination with the poll() system call, can also be used +to detect when devices are added or removed:: + + int fd; + struct pollfd pfd; + + fd = open("/sys/kernel/debug/usb/devices", O_RDONLY); + pfd = { fd, POLLIN, 0 }; + for (;;) { + /* The first time through, this call will return immediately. */ + poll(&pfd, 1, -1); + + /* To see what's changed, compare the file's previous and current + contents or scan the filesystem. (Scanning is more precise.) */ + } + +Note that this behavior is intended to be used for informational and +debug purposes. It would be more appropriate to use programs such as +udev or HAL to initialize a device or start a user-mode helper program, +for instance. + +In this file, each device's output has multiple lines of ASCII output. + +I made it ASCII instead of binary on purpose, so that someone +can obtain some useful data from it without the use of an +auxiliary program. However, with an auxiliary program, the numbers +in the first 4 columns of each ``T:`` line (topology info: +Lev, Prnt, Port, Cnt) can be used to build a USB topology diagram. + +Each line is tagged with a one-character ID for that line:: + + T = Topology (etc.) + B = Bandwidth (applies only to USB host controllers, which are + virtualized as root hubs) + D = Device descriptor info. + P = Product ID info. (from Device descriptor, but they won't fit + together on one line) + S = String descriptors. + C = Configuration descriptor info. (* = active configuration) + I = Interface descriptor info. + E = Endpoint descriptor info. + +/sys/kernel/debug/usb/devices output format +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ + +Legend:: + d = decimal number (may have leading spaces or 0's) + x = hexadecimal number (may have leading spaces or 0's) + s = string + + + +Topology info +^^^^^^^^^^^^^ + +:: + + T: Bus=dd Lev=dd Prnt=dd Port=dd Cnt=dd Dev#=ddd Spd=dddd MxCh=dd + | | | | | | | | |__MaxChildren + | | | | | | | |__Device Speed in Mbps + | | | | | | |__DeviceNumber + | | | | | |__Count of devices at this level + | | | | |__Connector/Port on Parent for this device + | | | |__Parent DeviceNumber + | | |__Level in topology for this bus + | |__Bus number + |__Topology info tag + +Speed may be: + + ======= ====================================================== + 1.5 Mbit/s for low speed USB + 12 Mbit/s for full speed USB + 480 Mbit/s for high speed USB (added for USB 2.0) + 5000 Mbit/s for SuperSpeed USB (added for USB 3.0) + ======= ====================================================== + +For reasons lost in the mists of time, the Port number is always +too low by 1. For example, a device plugged into port 4 will +show up with ``Port=03``. + +Bandwidth info +^^^^^^^^^^^^^^ + +:: + + B: Alloc=ddd/ddd us (xx%), #Int=ddd, #Iso=ddd + | | | |__Number of isochronous requests + | | |__Number of interrupt requests + | |__Total Bandwidth allocated to this bus + |__Bandwidth info tag + +Bandwidth allocation is an approximation of how much of one frame +(millisecond) is in use. It reflects only periodic transfers, which +are the only transfers that reserve bandwidth. Control and bulk +transfers use all other bandwidth, including reserved bandwidth that +is not used for transfers (such as for short packets). + +The percentage is how much of the "reserved" bandwidth is scheduled by +those transfers. For a low or full speed bus (loosely, "USB 1.1"), +90% of the bus bandwidth is reserved. For a high speed bus (loosely, +"USB 2.0") 80% is reserved. + + +Device descriptor info & Product ID info +^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ + +:: + + D: Ver=x.xx Cls=xx(s) Sub=xx Prot=xx MxPS=dd #Cfgs=dd + P: Vendor=xxxx ProdID=xxxx Rev=xx.xx + +where:: + + D: Ver=x.xx Cls=xx(sssss) Sub=xx Prot=xx MxPS=dd #Cfgs=dd + | | | | | | |__NumberConfigurations + | | | | | |__MaxPacketSize of Default Endpoint + | | | | |__DeviceProtocol + | | | |__DeviceSubClass + | | |__DeviceClass + | |__Device USB version + |__Device info tag #1 + +where:: + + P: Vendor=xxxx ProdID=xxxx Rev=xx.xx + | | | |__Product revision number + | | |__Product ID code + | |__Vendor ID code + |__Device info tag #2 + + +String descriptor info +^^^^^^^^^^^^^^^^^^^^^^ +:: + + S: Manufacturer=ssss + | |__Manufacturer of this device as read from the device. + | For USB host controller drivers (virtual root hubs) this may + | be omitted, or (for newer drivers) will identify the kernel + | version and the driver which provides this hub emulation. + |__String info tag + + S: Product=ssss + | |__Product description of this device as read from the device. + | For older USB host controller drivers (virtual root hubs) this + | indicates the driver; for newer ones, it's a product (and vendor) + | description that often comes from the kernel's PCI ID database. + |__String info tag + + S: SerialNumber=ssss + | |__Serial Number of this device as read from the device. + | For USB host controller drivers (virtual root hubs) this is + | some unique ID, normally a bus ID (address or slot name) that + | can't be shared with any other device. + |__String info tag + + + +Configuration descriptor info +^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ +:: + + C:* #Ifs=dd Cfg#=dd Atr=xx MPwr=dddmA + | | | | | |__MaxPower in mA + | | | | |__Attributes + | | | |__ConfiguratioNumber + | | |__NumberOfInterfaces + | |__ "*" indicates the active configuration (others are " ") + |__Config info tag + +USB devices may have multiple configurations, each of which act +rather differently. For example, a bus-powered configuration +might be much less capable than one that is self-powered. Only +one device configuration can be active at a time; most devices +have only one configuration. + +Each configuration consists of one or more interfaces. Each +interface serves a distinct "function", which is typically bound +to a different USB device driver. One common example is a USB +speaker with an audio interface for playback, and a HID interface +for use with software volume control. + +Interface descriptor info (can be multiple per Config) +^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ +:: + + I:* If#=dd Alt=dd #EPs=dd Cls=xx(sssss) Sub=xx Prot=xx Driver=ssss + | | | | | | | | |__Driver name + | | | | | | | | or "(none)" + | | | | | | | |__InterfaceProtocol + | | | | | | |__InterfaceSubClass + | | | | | |__InterfaceClass + | | | | |__NumberOfEndpoints + | | | |__AlternateSettingNumber + | | |__InterfaceNumber + | |__ "*" indicates the active altsetting (others are " ") + |__Interface info tag + +A given interface may have one or more "alternate" settings. +For example, default settings may not use more than a small +amount of periodic bandwidth. To use significant fractions +of bus bandwidth, drivers must select a non-default altsetting. + +Only one setting for an interface may be active at a time, and +only one driver may bind to an interface at a time. Most devices +have only one alternate setting per interface. + + +Endpoint descriptor info (can be multiple per Interface) +^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ + +:: + + E: Ad=xx(s) Atr=xx(ssss) MxPS=dddd Ivl=dddss + | | | | |__Interval (max) between transfers + | | | |__EndpointMaxPacketSize + | | |__Attributes(EndpointType) + | |__EndpointAddress(I=In,O=Out) + |__Endpoint info tag + +The interval is nonzero for all periodic (interrupt or isochronous) +endpoints. For high speed endpoints the transfer interval may be +measured in microseconds rather than milliseconds. + +For high speed periodic endpoints, the ``EndpointMaxPacketSize`` reflects +the per-microframe data transfer size. For "high bandwidth" +endpoints, that can reflect two or three packets (for up to +3KBytes every 125 usec) per endpoint. + +With the Linux-USB stack, periodic bandwidth reservations use the +transfer intervals and sizes provided by URBs, which can be less +than those found in endpoint descriptor. + +Usage examples +~~~~~~~~~~~~~~ + +If a user or script is interested only in Topology info, for +example, use something like ``grep ^T: /sys/kernel/debug/usb/devices`` +for only the Topology lines. A command like +``grep -i ^[tdp]: /sys/kernel/debug/usb/devices`` can be used to list +only the lines that begin with the characters in square brackets, +where the valid characters are TDPCIE. With a slightly more able +script, it can display any selected lines (for example, only T, D, +and P lines) and change their output format. (The ``procusb`` +Perl script is the beginning of this idea. It will list only +selected lines [selected from TBDPSCIE] or "All" lines from +``/sys/kernel/debug/usb/devices``.) + +The Topology lines can be used to generate a graphic/pictorial +of the USB devices on a system's root hub. (See more below +on how to do this.) + +The Interface lines can be used to determine what driver is +being used for each device, and which altsetting it activated. + +The Configuration lines could be used to list maximum power +(in milliamps) that a system's USB devices are using. +For example, ``grep ^C: /sys/kernel/debug/usb/devices``. + + +Here's an example, from a system which has a UHCI root hub, +an external hub connected to the root hub, and a mouse and +a serial converter connected to the external hub. + +:: + + T: Bus=00 Lev=00 Prnt=00 Port=00 Cnt=00 Dev#= 1 Spd=12 MxCh= 2 + B: Alloc= 28/900 us ( 3%), #Int= 2, #Iso= 0 + D: Ver= 1.00 Cls=09(hub ) Sub=00 Prot=00 MxPS= 8 #Cfgs= 1 + P: Vendor=0000 ProdID=0000 Rev= 0.00 + S: Product=USB UHCI Root Hub + S: SerialNumber=dce0 + C:* #Ifs= 1 Cfg#= 1 Atr=40 MxPwr= 0mA + I: If#= 0 Alt= 0 #EPs= 1 Cls=09(hub ) Sub=00 Prot=00 Driver=hub + E: Ad=81(I) Atr=03(Int.) MxPS= 8 Ivl=255ms + + T: Bus=00 Lev=01 Prnt=01 Port=00 Cnt=01 Dev#= 2 Spd=12 MxCh= 4 + D: Ver= 1.00 Cls=09(hub ) Sub=00 Prot=00 MxPS= 8 #Cfgs= 1 + P: Vendor=0451 ProdID=1446 Rev= 1.00 + C:* #Ifs= 1 Cfg#= 1 Atr=e0 MxPwr=100mA + I: If#= 0 Alt= 0 #EPs= 1 Cls=09(hub ) Sub=00 Prot=00 Driver=hub + E: Ad=81(I) Atr=03(Int.) MxPS= 1 Ivl=255ms + + T: Bus=00 Lev=02 Prnt=02 Port=00 Cnt=01 Dev#= 3 Spd=1.5 MxCh= 0 + D: Ver= 1.00 Cls=00(>ifc ) Sub=00 Prot=00 MxPS= 8 #Cfgs= 1 + P: Vendor=04b4 ProdID=0001 Rev= 0.00 + C:* #Ifs= 1 Cfg#= 1 Atr=80 MxPwr=100mA + I: If#= 0 Alt= 0 #EPs= 1 Cls=03(HID ) Sub=01 Prot=02 Driver=mouse + E: Ad=81(I) Atr=03(Int.) MxPS= 3 Ivl= 10ms + + T: Bus=00 Lev=02 Prnt=02 Port=02 Cnt=02 Dev#= 4 Spd=12 MxCh= 0 + D: Ver= 1.00 Cls=00(>ifc ) Sub=00 Prot=00 MxPS= 8 #Cfgs= 1 + P: Vendor=0565 ProdID=0001 Rev= 1.08 + S: Manufacturer=Peracom Networks, Inc. + S: Product=Peracom USB to Serial Converter + C:* #Ifs= 1 Cfg#= 1 Atr=a0 MxPwr=100mA + I: If#= 0 Alt= 0 #EPs= 3 Cls=00(>ifc ) Sub=00 Prot=00 Driver=serial + E: Ad=81(I) Atr=02(Bulk) MxPS= 64 Ivl= 16ms + E: Ad=01(O) Atr=02(Bulk) MxPS= 16 Ivl= 16ms + E: Ad=82(I) Atr=03(Int.) MxPS= 8 Ivl= 8ms + + +Selecting only the ``T:`` and ``I:`` lines from this (for example, by using +``procusb ti``), we have + +:: + + T: Bus=00 Lev=00 Prnt=00 Port=00 Cnt=00 Dev#= 1 Spd=12 MxCh= 2 + T: Bus=00 Lev=01 Prnt=01 Port=00 Cnt=01 Dev#= 2 Spd=12 MxCh= 4 + I: If#= 0 Alt= 0 #EPs= 1 Cls=09(hub ) Sub=00 Prot=00 Driver=hub + T: Bus=00 Lev=02 Prnt=02 Port=00 Cnt=01 Dev#= 3 Spd=1.5 MxCh= 0 + I: If#= 0 Alt= 0 #EPs= 1 Cls=03(HID ) Sub=01 Prot=02 Driver=mouse + T: Bus=00 Lev=02 Prnt=02 Port=02 Cnt=02 Dev#= 4 Spd=12 MxCh= 0 + I: If#= 0 Alt= 0 #EPs= 3 Cls=00(>ifc ) Sub=00 Prot=00 Driver=serial + + +Physically this looks like (or could be converted to):: + + +------------------+ + | PC/root_hub (12)| Dev# = 1 + +------------------+ (nn) is Mbps. + Level 0 | CN.0 | CN.1 | [CN = connector/port #] + +------------------+ + / + / + +-----------------------+ + Level 1 | Dev#2: 4-port hub (12)| + +-----------------------+ + |CN.0 |CN.1 |CN.2 |CN.3 | + +-----------------------+ + \ \____________________ + \_____ \ + \ \ + +--------------------+ +--------------------+ + Level 2 | Dev# 3: mouse (1.5)| | Dev# 4: serial (12)| + +--------------------+ +--------------------+ + + + +Or, in a more tree-like structure (ports [Connectors] without +connections could be omitted):: + + PC: Dev# 1, root hub, 2 ports, 12 Mbps + |_ CN.0: Dev# 2, hub, 4 ports, 12 Mbps + |_ CN.0: Dev #3, mouse, 1.5 Mbps + |_ CN.1: + |_ CN.2: Dev #4, serial, 12 Mbps + |_ CN.3: + |_ CN.1: diff --git a/Documentation/driver-api/usb/usb3-debug-port.rst b/Documentation/driver-api/usb/usb3-debug-port.rst new file mode 100644 index 0000000000..d4610457b0 --- /dev/null +++ b/Documentation/driver-api/usb/usb3-debug-port.rst @@ -0,0 +1,152 @@ +=============== +USB3 debug port +=============== + +:Author: Lu Baolu <baolu.lu@linux.intel.com> +:Date: March 2017 + +GENERAL +======= + +This is a HOWTO for using the USB3 debug port on x86 systems. + +Before using any kernel debugging functionality based on USB3 +debug port, you need to:: + + 1) check whether any USB3 debug port is available in + your system; + 2) check which port is used for debugging purposes; + 3) have a USB 3.0 super-speed A-to-A debugging cable. + +INTRODUCTION +============ + +The xHCI debug capability (DbC) is an optional but standalone +functionality provided by the xHCI host controller. The xHCI +specification describes DbC in the section 7.6. + +When DbC is initialized and enabled, it will present a debug +device through the debug port (normally the first USB3 +super-speed port). The debug device is fully compliant with +the USB framework and provides the equivalent of a very high +performance full-duplex serial link between the debug target +(the system under debugging) and a debug host. + +EARLY PRINTK +============ + +DbC has been designed to log early printk messages. One use for +this feature is kernel debugging. For example, when your machine +crashes very early before the regular console code is initialized. +Other uses include simpler, lockless logging instead of a full- +blown printk console driver and klogd. + +On the debug target system, you need to customize a debugging +kernel with CONFIG_EARLY_PRINTK_USB_XDBC enabled. And, add below +kernel boot parameter:: + + "earlyprintk=xdbc" + +If there are multiple xHCI controllers in your system, you can +append a host controller index to this kernel parameter. This +index starts from 0. + +Current design doesn't support DbC runtime suspend/resume. As +the result, you'd better disable runtime power management for +USB subsystem by adding below kernel boot parameter:: + + "usbcore.autosuspend=-1" + +Before starting the debug target, you should connect the debug +port to a USB port (root port or port of any external hub) on +the debug host. The cable used to connect these two ports +should be a USB 3.0 super-speed A-to-A debugging cable. + +During early boot of the debug target, DbC will be detected and +initialized. After initialization, the debug host should be able +to enumerate the debug device in debug target. The debug host +will then bind the debug device with the usb_debug driver module +and create the /dev/ttyUSB device. + +If the debug device enumeration goes smoothly, you should be able +to see below kernel messages on the debug host:: + + # tail -f /var/log/kern.log + [ 1815.983374] usb 4-3: new SuperSpeed USB device number 4 using xhci_hcd + [ 1815.999595] usb 4-3: LPM exit latency is zeroed, disabling LPM. + [ 1815.999899] usb 4-3: New USB device found, idVendor=1d6b, idProduct=0004 + [ 1815.999902] usb 4-3: New USB device strings: Mfr=1, Product=2, SerialNumber=3 + [ 1815.999903] usb 4-3: Product: Remote GDB + [ 1815.999904] usb 4-3: Manufacturer: Linux + [ 1815.999905] usb 4-3: SerialNumber: 0001 + [ 1816.000240] usb_debug 4-3:1.0: xhci_dbc converter detected + [ 1816.000360] usb 4-3: xhci_dbc converter now attached to ttyUSB0 + +You can use any communication program, for example minicom, to +read and view the messages. Below simple bash scripts can help +you to check the sanity of the setup. + +.. code-block:: sh + + ===== start of bash scripts ============= + #!/bin/bash + + while true ; do + while [ ! -d /sys/class/tty/ttyUSB0 ] ; do + : + done + cat /dev/ttyUSB0 + done + ===== end of bash scripts =============== + +Serial TTY +========== + +The DbC support has been added to the xHCI driver. You can get a +debug device provided by the DbC at runtime. + +In order to use this, you need to make sure your kernel has been +configured to support USB_XHCI_DBGCAP. A sysfs attribute under +the xHCI device node is used to enable or disable DbC. By default, +DbC is disabled:: + + root@target:/sys/bus/pci/devices/0000:00:14.0# cat dbc + disabled + +Enable DbC with the following command:: + + root@target:/sys/bus/pci/devices/0000:00:14.0# echo enable > dbc + +You can check the DbC state at anytime:: + + root@target:/sys/bus/pci/devices/0000:00:14.0# cat dbc + enabled + +Connect the debug target to the debug host with a USB 3.0 super- +speed A-to-A debugging cable. You can see /dev/ttyDBC0 created +on the debug target. You will see below kernel message lines:: + + root@target: tail -f /var/log/kern.log + [ 182.730103] xhci_hcd 0000:00:14.0: DbC connected + [ 191.169420] xhci_hcd 0000:00:14.0: DbC configured + [ 191.169597] xhci_hcd 0000:00:14.0: DbC now attached to /dev/ttyDBC0 + +Accordingly, the DbC state has been brought up to:: + + root@target:/sys/bus/pci/devices/0000:00:14.0# cat dbc + configured + +On the debug host, you will see the debug device has been enumerated. +You will see below kernel message lines:: + + root@host: tail -f /var/log/kern.log + [ 79.454780] usb 2-2.1: new SuperSpeed USB device number 3 using xhci_hcd + [ 79.475003] usb 2-2.1: LPM exit latency is zeroed, disabling LPM. + [ 79.475389] usb 2-2.1: New USB device found, idVendor=1d6b, idProduct=0010 + [ 79.475390] usb 2-2.1: New USB device strings: Mfr=1, Product=2, SerialNumber=3 + [ 79.475391] usb 2-2.1: Product: Linux USB Debug Target + [ 79.475392] usb 2-2.1: Manufacturer: Linux Foundation + [ 79.475393] usb 2-2.1: SerialNumber: 0001 + +The debug device works now. You can use any communication or debugging +program to talk between the host and the target. diff --git a/Documentation/driver-api/usb/writing_musb_glue_layer.rst b/Documentation/driver-api/usb/writing_musb_glue_layer.rst new file mode 100644 index 0000000000..10416cc11c --- /dev/null +++ b/Documentation/driver-api/usb/writing_musb_glue_layer.rst @@ -0,0 +1,720 @@ +========================= +Writing a MUSB Glue Layer +========================= + +:Author: Apelete Seketeli + +Introduction +============ + +The Linux MUSB subsystem is part of the larger Linux USB subsystem. It +provides support for embedded USB Device Controllers (UDC) that do not +use Universal Host Controller Interface (UHCI) or Open Host Controller +Interface (OHCI). + +Instead, these embedded UDC rely on the USB On-the-Go (OTG) +specification which they implement at least partially. The silicon +reference design used in most cases is the Multipoint USB Highspeed +Dual-Role Controller (MUSB HDRC) found in the Mentor Graphics Inventra™ +design. + +As a self-taught exercise I have written an MUSB glue layer for the +Ingenic JZ4740 SoC, modelled after the many MUSB glue layers in the +kernel source tree. This layer can be found at +``drivers/usb/musb/jz4740.c``. In this documentation I will walk through the +basics of the ``jz4740.c`` glue layer, explaining the different pieces and +what needs to be done in order to write your own device glue layer. + +.. _musb-basics: + +Linux MUSB Basics +================= + +To get started on the topic, please read USB On-the-Go Basics (see +Resources) which provides an introduction of USB OTG operation at the +hardware level. A couple of wiki pages by Texas Instruments and Analog +Devices also provide an overview of the Linux kernel MUSB configuration, +albeit focused on some specific devices provided by these companies. +Finally, getting acquainted with the USB specification at USB home page +may come in handy, with practical instance provided through the Writing +USB Device Drivers documentation (again, see Resources). + +Linux USB stack is a layered architecture in which the MUSB controller +hardware sits at the lowest. The MUSB controller driver abstract the +MUSB controller hardware to the Linux USB stack:: + + ------------------------ + | | <------- drivers/usb/gadget + | Linux USB Core Stack | <------- drivers/usb/host + | | <------- drivers/usb/core + ------------------------ + ⬍ + -------------------------- + | | <------ drivers/usb/musb/musb_gadget.c + | MUSB Controller driver | <------ drivers/usb/musb/musb_host.c + | | <------ drivers/usb/musb/musb_core.c + -------------------------- + ⬍ + --------------------------------- + | MUSB Platform Specific Driver | + | | <-- drivers/usb/musb/jz4740.c + | aka "Glue Layer" | + --------------------------------- + ⬍ + --------------------------------- + | MUSB Controller Hardware | + --------------------------------- + +As outlined above, the glue layer is actually the platform specific code +sitting in between the controller driver and the controller hardware. + +Just like a Linux USB driver needs to register itself with the Linux USB +subsystem, the MUSB glue layer needs first to register itself with the +MUSB controller driver. This will allow the controller driver to know +about which device the glue layer supports and which functions to call +when a supported device is detected or released; remember we are talking +about an embedded controller chip here, so no insertion or removal at +run-time. + +All of this information is passed to the MUSB controller driver through +a :c:type:`platform_driver` structure defined in the glue layer as:: + + static struct platform_driver jz4740_driver = { + .probe = jz4740_probe, + .remove = jz4740_remove, + .driver = { + .name = "musb-jz4740", + }, + }; + +The probe and remove function pointers are called when a matching device +is detected and, respectively, released. The name string describes the +device supported by this glue layer. In the current case it matches a +platform_device structure declared in ``arch/mips/jz4740/platform.c``. Note +that we are not using device tree bindings here. + +In order to register itself to the controller driver, the glue layer +goes through a few steps, basically allocating the controller hardware +resources and initialising a couple of circuits. To do so, it needs to +keep track of the information used throughout these steps. This is done +by defining a private ``jz4740_glue`` structure:: + + struct jz4740_glue { + struct device *dev; + struct platform_device *musb; + struct clk *clk; + }; + + +The dev and musb members are both device structure variables. The first +one holds generic information about the device, since it's the basic +device structure, and the latter holds information more closely related +to the subsystem the device is registered to. The clk variable keeps +information related to the device clock operation. + +Let's go through the steps of the probe function that leads the glue +layer to register itself to the controller driver. + +.. note:: + + For the sake of readability each function will be split in logical + parts, each part being shown as if it was independent from the others. + +.. code-block:: c + :emphasize-lines: 8,12,18 + + static int jz4740_probe(struct platform_device *pdev) + { + struct platform_device *musb; + struct jz4740_glue *glue; + struct clk *clk; + int ret; + + glue = devm_kzalloc(&pdev->dev, sizeof(*glue), GFP_KERNEL); + if (!glue) + return -ENOMEM; + + musb = platform_device_alloc("musb-hdrc", PLATFORM_DEVID_AUTO); + if (!musb) { + dev_err(&pdev->dev, "failed to allocate musb device\n"); + return -ENOMEM; + } + + clk = devm_clk_get(&pdev->dev, "udc"); + if (IS_ERR(clk)) { + dev_err(&pdev->dev, "failed to get clock\n"); + ret = PTR_ERR(clk); + goto err_platform_device_put; + } + + ret = clk_prepare_enable(clk); + if (ret) { + dev_err(&pdev->dev, "failed to enable clock\n"); + goto err_platform_device_put; + } + + musb->dev.parent = &pdev->dev; + + glue->dev = &pdev->dev; + glue->musb = musb; + glue->clk = clk; + + return 0; + + err_platform_device_put: + platform_device_put(musb); + return ret; + } + +The first few lines of the probe function allocate and assign the glue, +musb and clk variables. The ``GFP_KERNEL`` flag (line 8) allows the +allocation process to sleep and wait for memory, thus being usable in a +locking situation. The ``PLATFORM_DEVID_AUTO`` flag (line 12) allows +automatic allocation and management of device IDs in order to avoid +device namespace collisions with explicit IDs. With :c:func:`devm_clk_get` +(line 18) the glue layer allocates the clock -- the ``devm_`` prefix +indicates that :c:func:`clk_get` is managed: it automatically frees the +allocated clock resource data when the device is released -- and enable +it. + + + +Then comes the registration steps: + +.. code-block:: c + :emphasize-lines: 3,5,7,9,16 + + static int jz4740_probe(struct platform_device *pdev) + { + struct musb_hdrc_platform_data *pdata = &jz4740_musb_platform_data; + + pdata->platform_ops = &jz4740_musb_ops; + + platform_set_drvdata(pdev, glue); + + ret = platform_device_add_resources(musb, pdev->resource, + pdev->num_resources); + if (ret) { + dev_err(&pdev->dev, "failed to add resources\n"); + goto err_clk_disable; + } + + ret = platform_device_add_data(musb, pdata, sizeof(*pdata)); + if (ret) { + dev_err(&pdev->dev, "failed to add platform_data\n"); + goto err_clk_disable; + } + + return 0; + + err_clk_disable: + clk_disable_unprepare(clk); + err_platform_device_put: + platform_device_put(musb); + return ret; + } + +The first step is to pass the device data privately held by the glue +layer on to the controller driver through :c:func:`platform_set_drvdata` +(line 7). Next is passing on the device resources information, also privately +held at that point, through :c:func:`platform_device_add_resources` (line 9). + +Finally comes passing on the platform specific data to the controller +driver (line 16). Platform data will be discussed in +:ref:`musb-dev-platform-data`, but here we are looking at the +``platform_ops`` function pointer (line 5) in ``musb_hdrc_platform_data`` +structure (line 3). This function pointer allows the MUSB controller +driver to know which function to call for device operation:: + + static const struct musb_platform_ops jz4740_musb_ops = { + .init = jz4740_musb_init, + .exit = jz4740_musb_exit, + }; + +Here we have the minimal case where only init and exit functions are +called by the controller driver when needed. Fact is the JZ4740 MUSB +controller is a basic controller, lacking some features found in other +controllers, otherwise we may also have pointers to a few other +functions like a power management function or a function to switch +between OTG and non-OTG modes, for instance. + +At that point of the registration process, the controller driver +actually calls the init function: + + .. code-block:: c + :emphasize-lines: 12,14 + + static int jz4740_musb_init(struct musb *musb) + { + musb->xceiv = usb_get_phy(USB_PHY_TYPE_USB2); + if (!musb->xceiv) { + pr_err("HS UDC: no transceiver configured\n"); + return -ENODEV; + } + + /* Silicon does not implement ConfigData register. + * Set dyn_fifo to avoid reading EP config from hardware. + */ + musb->dyn_fifo = true; + + musb->isr = jz4740_musb_interrupt; + + return 0; + } + +The goal of ``jz4740_musb_init()`` is to get hold of the transceiver +driver data of the MUSB controller hardware and pass it on to the MUSB +controller driver, as usual. The transceiver is the circuitry inside the +controller hardware responsible for sending/receiving the USB data. +Since it is an implementation of the physical layer of the OSI model, +the transceiver is also referred to as PHY. + +Getting hold of the ``MUSB PHY`` driver data is done with ``usb_get_phy()`` +which returns a pointer to the structure containing the driver instance +data. The next couple of instructions (line 12 and 14) are used as a +quirk and to setup IRQ handling respectively. Quirks and IRQ handling +will be discussed later in :ref:`musb-dev-quirks` and +:ref:`musb-handling-irqs`\ :: + + static int jz4740_musb_exit(struct musb *musb) + { + usb_put_phy(musb->xceiv); + + return 0; + } + +Acting as the counterpart of init, the exit function releases the MUSB +PHY driver when the controller hardware itself is about to be released. + +Again, note that init and exit are fairly simple in this case due to the +basic set of features of the JZ4740 controller hardware. When writing an +musb glue layer for a more complex controller hardware, you might need +to take care of more processing in those two functions. + +Returning from the init function, the MUSB controller driver jumps back +into the probe function:: + + static int jz4740_probe(struct platform_device *pdev) + { + ret = platform_device_add(musb); + if (ret) { + dev_err(&pdev->dev, "failed to register musb device\n"); + goto err_clk_disable; + } + + return 0; + + err_clk_disable: + clk_disable_unprepare(clk); + err_platform_device_put: + platform_device_put(musb); + return ret; + } + +This is the last part of the device registration process where the glue +layer adds the controller hardware device to Linux kernel device +hierarchy: at this stage, all known information about the device is +passed on to the Linux USB core stack: + + .. code-block:: c + :emphasize-lines: 5,6 + + static int jz4740_remove(struct platform_device *pdev) + { + struct jz4740_glue *glue = platform_get_drvdata(pdev); + + platform_device_unregister(glue->musb); + clk_disable_unprepare(glue->clk); + + return 0; + } + +Acting as the counterpart of probe, the remove function unregister the +MUSB controller hardware (line 5) and disable the clock (line 6), +allowing it to be gated. + +.. _musb-handling-irqs: + +Handling IRQs +============= + +Additionally to the MUSB controller hardware basic setup and +registration, the glue layer is also responsible for handling the IRQs: + + .. code-block:: c + :emphasize-lines: 7,9-11,14,24 + + static irqreturn_t jz4740_musb_interrupt(int irq, void *__hci) + { + unsigned long flags; + irqreturn_t retval = IRQ_NONE; + struct musb *musb = __hci; + + spin_lock_irqsave(&musb->lock, flags); + + musb->int_usb = musb_readb(musb->mregs, MUSB_INTRUSB); + musb->int_tx = musb_readw(musb->mregs, MUSB_INTRTX); + musb->int_rx = musb_readw(musb->mregs, MUSB_INTRRX); + + /* + * The controller is gadget only, the state of the host mode IRQ bits is + * undefined. Mask them to make sure that the musb driver core will + * never see them set + */ + musb->int_usb &= MUSB_INTR_SUSPEND | MUSB_INTR_RESUME | + MUSB_INTR_RESET | MUSB_INTR_SOF; + + if (musb->int_usb || musb->int_tx || musb->int_rx) + retval = musb_interrupt(musb); + + spin_unlock_irqrestore(&musb->lock, flags); + + return retval; + } + +Here the glue layer mostly has to read the relevant hardware registers +and pass their values on to the controller driver which will handle the +actual event that triggered the IRQ. + +The interrupt handler critical section is protected by the +:c:func:`spin_lock_irqsave` and counterpart :c:func:`spin_unlock_irqrestore` +functions (line 7 and 24 respectively), which prevent the interrupt +handler code to be run by two different threads at the same time. + +Then the relevant interrupt registers are read (line 9 to 11): + +- ``MUSB_INTRUSB``: indicates which USB interrupts are currently active, + +- ``MUSB_INTRTX``: indicates which of the interrupts for TX endpoints are + currently active, + +- ``MUSB_INTRRX``: indicates which of the interrupts for TX endpoints are + currently active. + +Note that :c:func:`musb_readb` is used to read 8-bit registers at most, while +:c:func:`musb_readw` allows us to read at most 16-bit registers. There are +other functions that can be used depending on the size of your device +registers. See ``musb_io.h`` for more information. + +Instruction on line 18 is another quirk specific to the JZ4740 USB +device controller, which will be discussed later in :ref:`musb-dev-quirks`. + +The glue layer still needs to register the IRQ handler though. Remember +the instruction on line 14 of the init function:: + + static int jz4740_musb_init(struct musb *musb) + { + musb->isr = jz4740_musb_interrupt; + + return 0; + } + +This instruction sets a pointer to the glue layer IRQ handler function, +in order for the controller hardware to call the handler back when an +IRQ comes from the controller hardware. The interrupt handler is now +implemented and registered. + +.. _musb-dev-platform-data: + +Device Platform Data +==================== + +In order to write an MUSB glue layer, you need to have some data +describing the hardware capabilities of your controller hardware, which +is called the platform data. + +Platform data is specific to your hardware, though it may cover a broad +range of devices, and is generally found somewhere in the ``arch/`` +directory, depending on your device architecture. + +For instance, platform data for the JZ4740 SoC is found in +``arch/mips/jz4740/platform.c``. In the ``platform.c`` file each device of the +JZ4740 SoC is described through a set of structures. + +Here is the part of ``arch/mips/jz4740/platform.c`` that covers the USB +Device Controller (UDC): + + .. code-block:: c + :emphasize-lines: 2,7,14-17,21,22,25,26,28,29 + + /* USB Device Controller */ + struct platform_device jz4740_udc_xceiv_device = { + .name = "usb_phy_gen_xceiv", + .id = 0, + }; + + static struct resource jz4740_udc_resources[] = { + [0] = { + .start = JZ4740_UDC_BASE_ADDR, + .end = JZ4740_UDC_BASE_ADDR + 0x10000 - 1, + .flags = IORESOURCE_MEM, + }, + [1] = { + .start = JZ4740_IRQ_UDC, + .end = JZ4740_IRQ_UDC, + .flags = IORESOURCE_IRQ, + .name = "mc", + }, + }; + + struct platform_device jz4740_udc_device = { + .name = "musb-jz4740", + .id = -1, + .dev = { + .dma_mask = &jz4740_udc_device.dev.coherent_dma_mask, + .coherent_dma_mask = DMA_BIT_MASK(32), + }, + .num_resources = ARRAY_SIZE(jz4740_udc_resources), + .resource = jz4740_udc_resources, + }; + +The ``jz4740_udc_xceiv_device`` platform device structure (line 2) +describes the UDC transceiver with a name and id number. + +At the time of this writing, note that ``usb_phy_gen_xceiv`` is the +specific name to be used for all transceivers that are either built-in +with reference USB IP or autonomous and doesn't require any PHY +programming. You will need to set ``CONFIG_NOP_USB_XCEIV=y`` in the +kernel configuration to make use of the corresponding transceiver +driver. The id field could be set to -1 (equivalent to +``PLATFORM_DEVID_NONE``), -2 (equivalent to ``PLATFORM_DEVID_AUTO``) or +start with 0 for the first device of this kind if we want a specific id +number. + +The ``jz4740_udc_resources`` resource structure (line 7) defines the UDC +registers base addresses. + +The first array (line 9 to 11) defines the UDC registers base memory +addresses: start points to the first register memory address, end points +to the last register memory address and the flags member defines the +type of resource we are dealing with. So ``IORESOURCE_MEM`` is used to +define the registers memory addresses. The second array (line 14 to 17) +defines the UDC IRQ registers addresses. Since there is only one IRQ +register available for the JZ4740 UDC, start and end point at the same +address. The ``IORESOURCE_IRQ`` flag tells that we are dealing with IRQ +resources, and the name ``mc`` is in fact hard-coded in the MUSB core in +order for the controller driver to retrieve this IRQ resource by +querying it by its name. + +Finally, the ``jz4740_udc_device`` platform device structure (line 21) +describes the UDC itself. + +The ``musb-jz4740`` name (line 22) defines the MUSB driver that is used +for this device; remember this is in fact the name that we used in the +``jz4740_driver`` platform driver structure in :ref:`musb-basics`. +The id field (line 23) is set to -1 (equivalent to ``PLATFORM_DEVID_NONE``) +since we do not need an id for the device: the MUSB controller driver was +already set to allocate an automatic id in :ref:`musb-basics`. In the dev field +we care for DMA related information here. The ``dma_mask`` field (line 25) +defines the width of the DMA mask that is going to be used, and +``coherent_dma_mask`` (line 26) has the same purpose but for the +``alloc_coherent`` DMA mappings: in both cases we are using a 32 bits mask. +Then the resource field (line 29) is simply a pointer to the resource +structure defined before, while the ``num_resources`` field (line 28) keeps +track of the number of arrays defined in the resource structure (in this +case there were two resource arrays defined before). + +With this quick overview of the UDC platform data at the ``arch/`` level now +done, let's get back to the MUSB glue layer specific platform data in +``drivers/usb/musb/jz4740.c``: + + .. code-block:: c + :emphasize-lines: 3,5,7-9,11 + + static struct musb_hdrc_config jz4740_musb_config = { + /* Silicon does not implement USB OTG. */ + .multipoint = 0, + /* Max EPs scanned, driver will decide which EP can be used. */ + .num_eps = 4, + /* RAMbits needed to configure EPs from table */ + .ram_bits = 9, + .fifo_cfg = jz4740_musb_fifo_cfg, + .fifo_cfg_size = ARRAY_SIZE(jz4740_musb_fifo_cfg), + }; + + static struct musb_hdrc_platform_data jz4740_musb_platform_data = { + .mode = MUSB_PERIPHERAL, + .config = &jz4740_musb_config, + }; + +First the glue layer configures some aspects of the controller driver +operation related to the controller hardware specifics. This is done +through the ``jz4740_musb_config`` :c:type:`musb_hdrc_config` structure. + +Defining the OTG capability of the controller hardware, the multipoint +member (line 3) is set to 0 (equivalent to false) since the JZ4740 UDC +is not OTG compatible. Then ``num_eps`` (line 5) defines the number of USB +endpoints of the controller hardware, including endpoint 0: here we have +3 endpoints + endpoint 0. Next is ``ram_bits`` (line 7) which is the width +of the RAM address bus for the MUSB controller hardware. This +information is needed when the controller driver cannot automatically +configure endpoints by reading the relevant controller hardware +registers. This issue will be discussed when we get to device quirks in +:ref:`musb-dev-quirks`. Last two fields (line 8 and 9) are also +about device quirks: ``fifo_cfg`` points to the USB endpoints configuration +table and ``fifo_cfg_size`` keeps track of the size of the number of +entries in that configuration table. More on that later in +:ref:`musb-dev-quirks`. + +Then this configuration is embedded inside ``jz4740_musb_platform_data`` +:c:type:`musb_hdrc_platform_data` structure (line 11): config is a pointer to +the configuration structure itself, and mode tells the controller driver +if the controller hardware may be used as ``MUSB_HOST`` only, +``MUSB_PERIPHERAL`` only or ``MUSB_OTG`` which is a dual mode. + +Remember that ``jz4740_musb_platform_data`` is then used to convey +platform data information as we have seen in the probe function in +:ref:`musb-basics`. + +.. _musb-dev-quirks: + +Device Quirks +============= + +Completing the platform data specific to your device, you may also need +to write some code in the glue layer to work around some device specific +limitations. These quirks may be due to some hardware bugs, or simply be +the result of an incomplete implementation of the USB On-the-Go +specification. + +The JZ4740 UDC exhibits such quirks, some of which we will discuss here +for the sake of insight even though these might not be found in the +controller hardware you are working on. + +Let's get back to the init function first: + + .. code-block:: c + :emphasize-lines: 12 + + static int jz4740_musb_init(struct musb *musb) + { + musb->xceiv = usb_get_phy(USB_PHY_TYPE_USB2); + if (!musb->xceiv) { + pr_err("HS UDC: no transceiver configured\n"); + return -ENODEV; + } + + /* Silicon does not implement ConfigData register. + * Set dyn_fifo to avoid reading EP config from hardware. + */ + musb->dyn_fifo = true; + + musb->isr = jz4740_musb_interrupt; + + return 0; + } + +Instruction on line 12 helps the MUSB controller driver to work around +the fact that the controller hardware is missing registers that are used +for USB endpoints configuration. + +Without these registers, the controller driver is unable to read the +endpoints configuration from the hardware, so we use line 12 instruction +to bypass reading the configuration from silicon, and rely on a +hard-coded table that describes the endpoints configuration instead:: + + static struct musb_fifo_cfg jz4740_musb_fifo_cfg[] = { + { .hw_ep_num = 1, .style = FIFO_TX, .maxpacket = 512, }, + { .hw_ep_num = 1, .style = FIFO_RX, .maxpacket = 512, }, + { .hw_ep_num = 2, .style = FIFO_TX, .maxpacket = 64, }, + }; + +Looking at the configuration table above, we see that each endpoints is +described by three fields: ``hw_ep_num`` is the endpoint number, style is +its direction (either ``FIFO_TX`` for the controller driver to send packets +in the controller hardware, or ``FIFO_RX`` to receive packets from +hardware), and maxpacket defines the maximum size of each data packet +that can be transmitted over that endpoint. Reading from the table, the +controller driver knows that endpoint 1 can be used to send and receive +USB data packets of 512 bytes at once (this is in fact a bulk in/out +endpoint), and endpoint 2 can be used to send data packets of 64 bytes +at once (this is in fact an interrupt endpoint). + +Note that there is no information about endpoint 0 here: that one is +implemented by default in every silicon design, with a predefined +configuration according to the USB specification. For more examples of +endpoint configuration tables, see ``musb_core.c``. + +Let's now get back to the interrupt handler function: + + .. code-block:: c + :emphasize-lines: 18-19 + + static irqreturn_t jz4740_musb_interrupt(int irq, void *__hci) + { + unsigned long flags; + irqreturn_t retval = IRQ_NONE; + struct musb *musb = __hci; + + spin_lock_irqsave(&musb->lock, flags); + + musb->int_usb = musb_readb(musb->mregs, MUSB_INTRUSB); + musb->int_tx = musb_readw(musb->mregs, MUSB_INTRTX); + musb->int_rx = musb_readw(musb->mregs, MUSB_INTRRX); + + /* + * The controller is gadget only, the state of the host mode IRQ bits is + * undefined. Mask them to make sure that the musb driver core will + * never see them set + */ + musb->int_usb &= MUSB_INTR_SUSPEND | MUSB_INTR_RESUME | + MUSB_INTR_RESET | MUSB_INTR_SOF; + + if (musb->int_usb || musb->int_tx || musb->int_rx) + retval = musb_interrupt(musb); + + spin_unlock_irqrestore(&musb->lock, flags); + + return retval; + } + +Instruction on line 18 above is a way for the controller driver to work +around the fact that some interrupt bits used for USB host mode +operation are missing in the ``MUSB_INTRUSB`` register, thus left in an +undefined hardware state, since this MUSB controller hardware is used in +peripheral mode only. As a consequence, the glue layer masks these +missing bits out to avoid parasite interrupts by doing a logical AND +operation between the value read from ``MUSB_INTRUSB`` and the bits that +are actually implemented in the register. + +These are only a couple of the quirks found in the JZ4740 USB device +controller. Some others were directly addressed in the MUSB core since +the fixes were generic enough to provide a better handling of the issues +for others controller hardware eventually. + +Conclusion +========== + +Writing a Linux MUSB glue layer should be a more accessible task, as +this documentation tries to show the ins and outs of this exercise. + +The JZ4740 USB device controller being fairly simple, I hope its glue +layer serves as a good example for the curious mind. Used with the +current MUSB glue layers, this documentation should provide enough +guidance to get started; should anything gets out of hand, the linux-usb +mailing list archive is another helpful resource to browse through. + +Acknowledgements +================ + +Many thanks to Lars-Peter Clausen and Maarten ter Huurne for answering +my questions while I was writing the JZ4740 glue layer and for helping +me out getting the code in good shape. + +I would also like to thank the Qi-Hardware community at large for its +cheerful guidance and support. + +Resources +========= + +USB Home Page: https://www.usb.org + +linux-usb Mailing List Archives: https://marc.info/?l=linux-usb + +USB On-the-Go Basics: +https://www.maximintegrated.com/app-notes/index.mvp/id/1822 + +:ref:`Writing USB Device Drivers <writing-usb-driver>` + +Texas Instruments USB Configuration Wiki Page: +http://processors.wiki.ti.com/index.php/Usbgeneralpage diff --git a/Documentation/driver-api/usb/writing_usb_driver.rst b/Documentation/driver-api/usb/writing_usb_driver.rst new file mode 100644 index 0000000000..95c4f5d140 --- /dev/null +++ b/Documentation/driver-api/usb/writing_usb_driver.rst @@ -0,0 +1,328 @@ +.. _writing-usb-driver: + +========================== +Writing USB Device Drivers +========================== + +:Author: Greg Kroah-Hartman + +Introduction +============ + +The Linux USB subsystem has grown from supporting only two different +types of devices in the 2.2.7 kernel (mice and keyboards), to over 20 +different types of devices in the 2.4 kernel. Linux currently supports +almost all USB class devices (standard types of devices like keyboards, +mice, modems, printers and speakers) and an ever-growing number of +vendor-specific devices (such as USB to serial converters, digital +cameras, Ethernet devices and MP3 players). For a full list of the +different USB devices currently supported, see Resources. + +The remaining kinds of USB devices that do not have support on Linux are +almost all vendor-specific devices. Each vendor decides to implement a +custom protocol to talk to their device, so a custom driver usually +needs to be created. Some vendors are open with their USB protocols and +help with the creation of Linux drivers, while others do not publish +them, and developers are forced to reverse-engineer. See Resources for +some links to handy reverse-engineering tools. + +Because each different protocol causes a new driver to be created, I +have written a generic USB driver skeleton, modelled after the +pci-skeleton.c file in the kernel source tree upon which many PCI +network drivers have been based. This USB skeleton can be found at +drivers/usb/usb-skeleton.c in the kernel source tree. In this article I +will walk through the basics of the skeleton driver, explaining the +different pieces and what needs to be done to customize it to your +specific device. + +Linux USB Basics +================ + +If you are going to write a Linux USB driver, please become familiar +with the USB protocol specification. It can be found, along with many +other useful documents, at the USB home page (see Resources). An +excellent introduction to the Linux USB subsystem can be found at the +USB Working Devices List (see Resources). It explains how the Linux USB +subsystem is structured and introduces the reader to the concept of USB +urbs (USB Request Blocks), which are essential to USB drivers. + +The first thing a Linux USB driver needs to do is register itself with +the Linux USB subsystem, giving it some information about which devices +the driver supports and which functions to call when a device supported +by the driver is inserted or removed from the system. All of this +information is passed to the USB subsystem in the :c:type:`usb_driver` +structure. The skeleton driver declares a :c:type:`usb_driver` as:: + + static struct usb_driver skel_driver = { + .name = "skeleton", + .probe = skel_probe, + .disconnect = skel_disconnect, + .suspend = skel_suspend, + .resume = skel_resume, + .pre_reset = skel_pre_reset, + .post_reset = skel_post_reset, + .id_table = skel_table, + .supports_autosuspend = 1, + }; + + +The variable name is a string that describes the driver. It is used in +informational messages printed to the system log. The probe and +disconnect function pointers are called when a device that matches the +information provided in the ``id_table`` variable is either seen or +removed. + +The fops and minor variables are optional. Most USB drivers hook into +another kernel subsystem, such as the SCSI, network or TTY subsystem. +These types of drivers register themselves with the other kernel +subsystem, and any user-space interactions are provided through that +interface. But for drivers that do not have a matching kernel subsystem, +such as MP3 players or scanners, a method of interacting with user space +is needed. The USB subsystem provides a way to register a minor device +number and a set of :c:type:`file_operations` function pointers that enable +this user-space interaction. The skeleton driver needs this kind of +interface, so it provides a minor starting number and a pointer to its +:c:type:`file_operations` functions. + +The USB driver is then registered with a call to usb_register(), +usually in the driver's init function, as shown here:: + + static int __init usb_skel_init(void) + { + int result; + + /* register this driver with the USB subsystem */ + result = usb_register(&skel_driver); + if (result < 0) { + pr_err("usb_register failed for the %s driver. Error number %d\n", + skel_driver.name, result); + return -1; + } + + return 0; + } + module_init(usb_skel_init); + + +When the driver is unloaded from the system, it needs to deregister +itself with the USB subsystem. This is done with usb_deregister() +function:: + + static void __exit usb_skel_exit(void) + { + /* deregister this driver with the USB subsystem */ + usb_deregister(&skel_driver); + } + module_exit(usb_skel_exit); + + +To enable the linux-hotplug system to load the driver automatically when +the device is plugged in, you need to create a ``MODULE_DEVICE_TABLE``. +The following code tells the hotplug scripts that this module supports a +single device with a specific vendor and product ID:: + + /* table of devices that work with this driver */ + static struct usb_device_id skel_table [] = { + { USB_DEVICE(USB_SKEL_VENDOR_ID, USB_SKEL_PRODUCT_ID) }, + { } /* Terminating entry */ + }; + MODULE_DEVICE_TABLE (usb, skel_table); + + +There are other macros that can be used in describing a struct +:c:type:`usb_device_id` for drivers that support a whole class of USB +drivers. See :ref:`usb.h <usb_header>` for more information on this. + +Device operation +================ + +When a device is plugged into the USB bus that matches the device ID +pattern that your driver registered with the USB core, the probe +function is called. The :c:type:`usb_device` structure, interface number and +the interface ID are passed to the function:: + + static int skel_probe(struct usb_interface *interface, + const struct usb_device_id *id) + + +The driver now needs to verify that this device is actually one that it +can accept. If so, it returns 0. If not, or if any error occurs during +initialization, an errorcode (such as ``-ENOMEM`` or ``-ENODEV``) is +returned from the probe function. + +In the skeleton driver, we determine what end points are marked as +bulk-in and bulk-out. We create buffers to hold the data that will be +sent and received from the device, and a USB urb to write data to the +device is initialized. + +Conversely, when the device is removed from the USB bus, the disconnect +function is called with the device pointer. The driver needs to clean +any private data that has been allocated at this time and to shut down +any pending urbs that are in the USB system. + +Now that the device is plugged into the system and the driver is bound +to the device, any of the functions in the :c:type:`file_operations` structure +that were passed to the USB subsystem will be called from a user program +trying to talk to the device. The first function called will be open, as +the program tries to open the device for I/O. We increment our private +usage count and save a pointer to our internal structure in the file +structure. This is done so that future calls to file operations will +enable the driver to determine which device the user is addressing. All +of this is done with the following code:: + + /* increment our usage count for the device */ + kref_get(&dev->kref); + + /* save our object in the file's private structure */ + file->private_data = dev; + + +After the open function is called, the read and write functions are +called to receive and send data to the device. In the ``skel_write`` +function, we receive a pointer to some data that the user wants to send +to the device and the size of the data. The function determines how much +data it can send to the device based on the size of the write urb it has +created (this size depends on the size of the bulk out end point that +the device has). Then it copies the data from user space to kernel +space, points the urb to the data and submits the urb to the USB +subsystem. This can be seen in the following code:: + + /* we can only write as much as 1 urb will hold */ + size_t writesize = min_t(size_t, count, MAX_TRANSFER); + + /* copy the data from user space into our urb */ + copy_from_user(buf, user_buffer, writesize); + + /* set up our urb */ + usb_fill_bulk_urb(urb, + dev->udev, + usb_sndbulkpipe(dev->udev, dev->bulk_out_endpointAddr), + buf, + writesize, + skel_write_bulk_callback, + dev); + + /* send the data out the bulk port */ + retval = usb_submit_urb(urb, GFP_KERNEL); + if (retval) { + dev_err(&dev->interface->dev, + "%s - failed submitting write urb, error %d\n", + __func__, retval); + } + + +When the write urb is filled up with the proper information using the +:c:func:`usb_fill_bulk_urb` function, we point the urb's completion callback +to call our own ``skel_write_bulk_callback`` function. This function is +called when the urb is finished by the USB subsystem. The callback +function is called in interrupt context, so caution must be taken not to +do very much processing at that time. Our implementation of +``skel_write_bulk_callback`` merely reports if the urb was completed +successfully or not and then returns. + +The read function works a bit differently from the write function in +that we do not use an urb to transfer data from the device to the +driver. Instead we call the :c:func:`usb_bulk_msg` function, which can be used +to send or receive data from a device without having to create urbs and +handle urb completion callback functions. We call the :c:func:`usb_bulk_msg` +function, giving it a buffer into which to place any data received from +the device and a timeout value. If the timeout period expires without +receiving any data from the device, the function will fail and return an +error message. This can be shown with the following code:: + + /* do an immediate bulk read to get data from the device */ + retval = usb_bulk_msg (skel->dev, + usb_rcvbulkpipe (skel->dev, + skel->bulk_in_endpointAddr), + skel->bulk_in_buffer, + skel->bulk_in_size, + &count, 5000); + /* if the read was successful, copy the data to user space */ + if (!retval) { + if (copy_to_user (buffer, skel->bulk_in_buffer, count)) + retval = -EFAULT; + else + retval = count; + } + + +The :c:func:`usb_bulk_msg` function can be very useful for doing single reads +or writes to a device; however, if you need to read or write constantly to +a device, it is recommended to set up your own urbs and submit them to +the USB subsystem. + +When the user program releases the file handle that it has been using to +talk to the device, the release function in the driver is called. In +this function we decrement our private usage count and wait for possible +pending writes:: + + /* decrement our usage count for the device */ + --skel->open_count; + + +One of the more difficult problems that USB drivers must be able to +handle smoothly is the fact that the USB device may be removed from the +system at any point in time, even if a program is currently talking to +it. It needs to be able to shut down any current reads and writes and +notify the user-space programs that the device is no longer there. The +following code (function ``skel_delete``) is an example of how to do +this:: + + static inline void skel_delete (struct usb_skel *dev) + { + kfree (dev->bulk_in_buffer); + if (dev->bulk_out_buffer != NULL) + usb_free_coherent (dev->udev, dev->bulk_out_size, + dev->bulk_out_buffer, + dev->write_urb->transfer_dma); + usb_free_urb (dev->write_urb); + kfree (dev); + } + + +If a program currently has an open handle to the device, we reset the +flag ``device_present``. For every read, write, release and other +functions that expect a device to be present, the driver first checks +this flag to see if the device is still present. If not, it releases +that the device has disappeared, and a ``-ENODEV`` error is returned to the +user-space program. When the release function is eventually called, it +determines if there is no device and if not, it does the cleanup that +the ``skel_disconnect`` function normally does if there are no open files +on the device (see Listing 5). + +Isochronous Data +================ + +This usb-skeleton driver does not have any examples of interrupt or +isochronous data being sent to or from the device. Interrupt data is +sent almost exactly as bulk data is, with a few minor exceptions. +Isochronous data works differently with continuous streams of data being +sent to or from the device. The audio and video camera drivers are very +good examples of drivers that handle isochronous data and will be useful +if you also need to do this. + +Conclusion +========== + +Writing Linux USB device drivers is not a difficult task as the +usb-skeleton driver shows. This driver, combined with the other current +USB drivers, should provide enough examples to help a beginning author +create a working driver in a minimal amount of time. The linux-usb-devel +mailing list archives also contain a lot of helpful information. + +Resources +========= + +The Linux USB Project: +http://www.linux-usb.org/ + +Linux Hotplug Project: +http://linux-hotplug.sourceforge.net/ + +linux-usb Mailing List Archives: +https://lore.kernel.org/linux-usb/ + +Programming Guide for Linux USB Device Drivers: +https://lmu.web.psi.ch/docu/manuals/software_manuals/linux_sl/usb_linux_programming_guide.pdf + +USB Home Page: https://www.usb.org diff --git a/Documentation/driver-api/vfio-mediated-device.rst b/Documentation/driver-api/vfio-mediated-device.rst new file mode 100644 index 0000000000..bbd548b66b --- /dev/null +++ b/Documentation/driver-api/vfio-mediated-device.rst @@ -0,0 +1,273 @@ +.. SPDX-License-Identifier: GPL-2.0-only +.. include:: <isonum.txt> + +===================== +VFIO Mediated devices +===================== + +:Copyright: |copy| 2016, NVIDIA CORPORATION. All rights reserved. +:Author: Neo Jia <cjia@nvidia.com> +:Author: Kirti Wankhede <kwankhede@nvidia.com> + + + +Virtual Function I/O (VFIO) Mediated devices[1] +=============================================== + +The number of use cases for virtualizing DMA devices that do not have built-in +SR_IOV capability is increasing. Previously, to virtualize such devices, +developers had to create their own management interfaces and APIs, and then +integrate them with user space software. To simplify integration with user space +software, we have identified common requirements and a unified management +interface for such devices. + +The VFIO driver framework provides unified APIs for direct device access. It is +an IOMMU/device-agnostic framework for exposing direct device access to user +space in a secure, IOMMU-protected environment. This framework is used for +multiple devices, such as GPUs, network adapters, and compute accelerators. With +direct device access, virtual machines or user space applications have direct +access to the physical device. This framework is reused for mediated devices. + +The mediated core driver provides a common interface for mediated device +management that can be used by drivers of different devices. This module +provides a generic interface to perform these operations: + +* Create and destroy a mediated device +* Add a mediated device to and remove it from a mediated bus driver +* Add a mediated device to and remove it from an IOMMU group + +The mediated core driver also provides an interface to register a bus driver. +For example, the mediated VFIO mdev driver is designed for mediated devices and +supports VFIO APIs. The mediated bus driver adds a mediated device to and +removes it from a VFIO group. + +The following high-level block diagram shows the main components and interfaces +in the VFIO mediated driver framework. The diagram shows NVIDIA, Intel, and IBM +devices as examples, as these devices are the first devices to use this module:: + + +---------------+ + | | + | +-----------+ | mdev_register_driver() +--------------+ + | | | +<------------------------+ | + | | mdev | | | | + | | bus | +------------------------>+ vfio_mdev.ko |<-> VFIO user + | | driver | | probe()/remove() | | APIs + | | | | +--------------+ + | +-----------+ | + | | + | MDEV CORE | + | MODULE | + | mdev.ko | + | +-----------+ | mdev_register_parent() +--------------+ + | | | +<------------------------+ | + | | | | | ccw_device.ko|<-> physical + | | | +------------------------>+ | device + | | | | callbacks +--------------+ + | | Physical | | + | | device | | mdev_register_parent() +--------------+ + | | interface | |<------------------------+ | + | | | | | i915.ko |<-> physical + | | | +------------------------>+ | device + | | | | callbacks +--------------+ + | +-----------+ | + +---------------+ + + +Registration Interfaces +======================= + +The mediated core driver provides the following types of registration +interfaces: + +* Registration interface for a mediated bus driver +* Physical device driver interface + +Registration Interface for a Mediated Bus Driver +------------------------------------------------ + +The registration interface for a mediated device driver provides the following +structure to represent a mediated device's driver:: + + /* + * struct mdev_driver [2] - Mediated device's driver + * @probe: called when new device created + * @remove: called when device removed + * @driver: device driver structure + */ + struct mdev_driver { + int (*probe) (struct mdev_device *dev); + void (*remove) (struct mdev_device *dev); + unsigned int (*get_available)(struct mdev_type *mtype); + ssize_t (*show_description)(struct mdev_type *mtype, char *buf); + struct device_driver driver; + }; + +A mediated bus driver for mdev should use this structure in the function calls +to register and unregister itself with the core driver: + +* Register:: + + int mdev_register_driver(struct mdev_driver *drv); + +* Unregister:: + + void mdev_unregister_driver(struct mdev_driver *drv); + +The mediated bus driver's probe function should create a vfio_device on top of +the mdev_device and connect it to an appropriate implementation of +vfio_device_ops. + +When a driver wants to add the GUID creation sysfs to an existing device it has +probe'd to then it should call:: + + int mdev_register_parent(struct mdev_parent *parent, struct device *dev, + struct mdev_driver *mdev_driver); + +This will provide the 'mdev_supported_types/XX/create' files which can then be +used to trigger the creation of a mdev_device. The created mdev_device will be +attached to the specified driver. + +When the driver needs to remove itself it calls:: + + void mdev_unregister_parent(struct mdev_parent *parent); + +Which will unbind and destroy all the created mdevs and remove the sysfs files. + +Mediated Device Management Interface Through sysfs +================================================== + +The management interface through sysfs enables user space software, such as +libvirt, to query and configure mediated devices in a hardware-agnostic fashion. +This management interface provides flexibility to the underlying physical +device's driver to support features such as: + +* Mediated device hot plug +* Multiple mediated devices in a single virtual machine +* Multiple mediated devices from different physical devices + +Links in the mdev_bus Class Directory +------------------------------------- +The /sys/class/mdev_bus/ directory contains links to devices that are registered +with the mdev core driver. + +Directories and files under the sysfs for Each Physical Device +-------------------------------------------------------------- + +:: + + |- [parent physical device] + |--- Vendor-specific-attributes [optional] + |--- [mdev_supported_types] + | |--- [<type-id>] + | | |--- create + | | |--- name + | | |--- available_instances + | | |--- device_api + | | |--- description + | | |--- [devices] + | |--- [<type-id>] + | | |--- create + | | |--- name + | | |--- available_instances + | | |--- device_api + | | |--- description + | | |--- [devices] + | |--- [<type-id>] + | |--- create + | |--- name + | |--- available_instances + | |--- device_api + | |--- description + | |--- [devices] + +* [mdev_supported_types] + + The list of currently supported mediated device types and their details. + + [<type-id>], device_api, and available_instances are mandatory attributes + that should be provided by vendor driver. + +* [<type-id>] + + The [<type-id>] name is created by adding the device driver string as a prefix + to the string provided by the vendor driver. This format of this name is as + follows:: + + sprintf(buf, "%s-%s", dev_driver_string(parent->dev), group->name); + +* device_api + + This attribute shows which device API is being created, for example, + "vfio-pci" for a PCI device. + +* available_instances + + This attribute shows the number of devices of type <type-id> that can be + created. + +* [device] + + This directory contains links to the devices of type <type-id> that have been + created. + +* name + + This attribute shows a human readable name. + +* description + + This attribute can show brief features/description of the type. This is an + optional attribute. + +Directories and Files Under the sysfs for Each mdev Device +---------------------------------------------------------- + +:: + + |- [parent phy device] + |--- [$MDEV_UUID] + |--- remove + |--- mdev_type {link to its type} + |--- vendor-specific-attributes [optional] + +* remove (write only) + +Writing '1' to the 'remove' file destroys the mdev device. The vendor driver can +fail the remove() callback if that device is active and the vendor driver +doesn't support hot unplug. + +Example:: + + # echo 1 > /sys/bus/mdev/devices/$mdev_UUID/remove + +Mediated device Hot plug +------------------------ + +Mediated devices can be created and assigned at runtime. The procedure to hot +plug a mediated device is the same as the procedure to hot plug a PCI device. + +Translation APIs for Mediated Devices +===================================== + +The following APIs are provided for translating user pfn to host pfn in a VFIO +driver:: + + int vfio_pin_pages(struct vfio_device *device, dma_addr_t iova, + int npage, int prot, struct page **pages); + + void vfio_unpin_pages(struct vfio_device *device, dma_addr_t iova, + int npage); + +These functions call back into the back-end IOMMU module by using the pin_pages +and unpin_pages callbacks of the struct vfio_iommu_driver_ops[4]. Currently +these callbacks are supported in the TYPE1 IOMMU module. To enable them for +other IOMMU backend modules, such as PPC64 sPAPR module, they need to provide +these two callback functions. + +References +========== + +1. See Documentation/driver-api/vfio.rst for more information on VFIO. +2. struct mdev_driver in include/linux/mdev.h +3. struct mdev_parent_ops in include/linux/mdev.h +4. struct vfio_iommu_driver_ops in include/linux/vfio.h diff --git a/Documentation/driver-api/vfio-pci-device-specific-driver-acceptance.rst b/Documentation/driver-api/vfio-pci-device-specific-driver-acceptance.rst new file mode 100644 index 0000000000..b7b99b876b --- /dev/null +++ b/Documentation/driver-api/vfio-pci-device-specific-driver-acceptance.rst @@ -0,0 +1,35 @@ +.. SPDX-License-Identifier: GPL-2.0 + +Acceptance criteria for vfio-pci device specific driver variants +================================================================ + +Overview +-------- +The vfio-pci driver exists as a device agnostic driver using the +system IOMMU and relying on the robustness of platform fault +handling to provide isolated device access to userspace. While the +vfio-pci driver does include some device specific support, further +extensions for yet more advanced device specific features are not +sustainable. The vfio-pci driver has therefore split out +vfio-pci-core as a library that may be reused to implement features +requiring device specific knowledge, ex. saving and loading device +state for the purposes of supporting migration. + +In support of such features, it's expected that some device specific +variants may interact with parent devices (ex. SR-IOV PF in support of +a user assigned VF) or other extensions that may not be otherwise +accessible via the vfio-pci base driver. Authors of such drivers +should be diligent not to create exploitable interfaces via these +interactions or allow unchecked userspace data to have an effect +beyond the scope of the assigned device. + +New driver submissions are therefore requested to have approval via +sign-off/ack/review/etc for any interactions with parent drivers. +Additionally, drivers should make an attempt to provide sufficient +documentation for reviewers to understand the device specific +extensions, for example in the case of migration data, how is the +device state composed and consumed, which portions are not otherwise +available to the user via vfio-pci, what safeguards exist to validate +the data, etc. To that extent, authors should additionally expect to +require reviews from at least one of the listed reviewers, in addition +to the overall vfio maintainer. diff --git a/Documentation/driver-api/vfio.rst b/Documentation/driver-api/vfio.rst new file mode 100644 index 0000000000..633d11c7fa --- /dev/null +++ b/Documentation/driver-api/vfio.rst @@ -0,0 +1,707 @@ +================================== +VFIO - "Virtual Function I/O" [1]_ +================================== + +Many modern systems now provide DMA and interrupt remapping facilities +to help ensure I/O devices behave within the boundaries they've been +allotted. This includes x86 hardware with AMD-Vi and Intel VT-d, +POWER systems with Partitionable Endpoints (PEs) and embedded PowerPC +systems such as Freescale PAMU. The VFIO driver is an IOMMU/device +agnostic framework for exposing direct device access to userspace, in +a secure, IOMMU protected environment. In other words, this allows +safe [2]_, non-privileged, userspace drivers. + +Why do we want that? Virtual machines often make use of direct device +access ("device assignment") when configured for the highest possible +I/O performance. From a device and host perspective, this simply +turns the VM into a userspace driver, with the benefits of +significantly reduced latency, higher bandwidth, and direct use of +bare-metal device drivers [3]_. + +Some applications, particularly in the high performance computing +field, also benefit from low-overhead, direct device access from +userspace. Examples include network adapters (often non-TCP/IP based) +and compute accelerators. Prior to VFIO, these drivers had to either +go through the full development cycle to become proper upstream +driver, be maintained out of tree, or make use of the UIO framework, +which has no notion of IOMMU protection, limited interrupt support, +and requires root privileges to access things like PCI configuration +space. + +The VFIO driver framework intends to unify these, replacing both the +KVM PCI specific device assignment code as well as provide a more +secure, more featureful userspace driver environment than UIO. + +Groups, Devices, and IOMMUs +--------------------------- + +Devices are the main target of any I/O driver. Devices typically +create a programming interface made up of I/O access, interrupts, +and DMA. Without going into the details of each of these, DMA is +by far the most critical aspect for maintaining a secure environment +as allowing a device read-write access to system memory imposes the +greatest risk to the overall system integrity. + +To help mitigate this risk, many modern IOMMUs now incorporate +isolation properties into what was, in many cases, an interface only +meant for translation (ie. solving the addressing problems of devices +with limited address spaces). With this, devices can now be isolated +from each other and from arbitrary memory access, thus allowing +things like secure direct assignment of devices into virtual machines. + +This isolation is not always at the granularity of a single device +though. Even when an IOMMU is capable of this, properties of devices, +interconnects, and IOMMU topologies can each reduce this isolation. +For instance, an individual device may be part of a larger multi- +function enclosure. While the IOMMU may be able to distinguish +between devices within the enclosure, the enclosure may not require +transactions between devices to reach the IOMMU. Examples of this +could be anything from a multi-function PCI device with backdoors +between functions to a non-PCI-ACS (Access Control Services) capable +bridge allowing redirection without reaching the IOMMU. Topology +can also play a factor in terms of hiding devices. A PCIe-to-PCI +bridge masks the devices behind it, making transaction appear as if +from the bridge itself. Obviously IOMMU design plays a major factor +as well. + +Therefore, while for the most part an IOMMU may have device level +granularity, any system is susceptible to reduced granularity. The +IOMMU API therefore supports a notion of IOMMU groups. A group is +a set of devices which is isolatable from all other devices in the +system. Groups are therefore the unit of ownership used by VFIO. + +While the group is the minimum granularity that must be used to +ensure secure user access, it's not necessarily the preferred +granularity. In IOMMUs which make use of page tables, it may be +possible to share a set of page tables between different groups, +reducing the overhead both to the platform (reduced TLB thrashing, +reduced duplicate page tables), and to the user (programming only +a single set of translations). For this reason, VFIO makes use of +a container class, which may hold one or more groups. A container +is created by simply opening the /dev/vfio/vfio character device. + +On its own, the container provides little functionality, with all +but a couple version and extension query interfaces locked away. +The user needs to add a group into the container for the next level +of functionality. To do this, the user first needs to identify the +group associated with the desired device. This can be done using +the sysfs links described in the example below. By unbinding the +device from the host driver and binding it to a VFIO driver, a new +VFIO group will appear for the group as /dev/vfio/$GROUP, where +$GROUP is the IOMMU group number of which the device is a member. +If the IOMMU group contains multiple devices, each will need to +be bound to a VFIO driver before operations on the VFIO group +are allowed (it's also sufficient to only unbind the device from +host drivers if a VFIO driver is unavailable; this will make the +group available, but not that particular device). TBD - interface +for disabling driver probing/locking a device. + +Once the group is ready, it may be added to the container by opening +the VFIO group character device (/dev/vfio/$GROUP) and using the +VFIO_GROUP_SET_CONTAINER ioctl, passing the file descriptor of the +previously opened container file. If desired and if the IOMMU driver +supports sharing the IOMMU context between groups, multiple groups may +be set to the same container. If a group fails to set to a container +with existing groups, a new empty container will need to be used +instead. + +With a group (or groups) attached to a container, the remaining +ioctls become available, enabling access to the VFIO IOMMU interfaces. +Additionally, it now becomes possible to get file descriptors for each +device within a group using an ioctl on the VFIO group file descriptor. + +The VFIO device API includes ioctls for describing the device, the I/O +regions and their read/write/mmap offsets on the device descriptor, as +well as mechanisms for describing and registering interrupt +notifications. + +VFIO Usage Example +------------------ + +Assume user wants to access PCI device 0000:06:0d.0:: + + $ readlink /sys/bus/pci/devices/0000:06:0d.0/iommu_group + ../../../../kernel/iommu_groups/26 + +This device is therefore in IOMMU group 26. This device is on the +pci bus, therefore the user will make use of vfio-pci to manage the +group:: + + # modprobe vfio-pci + +Binding this device to the vfio-pci driver creates the VFIO group +character devices for this group:: + + $ lspci -n -s 0000:06:0d.0 + 06:0d.0 0401: 1102:0002 (rev 08) + # echo 0000:06:0d.0 > /sys/bus/pci/devices/0000:06:0d.0/driver/unbind + # echo 1102 0002 > /sys/bus/pci/drivers/vfio-pci/new_id + +Now we need to look at what other devices are in the group to free +it for use by VFIO:: + + $ ls -l /sys/bus/pci/devices/0000:06:0d.0/iommu_group/devices + total 0 + lrwxrwxrwx. 1 root root 0 Apr 23 16:13 0000:00:1e.0 -> + ../../../../devices/pci0000:00/0000:00:1e.0 + lrwxrwxrwx. 1 root root 0 Apr 23 16:13 0000:06:0d.0 -> + ../../../../devices/pci0000:00/0000:00:1e.0/0000:06:0d.0 + lrwxrwxrwx. 1 root root 0 Apr 23 16:13 0000:06:0d.1 -> + ../../../../devices/pci0000:00/0000:00:1e.0/0000:06:0d.1 + +This device is behind a PCIe-to-PCI bridge [4]_, therefore we also +need to add device 0000:06:0d.1 to the group following the same +procedure as above. Device 0000:00:1e.0 is a bridge that does +not currently have a host driver, therefore it's not required to +bind this device to the vfio-pci driver (vfio-pci does not currently +support PCI bridges). + +The final step is to provide the user with access to the group if +unprivileged operation is desired (note that /dev/vfio/vfio provides +no capabilities on its own and is therefore expected to be set to +mode 0666 by the system):: + + # chown user:user /dev/vfio/26 + +The user now has full access to all the devices and the iommu for this +group and can access them as follows:: + + int container, group, device, i; + struct vfio_group_status group_status = + { .argsz = sizeof(group_status) }; + struct vfio_iommu_type1_info iommu_info = { .argsz = sizeof(iommu_info) }; + struct vfio_iommu_type1_dma_map dma_map = { .argsz = sizeof(dma_map) }; + struct vfio_device_info device_info = { .argsz = sizeof(device_info) }; + + /* Create a new container */ + container = open("/dev/vfio/vfio", O_RDWR); + + if (ioctl(container, VFIO_GET_API_VERSION) != VFIO_API_VERSION) + /* Unknown API version */ + + if (!ioctl(container, VFIO_CHECK_EXTENSION, VFIO_TYPE1_IOMMU)) + /* Doesn't support the IOMMU driver we want. */ + + /* Open the group */ + group = open("/dev/vfio/26", O_RDWR); + + /* Test the group is viable and available */ + ioctl(group, VFIO_GROUP_GET_STATUS, &group_status); + + if (!(group_status.flags & VFIO_GROUP_FLAGS_VIABLE)) + /* Group is not viable (ie, not all devices bound for vfio) */ + + /* Add the group to the container */ + ioctl(group, VFIO_GROUP_SET_CONTAINER, &container); + + /* Enable the IOMMU model we want */ + ioctl(container, VFIO_SET_IOMMU, VFIO_TYPE1_IOMMU); + + /* Get addition IOMMU info */ + ioctl(container, VFIO_IOMMU_GET_INFO, &iommu_info); + + /* Allocate some space and setup a DMA mapping */ + dma_map.vaddr = mmap(0, 1024 * 1024, PROT_READ | PROT_WRITE, + MAP_PRIVATE | MAP_ANONYMOUS, 0, 0); + dma_map.size = 1024 * 1024; + dma_map.iova = 0; /* 1MB starting at 0x0 from device view */ + dma_map.flags = VFIO_DMA_MAP_FLAG_READ | VFIO_DMA_MAP_FLAG_WRITE; + + ioctl(container, VFIO_IOMMU_MAP_DMA, &dma_map); + + /* Get a file descriptor for the device */ + device = ioctl(group, VFIO_GROUP_GET_DEVICE_FD, "0000:06:0d.0"); + + /* Test and setup the device */ + ioctl(device, VFIO_DEVICE_GET_INFO, &device_info); + + for (i = 0; i < device_info.num_regions; i++) { + struct vfio_region_info reg = { .argsz = sizeof(reg) }; + + reg.index = i; + + ioctl(device, VFIO_DEVICE_GET_REGION_INFO, ®); + + /* Setup mappings... read/write offsets, mmaps + * For PCI devices, config space is a region */ + } + + for (i = 0; i < device_info.num_irqs; i++) { + struct vfio_irq_info irq = { .argsz = sizeof(irq) }; + + irq.index = i; + + ioctl(device, VFIO_DEVICE_GET_IRQ_INFO, &irq); + + /* Setup IRQs... eventfds, VFIO_DEVICE_SET_IRQS */ + } + + /* Gratuitous device reset and go... */ + ioctl(device, VFIO_DEVICE_RESET); + +IOMMUFD and vfio_iommu_type1 +---------------------------- + +IOMMUFD is the new user API to manage I/O page tables from userspace. +It intends to be the portal of delivering advanced userspace DMA +features (nested translation [5]_, PASID [6]_, etc.) while also providing +a backwards compatibility interface for existing VFIO_TYPE1v2_IOMMU use +cases. Eventually the vfio_iommu_type1 driver, as well as the legacy +vfio container and group model is intended to be deprecated. + +The IOMMUFD backwards compatibility interface can be enabled two ways. +In the first method, the kernel can be configured with +CONFIG_IOMMUFD_VFIO_CONTAINER, in which case the IOMMUFD subsystem +transparently provides the entire infrastructure for the VFIO +container and IOMMU backend interfaces. The compatibility mode can +also be accessed if the VFIO container interface, ie. /dev/vfio/vfio is +simply symlink'd to /dev/iommu. Note that at the time of writing, the +compatibility mode is not entirely feature complete relative to +VFIO_TYPE1v2_IOMMU (ex. DMA mapping MMIO) and does not attempt to +provide compatibility to the VFIO_SPAPR_TCE_IOMMU interface. Therefore +it is not generally advisable at this time to switch from native VFIO +implementations to the IOMMUFD compatibility interfaces. + +Long term, VFIO users should migrate to device access through the cdev +interface described below, and native access through the IOMMUFD +provided interfaces. + +VFIO Device cdev +---------------- + +Traditionally user acquires a device fd via VFIO_GROUP_GET_DEVICE_FD +in a VFIO group. + +With CONFIG_VFIO_DEVICE_CDEV=y the user can now acquire a device fd +by directly opening a character device /dev/vfio/devices/vfioX where +"X" is the number allocated uniquely by VFIO for registered devices. +cdev interface does not support noiommu devices, so user should use +the legacy group interface if noiommu is wanted. + +The cdev only works with IOMMUFD. Both VFIO drivers and applications +must adapt to the new cdev security model which requires using +VFIO_DEVICE_BIND_IOMMUFD to claim DMA ownership before starting to +actually use the device. Once BIND succeeds then a VFIO device can +be fully accessed by the user. + +VFIO device cdev doesn't rely on VFIO group/container/iommu drivers. +Hence those modules can be fully compiled out in an environment +where no legacy VFIO application exists. + +So far SPAPR does not support IOMMUFD yet. So it cannot support device +cdev either. + +vfio device cdev access is still bound by IOMMU group semantics, ie. there +can be only one DMA owner for the group. Devices belonging to the same +group can not be bound to multiple iommufd_ctx or shared between native +kernel and vfio bus driver or other driver supporting the driver_managed_dma +flag. A violation of this ownership requirement will fail at the +VFIO_DEVICE_BIND_IOMMUFD ioctl, which gates full device access. + +Device cdev Example +------------------- + +Assume user wants to access PCI device 0000:6a:01.0:: + + $ ls /sys/bus/pci/devices/0000:6a:01.0/vfio-dev/ + vfio0 + +This device is therefore represented as vfio0. The user can verify +its existence:: + + $ ls -l /dev/vfio/devices/vfio0 + crw------- 1 root root 511, 0 Feb 16 01:22 /dev/vfio/devices/vfio0 + $ cat /sys/bus/pci/devices/0000:6a:01.0/vfio-dev/vfio0/dev + 511:0 + $ ls -l /dev/char/511\:0 + lrwxrwxrwx 1 root root 21 Feb 16 01:22 /dev/char/511:0 -> ../vfio/devices/vfio0 + +Then provide the user with access to the device if unprivileged +operation is desired:: + + $ chown user:user /dev/vfio/devices/vfio0 + +Finally the user could get cdev fd by:: + + cdev_fd = open("/dev/vfio/devices/vfio0", O_RDWR); + +An opened cdev_fd doesn't give the user any permission of accessing +the device except binding the cdev_fd to an iommufd. After that point +then the device is fully accessible including attaching it to an +IOMMUFD IOAS/HWPT to enable userspace DMA:: + + struct vfio_device_bind_iommufd bind = { + .argsz = sizeof(bind), + .flags = 0, + }; + struct iommu_ioas_alloc alloc_data = { + .size = sizeof(alloc_data), + .flags = 0, + }; + struct vfio_device_attach_iommufd_pt attach_data = { + .argsz = sizeof(attach_data), + .flags = 0, + }; + struct iommu_ioas_map map = { + .size = sizeof(map), + .flags = IOMMU_IOAS_MAP_READABLE | + IOMMU_IOAS_MAP_WRITEABLE | + IOMMU_IOAS_MAP_FIXED_IOVA, + .__reserved = 0, + }; + + iommufd = open("/dev/iommu", O_RDWR); + + bind.iommufd = iommufd; + ioctl(cdev_fd, VFIO_DEVICE_BIND_IOMMUFD, &bind); + + ioctl(iommufd, IOMMU_IOAS_ALLOC, &alloc_data); + attach_data.pt_id = alloc_data.out_ioas_id; + ioctl(cdev_fd, VFIO_DEVICE_ATTACH_IOMMUFD_PT, &attach_data); + + /* Allocate some space and setup a DMA mapping */ + map.user_va = (int64_t)mmap(0, 1024 * 1024, PROT_READ | PROT_WRITE, + MAP_PRIVATE | MAP_ANONYMOUS, 0, 0); + map.iova = 0; /* 1MB starting at 0x0 from device view */ + map.length = 1024 * 1024; + map.ioas_id = alloc_data.out_ioas_id;; + + ioctl(iommufd, IOMMU_IOAS_MAP, &map); + + /* Other device operations as stated in "VFIO Usage Example" */ + +VFIO User API +------------------------------------------------------------------------------- + +Please see include/uapi/linux/vfio.h for complete API documentation. + +VFIO bus driver API +------------------------------------------------------------------------------- + +VFIO bus drivers, such as vfio-pci make use of only a few interfaces +into VFIO core. When devices are bound and unbound to the driver, +Following interfaces are called when devices are bound to and +unbound from the driver:: + + int vfio_register_group_dev(struct vfio_device *device); + int vfio_register_emulated_iommu_dev(struct vfio_device *device); + void vfio_unregister_group_dev(struct vfio_device *device); + +The driver should embed the vfio_device in its own structure and use +vfio_alloc_device() to allocate the structure, and can register +@init/@release callbacks to manage any private state wrapping the +vfio_device:: + + vfio_alloc_device(dev_struct, member, dev, ops); + void vfio_put_device(struct vfio_device *device); + +vfio_register_group_dev() indicates to the core to begin tracking the +iommu_group of the specified dev and register the dev as owned by a VFIO bus +driver. Once vfio_register_group_dev() returns it is possible for userspace to +start accessing the driver, thus the driver should ensure it is completely +ready before calling it. The driver provides an ops structure for callbacks +similar to a file operations structure:: + + struct vfio_device_ops { + char *name; + int (*init)(struct vfio_device *vdev); + void (*release)(struct vfio_device *vdev); + int (*bind_iommufd)(struct vfio_device *vdev, + struct iommufd_ctx *ictx, u32 *out_device_id); + void (*unbind_iommufd)(struct vfio_device *vdev); + int (*attach_ioas)(struct vfio_device *vdev, u32 *pt_id); + void (*detach_ioas)(struct vfio_device *vdev); + int (*open_device)(struct vfio_device *vdev); + void (*close_device)(struct vfio_device *vdev); + ssize_t (*read)(struct vfio_device *vdev, char __user *buf, + size_t count, loff_t *ppos); + ssize_t (*write)(struct vfio_device *vdev, const char __user *buf, + size_t count, loff_t *size); + long (*ioctl)(struct vfio_device *vdev, unsigned int cmd, + unsigned long arg); + int (*mmap)(struct vfio_device *vdev, struct vm_area_struct *vma); + void (*request)(struct vfio_device *vdev, unsigned int count); + int (*match)(struct vfio_device *vdev, char *buf); + void (*dma_unmap)(struct vfio_device *vdev, u64 iova, u64 length); + int (*device_feature)(struct vfio_device *device, u32 flags, + void __user *arg, size_t argsz); + }; + +Each function is passed the vdev that was originally registered +in the vfio_register_group_dev() or vfio_register_emulated_iommu_dev() +call above. This allows the bus driver to obtain its private data using +container_of(). + +:: + + - The init/release callbacks are issued when vfio_device is initialized + and released. + + - The open/close device callbacks are issued when the first + instance of a file descriptor for the device is created (eg. + via VFIO_GROUP_GET_DEVICE_FD) for a user session. + + - The ioctl callback provides a direct pass through for some VFIO_DEVICE_* + ioctls. + + - The [un]bind_iommufd callbacks are issued when the device is bound to + and unbound from iommufd. + + - The [de]attach_ioas callback is issued when the device is attached to + and detached from an IOAS managed by the bound iommufd. However, the + attached IOAS can also be automatically detached when the device is + unbound from iommufd. + + - The read/write/mmap callbacks implement the device region access defined + by the device's own VFIO_DEVICE_GET_REGION_INFO ioctl. + + - The request callback is issued when device is going to be unregistered, + such as when trying to unbind the device from the vfio bus driver. + + - The dma_unmap callback is issued when a range of iovas are unmapped + in the container or IOAS attached by the device. Drivers which make + use of the vfio page pinning interface must implement this callback in + order to unpin pages within the dma_unmap range. Drivers must tolerate + this callback even before calls to open_device(). + +PPC64 sPAPR implementation note +------------------------------- + +This implementation has some specifics: + +1) On older systems (POWER7 with P5IOC2/IODA1) only one IOMMU group per + container is supported as an IOMMU table is allocated at the boot time, + one table per a IOMMU group which is a Partitionable Endpoint (PE) + (PE is often a PCI domain but not always). + + Newer systems (POWER8 with IODA2) have improved hardware design which allows + to remove this limitation and have multiple IOMMU groups per a VFIO + container. + +2) The hardware supports so called DMA windows - the PCI address range + within which DMA transfer is allowed, any attempt to access address space + out of the window leads to the whole PE isolation. + +3) PPC64 guests are paravirtualized but not fully emulated. There is an API + to map/unmap pages for DMA, and it normally maps 1..32 pages per call and + currently there is no way to reduce the number of calls. In order to make + things faster, the map/unmap handling has been implemented in real mode + which provides an excellent performance which has limitations such as + inability to do locked pages accounting in real time. + +4) According to sPAPR specification, A Partitionable Endpoint (PE) is an I/O + subtree that can be treated as a unit for the purposes of partitioning and + error recovery. A PE may be a single or multi-function IOA (IO Adapter), a + function of a multi-function IOA, or multiple IOAs (possibly including + switch and bridge structures above the multiple IOAs). PPC64 guests detect + PCI errors and recover from them via EEH RTAS services, which works on the + basis of additional ioctl commands. + + So 4 additional ioctls have been added: + + VFIO_IOMMU_SPAPR_TCE_GET_INFO + returns the size and the start of the DMA window on the PCI bus. + + VFIO_IOMMU_ENABLE + enables the container. The locked pages accounting + is done at this point. This lets user first to know what + the DMA window is and adjust rlimit before doing any real job. + + VFIO_IOMMU_DISABLE + disables the container. + + VFIO_EEH_PE_OP + provides an API for EEH setup, error detection and recovery. + + The code flow from the example above should be slightly changed:: + + struct vfio_eeh_pe_op pe_op = { .argsz = sizeof(pe_op), .flags = 0 }; + + ..... + /* Add the group to the container */ + ioctl(group, VFIO_GROUP_SET_CONTAINER, &container); + + /* Enable the IOMMU model we want */ + ioctl(container, VFIO_SET_IOMMU, VFIO_SPAPR_TCE_IOMMU) + + /* Get addition sPAPR IOMMU info */ + vfio_iommu_spapr_tce_info spapr_iommu_info; + ioctl(container, VFIO_IOMMU_SPAPR_TCE_GET_INFO, &spapr_iommu_info); + + if (ioctl(container, VFIO_IOMMU_ENABLE)) + /* Cannot enable container, may be low rlimit */ + + /* Allocate some space and setup a DMA mapping */ + dma_map.vaddr = mmap(0, 1024 * 1024, PROT_READ | PROT_WRITE, + MAP_PRIVATE | MAP_ANONYMOUS, 0, 0); + + dma_map.size = 1024 * 1024; + dma_map.iova = 0; /* 1MB starting at 0x0 from device view */ + dma_map.flags = VFIO_DMA_MAP_FLAG_READ | VFIO_DMA_MAP_FLAG_WRITE; + + /* Check here is .iova/.size are within DMA window from spapr_iommu_info */ + ioctl(container, VFIO_IOMMU_MAP_DMA, &dma_map); + + /* Get a file descriptor for the device */ + device = ioctl(group, VFIO_GROUP_GET_DEVICE_FD, "0000:06:0d.0"); + + .... + + /* Gratuitous device reset and go... */ + ioctl(device, VFIO_DEVICE_RESET); + + /* Make sure EEH is supported */ + ioctl(container, VFIO_CHECK_EXTENSION, VFIO_EEH); + + /* Enable the EEH functionality on the device */ + pe_op.op = VFIO_EEH_PE_ENABLE; + ioctl(container, VFIO_EEH_PE_OP, &pe_op); + + /* You're suggested to create additional data struct to represent + * PE, and put child devices belonging to same IOMMU group to the + * PE instance for later reference. + */ + + /* Check the PE's state and make sure it's in functional state */ + pe_op.op = VFIO_EEH_PE_GET_STATE; + ioctl(container, VFIO_EEH_PE_OP, &pe_op); + + /* Save device state using pci_save_state(). + * EEH should be enabled on the specified device. + */ + + .... + + /* Inject EEH error, which is expected to be caused by 32-bits + * config load. + */ + pe_op.op = VFIO_EEH_PE_INJECT_ERR; + pe_op.err.type = EEH_ERR_TYPE_32; + pe_op.err.func = EEH_ERR_FUNC_LD_CFG_ADDR; + pe_op.err.addr = 0ul; + pe_op.err.mask = 0ul; + ioctl(container, VFIO_EEH_PE_OP, &pe_op); + + .... + + /* When 0xFF's returned from reading PCI config space or IO BARs + * of the PCI device. Check the PE's state to see if that has been + * frozen. + */ + ioctl(container, VFIO_EEH_PE_OP, &pe_op); + + /* Waiting for pending PCI transactions to be completed and don't + * produce any more PCI traffic from/to the affected PE until + * recovery is finished. + */ + + /* Enable IO for the affected PE and collect logs. Usually, the + * standard part of PCI config space, AER registers are dumped + * as logs for further analysis. + */ + pe_op.op = VFIO_EEH_PE_UNFREEZE_IO; + ioctl(container, VFIO_EEH_PE_OP, &pe_op); + + /* + * Issue PE reset: hot or fundamental reset. Usually, hot reset + * is enough. However, the firmware of some PCI adapters would + * require fundamental reset. + */ + pe_op.op = VFIO_EEH_PE_RESET_HOT; + ioctl(container, VFIO_EEH_PE_OP, &pe_op); + pe_op.op = VFIO_EEH_PE_RESET_DEACTIVATE; + ioctl(container, VFIO_EEH_PE_OP, &pe_op); + + /* Configure the PCI bridges for the affected PE */ + pe_op.op = VFIO_EEH_PE_CONFIGURE; + ioctl(container, VFIO_EEH_PE_OP, &pe_op); + + /* Restored state we saved at initialization time. pci_restore_state() + * is good enough as an example. + */ + + /* Hopefully, error is recovered successfully. Now, you can resume to + * start PCI traffic to/from the affected PE. + */ + + .... + +5) There is v2 of SPAPR TCE IOMMU. It deprecates VFIO_IOMMU_ENABLE/ + VFIO_IOMMU_DISABLE and implements 2 new ioctls: + VFIO_IOMMU_SPAPR_REGISTER_MEMORY and VFIO_IOMMU_SPAPR_UNREGISTER_MEMORY + (which are unsupported in v1 IOMMU). + + PPC64 paravirtualized guests generate a lot of map/unmap requests, + and the handling of those includes pinning/unpinning pages and updating + mm::locked_vm counter to make sure we do not exceed the rlimit. + The v2 IOMMU splits accounting and pinning into separate operations: + + - VFIO_IOMMU_SPAPR_REGISTER_MEMORY/VFIO_IOMMU_SPAPR_UNREGISTER_MEMORY ioctls + receive a user space address and size of the block to be pinned. + Bisecting is not supported and VFIO_IOMMU_UNREGISTER_MEMORY is expected to + be called with the exact address and size used for registering + the memory block. The userspace is not expected to call these often. + The ranges are stored in a linked list in a VFIO container. + + - VFIO_IOMMU_MAP_DMA/VFIO_IOMMU_UNMAP_DMA ioctls only update the actual + IOMMU table and do not do pinning; instead these check that the userspace + address is from pre-registered range. + + This separation helps in optimizing DMA for guests. + +6) sPAPR specification allows guests to have an additional DMA window(s) on + a PCI bus with a variable page size. Two ioctls have been added to support + this: VFIO_IOMMU_SPAPR_TCE_CREATE and VFIO_IOMMU_SPAPR_TCE_REMOVE. + The platform has to support the functionality or error will be returned to + the userspace. The existing hardware supports up to 2 DMA windows, one is + 2GB long, uses 4K pages and called "default 32bit window"; the other can + be as big as entire RAM, use different page size, it is optional - guests + create those in run-time if the guest driver supports 64bit DMA. + + VFIO_IOMMU_SPAPR_TCE_CREATE receives a page shift, a DMA window size and + a number of TCE table levels (if a TCE table is going to be big enough and + the kernel may not be able to allocate enough of physically contiguous + memory). It creates a new window in the available slot and returns the bus + address where the new window starts. Due to hardware limitation, the user + space cannot choose the location of DMA windows. + + VFIO_IOMMU_SPAPR_TCE_REMOVE receives the bus start address of the window + and removes it. + +------------------------------------------------------------------------------- + +.. [1] VFIO was originally an acronym for "Virtual Function I/O" in its + initial implementation by Tom Lyon while as Cisco. We've since + outgrown the acronym, but it's catchy. + +.. [2] "safe" also depends upon a device being "well behaved". It's + possible for multi-function devices to have backdoors between + functions and even for single function devices to have alternative + access to things like PCI config space through MMIO registers. To + guard against the former we can include additional precautions in the + IOMMU driver to group multi-function PCI devices together + (iommu=group_mf). The latter we can't prevent, but the IOMMU should + still provide isolation. For PCI, SR-IOV Virtual Functions are the + best indicator of "well behaved", as these are designed for + virtualization usage models. + +.. [3] As always there are trade-offs to virtual machine device + assignment that are beyond the scope of VFIO. It's expected that + future IOMMU technologies will reduce some, but maybe not all, of + these trade-offs. + +.. [4] In this case the device is below a PCI bridge, so transactions + from either function of the device are indistinguishable to the iommu:: + + -[0000:00]-+-1e.0-[06]--+-0d.0 + \-0d.1 + + 00:1e.0 PCI bridge: Intel Corporation 82801 PCI Bridge (rev 90) + +.. [5] Nested translation is an IOMMU feature which supports two stage + address translations. This improves the address translation efficiency + in IOMMU virtualization. + +.. [6] PASID stands for Process Address Space ID, introduced by PCI + Express. It is a prerequisite for Shared Virtual Addressing (SVA) + and Scalable I/O Virtualization (Scalable IOV). diff --git a/Documentation/driver-api/virtio/index.rst b/Documentation/driver-api/virtio/index.rst new file mode 100644 index 0000000000..528b14b291 --- /dev/null +++ b/Documentation/driver-api/virtio/index.rst @@ -0,0 +1,11 @@ +.. SPDX-License-Identifier: GPL-2.0 + +====== +Virtio +====== + +.. toctree:: + :maxdepth: 1 + + virtio + writing_virtio_drivers diff --git a/Documentation/driver-api/virtio/virtio.rst b/Documentation/driver-api/virtio/virtio.rst new file mode 100644 index 0000000000..7947b4ca69 --- /dev/null +++ b/Documentation/driver-api/virtio/virtio.rst @@ -0,0 +1,145 @@ +.. SPDX-License-Identifier: GPL-2.0 + +.. _virtio: + +=============== +Virtio on Linux +=============== + +Introduction +============ + +Virtio is an open standard that defines a protocol for communication +between drivers and devices of different types, see Chapter 5 ("Device +Types") of the virtio spec (`[1]`_). Originally developed as a standard +for paravirtualized devices implemented by a hypervisor, it can be used +to interface any compliant device (real or emulated) with a driver. + +For illustrative purposes, this document will focus on the common case +of a Linux kernel running in a virtual machine and using paravirtualized +devices provided by the hypervisor, which exposes them as virtio devices +via standard mechanisms such as PCI. + + +Device - Driver communication: virtqueues +========================================= + +Although the virtio devices are really an abstraction layer in the +hypervisor, they're exposed to the guest as if they are physical devices +using a specific transport method -- PCI, MMIO or CCW -- that is +orthogonal to the device itself. The virtio spec defines these transport +methods in detail, including device discovery, capabilities and +interrupt handling. + +The communication between the driver in the guest OS and the device in +the hypervisor is done through shared memory (that's what makes virtio +devices so efficient) using specialized data structures called +virtqueues, which are actually ring buffers [#f1]_ of buffer descriptors +similar to the ones used in a network device: + +.. kernel-doc:: include/uapi/linux/virtio_ring.h + :identifiers: struct vring_desc + +All the buffers the descriptors point to are allocated by the guest and +used by the host either for reading or for writing but not for both. + +Refer to Chapter 2.5 ("Virtqueues") of the virtio spec (`[1]`_) for the +reference definitions of virtqueues and "Virtqueues and virtio ring: How +the data travels" blog post (`[2]`_) for an illustrated overview of how +the host device and the guest driver communicate. + +The :c:type:`vring_virtqueue` struct models a virtqueue, including the +ring buffers and management data. Embedded in this struct is the +:c:type:`virtqueue` struct, which is the data structure that's +ultimately used by virtio drivers: + +.. kernel-doc:: include/linux/virtio.h + :identifiers: struct virtqueue + +The callback function pointed by this struct is triggered when the +device has consumed the buffers provided by the driver. More +specifically, the trigger will be an interrupt issued by the hypervisor +(see vring_interrupt()). Interrupt request handlers are registered for +a virtqueue during the virtqueue setup process (transport-specific). + +.. kernel-doc:: drivers/virtio/virtio_ring.c + :identifiers: vring_interrupt + + +Device discovery and probing +============================ + +In the kernel, the virtio core contains the virtio bus driver and +transport-specific drivers like `virtio-pci` and `virtio-mmio`. Then +there are individual virtio drivers for specific device types that are +registered to the virtio bus driver. + +How a virtio device is found and configured by the kernel depends on how +the hypervisor defines it. Taking the `QEMU virtio-console +<https://gitlab.com/qemu-project/qemu/-/blob/master/hw/char/virtio-console.c>`__ +device as an example. When using PCI as a transport method, the device +will present itself on the PCI bus with vendor 0x1af4 (Red Hat, Inc.) +and device id 0x1003 (virtio console), as defined in the spec, so the +kernel will detect it as it would do with any other PCI device. + +During the PCI enumeration process, if a device is found to match the +virtio-pci driver (according to the virtio-pci device table, any PCI +device with vendor id = 0x1af4):: + + /* Qumranet donated their vendor ID for devices 0x1000 thru 0x10FF. */ + static const struct pci_device_id virtio_pci_id_table[] = { + { PCI_DEVICE(PCI_VENDOR_ID_REDHAT_QUMRANET, PCI_ANY_ID) }, + { 0 } + }; + +then the virtio-pci driver is probed and, if the probing goes well, the +device is registered to the virtio bus:: + + static int virtio_pci_probe(struct pci_dev *pci_dev, + const struct pci_device_id *id) + { + ... + + if (force_legacy) { + rc = virtio_pci_legacy_probe(vp_dev); + /* Also try modern mode if we can't map BAR0 (no IO space). */ + if (rc == -ENODEV || rc == -ENOMEM) + rc = virtio_pci_modern_probe(vp_dev); + if (rc) + goto err_probe; + } else { + rc = virtio_pci_modern_probe(vp_dev); + if (rc == -ENODEV) + rc = virtio_pci_legacy_probe(vp_dev); + if (rc) + goto err_probe; + } + + ... + + rc = register_virtio_device(&vp_dev->vdev); + +When the device is registered to the virtio bus the kernel will look +for a driver in the bus that can handle the device and call that +driver's ``probe`` method. + +At this point, the virtqueues will be allocated and configured by +calling the appropriate ``virtio_find`` helper function, such as +virtio_find_single_vq() or virtio_find_vqs(), which will end up calling +a transport-specific ``find_vqs`` method. + + +References +========== + +_`[1]` Virtio Spec v1.2: +https://docs.oasis-open.org/virtio/virtio/v1.2/virtio-v1.2.html + +.. Check for later versions of the spec as well. + +_`[2]` Virtqueues and virtio ring: How the data travels +https://www.redhat.com/en/blog/virtqueues-and-virtio-ring-how-data-travels + +.. rubric:: Footnotes + +.. [#f1] that's why they may be also referred to as virtrings. diff --git a/Documentation/driver-api/virtio/writing_virtio_drivers.rst b/Documentation/driver-api/virtio/writing_virtio_drivers.rst new file mode 100644 index 0000000000..e14c58796d --- /dev/null +++ b/Documentation/driver-api/virtio/writing_virtio_drivers.rst @@ -0,0 +1,197 @@ +.. SPDX-License-Identifier: GPL-2.0 + +.. _writing_virtio_drivers: + +====================== +Writing Virtio Drivers +====================== + +Introduction +============ + +This document serves as a basic guideline for driver programmers that +need to hack a new virtio driver or understand the essentials of the +existing ones. See :ref:`Virtio on Linux <virtio>` for a general +overview of virtio. + + +Driver boilerplate +================== + +As a bare minimum, a virtio driver needs to register in the virtio bus +and configure the virtqueues for the device according to its spec, the +configuration of the virtqueues in the driver side must match the +virtqueue definitions in the device. A basic driver skeleton could look +like this:: + + #include <linux/virtio.h> + #include <linux/virtio_ids.h> + #include <linux/virtio_config.h> + #include <linux/module.h> + + /* device private data (one per device) */ + struct virtio_dummy_dev { + struct virtqueue *vq; + }; + + static void virtio_dummy_recv_cb(struct virtqueue *vq) + { + struct virtio_dummy_dev *dev = vq->vdev->priv; + char *buf; + unsigned int len; + + while ((buf = virtqueue_get_buf(dev->vq, &len)) != NULL) { + /* process the received data */ + } + } + + static int virtio_dummy_probe(struct virtio_device *vdev) + { + struct virtio_dummy_dev *dev = NULL; + + /* initialize device data */ + dev = kzalloc(sizeof(struct virtio_dummy_dev), GFP_KERNEL); + if (!dev) + return -ENOMEM; + + /* the device has a single virtqueue */ + dev->vq = virtio_find_single_vq(vdev, virtio_dummy_recv_cb, "input"); + if (IS_ERR(dev->vq)) { + kfree(dev); + return PTR_ERR(dev->vq); + + } + vdev->priv = dev; + + /* from this point on, the device can notify and get callbacks */ + virtio_device_ready(vdev); + + return 0; + } + + static void virtio_dummy_remove(struct virtio_device *vdev) + { + struct virtio_dummy_dev *dev = vdev->priv; + + /* + * disable vq interrupts: equivalent to + * vdev->config->reset(vdev) + */ + virtio_reset_device(vdev); + + /* detach unused buffers */ + while ((buf = virtqueue_detach_unused_buf(dev->vq)) != NULL) { + kfree(buf); + } + + /* remove virtqueues */ + vdev->config->del_vqs(vdev); + + kfree(dev); + } + + static const struct virtio_device_id id_table[] = { + { VIRTIO_ID_DUMMY, VIRTIO_DEV_ANY_ID }, + { 0 }, + }; + + static struct virtio_driver virtio_dummy_driver = { + .driver.name = KBUILD_MODNAME, + .driver.owner = THIS_MODULE, + .id_table = id_table, + .probe = virtio_dummy_probe, + .remove = virtio_dummy_remove, + }; + + module_virtio_driver(virtio_dummy_driver); + MODULE_DEVICE_TABLE(virtio, id_table); + MODULE_DESCRIPTION("Dummy virtio driver"); + MODULE_LICENSE("GPL"); + +The device id ``VIRTIO_ID_DUMMY`` here is a placeholder, virtio drivers +should be added only for devices that are defined in the spec, see +include/uapi/linux/virtio_ids.h. Device ids need to be at least reserved +in the virtio spec before being added to that file. + +If your driver doesn't have to do anything special in its ``init`` and +``exit`` methods, you can use the module_virtio_driver() helper to +reduce the amount of boilerplate code. + +The ``probe`` method does the minimum driver setup in this case +(memory allocation for the device data) and initializes the +virtqueue. virtio_device_ready() is used to enable the virtqueue and to +notify the device that the driver is ready to manage the device +("DRIVER_OK"). The virtqueues are anyway enabled automatically by the +core after ``probe`` returns. + +.. kernel-doc:: include/linux/virtio_config.h + :identifiers: virtio_device_ready + +In any case, the virtqueues need to be enabled before adding buffers to +them. + +Sending and receiving data +========================== + +The virtio_dummy_recv_cb() callback in the code above will be triggered +when the device notifies the driver after it finishes processing a +descriptor or descriptor chain, either for reading or writing. However, +that's only the second half of the virtio device-driver communication +process, as the communication is always started by the driver regardless +of the direction of the data transfer. + +To configure a buffer transfer from the driver to the device, first you +have to add the buffers -- packed as `scatterlists` -- to the +appropriate virtqueue using any of the virtqueue_add_inbuf(), +virtqueue_add_outbuf() or virtqueue_add_sgs(), depending on whether you +need to add one input `scatterlist` (for the device to fill in), one +output `scatterlist` (for the device to consume) or multiple +`scatterlists`, respectively. Then, once the virtqueue is set up, a call +to virtqueue_kick() sends a notification that will be serviced by the +hypervisor that implements the device:: + + struct scatterlist sg[1]; + sg_init_one(sg, buffer, BUFLEN); + virtqueue_add_inbuf(dev->vq, sg, 1, buffer, GFP_ATOMIC); + virtqueue_kick(dev->vq); + +.. kernel-doc:: drivers/virtio/virtio_ring.c + :identifiers: virtqueue_add_inbuf + +.. kernel-doc:: drivers/virtio/virtio_ring.c + :identifiers: virtqueue_add_outbuf + +.. kernel-doc:: drivers/virtio/virtio_ring.c + :identifiers: virtqueue_add_sgs + +Then, after the device has read or written the buffers prepared by the +driver and notifies it back, the driver can call virtqueue_get_buf() to +read the data produced by the device (if the virtqueue was set up with +input buffers) or simply to reclaim the buffers if they were already +consumed by the device: + +.. kernel-doc:: drivers/virtio/virtio_ring.c + :identifiers: virtqueue_get_buf_ctx + +The virtqueue callbacks can be disabled and re-enabled using the +virtqueue_disable_cb() and the family of virtqueue_enable_cb() functions +respectively. See drivers/virtio/virtio_ring.c for more details: + +.. kernel-doc:: drivers/virtio/virtio_ring.c + :identifiers: virtqueue_disable_cb + +.. kernel-doc:: drivers/virtio/virtio_ring.c + :identifiers: virtqueue_enable_cb + +But note that some spurious callbacks can still be triggered under +certain scenarios. The way to disable callbacks reliably is to reset the +device or the virtqueue (virtio_reset_device()). + + +References +========== + +_`[1]` Virtio Spec v1.2: +https://docs.oasis-open.org/virtio/virtio/v1.2/virtio-v1.2.html + +Check for later versions of the spec as well. diff --git a/Documentation/driver-api/vme.rst b/Documentation/driver-api/vme.rst new file mode 100644 index 0000000000..c0b475369d --- /dev/null +++ b/Documentation/driver-api/vme.rst @@ -0,0 +1,297 @@ +VME Device Drivers +================== + +Driver registration +------------------- + +As with other subsystems within the Linux kernel, VME device drivers register +with the VME subsystem, typically called from the devices init routine. This is +achieved via a call to :c:func:`vme_register_driver`. + +A pointer to a structure of type :c:type:`struct vme_driver <vme_driver>` must +be provided to the registration function. Along with the maximum number of +devices your driver is able to support. + +At the minimum, the '.name', '.match' and '.probe' elements of +:c:type:`struct vme_driver <vme_driver>` should be correctly set. The '.name' +element is a pointer to a string holding the device driver's name. + +The '.match' function allows control over which VME devices should be registered +with the driver. The match function should return 1 if a device should be +probed and 0 otherwise. This example match function (from vme_user.c) limits +the number of devices probed to one: + +.. code-block:: c + + #define USER_BUS_MAX 1 + ... + static int vme_user_match(struct vme_dev *vdev) + { + if (vdev->id.num >= USER_BUS_MAX) + return 0; + return 1; + } + +The '.probe' element should contain a pointer to the probe routine. The +probe routine is passed a :c:type:`struct vme_dev <vme_dev>` pointer as an +argument. + +Here, the 'num' field refers to the sequential device ID for this specific +driver. The bridge number (or bus number) can be accessed using +dev->bridge->num. + +A function is also provided to unregister the driver from the VME core called +:c:func:`vme_unregister_driver` and should usually be called from the device +driver's exit routine. + + +Resource management +------------------- + +Once a driver has registered with the VME core the provided match routine will +be called the number of times specified during the registration. If a match +succeeds, a non-zero value should be returned. A zero return value indicates +failure. For all successful matches, the probe routine of the corresponding +driver is called. The probe routine is passed a pointer to the devices +device structure. This pointer should be saved, it will be required for +requesting VME resources. + +The driver can request ownership of one or more master windows +(:c:func:`vme_master_request`), slave windows (:c:func:`vme_slave_request`) +and/or dma channels (:c:func:`vme_dma_request`). Rather than allowing the device +driver to request a specific window or DMA channel (which may be used by a +different driver) the API allows a resource to be assigned based on the required +attributes of the driver in question. For slave windows these attributes are +split into the VME address spaces that need to be accessed in 'aspace' and VME +bus cycle types required in 'cycle'. Master windows add a further set of +attributes in 'width' specifying the required data transfer widths. These +attributes are defined as bitmasks and as such any combination of the +attributes can be requested for a single window, the core will assign a window +that meets the requirements, returning a pointer of type vme_resource that +should be used to identify the allocated resource when it is used. For DMA +controllers, the request function requires the potential direction of any +transfers to be provided in the route attributes. This is typically VME-to-MEM +and/or MEM-to-VME, though some hardware can support VME-to-VME and MEM-to-MEM +transfers as well as test pattern generation. If an unallocated window fitting +the requirements can not be found a NULL pointer will be returned. + +Functions are also provided to free window allocations once they are no longer +required. These functions (:c:func:`vme_master_free`, :c:func:`vme_slave_free` +and :c:func:`vme_dma_free`) should be passed the pointer to the resource +provided during resource allocation. + + +Master windows +-------------- + +Master windows provide access from the local processor[s] out onto the VME bus. +The number of windows available and the available access modes is dependent on +the underlying chipset. A window must be configured before it can be used. + + +Master window configuration +~~~~~~~~~~~~~~~~~~~~~~~~~~~ + +Once a master window has been assigned :c:func:`vme_master_set` can be used to +configure it and :c:func:`vme_master_get` to retrieve the current settings. The +address spaces, transfer widths and cycle types are the same as described +under resource management, however some of the options are mutually exclusive. +For example, only one address space may be specified. + + +Master window access +~~~~~~~~~~~~~~~~~~~~ + +The function :c:func:`vme_master_read` can be used to read from and +:c:func:`vme_master_write` used to write to configured master windows. + +In addition to simple reads and writes, :c:func:`vme_master_rmw` is provided to +do a read-modify-write transaction. Parts of a VME window can also be mapped +into user space memory using :c:func:`vme_master_mmap`. + + +Slave windows +------------- + +Slave windows provide devices on the VME bus access into mapped portions of the +local memory. The number of windows available and the access modes that can be +used is dependent on the underlying chipset. A window must be configured before +it can be used. + + +Slave window configuration +~~~~~~~~~~~~~~~~~~~~~~~~~~ + +Once a slave window has been assigned :c:func:`vme_slave_set` can be used to +configure it and :c:func:`vme_slave_get` to retrieve the current settings. + +The address spaces, transfer widths and cycle types are the same as described +under resource management, however some of the options are mutually exclusive. +For example, only one address space may be specified. + + +Slave window buffer allocation +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ + +Functions are provided to allow the user to allocate +(:c:func:`vme_alloc_consistent`) and free (:c:func:`vme_free_consistent`) +contiguous buffers which will be accessible by the VME bridge. These functions +do not have to be used, other methods can be used to allocate a buffer, though +care must be taken to ensure that they are contiguous and accessible by the VME +bridge. + + +Slave window access +~~~~~~~~~~~~~~~~~~~ + +Slave windows map local memory onto the VME bus, the standard methods for +accessing memory should be used. + + +DMA channels +------------ + +The VME DMA transfer provides the ability to run link-list DMA transfers. The +API introduces the concept of DMA lists. Each DMA list is a link-list which can +be passed to a DMA controller. Multiple lists can be created, extended, +executed, reused and destroyed. + + +List Management +~~~~~~~~~~~~~~~ + +The function :c:func:`vme_new_dma_list` is provided to create and +:c:func:`vme_dma_list_free` to destroy DMA lists. Execution of a list will not +automatically destroy the list, thus enabling a list to be reused for repetitive +tasks. + + +List Population +~~~~~~~~~~~~~~~ + +An item can be added to a list using :c:func:`vme_dma_list_add` (the source and +destination attributes need to be created before calling this function, this is +covered under "Transfer Attributes"). + +.. note:: + + The detailed attributes of the transfers source and destination + are not checked until an entry is added to a DMA list, the request + for a DMA channel purely checks the directions in which the + controller is expected to transfer data. As a result it is + possible for this call to return an error, for example if the + source or destination is in an unsupported VME address space. + +Transfer Attributes +~~~~~~~~~~~~~~~~~~~ + +The attributes for the source and destination are handled separately from adding +an item to a list. This is due to the diverse attributes required for each type +of source and destination. There are functions to create attributes for PCI, VME +and pattern sources and destinations (where appropriate): + + - PCI source or destination: :c:func:`vme_dma_pci_attribute` + - VME source or destination: :c:func:`vme_dma_vme_attribute` + - Pattern source: :c:func:`vme_dma_pattern_attribute` + +The function :c:func:`vme_dma_free_attribute` should be used to free an +attribute. + + +List Execution +~~~~~~~~~~~~~~ + +The function :c:func:`vme_dma_list_exec` queues a list for execution and will +return once the list has been executed. + + +Interrupts +---------- + +The VME API provides functions to attach and detach callbacks to specific VME +level and status ID combinations and for the generation of VME interrupts with +specific VME level and status IDs. + + +Attaching Interrupt Handlers +~~~~~~~~~~~~~~~~~~~~~~~~~~~~ + +The function :c:func:`vme_irq_request` can be used to attach and +:c:func:`vme_irq_free` to free a specific VME level and status ID combination. +Any given combination can only be assigned a single callback function. A void +pointer parameter is provided, the value of which is passed to the callback +function, the use of this pointer is user undefined. The callback parameters are +as follows. Care must be taken in writing a callback function, callback +functions run in interrupt context: + +.. code-block:: c + + void callback(int level, int statid, void *priv); + + +Interrupt Generation +~~~~~~~~~~~~~~~~~~~~ + +The function :c:func:`vme_irq_generate` can be used to generate a VME interrupt +at a given VME level and VME status ID. + + +Location monitors +----------------- + +The VME API provides the following functionality to configure the location +monitor. + + +Location Monitor Management +~~~~~~~~~~~~~~~~~~~~~~~~~~~ + +The function :c:func:`vme_lm_request` is provided to request the use of a block +of location monitors and :c:func:`vme_lm_free` to free them after they are no +longer required. Each block may provide a number of location monitors, +monitoring adjacent locations. The function :c:func:`vme_lm_count` can be used +to determine how many locations are provided. + + +Location Monitor Configuration +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ + +Once a bank of location monitors has been allocated, the function +:c:func:`vme_lm_set` is provided to configure the location and mode of the +location monitor. The function :c:func:`vme_lm_get` can be used to retrieve +existing settings. + + +Location Monitor Use +~~~~~~~~~~~~~~~~~~~~ + +The function :c:func:`vme_lm_attach` enables a callback to be attached and +:c:func:`vme_lm_detach` allows on to be detached from each location monitor +location. Each location monitor can monitor a number of adjacent locations. The +callback function is declared as follows. + +.. code-block:: c + + void callback(void *data); + + +Slot Detection +-------------- + +The function :c:func:`vme_slot_num` returns the slot ID of the provided bridge. + + +Bus Detection +------------- + +The function :c:func:`vme_bus_num` returns the bus ID of the provided bridge. + + +VME API +------- + +.. kernel-doc:: drivers/staging/vme_user/vme.h + :internal: + +.. kernel-doc:: drivers/staging/vme_user/vme.c + :export: diff --git a/Documentation/driver-api/w1.rst b/Documentation/driver-api/w1.rst new file mode 100644 index 0000000000..bda3ad60f6 --- /dev/null +++ b/Documentation/driver-api/w1.rst @@ -0,0 +1,67 @@ +====================== +W1: Dallas' 1-wire bus +====================== + +:Author: David Fries + +W1 API internal to the kernel +============================= + +include/linux/w1.h +~~~~~~~~~~~~~~~~~~ + +W1 kernel API functions. + +.. kernel-doc:: include/linux/w1.h + :internal: + +drivers/w1/w1.c +~~~~~~~~~~~~~~~ + +W1 core functions. + +.. kernel-doc:: drivers/w1/w1.c + :internal: + +drivers/w1/w1_family.c +~~~~~~~~~~~~~~~~~~~~~~~ + +Allows registering device family operations. + +.. kernel-doc:: drivers/w1/w1_family.c + :export: + +drivers/w1/w1_internal.h +~~~~~~~~~~~~~~~~~~~~~~~~ + +W1 internal initialization for master devices. + +.. kernel-doc:: drivers/w1/w1_internal.h + :internal: + +drivers/w1/w1_int.c +~~~~~~~~~~~~~~~~~~~~ + +W1 internal initialization for master devices. + +.. kernel-doc:: drivers/w1/w1_int.c + :export: + +drivers/w1/w1_netlink.h +~~~~~~~~~~~~~~~~~~~~~~~~ + +W1 external netlink API structures and commands. + +.. kernel-doc:: drivers/w1/w1_netlink.h + :internal: + +drivers/w1/w1_io.c +~~~~~~~~~~~~~~~~~~~ + +W1 input/output. + +.. kernel-doc:: drivers/w1/w1_io.c + :export: + +.. kernel-doc:: drivers/w1/w1_io.c + :internal: diff --git a/Documentation/driver-api/wmi.rst b/Documentation/driver-api/wmi.rst new file mode 100644 index 0000000000..6ca58c8249 --- /dev/null +++ b/Documentation/driver-api/wmi.rst @@ -0,0 +1,21 @@ +.. SPDX-License-Identifier: GPL-2.0-or-later + +============== +WMI Driver API +============== + +The WMI driver core supports a more modern bus-based interface for interacting +with WMI devices, and an older GUID-based interface. The latter interface is +considered to be deprecated, so new WMI drivers should generally avoid it since +it has some issues with multiple WMI devices and events sharing the same GUIDs +and/or notification IDs. The modern bus-based interface instead maps each +WMI device to a :c:type:`struct wmi_device <wmi_device>`, so it supports +WMI devices sharing GUIDs and/or notification IDs. Drivers can then register +a :c:type:`struct wmi_driver <wmi_driver>`, which will be bound to compatible +WMI devices by the driver core. + +.. kernel-doc:: include/linux/wmi.h + :internal: + +.. kernel-doc:: drivers/platform/x86/wmi.c + :export: diff --git a/Documentation/driver-api/xilinx/eemi.rst b/Documentation/driver-api/xilinx/eemi.rst new file mode 100644 index 0000000000..c1bc47b900 --- /dev/null +++ b/Documentation/driver-api/xilinx/eemi.rst @@ -0,0 +1,40 @@ +==================================== +Xilinx Zynq MPSoC EEMI Documentation +==================================== + +Xilinx Zynq MPSoC Firmware Interface +------------------------------------- +The zynqmp-firmware node describes the interface to platform firmware. +ZynqMP has an interface to communicate with secure firmware. Firmware +driver provides an interface to firmware APIs. Interface APIs can be +used by any driver to communicate with PMC(Platform Management Controller). + +Embedded Energy Management Interface (EEMI) +---------------------------------------------- +The embedded energy management interface is used to allow software +components running across different processing clusters on a chip or +device to communicate with a power management controller (PMC) on a +device to issue or respond to power management requests. + +Any driver who wants to communicate with PMC using EEMI APIs use the +functions provided for each function. + +IOCTL +------ +IOCTL API is for device control and configuration. It is not a system +IOCTL but it is an EEMI API. This API can be used by master to control +any device specific configuration. IOCTL definitions can be platform +specific. This API also manage shared device configuration. + +The following IOCTL IDs are valid for device control: +- IOCTL_SET_PLL_FRAC_MODE 8 +- IOCTL_GET_PLL_FRAC_MODE 9 +- IOCTL_SET_PLL_FRAC_DATA 10 +- IOCTL_GET_PLL_FRAC_DATA 11 + +Refer EEMI API guide [0] for IOCTL specific parameters and other EEMI APIs. + +References +---------- +[0] Embedded Energy Management Interface (EEMI) API guide: + https://www.xilinx.com/support/documentation/user_guides/ug1200-eemi-api.pdf diff --git a/Documentation/driver-api/xilinx/index.rst b/Documentation/driver-api/xilinx/index.rst new file mode 100644 index 0000000000..13f7589ed4 --- /dev/null +++ b/Documentation/driver-api/xilinx/index.rst @@ -0,0 +1,16 @@ + +=========== +Xilinx FPGA +=========== + +.. toctree:: + :maxdepth: 1 + + eemi + +.. only:: subproject and html + + Indices + ======= + + * :ref:`genindex` diff --git a/Documentation/driver-api/xillybus.rst b/Documentation/driver-api/xillybus.rst new file mode 100644 index 0000000000..a3ab832cb2 --- /dev/null +++ b/Documentation/driver-api/xillybus.rst @@ -0,0 +1,379 @@ +========================================== +Xillybus driver for generic FPGA interface +========================================== + +:Author: Eli Billauer, Xillybus Ltd. (http://xillybus.com) +:Email: eli.billauer@gmail.com or as advertised on Xillybus' site. + +.. Contents: + + - Introduction + -- Background + -- Xillybus Overview + + - Usage + -- User interface + -- Synchronization + -- Seekable pipes + + - Internals + -- Source code organization + -- Pipe attributes + -- Host never reads from the FPGA + -- Channels, pipes, and the message channel + -- Data streaming + -- Data granularity + -- Probing + -- Buffer allocation + -- The "nonempty" message (supporting poll) + + +Introduction +============ + +Background +---------- + +An FPGA (Field Programmable Gate Array) is a piece of logic hardware, which +can be programmed to become virtually anything that is usually found as a +dedicated chipset: For instance, a display adapter, network interface card, +or even a processor with its peripherals. FPGAs are the LEGO of hardware: +Based upon certain building blocks, you make your own toys the way you like +them. It's usually pointless to reimplement something that is already +available on the market as a chipset, so FPGAs are mostly used when some +special functionality is needed, and the production volume is relatively low +(hence not justifying the development of an ASIC). + +The challenge with FPGAs is that everything is implemented at a very low +level, even lower than assembly language. In order to allow FPGA designers to +focus on their specific project, and not reinvent the wheel over and over +again, pre-designed building blocks, IP cores, are often used. These are the +FPGA parallels of library functions. IP cores may implement certain +mathematical functions, a functional unit (e.g. a USB interface), an entire +processor (e.g. ARM) or anything that might come handy. Think of them as a +building block, with electrical wires dangling on the sides for connection to +other blocks. + +One of the daunting tasks in FPGA design is communicating with a fullblown +operating system (actually, with the processor running it): Implementing the +low-level bus protocol and the somewhat higher-level interface with the host +(registers, interrupts, DMA etc.) is a project in itself. When the FPGA's +function is a well-known one (e.g. a video adapter card, or a NIC), it can +make sense to design the FPGA's interface logic specifically for the project. +A special driver is then written to present the FPGA as a well-known interface +to the kernel and/or user space. In that case, there is no reason to treat the +FPGA differently than any device on the bus. + +It's however common that the desired data communication doesn't fit any well- +known peripheral function. Also, the effort of designing an elegant +abstraction for the data exchange is often considered too big. In those cases, +a quicker and possibly less elegant solution is sought: The driver is +effectively written as a user space program, leaving the kernel space part +with just elementary data transport. This still requires designing some +interface logic for the FPGA, and write a simple ad-hoc driver for the kernel. + +Xillybus Overview +----------------- + +Xillybus is an IP core and a Linux driver. Together, they form a kit for +elementary data transport between an FPGA and the host, providing pipe-like +data streams with a straightforward user interface. It's intended as a low- +effort solution for mixed FPGA-host projects, for which it makes sense to +have the project-specific part of the driver running in a user-space program. + +Since the communication requirements may vary significantly from one FPGA +project to another (the number of data pipes needed in each direction and +their attributes), there isn't one specific chunk of logic being the Xillybus +IP core. Rather, the IP core is configured and built based upon a +specification given by its end user. + +Xillybus presents independent data streams, which resemble pipes or TCP/IP +communication to the user. At the host side, a character device file is used +just like any pipe file. On the FPGA side, hardware FIFOs are used to stream +the data. This is contrary to a common method of communicating through fixed- +sized buffers (even though such buffers are used by Xillybus under the hood). +There may be more than a hundred of these streams on a single IP core, but +also no more than one, depending on the configuration. + +In order to ease the deployment of the Xillybus IP core, it contains a simple +data structure which completely defines the core's configuration. The Linux +driver fetches this data structure during its initialization process, and sets +up the DMA buffers and character devices accordingly. As a result, a single +driver is used to work out of the box with any Xillybus IP core. + +The data structure just mentioned should not be confused with PCI's +configuration space or the Flattened Device Tree. + +Usage +===== + +User interface +-------------- + +On the host, all interface with Xillybus is done through /dev/xillybus_* +device files, which are generated automatically as the drivers loads. The +names of these files depend on the IP core that is loaded in the FPGA (see +Probing below). To communicate with the FPGA, open the device file that +corresponds to the hardware FIFO you want to send data or receive data from, +and use plain write() or read() calls, just like with a regular pipe. In +particular, it makes perfect sense to go:: + + $ cat mydata > /dev/xillybus_thisfifo + + $ cat /dev/xillybus_thatfifo > hisdata + +possibly pressing CTRL-C as some stage, even though the xillybus_* pipes have +the capability to send an EOF (but may not use it). + +The driver and hardware are designed to behave sensibly as pipes, including: + +* Supporting non-blocking I/O (by setting O_NONBLOCK on open() ). + +* Supporting poll() and select(). + +* Being bandwidth efficient under load (using DMA) but also handle small + pieces of data sent across (like TCP/IP) by autoflushing. + +A device file can be read only, write only or bidirectional. Bidirectional +device files are treated like two independent pipes (except for sharing a +"channel" structure in the implementation code). + +Synchronization +--------------- + +Xillybus pipes are configured (on the IP core) to be either synchronous or +asynchronous. For a synchronous pipe, write() returns successfully only after +some data has been submitted and acknowledged by the FPGA. This slows down +bulk data transfers, and is nearly impossible for use with streams that +require data at a constant rate: There is no data transmitted to the FPGA +between write() calls, in particular when the process loses the CPU. + +When a pipe is configured asynchronous, write() returns if there was enough +room in the buffers to store any of the data in the buffers. + +For FPGA to host pipes, asynchronous pipes allow data transfer from the FPGA +as soon as the respective device file is opened, regardless of if the data +has been requested by a read() call. On synchronous pipes, only the amount +of data requested by a read() call is transmitted. + +In summary, for synchronous pipes, data between the host and FPGA is +transmitted only to satisfy the read() or write() call currently handled +by the driver, and those calls wait for the transmission to complete before +returning. + +Note that the synchronization attribute has nothing to do with the possibility +that read() or write() completes less bytes than requested. There is a +separate configuration flag ("allowpartial") that determines whether such a +partial completion is allowed. + +Seekable pipes +-------------- + +A synchronous pipe can be configured to have the stream's position exposed +to the user logic at the FPGA. Such a pipe is also seekable on the host API. +With this feature, a memory or register interface can be attached on the +FPGA side to the seekable stream. Reading or writing to a certain address in +the attached memory is done by seeking to the desired address, and calling +read() or write() as required. + + +Internals +========= + +Source code organization +------------------------ + +The Xillybus driver consists of a core module, xillybus_core.c, and modules +that depend on the specific bus interface (xillybus_of.c and xillybus_pcie.c). + +The bus specific modules are those probed when a suitable device is found by +the kernel. Since the DMA mapping and synchronization functions, which are bus +dependent by their nature, are used by the core module, a +xilly_endpoint_hardware structure is passed to the core module on +initialization. This structure is populated with pointers to wrapper functions +which execute the DMA-related operations on the bus. + +Pipe attributes +--------------- + +Each pipe has a number of attributes which are set when the FPGA component +(IP core) is built. They are fetched from the IDT (the data structure which +defines the core's configuration, see Probing below) by xilly_setupchannels() +in xillybus_core.c as follows: + +* is_writebuf: The pipe's direction. A non-zero value means it's an FPGA to + host pipe (the FPGA "writes"). + +* channelnum: The pipe's identification number in communication between the + host and FPGA. + +* format: The underlying data width. See Data Granularity below. + +* allowpartial: A non-zero value means that a read() or write() (whichever + applies) may return with less than the requested number of bytes. The common + choice is a non-zero value, to match standard UNIX behavior. + +* synchronous: A non-zero value means that the pipe is synchronous. See + Synchronization above. + +* bufsize: Each DMA buffer's size. Always a power of two. + +* bufnum: The number of buffers allocated for this pipe. Always a power of two. + +* exclusive_open: A non-zero value forces exclusive opening of the associated + device file. If the device file is bidirectional, and already opened only in + one direction, the opposite direction may be opened once. + +* seekable: A non-zero value indicates that the pipe is seekable. See + Seekable pipes above. + +* supports_nonempty: A non-zero value (which is typical) indicates that the + hardware will send the messages that are necessary to support select() and + poll() for this pipe. + +Host never reads from the FPGA +------------------------------ + +Even though PCI Express is hotpluggable in general, a typical motherboard +doesn't expect a card to go away all of the sudden. But since the PCIe card +is based upon reprogrammable logic, a sudden disappearance from the bus is +quite likely as a result of an accidental reprogramming of the FPGA while the +host is up. In practice, nothing happens immediately in such a situation. But +if the host attempts to read from an address that is mapped to the PCI Express +device, that leads to an immediate freeze of the system on some motherboards, +even though the PCIe standard requires a graceful recovery. + +In order to avoid these freezes, the Xillybus driver refrains completely from +reading from the device's register space. All communication from the FPGA to +the host is done through DMA. In particular, the Interrupt Service Routine +doesn't follow the common practice of checking a status register when it's +invoked. Rather, the FPGA prepares a small buffer which contains short +messages, which inform the host what the interrupt was about. + +This mechanism is used on non-PCIe buses as well for the sake of uniformity. + + +Channels, pipes, and the message channel +---------------------------------------- + +Each of the (possibly bidirectional) pipes presented to the user is allocated +a data channel between the FPGA and the host. The distinction between channels +and pipes is necessary only because of channel 0, which is used for interrupt- +related messages from the FPGA, and has no pipe attached to it. + +Data streaming +-------------- + +Even though a non-segmented data stream is presented to the user at both +sides, the implementation relies on a set of DMA buffers which is allocated +for each channel. For the sake of illustration, let's take the FPGA to host +direction: As data streams into the respective channel's interface in the +FPGA, the Xillybus IP core writes it to one of the DMA buffers. When the +buffer is full, the FPGA informs the host about that (appending a +XILLYMSG_OPCODE_RELEASEBUF message channel 0 and sending an interrupt if +necessary). The host responds by making the data available for reading through +the character device. When all data has been read, the host writes on the +FPGA's buffer control register, allowing the buffer's overwriting. Flow +control mechanisms exist on both sides to prevent underflows and overflows. + +This is not good enough for creating a TCP/IP-like stream: If the data flow +stops momentarily before a DMA buffer is filled, the intuitive expectation is +that the partial data in buffer will arrive anyhow, despite the buffer not +being completed. This is implemented by adding a field in the +XILLYMSG_OPCODE_RELEASEBUF message, through which the FPGA informs not just +which buffer is submitted, but how much data it contains. + +But the FPGA will submit a partially filled buffer only if directed to do so +by the host. This situation occurs when the read() method has been blocking +for XILLY_RX_TIMEOUT jiffies (currently 10 ms), after which the host commands +the FPGA to submit a DMA buffer as soon as it can. This timeout mechanism +balances between bus bandwidth efficiency (preventing a lot of partially +filled buffers being sent) and a latency held fairly low for tails of data. + +A similar setting is used in the host to FPGA direction. The handling of +partial DMA buffers is somewhat different, though. The user can tell the +driver to submit all data it has in the buffers to the FPGA, by issuing a +write() with the byte count set to zero. This is similar to a flush request, +but it doesn't block. There is also an autoflushing mechanism, which triggers +an equivalent flush roughly XILLY_RX_TIMEOUT jiffies after the last write(). +This allows the user to be oblivious about the underlying buffering mechanism +and yet enjoy a stream-like interface. + +Note that the issue of partial buffer flushing is irrelevant for pipes having +the "synchronous" attribute nonzero, since synchronous pipes don't allow data +to lay around in the DMA buffers between read() and write() anyhow. + +Data granularity +---------------- + +The data arrives or is sent at the FPGA as 8, 16 or 32 bit wide words, as +configured by the "format" attribute. Whenever possible, the driver attempts +to hide this when the pipe is accessed differently from its natural alignment. +For example, reading single bytes from a pipe with 32 bit granularity works +with no issues. Writing single bytes to pipes with 16 or 32 bit granularity +will also work, but the driver can't send partially completed words to the +FPGA, so the transmission of up to one word may be held until it's fully +occupied with user data. + +This somewhat complicates the handling of host to FPGA streams, because +when a buffer is flushed, it may contain up to 3 bytes don't form a word in +the FPGA, and hence can't be sent. To prevent loss of data, these leftover +bytes need to be moved to the next buffer. The parts in xillybus_core.c +that mention "leftovers" in some way are related to this complication. + +Probing +------- + +As mentioned earlier, the number of pipes that are created when the driver +loads and their attributes depend on the Xillybus IP core in the FPGA. During +the driver's initialization, a blob containing configuration info, the +Interface Description Table (IDT), is sent from the FPGA to the host. The +bootstrap process is done in three phases: + +1. Acquire the length of the IDT, so a buffer can be allocated for it. This + is done by sending a quiesce command to the device, since the acknowledge + for this command contains the IDT's buffer length. + +2. Acquire the IDT itself. + +3. Create the interfaces according to the IDT. + +Buffer allocation +----------------- + +In order to simplify the logic that prevents illegal boundary crossings of +PCIe packets, the following rule applies: If a buffer is smaller than 4kB, +it must not cross a 4kB boundary. Otherwise, it must be 4kB aligned. The +xilly_setupchannels() functions allocates these buffers by requesting whole +pages from the kernel, and diving them into DMA buffers as necessary. Since +all buffers' sizes are powers of two, it's possible to pack any set of such +buffers, with a maximal waste of one page of memory. + +All buffers are allocated when the driver is loaded. This is necessary, +since large continuous physical memory segments are sometimes requested, +which are more likely to be available when the system is freshly booted. + +The allocation of buffer memory takes place in the same order they appear in +the IDT. The driver relies on a rule that the pipes are sorted with decreasing +buffer size in the IDT. If a requested buffer is larger or equal to a page, +the necessary number of pages is requested from the kernel, and these are +used for this buffer. If the requested buffer is smaller than a page, one +single page is requested from the kernel, and that page is partially used. +Or, if there already is a partially used page at hand, the buffer is packed +into that page. It can be shown that all pages requested from the kernel +(except possibly for the last) are 100% utilized this way. + +The "nonempty" message (supporting poll) +---------------------------------------- + +In order to support the "poll" method (and hence select() ), there is a small +catch regarding the FPGA to host direction: The FPGA may have filled a DMA +buffer with some data, but not submitted that buffer. If the host waited for +the buffer's submission by the FPGA, there would be a possibility that the +FPGA side has sent data, but a select() call would still block, because the +host has not received any notification about this. This is solved with +XILLYMSG_OPCODE_NONEMPTY messages sent by the FPGA when a channel goes from +completely empty to containing some data. + +These messages are used only to support poll() and select(). The IP core can +be configured not to send them for a slight reduction of bandwidth. diff --git a/Documentation/driver-api/zorro.rst b/Documentation/driver-api/zorro.rst new file mode 100644 index 0000000000..664072b017 --- /dev/null +++ b/Documentation/driver-api/zorro.rst @@ -0,0 +1,104 @@ +======================================== +Writing Device Drivers for Zorro Devices +======================================== + +:Author: Written by Geert Uytterhoeven <geert@linux-m68k.org> +:Last revised: September 5, 2003 + + +Introduction +------------ + +The Zorro bus is the bus used in the Amiga family of computers. Thanks to +AutoConfig(tm), it's 100% Plug-and-Play. + +There are two types of Zorro buses, Zorro II and Zorro III: + + - The Zorro II address space is 24-bit and lies within the first 16 MB of the + Amiga's address map. + + - Zorro III is a 32-bit extension of Zorro II, which is backwards compatible + with Zorro II. The Zorro III address space lies outside the first 16 MB. + + +Probing for Zorro Devices +------------------------- + +Zorro devices are found by calling ``zorro_find_device()``, which returns a +pointer to the ``next`` Zorro device with the specified Zorro ID. A probe loop +for the board with Zorro ID ``ZORRO_PROD_xxx`` looks like:: + + struct zorro_dev *z = NULL; + + while ((z = zorro_find_device(ZORRO_PROD_xxx, z))) { + if (!zorro_request_region(z->resource.start+MY_START, MY_SIZE, + "My explanation")) + ... + } + +``ZORRO_WILDCARD`` acts as a wildcard and finds any Zorro device. If your driver +supports different types of boards, you can use a construct like:: + + struct zorro_dev *z = NULL; + + while ((z = zorro_find_device(ZORRO_WILDCARD, z))) { + if (z->id != ZORRO_PROD_xxx1 && z->id != ZORRO_PROD_xxx2 && ...) + continue; + if (!zorro_request_region(z->resource.start+MY_START, MY_SIZE, + "My explanation")) + ... + } + + +Zorro Resources +--------------- + +Before you can access a Zorro device's registers, you have to make sure it's +not yet in use. This is done using the I/O memory space resource management +functions:: + + request_mem_region() + release_mem_region() + +Shortcuts to claim the whole device's address space are provided as well:: + + zorro_request_device + zorro_release_device + + +Accessing the Zorro Address Space +--------------------------------- + +The address regions in the Zorro device resources are Zorro bus address +regions. Due to the identity bus-physical address mapping on the Zorro bus, +they are CPU physical addresses as well. + +The treatment of these regions depends on the type of Zorro space: + + - Zorro II address space is always mapped and does not have to be mapped + explicitly using z_ioremap(). + + Conversion from bus/physical Zorro II addresses to kernel virtual addresses + and vice versa is done using:: + + virt_addr = ZTWO_VADDR(bus_addr); + bus_addr = ZTWO_PADDR(virt_addr); + + - Zorro III address space must be mapped explicitly using z_ioremap() first + before it can be accessed:: + + virt_addr = z_ioremap(bus_addr, size); + ... + z_iounmap(virt_addr); + + +References +---------- + +#. linux/include/linux/zorro.h +#. linux/include/uapi/linux/zorro.h +#. linux/include/uapi/linux/zorro_ids.h +#. linux/arch/m68k/include/asm/zorro.h +#. linux/drivers/zorro +#. /proc/bus/zorro + |