diff options
Diffstat (limited to 'modules.d/95nvmf')
-rw-r--r-- | modules.d/95nvmf/95-nvmf-initqueue.rules | 10 | ||||
-rwxr-xr-x | modules.d/95nvmf/module-setup.sh | 148 | ||||
-rwxr-xr-x | modules.d/95nvmf/nbftroot.sh | 5 | ||||
-rwxr-xr-x | modules.d/95nvmf/nvmf-autoconnect.sh | 54 | ||||
-rwxr-xr-x | modules.d/95nvmf/parse-nvmf-boot-connections.sh | 326 |
5 files changed, 543 insertions, 0 deletions
# ===== modules.d/95nvmf/95-nvmf-initqueue.rules (new file, mode 100644) =====
#
# nvmf-initqueue.rules
#
# D-Bus doesn't run in the initrd, which means that we cannot use our
# usual trick of starting custom systemd services.
# So use a rule to create initqueue entries instead.

ACTION=="change", SUBSYSTEM=="fc", ENV{FC_EVENT}=="nvmediscovery", \
    ENV{NVMEFC_HOST_TRADDR}=="*", ENV{NVMEFC_TRADDR}=="*", \
    RUN+="/sbin/initqueue --onetime --unique --name nvmf-connect-$env{NVMEFC_TRADDR}-$env{NVMEFC_HOST_TRADDR} /usr/sbin/nvme connect-all --transport=fc --traddr=$env{NVMEFC_TRADDR} --host-traddr=$env{NVMEFC_HOST_TRADDR}"

# ===== modules.d/95nvmf/module-setup.sh (new file, mode 100755) =====
#!/bin/bash

# called by dracut
#
# Decide whether this module applies to the current system.
# Returns 1 if the nvme or jq binaries are missing, 255 if the host NQN/ID
# files are missing or (in hostonly / mount_needs mode) no NVMe-oF device or
# no discovery source is found, and 0 otherwise.
check() {
    require_binaries nvme jq || return 1
    [ -f /etc/nvme/hostnqn ] || return 255
    [ -f /etc/nvme/hostid ] || return 255

    # Callback for for_each_host_dev_and_slaves; $1 is a "major:minor" block
    # device id. Succeeds when the controller behind the device uses the
    # fc, tcp or rdma transport.
    # NOTE: changes the working directory; the caller wraps the iteration in
    # pushd/popd.
    is_nvmf() {
        local _dev=$1
        local trtype
        # keep the loop variable out of the caller's scope
        local d

        [[ -L "/sys/dev/block/$_dev" ]] || return 0
        cd -P "/sys/dev/block/$_dev" || return 0
        # For a partition, the controller links live in the parent (disk) directory.
        if [ -f partition ]; then
            cd ..
        fi
        for d in device/nvme*; do
            [ -L "$d" ] || continue
            if readlink "$d" | grep -q nvme-fabrics; then
                read -r trtype < "$d"/transport
                break
            fi
        done
        [[ $trtype == "fc" ]] || [[ $trtype == "tcp" ]] || [[ $trtype == "rdma" ]]
    }

    # Succeeds if at least one ACPI NBFT table file is present.
    has_nbft() {
        local f found=
        for f in /sys/firmware/acpi/tables/NBFT*; do
            [ -f "$f" ] || continue
            found=1
            break
        done
        [[ $found ]]
    }

    [[ $hostonly ]] || [[ $mount_needs ]] && {
        pushd . > /dev/null
        for_each_host_dev_and_slaves is_nvmf
        local _is_nvmf=$?
        popd > /dev/null || exit
        [[ $_is_nvmf == 0 ]] || return 255
        # At least one discovery source must exist: FC autodiscovery,
        # a discovery.conf, a config.json or an NBFT table.
        if [ ! -f /sys/class/fc/fc_udev_device/nvme_discovery ] \
            && [ ! -f /etc/nvme/discovery.conf ] \
            && [ ! -f /etc/nvme/config.json ] && ! has_nbft; then
            echo "No discovery arguments present"
            return 255
        fi
    }
    return 0
}

# called by dracut
#
# Print the names of the dracut modules this module depends on.
depends() {
    echo bash rootfs-block network
    return 0
}

# called by dracut
#
# Install the kernel modules. The second instmods call runs with hostonly
# cleared for that single command.
installkernel() {
    instmods nvme_fc lpfc qla2xxx
    hostonly="" instmods nvme_tcp nvme_fabrics 8021q
}

# called by dracut
#
# Print the rd.nvmf.* kernel command line arguments describing the host
# NQN/ID and, in hostonly / mount_needs mode, the discovery parameters of
# the NVMe-oF devices in use. Output has no trailing newline.
cmdline() {
    local _hostnqn
    local _hostid

    # Callback for for_each_host_dev_and_slaves; $1 is a "major:minor" block
    # device id. Prints one " rd.nvmf.discover=..." argument per address
    # line reported by "nvme list-subsys" for the device.
    gen_nvmf_cmdline() {
        local _dev=$1
        local trtype
        local traddr
        local host_traddr
        local trsvcid
        local _address
        local -a _address_parts
        # keep the loop variables out of the caller's scope
        local d i

        [[ -L "/sys/dev/block/$_dev" ]] || return 0
        cd -P "/sys/dev/block/$_dev" || return 0
        if [ -f partition ]; then
            cd ..
        fi
        for d in device/nvme*; do
            [ -L "$d" ] || continue
            if readlink "$d" | grep -q nvme-fabrics; then
                read -r trtype < "$d"/transport
                break
            fi
        done

        [ -z "$trtype" ] && return 0
        # The third and fourth fields of each output line are taken as
        # transport and address list; lines whose third field is empty or
        # starts with "NQN" are skipped.
        nvme list-subsys "${PWD##*/}" | while read -r _ _ trtype _address _; do
            [[ -z $trtype || $trtype != "${trtype#NQN}" ]] && continue
            unset traddr
            unset host_traddr
            unset trsvcid
            mapfile -t -d ',' _address_parts < <(printf "%s" "$_address")
            for i in "${_address_parts[@]}"; do
                [[ $i =~ ^traddr= ]] && traddr="${i#traddr=}"
                [[ $i =~ ^host_traddr= ]] && host_traddr="${i#host_traddr=}"
                [[ $i =~ ^trsvcid= ]] && trsvcid="${i#trsvcid=}"
            done
            [[ -z $traddr && -z $host_traddr && -z $trsvcid ]] && continue
            echo -n " rd.nvmf.discover=$trtype,$traddr,$host_traddr,$trsvcid"
        done
    }

    if [ -f /etc/nvme/hostnqn ]; then
        read -r _hostnqn < /etc/nvme/hostnqn
        echo -n " rd.nvmf.hostnqn=${_hostnqn}"
    fi
    if [ -f /etc/nvme/hostid ]; then
        read -r _hostid < /etc/nvme/hostid
        echo -n " rd.nvmf.hostid=${_hostid}"
    fi

    [[ $hostonly ]] || [[ $mount_needs ]] && {
        pushd . > /dev/null
        for_each_host_dev_and_slaves gen_nvmf_cmdline
        popd > /dev/null || exit
    }
}

# called by dracut
#
# Install scripts, binaries, configuration files and udev rules into the
# initrd. With hostonly_cmdline=yes, the output of cmdline() is stored in
# /etc/cmdline.d/95nvmf-args.conf inside the image.
install() {
    if [[ $hostonly_cmdline == "yes" ]]; then
        local _nvmf_args
        _nvmf_args=$(cmdline)
        [[ "$_nvmf_args" ]] && printf "%s" "$_nvmf_args" >> "${initdir}/etc/cmdline.d/95nvmf-args.conf"
    fi
    inst_simple "/etc/nvme/hostnqn"
    inst_simple "/etc/nvme/hostid"

    inst_multiple ip sed

    inst_script "${moddir}/nvmf-autoconnect.sh" /sbin/nvmf-autoconnect.sh
    inst_script "${moddir}/nbftroot.sh" /sbin/nbftroot

    inst_multiple nvme jq
    inst_hook cmdline 92 "$moddir/parse-nvmf-boot-connections.sh"
    inst_simple "/etc/nvme/discovery.conf"
    inst_simple "/etc/nvme/config.json"
    inst_rules /usr/lib/udev/rules.d/71-nvmf-iopolicy-netapp.rules
    inst_rules "$moddir/95-nvmf-initqueue.rules"
    dracut_need_initqueue
}

# ===== modules.d/95nvmf/nbftroot.sh (new file, mode 100755) =====
#! /bin/sh
# This script is called from /sbin/netroot

/sbin/nvmf-autoconnect.sh online
exit 0

# ===== modules.d/95nvmf/nvmf-autoconnect.sh (new file, mode 100755) =====
#!/bin/sh
# Argument $1 is "settled", "online", or "timeout", indicating
# the queue from which the script is called.
# In the "timeout" case, try everything.
# Otherwise, try options according to the priorities below.

[ "$RD_DEBUG" != yes ] || set -x

if [ "$1" = timeout ]; then
    [ ! -f /sys/class/fc/fc_udev_device/nvme_discovery ] \
        || echo add > /sys/class/fc/fc_udev_device/nvme_discovery
    /usr/sbin/nvme connect-all
    exit 0
fi

# Set only if both the host NQN and the host ID file exist.
NVMF_HOSTNQN_OK=
[ ! -f "/etc/nvme/hostnqn" ] || [ ! -f "/etc/nvme/hostid" ] || NVMF_HOSTNQN_OK=1

# Only nvme-cli 2.5 or newer supports the options --nbft and --no-nbft
# for the connect-all command.
# Make sure we don't use unsupported options with earlier versions.
# sed exits with status 1 (q1) as soon as the help text mentions --nbft,
# and with status 0 at end of input otherwise.
NBFT_SUPPORTED=
# shellcheck disable=SC2016
/usr/sbin/nvme connect-all --help 2>&1 | sed -n '/[[:space:]]--nbft[[:space:]]/q1;$q0' \
    || NBFT_SUPPORTED=1

if [ -e /tmp/nvmf-fc-auto ] && [ "$NVMF_HOSTNQN_OK" ] \
    && [ -f /sys/class/fc/fc_udev_device/nvme_discovery ]; then
    # prio 1: cmdline override "rd.nvmf.discover=fc,auto"
    echo add > /sys/class/fc/fc_udev_device/nvme_discovery
    exit 0
fi
if [ "$NBFT_SUPPORTED" ] && [ -e /tmp/valid_nbft_entry_found ]; then
    # prio 2: NBFT
    /usr/sbin/nvme connect-all --nbft
    exit 0
fi
# "||" and "&&" have equal precedence in shell lists; the grouping makes the
# intended "(discovery.conf or config.json) and hostnqn ok" explicit.
if { [ -f /etc/nvme/discovery.conf ] || [ -f /etc/nvme/config.json ]; } \
    && [ "$NVMF_HOSTNQN_OK" ]; then
    # prio 3: configuration from initrd and/or kernel command line
    # We can get here even if "rd.nvmf.nonbft" was given, thus use --no-nbft
    if [ "$NBFT_SUPPORTED" ]; then
        /usr/sbin/nvme connect-all --no-nbft
    else
        /usr/sbin/nvme connect-all
    fi
    exit 0
fi
if [ "$NVMF_HOSTNQN_OK" ] \
    && [ -f /sys/class/fc/fc_udev_device/nvme_discovery ]; then
    # prio 4: no discovery entries, try NVMeoFC autoconnect
    echo add > /sys/class/fc/fc_udev_device/nvme_discovery
fi
exit 0

# ===== modules.d/95nvmf/parse-nvmf-boot-connections.sh (new file, mode 100755) =====
#!/bin/sh
#
# Supported formats:
# rd.nvmf.hostnqn=<hostnqn>
# rd.nvmf.hostid=<hostid>
# rd.nvmf.discover=<transport>,<traddr>,<host-traddr>,<trsvcid>
#
# Examples:
# rd.nvmf.hostnqn=nqn.2014-08.org.nvmexpress:uuid:37303738-3034-584d-5137-333230423843
# rd.nvmf.discover=rdma,192.168.1.3,,4420
# rd.nvmf.discover=tcp,192.168.1.3,,4420
# rd.nvmf.discover=tcp,192.168.1.3
# rd.nvmf.discover=fc,nn-0x200400a098d85236:pn-0x201400a098d85236,nn-0x200000109b7db455:pn-0x100000109b7db455
# rd.nvmf.discover=fc,auto
#
# Note: FC does autodiscovery, so
# typically there is no need to
# specify any discover parameters for FC.
#

command -v getarg > /dev/null || . /lib/dracut-lib.sh
command -v is_ip > /dev/null || . /lib/net-lib.sh

## Sample NBFT output from nvme show-nbft -H -s -d -o json
# [
#   {
#     "filename":"/sys/firmware/acpi/tables/NBFT",
#     "host":{
#       "nqn":"nqn.2014-08.org.nvmexpress:uuid:d6f07002-7eb5-4841-a185-400e296afae4",
#       "id":"111919da-21ea-cc4e-bafe-216d8372dd31",
#       "host_id_configured":0,
#       "host_nqn_configured":0,
#       "primary_admin_host_flag":"not indicated"
#     },
#     "subsystem":[
#       {
#         "index":1,
#         "num_hfis":1,
#         "hfis":[
#           1
#         ],
#         "transport":"tcp",
#         "transport_address":"192.168.100.216",
#         "transport_svcid":"4420",
#         "subsys_port_id":0,
#         "nsid":1,
#         "nid_type":"uuid",
#         "nid":"424d1c8a-8ef9-4681-b2fc-8c343bd8fa69",
#         "subsys_nqn":"timberland-01",
#         "controller_id":0,
#         "asqsz":0,
#         "pdu_header_digest_required":0,
#         "data_digest_required":0
#       }
#     ],
#     "hfi":[
#       {
#         "index":1,
#         "transport":"tcp",
#         "pcidev":"0:0:2.0",
#         "mac_addr":"52:54:00:4f:97:e9",
#         "vlan":0,
#         "ip_origin":63,
#         "ipaddr":"192.168.100.217",
#         "subnet_mask_prefix":24,
#         "gateway_ipaddr":"0.0.0.0",
#         "route_metric":0,
#         "primary_dns_ipaddr":"0.0.0.0",
#         "secondary_dns_ipaddr":"0.0.0.0",
#         "dhcp_server_ipaddr":"",
#         "this_hfi_is_default_route":1
#       }
#     ],
#     "discovery":[
#     ]
#   }
# ]
#
# If the IP address is derived from DHCP, it sets the field
# "hfi.dhcp_server_ipaddr" to a non-empty value.
#
#

# Run jq on a JSON document.
# Usage: nbft_run_jq [-jqopt ...] <filter> <json>
# Leading arguments starting with "-" are passed to jq in addition to "-e".
# Prints jq's output; on failure emits a warning and returns jq's status.
nbft_run_jq() {
    local st
    local opts="-e"

    while [ $# -gt 0 ]; do
        case $1 in
            -*)
                opts="$opts $1"
                ;;
            *)
                break
                ;;
        esac
        shift
    done
    # Not quoting is intentional here. We won't get glob expressions passed.
    # shellcheck disable=SC2086
    jq $opts "$1" << EOF
$2
EOF
    st=$?
    if [ $st -ne 0 ]; then
        warn "NBFT: jq error while processing \"$1\""
        return $st
    else
        return 0
    fi
}

# Print $1 unless it is a meaningless or empty IP address.
nbft_check_empty_address() {
    # suppress meaningless or empty IP addresses
    # "null" is returned by jq if no match found for expression
    case $1 in
        null | "::" | "0.0.0.0") ;;
        *)
            echo "$1"
            ;;
    esac
}

# Translate one NBFT HFI record ($1, JSON) into "ip=" (and, for a non-zero
# VLAN id, "vlan=") command line arguments on stdout.
# Creates /tmp/net.<iface>[.<vlan>].has_ibft_config and
# /tmp/valid_nbft_entry_found as markers. Returns 0 without output if the
# marker for the interface already exists, 1 if the record is unusable.
nbft_parse_hfi() {
    # false positive of shellcheck - no expansion in variable assignments
    # shellcheck disable=2086
    local hfi_json=$1
    local mac iface ipaddr prefix vlan gateway dns1 dns2 hostname adrfam dhcp

    mac=$(nbft_run_jq -r .mac_addr "$hfi_json") || return 1
    iface=$(set_ifname nbft "$mac")

    vlan=$(nbft_run_jq .vlan "$hfi_json") || vlan=0
    # treat VLAN zero as "no vlan"
    [ "$vlan" -ne 0 ] || vlan=

    [ ! -e /tmp/net."${iface}${vlan:+.$vlan}".has_ibft_config ] || return 0

    dhcp=$(nbft_run_jq -r .dhcp_server_ipaddr "$hfi_json")
    # We need to check $? here as the above is an assignment
    # shellcheck disable=2181
    if [ $? -eq 0 ] && [ "$dhcp" ] && [ "$dhcp" != null ]; then
        case $dhcp in
            *:*)
                echo ip="$iface${vlan:+.$vlan}:dhcp6"
                ;;
            *.*.*.*)
                echo ip="$iface${vlan:+.$vlan}:dhcp"
                ;;
            *)
                warn "Invalid value for dhcp_server_ipaddr: $dhcp"
                return 1
                ;;
        esac
    else
        ipaddr=$(nbft_run_jq -r .ipaddr "$hfi_json") || return 1

        case $ipaddr in
            *.*.*.*)
                adrfam=ipv4
                ;;
            *:*)
                adrfam=ipv6
                ;;
            *)
                warn "invalid address: $ipaddr"
                return 1
                ;;
        esac
        prefix=$(nbft_run_jq -r .subnet_mask_prefix "$hfi_json")
        # Need to check $? here as the above is an assignment
        # shellcheck disable=2181
        if [ $? -ne 0 ] && [ "$adrfam" = ipv6 ]; then
            prefix=128
        fi
        # Use brackets for IPv6
        if [ "$adrfam" = ipv6 ]; then
            ipaddr="[$ipaddr]"
        fi

        gateway=$(nbft_check_empty_address \
            "$(nbft_run_jq -r .gateway_ipaddr "$hfi_json")")
        dns1=$(nbft_check_empty_address \
            "$(nbft_run_jq -r .primary_dns_ipaddr "$hfi_json")")
        dns2=$(nbft_check_empty_address \
            "$(nbft_run_jq -r .secondary_dns_ipaddr "$hfi_json")")
        hostname=$(nbft_run_jq -r .host_name "$hfi_json" 2> /dev/null) || hostname=

        echo "ip=$ipaddr::$gateway:$prefix:$hostname:$iface${vlan:+.$vlan}:none${dns1:+:$dns1}${dns2:+:$dns2}"
    fi

    if [ "$vlan" ]; then
        echo "vlan=$iface.$vlan:$iface"
        echo "$mac" > "/tmp/net.$iface.$vlan.has_ibft_config"
    else
        echo "$mac" > "/tmp/net.$iface.has_ibft_config"
    fi
    : > /tmp/valid_nbft_entry_found
}

# Read all NBFT tables via "nvme nbft show" and append the network
# configuration of every HFI to /etc/cmdline.d/40-nbft.conf.
nbft_parse() {
    local nbft_json n_nbft all_hfi_json n_hfi
    local j=0 i

    nbft_json=$(nvme nbft show -H -o json) || return 0
    n_nbft=$(nbft_run_jq ". | length" "$nbft_json") || return 0

    while [ "$j" -lt "$n_nbft" ]; do
        # A table whose HFI list cannot be extracted is skipped. The counter
        # must still advance, otherwise the loop would never terminate.
        if all_hfi_json=$(nbft_run_jq ".[$j].hfi" "$nbft_json") \
            && n_hfi=$(nbft_run_jq ". | length" "$all_hfi_json"); then
            i=0
            while [ "$i" -lt "$n_hfi" ]; do
                nbft_parse_hfi "$(nbft_run_jq ".[$i]" "$all_hfi_json")"
                i=$((i + 1))
            done
        fi
        j=$((j + 1))
    done >> /etc/cmdline.d/40-nbft.conf
}

# This branch is taken when rd.nonvmf is enabled.
if getargbool 0 rd.nonvmf; then
    warn "rd.nonvmf=1: skipping nvmf"
    return 0
fi

if getargbool 0 rd.nvmf.nostatic; then
    rm -f /etc/cmdline.d/95nvmf-args.conf
    rm -f /etc/nvme/discovery.conf /etc/nvme/config.json
fi

if ! getargbool 0 rd.nvmf.nonbft; then
    for _x in /sys/firmware/acpi/tables/NBFT*; do
        if [ -f "$_x" ]; then
            nbft_parse
            break
        fi
    done
fi

initqueue --onetime modprobe --all -b -q nvme_tcp nvme_core nvme_fabrics

# Parse one rd.nvmf.discover value ($1:
# <transport>,<traddr>[,<host-traddr>[,<trsvcid>]]) and append the matching
# option line to /etc/nvme/discovery.conf.
# Returns 1 only for "fc,auto" (after removing discovery.conf and
# config.json); returns 0 otherwise, including for rejected arguments,
# which only produce a warning.
parse_nvmf_discover() {
    # Keep the unsplit argument for diagnostics; "set --" below replaces $1.
    local discover_arg=$1

    traddr="none"
    trtype="none"
    hosttraddr="none"
    trsvcid=4420
    OLDIFS="$IFS"
    IFS=,
    # shellcheck disable=SC2086
    set -- $1
    IFS="$OLDIFS"

    case $# in
        2)
            [ -n "$1" ] && trtype=$1
            [ -n "$2" ] && traddr=$2
            ;;
        3)
            [ -n "$1" ] && trtype=$1
            [ -n "$2" ] && traddr=$2
            [ -n "$3" ] && hosttraddr=$3
            ;;
        4)
            [ -n "$1" ] && trtype=$1
            [ -n "$2" ] && traddr=$2
            [ -n "$3" ] && hosttraddr=$3
            [ -n "$4" ] && trsvcid=$4
            ;;
        *)
            warn "Invalid arguments for rd.nvmf.discover=$discover_arg"
            return 0
            ;;
    esac
    if [ "$traddr" = "none" ]; then
        warn "traddr is mandatory for $trtype"
        return 0
    fi
    if [ "$trtype" = "tcp" ]; then
        : > /tmp/nvmf_needs_network
    elif [ "$trtype" = "fc" ]; then
        if [ "$traddr" = "auto" ]; then
            rm -f /etc/nvme/discovery.conf /etc/nvme/config.json
            return 1
        fi
        if [ "$hosttraddr" = "none" ]; then
            warn "host traddr is mandatory for fc"
            return 0
        fi
    elif [ "$trtype" != "rdma" ]; then
        warn "unsupported transport $trtype"
        return 0
    fi
    if [ "$trtype" = "fc" ]; then
        echo "--transport=$trtype --traddr=$traddr --host-traddr=$hosttraddr" >> /etc/nvme/discovery.conf
    else
        echo "--transport=$trtype --traddr=$traddr --host-traddr=$hosttraddr --trsvcid=$trsvcid" >> /etc/nvme/discovery.conf
    fi
    return 0
}

nvmf_hostnqn=$(getarg rd.nvmf.hostnqn -d nvmf.hostnqn=)
if [ -n "$nvmf_hostnqn" ]; then
    echo "$nvmf_hostnqn" > /etc/nvme/hostnqn
fi
nvmf_hostid=$(getarg rd.nvmf.hostid -d nvmf.hostid=)
if [ -n "$nvmf_hostid" ]; then
    echo "$nvmf_hostid" > /etc/nvme/hostid
fi

# A non-zero return from parse_nvmf_discover means "fc,auto" was requested:
# record that in a flag file and ignore any remaining discover arguments.
rm -f /tmp/nvmf-fc-auto
for d in $(getargs rd.nvmf.discover -d nvmf.discover=); do
    parse_nvmf_discover "$d" || {
        : > /tmp/nvmf-fc-auto
        break
    }
done

if [ -e /tmp/nvmf_needs_network ] || [ -e /tmp/valid_nbft_entry_found ]; then
    echo "rd.neednet=1" > /etc/cmdline.d/nvmf-neednet.conf
    # netroot is a global variable that is present in all "sourced" scripts
    # shellcheck disable=SC2034
    netroot=nbft
    rm -f /tmp/nvmf_needs_network
fi

/sbin/initqueue --settled --onetime --name nvmf-connect-settled /sbin/nvmf-autoconnect.sh settled
/sbin/initqueue --timeout --onetime --name nvmf-connect-timeout /sbin/nvmf-autoconnect.sh timeout