summaryrefslogtreecommitdiffstats
path: root/modules.d/95nvmf
diff options
context:
space:
mode:
Diffstat (limited to 'modules.d/95nvmf')
-rw-r--r--modules.d/95nvmf/95-nvmf-initqueue.rules10
-rwxr-xr-xmodules.d/95nvmf/module-setup.sh148
-rwxr-xr-xmodules.d/95nvmf/nbftroot.sh5
-rwxr-xr-xmodules.d/95nvmf/nvmf-autoconnect.sh54
-rwxr-xr-xmodules.d/95nvmf/parse-nvmf-boot-connections.sh326
5 files changed, 543 insertions, 0 deletions
diff --git a/modules.d/95nvmf/95-nvmf-initqueue.rules b/modules.d/95nvmf/95-nvmf-initqueue.rules
new file mode 100644
index 0000000..d26d7b0
--- /dev/null
+++ b/modules.d/95nvmf/95-nvmf-initqueue.rules
@@ -0,0 +1,10 @@
+#
+# nvmf-initqueue.rules
+#
+# D-Bus doesn't run in the initrd, which means that we cannot use our
+# usual trick of starting custom systemd services.
+# So use a rule to create initqueue entries instead.
+
+ACTION=="change", SUBSYSTEM=="fc", ENV{FC_EVENT}=="nvmediscovery", \
+ ENV{NVMEFC_HOST_TRADDR}=="*", ENV{NVMEFC_TRADDR}=="*", \
+ RUN+="/sbin/initqueue --onetime --unique --name nvmf-connect-$env{NVMEFC_TRADDR}-$env{NVMEFC_HOST_TRADDR} /usr/sbin/nvme connect-all --transport=fc --traddr=$env{NVMEFC_TRADDR} --host-traddr=$env{NVMEFC_HOST_TRADDR}"
diff --git a/modules.d/95nvmf/module-setup.sh b/modules.d/95nvmf/module-setup.sh
new file mode 100755
index 0000000..a8f3034
--- /dev/null
+++ b/modules.d/95nvmf/module-setup.sh
@@ -0,0 +1,148 @@
+#!/bin/bash
+
+# called by dracut
+check() {
+ require_binaries nvme jq || return 1
+ [ -f /etc/nvme/hostnqn ] || return 255
+ [ -f /etc/nvme/hostid ] || return 255
+
+ is_nvmf() {
+ local _dev=$1
+ local trtype
+
+ [[ -L "/sys/dev/block/$_dev" ]] || return 0
+ cd -P "/sys/dev/block/$_dev" || return 0
+ if [ -f partition ]; then
+ cd ..
+ fi
+ for d in device/nvme*; do
+ [ -L "$d" ] || continue
+ if readlink "$d" | grep -q nvme-fabrics; then
+ read -r trtype < "$d"/transport
+ break
+ fi
+ done
+ [[ $trtype == "fc" ]] || [[ $trtype == "tcp" ]] || [[ $trtype == "rdma" ]]
+ }
+
+ has_nbft() {
+ local f found=
+ for f in /sys/firmware/acpi/tables/NBFT*; do
+ [ -f "$f" ] || continue
+ found=1
+ break
+ done
+ [[ $found ]]
+ }
+
+ [[ $hostonly ]] || [[ $mount_needs ]] && {
+ pushd . > /dev/null
+ for_each_host_dev_and_slaves is_nvmf
+ local _is_nvmf=$?
+ popd > /dev/null || exit
+ [[ $_is_nvmf == 0 ]] || return 255
+ if [ ! -f /sys/class/fc/fc_udev_device/nvme_discovery ] \
+ && [ ! -f /etc/nvme/discovery.conf ] \
+ && [ ! -f /etc/nvme/config.json ] && ! has_nbft; then
+ echo "No discovery arguments present"
+ return 255
+ fi
+ }
+ return 0
+}
+
+# called by dracut
+depends() {
+ echo bash rootfs-block network
+ return 0
+}
+
+# called by dracut
+installkernel() {
+ instmods nvme_fc lpfc qla2xxx
+ hostonly="" instmods nvme_tcp nvme_fabrics 8021q
+}
+
+# called by dracut
+cmdline() {
+ local _hostnqn
+ local _hostid
+
+ gen_nvmf_cmdline() {
+ local _dev=$1
+ local trtype
+ local traddr
+ local host_traddr
+ local trsvcid
+ local _address
+ local -a _address_parts
+
+ [[ -L "/sys/dev/block/$_dev" ]] || return 0
+ cd -P "/sys/dev/block/$_dev" || return 0
+ if [ -f partition ]; then
+ cd ..
+ fi
+ for d in device/nvme*; do
+ [ -L "$d" ] || continue
+ if readlink "$d" | grep -q nvme-fabrics; then
+ read -r trtype < "$d"/transport
+ break
+ fi
+ done
+
+ [ -z "$trtype" ] && return 0
+ nvme list-subsys "${PWD##*/}" | while read -r _ _ trtype _address _; do
+ [[ -z $trtype || $trtype != "${trtype#NQN}" ]] && continue
+ unset traddr
+ unset host_traddr
+ unset trsvcid
+ mapfile -t -d ',' _address_parts < <(printf "%s" "$_address")
+ for i in "${_address_parts[@]}"; do
+ [[ $i =~ ^traddr= ]] && traddr="${i#traddr=}"
+ [[ $i =~ ^host_traddr= ]] && host_traddr="${i#host_traddr=}"
+ [[ $i =~ ^trsvcid= ]] && trsvcid="${i#trsvcid=}"
+ done
+ [[ -z $traddr && -z $host_traddr && -z $trsvcid ]] && continue
+ echo -n " rd.nvmf.discover=$trtype,$traddr,$host_traddr,$trsvcid"
+ done
+ }
+
+ if [ -f /etc/nvme/hostnqn ]; then
+ read -r _hostnqn < /etc/nvme/hostnqn
+ echo -n " rd.nvmf.hostnqn=${_hostnqn}"
+ fi
+ if [ -f /etc/nvme/hostid ]; then
+ read -r _hostid < /etc/nvme/hostid
+ echo -n " rd.nvmf.hostid=${_hostid}"
+ fi
+
+ [[ $hostonly ]] || [[ $mount_needs ]] && {
+ pushd . > /dev/null
+ for_each_host_dev_and_slaves gen_nvmf_cmdline
+ popd > /dev/null || exit
+ }
+}
+
+# called by dracut
+install() {
+ if [[ $hostonly_cmdline == "yes" ]]; then
+ local _nvmf_args
+ _nvmf_args=$(cmdline)
+ [[ "$_nvmf_args" ]] && printf "%s" "$_nvmf_args" >> "${initdir}/etc/cmdline.d/95nvmf-args.conf"
+ fi
+ inst_simple "/etc/nvme/hostnqn"
+ inst_simple "/etc/nvme/hostid"
+
+ inst_multiple ip sed
+
+ inst_script "${moddir}/nvmf-autoconnect.sh" /sbin/nvmf-autoconnect.sh
+ inst_script "${moddir}/nbftroot.sh" /sbin/nbftroot
+
+ inst_multiple nvme jq
+ inst_hook cmdline 92 "$moddir/parse-nvmf-boot-connections.sh"
+ inst_simple "/etc/nvme/discovery.conf"
+ inst_simple "/etc/nvme/config.json"
+ inst_rules /usr/lib/udev/rules.d/71-nvmf-iopolicy-netapp.rules
+ inst_rules "$moddir/95-nvmf-initqueue.rules"
+ dracut_need_initqueue
+}
diff --git a/modules.d/95nvmf/nbftroot.sh b/modules.d/95nvmf/nbftroot.sh
new file mode 100755
index 0000000..0f33499
--- /dev/null
+++ b/modules.d/95nvmf/nbftroot.sh
@@ -0,0 +1,5 @@
+#! /bin/sh
+# This script is called from /sbin/netroot
+
+/sbin/nvmf-autoconnect.sh online
+exit 0
diff --git a/modules.d/95nvmf/nvmf-autoconnect.sh b/modules.d/95nvmf/nvmf-autoconnect.sh
new file mode 100755
index 0000000..35ee948
--- /dev/null
+++ b/modules.d/95nvmf/nvmf-autoconnect.sh
@@ -0,0 +1,54 @@
+#!/bin/sh
+# Argument $1 is "settled", "online", or "timeout", indicating
+# the queue from which the script is called.
+# In the "timeout" case, try everything.
+# Otherwise, try options according to the priorities below.
+
+[ "$RD_DEBUG" != yes ] || set -x
+
+if [ "$1" = timeout ]; then
+ [ ! -f /sys/class/fc/fc_udev_device/nvme_discovery ] \
+ || echo add > /sys/class/fc/fc_udev_device/nvme_discovery
+ /usr/sbin/nvme connect-all
+ exit 0
+fi
+
+NVMF_HOSTNQN_OK=
+[ ! -f "/etc/nvme/hostnqn" ] || [ ! -f "/etc/nvme/hostid" ] || NVMF_HOSTNQN_OK=1
+
+# Only nvme-cli 2.5 or newer supports the options --nbft and --no-nbft
+# for the connect-all command.
+# Make sure we don't use unsupported options with earlier versions.
+NBFT_SUPPORTED=
+# shellcheck disable=SC2016
+/usr/sbin/nvme connect-all --help 2>&1 | sed -n '/[[:space:]]--nbft[[:space:]]/q1;$q0' \
+ || NBFT_SUPPORTED=1
+
+if [ -e /tmp/nvmf-fc-auto ] && [ "$NVMF_HOSTNQN_OK" ] \
+ && [ -f /sys/class/fc/fc_udev_device/nvme_discovery ]; then
+ # prio 1: cmdline override "rd.nvmf.discovery=fc,auto"
+ echo add > /sys/class/fc/fc_udev_device/nvme_discovery
+ exit 0
+fi
+if [ "$NBFT_SUPPORTED" ] && [ -e /tmp/valid_nbft_entry_found ]; then
+ # prio 2: NBFT
+ /usr/sbin/nvme connect-all --nbft
+ exit 0
+fi
+if [ -f /etc/nvme/discovery.conf ] || [ -f /etc/nvme/config.json ] \
+ && [ "$NVMF_HOSTNQN_OK" ]; then
+ # prio 3: configuration from initrd and/or kernel command line
+ # We can get here even if "rd.nvmf.nonbft" was given, thus use --no-nbft
+ if [ "$NBFT_SUPPORTED" ]; then
+ /usr/sbin/nvme connect-all --no-nbft
+ else
+ /usr/sbin/nvme connect-all
+ fi
+ exit 0
+fi
+if [ "$NVMF_HOSTNQN_OK" ] \
+ && [ -f /sys/class/fc/fc_udev_device/nvme_discovery ]; then
+ # prio 4: no discovery entries, try NVMeoFC autoconnect
+ echo add > /sys/class/fc/fc_udev_device/nvme_discovery
+fi
+exit 0
diff --git a/modules.d/95nvmf/parse-nvmf-boot-connections.sh b/modules.d/95nvmf/parse-nvmf-boot-connections.sh
new file mode 100755
index 0000000..6601837
--- /dev/null
+++ b/modules.d/95nvmf/parse-nvmf-boot-connections.sh
@@ -0,0 +1,326 @@
+#!/bin/sh
+#
+# Supported formats:
+# rd.nvmf.hostnqn=<hostnqn>
+# rd.nvmf.hostid=<hostid>
+# rd.nvmf.discover=<transport>,<traddr>,<host-traddr>,<trsvcid>
+#
+# Examples:
+# rd.nvmf.hostnqn=nqn.2014-08.org.nvmexpress:uuid:37303738-3034-584d-5137-333230423843
+# rd.nvmf.discover=rdma,192.168.1.3,,4420
+# rd.nvmf.discover=tcp,192.168.1.3,,4420
+# rd.nvmf.discover=tcp,192.168.1.3
+# rd.nvmf.discover=fc,nn-0x200400a098d85236:pn-0x201400a098d85236,nn-0x200000109b7db455:pn-0x100000109b7db455
+# rd.nvmf.discover=fc,auto
+#
+# Note: FC does autodiscovery, so typically there is no need to
+# specify any discover parameters for FC.
+#
+
+command -v getarg > /dev/null || . /lib/dracut-lib.sh
+command -v is_ip > /dev/null || . /lib/net-lib.sh
+
+## Sample NBFT output from nvme show-nbft -H -s -d -o json
+# [
+# {
+# "filename":"/sys/firmware/acpi/tables/NBFT",
+# "host":{
+# "nqn":"nqn.2014-08.org.nvmexpress:uuid:d6f07002-7eb5-4841-a185-400e296afae4",
+# "id":"111919da-21ea-cc4e-bafe-216d8372dd31",
+# "host_id_configured":0,
+# "host_nqn_configured":0,
+# "primary_admin_host_flag":"not indicated"
+# },
+# "subsystem":[
+# {
+# "index":1,
+# "num_hfis":1,
+# "hfis":[
+# 1
+# ],
+# "transport":"tcp",
+# "transport_address":"192.168.100.216",
+# "transport_svcid":"4420",
+# "subsys_port_id":0,
+# "nsid":1,
+# "nid_type":"uuid",
+# "nid":"424d1c8a-8ef9-4681-b2fc-8c343bd8fa69",
+# "subsys_nqn":"timberland-01",
+# "controller_id":0,
+# "asqsz":0,
+# "pdu_header_digest_required":0,
+# "data_digest_required":0
+# }
+# ],
+# "hfi":[
+# {
+# "index":1,
+# "transport":"tcp",
+# "pcidev":"0:0:2.0",
+# "mac_addr":"52:54:00:4f:97:e9",
+# "vlan":0,
+# "ip_origin":63,
+# "ipaddr":"192.168.100.217",
+# "subnet_mask_prefix":24,
+# "gateway_ipaddr":"0.0.0.0",
+# "route_metric":0,
+# "primary_dns_ipaddr":"0.0.0.0",
+# "secondary_dns_ipaddr":"0.0.0.0",
+# "dhcp_server_ipaddr":"",
+# "this_hfi_is_default_route":1
+# }
+# ],
+# "discovery":[
+# ]
+# }
+# ]
+#
+# If the IP address is derived from DHCP, it sets the field
+# "hfi.dhcp_server_ipaddr" to a non-emtpy value.
+#
+#
+
+nbft_run_jq() {
+ local st
+ local opts="-e"
+
+ while [ $# -gt 0 ]; do
+ case $1 in
+ -*)
+ opts="$opts $1"
+ ;;
+ *)
+ break
+ ;;
+ esac
+ shift
+ done
+ # Not quoting is intentional here. We won't get glob expressions passed.
+ # shellcheck disable=SC2086
+ jq $opts "$1" << EOF
+$2
+EOF
+ st=$?
+ if [ $st -ne 0 ]; then
+ warn "NBFT: jq error while processing \"$1\""
+ return $st
+ else
+ return 0
+ fi
+}
+
+nbft_check_empty_address() {
+ # suppress meaningless or empty IP addresses
+ # "null" is returned by jq if no match found for expression
+ case $1 in
+ null | "::" | "0.0.0.0") ;;
+ *)
+ echo "$1"
+ ;;
+ esac
+}
+
+nbft_parse_hfi() {
+ # false positive of shellcheck - no expansion in variable assignments
+ # shellcheck disable=2086
+ local hfi_json=$1
+ local mac iface ipaddr prefix vlan gateway dns1 dns2 hostname adrfam dhcp
+
+ mac=$(nbft_run_jq -r .mac_addr "$hfi_json") || return 1
+ iface=$(set_ifname nbft "$mac")
+
+ vlan=$(nbft_run_jq .vlan "$hfi_json") || vlan=0
+ # treat VLAN zero as "no vlan"
+ [ "$vlan" -ne 0 ] || vlan=
+
+ [ ! -e /tmp/net."${iface}${vlan:+.$vlan}".has_ibft_config ] || return 0
+
+ dhcp=$(nbft_run_jq -r .dhcp_server_ipaddr "$hfi_json")
+ # We need to check $? here as the above is an assignment
+ # shellcheck disable=2181
+ if [ $? -eq 0 ] && [ "$dhcp" ] && [ "$dhcp" != null ]; then
+ case $dhcp in
+ *:*)
+ echo ip="$iface${vlan:+.$vlan}:dhcp6"
+ ;;
+ *.*.*.*)
+ echo ip="$iface${vlan:+.$vlan}:dhcp"
+ ;;
+ *)
+ warn "Invalid value for dhcp_server_ipaddr: $dhcp"
+ return 1
+ ;;
+ esac
+ else
+ ipaddr=$(nbft_run_jq -r .ipaddr "$hfi_json") || return 1
+
+ case $ipaddr in
+ *.*.*.*)
+ adrfam=ipv4
+ ;;
+ *:*)
+ adrfam=ipv6
+ ;;
+ *)
+ warn "invalid address: $ipaddr"
+ return 1
+ ;;
+ esac
+ prefix=$(nbft_run_jq -r .subnet_mask_prefix "$hfi_json")
+ # Need to check $? here as he above is an assignment
+ # shellcheck disable=2181
+ if [ $? -ne 0 ] && [ "$adrfam" = ipv6 ]; then
+ prefix=128
+ fi
+ # Use brackets for IPv6
+ if [ "$adrfam" = ipv6 ]; then
+ ipaddr="[$ipaddr]"
+ fi
+
+ gateway=$(nbft_check_empty_address \
+ "$(nbft_run_jq -r .gateway_ipaddr "$hfi_json")")
+ dns1=$(nbft_check_empty_address \
+ "$(nbft_run_jq -r .primary_dns_ipaddr "$hfi_json")")
+ dns2=$(nbft_check_empty_address \
+ "$(nbft_run_jq -r .secondary_dns_ipaddr "$hfi_json")")
+ hostname=$(nbft_run_jq -r .host_name "$hfi_json" 2> /dev/null) || hostname=
+
+ echo "ip=$ipaddr::$gateway:$prefix:$hostname:$iface${vlan:+.$vlan}:none${dns1:+:$dns1}${dns2:+:$dns2}"
+ fi
+
+ if [ "$vlan" ]; then
+ echo "vlan=$iface.$vlan:$iface"
+ echo "$mac" > "/tmp/net.$iface.$vlan.has_ibft_config"
+ else
+ echo "$mac" > "/tmp/net.$iface.has_ibft_config"
+ fi
+ : > /tmp/valid_nbft_entry_found
+}
+
+nbft_parse() {
+ local nbft_json n_nbft all_hfi_json n_hfi
+ local j=0 i
+
+ nbft_json=$(nvme nbft show -H -o json) || return 0
+ n_nbft=$(nbft_run_jq ". | length" "$nbft_json") || return 0
+
+ while [ "$j" -lt "$n_nbft" ]; do
+ all_hfi_json=$(nbft_run_jq ".[$j].hfi" "$nbft_json") || continue
+ n_hfi=$(nbft_run_jq ". | length" "$all_hfi_json") || continue
+ i=0
+
+ while [ "$i" -lt "$n_hfi" ]; do
+ nbft_parse_hfi "$(nbft_run_jq ".[$i]" "$all_hfi_json")"
+ i=$((i + 1))
+ done
+ j=$((j + 1))
+ done >> /etc/cmdline.d/40-nbft.conf
+}
+
+if getargbool 0 rd.nonvmf; then
+ warn "rd.nonvmf=0: skipping nvmf"
+ return 0
+fi
+
+if getargbool 0 rd.nvmf.nostatic; then
+ rm -f /etc/cmdline.d/95nvmf-args.conf
+ rm -f /etc/nvme/discovery.conf /etc/nvme/config.json
+fi
+
+if ! getargbool 0 rd.nvmf.nonbft; then
+ for _x in /sys/firmware/acpi/tables/NBFT*; do
+ if [ -f "$_x" ]; then
+ nbft_parse
+ break
+ fi
+ done
+fi
+
+initqueue --onetime modprobe --all -b -q nvme_tcp nvme_core nvme_fabrics
+
+parse_nvmf_discover() {
+ traddr="none"
+ trtype="none"
+ hosttraddr="none"
+ trsvcid=4420
+ OLDIFS="$IFS"
+ IFS=,
+ # shellcheck disable=SC2086
+ set -- $1
+ IFS="$OLDIFS"
+
+ case $# in
+ 2)
+ [ -n "$1" ] && trtype=$1
+ [ -n "$2" ] && traddr=$2
+ ;;
+ 3)
+ [ -n "$1" ] && trtype=$1
+ [ -n "$2" ] && traddr=$2
+ [ -n "$3" ] && hosttraddr=$3
+ ;;
+ 4)
+ [ -n "$1" ] && trtype=$1
+ [ -n "$2" ] && traddr=$2
+ [ -n "$3" ] && hosttraddr=$3
+ [ -n "$4" ] && trsvcid=$4
+ ;;
+ *)
+ warn "Invalid arguments for rd.nvmf.discover=$1"
+ return 0
+ ;;
+ esac
+ if [ "$traddr" = "none" ]; then
+ warn "traddr is mandatory for $trtype"
+ return 0
+ fi
+ if [ "$trtype" = "tcp" ]; then
+ : > /tmp/nvmf_needs_network
+ elif [ "$trtype" = "fc" ]; then
+ if [ "$traddr" = "auto" ]; then
+ rm -f /etc/nvme/discovery.conf /etc/nvme/config.json
+ return 1
+ fi
+ if [ "$hosttraddr" = "none" ]; then
+ warn "host traddr is mandatory for fc"
+ return 0
+ fi
+ elif [ "$trtype" != "rdma" ]; then
+ warn "unsupported transport $trtype"
+ return 0
+ fi
+ if [ "$trtype" = "fc" ]; then
+ echo "--transport=$trtype --traddr=$traddr --host-traddr=$hosttraddr" >> /etc/nvme/discovery.conf
+ else
+ echo "--transport=$trtype --traddr=$traddr --host-traddr=$hosttraddr --trsvcid=$trsvcid" >> /etc/nvme/discovery.conf
+ fi
+ return 0
+}
+
+nvmf_hostnqn=$(getarg rd.nvmf.hostnqn -d nvmf.hostnqn=)
+if [ -n "$nvmf_hostnqn" ]; then
+ echo "$nvmf_hostnqn" > /etc/nvme/hostnqn
+fi
+nvmf_hostid=$(getarg rd.nvmf.hostid -d nvmf.hostid=)
+if [ -n "$nvmf_hostid" ]; then
+ echo "$nvmf_hostid" > /etc/nvme/hostid
+fi
+
+rm -f /tmp/nvmf-fc-auto
+for d in $(getargs rd.nvmf.discover -d nvmf.discover=); do
+ parse_nvmf_discover "$d" || {
+ : > /tmp/nvmf-fc-auto
+ break
+ }
+done
+
+if [ -e /tmp/nvmf_needs_network ] || [ -e /tmp/valid_nbft_entry_found ]; then
+ echo "rd.neednet=1" > /etc/cmdline.d/nvmf-neednet.conf
+ # netroot is a global variable that is present in all "sourced" scripts
+ # shellcheck disable=SC2034
+ netroot=nbft
+ rm -f /tmp/nvmf_needs_network
+fi
+
+/sbin/initqueue --settled --onetime --name nvmf-connect-settled /sbin/nvmf-autoconnect.sh settled
+/sbin/initqueue --timeout --onetime --name nvmf-connect-timeout /sbin/nvmf-autoconnect.sh timeout