diff options
Diffstat (limited to 'src/spdk/scripts/setup.sh')
-rwxr-xr-x | src/spdk/scripts/setup.sh | 885 |
1 files changed, 885 insertions, 0 deletions
#!/usr/bin/env bash
#
# setup.sh - allocate hugepages and bind/unbind PCI devices.
# Run with the "help" argument for the list of modes and the environment
# variables that influence them.

set -e

os=$(uname -s)

if [[ $os != Linux && $os != FreeBSD ]]; then
  echo "Not supported platform ($os), aborting"
  exit 1
fi

# Both expansions are quoted so that a checkout path containing whitespace
# does not get word-split before readlink/dirname see it.
rootdir=$(readlink -f "$(dirname "$0")")/..
source "$rootdir/scripts/common.sh"

#######################################
# Print the help text and terminate the script.
# Globals:   os (read)
# Arguments: $1 - path of this script (only its basename is printed)
#            $2 - optional error message, printed before the help text
# Exits:     0 for plain help, 1 when an error message was supplied
#######################################
function usage() {
  local options

  # Both platforms dispatch "status"; only Linux implements "cleanup".
  if [[ $os == Linux ]]; then
    options="[config|reset|status|cleanup|help]"
  else
    options="[config|reset|status|help]"
  fi

  if [[ -n $2 ]]; then
    echo "$2"
    echo ""
  fi
  echo "Helper script for allocating hugepages and binding NVMe, I/OAT, VMD and Virtio devices"
  echo "to a generic VFIO kernel driver. If VFIO is not available on the system, this script"
  echo "will fall back to UIO. NVMe and Virtio devices with active mountpoints will be ignored."
  echo "All hugepage operations use default hugepage size on the system (hugepagesz)."
  echo "Usage: $(basename "$1") $options"
  echo
  echo "$options - as following:"
  echo "config            Default mode. Allocate hugepages and bind PCI devices."
  if [[ $os == Linux ]]; then
    echo "cleanup           Remove any orphaned files that can be left in the system after SPDK application exit"
  fi
  echo "reset             Rebind PCI devices back to their original drivers."
  echo "                  Also cleanup any leftover spdk files/resources."
  echo "                  Hugepage memory size will remain unchanged."
  echo "status            Print status of all SPDK-compatible devices on the system."
  echo "help              Print this help message."
  echo
  echo "The following environment variables can be specified."
  echo "HUGEMEM           Size of hugepage memory to allocate (in MB). 2048 by default."
  echo "                  For NUMA systems, the hugepages will be evenly distributed"
  echo "                  between CPU nodes"
  echo "NRHUGE            Number of hugepages to allocate. This variable overwrites HUGEMEM."
  echo "HUGENODE          Specific NUMA node to allocate hugepages on. To allocate"
  echo "                  hugepages on multiple nodes run this script multiple times -"
  echo "                  once for each node."
  echo "PCI_WHITELIST"
  echo "PCI_BLACKLIST     Whitespace separated list of PCI devices (NVMe, I/OAT, VMD, Virtio)."
  echo "                  Each device must be specified as a full PCI address."
  echo "                  E.g. PCI_WHITELIST=\"0000:01:00.0 0000:02:00.0\""
  echo "                  To blacklist all PCI devices use a non-valid address."
  echo "                  E.g. PCI_WHITELIST=\"none\""
  echo "                  If PCI_WHITELIST and PCI_BLACKLIST are empty or unset, all PCI devices"
  echo "                  will be bound."
  echo "                  Each device in PCI_BLACKLIST will be ignored (driver won't be changed)."
  echo "                  PCI_BLACKLIST has precedence over PCI_WHITELIST."
  echo "TARGET_USER       User that will own hugepage mountpoint directory and vfio groups."
  echo "                  By default the current user will be used."
  echo "DRIVER_OVERRIDE   Disable automatic vfio-pci/uio_pci_generic selection and forcefully"
  echo "                  bind devices to the given driver."
  echo "                  E.g. DRIVER_OVERRIDE=uio_pci_generic or DRIVER_OVERRIDE=/home/public/dpdk/build/kmod/igb_uio.ko"

  # An error message means the caller rejected the command line, so report
  # failure to whatever invoked the script.
  if [[ -n $2 ]]; then
    exit 1
  fi
  exit 0
}

# In monolithic kernels lsmod won't list built-in drivers, so the lookup is
# backed by /sys/module. /sys/bus/pci/drivers/ is checked as well, since
# neither lsmod nor /sys/module might contain the needed info (like in
# Fedora-like OS).
#######################################
# Report whether a kernel driver is available.
# Arguments: $1 - driver name, written with dashes or underscores
# Returns:   1 when lsmod lists the module, 2 when only sysfs knows about it,
#            0 when it was not found. Note the inverted convention: callers
#            treat any non-zero status as "driver available".
#######################################
function check_for_driver() {
  local module=${1//-/_}
  local dir

  # Compare the module-name column exactly; a substring match would report
  # e.g. "nvme" as loaded whenever only "nvme_core" is.
  if lsmod | awk -v m="$module" '$1 == m { found = 1 } END { exit !found }'; then
    return 1
  fi

  for dir in /sys/module /sys/bus/pci/drivers; do
    if [[ -d $dir/$1 || -d $dir/$module ]]; then
      return 2
    fi
  done
  return 0
}

# Print "<bdf> (<vendor> <device>): <message>" for the PCI device $1; the
# remaining arguments form the message. Ids are read from sysfs.
function pci_dev_echo() {
  local bdf="$1"
  local vendor device

  vendor="$(cat "/sys/bus/pci/devices/$bdf/vendor")"
  device="$(cat "/sys/bus/pci/devices/$bdf/device")"
  shift
  echo "$bdf (${vendor#0x} ${device#0x}): $*"
}

#######################################
# Bind PCI device $1 to kernel driver $2 through sysfs, unbinding it from its
# current driver first. A no-op when the device already uses that driver.
# Globals:   TARGET_USER (read) - becomes owner of the device's vfio group node
# Arguments: $1 - PCI address, $2 - driver name as it appears in sysfs
#######################################
function linux_bind_driver() {
  local bdf="$1"
  local driver_name="$2"
  local old_driver_name="no driver"
  local ven_dev_id iommu_group

  # "vendor device" pair in the format expected by new_id/remove_id.
  ven_dev_id=$(lspci -n -s "$bdf" | cut -d' ' -f3 | sed 's/:/ /')

  if [ -e "/sys/bus/pci/devices/$bdf/driver" ]; then
    old_driver_name=$(basename "$(readlink "/sys/bus/pci/devices/$bdf/driver")")

    if [ "$driver_name" = "$old_driver_name" ]; then
      pci_dev_echo "$bdf" "Already using the $old_driver_name driver"
      return 0
    fi

    # remove_id is best-effort; a failing unbind aborts the script (set -e).
    echo "$ven_dev_id" > "/sys/bus/pci/devices/$bdf/driver/remove_id" 2> /dev/null || true
    echo "$bdf" > "/sys/bus/pci/devices/$bdf/driver/unbind"
  fi

  pci_dev_echo "$bdf" "$old_driver_name -> $driver_name"

  # Both writes are best-effort, exactly one of them is expected to attach
  # the device depending on the driver.
  echo "$ven_dev_id" > "/sys/bus/pci/drivers/$driver_name/new_id" 2> /dev/null || true
  echo "$bdf" > "/sys/bus/pci/drivers/$driver_name/bind" 2> /dev/null || true

  iommu_group=$(basename "$(readlink -f "/sys/bus/pci/devices/$bdf/iommu_group")")
  if [ -e "/dev/vfio/$iommu_group" ] && [ -n "$TARGET_USER" ]; then
    chown "$TARGET_USER" "/dev/vfio/$iommu_group"
  fi
}

# Detach PCI device $1 from whatever driver currently owns it (if any).
function linux_unbind_driver() {
  local bdf="$1"
  local ven_dev_id
  local old_driver_name="no driver"

  ven_dev_id=$(lspci -n -s "$bdf" | cut -d' ' -f3 | sed 's/:/ /')

  if [ -e "/sys/bus/pci/devices/$bdf/driver" ]; then
    old_driver_name=$(basename "$(readlink "/sys/bus/pci/devices/$bdf/driver")")
    echo "$ven_dev_id" > "/sys/bus/pci/devices/$bdf/driver/remove_id" 2> /dev/null || true
    echo "$bdf" > "/sys/bus/pci/devices/$bdf/driver/unbind"
  fi

  pci_dev_echo "$bdf" "$old_driver_name -> no driver"
}

# Print the mountpoint of every mounted hugetlbfs, one per line.
function linux_hugetlbfs_mounts() {
  mount | awk '/ type hugetlbfs / { print $3 }'
}

# Print the nvme block device names (one per line) whose sysfs device link
# points at the PCI address $1.
function get_nvme_name_from_bdf() {
  local blknames=()
  local nvme_devs dev link_name link_bdf

  # grep exits non-zero when there are no nvme block devices at all.
  nvme_devs=$(lsblk -d --output NAME | grep "^nvme") || true
  for dev in $nvme_devs; do
    link_name=$(readlink "/sys/block/$dev/device/device") || true
    if [ -z "$link_name" ]; then
      link_name=$(readlink "/sys/block/$dev/device") || continue
    fi
    link_bdf=$(basename "$link_name")
    if [ "$link_bdf" = "$1" ]; then
      blknames+=("$dev")
    fi
  done

  printf '%s\n' "${blknames[@]}"
}

#######################################
# Collect the block devices whose /sys/block link contains PCI address $1.
# Arguments: $1 - PCI address
#            $2 - name of the caller's array variable that receives the names
#######################################
function get_virtio_names_from_bdf() {
  local _blk_devs _dev
  local _virtio_names=()

  _blk_devs=$(lsblk --nodeps --noheadings --output NAME)
  for _dev in $_blk_devs; do
    # -F: the address contains '.', which must not act as a regex wildcard.
    if readlink "/sys/block/$_dev" | grep -qF -- "$1"; then
      _virtio_names+=("$_dev")
    fi
  done

  # Only the (caller supplied) variable name is evaluated; the array itself is
  # expanded inside the eval so device names are never parsed as shell code.
  eval "$2=(\"\${_virtio_names[@]}\")"
}

# Print the hex device ids (without the 0x prefix), one per line, of every
# definition in include/spdk/pci_ids.h whose line matches pattern $1.
function _pci_device_ids() {
  grep "$1" "$rootdir/include/spdk/pci_ids.h" | awk -F"x" '{print $2}'
}

# Bind every usable Intel (vendor 0x8086) device whose id is defined under the
# header pattern $1 to $driver_name; $2 names the device class in log output.
function _bind_intel_devices() {
  local dev_id bdf

  while IFS= read -r dev_id; do
    for bdf in ${pci_bus_cache["0x8086:0x$dev_id"]}; do
      if ! pci_can_use "$bdf"; then
        pci_dev_echo "$bdf" "Skipping un-whitelisted $2 device"
        continue
      fi

      linux_bind_driver "$bdf" "$driver_name"
    done
  done < <(_pci_device_ids "$1")
}

#######################################
# Pick a userspace I/O driver, load it and bind all NVMe, I/OAT, IDXD, Virtio
# and VMD devices that pass pci_can_use and have no mounted filesystems.
# Globals:   DRIVER_OVERRIDE, PCI_WHITELIST, rootdir, pci_bus_cache (read)
#            driver_name (written; configure_linux reads it afterwards)
# Returns:   1 when no suitable driver could be found
#######################################
function configure_linux_pci() {
  local driver_path=""
  local noiommu=/sys/module/vfio/parameters/enable_unsafe_noiommu_mode
  local bdf blkname name mounted mountpoints dev_id
  local blknames=()

  driver_name=""
  if [[ -n "${DRIVER_OVERRIDE}" ]]; then
    driver_path="$DRIVER_OVERRIDE"
    driver_name="${DRIVER_OVERRIDE##*/}"
    # modprobe and the sysfs don't use the .ko suffix.
    driver_name=${driver_name%.ko}
    # path = name -> there is no path
    if [[ "$driver_path" = "$driver_name" ]]; then
      driver_path=""
    fi
    # igb_uio is a common driver to override with and it depends on uio.
    if [[ "$driver_name" = "igb_uio" ]]; then
      modprobe uio
    fi
  elif [[ -n "$(ls /sys/kernel/iommu_groups 2> /dev/null)" ]] ||
    [[ -e $noiommu && "$(cat "$noiommu")" == "Y" ]]; then
    driver_name=vfio-pci
  elif modinfo uio_pci_generic > /dev/null 2>&1; then
    driver_name=uio_pci_generic
  elif [[ -r "$rootdir/dpdk/build/kmod/igb_uio.ko" ]]; then
    driver_path="$rootdir/dpdk/build/kmod/igb_uio.ko"
    driver_name="igb_uio"
    modprobe uio
    echo "WARNING: uio_pci_generic not detected - using $driver_name"
  else
    echo "No valid drivers found [vfio-pci, uio_pci_generic, igb_uio]. Please either enable the vfio-pci or uio_pci_generic"
    echo "kernel modules, or have SPDK build the igb_uio driver by running ./configure --with-igb-uio-driver and recompiling."
    return 1
  fi

  # modprobe assumes the directory of the module. If the user passes in a path, we should use insmod
  if [[ -n "$driver_path" ]]; then
    insmod "$driver_path" || true
  else
    modprobe "$driver_name"
  fi

  # NVMe (PCI class code 0x010802)
  for bdf in ${pci_bus_cache["0x010802"]}; do
    blknames=()
    if ! pci_can_use "$bdf"; then
      pci_dev_echo "$bdf" "Skipping un-whitelisted NVMe controller at $bdf"
      continue
    fi

    mounted=false
    for blkname in $(get_nvme_name_from_bdf "$bdf"); do
      mountpoints=$(lsblk "/dev/$blkname" --output MOUNTPOINT -n | wc -w)
      if [ "$mountpoints" != "0" ]; then
        mounted=true
        blknames+=("$blkname")
      fi
    done

    if [[ $mounted == false ]]; then
      linux_bind_driver "$bdf" "$driver_name"
    else
      for name in "${blknames[@]}"; do
        pci_dev_echo "$bdf" "Active mountpoints on /dev/$name, so not binding PCI dev"
      done
    fi
  done

  _bind_intel_devices "PCI_DEVICE_ID_INTEL_IOAT" "I/OAT"
  _bind_intel_devices "PCI_DEVICE_ID_INTEL_IDXD" "IDXD"

  # virtio (vendor 0x1af4): skip devices backing a mounted filesystem.
  while IFS= read -r dev_id; do
    for bdf in ${pci_bus_cache["0x1af4:0x$dev_id"]}; do
      if ! pci_can_use "$bdf"; then
        pci_dev_echo "$bdf" "Skipping un-whitelisted Virtio device at $bdf"
        continue
      fi
      blknames=()
      get_virtio_names_from_bdf "$bdf" blknames
      for blkname in "${blknames[@]}"; do
        if [ "$(lsblk "/dev/$blkname" --output MOUNTPOINT -n | wc -w)" != "0" ]; then
          pci_dev_echo "$bdf" "Active mountpoints on /dev/$blkname, so not binding"
          # Move on to the next PCI device, not just the next block device.
          continue 2
        fi
      done

      linux_bind_driver "$bdf" "$driver_name"
    done
  done < <(_pci_device_ids "PCI_DEVICE_ID_VIRTIO")

  # VMD: unlike the other classes it is bound only when a whitelist is set.
  while IFS= read -r dev_id; do
    for bdf in ${pci_bus_cache["0x8086:0x$dev_id"]}; do
      if [[ -z "$PCI_WHITELIST" ]] || ! pci_can_use "$bdf"; then
        echo "Skipping un-whitelisted VMD device at $bdf"
        continue
      fi

      linux_bind_driver "$bdf" "$driver_name"
      echo " VMD generic kdrv: " "$bdf" "$driver_name"
    done
  done < <(_pci_device_ids "PCI_DEVICE_ID_INTEL_VMD")

  echo "1" > "/sys/bus/pci/rescan"
}

#######################################
# Remove files and directories left behind under {/var/run,/tmp}/dpdk,
# $XDG_RUNTIME_DIR/dpdk and /dev/shm, skipping anything a running process
# still has open.
#######################################
function cleanup_linux() {
  local dirs_to_clean files_to_clean opened_files="" dir fd_dir fds f

  # NOTE: every extglob pattern below is kept inside a command substitution,
  # presumably so it is only parsed once extglob has been switched on here.
  shopt -s extglob nullglob
  dirs_to_clean="$(echo {/var/run,/tmp}/dpdk/spdk{,_pid}+([0-9])) "
  if [[ -d $XDG_RUNTIME_DIR && $XDG_RUNTIME_DIR != *" "* ]]; then
    # "assert_not_empty" is a dummy operand that keeps readlink from failing
    # on an empty argument list; it does not resolve and prints nothing.
    dirs_to_clean+="$(readlink -e assert_not_empty $XDG_RUNTIME_DIR/dpdk/spdk{,_pid}+([0-9]) || true) "
  fi

  files_to_clean=""
  for dir in $dirs_to_clean; do
    files_to_clean+="$(echo $dir/*) "
  done
  shopt -u extglob nullglob

  files_to_clean+="$(ls -1 /dev/shm/* \
    | grep -E '(spdk_tgt|iscsi|vhost|nvmf|rocksdb|bdevio|bdevperf|vhost_fuzz|nvme_fuzz)_trace|spdk_iscsi_conns' || true) "
  files_to_clean="$(readlink -e assert_not_empty $files_to_clean || true)"
  if [[ -z "$files_to_clean" ]]; then
    echo "Clean"
    return 0
  fi

  shopt -s extglob
  for fd_dir in $(echo /proc/+([0-9])); do
    fds="$(readlink -e assert_not_empty $fd_dir/fd/* || true)"
    if [[ -n "$fds" ]]; then
      # Command substitution strips the trailing newline; put it back so the
      # last path of this process is not fused with the first of the next.
      opened_files+="$fds"$'\n'
    fi
  done
  shopt -u extglob

  if [[ -z "$opened_files" ]]; then
    echo "Can't get list of opened files!"
    exit 1
  fi

  echo 'Cleaning'
  for f in $files_to_clean; do
    # -F -x: whole-line literal comparison, the path is not a regex.
    if ! grep -Fxq -- "$f" <<< "$opened_files"; then
      echo "Removing: $f"
      rm "$f"
    else
      echo "Still open: $f"
    fi
  done

  for dir in $dirs_to_clean; do
    if ! grep -Fxq -- "$dir" <<< "$opened_files"; then
      echo "Removing: $dir"
      rmdir "$dir"
    else
      echo "Still open: $dir"
    fi
  done
  echo "Clean"
}

#######################################
# "config" mode on Linux: bind devices, make sure a hugetlbfs is mounted,
# reserve NRHUGE hugepages and prepare permissions for TARGET_USER.
# Globals:   NRHUGE, HUGENODE, HUGEPGSZ, TARGET_USER, driver_name (read)
#######################################
function configure_linux() {
  local hugetlbfs_mounts hugepages_target allocated_hugepages mount_dir
  local memlock_amnt memlock_mb

  configure_linux_pci
  hugetlbfs_mounts=$(linux_hugetlbfs_mounts)

  if [ -z "$hugetlbfs_mounts" ]; then
    hugetlbfs_mounts=/mnt/huge
    echo "Mounting hugetlbfs at $hugetlbfs_mounts"
    mkdir -p "$hugetlbfs_mounts"
    mount -t hugetlbfs nodev "$hugetlbfs_mounts"
  fi

  if [ -z "$HUGENODE" ]; then
    hugepages_target="/proc/sys/vm/nr_hugepages"
  else
    hugepages_target="/sys/devices/system/node/node${HUGENODE}/hugepages/hugepages-${HUGEPGSZ}kB/nr_hugepages"
  fi

  echo "$NRHUGE" > "$hugepages_target"
  # Read the value back to learn how many pages were actually reserved.
  allocated_hugepages=$(cat "$hugepages_target")
  if [ "$allocated_hugepages" -lt "$NRHUGE" ]; then
    echo ""
    echo "## ERROR: requested $NRHUGE hugepages but only $allocated_hugepages could be allocated."
    echo "## Memory might be heavily fragmented. Please try flushing the system cache, or reboot the machine."
    exit 1
  fi

  if [ "$driver_name" = "vfio-pci" ] && [ -n "$TARGET_USER" ]; then
    for mount_dir in $hugetlbfs_mounts; do
      chown "$TARGET_USER" "$mount_dir"
      chmod g+w "$mount_dir"
    done

    memlock_amnt=$(su "$TARGET_USER" -c "ulimit -l")
    if [[ $memlock_amnt != "unlimited" ]]; then
      memlock_mb=$((memlock_amnt / 1024))
      echo "\"$TARGET_USER\" user memlock limit: $memlock_mb MB"
      echo ""
      echo "This is the maximum amount of memory you will be"
      echo "able to use with DPDK and VFIO if run as user \"$TARGET_USER\"."
      echo "To change this, please adjust limits.conf memlock limit for user \"$TARGET_USER\"."
      if ((memlock_amnt < 65536)); then
        echo ""
        echo "## WARNING: memlock limit is less than 64MB"
        echo -n "## DPDK with VFIO may not be able to initialize "
        echo "if run as user \"$TARGET_USER\"."
      fi
    fi
  fi

  # /dev/cpu/0/msr is a device node, so test for existence (-e); -f is never
  # true for it and would reload the module on every run.
  if [ ! -e /dev/cpu/0/msr ]; then
    # Some distros build msr as a module. Make sure it's loaded to ensure
    # DPDK can easily figure out the TSC rate rather than relying on 100ms
    # sleeps.
    modprobe msr || true
  fi
}

# Give device $1 back to kernel driver $2, or merely unbind it when $3 (a
# check_for_driver status) is 0, i.e. that driver is not available.
function _rebind_or_unbind() {
  if [ "$3" -ne 0 ]; then
    linux_bind_driver "$1" "$2"
  else
    linux_unbind_driver "$1"
  fi
}

# Reset every usable Intel device defined under header pattern $1 to kernel
# driver $3; $2 names the device class in log output.
function _reset_intel_devices() {
  local dev_id bdf
  local driver_loaded=0

  check_for_driver "$3" || driver_loaded=$?
  while IFS= read -r dev_id; do
    for bdf in ${pci_bus_cache["0x8086:0x$dev_id"]}; do
      if ! pci_can_use "$bdf"; then
        pci_dev_echo "$bdf" "Skipping un-whitelisted $2 device"
        continue
      fi
      _rebind_or_unbind "$bdf" "$3" "$driver_loaded"
    done
  done < <(_pci_device_ids "$1")
}

#######################################
# Return all usable NVMe, I/OAT, IDXD, Virtio and VMD devices to their kernel
# drivers (nvme, ioatdma, idxd, virtio-pci, vmd); when such a driver is not
# available the device is only unbound.
#######################################
function reset_linux_pci() {
  local bdf dev_id
  local driver_loaded=0

  # NVMe; "|| var=$?" captures the status without tripping set -e.
  check_for_driver nvme || driver_loaded=$?
  for bdf in ${pci_bus_cache["0x010802"]}; do
    if ! pci_can_use "$bdf"; then
      pci_dev_echo "$bdf" "Skipping un-whitelisted NVMe controller at $bdf"
      continue
    fi
    _rebind_or_unbind "$bdf" nvme "$driver_loaded"
  done

  _reset_intel_devices "PCI_DEVICE_ID_INTEL_IOAT" "I/OAT" ioatdma
  _reset_intel_devices "PCI_DEVICE_ID_INTEL_IDXD" "IDXD" idxd

  # virtio
  # TODO: check if virtio-pci is loaded first and just unbind if it is not loaded
  # Requires some more investigation - for example, some kernels do not seem to have
  # virtio-pci but just virtio_scsi instead. Also need to make sure we get the
  # underscore vs. dash right in the virtio_scsi name.
  modprobe virtio-pci || true
  while IFS= read -r dev_id; do
    for bdf in ${pci_bus_cache["0x1af4:0x$dev_id"]}; do
      if ! pci_can_use "$bdf"; then
        pci_dev_echo "$bdf" "Skipping un-whitelisted Virtio device at $bdf"
        continue
      fi
      linux_bind_driver "$bdf" virtio-pci
    done
  done < <(_pci_device_ids "PCI_DEVICE_ID_VIRTIO")

  # VMD
  driver_loaded=0
  check_for_driver vmd || driver_loaded=$?
  while IFS= read -r dev_id; do
    for bdf in ${pci_bus_cache["0x8086:0x$dev_id"]}; do
      if ! pci_can_use "$bdf"; then
        echo "Skipping un-whitelisted VMD device at $bdf"
        continue
      fi
      _rebind_or_unbind "$bdf" vmd "$driver_loaded"
    done
  done < <(_pci_device_ids "PCI_DEVICE_ID_INTEL_VMD")

  echo "1" > "/sys/bus/pci/rescan"
}

# "reset" mode on Linux: rebind devices and delete leftover spdk*map_* files
# from every hugetlbfs mount as well as /run/.spdk*.
function reset_linux() {
  local mount_dir

  reset_linux_pci
  for mount_dir in $(linux_hugetlbfs_mounts); do
    rm -f "$mount_dir"/spdk*map_*
  done
  rm -f /run/.spdk*
}

# Print the driver currently recorded in the uevent file of PCI device $1
# (nothing when the device has no driver).
function _pci_dev_driver() {
  grep DRIVER "/sys/bus/pci/devices/$1/uevent" | awk -F"=" '{print $2}'
}

# Print the NUMA node of PCI device $1, or "unknown" when sysfs reports -1.
function _pci_dev_numa_node() {
  local node

  node=$(cat "/sys/bus/pci/devices/$1/numa_node")
  if ((node == -1)); then
    node=unknown
  fi
  echo "$node"
}

#######################################
# "status" mode on Linux: print hugepage usage followed by one table per
# device class (NVMe, I/OAT, IDXD, virtio, VMD).
#######################################
function status_linux() {
  local numa_nodes=0
  local path free_pages all_pages node huge_size
  local bdf dev_id driver device vendor name
  local blknames=()

  echo "Hugepages"
  printf "%-6s %10s %8s / %6s\n" "node" "hugesize" "free" "total"

  shopt -s nullglob
  for path in /sys/devices/system/node/node*/hugepages/hugepages-*/; do
    numa_nodes=$((numa_nodes + 1))
    free_pages=$(cat "$path/free_hugepages")
    all_pages=$(cat "$path/nr_hugepages")

    [[ $path =~ (node[0-9]+)/hugepages/hugepages-([0-9]+kB) ]]

    node=${BASH_REMATCH[1]}
    huge_size=${BASH_REMATCH[2]}

    printf "%-6s %10s %8s / %6s\n" "$node" "$huge_size" "$free_pages" "$all_pages"
  done
  shopt -u nullglob

  # fall back to system-wide hugepages
  if [ "$numa_nodes" = "0" ]; then
    free_pages=$(awk '/HugePages_Free/ { print $2 }' /proc/meminfo)
    all_pages=$(awk '/HugePages_Total/ { print $2 }' /proc/meminfo)
    node="-"
    huge_size="$HUGEPGSZ"

    printf "%-6s %10s %8s / %6s\n" "$node" "$huge_size" "$free_pages" "$all_pages"
  fi

  echo ""
  echo "NVMe devices"

  echo -e "BDF\t\tVendor\tDevice\tNUMA\tDriver\t\tDevice name"
  for bdf in ${pci_bus_cache["0x010802"]}; do
    driver=$(_pci_dev_driver "$bdf")
    if [ "$numa_nodes" = "0" ]; then
      node="-"
    else
      node=$(_pci_dev_numa_node "$bdf")
    fi
    device=$(cat "/sys/bus/pci/devices/$bdf/device")
    vendor=$(cat "/sys/bus/pci/devices/$bdf/vendor")
    if [ "$driver" = "nvme" ] && [ -d "/sys/bus/pci/devices/$bdf/nvme" ]; then
      name="\t"$(ls "/sys/bus/pci/devices/$bdf/nvme")
    else
      name="-"
    fi
    echo -e "$bdf\t${vendor#0x}\t${device#0x}\t$node\t${driver:--}\t\t$name"
  done

  echo ""
  echo "I/OAT Engine"

  echo -e "BDF\t\tVendor\tDevice\tNUMA\tDriver"
  for dev_id in $(_pci_device_ids "PCI_DEVICE_ID_INTEL_IOAT"); do
    for bdf in ${pci_bus_cache["0x8086:0x$dev_id"]}; do
      driver=$(_pci_dev_driver "$bdf")
      if [ "$numa_nodes" = "0" ]; then
        node="-"
      else
        node=$(_pci_dev_numa_node "$bdf")
      fi
      device=$(cat "/sys/bus/pci/devices/$bdf/device")
      vendor=$(cat "/sys/bus/pci/devices/$bdf/vendor")
      echo -e "$bdf\t${vendor#0x}\t${device#0x}\t$node\t${driver:--}"
    done
  done

  echo ""
  echo "IDXD Engine"

  echo -e "BDF\t\tVendor\tDevice\tNUMA\tDriver"
  for dev_id in $(_pci_device_ids "PCI_DEVICE_ID_INTEL_IDXD"); do
    for bdf in ${pci_bus_cache["0x8086:0x$dev_id"]}; do
      driver=$(_pci_dev_driver "$bdf")
      if [ "$numa_nodes" = "0" ]; then
        node="-"
      else
        # Same -1 -> "unknown" mapping as every other table.
        node=$(_pci_dev_numa_node "$bdf")
      fi
      device=$(cat "/sys/bus/pci/devices/$bdf/device")
      vendor=$(cat "/sys/bus/pci/devices/$bdf/vendor")
      echo -e "$bdf\t${vendor#0x}\t${device#0x}\t$node\t${driver:--}"
    done
  done

  echo ""
  echo "virtio"

  echo -e "BDF\t\tVendor\tDevice\tNUMA\tDriver\t\tDevice name"
  for dev_id in $(_pci_device_ids "PCI_DEVICE_ID_VIRTIO"); do
    for bdf in ${pci_bus_cache["0x1af4:0x$dev_id"]}; do
      driver=$(_pci_dev_driver "$bdf")
      if [ "$numa_nodes" = "0" ]; then
        node="-"
      else
        node=$(_pci_dev_numa_node "$bdf")
      fi
      device=$(cat "/sys/bus/pci/devices/$bdf/device")
      vendor=$(cat "/sys/bus/pci/devices/$bdf/vendor")
      blknames=()
      get_virtio_names_from_bdf "$bdf" blknames
      echo -e "$bdf\t${vendor#0x}\t${device#0x}\t$node\t\t${driver:--}\t\t" "${blknames[@]}"
    done
  done

  echo ""
  echo "VMD"

  echo -e "BDF\t\tNuma Node\tDriver Name"
  for dev_id in $(_pci_device_ids "PCI_DEVICE_ID_INTEL_VMD"); do
    for bdf in ${pci_bus_cache["0x8086:0x$dev_id"]}; do
      driver=$(_pci_dev_driver "$bdf")
      node=$(_pci_dev_numa_node "$bdf")
      echo -e "$bdf\t$node\t\t$driver"
    done
  done
}

#######################################
# "status" mode on FreeBSD: print the contigmem state and one table each for
# NVMe, I/OAT, IDXD and VMD devices.
#######################################
function status_freebsd() {
  local devs dev_type dev_id
  local ioat=() idxd=() vmd=()

  # Print a device table for the pci_bus_cache keys given as arguments. The
  # body is a subshell, so nothing it sets leaks into the caller.
  status_print() (
    local id pci driver

    echo -e "BDF\t\tVendor\tDevice\tDriver"

    for id; do
      for pci in ${pci_bus_cache["$id"]}; do
        driver=$(pciconf -l "pci$pci")
        # Keep only the part in front of the first '@' of pciconf's output.
        driver=${driver%@*}
        printf '%s\t%s\t%s\t%s\n' \
          "$pci" \
          "${pci_ids_vendor["$pci"]}" \
          "${pci_ids_device["$pci"]}" \
          "$driver"
      done
    done
  )

  devs=PCI_DEVICE_ID_INTEL_IOAT
  devs+="|PCI_DEVICE_ID_INTEL_IDXD"
  devs+="|PCI_DEVICE_ID_INTEL_VMD"

  # Each matching header line is read as: <ignored> <macro name> <device id>.
  while read -r _ dev_type dev_id; do
    case "$dev_type" in
      *IOAT*) ioat+=("0x8086:$dev_id") ;;
      *IDXD*) idxd+=("0x8086:$dev_id") ;;
      *VMD*) vmd+=("0x8086:$dev_id") ;;
    esac
  done < <(grep -E "$devs" "$rootdir/include/spdk/pci_ids.h")

  local contigmem=present
  if ! kldstat -q -m contigmem; then
    contigmem="not present"
  fi

  echo "Contigmem ($contigmem)"
  echo "  Buffer Size: $(kenv hw.contigmem.buffer_size)"
  echo "  Num Buffers: $(kenv hw.contigmem.num_buffers)"
  echo ""
  echo "NVMe devices"
  echo "$(status_print 0x010802)"
  echo ""
  echo "I/OAT DMA"
  echo "$(status_print "${ioat[@]}")"
  echo ""
  echo "IDXD DMA"
  echo "$(status_print "${idxd[@]}")"
  echo ""
  echo "VMD"
  echo "$(status_print "${vmd[@]}")"
}

# Reload nic_uio with hw.nic_uio.bdfs set to the comma separated addresses
# (domain part removed) of all NVMe, I/OAT, IDXD and VMD devices.
function configure_freebsd_pci() {
  local devs id
  local ids=()
  local BDFS=()

  devs=PCI_DEVICE_ID_INTEL_IOAT
  devs+="|PCI_DEVICE_ID_INTEL_IDXD"
  devs+="|PCI_DEVICE_ID_INTEL_VMD"

  ids=($(grep -E "$devs" "$rootdir/include/spdk/pci_ids.h" | awk '{print $3}'))

  if [[ -n ${pci_bus_cache["0x010802"]} ]]; then
    BDFS+=(${pci_bus_cache["0x010802"]})
  fi

  for id in "${ids[@]}"; do
    [[ -n ${pci_bus_cache["0x8086:$id"]} ]] || continue
    BDFS+=(${pci_bus_cache["0x8086:$id"]})
  done

  # Drop the domain part from all the addresses
  BDFS=("${BDFS[@]#*:}")

  # "${BDFS[*]}" joins the elements with the first character of IFS.
  local IFS=","
  kldunload nic_uio.ko || true
  kenv hw.nic_uio.bdfs="${BDFS[*]}"
  kldload nic_uio.ko
}

# "config" mode on FreeBSD: bind devices and (re)load contigmem with
# HUGEMEM / 256 buffers of 256 MiB each.
function configure_freebsd() {
  local num_buffers=$((HUGEMEM / 256))

  configure_freebsd_pci
  # If contigmem is already loaded but the HUGEMEM specified doesn't match the
  # previous value, unload contigmem so that we can reload with the new value.
  if kldstat -q -m contigmem; then
    if [ "$(kenv hw.contigmem.num_buffers)" -ne "$num_buffers" ]; then
      kldunload contigmem.ko
    fi
  fi
  if ! kldstat -q -m contigmem; then
    kenv hw.contigmem.num_buffers="$num_buffers"
    kenv hw.contigmem.buffer_size=$((256 * 1024 * 1024))
    kldload contigmem.ko
  fi
}

# "reset" mode on FreeBSD: unload both kernel modules, ignoring failures.
function reset_freebsd() {
  kldunload contigmem.ko || true
  kldunload nic_uio.ko || true
}

# ---------------------------------------------------------------------------
# Entry point
# ---------------------------------------------------------------------------

CMD=reset cache_pci_bus

mode=${1:-config}

: "${HUGEMEM:=2048}"
: "${PCI_WHITELIST:=""}"
: "${PCI_BLACKLIST:=""}"

if [ -n "$NVME_WHITELIST" ]; then
  PCI_WHITELIST="$PCI_WHITELIST $NVME_WHITELIST"
fi

if [ -n "$SKIP_PCI" ]; then
  PCI_WHITELIST="none"
fi

if [ -z "$TARGET_USER" ]; then
  TARGET_USER="$SUDO_USER"
  if [ -z "$TARGET_USER" ]; then
    TARGET_USER=$(logname 2> /dev/null) || true
  fi
fi

if [[ $os == Linux ]]; then
  # Default hugepage size in kB; evaluates to 0 when /proc/meminfo has no
  # Hugepagesize line.
  HUGEPGSZ=$(($(grep Hugepagesize /proc/meminfo | cut -d : -f 2 | tr -dc '0-9')))
  HUGEPGSZ_MB=$((HUGEPGSZ / 1024))
  # Only derive NRHUGE when the page size is known; dividing by zero here
  # would abort even the modes that never touch hugepages.
  if ((HUGEPGSZ_MB > 0)); then
    : "${NRHUGE=$(((HUGEMEM + HUGEPGSZ_MB - 1) / HUGEPGSZ_MB))}"
  fi

  case "$mode" in
    config)
      if [[ -z $NRHUGE ]]; then
        echo "Unable to determine the default hugepage size, please set NRHUGE" >&2
        exit 1
      fi
      configure_linux
      ;;
    cleanup) cleanup_linux ;;
    reset) reset_linux ;;
    status) status_linux ;;
    help) usage "$0" ;;
    *) usage "$0" "Invalid argument '$mode'" ;;
  esac
else
  case "$mode" in
    config) configure_freebsd ;;
    reset) reset_freebsd ;;
    cleanup) echo "setup.sh cleanup function not yet supported on $os" ;;
    status) status_freebsd ;;
    help) usage "$0" ;;
    *) usage "$0" "Invalid argument '$mode'" ;;
  esac
fi