From 19fcec84d8d7d21e796c7624e521b60d28ee21ed Mon Sep 17 00:00:00 2001
From: Daniel Baumann
Date: Sun, 7 Apr 2024 20:45:59 +0200
Subject: Adding upstream version 16.2.11+ds.

Signed-off-by: Daniel Baumann
---
 .../dpdk/doc/guides/bbdevs/features/default.ini | 16 + .../doc/guides/bbdevs/features/fpga_5gnr_fec.ini | 11 + .../doc/guides/bbdevs/features/fpga_lte_fec.ini | 10 + src/spdk/dpdk/doc/guides/bbdevs/features/mbc.ini | 14 + src/spdk/dpdk/doc/guides/bbdevs/features/null.ini | 7 + .../dpdk/doc/guides/bbdevs/features/turbo_sw.ini | 11 + src/spdk/dpdk/doc/guides/bbdevs/fpga_5gnr_fec.rst | 297 ++ src/spdk/dpdk/doc/guides/bbdevs/fpga_lte_fec.rst | 316 ++ src/spdk/dpdk/doc/guides/bbdevs/index.rst | 15 + src/spdk/dpdk/doc/guides/bbdevs/null.rst | 49 + src/spdk/dpdk/doc/guides/bbdevs/overview.rst | 12 + src/spdk/dpdk/doc/guides/bbdevs/turbo_sw.rst | 181 + .../doc/guides/compressdevs/features/default.ini | 27 + .../dpdk/doc/guides/compressdevs/features/isal.ini | 18 + .../doc/guides/compressdevs/features/octeontx.ini | 10 + .../dpdk/doc/guides/compressdevs/features/qat.ini | 17 + .../dpdk/doc/guides/compressdevs/features/zlib.ini | 10 + src/spdk/dpdk/doc/guides/compressdevs/index.rst | 16 + src/spdk/dpdk/doc/guides/compressdevs/isal.rst | 149 + src/spdk/dpdk/doc/guides/compressdevs/octeontx.rst | 105 + src/spdk/dpdk/doc/guides/compressdevs/overview.rst | 32 + src/spdk/dpdk/doc/guides/compressdevs/qat_comp.rst | 53 + src/spdk/dpdk/doc/guides/compressdevs/zlib.rst | 69 + src/spdk/dpdk/doc/guides/conf.py | 435 ++ .../dpdk/doc/guides/contributing/abi_policy.rst | 337 ++ .../doc/guides/contributing/abi_versioning.rst | 697 +++ .../dpdk/doc/guides/contributing/cheatsheet.rst | 11 + .../dpdk/doc/guides/contributing/coding_style.rst | 1003 ++++ src/spdk/dpdk/doc/guides/contributing/design.rst | 177 + .../dpdk/doc/guides/contributing/documentation.rst | 751 +++ .../contributing/img/abi_stability_policy.svg | 1059 +++++ .../guides/contributing/img/patch_cheatsheet.svg | 1484 ++++++ .../doc/guides/contributing/img/what_is_an_abi.svg | 382 ++ src/spdk/dpdk/doc/guides/contributing/index.rst | 19 + src/spdk/dpdk/doc/guides/contributing/patches.rst | 691 +++ src/spdk/dpdk/doc/guides/contributing/stable.rst | 125 + .../dpdk/doc/guides/contributing/vulnerability.rst | 325 ++ src/spdk/dpdk/doc/guides/cryptodevs/aesni_gcm.rst | 121 + src/spdk/dpdk/doc/guides/cryptodevs/aesni_mb.rst | 161 + src/spdk/dpdk/doc/guides/cryptodevs/armv8.rst | 69 + src/spdk/dpdk/doc/guides/cryptodevs/caam_jr.rst | 150 + src/spdk/dpdk/doc/guides/cryptodevs/ccp.rst | 140 + src/spdk/dpdk/doc/guides/cryptodevs/dpaa2_sec.rst | 206 + src/spdk/dpdk/doc/guides/cryptodevs/dpaa_sec.rst | 150 + .../doc/guides/cryptodevs/features/aesni_gcm.ini | 41 + .../doc/guides/cryptodevs/features/aesni_mb.ini | 64 + .../dpdk/doc/guides/cryptodevs/features/armv8.ini | 34 + .../doc/guides/cryptodevs/features/caam_jr.ini | 51 + .../dpdk/doc/guides/cryptodevs/features/ccp.ini | 65 + .../doc/guides/cryptodevs/features/default.ini | 116 + .../doc/guides/cryptodevs/features/dpaa2_sec.ini | 55 + .../doc/guides/cryptodevs/features/dpaa_sec.ini | 55 + .../dpdk/doc/guides/cryptodevs/features/kasumi.ini | 31 + .../dpdk/doc/guides/cryptodevs/features/mvsam.ini | 59 + .../dpdk/doc/guides/cryptodevs/features/nitrox.ini | 41 + .../dpdk/doc/guides/cryptodevs/features/null.ini | 31 + .../doc/guides/cryptodevs/features/octeontx.ini | 78 + .../doc/guides/cryptodevs/features/octeontx2.ini | 78 + .../doc/guides/cryptodevs/features/openssl.ini | 67 + 
.../dpdk/doc/guides/cryptodevs/features/qat.ini | 81 + .../dpdk/doc/guides/cryptodevs/features/snow3g.ini | 31 + .../dpdk/doc/guides/cryptodevs/features/virtio.ini | 32 + .../dpdk/doc/guides/cryptodevs/features/zuc.ini | 31 + .../guides/cryptodevs/img/scheduler-overview.svg | 277 ++ src/spdk/dpdk/doc/guides/cryptodevs/index.rst | 31 + src/spdk/dpdk/doc/guides/cryptodevs/kasumi.rst | 138 + src/spdk/dpdk/doc/guides/cryptodevs/mvsam.rst | 112 + src/spdk/dpdk/doc/guides/cryptodevs/nitrox.rst | 53 + src/spdk/dpdk/doc/guides/cryptodevs/null.rst | 71 + src/spdk/dpdk/doc/guides/cryptodevs/octeontx.rst | 147 + src/spdk/dpdk/doc/guides/cryptodevs/octeontx2.rst | 159 + src/spdk/dpdk/doc/guides/cryptodevs/openssl.rst | 113 + src/spdk/dpdk/doc/guides/cryptodevs/overview.rst | 76 + src/spdk/dpdk/doc/guides/cryptodevs/qat.rst | 698 +++ src/spdk/dpdk/doc/guides/cryptodevs/scheduler.rst | 182 + src/spdk/dpdk/doc/guides/cryptodevs/snow3g.rst | 119 + src/spdk/dpdk/doc/guides/cryptodevs/virtio.rst | 117 + src/spdk/dpdk/doc/guides/cryptodevs/zuc.rst | 119 + src/spdk/dpdk/doc/guides/custom.css | 7 + src/spdk/dpdk/doc/guides/eventdevs/dpaa.rst | 102 + src/spdk/dpdk/doc/guides/eventdevs/dpaa2.rst | 118 + src/spdk/dpdk/doc/guides/eventdevs/dsw.rst | 96 + src/spdk/dpdk/doc/guides/eventdevs/index.rst | 20 + src/spdk/dpdk/doc/guides/eventdevs/octeontx.rst | 148 + src/spdk/dpdk/doc/guides/eventdevs/octeontx2.rst | 174 + src/spdk/dpdk/doc/guides/eventdevs/opdl.rst | 136 + src/spdk/dpdk/doc/guides/eventdevs/sw.rst | 132 + src/spdk/dpdk/doc/guides/faq/faq.rst | 197 + src/spdk/dpdk/doc/guides/faq/index.rst | 13 + .../dpdk/doc/guides/freebsd_gsg/build_dpdk.rst | 252 + .../doc/guides/freebsd_gsg/build_sample_apps.rst | 117 + .../guides/freebsd_gsg/freebsd_eal_parameters.rst | 20 + src/spdk/dpdk/doc/guides/freebsd_gsg/index.rst | 17 + .../doc/guides/freebsd_gsg/install_from_ports.rst | 125 + src/spdk/dpdk/doc/guides/freebsd_gsg/intro.rst | 55 + .../dpdk/doc/guides/howto/debug_troubleshoot.rst | 460 ++ .../dpdk/doc/guides/howto/flow_bifurcation.rst | 70 + .../doc/guides/howto/img/dtg_consumer_ring.svg | 24 + src/spdk/dpdk/doc/guides/howto/img/dtg_crypto.svg | 21 + .../guides/howto/img/dtg_distributor_worker.svg | 36 + src/spdk/dpdk/doc/guides/howto/img/dtg_mempool.svg | 27 + src/spdk/dpdk/doc/guides/howto/img/dtg_pdump.svg | 33 + .../doc/guides/howto/img/dtg_producer_ring.svg | 24 + src/spdk/dpdk/doc/guides/howto/img/dtg_qos_tx.svg | 29 + src/spdk/dpdk/doc/guides/howto/img/dtg_rx_rate.svg | 25 + .../dpdk/doc/guides/howto/img/dtg_rx_tx_drop.svg | 33 + .../doc/guides/howto/img/dtg_sample_app_model.svg | 110 + src/spdk/dpdk/doc/guides/howto/img/dtg_service.svg | 20 + .../guides/howto/img/flow_bifurcation_overview.svg | 544 +++ .../doc/guides/howto/img/lm_bond_virtio_sriov.svg | 666 +++ .../dpdk/doc/guides/howto/img/lm_vhost_user.svg | 644 +++ .../guides/howto/img/packet_capture_framework.svg | 471 ++ src/spdk/dpdk/doc/guides/howto/img/pvp_2nics.svg | 556 +++ .../use_models_for_running_dpdk_in_containers.svg | 398 ++ .../doc/guides/howto/img/vf_daemon_overview.svg | 408 ++ .../howto/img/virtio_user_as_exceptional_path.svg | 207 + .../img/virtio_user_for_container_networking.svg | 685 +++ src/spdk/dpdk/doc/guides/howto/index.rst | 22 + .../dpdk/doc/guides/howto/lm_bond_virtio_sriov.rst | 686 +++ .../dpdk/doc/guides/howto/lm_virtio_vhost_user.rst | 441 ++ src/spdk/dpdk/doc/guides/howto/openwrt.rst | 163 + .../doc/guides/howto/packet_capture_framework.rst | 111 + .../doc/guides/howto/pvp_reference_benchmark.rst | 372 ++ 
src/spdk/dpdk/doc/guides/howto/rte_flow.rst | 305 ++ src/spdk/dpdk/doc/guides/howto/telemetry.rst | 80 + src/spdk/dpdk/doc/guides/howto/vfd.rst | 379 ++ .../howto/virtio_user_as_exceptional_path.rst | 118 + .../howto/virtio_user_for_container_networking.rst | 118 + src/spdk/dpdk/doc/guides/index.rst | 29 + src/spdk/dpdk/doc/guides/linux_gsg/build_dpdk.rst | 249 + .../doc/guides/linux_gsg/build_sample_apps.rst | 250 + .../linux_gsg/cross_build_dpdk_for_arm64.rst | 143 + .../dpdk/doc/guides/linux_gsg/eal_args.include.rst | 212 + src/spdk/dpdk/doc/guides/linux_gsg/enable_func.rst | 159 + src/spdk/dpdk/doc/guides/linux_gsg/index.rst | 22 + src/spdk/dpdk/doc/guides/linux_gsg/intro.rst | 36 + .../dpdk/doc/guides/linux_gsg/linux_drivers.rst | 196 + .../doc/guides/linux_gsg/linux_eal_parameters.rst | 116 + .../guides/linux_gsg/nic_perf_intel_platform.rst | 188 + src/spdk/dpdk/doc/guides/linux_gsg/quick_start.rst | 304 ++ src/spdk/dpdk/doc/guides/linux_gsg/sys_reqs.rst | 219 + src/spdk/dpdk/doc/guides/mempool/index.rst | 15 + src/spdk/dpdk/doc/guides/mempool/octeontx.rst | 74 + src/spdk/dpdk/doc/guides/mempool/octeontx2.rst | 100 + src/spdk/dpdk/doc/guides/meson.build | 24 + src/spdk/dpdk/doc/guides/nics/af_packet.rst | 67 + src/spdk/dpdk/doc/guides/nics/af_xdp.rst | 79 + src/spdk/dpdk/doc/guides/nics/ark.rst | 231 + src/spdk/dpdk/doc/guides/nics/atlantic.rst | 53 + src/spdk/dpdk/doc/guides/nics/avp.rst | 85 + src/spdk/dpdk/doc/guides/nics/axgbe.rst | 89 + src/spdk/dpdk/doc/guides/nics/bnx2x.rst | 234 + src/spdk/dpdk/doc/guides/nics/bnxt.rst | 897 ++++ src/spdk/dpdk/doc/guides/nics/build_and_test.rst | 157 + src/spdk/dpdk/doc/guides/nics/cxgbe.rst | 856 ++++ src/spdk/dpdk/doc/guides/nics/dpaa.rst | 310 ++ src/spdk/dpdk/doc/guides/nics/dpaa2.rst | 561 +++ src/spdk/dpdk/doc/guides/nics/e1000em.rst | 155 + src/spdk/dpdk/doc/guides/nics/ena.rst | 265 ++ src/spdk/dpdk/doc/guides/nics/enetc.rst | 117 + src/spdk/dpdk/doc/guides/nics/enic.rst | 604 +++ src/spdk/dpdk/doc/guides/nics/fail_safe.rst | 246 + src/spdk/dpdk/doc/guides/nics/features.rst | 934 ++++ src/spdk/dpdk/doc/guides/nics/features/af_xdp.ini | 11 + .../dpdk/doc/guides/nics/features/afpacket.ini | 6 + src/spdk/dpdk/doc/guides/nics/features/ark.ini | 15 + .../dpdk/doc/guides/nics/features/atlantic.ini | 38 + src/spdk/dpdk/doc/guides/nics/features/avp.ini | 16 + src/spdk/dpdk/doc/guides/nics/features/axgbe.ini | 20 + src/spdk/dpdk/doc/guides/nics/features/bnx2x.ini | 18 + src/spdk/dpdk/doc/guides/nics/features/bnxt.ini | 50 + src/spdk/dpdk/doc/guides/nics/features/cxgbe.ini | 35 + src/spdk/dpdk/doc/guides/nics/features/cxgbevf.ini | 29 + src/spdk/dpdk/doc/guides/nics/features/default.ini | 79 + src/spdk/dpdk/doc/guides/nics/features/dpaa.ini | 24 + src/spdk/dpdk/doc/guides/nics/features/dpaa2.ini | 28 + src/spdk/dpdk/doc/guides/nics/features/e1000.ini | 32 + src/spdk/dpdk/doc/guides/nics/features/ena.ini | 24 + src/spdk/dpdk/doc/guides/nics/features/enetc.ini | 20 + src/spdk/dpdk/doc/guides/nics/features/enic.ini | 41 + .../dpdk/doc/guides/nics/features/failsafe.ini | 31 + src/spdk/dpdk/doc/guides/nics/features/fm10k.ini | 39 + .../dpdk/doc/guides/nics/features/fm10k_vf.ini | 31 + src/spdk/dpdk/doc/guides/nics/features/hinic.ini | 42 + src/spdk/dpdk/doc/guides/nics/features/hns3.ini | 35 + src/spdk/dpdk/doc/guides/nics/features/hns3_vf.ini | 32 + src/spdk/dpdk/doc/guides/nics/features/i40e.ini | 54 + src/spdk/dpdk/doc/guides/nics/features/i40e_vf.ini | 39 + src/spdk/dpdk/doc/guides/nics/features/iavf.ini | 36 + 
src/spdk/dpdk/doc/guides/nics/features/ice.ini | 45 + src/spdk/dpdk/doc/guides/nics/features/igb.ini | 47 + src/spdk/dpdk/doc/guides/nics/features/igb_vf.ini | 30 + src/spdk/dpdk/doc/guides/nics/features/igc.ini | 38 + src/spdk/dpdk/doc/guides/nics/features/ionic.ini | 36 + src/spdk/dpdk/doc/guides/nics/features/ipn3ke.ini | 51 + src/spdk/dpdk/doc/guides/nics/features/ixgbe.ini | 58 + .../dpdk/doc/guides/nics/features/ixgbe_vf.ini | 41 + .../dpdk/doc/guides/nics/features/liquidio.ini | 31 + src/spdk/dpdk/doc/guides/nics/features/memif.ini | 14 + src/spdk/dpdk/doc/guides/nics/features/mlx4.ini | 40 + src/spdk/dpdk/doc/guides/nics/features/mlx5.ini | 52 + src/spdk/dpdk/doc/guides/nics/features/mvneta.ini | 19 + src/spdk/dpdk/doc/guides/nics/features/mvpp2.ini | 25 + src/spdk/dpdk/doc/guides/nics/features/netvsc.ini | 24 + src/spdk/dpdk/doc/guides/nics/features/nfb.ini | 17 + src/spdk/dpdk/doc/guides/nics/features/nfp.ini | 29 + .../dpdk/doc/guides/nics/features/octeontx.ini | 29 + .../dpdk/doc/guides/nics/features/octeontx2.ini | 56 + .../doc/guides/nics/features/octeontx2_vec.ini | 50 + .../dpdk/doc/guides/nics/features/octeontx2_vf.ini | 47 + src/spdk/dpdk/doc/guides/nics/features/pcap.ini | 15 + src/spdk/dpdk/doc/guides/nics/features/pfe.ini | 17 + src/spdk/dpdk/doc/guides/nics/features/qede.ini | 40 + src/spdk/dpdk/doc/guides/nics/features/qede_vf.ini | 38 + src/spdk/dpdk/doc/guides/nics/features/sfc_efx.ini | 43 + .../dpdk/doc/guides/nics/features/szedata2.ini | 18 + src/spdk/dpdk/doc/guides/nics/features/tap.ini | 28 + .../dpdk/doc/guides/nics/features/thunderx.ini | 31 + src/spdk/dpdk/doc/guides/nics/features/vhost.ini | 13 + src/spdk/dpdk/doc/guides/nics/features/virtio.ini | 30 + src/spdk/dpdk/doc/guides/nics/features/vmxnet3.ini | 30 + src/spdk/dpdk/doc/guides/nics/fm10k.rst | 174 + src/spdk/dpdk/doc/guides/nics/hinic.rst | 68 + src/spdk/dpdk/doc/guides/nics/hns3.rst | 62 + src/spdk/dpdk/doc/guides/nics/i40e.rst | 821 ++++ src/spdk/dpdk/doc/guides/nics/ice.rst | 317 ++ src/spdk/dpdk/doc/guides/nics/igb.rst | 38 + src/spdk/dpdk/doc/guides/nics/igc.rst | 121 + src/spdk/dpdk/doc/guides/nics/img/console.png | Bin 0 -> 40850 bytes .../dpdk/doc/guides/nics/img/fast_pkt_proc.png | Bin 0 -> 355905 bytes .../dpdk/doc/guides/nics/img/forward_stats.png | Bin 0 -> 8849 bytes .../dpdk/doc/guides/nics/img/host_vm_comms.png | Bin 0 -> 16487 bytes .../doc/guides/nics/img/host_vm_comms_qemu.png | Bin 0 -> 15383 bytes src/spdk/dpdk/doc/guides/nics/img/ice_dcf.svg | 516 ++ .../doc/guides/nics/img/intel_perf_test_setup.svg | 507 ++ .../dpdk/doc/guides/nics/img/inter_vm_comms.png | Bin 0 -> 370244 bytes src/spdk/dpdk/doc/guides/nics/img/mvpp2_tm.svg | 71 + .../dpdk/doc/guides/nics/img/perf_benchmark.png | Bin 0 -> 392248 bytes .../dpdk/doc/guides/nics/img/single_port_nic.png | Bin 0 -> 425314 bytes .../nics/img/szedata2_nfb200g_architecture.svg | 214 + src/spdk/dpdk/doc/guides/nics/img/vm_vm_comms.png | Bin 0 -> 172288 bytes src/spdk/dpdk/doc/guides/nics/img/vmxnet3_int.png | Bin 0 -> 107542 bytes src/spdk/dpdk/doc/guides/nics/img/vswitch_vm.png | Bin 0 -> 123082 bytes src/spdk/dpdk/doc/guides/nics/index.rst | 65 + src/spdk/dpdk/doc/guides/nics/intel_vf.rst | 617 +++ src/spdk/dpdk/doc/guides/nics/ionic.rst | 41 + src/spdk/dpdk/doc/guides/nics/ipn3ke.rst | 107 + src/spdk/dpdk/doc/guides/nics/ixgbe.rst | 314 ++ src/spdk/dpdk/doc/guides/nics/kni.rst | 170 + src/spdk/dpdk/doc/guides/nics/liquidio.rst | 196 + src/spdk/dpdk/doc/guides/nics/memif.rst | 292 ++ src/spdk/dpdk/doc/guides/nics/mlx4.rst | 493 ++ 
src/spdk/dpdk/doc/guides/nics/mlx5.rst | 1526 ++++++ src/spdk/dpdk/doc/guides/nics/mvneta.rst | 171 + src/spdk/dpdk/doc/guides/nics/mvpp2.rst | 785 +++ src/spdk/dpdk/doc/guides/nics/netvsc.rst | 118 + src/spdk/dpdk/doc/guides/nics/nfb.rst | 164 + src/spdk/dpdk/doc/guides/nics/nfp.rst | 168 + src/spdk/dpdk/doc/guides/nics/null.rst | 43 + src/spdk/dpdk/doc/guides/nics/octeontx.rst | 186 + src/spdk/dpdk/doc/guides/nics/octeontx2.rst | 406 ++ src/spdk/dpdk/doc/guides/nics/overview.rst | 34 + src/spdk/dpdk/doc/guides/nics/pcap_ring.rst | 322 ++ src/spdk/dpdk/doc/guides/nics/pfe.rst | 180 + src/spdk/dpdk/doc/guides/nics/qede.rst | 333 ++ src/spdk/dpdk/doc/guides/nics/sfc_efx.rst | 404 ++ src/spdk/dpdk/doc/guides/nics/softnic.rst | 370 ++ src/spdk/dpdk/doc/guides/nics/szedata2.rst | 162 + src/spdk/dpdk/doc/guides/nics/tap.rst | 300 ++ src/spdk/dpdk/doc/guides/nics/thunderx.rst | 430 ++ src/spdk/dpdk/doc/guides/nics/vdev_netvsc.rst | 104 + src/spdk/dpdk/doc/guides/nics/vhost.rst | 108 + src/spdk/dpdk/doc/guides/nics/virtio.rst | 567 +++ src/spdk/dpdk/doc/guides/nics/vmxnet3.rst | 162 + src/spdk/dpdk/doc/guides/platform/bluefield.rst | 143 + src/spdk/dpdk/doc/guides/platform/dpaa.rst | 103 + src/spdk/dpdk/doc/guides/platform/dpaa2.rst | 113 + .../img/octeontx2_packet_flow_hw_accelerators.svg | 2804 +++++++++++ .../img/octeontx2_resource_virtualization.svg | 2418 ++++++++++ src/spdk/dpdk/doc/guides/platform/index.rst | 17 + src/spdk/dpdk/doc/guides/platform/octeontx.rst | 161 + src/spdk/dpdk/doc/guides/platform/octeontx2.rst | 552 +++ src/spdk/dpdk/doc/guides/prog_guide/bbdev.rst | 1203 +++++ src/spdk/dpdk/doc/guides/prog_guide/bpf_lib.rst | 38 + .../dpdk/doc/guides/prog_guide/build-sdk-meson.rst | 203 + src/spdk/dpdk/doc/guides/prog_guide/build_app.rst | 99 + .../dpdk/doc/guides/prog_guide/compressdev.rst | 637 +++ .../dpdk/doc/guides/prog_guide/cryptodev_lib.rst | 1131 +++++ .../doc/guides/prog_guide/dev_kit_build_system.rst | 331 ++ .../guides/prog_guide/dev_kit_root_make_help.rst | 188 + src/spdk/dpdk/doc/guides/prog_guide/efd_lib.rst | 428 ++ .../guides/prog_guide/env_abstraction_layer.rst | 954 ++++ .../doc/guides/prog_guide/event_crypto_adapter.rst | 301 ++ .../prog_guide/event_ethernet_rx_adapter.rst | 192 + .../prog_guide/event_ethernet_tx_adapter.rst | 166 + .../doc/guides/prog_guide/event_timer_adapter.rst | 300 ++ src/spdk/dpdk/doc/guides/prog_guide/eventdev.rst | 390 ++ .../guides/prog_guide/ext_app_lib_make_help.rst | 98 + .../dpdk/doc/guides/prog_guide/extend_dpdk.rst | 109 + .../doc/guides/prog_guide/flow_classify_lib.rst | 415 ++ .../prog_guide/generic_receive_offload_lib.rst | 212 + .../generic_segmentation_offload_lib.rst | 239 + src/spdk/dpdk/doc/guides/prog_guide/glossary.rst | 244 + src/spdk/dpdk/doc/guides/prog_guide/graph_lib.rst | 397 ++ src/spdk/dpdk/doc/guides/prog_guide/hash_lib.rst | 298 ++ .../guides/prog_guide/img/anatomy_of_a_node.svg | 1078 +++++ .../prog_guide/img/architecture-overview.svg | 980 ++++ .../doc/guides/prog_guide/img/blk_diag_dropper.png | Bin 0 -> 55303 bytes .../dpdk/doc/guides/prog_guide/img/bond-mode-0.svg | 640 +++ .../dpdk/doc/guides/prog_guide/img/bond-mode-1.svg | 726 +++ .../dpdk/doc/guides/prog_guide/img/bond-mode-2.svg | 704 +++ .../dpdk/doc/guides/prog_guide/img/bond-mode-3.svg | 704 +++ .../dpdk/doc/guides/prog_guide/img/bond-mode-4.svg | 786 +++ .../dpdk/doc/guides/prog_guide/img/bond-mode-5.svg | 644 +++ .../doc/guides/prog_guide/img/bond-overview.svg | 123 + .../dpdk/doc/guides/prog_guide/img/crypto_op.svg | 75 + 
.../guides/prog_guide/img/crypto_xform_chain.svg | 149 + .../guides/prog_guide/img/cryptodev_sym_sess.svg | 417 ++ .../guides/prog_guide/img/data_struct_per_port.png | Bin 0 -> 58769 bytes .../guides/prog_guide/img/drop_probability_eq3.png | Bin 0 -> 3205 bytes .../guides/prog_guide/img/drop_probability_eq4.png | Bin 0 -> 2737 bytes .../prog_guide/img/drop_probability_graph.png | Bin 0 -> 62349 bytes src/spdk/dpdk/doc/guides/prog_guide/img/efd_i1.svg | 130 + .../dpdk/doc/guides/prog_guide/img/efd_i10.svg | 384 ++ .../dpdk/doc/guides/prog_guide/img/efd_i11.svg | 319 ++ .../dpdk/doc/guides/prog_guide/img/efd_i12.svg | 1008 ++++ src/spdk/dpdk/doc/guides/prog_guide/img/efd_i2.svg | 280 ++ src/spdk/dpdk/doc/guides/prog_guide/img/efd_i3.svg | 634 +++ src/spdk/dpdk/doc/guides/prog_guide/img/efd_i4.svg | 203 + src/spdk/dpdk/doc/guides/prog_guide/img/efd_i5.svg | 183 + src/spdk/dpdk/doc/guides/prog_guide/img/efd_i6.svg | 1254 +++++ src/spdk/dpdk/doc/guides/prog_guide/img/efd_i7.svg | 790 ++++ src/spdk/dpdk/doc/guides/prog_guide/img/efd_i8.svg | 182 + src/spdk/dpdk/doc/guides/prog_guide/img/efd_i9.svg | 390 ++ .../doc/guides/prog_guide/img/eq2_expression.png | Bin 0 -> 1614 bytes .../dpdk/doc/guides/prog_guide/img/eq2_factor.png | Bin 0 -> 995 bytes .../img/event_crypto_adapter_op_forward.svg | 1078 +++++ .../prog_guide/img/event_crypto_adapter_op_new.svg | 1061 +++++ .../doc/guides/prog_guide/img/eventdev_usage.svg | 549 +++ .../doc/guides/prog_guide/img/ewma_filter_eq_1.png | Bin 0 -> 840 bytes .../doc/guides/prog_guide/img/ewma_filter_eq_2.png | Bin 0 -> 1462 bytes .../prog_guide/img/ex_data_flow_tru_dropper.png | Bin 0 -> 32578 bytes .../dpdk/doc/guides/prog_guide/img/figure32.png | Bin 0 -> 11603 bytes .../dpdk/doc/guides/prog_guide/img/figure33.png | Bin 0 -> 65216 bytes .../dpdk/doc/guides/prog_guide/img/figure34.png | Bin 0 -> 11581 bytes .../dpdk/doc/guides/prog_guide/img/figure35.png | Bin 0 -> 75012 bytes .../dpdk/doc/guides/prog_guide/img/figure37.png | Bin 0 -> 6934 bytes .../dpdk/doc/guides/prog_guide/img/figure38.png | Bin 0 -> 7372 bytes .../dpdk/doc/guides/prog_guide/img/figure39.png | Bin 0 -> 55986 bytes .../guides/prog_guide/img/flow_tru_droppper.png | Bin 0 -> 30870 bytes .../doc/guides/prog_guide/img/graph_mem_layout.svg | 702 +++ .../guides/prog_guide/img/gro-key-algorithm.svg | 223 + .../prog_guide/img/gso-output-segment-format.svg | 313 ++ .../guides/prog_guide/img/gso-three-seg-mbuf.svg | 477 ++ .../doc/guides/prog_guide/img/hier_sched_blk.png | Bin 0 -> 36328 bytes .../doc/guides/prog_guide/img/kernel_nic_intf.png | Bin 0 -> 185839 bytes .../doc/guides/prog_guide/img/kni_traffic_flow.png | Bin 0 -> 366308 bytes .../doc/guides/prog_guide/img/link_the_nodes.svg | 3330 +++++++++++++ .../doc/guides/prog_guide/img/linuxapp_launch.svg | 731 +++ .../doc/guides/prog_guide/img/m_definition.png | Bin 0 -> 1261 bytes .../dpdk/doc/guides/prog_guide/img/malloc_heap.svg | 333 ++ src/spdk/dpdk/doc/guides/prog_guide/img/mbuf1.svg | 549 +++ src/spdk/dpdk/doc/guides/prog_guide/img/mbuf2.svg | 1229 +++++ .../dpdk/doc/guides/prog_guide/img/member_i1.svg | 1613 +++++++ .../dpdk/doc/guides/prog_guide/img/member_i2.svg | 36 + .../dpdk/doc/guides/prog_guide/img/member_i3.svg | 148 + .../dpdk/doc/guides/prog_guide/img/member_i4.svg | 450 ++ .../dpdk/doc/guides/prog_guide/img/member_i5.svg | 163 + .../dpdk/doc/guides/prog_guide/img/member_i6.svg | 332 ++ .../dpdk/doc/guides/prog_guide/img/member_i7.svg | 399 ++ .../guides/prog_guide/img/memory-management.svg | 2133 +++++++++ 
.../guides/prog_guide/img/memory-management2.svg | 2270 +++++++++ .../dpdk/doc/guides/prog_guide/img/mempool.svg | 2403 ++++++++++ .../guides/prog_guide/img/multi_process_memory.svg | 494 ++ .../guides/prog_guide/img/packet_distributor1.png | Bin 0 -> 99482 bytes .../guides/prog_guide/img/packet_distributor2.png | Bin 0 -> 102867 bytes .../doc/guides/prog_guide/img/pipe_prefetch_sm.png | Bin 0 -> 71898 bytes .../guides/prog_guide/img/pkt_drop_probability.png | Bin 0 -> 46368 bytes .../doc/guides/prog_guide/img/pkt_flow_kni.png | Bin 0 -> 51088 bytes .../prog_guide/img/pkt_proc_pipeline_qos.png | Bin 0 -> 93198 bytes .../guides/prog_guide/img/prefetch_pipeline.png | Bin 0 -> 56358 bytes .../doc/guides/prog_guide/img/rcu_general_info.svg | 509 ++ .../doc/guides/prog_guide/img/ring-dequeue1.svg | 659 +++ .../doc/guides/prog_guide/img/ring-dequeue2.svg | 622 +++ .../doc/guides/prog_guide/img/ring-dequeue3.svg | 617 +++ .../doc/guides/prog_guide/img/ring-enqueue1.svg | 568 +++ .../doc/guides/prog_guide/img/ring-enqueue2.svg | 612 +++ .../doc/guides/prog_guide/img/ring-enqueue3.svg | 607 +++ .../doc/guides/prog_guide/img/ring-modulo1.svg | 775 +++ .../doc/guides/prog_guide/img/ring-modulo2.svg | 820 ++++ .../doc/guides/prog_guide/img/ring-mp-enqueue1.svg | 707 +++ .../doc/guides/prog_guide/img/ring-mp-enqueue2.svg | 748 +++ .../doc/guides/prog_guide/img/ring-mp-enqueue3.svg | 790 ++++ .../doc/guides/prog_guide/img/ring-mp-enqueue4.svg | 785 +++ .../doc/guides/prog_guide/img/ring-mp-enqueue5.svg | 693 +++ src/spdk/dpdk/doc/guides/prog_guide/img/ring1.svg | 355 ++ .../guides/prog_guide/img/sched_hier_per_port.svg | 492 ++ .../dpdk/doc/guides/prog_guide/img/stateful-op.svg | 116 + .../guides/prog_guide/img/stateless-op-shared.svg | 124 + .../doc/guides/prog_guide/img/stateless-op.svg | 140 + .../dpdk/doc/guides/prog_guide/img/tbl24_tbl8.png | Bin 0 -> 95193 bytes .../doc/guides/prog_guide/img/tbl24_tbl8_tbl8.png | Bin 0 -> 114003 bytes .../doc/guides/prog_guide/img/turbo_tb_decode.svg | 1471 ++++++ .../doc/guides/prog_guide/img/turbo_tb_encode.svg | 1948 ++++++++ .../doc/guides/prog_guide/img/vhost_net_arch.png | Bin 0 -> 251431 bytes src/spdk/dpdk/doc/guides/prog_guide/index.rst | 75 + src/spdk/dpdk/doc/guides/prog_guide/intro.rst | 56 + .../prog_guide/ip_fragment_reassembly_lib.rst | 110 + src/spdk/dpdk/doc/guides/prog_guide/ipsec_lib.rst | 324 ++ .../doc/guides/prog_guide/kernel_nic_interface.rst | 323 ++ .../prog_guide/link_bonding_poll_mode_drv_lib.rst | 498 ++ src/spdk/dpdk/doc/guides/prog_guide/lpm6_lib.rst | 208 + src/spdk/dpdk/doc/guides/prog_guide/lpm_lib.rst | 198 + src/spdk/dpdk/doc/guides/prog_guide/lto.rst | 43 + src/spdk/dpdk/doc/guides/prog_guide/mbuf_lib.rst | 252 + src/spdk/dpdk/doc/guides/prog_guide/member_lib.rst | 392 ++ .../dpdk/doc/guides/prog_guide/mempool_lib.rst | 155 + src/spdk/dpdk/doc/guides/prog_guide/meson_ut.rst | 66 + .../dpdk/doc/guides/prog_guide/metrics_lib.rst | 296 ++ .../doc/guides/prog_guide/multi_proc_support.rst | 353 ++ src/spdk/dpdk/doc/guides/prog_guide/overview.rst | 170 + .../prog_guide/packet_classif_access_ctrl.rst | 550 +++ .../doc/guides/prog_guide/packet_distrib_lib.rst | 94 + .../doc/guides/prog_guide/packet_framework.rst | 1150 +++++ src/spdk/dpdk/doc/guides/prog_guide/pdump_lib.rst | 86 + .../doc/guides/prog_guide/perf_opt_guidelines.rst | 19 + .../dpdk/doc/guides/prog_guide/poll_mode_drv.rst | 617 +++ src/spdk/dpdk/doc/guides/prog_guide/power_man.rst | 202 + .../dpdk/doc/guides/prog_guide/profile_app.rst | 101 + 
.../dpdk/doc/guides/prog_guide/qos_framework.rst | 1741 +++++++ src/spdk/dpdk/doc/guides/prog_guide/rawdev.rst | 107 + src/spdk/dpdk/doc/guides/prog_guide/rcu_lib.rst | 251 + .../dpdk/doc/guides/prog_guide/reorder_lib.rst | 88 + src/spdk/dpdk/doc/guides/prog_guide/ring_lib.rst | 454 ++ src/spdk/dpdk/doc/guides/prog_guide/rte_flow.rst | 3251 +++++++++++++ .../dpdk/doc/guides/prog_guide/rte_security.rst | 659 +++ .../dpdk/doc/guides/prog_guide/service_cores.rst | 54 + src/spdk/dpdk/doc/guides/prog_guide/source_org.rst | 63 + src/spdk/dpdk/doc/guides/prog_guide/stack_lib.rst | 83 + .../guides/prog_guide/switch_representation.rst | 835 ++++ .../dpdk/doc/guides/prog_guide/telemetry_lib.rst | 62 + .../prog_guide/thread_safety_dpdk_functions.rst | 75 + src/spdk/dpdk/doc/guides/prog_guide/timer_lib.rst | 77 + src/spdk/dpdk/doc/guides/prog_guide/trace_lib.rst | 357 ++ .../doc/guides/prog_guide/traffic_management.rst | 223 + .../prog_guide/traffic_metering_and_policing.rst | 74 + src/spdk/dpdk/doc/guides/prog_guide/vhost_lib.rst | 381 ++ .../guides/prog_guide/writing_efficient_code.rst | 220 + src/spdk/dpdk/doc/guides/rawdevs/dpaa2_cmdif.rst | 104 + src/spdk/dpdk/doc/guides/rawdevs/dpaa2_qdma.rst | 101 + src/spdk/dpdk/doc/guides/rawdevs/ifpga.rst | 112 + src/spdk/dpdk/doc/guides/rawdevs/index.rst | 20 + src/spdk/dpdk/doc/guides/rawdevs/ioat.rst | 265 ++ src/spdk/dpdk/doc/guides/rawdevs/ntb.rst | 154 + src/spdk/dpdk/doc/guides/rawdevs/octeontx2_dma.rst | 115 + src/spdk/dpdk/doc/guides/rawdevs/octeontx2_ep.rst | 89 + src/spdk/dpdk/doc/guides/rel_notes/deprecation.rst | 140 + src/spdk/dpdk/doc/guides/rel_notes/index.rst | 33 + .../dpdk/doc/guides/rel_notes/known_issues.rst | 890 ++++ .../dpdk/doc/guides/rel_notes/release_16_04.rst | 652 +++ .../dpdk/doc/guides/rel_notes/release_16_07.rst | 552 +++ .../dpdk/doc/guides/rel_notes/release_16_11.rst | 603 +++ .../dpdk/doc/guides/rel_notes/release_17_02.rst | 694 +++ .../dpdk/doc/guides/rel_notes/release_17_05.rst | 839 ++++ .../dpdk/doc/guides/rel_notes/release_17_08.rst | 617 +++ .../dpdk/doc/guides/rel_notes/release_17_11.rst | 838 ++++ .../dpdk/doc/guides/rel_notes/release_18_02.rst | 570 +++ .../dpdk/doc/guides/rel_notes/release_18_05.rst | 983 ++++ .../dpdk/doc/guides/rel_notes/release_18_08.rst | 549 +++ .../dpdk/doc/guides/rel_notes/release_18_11.rst | 863 ++++ .../dpdk/doc/guides/rel_notes/release_19_02.rst | 665 +++ .../dpdk/doc/guides/rel_notes/release_19_05.rst | 726 +++ .../dpdk/doc/guides/rel_notes/release_19_08.rst | 748 +++ .../dpdk/doc/guides/rel_notes/release_19_11.rst | 919 ++++ src/spdk/dpdk/doc/guides/rel_notes/release_1_8.rst | 36 + .../dpdk/doc/guides/rel_notes/release_20_02.rst | 591 +++ .../dpdk/doc/guides/rel_notes/release_20_05.rst | 689 +++ src/spdk/dpdk/doc/guides/rel_notes/release_2_0.rst | 111 + src/spdk/dpdk/doc/guides/rel_notes/release_2_1.rst | 1014 ++++ src/spdk/dpdk/doc/guides/rel_notes/release_2_2.rst | 624 +++ .../dpdk/doc/guides/sample_app_ug/bbdev_app.rst | 133 + .../dpdk/doc/guides/sample_app_ug/cmd_line.rst | 158 + .../dpdk/doc/guides/sample_app_ug/compiling.rst | 108 + .../dpdk/doc/guides/sample_app_ug/dist_app.rst | 152 + src/spdk/dpdk/doc/guides/sample_app_ug/ethtool.rst | 116 + .../doc/guides/sample_app_ug/eventdev_pipeline.rst | 145 + .../doc/guides/sample_app_ug/fips_validation.rst | 132 + .../doc/guides/sample_app_ug/flow_classify.rst | 544 +++ .../doc/guides/sample_app_ug/flow_filtering.rst | 515 ++ .../dpdk/doc/guides/sample_app_ug/hello_world.rst | 94 + .../img/client_svr_sym_multi_proc_app.png | Bin 0 -> 
192400 bytes .../dpdk/doc/guides/sample_app_ug/img/dist_app.svg | 474 ++ .../doc/guides/sample_app_ug/img/dist_perf.svg | 462 ++ .../doc/guides/sample_app_ug/img/example_rules.png | Bin 0 -> 4342 bytes .../sample_app_ug/img/exception_path_example.svg | 102 + .../guides/sample_app_ug/img/ipsec_endpoints.svg | 850 ++++ .../doc/guides/sample_app_ug/img/ipv4_acl_rule.png | Bin 0 -> 2791 bytes .../doc/guides/sample_app_ug/img/kernel_nic.png | Bin 0 -> 36245 bytes .../sample_app_ug/img/l2_fwd_benchmark_setup.svg | 520 ++ .../sample_app_ug/img/l2_fwd_encrypt_flow.svg | 194 + .../img/l2_fwd_virtenv_benchmark_setup.png | Bin 0 -> 86633 bytes .../doc/guides/sample_app_ug/img/l2_fwd_vm2vm.svg | 311 ++ .../guides/sample_app_ug/img/load_bal_app_arch.png | Bin 0 -> 96131 bytes .../sample_app_ug/img/overlay_networking.svg | 1847 ++++++++ .../sample_app_ug/img/performance_thread_1.svg | 799 ++++ .../sample_app_ug/img/performance_thread_2.svg | 865 ++++ .../guides/sample_app_ug/img/pipeline_overview.png | Bin 0 -> 16728 bytes .../doc/guides/sample_app_ug/img/ptpclient.svg | 528 +++ .../sample_app_ug/img/qos_sched_app_arch.png | Bin 0 -> 65558 bytes .../img/quickassist_block_diagram.png | Bin 0 -> 30748 bytes .../sample_app_ug/img/ring_pipeline_perf_setup.png | Bin 0 -> 32456 bytes .../guides/sample_app_ug/img/server_node_efd.svg | 1254 +++++ .../sample_app_ug/img/sym_multi_proc_app.png | Bin 0 -> 198226 bytes .../sample_app_ug/img/tep_termination_arch.svg | 1400 ++++++ .../guides/sample_app_ug/img/test_pipeline_app.png | Bin 0 -> 67410 bytes .../guides/sample_app_ug/img/threads_pipelines.png | Bin 0 -> 15578 bytes .../sample_app_ug/img/vm_power_mgr_highlevel.svg | 1345 ++++++ .../img/vm_power_mgr_vm_request_seq.svg | 700 +++ .../guides/sample_app_ug/img/vmdq_dcb_example.svg | 764 +++ src/spdk/dpdk/doc/guides/sample_app_ug/index.rst | 60 + src/spdk/dpdk/doc/guides/sample_app_ug/intro.rst | 96 + src/spdk/dpdk/doc/guides/sample_app_ug/ioat.rst | 581 +++ src/spdk/dpdk/doc/guides/sample_app_ug/ip_frag.rst | 140 + .../dpdk/doc/guides/sample_app_ug/ip_pipeline.rst | 561 +++ .../doc/guides/sample_app_ug/ip_reassembly.rst | 238 + .../dpdk/doc/guides/sample_app_ug/ipsec_secgw.rst | 958 ++++ .../doc/guides/sample_app_ug/ipv4_multicast.rst | 325 ++ .../dpdk/doc/guides/sample_app_ug/keep_alive.rst | 144 + .../guides/sample_app_ug/kernel_nic_interface.rst | 318 ++ .../doc/guides/sample_app_ug/l2_forward_cat.rst | 207 + .../doc/guides/sample_app_ug/l2_forward_crypto.rst | 484 ++ .../doc/guides/sample_app_ug/l2_forward_event.rst | 692 +++ .../guides/sample_app_ug/l2_forward_job_stats.rst | 550 +++ .../sample_app_ug/l2_forward_real_virtual.rst | 455 ++ .../dpdk/doc/guides/sample_app_ug/l3_forward.rst | 384 ++ .../sample_app_ug/l3_forward_access_ctrl.rst | 339 ++ .../doc/guides/sample_app_ug/l3_forward_graph.rst | 334 ++ .../guides/sample_app_ug/l3_forward_power_man.rst | 461 ++ .../doc/guides/sample_app_ug/link_status_intr.rst | 415 ++ .../doc/guides/sample_app_ug/multi_process.rst | 323 ++ src/spdk/dpdk/doc/guides/sample_app_ug/ntb.rst | 94 + .../doc/guides/sample_app_ug/packet_ordering.rst | 60 + .../guides/sample_app_ug/performance_thread.rst | 1221 +++++ .../dpdk/doc/guides/sample_app_ug/ptpclient.rst | 252 + .../dpdk/doc/guides/sample_app_ug/qos_metering.rst | 155 + .../doc/guides/sample_app_ug/qos_scheduler.rst | 375 ++ .../doc/guides/sample_app_ug/rxtx_callbacks.rst | 207 + .../doc/guides/sample_app_ug/server_node_efd.rst | 450 ++ .../doc/guides/sample_app_ug/service_cores.rst | 145 + 
.../dpdk/doc/guides/sample_app_ug/skeleton.rst | 290 ++ .../doc/guides/sample_app_ug/tep_termination.rst | 233 + .../doc/guides/sample_app_ug/test_pipeline.rst | 239 + src/spdk/dpdk/doc/guides/sample_app_ug/timer.rst | 176 + src/spdk/dpdk/doc/guides/sample_app_ug/vdpa.rst | 120 + src/spdk/dpdk/doc/guides/sample_app_ug/vhost.rst | 210 + .../dpdk/doc/guides/sample_app_ug/vhost_blk.rst | 65 + .../dpdk/doc/guides/sample_app_ug/vhost_crypto.rst | 80 + .../guides/sample_app_ug/vm_power_management.rst | 942 ++++ .../guides/sample_app_ug/vmdq_dcb_forwarding.rst | 266 ++ .../doc/guides/sample_app_ug/vmdq_forwarding.rst | 208 + .../dpdk/doc/guides/testpmd_app_ug/build_app.rst | 34 + src/spdk/dpdk/doc/guides/testpmd_app_ug/index.rst | 16 + src/spdk/dpdk/doc/guides/testpmd_app_ug/intro.rst | 14 + .../dpdk/doc/guides/testpmd_app_ug/run_app.rst | 490 ++ .../doc/guides/testpmd_app_ug/testpmd_funcs.rst | 4993 ++++++++++++++++++++ src/spdk/dpdk/doc/guides/tools/comp_perf.rst | 120 + src/spdk/dpdk/doc/guides/tools/cryptoperf.rst | 469 ++ src/spdk/dpdk/doc/guides/tools/devbind.rst | 116 + .../guides/tools/img/eventdev_order_atq_test.svg | 1546 ++++++ .../guides/tools/img/eventdev_order_queue_test.svg | 1643 +++++++ .../guides/tools/img/eventdev_perf_atq_test.svg | 3158 +++++++++++++ .../guides/tools/img/eventdev_perf_queue_test.svg | 2569 ++++++++++ .../img/eventdev_pipeline_atq_test_generic.svg | 3465 ++++++++++++++ .../eventdev_pipeline_atq_test_internal_port.svg | 3344 +++++++++++++ .../img/eventdev_pipeline_queue_test_generic.svg | 3688 +++++++++++++++ .../eventdev_pipeline_queue_test_internal_port.svg | 3826 +++++++++++++++ src/spdk/dpdk/doc/guides/tools/index.rst | 18 + src/spdk/dpdk/doc/guides/tools/pdump.rst | 124 + src/spdk/dpdk/doc/guides/tools/pmdinfo.rst | 29 + src/spdk/dpdk/doc/guides/tools/proc_info.rst | 85 + src/spdk/dpdk/doc/guides/tools/testbbdev.rst | 823 ++++ src/spdk/dpdk/doc/guides/tools/testeventdev.rst | 729 +++ .../dpdk/doc/guides/vdpadevs/features/default.ini | 50 + .../dpdk/doc/guides/vdpadevs/features/ifcvf.ini | 8 + .../dpdk/doc/guides/vdpadevs/features/mlx5.ini | 27 + .../dpdk/doc/guides/vdpadevs/features_overview.rst | 155 + src/spdk/dpdk/doc/guides/vdpadevs/ifc.rst | 106 + src/spdk/dpdk/doc/guides/vdpadevs/index.rst | 16 + src/spdk/dpdk/doc/guides/vdpadevs/mlx5.rst | 113 + .../dpdk/doc/guides/windows_gsg/build_dpdk.rst | 133 + src/spdk/dpdk/doc/guides/windows_gsg/index.rst | 14 + src/spdk/dpdk/doc/guides/windows_gsg/intro.rst | 20 + 592 files changed, 200282 insertions(+) create mode 100644 src/spdk/dpdk/doc/guides/bbdevs/features/default.ini create mode 100644 src/spdk/dpdk/doc/guides/bbdevs/features/fpga_5gnr_fec.ini create mode 100644 src/spdk/dpdk/doc/guides/bbdevs/features/fpga_lte_fec.ini create mode 100644 src/spdk/dpdk/doc/guides/bbdevs/features/mbc.ini create mode 100644 src/spdk/dpdk/doc/guides/bbdevs/features/null.ini create mode 100644 src/spdk/dpdk/doc/guides/bbdevs/features/turbo_sw.ini create mode 100644 src/spdk/dpdk/doc/guides/bbdevs/fpga_5gnr_fec.rst create mode 100644 src/spdk/dpdk/doc/guides/bbdevs/fpga_lte_fec.rst create mode 100644 src/spdk/dpdk/doc/guides/bbdevs/index.rst create mode 100644 src/spdk/dpdk/doc/guides/bbdevs/null.rst create mode 100644 src/spdk/dpdk/doc/guides/bbdevs/overview.rst create mode 100644 src/spdk/dpdk/doc/guides/bbdevs/turbo_sw.rst create mode 100644 src/spdk/dpdk/doc/guides/compressdevs/features/default.ini create mode 100644 src/spdk/dpdk/doc/guides/compressdevs/features/isal.ini create mode 100644 
src/spdk/dpdk/doc/guides/compressdevs/features/octeontx.ini create mode 100644 src/spdk/dpdk/doc/guides/compressdevs/features/qat.ini create mode 100644 src/spdk/dpdk/doc/guides/compressdevs/features/zlib.ini create mode 100644 src/spdk/dpdk/doc/guides/compressdevs/index.rst create mode 100644 src/spdk/dpdk/doc/guides/compressdevs/isal.rst create mode 100644 src/spdk/dpdk/doc/guides/compressdevs/octeontx.rst create mode 100644 src/spdk/dpdk/doc/guides/compressdevs/overview.rst create mode 100644 src/spdk/dpdk/doc/guides/compressdevs/qat_comp.rst create mode 100644 src/spdk/dpdk/doc/guides/compressdevs/zlib.rst create mode 100644 src/spdk/dpdk/doc/guides/conf.py create mode 100644 src/spdk/dpdk/doc/guides/contributing/abi_policy.rst create mode 100644 src/spdk/dpdk/doc/guides/contributing/abi_versioning.rst create mode 100644 src/spdk/dpdk/doc/guides/contributing/cheatsheet.rst create mode 100644 src/spdk/dpdk/doc/guides/contributing/coding_style.rst create mode 100644 src/spdk/dpdk/doc/guides/contributing/design.rst create mode 100644 src/spdk/dpdk/doc/guides/contributing/documentation.rst create mode 100644 src/spdk/dpdk/doc/guides/contributing/img/abi_stability_policy.svg create mode 100644 src/spdk/dpdk/doc/guides/contributing/img/patch_cheatsheet.svg create mode 100644 src/spdk/dpdk/doc/guides/contributing/img/what_is_an_abi.svg create mode 100644 src/spdk/dpdk/doc/guides/contributing/index.rst create mode 100644 src/spdk/dpdk/doc/guides/contributing/patches.rst create mode 100644 src/spdk/dpdk/doc/guides/contributing/stable.rst create mode 100644 src/spdk/dpdk/doc/guides/contributing/vulnerability.rst create mode 100644 src/spdk/dpdk/doc/guides/cryptodevs/aesni_gcm.rst create mode 100644 src/spdk/dpdk/doc/guides/cryptodevs/aesni_mb.rst create mode 100644 src/spdk/dpdk/doc/guides/cryptodevs/armv8.rst create mode 100644 src/spdk/dpdk/doc/guides/cryptodevs/caam_jr.rst create mode 100644 src/spdk/dpdk/doc/guides/cryptodevs/ccp.rst create mode 100644 src/spdk/dpdk/doc/guides/cryptodevs/dpaa2_sec.rst create mode 100644 src/spdk/dpdk/doc/guides/cryptodevs/dpaa_sec.rst create mode 100644 src/spdk/dpdk/doc/guides/cryptodevs/features/aesni_gcm.ini create mode 100644 src/spdk/dpdk/doc/guides/cryptodevs/features/aesni_mb.ini create mode 100644 src/spdk/dpdk/doc/guides/cryptodevs/features/armv8.ini create mode 100644 src/spdk/dpdk/doc/guides/cryptodevs/features/caam_jr.ini create mode 100644 src/spdk/dpdk/doc/guides/cryptodevs/features/ccp.ini create mode 100644 src/spdk/dpdk/doc/guides/cryptodevs/features/default.ini create mode 100644 src/spdk/dpdk/doc/guides/cryptodevs/features/dpaa2_sec.ini create mode 100644 src/spdk/dpdk/doc/guides/cryptodevs/features/dpaa_sec.ini create mode 100644 src/spdk/dpdk/doc/guides/cryptodevs/features/kasumi.ini create mode 100644 src/spdk/dpdk/doc/guides/cryptodevs/features/mvsam.ini create mode 100644 src/spdk/dpdk/doc/guides/cryptodevs/features/nitrox.ini create mode 100644 src/spdk/dpdk/doc/guides/cryptodevs/features/null.ini create mode 100644 src/spdk/dpdk/doc/guides/cryptodevs/features/octeontx.ini create mode 100644 src/spdk/dpdk/doc/guides/cryptodevs/features/octeontx2.ini create mode 100644 src/spdk/dpdk/doc/guides/cryptodevs/features/openssl.ini create mode 100644 src/spdk/dpdk/doc/guides/cryptodevs/features/qat.ini create mode 100644 src/spdk/dpdk/doc/guides/cryptodevs/features/snow3g.ini create mode 100644 src/spdk/dpdk/doc/guides/cryptodevs/features/virtio.ini create mode 100644 src/spdk/dpdk/doc/guides/cryptodevs/features/zuc.ini create mode 100644 
src/spdk/dpdk/doc/guides/cryptodevs/img/scheduler-overview.svg create mode 100644 src/spdk/dpdk/doc/guides/cryptodevs/index.rst create mode 100644 src/spdk/dpdk/doc/guides/cryptodevs/kasumi.rst create mode 100644 src/spdk/dpdk/doc/guides/cryptodevs/mvsam.rst create mode 100644 src/spdk/dpdk/doc/guides/cryptodevs/nitrox.rst create mode 100644 src/spdk/dpdk/doc/guides/cryptodevs/null.rst create mode 100644 src/spdk/dpdk/doc/guides/cryptodevs/octeontx.rst create mode 100644 src/spdk/dpdk/doc/guides/cryptodevs/octeontx2.rst create mode 100644 src/spdk/dpdk/doc/guides/cryptodevs/openssl.rst create mode 100644 src/spdk/dpdk/doc/guides/cryptodevs/overview.rst create mode 100644 src/spdk/dpdk/doc/guides/cryptodevs/qat.rst create mode 100644 src/spdk/dpdk/doc/guides/cryptodevs/scheduler.rst create mode 100644 src/spdk/dpdk/doc/guides/cryptodevs/snow3g.rst create mode 100644 src/spdk/dpdk/doc/guides/cryptodevs/virtio.rst create mode 100644 src/spdk/dpdk/doc/guides/cryptodevs/zuc.rst create mode 100644 src/spdk/dpdk/doc/guides/custom.css create mode 100644 src/spdk/dpdk/doc/guides/eventdevs/dpaa.rst create mode 100644 src/spdk/dpdk/doc/guides/eventdevs/dpaa2.rst create mode 100644 src/spdk/dpdk/doc/guides/eventdevs/dsw.rst create mode 100644 src/spdk/dpdk/doc/guides/eventdevs/index.rst create mode 100644 src/spdk/dpdk/doc/guides/eventdevs/octeontx.rst create mode 100644 src/spdk/dpdk/doc/guides/eventdevs/octeontx2.rst create mode 100644 src/spdk/dpdk/doc/guides/eventdevs/opdl.rst create mode 100644 src/spdk/dpdk/doc/guides/eventdevs/sw.rst create mode 100644 src/spdk/dpdk/doc/guides/faq/faq.rst create mode 100644 src/spdk/dpdk/doc/guides/faq/index.rst create mode 100644 src/spdk/dpdk/doc/guides/freebsd_gsg/build_dpdk.rst create mode 100644 src/spdk/dpdk/doc/guides/freebsd_gsg/build_sample_apps.rst create mode 100644 src/spdk/dpdk/doc/guides/freebsd_gsg/freebsd_eal_parameters.rst create mode 100644 src/spdk/dpdk/doc/guides/freebsd_gsg/index.rst create mode 100644 src/spdk/dpdk/doc/guides/freebsd_gsg/install_from_ports.rst create mode 100644 src/spdk/dpdk/doc/guides/freebsd_gsg/intro.rst create mode 100644 src/spdk/dpdk/doc/guides/howto/debug_troubleshoot.rst create mode 100644 src/spdk/dpdk/doc/guides/howto/flow_bifurcation.rst create mode 100644 src/spdk/dpdk/doc/guides/howto/img/dtg_consumer_ring.svg create mode 100644 src/spdk/dpdk/doc/guides/howto/img/dtg_crypto.svg create mode 100644 src/spdk/dpdk/doc/guides/howto/img/dtg_distributor_worker.svg create mode 100644 src/spdk/dpdk/doc/guides/howto/img/dtg_mempool.svg create mode 100644 src/spdk/dpdk/doc/guides/howto/img/dtg_pdump.svg create mode 100644 src/spdk/dpdk/doc/guides/howto/img/dtg_producer_ring.svg create mode 100644 src/spdk/dpdk/doc/guides/howto/img/dtg_qos_tx.svg create mode 100644 src/spdk/dpdk/doc/guides/howto/img/dtg_rx_rate.svg create mode 100644 src/spdk/dpdk/doc/guides/howto/img/dtg_rx_tx_drop.svg create mode 100644 src/spdk/dpdk/doc/guides/howto/img/dtg_sample_app_model.svg create mode 100644 src/spdk/dpdk/doc/guides/howto/img/dtg_service.svg create mode 100644 src/spdk/dpdk/doc/guides/howto/img/flow_bifurcation_overview.svg create mode 100644 src/spdk/dpdk/doc/guides/howto/img/lm_bond_virtio_sriov.svg create mode 100644 src/spdk/dpdk/doc/guides/howto/img/lm_vhost_user.svg create mode 100644 src/spdk/dpdk/doc/guides/howto/img/packet_capture_framework.svg create mode 100644 src/spdk/dpdk/doc/guides/howto/img/pvp_2nics.svg create mode 100644 src/spdk/dpdk/doc/guides/howto/img/use_models_for_running_dpdk_in_containers.svg create mode 
100644 src/spdk/dpdk/doc/guides/howto/img/vf_daemon_overview.svg create mode 100644 src/spdk/dpdk/doc/guides/howto/img/virtio_user_as_exceptional_path.svg create mode 100644 src/spdk/dpdk/doc/guides/howto/img/virtio_user_for_container_networking.svg create mode 100644 src/spdk/dpdk/doc/guides/howto/index.rst create mode 100644 src/spdk/dpdk/doc/guides/howto/lm_bond_virtio_sriov.rst create mode 100644 src/spdk/dpdk/doc/guides/howto/lm_virtio_vhost_user.rst create mode 100644 src/spdk/dpdk/doc/guides/howto/openwrt.rst create mode 100644 src/spdk/dpdk/doc/guides/howto/packet_capture_framework.rst create mode 100644 src/spdk/dpdk/doc/guides/howto/pvp_reference_benchmark.rst create mode 100644 src/spdk/dpdk/doc/guides/howto/rte_flow.rst create mode 100644 src/spdk/dpdk/doc/guides/howto/telemetry.rst create mode 100644 src/spdk/dpdk/doc/guides/howto/vfd.rst create mode 100644 src/spdk/dpdk/doc/guides/howto/virtio_user_as_exceptional_path.rst create mode 100644 src/spdk/dpdk/doc/guides/howto/virtio_user_for_container_networking.rst create mode 100644 src/spdk/dpdk/doc/guides/index.rst create mode 100644 src/spdk/dpdk/doc/guides/linux_gsg/build_dpdk.rst create mode 100644 src/spdk/dpdk/doc/guides/linux_gsg/build_sample_apps.rst create mode 100644 src/spdk/dpdk/doc/guides/linux_gsg/cross_build_dpdk_for_arm64.rst create mode 100644 src/spdk/dpdk/doc/guides/linux_gsg/eal_args.include.rst create mode 100644 src/spdk/dpdk/doc/guides/linux_gsg/enable_func.rst create mode 100644 src/spdk/dpdk/doc/guides/linux_gsg/index.rst create mode 100644 src/spdk/dpdk/doc/guides/linux_gsg/intro.rst create mode 100644 src/spdk/dpdk/doc/guides/linux_gsg/linux_drivers.rst create mode 100644 src/spdk/dpdk/doc/guides/linux_gsg/linux_eal_parameters.rst create mode 100644 src/spdk/dpdk/doc/guides/linux_gsg/nic_perf_intel_platform.rst create mode 100644 src/spdk/dpdk/doc/guides/linux_gsg/quick_start.rst create mode 100644 src/spdk/dpdk/doc/guides/linux_gsg/sys_reqs.rst create mode 100644 src/spdk/dpdk/doc/guides/mempool/index.rst create mode 100644 src/spdk/dpdk/doc/guides/mempool/octeontx.rst create mode 100644 src/spdk/dpdk/doc/guides/mempool/octeontx2.rst create mode 100644 src/spdk/dpdk/doc/guides/meson.build create mode 100644 src/spdk/dpdk/doc/guides/nics/af_packet.rst create mode 100644 src/spdk/dpdk/doc/guides/nics/af_xdp.rst create mode 100644 src/spdk/dpdk/doc/guides/nics/ark.rst create mode 100644 src/spdk/dpdk/doc/guides/nics/atlantic.rst create mode 100644 src/spdk/dpdk/doc/guides/nics/avp.rst create mode 100644 src/spdk/dpdk/doc/guides/nics/axgbe.rst create mode 100644 src/spdk/dpdk/doc/guides/nics/bnx2x.rst create mode 100644 src/spdk/dpdk/doc/guides/nics/bnxt.rst create mode 100644 src/spdk/dpdk/doc/guides/nics/build_and_test.rst create mode 100644 src/spdk/dpdk/doc/guides/nics/cxgbe.rst create mode 100644 src/spdk/dpdk/doc/guides/nics/dpaa.rst create mode 100644 src/spdk/dpdk/doc/guides/nics/dpaa2.rst create mode 100644 src/spdk/dpdk/doc/guides/nics/e1000em.rst create mode 100644 src/spdk/dpdk/doc/guides/nics/ena.rst create mode 100644 src/spdk/dpdk/doc/guides/nics/enetc.rst create mode 100644 src/spdk/dpdk/doc/guides/nics/enic.rst create mode 100644 src/spdk/dpdk/doc/guides/nics/fail_safe.rst create mode 100644 src/spdk/dpdk/doc/guides/nics/features.rst create mode 100644 src/spdk/dpdk/doc/guides/nics/features/af_xdp.ini create mode 100644 src/spdk/dpdk/doc/guides/nics/features/afpacket.ini create mode 100644 src/spdk/dpdk/doc/guides/nics/features/ark.ini create mode 100644 
src/spdk/dpdk/doc/guides/nics/features/atlantic.ini create mode 100644 src/spdk/dpdk/doc/guides/nics/features/avp.ini create mode 100644 src/spdk/dpdk/doc/guides/nics/features/axgbe.ini create mode 100644 src/spdk/dpdk/doc/guides/nics/features/bnx2x.ini create mode 100644 src/spdk/dpdk/doc/guides/nics/features/bnxt.ini create mode 100644 src/spdk/dpdk/doc/guides/nics/features/cxgbe.ini create mode 100644 src/spdk/dpdk/doc/guides/nics/features/cxgbevf.ini create mode 100644 src/spdk/dpdk/doc/guides/nics/features/default.ini create mode 100644 src/spdk/dpdk/doc/guides/nics/features/dpaa.ini create mode 100644 src/spdk/dpdk/doc/guides/nics/features/dpaa2.ini create mode 100644 src/spdk/dpdk/doc/guides/nics/features/e1000.ini create mode 100644 src/spdk/dpdk/doc/guides/nics/features/ena.ini create mode 100644 src/spdk/dpdk/doc/guides/nics/features/enetc.ini create mode 100644 src/spdk/dpdk/doc/guides/nics/features/enic.ini create mode 100644 src/spdk/dpdk/doc/guides/nics/features/failsafe.ini create mode 100644 src/spdk/dpdk/doc/guides/nics/features/fm10k.ini create mode 100644 src/spdk/dpdk/doc/guides/nics/features/fm10k_vf.ini create mode 100644 src/spdk/dpdk/doc/guides/nics/features/hinic.ini create mode 100644 src/spdk/dpdk/doc/guides/nics/features/hns3.ini create mode 100644 src/spdk/dpdk/doc/guides/nics/features/hns3_vf.ini create mode 100644 src/spdk/dpdk/doc/guides/nics/features/i40e.ini create mode 100644 src/spdk/dpdk/doc/guides/nics/features/i40e_vf.ini create mode 100644 src/spdk/dpdk/doc/guides/nics/features/iavf.ini create mode 100644 src/spdk/dpdk/doc/guides/nics/features/ice.ini create mode 100644 src/spdk/dpdk/doc/guides/nics/features/igb.ini create mode 100644 src/spdk/dpdk/doc/guides/nics/features/igb_vf.ini create mode 100644 src/spdk/dpdk/doc/guides/nics/features/igc.ini create mode 100644 src/spdk/dpdk/doc/guides/nics/features/ionic.ini create mode 100644 src/spdk/dpdk/doc/guides/nics/features/ipn3ke.ini create mode 100644 src/spdk/dpdk/doc/guides/nics/features/ixgbe.ini create mode 100644 src/spdk/dpdk/doc/guides/nics/features/ixgbe_vf.ini create mode 100644 src/spdk/dpdk/doc/guides/nics/features/liquidio.ini create mode 100644 src/spdk/dpdk/doc/guides/nics/features/memif.ini create mode 100644 src/spdk/dpdk/doc/guides/nics/features/mlx4.ini create mode 100644 src/spdk/dpdk/doc/guides/nics/features/mlx5.ini create mode 100644 src/spdk/dpdk/doc/guides/nics/features/mvneta.ini create mode 100644 src/spdk/dpdk/doc/guides/nics/features/mvpp2.ini create mode 100644 src/spdk/dpdk/doc/guides/nics/features/netvsc.ini create mode 100644 src/spdk/dpdk/doc/guides/nics/features/nfb.ini create mode 100644 src/spdk/dpdk/doc/guides/nics/features/nfp.ini create mode 100644 src/spdk/dpdk/doc/guides/nics/features/octeontx.ini create mode 100644 src/spdk/dpdk/doc/guides/nics/features/octeontx2.ini create mode 100644 src/spdk/dpdk/doc/guides/nics/features/octeontx2_vec.ini create mode 100644 src/spdk/dpdk/doc/guides/nics/features/octeontx2_vf.ini create mode 100644 src/spdk/dpdk/doc/guides/nics/features/pcap.ini create mode 100644 src/spdk/dpdk/doc/guides/nics/features/pfe.ini create mode 100644 src/spdk/dpdk/doc/guides/nics/features/qede.ini create mode 100644 src/spdk/dpdk/doc/guides/nics/features/qede_vf.ini create mode 100644 src/spdk/dpdk/doc/guides/nics/features/sfc_efx.ini create mode 100644 src/spdk/dpdk/doc/guides/nics/features/szedata2.ini create mode 100644 src/spdk/dpdk/doc/guides/nics/features/tap.ini create mode 100644 src/spdk/dpdk/doc/guides/nics/features/thunderx.ini create 
mode 100644 src/spdk/dpdk/doc/guides/nics/features/vhost.ini create mode 100644 src/spdk/dpdk/doc/guides/nics/features/virtio.ini create mode 100644 src/spdk/dpdk/doc/guides/nics/features/vmxnet3.ini create mode 100644 src/spdk/dpdk/doc/guides/nics/fm10k.rst create mode 100644 src/spdk/dpdk/doc/guides/nics/hinic.rst create mode 100644 src/spdk/dpdk/doc/guides/nics/hns3.rst create mode 100644 src/spdk/dpdk/doc/guides/nics/i40e.rst create mode 100644 src/spdk/dpdk/doc/guides/nics/ice.rst create mode 100644 src/spdk/dpdk/doc/guides/nics/igb.rst create mode 100644 src/spdk/dpdk/doc/guides/nics/igc.rst create mode 100644 src/spdk/dpdk/doc/guides/nics/img/console.png create mode 100644 src/spdk/dpdk/doc/guides/nics/img/fast_pkt_proc.png create mode 100644 src/spdk/dpdk/doc/guides/nics/img/forward_stats.png create mode 100644 src/spdk/dpdk/doc/guides/nics/img/host_vm_comms.png create mode 100644 src/spdk/dpdk/doc/guides/nics/img/host_vm_comms_qemu.png create mode 100644 src/spdk/dpdk/doc/guides/nics/img/ice_dcf.svg create mode 100644 src/spdk/dpdk/doc/guides/nics/img/intel_perf_test_setup.svg create mode 100644 src/spdk/dpdk/doc/guides/nics/img/inter_vm_comms.png create mode 100644 src/spdk/dpdk/doc/guides/nics/img/mvpp2_tm.svg create mode 100644 src/spdk/dpdk/doc/guides/nics/img/perf_benchmark.png create mode 100644 src/spdk/dpdk/doc/guides/nics/img/single_port_nic.png create mode 100644 src/spdk/dpdk/doc/guides/nics/img/szedata2_nfb200g_architecture.svg create mode 100644 src/spdk/dpdk/doc/guides/nics/img/vm_vm_comms.png create mode 100644 src/spdk/dpdk/doc/guides/nics/img/vmxnet3_int.png create mode 100644 src/spdk/dpdk/doc/guides/nics/img/vswitch_vm.png create mode 100644 src/spdk/dpdk/doc/guides/nics/index.rst create mode 100644 src/spdk/dpdk/doc/guides/nics/intel_vf.rst create mode 100644 src/spdk/dpdk/doc/guides/nics/ionic.rst create mode 100644 src/spdk/dpdk/doc/guides/nics/ipn3ke.rst create mode 100644 src/spdk/dpdk/doc/guides/nics/ixgbe.rst create mode 100644 src/spdk/dpdk/doc/guides/nics/kni.rst create mode 100644 src/spdk/dpdk/doc/guides/nics/liquidio.rst create mode 100644 src/spdk/dpdk/doc/guides/nics/memif.rst create mode 100644 src/spdk/dpdk/doc/guides/nics/mlx4.rst create mode 100644 src/spdk/dpdk/doc/guides/nics/mlx5.rst create mode 100644 src/spdk/dpdk/doc/guides/nics/mvneta.rst create mode 100644 src/spdk/dpdk/doc/guides/nics/mvpp2.rst create mode 100644 src/spdk/dpdk/doc/guides/nics/netvsc.rst create mode 100644 src/spdk/dpdk/doc/guides/nics/nfb.rst create mode 100644 src/spdk/dpdk/doc/guides/nics/nfp.rst create mode 100644 src/spdk/dpdk/doc/guides/nics/null.rst create mode 100644 src/spdk/dpdk/doc/guides/nics/octeontx.rst create mode 100644 src/spdk/dpdk/doc/guides/nics/octeontx2.rst create mode 100644 src/spdk/dpdk/doc/guides/nics/overview.rst create mode 100644 src/spdk/dpdk/doc/guides/nics/pcap_ring.rst create mode 100644 src/spdk/dpdk/doc/guides/nics/pfe.rst create mode 100644 src/spdk/dpdk/doc/guides/nics/qede.rst create mode 100644 src/spdk/dpdk/doc/guides/nics/sfc_efx.rst create mode 100644 src/spdk/dpdk/doc/guides/nics/softnic.rst create mode 100644 src/spdk/dpdk/doc/guides/nics/szedata2.rst create mode 100644 src/spdk/dpdk/doc/guides/nics/tap.rst create mode 100644 src/spdk/dpdk/doc/guides/nics/thunderx.rst create mode 100644 src/spdk/dpdk/doc/guides/nics/vdev_netvsc.rst create mode 100644 src/spdk/dpdk/doc/guides/nics/vhost.rst create mode 100644 src/spdk/dpdk/doc/guides/nics/virtio.rst create mode 100644 src/spdk/dpdk/doc/guides/nics/vmxnet3.rst create mode 100644 
src/spdk/dpdk/doc/guides/platform/bluefield.rst create mode 100644 src/spdk/dpdk/doc/guides/platform/dpaa.rst create mode 100644 src/spdk/dpdk/doc/guides/platform/dpaa2.rst create mode 100644 src/spdk/dpdk/doc/guides/platform/img/octeontx2_packet_flow_hw_accelerators.svg create mode 100644 src/spdk/dpdk/doc/guides/platform/img/octeontx2_resource_virtualization.svg create mode 100644 src/spdk/dpdk/doc/guides/platform/index.rst create mode 100644 src/spdk/dpdk/doc/guides/platform/octeontx.rst create mode 100644 src/spdk/dpdk/doc/guides/platform/octeontx2.rst create mode 100644 src/spdk/dpdk/doc/guides/prog_guide/bbdev.rst create mode 100644 src/spdk/dpdk/doc/guides/prog_guide/bpf_lib.rst create mode 100644 src/spdk/dpdk/doc/guides/prog_guide/build-sdk-meson.rst create mode 100644 src/spdk/dpdk/doc/guides/prog_guide/build_app.rst create mode 100644 src/spdk/dpdk/doc/guides/prog_guide/compressdev.rst create mode 100644 src/spdk/dpdk/doc/guides/prog_guide/cryptodev_lib.rst create mode 100644 src/spdk/dpdk/doc/guides/prog_guide/dev_kit_build_system.rst create mode 100644 src/spdk/dpdk/doc/guides/prog_guide/dev_kit_root_make_help.rst create mode 100644 src/spdk/dpdk/doc/guides/prog_guide/efd_lib.rst create mode 100644 src/spdk/dpdk/doc/guides/prog_guide/env_abstraction_layer.rst create mode 100644 src/spdk/dpdk/doc/guides/prog_guide/event_crypto_adapter.rst create mode 100644 src/spdk/dpdk/doc/guides/prog_guide/event_ethernet_rx_adapter.rst create mode 100644 src/spdk/dpdk/doc/guides/prog_guide/event_ethernet_tx_adapter.rst create mode 100644 src/spdk/dpdk/doc/guides/prog_guide/event_timer_adapter.rst create mode 100644 src/spdk/dpdk/doc/guides/prog_guide/eventdev.rst create mode 100644 src/spdk/dpdk/doc/guides/prog_guide/ext_app_lib_make_help.rst create mode 100644 src/spdk/dpdk/doc/guides/prog_guide/extend_dpdk.rst create mode 100644 src/spdk/dpdk/doc/guides/prog_guide/flow_classify_lib.rst create mode 100644 src/spdk/dpdk/doc/guides/prog_guide/generic_receive_offload_lib.rst create mode 100644 src/spdk/dpdk/doc/guides/prog_guide/generic_segmentation_offload_lib.rst create mode 100644 src/spdk/dpdk/doc/guides/prog_guide/glossary.rst create mode 100644 src/spdk/dpdk/doc/guides/prog_guide/graph_lib.rst create mode 100644 src/spdk/dpdk/doc/guides/prog_guide/hash_lib.rst create mode 100644 src/spdk/dpdk/doc/guides/prog_guide/img/anatomy_of_a_node.svg create mode 100644 src/spdk/dpdk/doc/guides/prog_guide/img/architecture-overview.svg create mode 100644 src/spdk/dpdk/doc/guides/prog_guide/img/blk_diag_dropper.png create mode 100644 src/spdk/dpdk/doc/guides/prog_guide/img/bond-mode-0.svg create mode 100644 src/spdk/dpdk/doc/guides/prog_guide/img/bond-mode-1.svg create mode 100644 src/spdk/dpdk/doc/guides/prog_guide/img/bond-mode-2.svg create mode 100644 src/spdk/dpdk/doc/guides/prog_guide/img/bond-mode-3.svg create mode 100644 src/spdk/dpdk/doc/guides/prog_guide/img/bond-mode-4.svg create mode 100644 src/spdk/dpdk/doc/guides/prog_guide/img/bond-mode-5.svg create mode 100644 src/spdk/dpdk/doc/guides/prog_guide/img/bond-overview.svg create mode 100644 src/spdk/dpdk/doc/guides/prog_guide/img/crypto_op.svg create mode 100644 src/spdk/dpdk/doc/guides/prog_guide/img/crypto_xform_chain.svg create mode 100644 src/spdk/dpdk/doc/guides/prog_guide/img/cryptodev_sym_sess.svg create mode 100644 src/spdk/dpdk/doc/guides/prog_guide/img/data_struct_per_port.png create mode 100644 src/spdk/dpdk/doc/guides/prog_guide/img/drop_probability_eq3.png create mode 100644 
src/spdk/dpdk/doc/guides/prog_guide/img/drop_probability_eq4.png create mode 100644 src/spdk/dpdk/doc/guides/prog_guide/img/drop_probability_graph.png create mode 100644 src/spdk/dpdk/doc/guides/prog_guide/img/efd_i1.svg create mode 100644 src/spdk/dpdk/doc/guides/prog_guide/img/efd_i10.svg create mode 100644 src/spdk/dpdk/doc/guides/prog_guide/img/efd_i11.svg create mode 100644 src/spdk/dpdk/doc/guides/prog_guide/img/efd_i12.svg create mode 100644 src/spdk/dpdk/doc/guides/prog_guide/img/efd_i2.svg create mode 100644 src/spdk/dpdk/doc/guides/prog_guide/img/efd_i3.svg create mode 100644 src/spdk/dpdk/doc/guides/prog_guide/img/efd_i4.svg create mode 100644 src/spdk/dpdk/doc/guides/prog_guide/img/efd_i5.svg create mode 100644 src/spdk/dpdk/doc/guides/prog_guide/img/efd_i6.svg create mode 100644 src/spdk/dpdk/doc/guides/prog_guide/img/efd_i7.svg create mode 100644 src/spdk/dpdk/doc/guides/prog_guide/img/efd_i8.svg create mode 100644 src/spdk/dpdk/doc/guides/prog_guide/img/efd_i9.svg create mode 100644 src/spdk/dpdk/doc/guides/prog_guide/img/eq2_expression.png create mode 100644 src/spdk/dpdk/doc/guides/prog_guide/img/eq2_factor.png create mode 100644 src/spdk/dpdk/doc/guides/prog_guide/img/event_crypto_adapter_op_forward.svg create mode 100644 src/spdk/dpdk/doc/guides/prog_guide/img/event_crypto_adapter_op_new.svg create mode 100644 src/spdk/dpdk/doc/guides/prog_guide/img/eventdev_usage.svg create mode 100644 src/spdk/dpdk/doc/guides/prog_guide/img/ewma_filter_eq_1.png create mode 100644 src/spdk/dpdk/doc/guides/prog_guide/img/ewma_filter_eq_2.png create mode 100644 src/spdk/dpdk/doc/guides/prog_guide/img/ex_data_flow_tru_dropper.png create mode 100644 src/spdk/dpdk/doc/guides/prog_guide/img/figure32.png create mode 100644 src/spdk/dpdk/doc/guides/prog_guide/img/figure33.png create mode 100644 src/spdk/dpdk/doc/guides/prog_guide/img/figure34.png create mode 100644 src/spdk/dpdk/doc/guides/prog_guide/img/figure35.png create mode 100644 src/spdk/dpdk/doc/guides/prog_guide/img/figure37.png create mode 100644 src/spdk/dpdk/doc/guides/prog_guide/img/figure38.png create mode 100644 src/spdk/dpdk/doc/guides/prog_guide/img/figure39.png create mode 100644 src/spdk/dpdk/doc/guides/prog_guide/img/flow_tru_droppper.png create mode 100644 src/spdk/dpdk/doc/guides/prog_guide/img/graph_mem_layout.svg create mode 100644 src/spdk/dpdk/doc/guides/prog_guide/img/gro-key-algorithm.svg create mode 100644 src/spdk/dpdk/doc/guides/prog_guide/img/gso-output-segment-format.svg create mode 100644 src/spdk/dpdk/doc/guides/prog_guide/img/gso-three-seg-mbuf.svg create mode 100644 src/spdk/dpdk/doc/guides/prog_guide/img/hier_sched_blk.png create mode 100644 src/spdk/dpdk/doc/guides/prog_guide/img/kernel_nic_intf.png create mode 100644 src/spdk/dpdk/doc/guides/prog_guide/img/kni_traffic_flow.png create mode 100644 src/spdk/dpdk/doc/guides/prog_guide/img/link_the_nodes.svg create mode 100644 src/spdk/dpdk/doc/guides/prog_guide/img/linuxapp_launch.svg create mode 100644 src/spdk/dpdk/doc/guides/prog_guide/img/m_definition.png create mode 100644 src/spdk/dpdk/doc/guides/prog_guide/img/malloc_heap.svg create mode 100644 src/spdk/dpdk/doc/guides/prog_guide/img/mbuf1.svg create mode 100644 src/spdk/dpdk/doc/guides/prog_guide/img/mbuf2.svg create mode 100644 src/spdk/dpdk/doc/guides/prog_guide/img/member_i1.svg create mode 100644 src/spdk/dpdk/doc/guides/prog_guide/img/member_i2.svg create mode 100644 src/spdk/dpdk/doc/guides/prog_guide/img/member_i3.svg create mode 100644 src/spdk/dpdk/doc/guides/prog_guide/img/member_i4.svg 
create mode 100644 src/spdk/dpdk/doc/guides/prog_guide/img/member_i5.svg create mode 100644 src/spdk/dpdk/doc/guides/prog_guide/img/member_i6.svg create mode 100644 src/spdk/dpdk/doc/guides/prog_guide/img/member_i7.svg create mode 100644 src/spdk/dpdk/doc/guides/prog_guide/img/memory-management.svg create mode 100644 src/spdk/dpdk/doc/guides/prog_guide/img/memory-management2.svg create mode 100644 src/spdk/dpdk/doc/guides/prog_guide/img/mempool.svg create mode 100644 src/spdk/dpdk/doc/guides/prog_guide/img/multi_process_memory.svg create mode 100644 src/spdk/dpdk/doc/guides/prog_guide/img/packet_distributor1.png create mode 100644 src/spdk/dpdk/doc/guides/prog_guide/img/packet_distributor2.png create mode 100644 src/spdk/dpdk/doc/guides/prog_guide/img/pipe_prefetch_sm.png create mode 100644 src/spdk/dpdk/doc/guides/prog_guide/img/pkt_drop_probability.png create mode 100644 src/spdk/dpdk/doc/guides/prog_guide/img/pkt_flow_kni.png create mode 100644 src/spdk/dpdk/doc/guides/prog_guide/img/pkt_proc_pipeline_qos.png create mode 100644 src/spdk/dpdk/doc/guides/prog_guide/img/prefetch_pipeline.png create mode 100644 src/spdk/dpdk/doc/guides/prog_guide/img/rcu_general_info.svg create mode 100644 src/spdk/dpdk/doc/guides/prog_guide/img/ring-dequeue1.svg create mode 100644 src/spdk/dpdk/doc/guides/prog_guide/img/ring-dequeue2.svg create mode 100644 src/spdk/dpdk/doc/guides/prog_guide/img/ring-dequeue3.svg create mode 100644 src/spdk/dpdk/doc/guides/prog_guide/img/ring-enqueue1.svg create mode 100644 src/spdk/dpdk/doc/guides/prog_guide/img/ring-enqueue2.svg create mode 100644 src/spdk/dpdk/doc/guides/prog_guide/img/ring-enqueue3.svg create mode 100644 src/spdk/dpdk/doc/guides/prog_guide/img/ring-modulo1.svg create mode 100644 src/spdk/dpdk/doc/guides/prog_guide/img/ring-modulo2.svg create mode 100644 src/spdk/dpdk/doc/guides/prog_guide/img/ring-mp-enqueue1.svg create mode 100644 src/spdk/dpdk/doc/guides/prog_guide/img/ring-mp-enqueue2.svg create mode 100644 src/spdk/dpdk/doc/guides/prog_guide/img/ring-mp-enqueue3.svg create mode 100644 src/spdk/dpdk/doc/guides/prog_guide/img/ring-mp-enqueue4.svg create mode 100644 src/spdk/dpdk/doc/guides/prog_guide/img/ring-mp-enqueue5.svg create mode 100644 src/spdk/dpdk/doc/guides/prog_guide/img/ring1.svg create mode 100644 src/spdk/dpdk/doc/guides/prog_guide/img/sched_hier_per_port.svg create mode 100644 src/spdk/dpdk/doc/guides/prog_guide/img/stateful-op.svg create mode 100644 src/spdk/dpdk/doc/guides/prog_guide/img/stateless-op-shared.svg create mode 100644 src/spdk/dpdk/doc/guides/prog_guide/img/stateless-op.svg create mode 100644 src/spdk/dpdk/doc/guides/prog_guide/img/tbl24_tbl8.png create mode 100644 src/spdk/dpdk/doc/guides/prog_guide/img/tbl24_tbl8_tbl8.png create mode 100644 src/spdk/dpdk/doc/guides/prog_guide/img/turbo_tb_decode.svg create mode 100644 src/spdk/dpdk/doc/guides/prog_guide/img/turbo_tb_encode.svg create mode 100644 src/spdk/dpdk/doc/guides/prog_guide/img/vhost_net_arch.png create mode 100644 src/spdk/dpdk/doc/guides/prog_guide/index.rst create mode 100644 src/spdk/dpdk/doc/guides/prog_guide/intro.rst create mode 100644 src/spdk/dpdk/doc/guides/prog_guide/ip_fragment_reassembly_lib.rst create mode 100644 src/spdk/dpdk/doc/guides/prog_guide/ipsec_lib.rst create mode 100644 src/spdk/dpdk/doc/guides/prog_guide/kernel_nic_interface.rst create mode 100644 src/spdk/dpdk/doc/guides/prog_guide/link_bonding_poll_mode_drv_lib.rst create mode 100644 src/spdk/dpdk/doc/guides/prog_guide/lpm6_lib.rst create mode 100644 
src/spdk/dpdk/doc/guides/prog_guide/lpm_lib.rst create mode 100644 src/spdk/dpdk/doc/guides/prog_guide/lto.rst create mode 100644 src/spdk/dpdk/doc/guides/prog_guide/mbuf_lib.rst create mode 100644 src/spdk/dpdk/doc/guides/prog_guide/member_lib.rst create mode 100644 src/spdk/dpdk/doc/guides/prog_guide/mempool_lib.rst create mode 100644 src/spdk/dpdk/doc/guides/prog_guide/meson_ut.rst create mode 100644 src/spdk/dpdk/doc/guides/prog_guide/metrics_lib.rst create mode 100644 src/spdk/dpdk/doc/guides/prog_guide/multi_proc_support.rst create mode 100644 src/spdk/dpdk/doc/guides/prog_guide/overview.rst create mode 100644 src/spdk/dpdk/doc/guides/prog_guide/packet_classif_access_ctrl.rst create mode 100644 src/spdk/dpdk/doc/guides/prog_guide/packet_distrib_lib.rst create mode 100644 src/spdk/dpdk/doc/guides/prog_guide/packet_framework.rst create mode 100644 src/spdk/dpdk/doc/guides/prog_guide/pdump_lib.rst create mode 100644 src/spdk/dpdk/doc/guides/prog_guide/perf_opt_guidelines.rst create mode 100644 src/spdk/dpdk/doc/guides/prog_guide/poll_mode_drv.rst create mode 100644 src/spdk/dpdk/doc/guides/prog_guide/power_man.rst create mode 100644 src/spdk/dpdk/doc/guides/prog_guide/profile_app.rst create mode 100644 src/spdk/dpdk/doc/guides/prog_guide/qos_framework.rst create mode 100644 src/spdk/dpdk/doc/guides/prog_guide/rawdev.rst create mode 100644 src/spdk/dpdk/doc/guides/prog_guide/rcu_lib.rst create mode 100644 src/spdk/dpdk/doc/guides/prog_guide/reorder_lib.rst create mode 100644 src/spdk/dpdk/doc/guides/prog_guide/ring_lib.rst create mode 100644 src/spdk/dpdk/doc/guides/prog_guide/rte_flow.rst create mode 100644 src/spdk/dpdk/doc/guides/prog_guide/rte_security.rst create mode 100644 src/spdk/dpdk/doc/guides/prog_guide/service_cores.rst create mode 100644 src/spdk/dpdk/doc/guides/prog_guide/source_org.rst create mode 100644 src/spdk/dpdk/doc/guides/prog_guide/stack_lib.rst create mode 100644 src/spdk/dpdk/doc/guides/prog_guide/switch_representation.rst create mode 100644 src/spdk/dpdk/doc/guides/prog_guide/telemetry_lib.rst create mode 100644 src/spdk/dpdk/doc/guides/prog_guide/thread_safety_dpdk_functions.rst create mode 100644 src/spdk/dpdk/doc/guides/prog_guide/timer_lib.rst create mode 100644 src/spdk/dpdk/doc/guides/prog_guide/trace_lib.rst create mode 100644 src/spdk/dpdk/doc/guides/prog_guide/traffic_management.rst create mode 100644 src/spdk/dpdk/doc/guides/prog_guide/traffic_metering_and_policing.rst create mode 100644 src/spdk/dpdk/doc/guides/prog_guide/vhost_lib.rst create mode 100644 src/spdk/dpdk/doc/guides/prog_guide/writing_efficient_code.rst create mode 100644 src/spdk/dpdk/doc/guides/rawdevs/dpaa2_cmdif.rst create mode 100644 src/spdk/dpdk/doc/guides/rawdevs/dpaa2_qdma.rst create mode 100644 src/spdk/dpdk/doc/guides/rawdevs/ifpga.rst create mode 100644 src/spdk/dpdk/doc/guides/rawdevs/index.rst create mode 100644 src/spdk/dpdk/doc/guides/rawdevs/ioat.rst create mode 100644 src/spdk/dpdk/doc/guides/rawdevs/ntb.rst create mode 100644 src/spdk/dpdk/doc/guides/rawdevs/octeontx2_dma.rst create mode 100644 src/spdk/dpdk/doc/guides/rawdevs/octeontx2_ep.rst create mode 100644 src/spdk/dpdk/doc/guides/rel_notes/deprecation.rst create mode 100644 src/spdk/dpdk/doc/guides/rel_notes/index.rst create mode 100644 src/spdk/dpdk/doc/guides/rel_notes/known_issues.rst create mode 100644 src/spdk/dpdk/doc/guides/rel_notes/release_16_04.rst create mode 100644 src/spdk/dpdk/doc/guides/rel_notes/release_16_07.rst create mode 100644 src/spdk/dpdk/doc/guides/rel_notes/release_16_11.rst create mode 
100644 src/spdk/dpdk/doc/guides/rel_notes/release_17_02.rst create mode 100644 src/spdk/dpdk/doc/guides/rel_notes/release_17_05.rst create mode 100644 src/spdk/dpdk/doc/guides/rel_notes/release_17_08.rst create mode 100644 src/spdk/dpdk/doc/guides/rel_notes/release_17_11.rst create mode 100644 src/spdk/dpdk/doc/guides/rel_notes/release_18_02.rst create mode 100644 src/spdk/dpdk/doc/guides/rel_notes/release_18_05.rst create mode 100644 src/spdk/dpdk/doc/guides/rel_notes/release_18_08.rst create mode 100644 src/spdk/dpdk/doc/guides/rel_notes/release_18_11.rst create mode 100644 src/spdk/dpdk/doc/guides/rel_notes/release_19_02.rst create mode 100644 src/spdk/dpdk/doc/guides/rel_notes/release_19_05.rst create mode 100644 src/spdk/dpdk/doc/guides/rel_notes/release_19_08.rst create mode 100644 src/spdk/dpdk/doc/guides/rel_notes/release_19_11.rst create mode 100644 src/spdk/dpdk/doc/guides/rel_notes/release_1_8.rst create mode 100644 src/spdk/dpdk/doc/guides/rel_notes/release_20_02.rst create mode 100644 src/spdk/dpdk/doc/guides/rel_notes/release_20_05.rst create mode 100644 src/spdk/dpdk/doc/guides/rel_notes/release_2_0.rst create mode 100644 src/spdk/dpdk/doc/guides/rel_notes/release_2_1.rst create mode 100644 src/spdk/dpdk/doc/guides/rel_notes/release_2_2.rst create mode 100644 src/spdk/dpdk/doc/guides/sample_app_ug/bbdev_app.rst create mode 100644 src/spdk/dpdk/doc/guides/sample_app_ug/cmd_line.rst create mode 100644 src/spdk/dpdk/doc/guides/sample_app_ug/compiling.rst create mode 100644 src/spdk/dpdk/doc/guides/sample_app_ug/dist_app.rst create mode 100644 src/spdk/dpdk/doc/guides/sample_app_ug/ethtool.rst create mode 100644 src/spdk/dpdk/doc/guides/sample_app_ug/eventdev_pipeline.rst create mode 100644 src/spdk/dpdk/doc/guides/sample_app_ug/fips_validation.rst create mode 100644 src/spdk/dpdk/doc/guides/sample_app_ug/flow_classify.rst create mode 100644 src/spdk/dpdk/doc/guides/sample_app_ug/flow_filtering.rst create mode 100644 src/spdk/dpdk/doc/guides/sample_app_ug/hello_world.rst create mode 100644 src/spdk/dpdk/doc/guides/sample_app_ug/img/client_svr_sym_multi_proc_app.png create mode 100644 src/spdk/dpdk/doc/guides/sample_app_ug/img/dist_app.svg create mode 100644 src/spdk/dpdk/doc/guides/sample_app_ug/img/dist_perf.svg create mode 100644 src/spdk/dpdk/doc/guides/sample_app_ug/img/example_rules.png create mode 100644 src/spdk/dpdk/doc/guides/sample_app_ug/img/exception_path_example.svg create mode 100644 src/spdk/dpdk/doc/guides/sample_app_ug/img/ipsec_endpoints.svg create mode 100644 src/spdk/dpdk/doc/guides/sample_app_ug/img/ipv4_acl_rule.png create mode 100644 src/spdk/dpdk/doc/guides/sample_app_ug/img/kernel_nic.png create mode 100644 src/spdk/dpdk/doc/guides/sample_app_ug/img/l2_fwd_benchmark_setup.svg create mode 100644 src/spdk/dpdk/doc/guides/sample_app_ug/img/l2_fwd_encrypt_flow.svg create mode 100644 src/spdk/dpdk/doc/guides/sample_app_ug/img/l2_fwd_virtenv_benchmark_setup.png create mode 100644 src/spdk/dpdk/doc/guides/sample_app_ug/img/l2_fwd_vm2vm.svg create mode 100644 src/spdk/dpdk/doc/guides/sample_app_ug/img/load_bal_app_arch.png create mode 100644 src/spdk/dpdk/doc/guides/sample_app_ug/img/overlay_networking.svg create mode 100644 src/spdk/dpdk/doc/guides/sample_app_ug/img/performance_thread_1.svg create mode 100644 src/spdk/dpdk/doc/guides/sample_app_ug/img/performance_thread_2.svg create mode 100644 src/spdk/dpdk/doc/guides/sample_app_ug/img/pipeline_overview.png create mode 100644 src/spdk/dpdk/doc/guides/sample_app_ug/img/ptpclient.svg create mode 100644 
src/spdk/dpdk/doc/guides/sample_app_ug/img/qos_sched_app_arch.png create mode 100644 src/spdk/dpdk/doc/guides/sample_app_ug/img/quickassist_block_diagram.png create mode 100644 src/spdk/dpdk/doc/guides/sample_app_ug/img/ring_pipeline_perf_setup.png create mode 100644 src/spdk/dpdk/doc/guides/sample_app_ug/img/server_node_efd.svg create mode 100644 src/spdk/dpdk/doc/guides/sample_app_ug/img/sym_multi_proc_app.png create mode 100644 src/spdk/dpdk/doc/guides/sample_app_ug/img/tep_termination_arch.svg create mode 100644 src/spdk/dpdk/doc/guides/sample_app_ug/img/test_pipeline_app.png create mode 100644 src/spdk/dpdk/doc/guides/sample_app_ug/img/threads_pipelines.png create mode 100644 src/spdk/dpdk/doc/guides/sample_app_ug/img/vm_power_mgr_highlevel.svg create mode 100644 src/spdk/dpdk/doc/guides/sample_app_ug/img/vm_power_mgr_vm_request_seq.svg create mode 100644 src/spdk/dpdk/doc/guides/sample_app_ug/img/vmdq_dcb_example.svg create mode 100644 src/spdk/dpdk/doc/guides/sample_app_ug/index.rst create mode 100644 src/spdk/dpdk/doc/guides/sample_app_ug/intro.rst create mode 100644 src/spdk/dpdk/doc/guides/sample_app_ug/ioat.rst create mode 100644 src/spdk/dpdk/doc/guides/sample_app_ug/ip_frag.rst create mode 100644 src/spdk/dpdk/doc/guides/sample_app_ug/ip_pipeline.rst create mode 100644 src/spdk/dpdk/doc/guides/sample_app_ug/ip_reassembly.rst create mode 100644 src/spdk/dpdk/doc/guides/sample_app_ug/ipsec_secgw.rst create mode 100644 src/spdk/dpdk/doc/guides/sample_app_ug/ipv4_multicast.rst create mode 100644 src/spdk/dpdk/doc/guides/sample_app_ug/keep_alive.rst create mode 100644 src/spdk/dpdk/doc/guides/sample_app_ug/kernel_nic_interface.rst create mode 100644 src/spdk/dpdk/doc/guides/sample_app_ug/l2_forward_cat.rst create mode 100644 src/spdk/dpdk/doc/guides/sample_app_ug/l2_forward_crypto.rst create mode 100644 src/spdk/dpdk/doc/guides/sample_app_ug/l2_forward_event.rst create mode 100644 src/spdk/dpdk/doc/guides/sample_app_ug/l2_forward_job_stats.rst create mode 100644 src/spdk/dpdk/doc/guides/sample_app_ug/l2_forward_real_virtual.rst create mode 100644 src/spdk/dpdk/doc/guides/sample_app_ug/l3_forward.rst create mode 100644 src/spdk/dpdk/doc/guides/sample_app_ug/l3_forward_access_ctrl.rst create mode 100644 src/spdk/dpdk/doc/guides/sample_app_ug/l3_forward_graph.rst create mode 100644 src/spdk/dpdk/doc/guides/sample_app_ug/l3_forward_power_man.rst create mode 100644 src/spdk/dpdk/doc/guides/sample_app_ug/link_status_intr.rst create mode 100644 src/spdk/dpdk/doc/guides/sample_app_ug/multi_process.rst create mode 100644 src/spdk/dpdk/doc/guides/sample_app_ug/ntb.rst create mode 100644 src/spdk/dpdk/doc/guides/sample_app_ug/packet_ordering.rst create mode 100644 src/spdk/dpdk/doc/guides/sample_app_ug/performance_thread.rst create mode 100644 src/spdk/dpdk/doc/guides/sample_app_ug/ptpclient.rst create mode 100644 src/spdk/dpdk/doc/guides/sample_app_ug/qos_metering.rst create mode 100644 src/spdk/dpdk/doc/guides/sample_app_ug/qos_scheduler.rst create mode 100644 src/spdk/dpdk/doc/guides/sample_app_ug/rxtx_callbacks.rst create mode 100644 src/spdk/dpdk/doc/guides/sample_app_ug/server_node_efd.rst create mode 100644 src/spdk/dpdk/doc/guides/sample_app_ug/service_cores.rst create mode 100644 src/spdk/dpdk/doc/guides/sample_app_ug/skeleton.rst create mode 100644 src/spdk/dpdk/doc/guides/sample_app_ug/tep_termination.rst create mode 100644 src/spdk/dpdk/doc/guides/sample_app_ug/test_pipeline.rst create mode 100644 src/spdk/dpdk/doc/guides/sample_app_ug/timer.rst create mode 100644 
src/spdk/dpdk/doc/guides/sample_app_ug/vdpa.rst create mode 100644 src/spdk/dpdk/doc/guides/sample_app_ug/vhost.rst create mode 100644 src/spdk/dpdk/doc/guides/sample_app_ug/vhost_blk.rst create mode 100644 src/spdk/dpdk/doc/guides/sample_app_ug/vhost_crypto.rst create mode 100644 src/spdk/dpdk/doc/guides/sample_app_ug/vm_power_management.rst create mode 100644 src/spdk/dpdk/doc/guides/sample_app_ug/vmdq_dcb_forwarding.rst create mode 100644 src/spdk/dpdk/doc/guides/sample_app_ug/vmdq_forwarding.rst create mode 100644 src/spdk/dpdk/doc/guides/testpmd_app_ug/build_app.rst create mode 100644 src/spdk/dpdk/doc/guides/testpmd_app_ug/index.rst create mode 100644 src/spdk/dpdk/doc/guides/testpmd_app_ug/intro.rst create mode 100644 src/spdk/dpdk/doc/guides/testpmd_app_ug/run_app.rst create mode 100644 src/spdk/dpdk/doc/guides/testpmd_app_ug/testpmd_funcs.rst create mode 100644 src/spdk/dpdk/doc/guides/tools/comp_perf.rst create mode 100644 src/spdk/dpdk/doc/guides/tools/cryptoperf.rst create mode 100644 src/spdk/dpdk/doc/guides/tools/devbind.rst create mode 100644 src/spdk/dpdk/doc/guides/tools/img/eventdev_order_atq_test.svg create mode 100644 src/spdk/dpdk/doc/guides/tools/img/eventdev_order_queue_test.svg create mode 100644 src/spdk/dpdk/doc/guides/tools/img/eventdev_perf_atq_test.svg create mode 100644 src/spdk/dpdk/doc/guides/tools/img/eventdev_perf_queue_test.svg create mode 100644 src/spdk/dpdk/doc/guides/tools/img/eventdev_pipeline_atq_test_generic.svg create mode 100644 src/spdk/dpdk/doc/guides/tools/img/eventdev_pipeline_atq_test_internal_port.svg create mode 100644 src/spdk/dpdk/doc/guides/tools/img/eventdev_pipeline_queue_test_generic.svg create mode 100644 src/spdk/dpdk/doc/guides/tools/img/eventdev_pipeline_queue_test_internal_port.svg create mode 100644 src/spdk/dpdk/doc/guides/tools/index.rst create mode 100644 src/spdk/dpdk/doc/guides/tools/pdump.rst create mode 100644 src/spdk/dpdk/doc/guides/tools/pmdinfo.rst create mode 100644 src/spdk/dpdk/doc/guides/tools/proc_info.rst create mode 100644 src/spdk/dpdk/doc/guides/tools/testbbdev.rst create mode 100644 src/spdk/dpdk/doc/guides/tools/testeventdev.rst create mode 100644 src/spdk/dpdk/doc/guides/vdpadevs/features/default.ini create mode 100644 src/spdk/dpdk/doc/guides/vdpadevs/features/ifcvf.ini create mode 100644 src/spdk/dpdk/doc/guides/vdpadevs/features/mlx5.ini create mode 100644 src/spdk/dpdk/doc/guides/vdpadevs/features_overview.rst create mode 100644 src/spdk/dpdk/doc/guides/vdpadevs/ifc.rst create mode 100644 src/spdk/dpdk/doc/guides/vdpadevs/index.rst create mode 100644 src/spdk/dpdk/doc/guides/vdpadevs/mlx5.rst create mode 100644 src/spdk/dpdk/doc/guides/windows_gsg/build_dpdk.rst create mode 100644 src/spdk/dpdk/doc/guides/windows_gsg/index.rst create mode 100644 src/spdk/dpdk/doc/guides/windows_gsg/intro.rst (limited to 'src/spdk/dpdk/doc/guides') diff --git a/src/spdk/dpdk/doc/guides/bbdevs/features/default.ini b/src/spdk/dpdk/doc/guides/bbdevs/features/default.ini new file mode 100644 index 000000000..5fe267a62 --- /dev/null +++ b/src/spdk/dpdk/doc/guides/bbdevs/features/default.ini @@ -0,0 +1,16 @@ +; +; Features of a default bbdev driver. +; +; This file defines the features that are valid for inclusion in +; the other driver files and also the order that they appear in +; the features table in the documentation. 
+; +[Features] +Turbo Decoder (4G) = +Turbo Encoder (4G) = +LDPC Decoder (5G) = +LDPC Encoder (5G) = +LLR/HARQ Compression = +External DDR Access = +HW Accelerated = +BBDEV API = diff --git a/src/spdk/dpdk/doc/guides/bbdevs/features/fpga_5gnr_fec.ini b/src/spdk/dpdk/doc/guides/bbdevs/features/fpga_5gnr_fec.ini new file mode 100644 index 000000000..7a0b8d4e7 --- /dev/null +++ b/src/spdk/dpdk/doc/guides/bbdevs/features/fpga_5gnr_fec.ini @@ -0,0 +1,11 @@ +; +; Supported features of the 'fpga_5ngr_fec' bbdev driver. +; +; Refer to default.ini for the full list of available PMD features. +; +[Features] +LDPC Decoder (5G) = Y +LDPC Encoder (5G) = Y +External DDR Access = Y +HW Accelerated = Y +BBDEV API = Y diff --git a/src/spdk/dpdk/doc/guides/bbdevs/features/fpga_lte_fec.ini b/src/spdk/dpdk/doc/guides/bbdevs/features/fpga_lte_fec.ini new file mode 100644 index 000000000..f1cfb924a --- /dev/null +++ b/src/spdk/dpdk/doc/guides/bbdevs/features/fpga_lte_fec.ini @@ -0,0 +1,10 @@ +; +; Supported features of the 'fpga_lte_fec' bbdev driver. +; +; Refer to default.ini for the full list of available PMD features. +; +[Features] +Turbo Decoder (4G) = Y +Turbo Encoder (4G) = Y +HW Accelerated = Y +BBDEV API = Y diff --git a/src/spdk/dpdk/doc/guides/bbdevs/features/mbc.ini b/src/spdk/dpdk/doc/guides/bbdevs/features/mbc.ini new file mode 100644 index 000000000..78a7b95da --- /dev/null +++ b/src/spdk/dpdk/doc/guides/bbdevs/features/mbc.ini @@ -0,0 +1,14 @@ +; +; Supported features of the 'mbc' bbdev driver. +; +; Refer to default.ini for the full list of available PMD features. +; +[Features] +Turbo Decoder (4G) = Y +Turbo Encoder (4G) = Y +LDPC Decoder (5G) = Y +LDPC Encoder (5G) = Y +LLR/HARQ Compression = Y +External DDR Access = Y +HW Accelerated = Y +BBDEV API = Y diff --git a/src/spdk/dpdk/doc/guides/bbdevs/features/null.ini b/src/spdk/dpdk/doc/guides/bbdevs/features/null.ini new file mode 100644 index 000000000..d9bbda9cf --- /dev/null +++ b/src/spdk/dpdk/doc/guides/bbdevs/features/null.ini @@ -0,0 +1,7 @@ +; +; Supported features of the 'null' bbdev driver. +; +; Refer to default.ini for the full list of available PMD features. +; +[Features] +BBDEV API = Y diff --git a/src/spdk/dpdk/doc/guides/bbdevs/features/turbo_sw.ini b/src/spdk/dpdk/doc/guides/bbdevs/features/turbo_sw.ini new file mode 100644 index 000000000..2c7075e21 --- /dev/null +++ b/src/spdk/dpdk/doc/guides/bbdevs/features/turbo_sw.ini @@ -0,0 +1,11 @@ +; +; Supported features of the 'turbo_sw' bbdev driver. +; +; Refer to default.ini for the full list of available PMD features. +; +[Features] +Turbo Decoder (4G) = Y +Turbo Encoder (4G) = Y +LDPC Decoder (5G) = Y +LDPC Encoder (5G) = Y +BBDEV API = Y diff --git a/src/spdk/dpdk/doc/guides/bbdevs/fpga_5gnr_fec.rst b/src/spdk/dpdk/doc/guides/bbdevs/fpga_5gnr_fec.rst new file mode 100644 index 000000000..19bba3661 --- /dev/null +++ b/src/spdk/dpdk/doc/guides/bbdevs/fpga_5gnr_fec.rst @@ -0,0 +1,297 @@ +.. SPDX-License-Identifier: BSD-3-Clause + Copyright(c) 2019 Intel Corporation + +Intel(R) FPGA 5GNR FEC Poll Mode Driver +======================================= + +The BBDEV FPGA 5GNR FEC poll mode driver (PMD) supports an FPGA implementation of a VRAN +LDPC Encode / Decode 5GNR wireless acceleration function, using Intel's PCI-e and FPGA +based Vista Creek device. 
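Before the feature list that follows, a minimal sketch of how an application can confirm that a bbdev PMD such as this one has been probed. This uses only the generic bbdev API (``rte_bbdev_count()`` / ``rte_bbdev_info_get()``); the loop and printed fields are illustrative assumptions, not part of this guide, and the FPGA 5GNR FEC device must already be bound and probed by EAL as described later.

.. code-block:: c

    #include <stdio.h>
    #include <rte_bbdev.h>

    /* Illustrative only: list every bbdev device registered with EAL and
     * report whether its driver is hardware accelerated, which a hardware
     * PMD such as the FPGA 5GNR FEC driver reports for its devices. */
    static void
    list_bbdev_devices(void)
    {
        uint16_t dev_id;
        struct rte_bbdev_info info;

        for (dev_id = 0; dev_id < rte_bbdev_count(); dev_id++) {
            if (rte_bbdev_info_get(dev_id, &info) != 0)
                continue;
            printf("bbdev %u: %s, driver %s, HW accelerated: %s\n",
                   dev_id, info.dev_name, info.drv.driver_name,
                   info.drv.hardware_accelerated ? "yes" : "no");
        }
    }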
+ +Features +-------- + +FPGA 5GNR FEC PMD supports the following features: + +- LDPC Encode in the DL +- LDPC Decode in the UL +- 8 VFs per PF (physical device) +- Maximum of 32 UL queues per VF +- Maximum of 32 DL queues per VF +- PCIe Gen-3 x8 Interface +- MSI-X +- SR-IOV + +FPGA 5GNR FEC PMD supports the following BBDEV capabilities: + +* For the LDPC encode operation: + - ``RTE_BBDEV_LDPC_CRC_24B_ATTACH`` : set to attach CRC24B to CB(s) + - ``RTE_BBDEV_LDPC_RATE_MATCH`` : if set then do not do Rate Match bypass + +* For the LDPC decode operation: + - ``RTE_BBDEV_LDPC_CRC_TYPE_24B_CHECK`` : check CRC24B from CB(s) + - ``RTE_BBDEV_LDPC_ITERATION_STOP_ENABLE`` : disable early termination + - ``RTE_BBDEV_LDPC_CRC_TYPE_24B_DROP`` : drops CRC24B bits appended while decoding + - ``RTE_BBDEV_LDPC_HQ_COMBINE_IN_ENABLE`` : provides an input for HARQ combining + - ``RTE_BBDEV_LDPC_HQ_COMBINE_OUT_ENABLE`` : provides an input for HARQ combining + - ``RTE_BBDEV_LDPC_INTERNAL_HARQ_MEMORY_IN_ENABLE`` : HARQ memory input is internal + - ``RTE_BBDEV_LDPC_INTERNAL_HARQ_MEMORY_OUT_ENABLE`` : HARQ memory output is internal + - ``RTE_BBDEV_LDPC_INTERNAL_HARQ_MEMORY_LOOPBACK`` : loopback data to/from HARQ memory + - ``RTE_BBDEV_LDPC_INTERNAL_HARQ_MEMORY_FILLERS`` : HARQ memory includes the fillers bits + + +Limitations +----------- + +FPGA 5GNR FEC does not support the following: + +- Scatter-Gather function + + +Installation +------------ + +Section 3 of the DPDK manual provides instuctions on installing and compiling DPDK. The +default set of bbdev compile flags may be found in config/common_base, where for example +the flag to build the FPGA 5GNR FEC device, ``CONFIG_RTE_LIBRTE_PMD_BBDEV_FPGA_5GNR_FEC``, +is already set. It is assumed DPDK has been compiled using for instance: + +.. code-block:: console + + make install T=x86_64-native-linuxapp-gcc + + +DPDK requires hugepages to be configured as detailed in section 2 of the DPDK manual. +The bbdev test application has been tested with a configuration 40 x 1GB hugepages. The +hugepage configuration of a server may be examined using: + +.. code-block:: console + + grep Huge* /proc/meminfo + + +Initialization +-------------- + +When the device first powers up, its PCI Physical Functions (PF) can be listed through this command: + +.. code-block:: console + + sudo lspci -vd8086:0d8f + +The physical and virtual functions are compatible with Linux UIO drivers: +``vfio`` and ``igb_uio``. However, in order to work the FPGA 5GNR FEC device firstly needs +to be bound to one of these linux drivers through DPDK. + + +Bind PF UIO driver(s) +~~~~~~~~~~~~~~~~~~~~~ + +Install the DPDK igb_uio driver, bind it with the PF PCI device ID and use +``lspci`` to confirm the PF device is under use by ``igb_uio`` DPDK UIO driver. + +The igb_uio driver may be bound to the PF PCI device using one of three methods: + + +1. PCI functions (physical or virtual, depending on the use case) can be bound to +the UIO driver by repeating this command for every function. + +.. code-block:: console + + cd + insmod ./build/kmod/igb_uio.ko + echo "8086 0d8f" > /sys/bus/pci/drivers/igb_uio/new_id + lspci -vd8086:0d8f + + +2. Another way to bind PF with DPDK UIO driver is by using the ``dpdk-devbind.py`` tool + +.. code-block:: console + + cd + ./usertools/dpdk-devbind.py -b igb_uio 0000:06:00.0 + +where the PCI device ID (example: 0000:06:00.0) is obtained using lspci -vd8086:0d8f + + +3. A third way to bind is to use ``dpdk-setup.sh`` tool + +.. 
code-block:: console + + cd + ./usertools/dpdk-setup.sh + + select 'Bind Ethernet/Crypto/Baseband device to IGB UIO module' + or + select 'Bind Ethernet/Crypto/Baseband device to VFIO module' depending on driver required + enter PCI device ID + select 'Display current Ethernet/Crypto/Baseband device settings' to confirm binding + + +In the same way the FPGA 5GNR FEC PF can be bound with vfio, but vfio driver does not +support SR-IOV configuration right out of the box, so it will need to be patched. + + +Enable Virtual Functions +~~~~~~~~~~~~~~~~~~~~~~~~ + +Now, it should be visible in the printouts that PCI PF is under igb_uio control +"``Kernel driver in use: igb_uio``" + +To show the number of available VFs on the device, read ``sriov_totalvfs`` file.. + +.. code-block:: console + + cat /sys/bus/pci/devices/0000\:\:./sriov_totalvfs + + where 0000\:\:. is the PCI device ID + + +To enable VFs via igb_uio, echo the number of virtual functions intended to +enable to ``max_vfs`` file.. + +.. code-block:: console + + echo > /sys/bus/pci/devices/0000\:\:./max_vfs + + +Afterwards, all VFs must be bound to appropriate UIO drivers as required, same +way it was done with the physical function previously. + +Enabling SR-IOV via vfio driver is pretty much the same, except that the file +name is different: + +.. code-block:: console + + echo > /sys/bus/pci/devices/0000\:\:./sriov_numvfs + + +Configure the VFs through PF +~~~~~~~~~~~~~~~~~~~~~~~~~~~~ + +The PCI virtual functions must be configured before working or getting assigned +to VMs/Containers. The configuration involves allocating the number of hardware +queues, priorities, load balance, bandwidth and other settings necessary for the +device to perform FEC functions. + +This configuration needs to be executed at least once after reboot or PCI FLR and can +be achieved by using the function ``fpga_5gnr_fec_configure()``, which sets up the +parameters defined in ``fpga_5gnr_fec_conf`` structure: + +.. code-block:: c + + struct fpga_5gnr_fec_conf { + bool pf_mode_en; + uint8_t vf_ul_queues_number[FPGA_5GNR_FEC_NUM_VFS]; + uint8_t vf_dl_queues_number[FPGA_5GNR_FEC_NUM_VFS]; + uint8_t ul_bandwidth; + uint8_t dl_bandwidth; + uint8_t ul_load_balance; + uint8_t dl_load_balance; + uint16_t flr_time_out; + }; + +- ``pf_mode_en``: identifies whether only PF is to be used, or the VFs. PF and + VFs are mutually exclusive and cannot run simultaneously. + Set to 1 for PF mode enabled. + If PF mode is enabled all queues available in the device are assigned + exclusively to PF and 0 queues given to VFs. + +- ``vf_*l_queues_number``: defines the hardware queue mapping for every VF. + +- ``*l_bandwidth``: in case of congestion on PCIe interface. The device + allocates different bandwidth to UL and DL. The weight is configured by this + setting. The unit of weight is 3 code blocks. For example, if the code block + cbps (code block per second) ratio between UL and DL is 12:1, then the + configuration value should be set to 36:3. The schedule algorithm is based + on code block regardless the length of each block. + +- ``*l_load_balance``: hardware queues are load-balanced in a round-robin + fashion. Queues get filled first-in first-out until they reach a pre-defined + watermark level, if exceeded, they won't get assigned new code blocks.. + This watermark is defined by this setting. + + If all hardware queues exceeds the watermark, no code blocks will be + streamed in from UL/DL code block FIFO. + +- ``flr_time_out``: specifies how many 16.384us to be FLR time out. 
The + time_out = flr_time_out x 16.384us. For instance, if you want to set 10ms for + the FLR time out then set this setting to 0x262=610. + + +An example configuration code calling the function ``fpga_5gnr_fec_configure()`` is shown +below: + +.. code-block:: c + + struct fpga_5gnr_fec_conf conf; + unsigned int i; + + memset(&conf, 0, sizeof(struct fpga_5gnr_fec_conf)); + conf.pf_mode_en = 1; + + for (i = 0; i < FPGA_5GNR_FEC_NUM_VFS; ++i) { + conf.vf_ul_queues_number[i] = 4; + conf.vf_dl_queues_number[i] = 4; + } + conf.ul_bandwidth = 12; + conf.dl_bandwidth = 5; + conf.dl_load_balance = 64; + conf.ul_load_balance = 64; + + /* setup FPGA PF */ + ret = fpga_5gnr_fec_configure(info->dev_name, &conf); + TEST_ASSERT_SUCCESS(ret, + "Failed to configure 4G FPGA PF for bbdev %s", + info->dev_name); + + +Test Application +---------------- + +BBDEV provides a test application, ``test-bbdev.py`` and range of test data for testing +the functionality of FPGA 5GNR FEC encode and decode, depending on the device's +capabilities. The test application is located under app->test-bbdev folder and has the +following options: + +.. code-block:: console + + "-p", "--testapp-path": specifies path to the bbdev test app. + "-e", "--eal-params" : EAL arguments which are passed to the test app. + "-t", "--timeout" : Timeout in seconds (default=300). + "-c", "--test-cases" : Defines test cases to run. Run all if not specified. + "-v", "--test-vector" : Test vector path (default=dpdk_path+/app/test-bbdev/test_vectors/bbdev_null.data). + "-n", "--num-ops" : Number of operations to process on device (default=32). + "-b", "--burst-size" : Operations enqueue/dequeue burst size (default=32). + "-l", "--num-lcores" : Number of lcores to run (default=16). + "-i", "--init-device" : Initialise PF device with default values. + + +To execute the test application tool using simple decode or encode data, +type one of the following: + +.. code-block:: console + + ./test-bbdev.py -c validation -n 64 -b 1 -v ./ldpc_dec_default.data + ./test-bbdev.py -c validation -n 64 -b 1 -v ./ldpc_enc_default.data + + +The test application ``test-bbdev.py``, supports the ability to configure the PF device with +a default set of values, if the "-i" or "- -init-device" option is included. The default values +are defined in test_bbdev_perf.c as: + +- VF_UL_QUEUE_VALUE 4 +- VF_DL_QUEUE_VALUE 4 +- UL_BANDWIDTH 3 +- DL_BANDWIDTH 3 +- UL_LOAD_BALANCE 128 +- DL_LOAD_BALANCE 128 +- FLR_TIMEOUT 610 + + +Test Vectors +~~~~~~~~~~~~ + +In addition to the simple LDPC decoder and LDPC encoder tests, bbdev also provides +a range of additional tests under the test_vectors folder, which may be useful. The results +of these tests will depend on the FPGA 5GNR FEC capabilities. diff --git a/src/spdk/dpdk/doc/guides/bbdevs/fpga_lte_fec.rst b/src/spdk/dpdk/doc/guides/bbdevs/fpga_lte_fec.rst new file mode 100644 index 000000000..206b6f4f9 --- /dev/null +++ b/src/spdk/dpdk/doc/guides/bbdevs/fpga_lte_fec.rst @@ -0,0 +1,316 @@ +.. SPDX-License-Identifier: BSD-3-Clause + Copyright(c) 2019 Intel Corporation + +Intel(R) FPGA LTE FEC Poll Mode Driver +====================================== + +The BBDEV FPGA LTE FEC poll mode driver (PMD) supports an FPGA implementation of a VRAN +Turbo Encode / Decode LTE wireless acceleration function, using Intel's PCI-e and FPGA +based Vista Creek device. 
+ +Features +-------- + +FPGA LTE FEC PMD supports the following features: + +- Turbo Encode in the DL with total throughput of 4.5 Gbits/s +- Turbo Decode in the UL with total throughput of 1.5 Gbits/s assuming 8 decoder iterations +- 8 VFs per PF (physical device) +- Maximum of 32 UL queues per VF +- Maximum of 32 DL queues per VF +- PCIe Gen-3 x8 Interface +- MSI-X +- SR-IOV + + +FPGA LTE FEC PMD supports the following BBDEV capabilities: + +* For the turbo encode operation: + - ``RTE_BBDEV_TURBO_CRC_24B_ATTACH`` : set to attach CRC24B to CB(s) + - ``RTE_BBDEV_TURBO_RATE_MATCH`` : if set then do not do Rate Match bypass + - ``RTE_BBDEV_TURBO_ENC_INTERRUPTS`` : set for encoder dequeue interrupts + + +* For the turbo decode operation: + - ``RTE_BBDEV_TURBO_CRC_TYPE_24B`` : check CRC24B from CB(s) + - ``RTE_BBDEV_TURBO_SUBBLOCK_DEINTERLEAVE`` : perform subblock de-interleave + - ``RTE_BBDEV_TURBO_DEC_INTERRUPTS`` : set for decoder dequeue interrupts + - ``RTE_BBDEV_TURBO_NEG_LLR_1_BIT_IN`` : set if negative LLR encoder i/p is supported + - ``RTE_BBDEV_TURBO_DEC_TB_CRC_24B_KEEP`` : keep CRC24B bits appended while decoding + + +Limitations +----------- + +FPGA LTE FEC does not support the following: + +- Scatter-Gather function + + +Installation +-------------- + +Section 3 of the DPDK manual provides instuctions on installing and compiling DPDK. The +default set of bbdev compile flags may be found in config/common_base, where for example +the flag to build the FPGA LTE FEC device, ``CONFIG_RTE_LIBRTE_PMD_BBDEV_FPGA_LTE_FEC``, is already +set. It is assumed DPDK has been compiled using for instance: + +.. code-block:: console + + make install T=x86_64-native-linuxapp-gcc + + +DPDK requires hugepages to be configured as detailed in section 2 of the DPDK manual. +The bbdev test application has been tested with a configuration 40 x 1GB hugepages. The +hugepage configuration of a server may be examined using: + +.. code-block:: console + + grep Huge* /proc/meminfo + + +Initialization +-------------- + +When the device first powers up, its PCI Physical Functions (PF) can be listed through this command: + +.. code-block:: console + + sudo lspci -vd1172:5052 + +The physical and virtual functions are compatible with Linux UIO drivers: +``vfio`` and ``igb_uio``. However, in order to work the FPGA LTE FEC device firstly needs +to be bound to one of these linux drivers through DPDK. + + +Bind PF UIO driver(s) +~~~~~~~~~~~~~~~~~~~~~ + +Install the DPDK igb_uio driver, bind it with the PF PCI device ID and use +``lspci`` to confirm the PF device is under use by ``igb_uio`` DPDK UIO driver. + +The igb_uio driver may be bound to the PF PCI device using one of three methods: + + +1. PCI functions (physical or virtual, depending on the use case) can be bound to +the UIO driver by repeating this command for every function. + +.. code-block:: console + + cd + insmod ./build/kmod/igb_uio.ko + echo "1172 5052" > /sys/bus/pci/drivers/igb_uio/new_id + lspci -vd1172: + + +2. Another way to bind PF with DPDK UIO driver is by using the ``dpdk-devbind.py`` tool + +.. code-block:: console + + cd + ./usertools/dpdk-devbind.py -b igb_uio 0000:06:00.0 + +where the PCI device ID (example: 0000:06:00.0) is obtained using lspci -vd1172: + + +3. A third way to bind is to use ``dpdk-setup.sh`` tool + +.. 
code-block:: console + + cd + ./usertools/dpdk-setup.sh + + select 'Bind Ethernet/Crypto/Baseband device to IGB UIO module' + or + select 'Bind Ethernet/Crypto/Baseband device to VFIO module' depending on driver required + enter PCI device ID + select 'Display current Ethernet/Crypto/Baseband device settings' to confirm binding + + +In the same way the FPGA LTE FEC PF can be bound with vfio, but vfio driver does not +support SR-IOV configuration right out of the box, so it will need to be patched. + + +Enable Virtual Functions +~~~~~~~~~~~~~~~~~~~~~~~~ + +Now, it should be visible in the printouts that PCI PF is under igb_uio control +"``Kernel driver in use: igb_uio``" + +To show the number of available VFs on the device, read ``sriov_totalvfs`` file.. + +.. code-block:: console + + cat /sys/bus/pci/devices/0000\:\:./sriov_totalvfs + + where 0000\:\:. is the PCI device ID + + +To enable VFs via igb_uio, echo the number of virtual functions intended to +enable to ``max_vfs`` file.. + +.. code-block:: console + + echo > /sys/bus/pci/devices/0000\:\:./max_vfs + + +Afterwards, all VFs must be bound to appropriate UIO drivers as required, same +way it was done with the physical function previously. + +Enabling SR-IOV via vfio driver is pretty much the same, except that the file +name is different: + +.. code-block:: console + + echo > /sys/bus/pci/devices/0000\:\:./sriov_numvfs + + +Configure the VFs through PF +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ + +The PCI virtual functions must be configured before working or getting assigned +to VMs/Containers. The configuration involves allocating the number of hardware +queues, priorities, load balance, bandwidth and other settings necessary for the +device to perform FEC functions. + +This configuration needs to be executed at least once after reboot or PCI FLR and can +be achieved by using the function ``fpga_lte_fec_configure()``, which sets up the +parameters defined in ``fpga_lte_fec_conf`` structure: + +.. code-block:: c + + struct fpga_lte_fec_conf { + bool pf_mode_en; + uint8_t vf_ul_queues_number[FPGA_LTE_FEC_NUM_VFS]; + uint8_t vf_dl_queues_number[FPGA_LTE_FEC_NUM_VFS]; + uint8_t ul_bandwidth; + uint8_t dl_bandwidth; + uint8_t ul_load_balance; + uint8_t dl_load_balance; + uint16_t flr_time_out; + }; + +- ``pf_mode_en``: identifies whether only PF is to be used, or the VFs. PF and + VFs are mutually exclusive and cannot run simultaneously. + Set to 1 for PF mode enabled. + If PF mode is enabled all queues available in the device are assigned + exclusively to PF and 0 queues given to VFs. + +- ``vf_*l_queues_number``: defines the hardware queue mapping for every VF. + +- ``*l_bandwidth``: in case of congestion on PCIe interface. The device + allocates different bandwidth to UL and DL. The weight is configured by this + setting. The unit of weight is 3 code blocks. For example, if the code block + cbps (code block per second) ratio between UL and DL is 12:1, then the + configuration value should be set to 36:3. The schedule algorithm is based + on code block regardless the length of each block. + +- ``*l_load_balance``: hardware queues are load-balanced in a round-robin + fashion. Queues get filled first-in first-out until they reach a pre-defined + watermark level, if exceeded, they won't get assigned new code blocks.. + This watermark is defined by this setting. + + If all hardware queues exceeds the watermark, no code blocks will be + streamed in from UL/DL code block FIFO. + +- ``flr_time_out``: specifies how many 16.384us to be FLR time out. 
The + time_out = flr_time_out x 16.384us. For instance, if you want to set 10ms for + the FLR time out then set this setting to 0x262=610. + + +An example configuration code calling the function ``fpga_lte_fec_configure()`` is shown +below: + +.. code-block:: c + + struct fpga_lte_fec_conf conf; + unsigned int i; + + memset(&conf, 0, sizeof(struct fpga_lte_fec_conf)); + conf.pf_mode_en = 1; + + for (i = 0; i < FPGA_LTE_FEC_NUM_VFS; ++i) { + conf.vf_ul_queues_number[i] = 4; + conf.vf_dl_queues_number[i] = 4; + } + conf.ul_bandwidth = 12; + conf.dl_bandwidth = 5; + conf.dl_load_balance = 64; + conf.ul_load_balance = 64; + + /* setup FPGA PF */ + ret = fpga_lte_fec_configure(info->dev_name, &conf); + TEST_ASSERT_SUCCESS(ret, + "Failed to configure 4G FPGA PF for bbdev %s", + info->dev_name); + + +Test Application +---------------- + +BBDEV provides a test application, ``test-bbdev.py`` and range of test data for testing +the functionality of FPGA LTE FEC turbo encode and turbo decode, depending on the device's +capabilities. The test application is located under app->test-bbdev folder and has the +following options: + +.. code-block:: console + + "-p", "--testapp-path": specifies path to the bbdev test app. + "-e", "--eal-params" : EAL arguments which are passed to the test app. + "-t", "--timeout" : Timeout in seconds (default=300). + "-c", "--test-cases" : Defines test cases to run. Run all if not specified. + "-v", "--test-vector" : Test vector path (default=dpdk_path+/app/test-bbdev/test_vectors/bbdev_null.data). + "-n", "--num-ops" : Number of operations to process on device (default=32). + "-b", "--burst-size" : Operations enqueue/dequeue burst size (default=32). + "-l", "--num-lcores" : Number of lcores to run (default=16). + "-i", "--init-device" : Initialise PF device with default values. + + +To execute the test application tool using simple turbo decode or turbo encode data, +type one of the following: + +.. code-block:: console + + ./test-bbdev.py -c validation -n 64 -b 8 -v ./turbo_dec_default.data + ./test-bbdev.py -c validation -n 64 -b 8 -v ./turbo_enc_default.data + + +The test application ``test-bbdev.py``, supports the ability to configure the PF device with +a default set of values, if the "-i" or "- -init-device" option is included. The default values +are defined in test_bbdev_perf.c as: + +- VF_UL_QUEUE_VALUE 4 +- VF_DL_QUEUE_VALUE 4 +- UL_BANDWIDTH 3 +- DL_BANDWIDTH 3 +- UL_LOAD_BALANCE 128 +- DL_LOAD_BALANCE 128 +- FLR_TIMEOUT 610 + + +Test Vectors +~~~~~~~~~~~~ + +In addition to the simple turbo decoder and turbo encoder tests, bbdev also provides +a range of additional tests under the test_vectors folder, which may be useful. 
The results +of these tests will depend on the FPGA LTE FEC capabilities: + +* turbo decoder tests: + - ``turbo_dec_c1_k6144_r0_e10376_crc24b_sbd_negllr_high_snr.data`` + - ``turbo_dec_c1_k6144_r0_e10376_crc24b_sbd_negllr_low_snr.data`` + - ``turbo_dec_c1_k6144_r0_e34560_negllr.data`` + - ``turbo_dec_c1_k6144_r0_e34560_sbd_negllr.data`` + - ``turbo_dec_c2_k3136_r0_e4920_sbd_negllr_crc24b.data`` + - ``turbo_dec_c2_k3136_r0_e4920_sbd_negllr.data`` + + +* turbo encoder tests: + - ``turbo_enc_c1_k40_r0_e1190_rm.data`` + - ``turbo_enc_c1_k40_r0_e1194_rm.data`` + - ``turbo_enc_c1_k40_r0_e1196_rm.data`` + - ``turbo_enc_c1_k40_r0_e272_rm.data`` + - ``turbo_enc_c1_k6144_r0_e18444.data`` + - ``turbo_enc_c1_k6144_r0_e32256_crc24b_rm.data`` + - ``turbo_enc_c2_k5952_r0_e17868_crc24b.data`` + - ``turbo_enc_c3_k4800_r2_e14412_crc24b.data`` + - ``turbo_enc_c4_k4800_r2_e14412_crc24b.data`` diff --git a/src/spdk/dpdk/doc/guides/bbdevs/index.rst b/src/spdk/dpdk/doc/guides/bbdevs/index.rst new file mode 100644 index 000000000..a8092dd2e --- /dev/null +++ b/src/spdk/dpdk/doc/guides/bbdevs/index.rst @@ -0,0 +1,15 @@ +.. SPDX-License-Identifier: BSD-3-Clause + Copyright(c) 2017 Intel Corporation + +Baseband Device Drivers +======================= + +.. toctree:: + :maxdepth: 2 + :numbered: + + overview + null + turbo_sw + fpga_lte_fec + fpga_5gnr_fec diff --git a/src/spdk/dpdk/doc/guides/bbdevs/null.rst b/src/spdk/dpdk/doc/guides/bbdevs/null.rst new file mode 100644 index 000000000..0b885d17f --- /dev/null +++ b/src/spdk/dpdk/doc/guides/bbdevs/null.rst @@ -0,0 +1,49 @@ +.. SPDX-License-Identifier: BSD-3-Clause + Copyright(c) 2017 Intel Corporation + +BBDEV null Poll Mode Driver +============================ + +The (**baseband_null**) is a bbdev poll mode driver which provides a minimal +implementation of a software bbdev device. As a null device it does not modify +the data in the mbuf on which the bbdev operation is to operate and it only +works for operation type ``RTE_BBDEV_OP_NONE``. + +When a burst of mbufs is submitted to a *bbdev null PMD* for processing then +each mbuf in the burst will be enqueued in an internal buffer ring to be +collected on a dequeue call. + + +Limitations +----------- + +* In-place operations for Turbo encode and decode are not supported + +Installation +------------ + +The *bbdev null PMD* is enabled and built by default in both the Linux and +FreeBSD builds. + +Initialization +-------------- + +To use the PMD in an application, user must: + +- Call ``rte_vdev_init("baseband_null")`` within the application. + +- Use ``--vdev="baseband_null"`` in the EAL options, which will call ``rte_vdev_init()`` internally. + +The following parameters (all optional) can be provided in the previous two calls: + +* ``socket_id``: Specify the socket where the memory for the device is going to be allocated + (by default, *socket_id* will be the socket where the core that is creating the PMD is running on). + +* ``max_nb_queues``: Specify the maximum number of queues in the device (default is ``RTE_MAX_LCORE``). + +Example: +~~~~~~~~ + +.. code-block:: console + + ./test-bbdev.py -e="--vdev=baseband_null,socket_id=0,max_nb_queues=8" diff --git a/src/spdk/dpdk/doc/guides/bbdevs/overview.rst b/src/spdk/dpdk/doc/guides/bbdevs/overview.rst new file mode 100644 index 000000000..8dc35a3c1 --- /dev/null +++ b/src/spdk/dpdk/doc/guides/bbdevs/overview.rst @@ -0,0 +1,12 @@ +.. SPDX-License-Identifier: BSD-3-Clause + Copyright(c) 2020 Intel Corporation. 
+ +Baseband Device Supported Functionality Matrices +================================================ + +Supported Feature Flags +----------------------- + +.. _table_bbdev_pmd_features: + +.. include:: overview_feature_table.txt diff --git a/src/spdk/dpdk/doc/guides/bbdevs/turbo_sw.rst b/src/spdk/dpdk/doc/guides/bbdevs/turbo_sw.rst new file mode 100644 index 000000000..20620c2e2 --- /dev/null +++ b/src/spdk/dpdk/doc/guides/bbdevs/turbo_sw.rst @@ -0,0 +1,181 @@ +.. SPDX-License-Identifier: BSD-3-Clause + Copyright(c) 2017 Intel Corporation + +SW Turbo Poll Mode Driver +========================= + +The SW Turbo PMD (**baseband_turbo_sw**) provides a software only poll mode bbdev +driver that can optionally utilize Intel optimized libraries for LTE and 5GNR +Layer 1 workloads acceleration. + +Note that the driver can also be built without any dependency with reduced +functionality for maintenance purpose. + +To enable linking to the SDK libraries see detailed installation section below. +Two flags can be enabled depending on whether the target machine can support +AVX2 and AVX512 instructions sets and the related SDK libraries for vectorized +signal processing functions are installed : +- CONFIG_RTE_BBDEV_SDK_AVX2 +- CONFIG_RTE_BBDEV_SDK_AVX512 +By default these 2 flags are disabled by default. For AVX2 machine and SDK +library installed then the first flag can be enabled. For AVX512 machine and +SDK library installed then both flags can be enabled for full real time capability. + +This PMD supports the functions: FEC, Rate Matching and CRC functions detailed +in the Features section. + +Features +-------- + +SW Turbo PMD can support for the following capabilities when the SDK libraries +are used: + +For the LTE encode operation: + +* ``RTE_BBDEV_TURBO_CRC_24A_ATTACH`` +* ``RTE_BBDEV_TURBO_CRC_24B_ATTACH`` +* ``RTE_BBDEV_TURBO_RATE_MATCH`` +* ``RTE_BBDEV_TURBO_RV_INDEX_BYPASS`` + +For the LTE decode operation: + +* ``RTE_BBDEV_TURBO_SUBBLOCK_DEINTERLEAVE`` +* ``RTE_BBDEV_TURBO_CRC_TYPE_24B`` +* ``RTE_BBDEV_TURBO_POS_LLR_1_BIT_IN`` +* ``RTE_BBDEV_TURBO_NEG_LLR_1_BIT_IN`` +* ``RTE_BBDEV_TURBO_DEC_TB_CRC_24B_KEEP`` +* ``RTE_BBDEV_TURBO_EARLY_TERMINATION`` + +For the 5G NR LDPC encode operation: + +* ``RTE_BBDEV_LDPC_RATE_MATCH`` +* ``RTE_BBDEV_LDPC_CRC_24A_ATTACH`` +* ``RTE_BBDEV_LDPC_CRC_24B_ATTACH`` + +For the 5G NR LDPC decode operation: + +* ``RTE_BBDEV_LDPC_CRC_TYPE_24B_CHECK`` +* ``RTE_BBDEV_LDPC_CRC_TYPE_24A_CHECK`` +* ``RTE_BBDEV_LDPC_CRC_TYPE_24B_DROP`` +* ``RTE_BBDEV_LDPC_HQ_COMBINE_IN_ENABLE`` +* ``RTE_BBDEV_LDPC_HQ_COMBINE_OUT_ENABLE`` +* ``RTE_BBDEV_LDPC_ITERATION_STOP_ENABLE`` + +Limitations +----------- + +* In-place operations for encode and decode are not supported + +Installation +------------ + +FlexRAN SDK Download +~~~~~~~~~~~~~~~~~~~~ + +As an option it is possible to link this driver with FleXRAN SDK libraries +which can enable real time signal processing using AVX instructions. + +These libraries are available through this `link `_. + +After download is complete, the user needs to unpack and compile on their +system before building DPDK. + +The following table maps DPDK versions with past FlexRAN SDK releases: + +.. _table_flexran_releases: + +.. 
table:: DPDK and FlexRAN FEC SDK releases compliance + + ===================== ============================ + DPDK version FlexRAN FEC SDK release + ===================== ============================ + 19.08 19.04 + ===================== ============================ + +FlexRAN SDK Installation +~~~~~~~~~~~~~~~~~~~~~~~~ + +Note that the installation of these libraries is optional. + +The following are pre-requisites for building FlexRAN SDK Libraries: + (a) An AVX2 or AVX512 supporting machine + (b) CentOS Linux release 7.2.1511 (Core) operating system is advised + (c) Intel ICC 18.0.1 20171018 compiler or more recent and related libraries + ICC is `available with a free community license `_. + +The following instructions should be followed in this exact order: + +#. Set the environment variables: + + .. code-block:: console + + source /linux/bin/compilervars.sh intel64 -platform linux + +#. Run the SDK extractor script and accept the license: + + .. code-block:: console + + cd + ./FlexRAN-FEC-SDK-19-04.sh + +#. Generate makefiles based on system configuration: + + .. code-block:: console + + cd /FlexRAN-FEC-SDK-19-04/sdk/ + ./create-makefiles-linux.sh + +#. A build folder is generated in this form ``build--``, enter that + folder and install: + + .. code-block:: console + + cd build-avx512-icc/ + make && make install + +Initialization +-------------- + +In order to enable this virtual bbdev PMD, the user may: + +* Build the ``FLEXRAN SDK`` libraries (explained in Installation section). + +* Export the environmental variables ``FLEXRAN_SDK`` to the path where the + FlexRAN SDK libraries were installed. And ``DIR_WIRELESS_SDK`` to the path + where the libraries were extracted. + +Example: + +.. code-block:: console + + export FLEXRAN_SDK=/FlexRAN-FEC-SDK-19-04/sdk/build-avx2-icc/install + export DIR_WIRELESS_SDK=/FlexRAN-FEC-SDK-19-04/sdk/build-avx2-icc/ + +* Set ``CONFIG_RTE_BBDEV_SDK_AVX2=y`` and ``CONFIG_RTE_BBDEV_SDK_AVX512=y`` + in DPDK common configuration file ``config/common_base`` to be able to use + the SDK libraries as mentioned above. + For AVX2 machine it is possible to only enable CONFIG_RTE_BBDEV_SDK_AVX2 + for limited 4G functionality. + If no flag are set the PMD driver will still build but its capabilities + will be limited accordingly. + +To use the PMD in an application, user must: + +- Call ``rte_vdev_init("baseband_turbo_sw")`` within the application. + +- Use ``--vdev="baseband_turbo_sw"`` in the EAL options, which will call ``rte_vdev_init()`` internally. + +The following parameters (all optional) can be provided in the previous two calls: + +* ``socket_id``: Specify the socket where the memory for the device is going to be allocated + (by default, *socket_id* will be the socket where the core that is creating the PMD is running on). + +* ``max_nb_queues``: Specify the maximum number of queues in the device (default is ``RTE_MAX_LCORE``). + +Example: +~~~~~~~~ + +.. code-block:: console + + ./test-bbdev.py -e="--vdev=baseband_turbo_sw,socket_id=0,max_nb_queues=8" \ + -c validation -v ./turbo_*_default.data diff --git a/src/spdk/dpdk/doc/guides/compressdevs/features/default.ini b/src/spdk/dpdk/doc/guides/compressdevs/features/default.ini new file mode 100644 index 000000000..e1419ee8d --- /dev/null +++ b/src/spdk/dpdk/doc/guides/compressdevs/features/default.ini @@ -0,0 +1,27 @@ +; +; Features of a default compression driver. 
+; +; This file defines the features that are valid for inclusion in +; the other driver files and also the order that they appear in +; the features table in the documentation. +; +[Features] +HW Accelerated = +CPU SSE = +CPU AVX = +CPU AVX2 = +CPU AVX512 = +CPU NEON = +Stateful Compression = +Stateful Decompression = +Pass-through = +OOP SGL In SGL Out = +OOP SGL In LB Out = +OOP LB In SGL Out = +Deflate = +LZS = +Adler32 = +Crc32 = +Adler32&Crc32 = +Fixed = +Dynamic = diff --git a/src/spdk/dpdk/doc/guides/compressdevs/features/isal.ini b/src/spdk/dpdk/doc/guides/compressdevs/features/isal.ini new file mode 100644 index 000000000..e705031e5 --- /dev/null +++ b/src/spdk/dpdk/doc/guides/compressdevs/features/isal.ini @@ -0,0 +1,18 @@ +; +; Refer to default.ini for the full list of available PMD features. +; +; Supported features of 'ISA-L' compression driver. +; +[Features] +CPU SSE = Y +CPU AVX = Y +CPU AVX2 = Y +CPU AVX512 = Y +OOP SGL In SGL Out = Y +OOP SGL In LB Out = Y +OOP LB In SGL Out = Y +Deflate = Y +Adler32 = Y +Crc32 = Y +Fixed = Y +Dynamic = Y diff --git a/src/spdk/dpdk/doc/guides/compressdevs/features/octeontx.ini b/src/spdk/dpdk/doc/guides/compressdevs/features/octeontx.ini new file mode 100644 index 000000000..cc8b02568 --- /dev/null +++ b/src/spdk/dpdk/doc/guides/compressdevs/features/octeontx.ini @@ -0,0 +1,10 @@ +; +; Refer to default.ini for the full list of available PMD features. +; +; Supported features of 'OCTEON TX ZIP' compression driver. +; +[Features] +HW Accelerated = Y +Deflate = Y +Fixed = Y +Dynamic = Y diff --git a/src/spdk/dpdk/doc/guides/compressdevs/features/qat.ini b/src/spdk/dpdk/doc/guides/compressdevs/features/qat.ini new file mode 100644 index 000000000..bced8f9cf --- /dev/null +++ b/src/spdk/dpdk/doc/guides/compressdevs/features/qat.ini @@ -0,0 +1,17 @@ +; +; Refer to default.ini for the full list of available PMD features. +; +; Supported features of 'QAT' compression driver. +; +[Features] +HW Accelerated = Y +Stateful Decompression = Y +OOP SGL In SGL Out = Y +OOP SGL In LB Out = Y +OOP LB In SGL Out = Y +Deflate = Y +Adler32 = Y +Crc32 = Y +Adler32&Crc32 = Y +Fixed = Y +Dynamic = Y diff --git a/src/spdk/dpdk/doc/guides/compressdevs/features/zlib.ini b/src/spdk/dpdk/doc/guides/compressdevs/features/zlib.ini new file mode 100644 index 000000000..58a4ee3ab --- /dev/null +++ b/src/spdk/dpdk/doc/guides/compressdevs/features/zlib.ini @@ -0,0 +1,10 @@ +; +; Refer to default.ini for the full list of available PMD features. +; +; Supported features of 'ZLIB' compression driver. +; +[Features] +Pass-through = Y +Deflate = Y +Fixed = Y +Dynamic = Y diff --git a/src/spdk/dpdk/doc/guides/compressdevs/index.rst b/src/spdk/dpdk/doc/guides/compressdevs/index.rst new file mode 100644 index 000000000..1f37e260e --- /dev/null +++ b/src/spdk/dpdk/doc/guides/compressdevs/index.rst @@ -0,0 +1,16 @@ +.. SPDX-License-Identifier: BSD-3-Clause + Copyright(c) 2018 Intel Corporation. + +Compression Device Drivers +========================== + + +.. toctree:: + :maxdepth: 2 + :numbered: + + overview + isal + octeontx + qat_comp + zlib diff --git a/src/spdk/dpdk/doc/guides/compressdevs/isal.rst b/src/spdk/dpdk/doc/guides/compressdevs/isal.rst new file mode 100644 index 000000000..af1f41f24 --- /dev/null +++ b/src/spdk/dpdk/doc/guides/compressdevs/isal.rst @@ -0,0 +1,149 @@ +.. SPDX-License-Identifier: BSD-3-Clause + Copyright(c) 2018 Intel Corporation. 
+ +ISA-L Compression Poll Mode Driver +================================== + +The ISA-L PMD (**librte_pmd_isal_comp**) provides poll mode compression & +decompression driver support for utilizing Intel ISA-L library, +which implements the deflate algorithm for both Deflate(compression) and Inflate(decompression). + + +Features +-------- + +ISA-L PMD has support for: + +Compression/Decompression algorithm: + + * DEFLATE + +Huffman code type: + + * FIXED + * DYNAMIC + +Window size support: + + * 32K + +Checksum: + + * CRC32 + * ADLER32 + +To enable a checksum in the driver, the compression and/or decompression xform +structure, rte_comp_xform, must be filled with either of the CompressDev +checksum flags supported. :: + + compress_xform->compress.chksum = RTE_COMP_CHECKSUM_CRC32 + + decompress_xform->decompress.chksum = RTE_COMP_CHECKSUM_CRC32 + +:: + + compress_xform->compress.chksum = RTE_COMP_CHECKSUM_ADLER32 + + decompress_xform->decompress.chksum = RTE_COMP_CHECKSUM_ADLER32 + +If you request a checksum for compression or decompression, +the checksum field in the operation structure, ``op->output_chksum``, +will be filled with the checksum. + +.. Note:: + + For the compression case above, your output buffer will need to be large enough to hold the compressed data plus a scratchpad for the checksum at the end, the scratchpad is 8 bytes for CRC32 and 4 bytes for Adler32. + +Level guide: + +The ISA-L levels have been mapped to somewhat correspond to the same ZLIB level, +i.e. ZLIB L1 gives a compression ratio similar to ISA-L L1. +Compressdev level 0 enables "No Compression", which passes the uncompressed +data to the output buffer, plus deflate headers. +The ISA-L library does not support this, therefore compressdev level 0 is not supported. + +The compressdev API has 10 levels, 0-9. ISA-L has 4 levels of compression, 0-3. +As a result the level mappings from the API to the PMD are shown below. + +.. _table_ISA-L_compression_levels: + +.. table:: Level mapping from Compressdev to ISA-L PMD. 
+ + +-------------+----------------------------------------------+-----------------------------------------------+ + | Compressdev | PMD Functionality | Internal ISA-L | + | API Level | | Level | + +=============+==============================================+===============================================+ + | 0 | No compression, Not Supported | --- | + +-------------+----------------------------------------------+-----------------------------------------------+ + | 1 | Dynamic (Fast compression) | 1 | + +-------------+----------------------------------------------+-----------------------------------------------+ + | 2 | Dynamic | 2 | + | | (Higher compression ratio) | | + +-------------+----------------------------------------------+-----------------------------------------------+ + | 3 | Dynamic | 3 | + | | (Best compression ratio) | (Level 2 if | + | | | no AVX512/AVX2) | + +-------------+----------------------------------------------+-----------------------------------------------+ + | 4 | Dynamic (Best compression ratio) | Same as above | + +-------------+----------------------------------------------+-----------------------------------------------+ + | 5 | Dynamic (Best compression ratio) | Same as above | + +-------------+----------------------------------------------+-----------------------------------------------+ + | 6 | Dynamic (Best compression ratio) | Same as above | + +-------------+----------------------------------------------+-----------------------------------------------+ + | 7 | Dynamic (Best compression ratio) | Same as above | + +-------------+----------------------------------------------+-----------------------------------------------+ + | 8 | Dynamic (Best compression ratio) | Same as above | + +-------------+----------------------------------------------+-----------------------------------------------+ + | 9 | Dynamic (Best compression ratio) | Same as above | + +-------------+----------------------------------------------+-----------------------------------------------+ + +.. Note:: + + The above table only shows mapping when API calls for dynamic compression. + For fixed compression, regardless of API level, internally ISA-L level 0 is always used. + + +Limitations +----------- + +* Compressdev level 0, no compression, is not supported. + +Installation +------------ + +* To build DPDK with Intel's ISA-L library, the user is required to download the library from ``_. + +* Once downloaded, the user needs to build the library, the ISA-L autotools are usually sufficient:: + + ./autogen.sh + ./configure + +* make can be used to install the library on their system, before building DPDK:: + + make + sudo make install + +* To build with meson, the **libisal.pc** file, must be copied into "pkgconfig", + e.g. /usr/lib/pkgconfig or /usr/lib64/pkgconfig depending on your system, + for meson to find the ISA-L library. The **libisal.pc** is located in library sources:: + + cp isal/libisal.pc /usr/lib/pkgconfig/ + + +Initialization +-------------- + +In order to enable this virtual compression PMD, user must: + +* Set ``CONFIG_RTE_LIBRTE_PMD_ISAL=y`` in config/common_base. + +To use the PMD in an application, user must: + +* Call ``rte_vdev_init("compress_isal")`` within the application. + +* Use ``--vdev="compress_isal"`` in the EAL options, which will call ``rte_vdev_init()`` internally. 
+
+The following parameter (optional) can be provided in the previous two calls:
+
+* ``socket_id:`` Specify the socket where the memory for the device is going to be allocated
+  (by default, socket_id will be the socket on which the core that is creating the PMD is running).
diff --git a/src/spdk/dpdk/doc/guides/compressdevs/octeontx.rst b/src/spdk/dpdk/doc/guides/compressdevs/octeontx.rst
new file mode 100644
index 000000000..5924ad1fa
--- /dev/null
+++ b/src/spdk/dpdk/doc/guides/compressdevs/octeontx.rst
@@ -0,0 +1,105 @@
+.. SPDX-License-Identifier: BSD-3-Clause
+   Copyright(c) 2018 Cavium Networks.
+
+OCTEON TX ZIP Compression Poll Mode Driver
+==========================================
+
+The OCTEON TX ZIP PMD (**librte_pmd_octeontx_zip**) provides a poll mode
+compression & decompression driver for the ZIP HW offload device found in the
+**Cavium OCTEON TX** SoC family.
+
+More information can be found at `Cavium, Inc Official Website
+`_.
+
+Features
+--------
+
+OCTEON TX ZIP PMD has support for:
+
+Compression/Decompression algorithm:
+
+* DEFLATE
+
+Huffman code type:
+
+* FIXED
+* DYNAMIC
+
+Window size support:
+
+* 2 to 2^14
+
+Limitations
+-----------
+
+* Chained mbufs are not supported.
+
+Supported OCTEON TX SoCs
+------------------------
+
+- CN83xx
+
+Steps To Setup Platform
+-----------------------
+
+   The OCTEON TX SDK includes a kernel image which provides the OCTEON TX ZIP PF
+   driver to manage configuration of the ZIPVF devices.
+   The required version of the SDK is "OCTEONTX-SDK-6.2.0-build35" or above.
+
+   The SDK can be installed by using the below command.
+   #rpm -ivh OCTEONTX-SDK-6.2.0-build35.x86_64.rpm --force --nodeps
+   It will install the OCTEONTX-SDK at the following default location:
+   /usr/local/Cavium_Networks/OCTEONTX-SDK/
+
+   For more information on building and booting the Linux kernel on OCTEON TX,
+   please refer to /usr/local/Cavium_Networks/OCTEONTX-SDK/docs/OcteonTX-SDK-UG_6.2.0.pdf.
+
+   The SDK and related information can be obtained from: `Cavium support site `_.
+
+Installation
+------------
+
+Driver Compilation
+~~~~~~~~~~~~~~~~~~
+
+To compile the OCTEON TX ZIP PMD for Linux arm64 gcc target, run the
+following ``make`` command:
+
+   .. code-block:: console
+
+      cd <DPDK-source-directory>
+      make config T=arm64-thunderx-linux-gcc install
+
+
+Initialization
+--------------
+
+The OCTEON TX ZIP device is exposed as a PCI device which consists of a set of
+PCIe VF devices. On EAL initialization, the ZIP PCIe VF devices will be
+probed. To use the PMD in an application, the user must:
+
+* Run the ``usertools/dpdk-devbind.py`` script to bind the eight ZIP PCIe VFs
+  to the ``vfio-pci`` driver:
+
+   .. code-block:: console
+
+      ./usertools/dpdk-devbind.py -b vfio-pci 0001:04:00.1
+      ./usertools/dpdk-devbind.py -b vfio-pci 0001:04:00.2
+      ./usertools/dpdk-devbind.py -b vfio-pci 0001:04:00.3
+      ./usertools/dpdk-devbind.py -b vfio-pci 0001:04:00.4
+      ./usertools/dpdk-devbind.py -b vfio-pci 0001:04:00.5
+      ./usertools/dpdk-devbind.py -b vfio-pci 0001:04:00.6
+      ./usertools/dpdk-devbind.py -b vfio-pci 0001:04:00.7
+      ./usertools/dpdk-devbind.py -b vfio-pci 0001:04:01.0
+
+* The unit test cases can be run as below:
+
+   ..
code-block:: console + + reserve enough huge pages + cd to the top-level DPDK directory + export RTE_TARGET=arm64-thunderx-linux-gcc + export RTE_SDK=`pwd` + cd to app/test + type the command "make" to compile + run the tests with "./test" + type the command "compressdev_autotest" to test diff --git a/src/spdk/dpdk/doc/guides/compressdevs/overview.rst b/src/spdk/dpdk/doc/guides/compressdevs/overview.rst new file mode 100644 index 000000000..809e4e6e7 --- /dev/null +++ b/src/spdk/dpdk/doc/guides/compressdevs/overview.rst @@ -0,0 +1,32 @@ +.. SPDX-License-Identifier: BSD-3-Clause + Copyright(c) 2018 Intel Corporation. + +Compression Device Supported Functionality Matrices +=================================================== + +Supported Feature Flags +----------------------- + +.. _table_compression_pmd_features: + +.. include:: overview_feature_table.txt + +.. Note:: + + - "Pass-through" feature flag refers to the ability of the PMD + to let input buffers pass-through it, copying the input to the output, + without making any modifications to it (no compression done). + + - "OOP SGL In SGL Out" feature flag stands for + "Out-of-place Scatter-gather list Input, Scatter-gather list Output", + which means PMD supports different scatter-gather styled input and output buffers + (i.e. both can consists of multiple segments). + + - "OOP SGL In LB Out" feature flag stands for + "Out-of-place Scatter-gather list Input, Linear Buffers Output", + which means PMD supports input from scatter-gathered styled buffers, outputting linear buffers + (i.e. single segment). + + - "OOP LB In SGL Out" feature flag stands for + "Out-of-place Linear Buffers Input, Scatter-gather list Output", + which means PMD supports input from linear buffer, outputting scatter-gathered styled buffers. diff --git a/src/spdk/dpdk/doc/guides/compressdevs/qat_comp.rst b/src/spdk/dpdk/doc/guides/compressdevs/qat_comp.rst new file mode 100644 index 000000000..475c4a9f9 --- /dev/null +++ b/src/spdk/dpdk/doc/guides/compressdevs/qat_comp.rst @@ -0,0 +1,53 @@ +.. SPDX-License-Identifier: BSD-3-Clause + Copyright(c) 2018 Intel Corporation. + +Intel(R) QuickAssist (QAT) Compression Poll Mode Driver +======================================================= + +The QAT compression PMD provides poll mode compression & decompression driver +support for the following hardware accelerator devices: + +* ``Intel QuickAssist Technology C62x`` +* ``Intel QuickAssist Technology C3xxx`` +* ``Intel QuickAssist Technology DH895x`` + + +Features +-------- + +QAT compression PMD has support for: + +Compression/Decompression algorithm: + + * DEFLATE - using Fixed and Dynamic Huffman encoding + +Window size support: + + * 32K + +Checksum generation: + + * CRC32, Adler and combined checksum + +Stateful operation: + + * Decompression only + +Limitations +----------- + +* Compressdev level 0, no compression, is not supported. +* Queue-pairs are thread-safe on Intel CPUs but Queues are not (that is, within a single + queue-pair all enqueues to the TX queue must be done from one thread and all dequeues + from the RX queue must be done from one thread, but enqueues and dequeues may be done + in different threads.) +* No BSD support as BSD QAT kernel driver not available. +* Stateful compression is not supported. + + +Installation +------------ + +The QAT compression PMD is built by default with a standard DPDK build. + +It depends on a QAT kernel driver, see :ref:`building_qat`. 
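+
+As a worked illustration of the feature list above, the following sketch fills
+in a compression transform for DEFLATE with dynamic Huffman encoding, a 32K
+window and a CRC32 checksum, using the generic compressdev API. The field and
+enum names follow ``rte_comp.h`` rather than anything specific to this driver,
+so treat the exact layout as an assumption to be checked against the installed
+headers.
+
+.. code-block:: c
+
+   #include <rte_comp.h>
+
+   /* DEFLATE, dynamic Huffman, 32K window, CRC32 checksum. */
+   static const struct rte_comp_xform compress_xform = {
+       .type = RTE_COMP_COMPRESS,
+       .compress = {
+           .algo = RTE_COMP_ALGO_DEFLATE,
+           .deflate.huffman = RTE_COMP_HUFFMAN_DYNAMIC,
+           .level = RTE_COMP_LEVEL_PMD_DEFAULT,
+           .chksum = RTE_COMP_CHECKSUM_CRC32,
+           .window_size = 15, /* base-two log of the 32K window */
+       },
+   };
+
+Such a transform would typically be converted into a private xform with
+``rte_compressdev_private_xform_create()`` and attached to stateless operations
+before they are enqueued.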
diff --git a/src/spdk/dpdk/doc/guides/compressdevs/zlib.rst b/src/spdk/dpdk/doc/guides/compressdevs/zlib.rst new file mode 100644 index 000000000..986c59d43 --- /dev/null +++ b/src/spdk/dpdk/doc/guides/compressdevs/zlib.rst @@ -0,0 +1,69 @@ +.. SPDX-License-Identifier: BSD-3-Clause + Copyright(c) 2018 Cavium Networks. + +ZLIB Compression Poll Mode Driver +================================== + +The ZLIB PMD (**librte_pmd_zlib**) provides poll mode compression & +decompression driver based on SW zlib library, + +Features +-------- + +ZLIB PMD has support for: + +Compression/Decompression algorithm: + +* DEFLATE + +Huffman code type: + +* FIXED +* DYNAMIC + +Window size support: + +* Min - 256 bytes +* Max - 32K + +Limitations +----------- + +* Scatter-Gather and Stateful not supported. + +Installation +------------ + +* To build DPDK with ZLIB library, the user is required to download the ``libz`` library. +* Use following command for installation. + +* For Fedora users:: + sudo yum install zlib-devel +* For Ubuntu users:: + sudo apt-get install zlib1g-dev + +* Once downloaded, the user needs to build the library. + +* To build from sources + download zlib sources from http://zlib.net/ and do following before building DPDK:: + + make + sudo make install + +Initialization +-------------- + +In order to enable this virtual compression PMD, user must: + +* Set ``CONFIG_RTE_LIBRTE_PMD_ZLIB=y`` in config/common_base. + +To use the PMD in an application, user must: + +* Call ``rte_vdev_init("compress_zlib")`` within the application. + +* Use ``--vdev="compress_zlib"`` in the EAL options, which will call ``rte_vdev_init()`` internally. + +The following parameter (optional) can be provided in the previous two calls: + +* ``socket_id:`` Specify the socket where the memory for the device is going to be allocated + (by default, socket_id will be the socket where the core that is creating the PMD is running on). diff --git a/src/spdk/dpdk/doc/guides/conf.py b/src/spdk/dpdk/doc/guides/conf.py new file mode 100644 index 000000000..700e05e17 --- /dev/null +++ b/src/spdk/dpdk/doc/guides/conf.py @@ -0,0 +1,435 @@ +# SPDX-License-Identifier: BSD-3-Clause +# Copyright(c) 2010-2015 Intel Corporation + +from __future__ import print_function +import subprocess +from docutils import nodes +from distutils.version import LooseVersion +from sphinx import __version__ as sphinx_version +from sphinx.highlighting import PygmentsBridge +from pygments.formatters.latex import LatexFormatter +from os import listdir +from os import environ +from os.path import basename +from os.path import dirname +from os.path import join as path_join + +try: + # Python 2. + import ConfigParser as configparser +except: + # Python 3. 
+ import configparser + +try: + import sphinx_rtd_theme + + html_theme = "sphinx_rtd_theme" + html_theme_path = [sphinx_rtd_theme.get_html_theme_path()] +except: + print('Install the sphinx ReadTheDocs theme for improved html documentation ' + 'layout: pip install sphinx_rtd_theme') + pass + +project = 'Data Plane Development Kit' +html_logo = '../logo/DPDK_logo_vertical_rev_small.png' +latex_logo = '../logo/DPDK_logo_horizontal_tag.png' +html_add_permalinks = "" +html_show_copyright = False +highlight_language = 'none' + +# If MAKEFLAGS is exported by the user, garbage text might end up in version +version = subprocess.check_output(['make', '-sRrC', '../../', 'showversion'], + env=dict(environ, MAKEFLAGS="")) +version = version.decode('utf-8').rstrip() +release = version + +master_doc = 'index' + +# Maximum feature description string length +feature_str_len = 25 + +# Figures, tables and code-blocks automatically numbered if they have caption +numfig = True + +latex_documents = [ + ('index', + 'doc.tex', + '', + '', + 'manual') +] + +# Latex directives to be included directly in the latex/pdf docs. +custom_latex_preamble = r""" +\usepackage{textalpha} +\RecustomVerbatimEnvironment{Verbatim}{Verbatim}{xleftmargin=5mm} +\usepackage{etoolbox} +\robustify\( +\robustify\) +""" + +# Configuration for the latex/pdf docs. +latex_elements = { + 'papersize': 'a4paper', + 'pointsize': '11pt', + # remove blank pages + 'classoptions': ',openany,oneside', + 'babel': '\\usepackage[english]{babel}', + # customize Latex formatting + 'preamble': custom_latex_preamble +} + + +# Override the default Latex formatter in order to modify the +# code/verbatim blocks. +class CustomLatexFormatter(LatexFormatter): + def __init__(self, **options): + super(CustomLatexFormatter, self).__init__(**options) + # Use the second smallest font size for code/verbatim blocks. + self.verboptions = r'formatcom=\footnotesize' + +# Replace the default latex formatter. +PygmentsBridge.latex_formatter = CustomLatexFormatter + +# Configuration for man pages +man_pages = [("testpmd_app_ug/run_app", "testpmd", + "tests for dpdk pmds", "", 1), + ("tools/pdump", "dpdk-pdump", + "enable packet capture on dpdk ports", "", 1), + ("tools/proc_info", "dpdk-procinfo", + "access dpdk port stats and memory info", "", 1), + ("tools/pmdinfo", "dpdk-pmdinfo", + "dump a PMDs hardware support info", "", 1), + ("tools/devbind", "dpdk-devbind", + "check device status and bind/unbind them from drivers", "", 8)] + + +# ####### :numref: fallback ######## +# The following hook functions add some simple handling for the :numref: +# directive for Sphinx versions prior to 1.3.1. The functions replace the +# :numref: reference with a link to the target (for all Sphinx doc types). +# It doesn't try to label figures/tables. +def numref_role(reftype, rawtext, text, lineno, inliner): + """ + Add a Sphinx role to handle numref references. Note, we can't convert + the link here because the doctree isn't build and the target information + isn't available. + """ + # Add an identifier to distinguish numref from other references. + newnode = nodes.reference('', + '', + refuri='_local_numref_#%s' % text, + internal=True) + return [newnode], [] + + +def process_numref(app, doctree, from_docname): + """ + Process the numref nodes once the doctree has been built and prior to + writing the files. The processing involves replacing the numref with a + link plus text to indicate if it is a Figure or Table link. + """ + + # Iterate over the reference nodes in the doctree. 
+ for node in doctree.traverse(nodes.reference): + target = node.get('refuri', '') + + # Look for numref nodes. + if target.startswith('_local_numref_#'): + target = target.replace('_local_numref_#', '') + + # Get the target label and link information from the Sphinx env. + data = app.builder.env.domains['std'].data + docname, label, _ = data['labels'].get(target, ('', '', '')) + relative_url = app.builder.get_relative_uri(from_docname, docname) + + # Add a text label to the link. + if target.startswith('figure'): + caption = 'Figure' + elif target.startswith('table'): + caption = 'Table' + else: + caption = 'Link' + + # New reference node with the updated link information. + newnode = nodes.reference('', + caption, + refuri='%s#%s' % (relative_url, label), + internal=True) + node.replace_self(newnode) + + +def generate_overview_table(output_filename, table_id, section, table_name, title): + """ + Function to generate the Overview Table from the ini files that define + the features for each driver. + + The default features for the table and their order is defined by the + 'default.ini' file. + + """ + # Default warning string. + warning = 'Warning generate_overview_table()' + + # Get the default features and order from the 'default.ini' file. + ini_path = path_join(dirname(output_filename), 'features') + config = configparser.ConfigParser() + config.optionxform = str + config.read(path_join(ini_path, 'default.ini')) + default_features = config.items(section) + + # Create a dict of the valid features to validate the other ini files. + valid_features = {} + max_feature_length = 0 + for feature in default_features: + key = feature[0] + valid_features[key] = ' ' + max_feature_length = max(max_feature_length, len(key)) + + # Get a list of driver ini files, excluding 'default.ini'. + ini_files = [basename(file) for file in listdir(ini_path) + if file.endswith('.ini') and file != 'default.ini'] + ini_files.sort() + + # Build up a list of the table header names from the ini filenames. + pmd_names = [] + for ini_filename in ini_files: + name = ini_filename[:-4] + name = name.replace('_vf', 'vf') + pmd_names.append(name) + + # Pad the table header names. + max_header_len = len(max(pmd_names, key=len)) + header_names = [] + for name in pmd_names: + if '_vec' in name: + pmd, vec = name.split('_') + name = '{0:{fill}{align}{width}}vec'.format(pmd, + fill='.', align='<', width=max_header_len-3) + else: + name = '{0:{fill}{align}{width}}'.format(name, + fill=' ', align='<', width=max_header_len) + header_names.append(name) + + # Create a dict of the defined features for each driver from the ini files. + ini_data = {} + for ini_filename in ini_files: + config = configparser.ConfigParser() + config.optionxform = str + config.read(path_join(ini_path, ini_filename)) + + # Initialize the dict with the default.ini value. + ini_data[ini_filename] = valid_features.copy() + + # Check for a valid ini section. + if not config.has_section(section): + print("{}: File '{}' has no [{}] secton".format(warning, + ini_filename, + section)) + continue + + # Check for valid features names. + for name, value in config.items(section): + if name not in valid_features: + print("{}: Unknown feature '{}' in '{}'".format(warning, + name, + ini_filename)) + continue + + if value: + # Get the first letter only. + ini_data[ini_filename][name] = value[0] + + # Print out the RST Driver Overview table from the ini file data. 
+ outfile = open(output_filename, 'w') + num_cols = len(header_names) + + print_table_css(outfile, table_id) + print('.. table:: ' + table_name + '\n', file=outfile) + print_table_header(outfile, num_cols, header_names, title) + print_table_body(outfile, num_cols, ini_files, ini_data, default_features) + + +def print_table_header(outfile, num_cols, header_names, title): + """ Print the RST table header. The header names are vertical. """ + print_table_divider(outfile, num_cols) + + line = '' + for name in header_names: + line += ' ' + name[0] + + print_table_row(outfile, title, line) + + for i in range(1, len(header_names[0])): + line = '' + for name in header_names: + line += ' ' + name[i] + + print_table_row(outfile, '', line) + + print_table_divider(outfile, num_cols) + + +def print_table_body(outfile, num_cols, ini_files, ini_data, default_features): + """ Print out the body of the table. Each row is a NIC feature. """ + + for feature, _ in default_features: + line = '' + + for ini_filename in ini_files: + line += ' ' + ini_data[ini_filename][feature] + + print_table_row(outfile, feature, line) + + print_table_divider(outfile, num_cols) + + +def print_table_row(outfile, feature, line): + """ Print a single row of the table with fixed formatting. """ + line = line.rstrip() + print(' {:<{}}{}'.format(feature, feature_str_len, line), file=outfile) + + +def print_table_divider(outfile, num_cols): + """ Print the table divider line. """ + line = ' ' + column_dividers = ['='] * num_cols + line += ' '.join(column_dividers) + + feature = '=' * feature_str_len + + print_table_row(outfile, feature, line) + + +def print_table_css(outfile, table_id): + template = """ +.. raw:: html + + +""" + print(template.replace("idx", "id%d" % (table_id)), file=outfile) + + +def setup(app): + table_file = dirname(__file__) + '/nics/overview_table.txt' + generate_overview_table(table_file, 1, + 'Features', + 'Features availability in networking drivers', + 'Feature') + table_file = dirname(__file__) + '/cryptodevs/overview_feature_table.txt' + generate_overview_table(table_file, 1, + 'Features', + 'Features availability in crypto drivers', + 'Feature') + table_file = dirname(__file__) + '/cryptodevs/overview_cipher_table.txt' + generate_overview_table(table_file, 2, + 'Cipher', + 'Cipher algorithms in crypto drivers', + 'Cipher algorithm') + table_file = dirname(__file__) + '/cryptodevs/overview_auth_table.txt' + generate_overview_table(table_file, 3, + 'Auth', + 'Authentication algorithms in crypto drivers', + 'Authentication algorithm') + table_file = dirname(__file__) + '/cryptodevs/overview_aead_table.txt' + generate_overview_table(table_file, 4, + 'AEAD', + 'AEAD algorithms in crypto drivers', + 'AEAD algorithm') + table_file = dirname(__file__) + '/cryptodevs/overview_asym_table.txt' + generate_overview_table(table_file, 5, + 'Asymmetric', + 'Asymmetric algorithms in crypto drivers', + 'Asymmetric algorithm') + table_file = dirname(__file__) + '/compressdevs/overview_feature_table.txt' + generate_overview_table(table_file, 1, + 'Features', + 'Features availability in compression drivers', + 'Feature') + table_file = dirname(__file__) + '/vdpadevs/overview_feature_table.txt' + generate_overview_table(table_file, 1, + 'Features', + 'Features availability in vDPA drivers', + 'Feature') + table_file = dirname(__file__) + '/bbdevs/overview_feature_table.txt' + generate_overview_table(table_file, 1, + 'Features', + 'Features availability in bbdev drivers', + 'Feature') + + if LooseVersion(sphinx_version) < 
LooseVersion('1.3.1'): + print('Upgrade sphinx to version >= 1.3.1 for ' + 'improved Figure/Table number handling.') + # Add a role to handle :numref: references. + app.add_role('numref', numref_role) + # Process the numref references once the doctree has been created. + app.connect('doctree-resolved', process_numref) + + try: + # New function in sphinx 1.8 + app.add_css_file('css/custom.css') + except: + app.add_stylesheet('css/custom.css') diff --git a/src/spdk/dpdk/doc/guides/contributing/abi_policy.rst b/src/spdk/dpdk/doc/guides/contributing/abi_policy.rst new file mode 100644 index 000000000..ee17ccb20 --- /dev/null +++ b/src/spdk/dpdk/doc/guides/contributing/abi_policy.rst @@ -0,0 +1,337 @@ +.. SPDX-License-Identifier: BSD-3-Clause + Copyright 2019 The DPDK contributors + +ABI Policy +========== + +Description +----------- + +This document details the management policy that ensures the long-term stability +of the DPDK ABI and API. + +General Guidelines +------------------ + +#. Major ABI versions are declared no more frequently than yearly. Compatibility + with the major ABI version is mandatory in subsequent releases until a new + major ABI version is declared. +#. Major ABI versions are usually but not always declared aligned with a + :ref:`LTS release `. +#. The ABI version is managed at a project level in DPDK, and is reflected in + all non-experimental :ref:`library's soname `. +#. The ABI should be preserved and not changed lightly. ABI changes must follow + the outlined :ref:`deprecation process `. +#. The addition of symbols is generally not problematic. The modification of + symbols is managed with :ref:`ABI Versioning `. +#. The removal of symbols is considered an :ref:`ABI breakage `, + once approved these will form part of the next ABI version. +#. Libraries or APIs marked as :ref:`experimental ` may + change without constraint, as they are not considered part of an ABI version. + Experimental libraries have the major ABI version ``0``. +#. Updates to the :ref:`minimum hardware requirements `, which drop + support for hardware which was previously supported, should be treated as an + ABI change. + +.. note:: + + In 2019, the DPDK community stated its intention to move to ABI stable + releases, over a number of release cycles. This change begins with + maintaining ABI stability through one year of DPDK releases starting from + DPDK 19.11. This policy will be reviewed in 2020, with intention of + lengthening the stability period. Additional implementation detail can be + found in the :ref:`release notes <20_02_abi_changes>`. + +What is an ABI? +~~~~~~~~~~~~~~~ + +An ABI (Application Binary Interface) is the set of runtime interfaces exposed +by a library. It is similar to an API (Application Programming Interface) but +is the result of compilation. It is also effectively cloned when applications +link to dynamic libraries. That is to say when an application is compiled to +link against dynamic libraries, it is assumed that the ABI remains constant +between the time the application is compiled/linked, and the time that it runs. +Therefore, in the case of dynamic linking, it is critical that an ABI is +preserved, or (when modified), done in such a way that the application is unable +to behave improperly or in an unexpected fashion. + +.. _figure_what_is_an_abi: + +.. figure:: img/what_is_an_abi.* + + Illustration of DPDK API and ABI. + + +What is an ABI version? +~~~~~~~~~~~~~~~~~~~~~~~ + +An ABI version is an instance of a library's ABI at a specific release. 
Certain +releases are considered to be milestone releases, the yearly LTS release for +example. The ABI of a milestone release may be declared as a 'major ABI +version', where this ABI version is then supported for some number of subsequent +releases and is annotated in the library's :ref:`soname`. + +ABI version support in subsequent releases facilitates application upgrades, by +enabling applications built against the milestone release to upgrade to +subsequent releases of a library without a rebuild. + +More details on major ABI version can be found in the :ref:`ABI versioning +` guide. + +The DPDK ABI policy +------------------- + +A new major ABI version is declared no more frequently than yearly, with +declarations usually aligning with a LTS release, e.g. ABI 20 for DPDK 19.11. +Compatibility with the major ABI version is then mandatory in subsequent +releases until the next major ABI version is declared, e.g. ABI 21 for DPDK +20.11. + +At the declaration of a major ABI version, major version numbers encoded in +libraries' sonames are bumped to indicate the new version, with the minor +version reset to ``0``. An example would be ``librte_eal.so.20.3`` would become +``librte_eal.so.21.0``. + +The ABI may then change multiple times, without warning, between the last major +ABI version increment and the HEAD label of the git tree, with the condition +that ABI compatibility with the major ABI version is preserved and therefore +sonames do not change. + +Minor versions are incremented to indicate the release of a new ABI compatible +DPDK release, typically the DPDK quarterly releases. An example of this, might +be that ``librte_eal.so.20.1`` would indicate the first ABI compatible DPDK +release, following the declaration of the new major ABI version ``20``. + +An ABI version is supported in all new releases until the next major ABI version +is declared. When changing the major ABI version, the release notes will detail +all ABI changes. + +.. _figure_abi_stability_policy: + +.. figure:: img/abi_stability_policy.* + + Mapping of new ABI versions and ABI version compatibility to DPDK + releases. + +.. _abi_changes: + +ABI Changes +~~~~~~~~~~~ + +The ABI may still change after the declaration of a major ABI version, that is +new APIs may be still added or existing APIs may be modified. + +.. Warning:: + + Note that, this policy details the method by which the ABI may be changed, + with due regard to preserving compatibility and observing deprecation + notices. This process however should not be undertaken lightly, as a general + rule ABI stability is extremely important for downstream consumers of DPDK. + The API should only be changed for significant reasons, such as performance + enhancements. API breakages due to changes such as reorganizing public + structure fields for aesthetic or readability purposes should be avoided. + +The requirements for changing the ABI are: + +#. At least 3 acknowledgments of the need to do so must be made on the + dpdk.org mailing list. + + - The acknowledgment of the maintainer of the component is mandatory, or if + no maintainer is available for the component, the tree/sub-tree maintainer + for that component must acknowledge the ABI change instead. + + - The acknowledgment of three members of the technical board, as delegates + of the `technical board `_ acknowledging + the need for the ABI change, is also mandatory. 
+
+   - It is also recommended that acknowledgments from different "areas of
+     interest" be sought for each deprecation, for example: from NIC vendors,
+     CPU vendors, end-users, etc.
+
+#. Backward compatibility with the major ABI version must be maintained through
+   :ref:`abi_versioning`, with :ref:`forward-only <forward-only>` compatibility
+   offered for any ABI changes that are indicated to be part of the next ABI
+   version.
+
+   - In situations where backward compatibility is not possible, read the
+     section on :ref:`abi_breakages`.
+
+   - No backward or forward compatibility is offered for API changes marked as
+     ``experimental``, as described in the section on :ref:`Experimental APIs
+     and Libraries <experimental_apis>`.
+
+   - In situations in which an ``experimental`` symbol has been stable for some
+     time, when promoting the symbol to become part of the next ABI version, the
+     maintainer may choose to provide an alias to the ``experimental`` tag, so
+     as not to break consuming applications.
+
+#. If a newly proposed API functionally replaces an existing one, when the new
+   API becomes non-experimental, then the old one is marked with
+   ``__rte_deprecated``.
+
+   - The deprecated API should follow the notification process to be removed,
+     see :ref:`deprecation_notices`.
+
+   - At the declaration of the next major ABI version, those ABI changes then
+     become a formal part of the new ABI and the requirement to preserve ABI
+     compatibility with the last major ABI version is then dropped.
+
+   - The responsibility for removing redundant ABI compatibility code rests
+     with the original contributor of the ABI changes, failing that, then with
+     the contributor's company and then finally with the maintainer.
+
+.. _forward-only:
+
+.. Note::
+
+   Note that forward-only compatibility is offered for those changes made
+   between major ABI versions. As a library's soname can only describe
+   compatibility with the last major ABI version, until the next major ABI
+   version is declared, these changes therefore cannot be resolved as a runtime
+   dependency through the soname. Therefore any application wishing to make use
+   of these ABI changes can only ensure that its runtime dependencies are met
+   through Operating System package versioning.
+
+.. _hw_rqmts:
+
+.. Note::
+
+   Updates to the minimum hardware requirements, which drop support for hardware
+   which was previously supported, should be treated as an ABI change, and
+   follow the relevant deprecation policy procedures as above: 3 acks, technical
+   board approval and announcement at least one release in advance.
+
+.. _abi_breakages:
+
+ABI Breakages
+~~~~~~~~~~~~~
+
+For those ABI changes that are too significant to reasonably maintain multiple
+symbol versions, there is an amended process. In these cases, ABIs may be
+updated without the requirement of backward compatibility being provided. These
+changes must follow the same process :ref:`described above <abi_changes>` as
+non-breaking changes, however with the following additional requirements:
+
+#. ABI breaking changes (including an alternative map file) can be included with
+   a deprecation notice, wrapped by the ``RTE_NEXT_ABI`` option, to provide
+   more details about upcoming changes (a minimal sketch is shown after this
+   list). The ``RTE_NEXT_ABI`` wrapper will be removed at the declaration of
+   the next major ABI version.
+
+#. Once approved, and after the deprecation notice has been observed, these
+   changes will form part of the next declared major ABI version.
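+
+The following is a minimal, hypothetical sketch of how such a breaking change
+might be staged behind ``RTE_NEXT_ABI``; ``rte_foo`` and its fields are
+placeholder names used purely for illustration.
+
+.. code-block:: c
+
+   #include <stdint.h>
+
+   #ifdef RTE_NEXT_ABI
+   /* Layout that only takes effect when the next ABI is enabled at build time. */
+   struct rte_foo {
+       uint32_t bar;
+       uint32_t baz; /* new field that breaks the current major ABI version */
+   };
+   #else
+   /* Layout preserved for compatibility with the current major ABI version. */
+   struct rte_foo {
+       uint32_t bar;
+   };
+   #endif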
+
+Examples of ABI Changes
+~~~~~~~~~~~~~~~~~~~~~~~
+
+The following are examples of allowable ABI changes occurring between
+declarations of major ABI versions.
+
+* The DPDK 19.11 release defines the function ``rte_foo()``; ``rte_foo()``
+  is part of the major ABI version ``20``.
+
+* The DPDK 20.02 release defines a new function ``rte_foo(uint8_t bar)``.
+  This is not a problem as long as the symbol ``rte_foo@DPDK20`` is
+  preserved through :ref:`abi_versioning`.
+
+  - The new function may be marked with the ``__rte_experimental`` tag for a
+    number of releases, as described in the section :ref:`experimental_apis`.
+
+  - Once ``rte_foo(uint8_t bar)`` becomes non-experimental, ``rte_foo()`` is
+    declared as ``__rte_deprecated`` and a deprecation notice is provided.
+
+* DPDK 19.11 is not re-released to include ``rte_foo(uint8_t bar)``; the new
+  version of ``rte_foo`` only exists from DPDK 20.02 onwards, as described in
+  the :ref:`note on forward-only compatibility <forward-only>`.
+
+* The DPDK 20.02 release defines the experimental function ``__rte_experimental
+  rte_baz()``. This function may or may not exist in the DPDK 20.05 release.
+
+* An application ``dPacket`` wishes to use ``rte_foo(uint8_t bar)`` before the
+  declaration of the DPDK ``21`` major ABI version. The application can only
+  ensure its runtime dependencies are met by specifying ``DPDK (>= 20.2)`` as
+  an explicit package dependency, as the soname can only indicate the
+  supported major ABI version.
+
+* At the release of DPDK 20.11, the function ``rte_foo(uint8_t bar)`` becomes
+  formally part of the new major ABI version DPDK ``21`` and ``rte_foo()`` may
+  be removed.
+
+.. _deprecation_notices:
+
+Examples of Deprecation Notices
+~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
+
+The following are some examples of ABI deprecation notices which would be
+added to the Release Notes:
+
+* The macro ``#RTE_FOO`` is deprecated and will be removed with ABI version
+  21, to be replaced with the inline function ``rte_foo()``.
+
+* The function ``rte_mbuf_grok()`` has been updated to include a new parameter
+  in version 20.2. Backwards compatibility will be maintained for this function
+  until the release of the new DPDK major ABI version 21, in DPDK version
+  20.11.
+
+* The members of ``struct rte_foo`` have been reorganized in DPDK 20.02 for
+  performance reasons. Existing binary applications will have backwards
+  compatibility in release 20.02, while newly built binaries will need to
+  reference the new structure variant ``struct rte_foo2``. Compatibility will
+  be removed in release 20.11, and all applications will require updating and
+  rebuilding to the new structure at that time, which will be renamed to the
+  original ``struct rte_foo``.
+
+* Significant ABI changes are planned for the ``librte_dostuff`` library. The
+  upcoming release 20.02 will not contain these changes, but release 20.11 will,
+  and no backwards compatibility is planned due to the extensive nature of
+  these changes. Binaries using this library built prior to ABI version 21 will
+  require updating and recompilation.
+
+.. _experimental_apis:
+
+Experimental
+------------
+
+APIs
+~~~~
+
+APIs marked as ``experimental`` are not considered part of an ABI version and
+may change without warning at any time.
Since changes to APIs are most likely +immediately after their introduction, as users begin to take advantage of those +new APIs and start finding issues with them, new DPDK APIs will be automatically +marked as ``experimental`` to allow for a period of stabilization before they +become part of a tracked ABI version. + +Note that marking an API as experimental is a multi step process. +To mark an API as experimental, the symbols which are desired to be exported +must be placed in an EXPERIMENTAL version block in the corresponding libraries' +version map script. +Secondly, the corresponding prototypes of those exported functions (in the +development header files), must be marked with the ``__rte_experimental`` tag +(see ``rte_compat.h``). +The DPDK build makefiles perform a check to ensure that the map file and the +C code reflect the same list of symbols. +This check can be circumvented by defining ``ALLOW_EXPERIMENTAL_API`` +during compilation in the corresponding library Makefile. + +In addition to tagging the code with ``__rte_experimental``, +the doxygen markup must also contain the EXPERIMENTAL string, +and the MAINTAINERS file should note the EXPERIMENTAL libraries. + +For removing the experimental tag associated with an API, deprecation notice is +not required. Though, an API should remain in experimental state for at least +one release. Thereafter, the normal process of posting patch for review to +mailing list can be followed. + +After the experimental tag has been formally removed, a tree/sub-tree maintainer +may choose to offer an alias to the experimental tag so as not to break +applications using the symbol. The alias is then dropped at the declaration of +next major ABI version. + +Libraries +~~~~~~~~~ + +Libraries marked as ``experimental`` are entirely not considered part of an ABI +version, and may change without warning at any time. Experimental libraries +always have a major ABI version of ``0`` to indicate they exist outside of +:ref:`abi_versioning` , with the minor version incremented with each ABI change +to library. diff --git a/src/spdk/dpdk/doc/guides/contributing/abi_versioning.rst b/src/spdk/dpdk/doc/guides/contributing/abi_versioning.rst new file mode 100644 index 000000000..e96fde340 --- /dev/null +++ b/src/spdk/dpdk/doc/guides/contributing/abi_versioning.rst @@ -0,0 +1,697 @@ +.. SPDX-License-Identifier: BSD-3-Clause + Copyright 2018 The DPDK contributors + +.. _abi_versioning: + +ABI Versioning +============== + +This document details the mechanics of ABI version management in DPDK. + +.. _what_is_soname: + +What is a library's soname? +--------------------------- + +System libraries usually adopt the familiar major and minor version naming +convention, where major versions (e.g. ``librte_eal 20.x, 21.x``) are presumed +to be ABI incompatible with each other and minor versions (e.g. ``librte_eal +20.1, 20.2``) are presumed to be ABI compatible. A library's `soname +`_. is typically used to provide backward +compatibility information about a given library, describing the lowest common +denominator ABI supported by the library. The soname or logical name for the +library, is typically comprised of the library's name and major version e.g. +``librte_eal.so.20``. + +During an application's build process, a library's soname is noted as a runtime +dependency of the application. This information is then used by the `dynamic +linker `_ when resolving the +applications dependencies at runtime, to load a library supporting the correct +ABI version. 
The library loaded at runtime, therefore, may be a minor revision
+supporting the same major ABI version (e.g. ``librte_eal.20.2``), as the library
+used to link the application (e.g. ``librte_eal.20.0``).
+
+.. _major_abi_versions:
+
+Major ABI versions
+------------------
+
+An ABI version change to a given library, especially in core libraries such as
+``librte_mbuf``, may cause an implicit ripple effect on the ABI of its
+consuming libraries, causing ABI breakages. There may however be no explicit
+reason to bump a dependent library's ABI version, as there may have been no
+obvious change to the dependent library's API, even though the library's ABI
+compatibility will have been broken.
+
+This interdependence of DPDK libraries means that ABI versioning of libraries
+is more manageable at a project level, with all project libraries sharing a
+**single ABI version**. In addition, the need to maintain a stable ABI for some
+number of releases, as described in the section :doc:`abi_policy`, means
+that ABI version increments need to be carefully planned and managed at a
+project level.
+
+Major ABI versions are therefore typically declared aligned with an LTS release
+and are then supported for some number of subsequent releases, shared across
+all libraries. This means that a single project-level ABI version, reflected in
+all individual libraries' sonames, library filenames and associated version
+maps, persists over multiple releases.
+
+.. code-block:: none
+
+   $ head ./lib/librte_acl/rte_acl_version.map
+   DPDK_20 {
+        global:
+   ...
+
+   $ head ./lib/librte_eal/rte_eal_version.map
+   DPDK_20 {
+        global:
+   ...
+
+When an ABI change is made between major ABI versions to a given library, a new
+section is added to that library's version map describing the impending new ABI
+version, as described in the section :ref:`example_abi_macro_usage`. The
+library's soname and filename however do not change, e.g. ``libacl.so.20``, as
+ABI compatibility with the last major ABI version continues to be preserved for
+that library.
+
+.. code-block:: none
+
+   $ head ./lib/librte_acl/rte_acl_version.map
+   DPDK_20 {
+        global:
+   ...
+
+   DPDK_21 {
+        global:
+
+   } DPDK_20;
+   ...
+
+   $ head ./lib/librte_eal/rte_eal_version.map
+   DPDK_20 {
+        global:
+   ...
+
+However when a new ABI version is declared, for example DPDK ``21``, old
+deprecated functions may be safely removed at this point and the entire old
+major ABI version removed; see the section :ref:`deprecating_entire_abi` on
+how this may be done.
+
+.. code-block:: none
+
+   $ head ./lib/librte_acl/rte_acl_version.map
+   DPDK_21 {
+        global:
+   ...
+
+   $ head ./lib/librte_eal/rte_eal_version.map
+   DPDK_21 {
+        global:
+   ...
+
+At the same time, the major ABI version is changed atomically across all
+libraries by incrementing the major version in the ABI_VERSION file. This is
+done globally for all libraries that declare a stable ABI. For libraries marked
+as EXPERIMENTAL, their major ABI version is always set to 0.
+
+Minor ABI versions
+~~~~~~~~~~~~~~~~~~
+
+Each non-LTS release will also increment the minor ABI version, to permit
+multiple DPDK versions to be installed alongside each other. Both stable and
+experimental ABIs are versioned using the global version file that is updated
+at the start of each release cycle, and are managed at the project level.
+
+Versioning Macros
+-----------------
+
+When a symbol is exported from a library to provide an API, it also provides a
+calling convention (ABI) that is embodied in its name, return type and
+arguments.
Occasionally that function may need to change to accommodate new +functionality or behavior. When that occurs, it is may be required to allow for +backward compatibility for a time with older binaries that are dynamically +linked to the DPDK. + +To support backward compatibility the ``rte_function_versioning.h`` +header file provides macros to use when updating exported functions. These +macros are used in conjunction with the ``rte__version.map`` file for +a given library to allow multiple versions of a symbol to exist in a shared +library so that older binaries need not be immediately recompiled. + +The macros exported are: + +* ``VERSION_SYMBOL(b, e, n)``: Creates a symbol version table entry binding + versioned symbol ``b@DPDK_n`` to the internal function ``be``. + +* ``BIND_DEFAULT_SYMBOL(b, e, n)``: Creates a symbol version entry instructing + the linker to bind references to symbol ``b`` to the internal symbol + ``be``. + +* ``MAP_STATIC_SYMBOL(f, p)``: Declare the prototype ``f``, and map it to the + fully qualified function ``p``, so that if a symbol becomes versioned, it + can still be mapped back to the public symbol name. + +* ``__vsym``: Annotation to be used in a declaration of the internal symbol + ``be`` to signal that it is being used as an implementation of a particular + version of symbol ``b``. + +* ``VERSION_SYMBOL_EXPERIMENTAL(b, e)``: Creates a symbol version table entry + binding versioned symbol ``b@EXPERIMENTAL`` to the internal function ``be``. + The macro is used when a symbol matures to become part of the stable ABI, to + provide an alias to experimental for some time. + +.. _example_abi_macro_usage: + +Examples of ABI Macro use +~~~~~~~~~~~~~~~~~~~~~~~~~ + +Updating a public API +_____________________ + +Assume we have a function as follows + +.. code-block:: c + + /* + * Create an acl context object for apps to + * manipulate + */ + struct rte_acl_ctx * + rte_acl_create(const struct rte_acl_param *param) + { + ... + } + + +Assume that struct rte_acl_ctx is a private structure, and that a developer +wishes to enhance the acl api so that a debugging flag can be enabled on a +per-context basis. This requires an addition to the structure (which, being +private, is safe), but it also requires modifying the code as follows + +.. code-block:: c + + /* + * Create an acl context object for apps to + * manipulate + */ + struct rte_acl_ctx * + rte_acl_create(const struct rte_acl_param *param, int debug) + { + ... + } + + +Note also that, being a public function, the header file prototype must also be +changed, as must all the call sites, to reflect the new ABI footprint. We will +maintain previous ABI versions that are accessible only to previously compiled +binaries. + +The addition of a parameter to the function is ABI breaking as the function is +public, and existing application may use it in its current form. However, the +compatibility macros in DPDK allow a developer to use symbol versioning so that +multiple functions can be mapped to the same public symbol based on when an +application was linked to it. To see how this is done, we start with the +requisite libraries version map file. Initially the version map file for the acl +library looks like this + +.. 
code-block:: none + + DPDK_20 { + global: + + rte_acl_add_rules; + rte_acl_build; + rte_acl_classify; + rte_acl_classify_alg; + rte_acl_classify_scalar; + rte_acl_create; + rte_acl_dump; + rte_acl_find_existing; + rte_acl_free; + rte_acl_ipv4vlan_add_rules; + rte_acl_ipv4vlan_build; + rte_acl_list_dump; + rte_acl_reset; + rte_acl_reset_rules; + rte_acl_set_ctx_classify; + + local: *; + }; + +This file needs to be modified as follows + +.. code-block:: none + + DPDK_20 { + global: + + rte_acl_add_rules; + rte_acl_build; + rte_acl_classify; + rte_acl_classify_alg; + rte_acl_classify_scalar; + rte_acl_create; + rte_acl_dump; + rte_acl_find_existing; + rte_acl_free; + rte_acl_ipv4vlan_add_rules; + rte_acl_ipv4vlan_build; + rte_acl_list_dump; + rte_acl_reset; + rte_acl_reset_rules; + rte_acl_set_ctx_classify; + + local: *; + }; + + DPDK_21 { + global: + rte_acl_create; + + } DPDK_20; + +The addition of the new block tells the linker that a new version node +``DPDK_21`` is available, which contains the symbol rte_acl_create, and inherits +the symbols from the DPDK_20 node. This list is directly translated into a +list of exported symbols when DPDK is compiled as a shared library. + +Next, we need to specify in the code which function maps to the rte_acl_create +symbol at which versions. First, at the site of the initial symbol definition, +we need to update the function so that it is uniquely named, and not in conflict +with the public symbol name + +.. code-block:: c + + -struct rte_acl_ctx * + -rte_acl_create(const struct rte_acl_param *param) + +struct rte_acl_ctx * __vsym + +rte_acl_create_v20(const struct rte_acl_param *param) + { + size_t sz; + struct rte_acl_ctx *ctx; + ... + +Note that the base name of the symbol was kept intact, as this is conducive to +the macros used for versioning symbols and we have annotated the function as +``__vsym``, an implementation of a versioned symbol . That is our next step, +mapping this new symbol name to the initial symbol name at version node 20. +Immediately after the function, we add the VERSION_SYMBOL macro. + +.. code-block:: c + + #include + + ... + VERSION_SYMBOL(rte_acl_create, _v20, 20); + +Remembering to also add the rte_function_versioning.h header to the requisite c +file where these changes are being made. The macro instructs the linker to +create a new symbol ``rte_acl_create@DPDK_20``, which matches the symbol created +in older builds, but now points to the above newly named function. We have now +mapped the original rte_acl_create symbol to the original function (but with a +new name). + +Please see the section :ref:`Enabling versioning macros +` to enable this macro in the meson/ninja build. +Next, we need to create the new ``v21`` version of the symbol. We create a new +function name, with the ``v21`` suffix, and implement it appropriately. + +.. code-block:: c + + struct rte_acl_ctx * __vsym + rte_acl_create_v21(const struct rte_acl_param *param, int debug); + { + struct rte_acl_ctx *ctx = rte_acl_create_v20(param); + + ctx->debug = debug; + + return ctx; + } + +This code serves as our new API call. Its the same as our old call, but adds the +new parameter in place. Next we need to map this function to the new default +symbol ``rte_acl_create@DPDK_21``. To do this, immediately after the function, +we add the BIND_DEFAULT_SYMBOL macro. + +.. code-block:: c + + #include + + ... 
+ BIND_DEFAULT_SYMBOL(rte_acl_create, _v21, 21); + +The macro instructs the linker to create the new default symbol +``rte_acl_create@DPDK_21``, which points to the above newly named function. + +We finally modify the prototype of the call in the public header file, +such that it contains both versions of the symbol and the public API. + +.. code-block:: c + + struct rte_acl_ctx * + rte_acl_create(const struct rte_acl_param *param); + + struct rte_acl_ctx * __vsym + rte_acl_create_v20(const struct rte_acl_param *param); + + struct rte_acl_ctx * __vsym + rte_acl_create_v21(const struct rte_acl_param *param, int debug); + + +And that's it, on the next shared library rebuild, there will be two versions of +rte_acl_create, an old DPDK_20 version, used by previously built applications, +and a new DPDK_21 version, used by future built applications. + +.. note:: + + **Before you leave**, please take care reviewing the sections on + :ref:`mapping static symbols `, + :ref:`enabling versioning macros `, + and :ref:`ABI deprecation `. + + +.. _mapping_static_symbols: + +Mapping static symbols +______________________ + +Now we've taken what was a public symbol, and duplicated it into two uniquely +and differently named symbols. We've then mapped each of those back to the +public symbol ``rte_acl_create`` with different version tags. This only applies +to dynamic linking, as static linking has no notion of versioning. That leaves +this code in a position of no longer having a symbol simply named +``rte_acl_create`` and a static build will fail on that missing symbol. + +To correct this, we can simply map a function of our choosing back to the public +symbol in the static build with the ``MAP_STATIC_SYMBOL`` macro. Generally the +assumption is that the most recent version of the symbol is the one you want to +map. So, back in the C file where, immediately after ``rte_acl_create_v21`` is +defined, we add this + + +.. code-block:: c + + struct rte_acl_ctx * __vsym + rte_acl_create_v21(const struct rte_acl_param *param, int debug) + { + ... + } + MAP_STATIC_SYMBOL(struct rte_acl_ctx *rte_acl_create(const struct rte_acl_param *param, int debug), rte_acl_create_v21); + +That tells the compiler that, when building a static library, any calls to the +symbol ``rte_acl_create`` should be linked to ``rte_acl_create_v21`` + + +.. _enabling_versioning_macros: + +Enabling versioning macros +__________________________ + +Finally, we need to indicate to the :doc:`meson/ninja build system +<../prog_guide/build-sdk-meson>` to enable versioning macros when building the +library or driver. In the libraries or driver where we have added symbol +versioning, in the ``meson.build`` file we add the following + +.. code-block:: none + + use_function_versioning = true + +at the start of the head of the file. This will indicate to the tool-chain to +enable the function version macros when building. There is no corresponding +directive required for the ``make`` build system. + + +.. _aliasing_experimental_symbols: + +Aliasing experimental symbols +_____________________________ + +In situations in which an ``experimental`` symbol has been stable for some time, +and it becomes a candidate for promotion to the stable ABI. At this time, when +promoting the symbol, maintainer may choose to provide an alias to the +``experimental`` symbol version, so as not to break consuming applications. + +The process to provide an alias to ``experimental`` is similar to that, of +:ref:`symbol versioning ` described above. 
+Assume we have an experimental function ``rte_acl_create`` as follows: + +.. code-block:: c + + #include + + /* + * Create an acl context object for apps to + * manipulate + */ + __rte_experimental + struct rte_acl_ctx * + rte_acl_create(const struct rte_acl_param *param) + { + ... + } + +In the map file, experimental symbols are listed as part of the ``EXPERIMENTAL`` +version node. + +.. code-block:: none + + DPDK_20 { + global: + ... + + local: *; + }; + + EXPERIMENTAL { + global: + + rte_acl_create; + }; + +When we promote the symbol to the stable ABI, we simply strip the +``__rte_experimental`` annotation from the function and move the symbol from the +``EXPERIMENTAL`` node, to the node of the next major ABI version as follow. + +.. code-block:: c + + /* + * Create an acl context object for apps to + * manipulate + */ + struct rte_acl_ctx * + rte_acl_create(const struct rte_acl_param *param) + { + ... + } + +We then update the map file, adding the symbol ``rte_acl_create`` +to the ``DPDK_21`` version node. + +.. code-block:: none + + DPDK_20 { + global: + ... + + local: *; + }; + + DPDK_21 { + global: + + rte_acl_create; + } DPDK_20; + + +Although there are strictly no guarantees or commitments associated with +:ref:`experimental symbols `, a maintainer may wish to offer +an alias to experimental. The process to add an alias to experimental, +is similar to the symbol versioning process. Assuming we have an experimental +symbol as before, we now add the symbol to both the ``EXPERIMENTAL`` +and ``DPDK_21`` version nodes. + +.. code-block:: c + + #include ; + #include + + /* + * Create an acl context object for apps to + * manipulate + */ + struct rte_acl_ctx * + rte_acl_create(const struct rte_acl_param *param) + { + ... + } + + __rte_experimental + struct rte_acl_ctx * + rte_acl_create_e(const struct rte_acl_param *param) + { + return rte_acl_create(param); + } + VERSION_SYMBOL_EXPERIMENTAL(rte_acl_create, _e); + + struct rte_acl_ctx * + rte_acl_create_v21(const struct rte_acl_param *param) + { + return rte_acl_create(param); + } + BIND_DEFAULT_SYMBOL(rte_acl_create, _v21, 21); + +In the map file, we map the symbol to both the ``EXPERIMENTAL`` +and ``DPDK_21`` version nodes. + +.. code-block:: none + + DPDK_20 { + global: + ... + + local: *; + }; + + DPDK_21 { + global: + + rte_acl_create; + } DPDK_20; + + EXPERIMENTAL { + global: + + rte_acl_create; + }; + +.. note:: + + Please note, similar to :ref:`symbol versioning `, + when aliasing to experimental you will also need to take care of + :ref:`mapping static symbols `. + + +.. _abi_deprecation: + +Deprecating part of a public API +________________________________ + +Lets assume that you've done the above updates, and in preparation for the next +major ABI version you decide you would like to retire the old version of the +function. After having gone through the ABI deprecation announcement process, +removal is easy. Start by removing the symbol from the requisite version map +file: + +.. code-block:: none + + DPDK_20 { + global: + + rte_acl_add_rules; + rte_acl_build; + rte_acl_classify; + rte_acl_classify_alg; + rte_acl_classify_scalar; + rte_acl_dump; + - rte_acl_create + rte_acl_find_existing; + rte_acl_free; + rte_acl_ipv4vlan_add_rules; + rte_acl_ipv4vlan_build; + rte_acl_list_dump; + rte_acl_reset; + rte_acl_reset_rules; + rte_acl_set_ctx_classify; + + local: *; + }; + + DPDK_21 { + global: + rte_acl_create; + } DPDK_20; + + +Next remove the corresponding versioned export. + +.. 
code-block:: c + + -VERSION_SYMBOL(rte_acl_create, _v20, 20); + + +Note that the internal function definition could also be removed, but its used +in our example by the newer version ``v21``, so we leave it in place and declare +it as static. This is a coding style choice. + +.. _deprecating_entire_abi: + +Deprecating an entire ABI version +_________________________________ + +While removing a symbol from an ABI may be useful, it is more practical to +remove an entire version node at once, as is typically done at the declaration +of a major ABI version. If a version node completely specifies an API, then +removing part of it, typically makes it incomplete. In those cases it is better +to remove the entire node. + +To do this, start by modifying the version map file, such that all symbols from +the node to be removed are merged into the next node in the map. + +In the case of our map above, it would transform to look as follows + +.. code-block:: none + + DPDK_21 { + global: + + rte_acl_add_rules; + rte_acl_build; + rte_acl_classify; + rte_acl_classify_alg; + rte_acl_classify_scalar; + rte_acl_dump; + rte_acl_create + rte_acl_find_existing; + rte_acl_free; + rte_acl_ipv4vlan_add_rules; + rte_acl_ipv4vlan_build; + rte_acl_list_dump; + rte_acl_reset; + rte_acl_reset_rules; + rte_acl_set_ctx_classify; + + local: *; + }; + +Then any uses of BIND_DEFAULT_SYMBOL that pointed to the old node should be +updated to point to the new version node in any header files for all affected +symbols. + +.. code-block:: c + + -BIND_DEFAULT_SYMBOL(rte_acl_create, _v20, 20); + +BIND_DEFAULT_SYMBOL(rte_acl_create, _v21, 21); + +Lastly, any VERSION_SYMBOL macros that point to the old version node should be +removed, taking care to keep, where need old code in place to support newer +versions of the symbol. + + +Running the ABI Validator +------------------------- + +The ``devtools`` directory in the DPDK source tree contains a utility program, +``check-abi.sh``, for validating the DPDK ABI based on the libabigail +`abidiff utility `_. + +The syntax of the ``check-abi.sh`` utility is:: + + devtools/check-abi.sh + +Where specifies the directory housing the reference build of DPDK, +and specifies the DPDK build directory to check the ABI of. + +The ABI compatibility is automatically verified when using a build script +from ``devtools``, if the variable ``DPDK_ABI_REF_VERSION`` is set with a tag, +as described in :ref:`ABI check recommendations`. diff --git a/src/spdk/dpdk/doc/guides/contributing/cheatsheet.rst b/src/spdk/dpdk/doc/guides/contributing/cheatsheet.rst new file mode 100644 index 000000000..0debd118d --- /dev/null +++ b/src/spdk/dpdk/doc/guides/contributing/cheatsheet.rst @@ -0,0 +1,11 @@ +.. SPDX-License-Identifier: BSD-3-Clause + Copyright 2018 The DPDK contributors + +Patch Cheatsheet +================ + +.. _figure_patch_cheatsheet: + +.. figure:: img/patch_cheatsheet.* + + Cheat sheet for submitting patches to dev@dpdk.org diff --git a/src/spdk/dpdk/doc/guides/contributing/coding_style.rst b/src/spdk/dpdk/doc/guides/contributing/coding_style.rst new file mode 100644 index 000000000..4efde93f6 --- /dev/null +++ b/src/spdk/dpdk/doc/guides/contributing/coding_style.rst @@ -0,0 +1,1003 @@ +.. SPDX-License-Identifier: BSD-3-Clause + Copyright 2018 The DPDK contributors + +.. _coding_style: + +DPDK Coding Style +================= + +Description +----------- + +This document specifies the preferred style for source files in the DPDK source tree. 
+It is based on the Linux Kernel coding guidelines and the FreeBSD 7.2 Kernel Developer's Manual (see man style(9)), but was heavily modified for the needs of the DPDK. + +General Guidelines +------------------ + +The rules and guidelines given in this document cannot cover every situation, so the following general guidelines should be used as a fallback: + +* The code style should be consistent within each individual file. +* In the case of creating new files, the style should be consistent within each file in a given directory or module. +* The primary reason for coding standards is to increase code readability and comprehensibility, therefore always use whatever option will make the code easiest to read. + +Line length is recommended to be not more than 80 characters, including comments. +[Tab stop size should be assumed to be 8-characters wide]. + +.. note:: + + The above is recommendation, and not a hard limit. + However, it is expected that the recommendations should be followed in all but the rarest situations. + +C Comment Style +--------------- + +Usual Comments +~~~~~~~~~~~~~~ + +These comments should be used in normal cases. +To document a public API, a doxygen-like format must be used: refer to :ref:`doxygen_guidelines`. + +.. code-block:: c + + /* + * VERY important single-line comments look like this. + */ + + /* Most single-line comments look like this. */ + + /* + * Multi-line comments look like this. Make them real sentences. Fill + * them so they look like real paragraphs. + */ + +License Header +~~~~~~~~~~~~~~ + +Each file should begin with a special comment containing the appropriate copyright and license for the file. +Generally this is the BSD License, except for code for Linux Kernel modules. +After any copyright header, a blank line should be left before any other contents, e.g. include statements in a C file. + +C Preprocessor Directives +------------------------- + +Header Includes +~~~~~~~~~~~~~~~ + +In DPDK sources, the include files should be ordered as following: + +#. libc includes (system includes first) +#. DPDK EAL includes +#. DPDK misc libraries includes +#. application-specific includes + +Include files from the local application directory are included using quotes, while includes from other paths are included using angle brackets: "<>". + +Example: + +.. code-block:: c + + #include + #include + + #include + + #include + #include + + #include "application.h" + +Header File Guards +~~~~~~~~~~~~~~~~~~ + +Headers should be protected against multiple inclusion with the usual: + +.. code-block:: c + + #ifndef _FILE_H_ + #define _FILE_H_ + + /* Code */ + + #endif /* _FILE_H_ */ + + +Macros +~~~~~~ + +Do not ``#define`` or declare names except with the standard DPDK prefix: ``RTE_``. +This is to ensure there are no collisions with definitions in the application itself. + +The names of "unsafe" macros (ones that have side effects), and the names of macros for manifest constants, are all in uppercase. + +The expansions of expression-like macros are either a single token or have outer parentheses. +If a macro is an inline expansion of a function, the function name is all in lowercase and the macro has the same name all in uppercase. +If the macro encapsulates a compound statement, enclose it in a do-while loop, so that it can be used safely in if statements. +Any final statement-terminating semicolon should be supplied by the macro invocation rather than the macro, to make parsing easier for pretty-printers and editors. + +For example: + +.. 
code-block:: c + + #define MACRO(x, y) do { \ + variable = (x) + (y); \ + (y) += 2; \ + } while(0) + +.. note:: + + Wherever possible, enums and inline functions should be preferred to macros, since they provide additional degrees of type-safety and can allow compilers to emit extra warnings about unsafe code. + +Conditional Compilation +~~~~~~~~~~~~~~~~~~~~~~~ + +* When code is conditionally compiled using ``#ifdef`` or ``#if``, a comment may be added following the matching + ``#endif`` or ``#else`` to permit the reader to easily discern where conditionally compiled code regions end. +* This comment should be used only for (subjectively) long regions, regions greater than 20 lines, or where a series of nested ``#ifdef``'s may be confusing to the reader. + Exceptions may be made for cases where code is conditionally not compiled for the purposes of lint(1), or other tools, even though the uncompiled region may be small. +* The comment should be separated from the ``#endif`` or ``#else`` by a single space. +* For short conditionally compiled regions, a closing comment should not be used. +* The comment for ``#endif`` should match the expression used in the corresponding ``#if`` or ``#ifdef``. +* The comment for ``#else`` and ``#elif`` should match the inverse of the expression(s) used in the preceding ``#if`` and/or ``#elif`` statements. +* In the comments, the subexpression ``defined(FOO)`` is abbreviated as "FOO". + For the purposes of comments, ``#ifndef FOO`` is treated as ``#if !defined(FOO)``. + +.. code-block:: c + + #ifdef KTRACE + #include + #endif + + #ifdef COMPAT_43 + /* A large region here, or other conditional code. */ + #else /* !COMPAT_43 */ + /* Or here. */ + #endif /* COMPAT_43 */ + + #ifndef COMPAT_43 + /* Yet another large region here, or other conditional code. */ + #else /* COMPAT_43 */ + /* Or here. */ + #endif /* !COMPAT_43 */ + +.. note:: + + Conditional compilation should be used only when absolutely necessary, as it increases the number of target binaries that need to be built and tested. + +C Types +------- + +Integers +~~~~~~~~ + +For fixed/minimum-size integer values, the project uses the form uintXX_t (from stdint.h) instead of older BSD-style integer identifiers of the form u_intXX_t. + +Enumerations +~~~~~~~~~~~~ + +* Enumeration values are all uppercase. + +.. code-block:: c + + enum enumtype { ONE, TWO } et; + +* Enum types should be used in preference to macros #defining a set of (sequential) values. +* Enum types should be prefixed with ``rte_`` and the elements by a suitable prefix [generally starting ``RTE__`` - where is a shortname for the enum type] to avoid namespace collisions. + +Bitfields +~~~~~~~~~ + +The developer should group bitfields that are included in the same integer, as follows: + +.. code-block:: c + + struct grehdr { + uint16_t rec:3, + srr:1, + seq:1, + key:1, + routing:1, + csum:1, + version:3, + reserved:4, + ack:1; + /* ... */ + } + +Variable Declarations +~~~~~~~~~~~~~~~~~~~~~ + +In declarations, do not put any whitespace between asterisks and adjacent tokens, except for tokens that are identifiers related to types. +(These identifiers are the names of basic types, type qualifiers, and typedef-names other than the one being declared.) +Separate these identifiers from asterisks using a single space. + +For example: + +.. 
code-block:: c
+
+   int *x;         /* no space after asterisk */
+   int * const x;  /* space after asterisk when using a type qualifier */
+
+* All externally-visible variables should have an ``rte_`` prefix in the name to avoid namespace collisions.
+* Do not use uppercase letters - either in the form of ALL_UPPERCASE, or CamelCase - in variable names.
+  Lower-case letters and underscores only.
+
+Structure Declarations
+~~~~~~~~~~~~~~~~~~~~~~
+
+* In general, when declaring variables in new structures, declare them sorted by use, then by size (largest to smallest), and then in alphabetical order.
+  Sorting by use means that commonly used variables are used together and that the structure layout makes logical sense.
+  Ordering by size then ensures that as little padding is added to the structure as possible.
+* For existing structures, additions should be made at the end of the structure, for backward compatibility reasons.
+* Each structure element gets its own line.
+* Try to make the structure readable by aligning the member names using spaces as shown below.
+* Names following extremely long types, which therefore cannot be easily aligned with the rest, should be separated by a single space.
+
+.. code-block:: c
+
+   struct foo {
+           struct foo      *next;          /* List of active foo. */
+           struct mumble   amumble;        /* Comment for mumble. */
+           int             bar;            /* Try to align the comments. */
+           struct verylongtypename *baz;   /* Won't fit with other members */
+   };
+
+
+* Major structures should be declared at the top of the file in which they are used, or in separate header files if they are used in multiple source files.
+* Use of the structures should be by separate variable declarations and those declarations must be extern if they are declared in a header file.
+* Externally visible structure definitions should have the structure name prefixed by ``rte_`` to avoid namespace collisions.
+
+.. note::
+
+    Uses of ``bool`` in structures are not preferred as it wastes space and
+    it's also not clear as to what type size the bool is. A preferred use of
+    ``bool`` is mainly as a return type from functions that return true/false,
+    and maybe as a local variable in functions.
+
+    Ref: `LKML `_
+
+Queues
+~~~~~~
+
+Use queue(3) macros rather than rolling your own lists, whenever possible.
+Thus, the previous example would be better written:
+
+.. code-block:: c
+
+   #include <sys/queue.h>
+
+   struct foo {
+           LIST_ENTRY(foo) link;           /* Use queue macros for foo lists. */
+           struct mumble   amumble;        /* Comment for mumble. */
+           int             bar;            /* Try to align the comments. */
+           struct verylongtypename *baz;   /* Won't fit with other members */
+   };
+   LIST_HEAD(, foo) foohead;               /* Head of global foo list. */
+
+
+DPDK also provides an optimized way to store elements in lockless rings.
+This should be used in all data-path code, when there are several consumers and/or producers, to avoid locking for concurrent access.
+
+Typedefs
+~~~~~~~~
+
+Avoid using typedefs for structure types.
+
+For example, use:
+
+.. code-block:: c
+
+   struct my_struct_type {
+           /* ... */
+   };
+
+   struct my_struct_type my_var;
+
+
+rather than:
+
+.. code-block:: c
+
+   typedef struct my_struct_type {
+           /* ... */
+   } my_struct_type;
+
+   my_struct_type my_var;
+
+
+Typedefs are problematic because they do not properly hide their underlying type;
+for example, you need to know if the typedef is the structure itself, as shown above, or a pointer to the structure.
+In addition, they must be declared exactly once, whereas an incomplete structure type can be mentioned as many times as necessary.
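+For illustration, here is a small sketch of that difference, reusing the
+``my_struct_type`` name from the example above (the ``my_dump_func``
+prototype is purely illustrative):
+
+.. code-block:: c
+
+   /* An incomplete structure type can be mentioned repeatedly. */
+   struct my_struct_type;
+   struct my_struct_type;          /* harmless re-declaration */
+   void my_dump_func(const struct my_struct_type *p);
+
+   /* The corresponding typedef, by contrast, must be defined exactly
+    * once, so every file that uses the name needs to see its definition.
+    */
+   typedef struct my_struct_type my_struct_type;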
+Typedefs are difficult to use in stand-alone header files.
+The header that defines the typedef must be included before the header that uses it, or by the header that uses it (which causes namespace pollution),
+or there must be a back-door mechanism for obtaining the typedef.
+
+Note that #defines used instead of typedefs also are problematic (since they do not propagate the pointer type correctly due to direct text replacement).
+For example, ``#define pint int *`` does not work as expected, while ``typedef int *pint`` does work.
+As stated when discussing macros, typedefs should be preferred to macros in cases like this.
+
+When convention requires a typedef, make its name match the struct tag.
+Avoid typedefs ending in ``_t``, except as specified in Standard C or by POSIX.
+
+.. note::
+
+   It is recommended to use typedefs to define function pointer types, for reasons of code readability.
+   This is especially true when the function type is used as a parameter to another function.
+
+For example:
+
+.. code-block:: c
+
+   /**
+    * Definition of a remote launch function.
+    */
+   typedef int (lcore_function_t)(void *);
+
+   /* launch a function of lcore_function_t type */
+   int rte_eal_remote_launch(lcore_function_t *f, void *arg, unsigned slave_id);
+
+
+C Indentation
+-------------
+
+General
+~~~~~~~
+
+* Indentation is a hard tab, that is, a tab character, not a sequence of spaces.
+
+.. note::
+
+   Global whitespace rule in DPDK: use tabs for indentation, spaces for alignment.
+
+* Do not put any spaces before a tab for indentation.
+* If you have to wrap a long statement, put the operator at the end of the line, and indent again.
+* For control statements (if, while, etc.), it is recommended that the continuation line be indented by two tabs, rather than one,
+  to prevent confusion as to whether the second line of the control statement forms part of the statement body or not.
+  Alternatively, the line continuation may use additional spaces to line up to an appropriate point on the preceding line, for example, to align to an opening brace.
+
+.. note::
+
+   As with all style guidelines, code should match style already in use in an existing file.
+
+.. code-block:: c
+
+   while (really_long_variable_name_1 == really_long_variable_name_2 &&
+           var3 == var4){          /* confusing to read as */
+           x = y + z;              /* control stmt body lines up with second line of */
+           a = b + c;              /* control statement itself if single indent used */
+   }
+
+   if (really_long_variable_name_1 == really_long_variable_name_2 &&
+                   var3 == var4){  /* two tabs used */
+           x = y + z;              /* statement body no longer lines up */
+           a = b + c;
+   }
+
+   z = a + really + long + statement + that + needs +
+           two + lines + gets + indented + on + the +
+           second + and + subsequent + lines;
+
+
+* Do not add whitespace at the end of a line.
+
+* Do not add whitespace or a blank line at the end of a file.
+
+
+Control Statements and Loops
+~~~~~~~~~~~~~~~~~~~~~~~~~~~~
+
+* Include a space after keywords (if, while, for, return, switch).
+* Do not use braces (``{`` and ``}``) for control statements with zero or just a single statement, unless that statement is more than a single line in which case the braces are permitted.
+
+.. 
code-block:: c + + for (p = buf; *p != '\0'; ++p) + ; /* nothing */ + for (;;) + stmt; + for (;;) { + z = a + really + long + statement + that + needs + + two + lines + gets + indented + on + the + + second + and + subsequent + lines; + } + for (;;) { + if (cond) + stmt; + } + if (val != NULL) + val = realloc(val, newsize); + + +* Parts of a for loop may be left empty. + +.. code-block:: c + + for (; cnt < 15; cnt++) { + stmt1; + stmt2; + } + +* Closing and opening braces go on the same line as the else keyword. +* Braces that are not necessary should be left out. + +.. code-block:: c + + if (test) + stmt; + else if (bar) { + stmt; + stmt; + } else + stmt; + + +Function Calls +~~~~~~~~~~~~~~ + +* Do not use spaces after function names. +* Commas should have a space after them. +* No spaces after ``(`` or ``[`` or preceding the ``]`` or ``)`` characters. + +.. code-block:: c + + error = function(a1, a2); + if (error != 0) + exit(error); + + +Operators +~~~~~~~~~ + +* Unary operators do not require spaces, binary operators do. +* Do not use parentheses unless they are required for precedence or unless the statement is confusing without them. + However, remember that other people may be more easily confused than you. + +Exit +~~~~ + +Exits should be 0 on success, or 1 on failure. + +.. code-block:: c + + exit(0); /* + * Avoid obvious comments such as + * "Exit 0 on success." + */ + } + +Local Variables +~~~~~~~~~~~~~~~ + +* Variables should be declared at the start of a block of code rather than in the middle. + The exception to this is when the variable is ``const`` in which case the declaration must be at the point of first use/assignment. +* When declaring variables in functions, multiple variables per line are OK. + However, if multiple declarations would cause the line to exceed a reasonable line length, begin a new set of declarations on the next line rather than using a line continuation. +* Be careful to not obfuscate the code by initializing variables in the declarations, only the last variable on a line should be initialized. + If multiple variables are to be initialized when defined, put one per line. +* Do not use function calls in initializers, except for ``const`` variables. + +.. code-block:: c + + int i = 0, j = 0, k = 0; /* bad, too many initializer */ + + char a = 0; /* OK, one variable per line with initializer */ + char b = 0; + + float x, y = 0.0; /* OK, only last variable has initializer */ + + +Casts and sizeof +~~~~~~~~~~~~~~~~ + +* Casts and sizeof statements are not followed by a space. +* Always write sizeof statements with parenthesis. + The redundant parenthesis rules do not apply to sizeof(var) instances. + +C Function Definition, Declaration and Use +------------------------------------------- + +Prototypes +~~~~~~~~~~ + +* It is recommended (and generally required by the compiler) that all non-static functions are prototyped somewhere. +* Functions local to one source module should be declared static, and should not be prototyped unless absolutely necessary. +* Functions used from other parts of code (external API) must be prototyped in the relevant include file. +* Function prototypes should be listed in a logical order, preferably alphabetical unless there is a compelling reason to use a different ordering. +* Functions that are used locally in more than one module go into a separate header file, for example, "extern.h". +* Do not use the ``__P`` macro. 
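+* As an illustration of the points above, a file-local helper and an exported
+  API function might look as follows (both function names here are purely
+  hypothetical and only sketch the convention):
+
+  .. code-block:: c
+
+     /* Local to one source module: declared static, no prototype in a header. */
+     static int
+     parse_one_entry(const char *line)
+     {
+             /* ... */
+     }
+
+     /* Part of the external API: rte_ prefix, prototyped in a public header. */
+     int rte_cfg_parse_entry(const char *line);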
+* Functions that are part of an external API should be documented using Doxygen-like comments above declarations. See :ref:`doxygen_guidelines` for details.
+* Functions that are part of the external API must have an ``rte_`` prefix on the function name.
+* Do not use uppercase letters - either in the form of ALL_UPPERCASE, or CamelCase - in function names. Lower-case letters and underscores only.
+* When prototyping functions, associate names with parameter types, for example:
+
+.. code-block:: c
+
+   void function1(int fd);  /* good */
+   void function2(int);     /* bad */
+
+* Short function prototypes should be contained on a single line.
+  Longer prototypes, e.g. those with many parameters, can be split across multiple lines.
+  The second and subsequent lines should be further indented as for line statement continuations as described in the previous section.
+
+.. code-block:: c
+
+   static char *function1(int _arg, const char *_arg2,
+           struct foo *_arg3,
+           struct bar *_arg4,
+           struct baz *_arg5);
+   static void usage(void);
+
+.. note::
+
+   Unlike function definitions, the function prototypes do not need to place the function return type on a separate line.
+
+Definitions
+~~~~~~~~~~~
+
+* The function type should be on a line by itself preceding the function.
+* The opening brace of the function body should be on a line by itself.
+
+.. code-block:: c
+
+   static char *
+   function(int a1, int a2, float fl, int a4)
+   {
+
+
+* Do not declare functions inside other functions.
+  ANSI C states that such declarations have file scope regardless of the nesting of the declaration.
+  Hiding file declarations in what appears to be a local scope is undesirable and will elicit complaints from a good compiler.
+* Old-style (K&R) function declaration should not be used, use ANSI function declarations instead as shown below.
+* Long argument lists should be wrapped as described above in the function prototypes section.
+
+.. code-block:: c
+
+   /*
+    * All major routines should have a comment briefly describing what
+    * they do. The comment before the "main" routine should describe
+    * what the program does.
+    */
+   int
+   main(int argc, char *argv[])
+   {
+           char *ep;
+           long num;
+           int ch;
+
+C Statement Style and Conventions
+---------------------------------
+
+NULL Pointers
+~~~~~~~~~~~~~
+
+* NULL is the preferred null pointer constant.
+  Use NULL instead of ``(type *)0`` or ``(type *)NULL``, except where the compiler does not know the destination type e.g. for variadic args to a function.
+* Test pointers against NULL, for example, use:
+
+.. code-block:: c
+
+   if (p == NULL)  /* Good, compare pointer to NULL */
+
+   if (!p)         /* Bad, using ! on pointer */
+
+
+* Do not use ! for tests unless it is a boolean, for example, use:
+
+.. code-block:: c
+
+   if (*p == '\0')  /* check character against (char)0 */
+
+Return Value
+~~~~~~~~~~~~
+
+* Functions which create objects, or allocate memory, should return pointer types, and NULL on error.
+  The error type should be indicated by setting the variable ``rte_errno`` appropriately.
+* Functions which work on bursts of packets, such as RX-like or TX-like functions, should return the number of packets handled.
+* Other functions returning int should generally behave like system calls:
+  returning 0 on success and -1 on error, setting ``rte_errno`` to indicate the specific type of error.
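+* A minimal sketch of that convention (the ``rte_widget`` function and
+  structure names below are illustrative only, not an existing DPDK API):
+
+  .. code-block:: c
+
+     #include <errno.h>
+     #include <rte_errno.h>
+
+     struct rte_widget;  /* opaque handle, for illustration only */
+
+     int
+     rte_widget_enable(struct rte_widget *w)
+     {
+             if (w == NULL) {
+                     rte_errno = EINVAL;
+                     return -1;  /* error: tell the caller why via rte_errno */
+             }
+             /* ... */
+             return 0;           /* success */
+     }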
+* Where already standard in a given library, the alternative error approach may be used where the negative value is not -1 but is instead ``-errno`` if relevant, for example, ``-EINVAL``. + Note, however, to allow consistency across functions returning integer or pointer types, the previous approach is preferred for any new libraries. +* For functions where no error is possible, the function type should be ``void`` not ``int``. +* Routines returning ``void *`` should not have their return values cast to any pointer type. + (Typecasting can prevent the compiler from warning about missing prototypes as any implicit definition of a function returns int, + which, unlike ``void *``, needs a typecast to assign to a pointer variable.) + +.. note:: + + The above rule about not typecasting ``void *`` applies to malloc, as well as to DPDK functions. + +* Values in return statements should not be enclosed in parentheses. + +Logging and Errors +~~~~~~~~~~~~~~~~~~ + +In the DPDK environment, use the logging interface provided: + +.. code-block:: c + + /* register log types for this application */ + int my_logtype1 = rte_log_register("myapp.log1"); + int my_logtype2 = rte_log_register("myapp.log2"); + + /* set global log level to INFO */ + rte_log_set_global_level(RTE_LOG_INFO); + + /* only display messages higher than NOTICE for log2 (default + * is DEBUG) */ + rte_log_set_level(my_logtype2, RTE_LOG_NOTICE); + + /* enable all PMD logs (whose identifier string starts with "pmd.") */ + rte_log_set_level_pattern("pmd.*", RTE_LOG_DEBUG); + + /* log in debug level */ + rte_log_set_global_level(RTE_LOG_DEBUG); + RTE_LOG(DEBUG, my_logtype1, "this is a debug level message\n"); + RTE_LOG(INFO, my_logtype1, "this is a info level message\n"); + RTE_LOG(WARNING, my_logtype1, "this is a warning level message\n"); + RTE_LOG(WARNING, my_logtype2, "this is a debug level message (not displayed)\n"); + + /* log in info level */ + rte_log_set_global_level(RTE_LOG_INFO); + RTE_LOG(DEBUG, my_logtype1, "debug level message (not displayed)\n"); + +Branch Prediction +~~~~~~~~~~~~~~~~~ + +* When a test is done in a critical zone (called often or in a data path) the code can use the ``likely()`` and ``unlikely()`` macros to indicate the expected, or preferred fast path. + They are expanded as a compiler builtin and allow the developer to indicate if the branch is likely to be taken or not. Example: + +.. code-block:: c + + #include + if (likely(x > 1)) + do_stuff(); + +.. note:: + + The use of ``likely()`` and ``unlikely()`` should only be done in performance critical paths, + and only when there is a clearly preferred path, or a measured performance increase gained from doing so. + These macros should be avoided in non-performance-critical code. + +Static Variables and Functions +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ + +* All functions and variables that are local to a file must be declared as ``static`` because it can often help the compiler to do some optimizations (such as, inlining the code). +* Functions that should be inlined should to be declared as ``static inline`` and can be defined in a .c or a .h file. + +.. note:: + Static functions defined in a header file must be declared as ``static inline`` in order to prevent compiler warnings about the function being unused. + +Const Attribute +~~~~~~~~~~~~~~~ + +The ``const`` attribute should be used as often as possible when a variable is read-only. + +Inline ASM in C code +~~~~~~~~~~~~~~~~~~~~ + +The ``asm`` and ``volatile`` keywords do not have underscores. 
The AT&T syntax should be used. +Input and output operands should be named to avoid confusion, as shown in the following example: + +.. code-block:: c + + asm volatile("outb %[val], %[port]" + : : + [port] "dN" (port), + [val] "a" (val)); + +Control Statements +~~~~~~~~~~~~~~~~~~ + +* Forever loops are done with for statements, not while statements. +* Elements in a switch statement that cascade should have a FALLTHROUGH comment. For example: + +.. code-block:: c + + switch (ch) { /* Indent the switch. */ + case 'a': /* Don't indent the case. */ + aflag = 1; /* Indent case body one tab. */ + /* FALLTHROUGH */ + case 'b': + bflag = 1; + break; + case '?': + default: + usage(); + /* NOTREACHED */ + } + +Dynamic Logging +--------------- + +DPDK provides infrastructure to perform logging during runtime. This is very +useful for enabling debug output without recompilation. To enable or disable +logging of a particular topic, the ``--log-level`` parameter can be provided +to EAL, which will change the log level. DPDK code can register topics, +which allows the user to adjust the log verbosity for that specific topic. + +In general, the naming scheme is as follows: ``type.section.name`` + + * Type is the type of component, where ``lib``, ``pmd``, ``bus`` and ``user`` + are the common options. + * Section refers to a specific area, for example a poll-mode-driver for an + ethernet device would use ``pmd.net``, while an eventdev PMD uses + ``pmd.event``. + * The name identifies the individual item that the log applies to. + The name section must align with + the directory that the PMD code resides. See examples below for clarity. + +Examples: + + * The virtio network PMD in ``drivers/net/virtio`` uses ``pmd.net.virtio`` + * The eventdev software poll mode driver in ``drivers/event/sw`` uses ``pmd.event.sw`` + * The octeontx mempool driver in ``drivers/mempool/octeontx`` uses ``pmd.mempool.octeontx`` + * The DPDK hash library in ``lib/librte_hash`` uses ``lib.hash`` + +Specializations +~~~~~~~~~~~~~~~ + +In addition to the above logging topic, any PMD or library can further split +logging output by using "specializations". A specialization could be the +difference between initialization code, and logs of events that occur at runtime. + +An example could be the initialization log messages getting one +specialization, while another specialization handles mailbox command logging. +Each PMD, library or component can create as many specializations as required. + +A specialization looks like this: + + * Initialization output: ``type.section.name.init`` + * PF/VF mailbox output: ``type.section.name.mbox`` + +A real world example is the i40e poll mode driver which exposes two +specializations, one for initialization ``pmd.net.i40e.init`` and the other for +the remaining driver logs ``pmd.net.i40e.driver``. + +Note that specializations have no formatting rules, but please follow +a precedent if one exists. In order to see all current log topics and +specializations, run the ``app/test`` binary, and use the ``dump_log_types`` + +Python Code +----------- + +All Python code should work with Python 2.7+ and 3.2+ and be compliant with +`PEP8 (Style Guide for Python Code) `_. + +The ``pep8`` tool can be used for testing compliance with the guidelines. + +Integrating with the Build System +--------------------------------- + +DPDK supports being built in two different ways: + +* using ``make`` - or more specifically "GNU make", i.e. 
``gmake`` on FreeBSD +* using the tools ``meson`` and ``ninja`` + +Any new library or driver to be integrated into DPDK should support being +built with both systems. While building using ``make`` is a legacy approach, and +most build-system enhancements are being done using ``meson`` and ``ninja`` +there are no plans at this time to deprecate the legacy ``make`` build system. + +Therefore all new component additions should include both a ``Makefile`` and a +``meson.build`` file, and should be added to the component lists in both the +``Makefile`` and ``meson.build`` files in the relevant top-level directory: +either ``lib`` directory or a ``driver`` subdirectory. + +Makefile Contents +~~~~~~~~~~~~~~~~~ + +The ``Makefile`` for the component should be of the following format, where +```` corresponds to the name of the library in question, e.g. hash, +lpm, etc. For drivers, the same format of Makefile is used. + +.. code-block:: none + + # pull in basic DPDK definitions, including whether library is to be + # built or not + include $(RTE_SDK)/mk/rte.vars.mk + + # library name + LIB = librte_.a + + # any library cflags needed. Generally add "-O3 $(WERROR_FLAGS)" + CFLAGS += -O3 + CFLAGS += $(WERROR_FLAGS) + + # the symbol version information for the library + EXPORT_MAP := rte__version.map + + # all source filenames are stored in SRCS-y + SRCS-$(CONFIG_RTE_LIBRTE_) += rte_.c + + # install includes + SYMLINK-$(CONFIG_RTE_LIBRTE_)-include += rte_.h + + # pull in rules to build the library + include $(RTE_SDK)/mk/rte.lib.mk + +Meson Build File Contents - Libraries +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ + +The ``meson.build`` file for a new DPDK library should be of the following basic +format. + +.. code-block:: python + + sources = files('file1.c', ...) + headers = files('file1.h', ...) + + +This will build based on a number of conventions and assumptions within the DPDK +itself, for example, that the library name is the same as the directory name in +which the files are stored. + +For a library ``meson.build`` file, there are number of variables which can be +set, some mandatory, others optional. The mandatory fields are: + +sources + **Default Value = []**. + This variable should list out the files to be compiled up to create the + library. Files must be specified using the meson ``files()`` function. + + +The optional fields are: + +build + **Default Value = true** + Used to optionally compile a library, based on its dependencies or + environment. When set to "false" the ``reason`` value, explained below, should + also be set to explain to the user why the component is not being built. + A simple example of use would be: + +.. code-block:: python + + if not is_linux + build = false + reason = 'only supported on Linux' + endif + + +cflags + **Default Value = [<-march/-mcpu flags>]**. + Used to specify any additional cflags that need to be passed to compile + the sources in the library. + +deps + **Default Value = ['eal']**. + Used to list the internal library dependencies of the library. It should + be assigned to using ``+=`` rather than overwriting using ``=``. The + dependencies should be specified as strings, each one giving the name of + a DPDK library, without the ``librte_`` prefix. Dependencies are handled + recursively, so specifying e.g. ``mempool``, will automatically also + make the library depend upon the mempool library's dependencies too - + ``ring`` and ``eal``. For libraries that only depend upon EAL, this + variable may be omitted from the ``meson.build`` file. 
For example: + +.. code-block:: python + + deps += ['ethdev'] + + +ext_deps + **Default Value = []**. + Used to specify external dependencies of this library. They should be + returned as dependency objects, as returned from the meson + ``dependency()`` or ``find_library()`` functions. Before returning + these, they should be checked to ensure the dependencies have been + found, and, if not, the ``build`` variable should be set to ``false``. + For example: + +.. code-block:: python + + my_dep = dependency('libX', required: 'false') + if my_dep.found() + ext_deps += my_dep + else + build = false + endif + + +headers + **Default Value = []**. + Used to return the list of header files for the library that should be + installed to $PREFIX/include when ``ninja install`` is run. As with + source files, these should be specified using the meson ``files()`` + function. + +includes: + **Default Value = []**. + Used to indicate any additional header file paths which should be + added to the header search path for other libs depending on this + library. EAL uses this so that other libraries building against it + can find the headers in subdirectories of the main EAL directory. The + base directory of each library is always given in the include path, + it does not need to be specified here. + +name + **Default Value = library name derived from the directory name**. + If a library's .so or .a file differs from that given in the directory + name, the name should be specified using this variable. In practice, + since the convention is that for a library called ``librte_xyz.so``, the + sources are stored in a directory ``lib/librte_xyz``, this value should + never be needed for new libraries. + +.. note:: + + The name value also provides the name used to find the function version + map file, as part of the build process, so if the directory name and + library names differ, the ``version.map`` file should be named + consistently with the library, not the directory + +objs + **Default Value = []**. + This variable can be used to pass to the library build some pre-built + objects that were compiled up as part of another target given in the + included library ``meson.build`` file. + +reason + **Default Value = ''**. + This variable should be used when a library is not to be built i.e. when + ``build`` is set to "false", to specify the reason why a library will not be + built. For missing dependencies this should be of the form + ``'missing dependency, "libname"'``. + +use_function_versioning + **Default Value = false**. + Specifies if the library in question has ABI versioned functions. If it + has, this value should be set to ensure that the C files are compiled + twice with suitable parameters for each of shared or static library + builds. + +Meson Build File Contents - Drivers +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ + +For drivers, the values are largely the same as for libraries. The variables +supported are: + +build + As above. + +cflags + As above. + +deps + As above. + +ext_deps + As above. + +includes + **Default Value = ** Some drivers include a base + directory for additional source files and headers, so we have this + variable to allow the headers from that base directory to be found when + compiling driver sources. Should be appended to using ``+=`` rather than + overwritten using ``=``. The values appended should be meson include + objects got using the ``include_directories()`` function. For example: + +.. 
code-block:: python + + includes += include_directories('base') + +name + As above, though note that each driver class can define it's own naming + scheme for the resulting ``.so`` files. + +objs + As above, generally used for the contents of the ``base`` directory. + +pkgconfig_extra_libs + **Default Value = []** + This variable is used to pass additional library link flags through to + the DPDK pkgconfig file generated, for example, to track any additional + libraries that may need to be linked into the build - especially when + using static libraries. Anything added here will be appended to the end + of the ``pkgconfig --libs`` output. + +reason + As above. + +sources [mandatory] + As above + +version + As above diff --git a/src/spdk/dpdk/doc/guides/contributing/design.rst b/src/spdk/dpdk/doc/guides/contributing/design.rst new file mode 100644 index 000000000..d3dd694b6 --- /dev/null +++ b/src/spdk/dpdk/doc/guides/contributing/design.rst @@ -0,0 +1,177 @@ +.. SPDX-License-Identifier: BSD-3-Clause + Copyright 2018 The DPDK contributors + +Design +====== + +Environment or Architecture-specific Sources +-------------------------------------------- + +In DPDK and DPDK applications, some code is specific to an architecture (i686, x86_64) or to an executive environment (freebsd or linux) and so on. +As far as is possible, all such instances of architecture or env-specific code should be provided via standard APIs in the EAL. + +By convention, a file is common if it is not located in a directory indicating that it is specific. +For instance, a file located in a subdir of "x86_64" directory is specific to this architecture. +A file located in a subdir of "linux" is specific to this execution environment. + +.. note:: + + Code in DPDK libraries and applications should be generic. + The correct location for architecture or executive environment specific code is in the EAL. + +When absolutely necessary, there are several ways to handle specific code: + +* Use a ``#ifdef`` with the CONFIG option in the C code. + This can be done when the differences are small and they can be embedded in the same C file: + + .. code-block:: c + + #ifdef RTE_ARCH_I686 + toto(); + #else + titi(); + #endif + +* Use the CONFIG option in the Makefile. This is done when the differences are more significant. + In this case, the code is split into two separate files that are architecture or environment specific. + This should only apply inside the EAL library. + +.. note:: + + As in the linux kernel, the ``CONFIG_`` prefix is not used in C code. + This is only needed in Makefiles or shell scripts. + +Per Architecture Sources +~~~~~~~~~~~~~~~~~~~~~~~~ + +The following config options can be used: + +* ``CONFIG_RTE_ARCH`` is a string that contains the name of the architecture. +* ``CONFIG_RTE_ARCH_I686``, ``CONFIG_RTE_ARCH_X86_64``, ``CONFIG_RTE_ARCH_X86_64_32`` or ``CONFIG_RTE_ARCH_PPC_64`` are defined only if we are building for those architectures. + +Per Execution Environment Sources +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ + +The following config options can be used: + +* ``CONFIG_RTE_EXEC_ENV`` is a string that contains the name of the executive environment. +* ``CONFIG_RTE_EXEC_ENV_FREEBSD`` or ``CONFIG_RTE_EXEC_ENV_LINUX`` are defined only if we are building for this execution environment. + +Library Statistics +------------------ + +Description +~~~~~~~~~~~ + +This document describes the guidelines for DPDK library-level statistics counter +support. 
This includes guidelines for turning library statistics on and off and +requirements for preventing ABI changes when implementing statistics. + + +Mechanism to allow the application to turn library statistics on and off +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ + +Each library that maintains statistics counters should provide a single build +time flag that decides whether the statistics counter collection is enabled or +not. This flag should be exposed as a variable within the DPDK configuration +file. When this flag is set, all the counters supported by current library are +collected for all the instances of every object type provided by the library. +When this flag is cleared, none of the counters supported by the current library +are collected for any instance of any object type provided by the library: + +.. code-block:: console + + # DPDK file config/common_linux, config/common_freebsd, etc. + CONFIG_RTE__STATS_COLLECT=y/n + +The default value for this DPDK configuration file variable (either "yes" or +"no") is decided by each library. + + +Prevention of ABI changes due to library statistics support +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ + +The layout of data structures and prototype of functions that are part of the +library API should not be affected by whether the collection of statistics +counters is turned on or off for the current library. In practical terms, this +means that space should always be allocated in the API data structures for +statistics counters and the statistics related API functions are always built +into the code, regardless of whether the statistics counter collection is turned +on or off for the current library. + +When the collection of statistics counters for the current library is turned +off, the counters retrieved through the statistics related API functions should +have a default value of zero. + + +Motivation to allow the application to turn library statistics on and off +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ + +It is highly recommended that each library provides statistics counters to allow +an application to monitor the library-level run-time events. Typical counters +are: number of packets received/dropped/transmitted, number of buffers +allocated/freed, number of occurrences for specific events, etc. + +However, the resources consumed for library-level statistics counter collection +have to be spent out of the application budget and the counters collected by +some libraries might not be relevant to the current application. In order to +avoid any unwanted waste of resources and/or performance impacts, the +application should decide at build time whether the collection of library-level +statistics counters should be turned on or off for each library individually. + +Library-level statistics counters can be relevant or not for specific +applications: + +* For Application A, counters maintained by Library X are always relevant and + the application needs to use them to implement certain features, such as traffic + accounting, logging, application-level statistics, etc. In this case, + the application requires that collection of statistics counters for Library X is + always turned on. + +* For Application B, counters maintained by Library X are only useful during the + application debug stage and are not relevant once debug phase is over. 
In this + case, the application may decide to turn on the collection of Library X + statistics counters during the debug phase and at a later stage turn them off. + +* For Application C, counters maintained by Library X are not relevant at all. + It might be that the application maintains its own set of statistics counters + that monitor a different set of run-time events (e.g. number of connection + requests, number of active users, etc). It might also be that the application + uses multiple libraries (Library X, Library Y, etc) and it is interested in the + statistics counters of Library Y, but not in those of Library X. In this case, + the application may decide to turn the collection of statistics counters off for + Library X and on for Library Y. + +The statistics collection consumes a certain amount of CPU resources (cycles, +cache bandwidth, memory bandwidth, etc) that depends on: + +* Number of libraries used by the current application that have statistics + counters collection turned on. + +* Number of statistics counters maintained by each library per object type + instance (e.g. per port, table, pipeline, thread, etc). + +* Number of instances created for each object type supported by each library. + +* Complexity of the statistics logic collection for each counter: when only + some occurrences of a specific event are valid, additional logic is typically + needed to decide whether the current occurrence of the event should be counted + or not. For example, in the event of packet reception, when only TCP packets + with destination port within a certain range should be recorded, conditional + branches are usually required. When processing a burst of packets that have been + validated for header integrity, counting the number of bits set in a bitmask + might be needed. + +PF and VF Considerations +------------------------ + +The primary goal of DPDK is to provide a userspace dataplane. Managing VFs from +a PF driver is a control plane feature and developers should generally rely on +the Linux Kernel for that. + +Developers should work with the Linux Kernel community to get the required +functionality upstream. PF functionality should only be added to DPDK for +testing and prototyping purposes while the kernel work is ongoing. It should +also be marked with an "EXPERIMENTAL" tag. If the functionality isn't +upstreamable then a case can be made to maintain the PF functionality in DPDK +without the EXPERIMENTAL tag. diff --git a/src/spdk/dpdk/doc/guides/contributing/documentation.rst b/src/spdk/dpdk/doc/guides/contributing/documentation.rst new file mode 100644 index 000000000..375ea64ba --- /dev/null +++ b/src/spdk/dpdk/doc/guides/contributing/documentation.rst @@ -0,0 +1,751 @@ +.. SPDX-License-Identifier: BSD-3-Clause + Copyright 2018 The DPDK contributors + +.. _doc_guidelines: + +DPDK Documentation Guidelines +============================= + +This document outlines the guidelines for writing the DPDK Guides and API documentation in RST and Doxygen format. + +It also explains the structure of the DPDK documentation and shows how to build the Html and PDF versions of the documents. + + +Structure of the Documentation +------------------------------ + +The DPDK source code repository contains input files to build the API documentation and User Guides. + +The main directories that contain files related to documentation are shown below:: + + lib + |-- librte_acl + |-- librte_cfgfile + |-- librte_cmdline + |-- librte_eal + | |-- ... + ... 
+ doc + |-- api + +-- guides + |-- freebsd_gsg + |-- linux_gsg + |-- prog_guide + |-- sample_app_ug + |-- guidelines + |-- testpmd_app_ug + |-- rel_notes + |-- nics + |-- ... + + +The API documentation is built from `Doxygen `_ comments in the header files. +These files are mainly in the ``lib/librte_*`` directories although some of the Poll Mode Drivers in ``drivers/net`` +are also documented with Doxygen. + +The configuration files that are used to control the Doxygen output are in the ``doc/api`` directory. + +The user guides such as *The Programmers Guide* and the *FreeBSD* and *Linux Getting Started* Guides are generated +from RST markup text files using the `Sphinx `_ Documentation Generator. + +These files are included in the ``doc/guides/`` directory. +The output is controlled by the ``doc/guides/conf.py`` file. + + +Role of the Documentation +------------------------- + +The following items outline the roles of the different parts of the documentation and when they need to be updated or +added to by the developer. + +* **Release Notes** + + The Release Notes document which features have been added in the current and previous releases of DPDK and highlight + any known issues. + The Releases Notes also contain notifications of features that will change ABI compatibility in the next release. + + Developers should include updates to the Release Notes with patch sets that relate to any of the following sections: + + * New Features + * Resolved Issues (see below) + * Known Issues + * API Changes + * ABI Changes + * Shared Library Versions + + Resolved Issues should only include issues from previous releases that have been resolved in the current release. + Issues that are introduced and then fixed within a release cycle do not have to be included here. + + Refer to the Release Notes from the previous DPDK release for the correct format of each section. + + +* **API documentation** + + The API documentation explains how to use the public DPDK functions. + The `API index page `_ shows the generated API documentation with related groups of functions. + + The API documentation should be updated via Doxygen comments when new functions are added. + +* **Getting Started Guides** + + The Getting Started Guides show how to install and configure DPDK and how to run DPDK based applications on different OSes. + + A Getting Started Guide should be added when DPDK is ported to a new OS. + +* **The Programmers Guide** + + The Programmers Guide explains how the API components of DPDK such as the EAL, Memzone, Rings and the Hash Library work. + It also explains how some higher level functionality such as Packet Distributor, Packet Framework and KNI work. + It also shows the build system and explains how to add applications. + + The Programmers Guide should be expanded when new functionality is added to DPDK. + +* **App Guides** + + The app guides document the DPDK applications in the ``app`` directory such as ``testpmd``. + + The app guides should be updated if functionality is changed or added. + +* **Sample App Guides** + + The sample app guides document the DPDK example applications in the examples directory. + Generally they demonstrate a major feature such as L2 or L3 Forwarding, Multi Process or Power Management. + They explain the purpose of the sample application, how to run it and step through some of the code to explain the + major functionality. + + A new sample application should be accompanied by a new sample app guide. 
+ The guide for the Skeleton Forwarding app is a good starting reference. + +* **Network Interface Controller Drivers** + + The NIC Drivers document explains the features of the individual Poll Mode Drivers, such as software requirements, + configuration and initialization. + + New documentation should be added for new Poll Mode Drivers. + +* **Guidelines** + + The guideline documents record community process, expectations and design directions. + + They can be extended, amended or discussed by submitting a patch and getting community approval. + + +Building the Documentation +-------------------------- + +Dependencies +~~~~~~~~~~~~ + + +The following dependencies must be installed to build the documentation: + +* Doxygen. + +* Sphinx (also called python-sphinx). + +* TexLive (at least TexLive-core and the extra Latex support). + +* Inkscape. + +`Doxygen`_ generates documentation from commented source code. +It can be installed as follows: + +.. code-block:: console + + # Ubuntu/Debian. + sudo apt-get -y install doxygen + + # Red Hat/Fedora. + sudo dnf -y install doxygen + +`Sphinx`_ is a Python documentation tool for converting RST files to Html or to PDF (via LaTeX). +For full support with figure and table captioning the latest version of Sphinx can be installed as follows: + +.. code-block:: console + + # Ubuntu/Debian. + sudo apt-get -y install python-pip + sudo pip install --upgrade sphinx + sudo pip install --upgrade sphinx_rtd_theme + + # Red Hat/Fedora. + sudo dnf -y install python-pip + sudo pip install --upgrade sphinx + sudo pip install --upgrade sphinx_rtd_theme + +For further information on getting started with Sphinx see the +`Sphinx Getting Started `_. + +.. Note:: + + To get full support for Figure and Table numbering it is best to install Sphinx 1.3.1 or later. + + +`Inkscape`_ is a vector based graphics program which is used to create SVG images and also to convert SVG images to PDF images. +It can be installed as follows: + +.. code-block:: console + + # Ubuntu/Debian. + sudo apt-get -y install inkscape + + # Red Hat/Fedora. + sudo dnf -y install inkscape + +`TexLive `_ is an installation package for Tex/LaTeX. +It is used to generate the PDF versions of the documentation. +The main required packages can be installed as follows: + +.. code-block:: console + + # Ubuntu/Debian. + sudo apt-get -y install texlive-latex-extra texlive-lang-greek + + # Red Hat/Fedora, selective install. + sudo dnf -y install texlive-collection-latexextra texlive-greek-fontenc + +`Latexmk `_ is a perl script +for running LaTeX for resolving cross references, +and it also runs auxiliary programs like bibtex, makeindex if necessary, and dvips. +It has also a number of other useful capabilities (see man 1 latexmk). + +.. code-block:: console + + # Ubuntu/Debian. + sudo apt-get -y install latexmk + + # Red Hat/Fedora. + sudo dnf -y install latexmk + + +Build commands +~~~~~~~~~~~~~~ + +The documentation is built using the standard DPDK build system. +Some examples are shown below: + +* Generate all the documentation targets:: + + make doc + +* Generate the Doxygen API documentation in Html:: + + make doc-api-html + +* Generate the guides documentation in Html:: + + make doc-guides-html + +* Generate the guides documentation in Pdf:: + + make doc-guides-pdf + +The output of these commands is generated in the ``build`` directory:: + + build/doc + |-- html + | |-- api + | +-- guides + | + +-- pdf + +-- guides + + +.. 
Note:: + + Make sure to fix any Sphinx or Doxygen warnings when adding or updating documentation. + +The documentation output files can be removed as follows:: + + make doc-clean + + +Document Guidelines +------------------- + +Here are some guidelines in relation to the style of the documentation: + +* Document the obvious as well as the obscure since it won't always be obvious to the reader. + For example an instruction like "Set up 64 2MB Hugepages" is better when followed by a sample commandline or a link to + the appropriate section of the documentation. + +* Use American English spellings throughout. + This can be checked using the ``aspell`` utility:: + + aspell --lang=en_US --check doc/guides/sample_app_ug/mydoc.rst + + +RST Guidelines +-------------- + +The RST (reStructuredText) format is a plain text markup format that can be converted to Html, PDF or other formats. +It is most closely associated with Python but it can be used to document any language. +It is used in DPDK to document everything apart from the API. + +The Sphinx documentation contains a very useful `RST Primer `_ which is a +good place to learn the minimal set of syntax required to format a document. + +The official `reStructuredText `_ website contains the specification for the +RST format and also examples of how to use it. +However, for most developers the RST Primer is a better resource. + +The most common guidelines for writing RST text are detailed in the +`Documenting Python `_ guidelines. +The additional guidelines below reiterate or expand upon those guidelines. + + +Line Length +~~~~~~~~~~~ + +* Lines in sentences should be less than 80 characters and wrapped at + words. Multiple sentences which are not separated by a blank line are joined + automatically into paragraphs. + +* Lines in literal blocks **must** be less than 80 characters since + they are not wrapped by the document formatters and can exceed the page width + in PDF documents. + + Long literal command lines can be shown wrapped with backslashes. For + example:: + + testpmd -l 2-3 -n 4 \ + --vdev=virtio_user0,path=/dev/vhost-net,queues=2,queue_size=1024 \ + -- -i --tx-offloads=0x0000002c --enable-lro --txq=2 --rxq=2 \ + --txd=1024 --rxd=1024 + + +Whitespace +~~~~~~~~~~ + +* Standard RST indentation is 3 spaces. + Code can be indented 4 spaces, especially if it is copied from source files. + +* No tabs. + Convert tabs in embedded code to 4 or 8 spaces. + +* No trailing whitespace. + +* Add 2 blank lines before each section header. + +* Add 1 blank line after each section header. + +* Add 1 blank line between each line of a list. + + +Section Headers +~~~~~~~~~~~~~~~ + +* Section headers should use the following underline formats:: + + Level 1 Heading + =============== + + + Level 2 Heading + --------------- + + + Level 3 Heading + ~~~~~~~~~~~~~~~ + + + Level 4 Heading + ^^^^^^^^^^^^^^^ + + +* Level 4 headings should be used sparingly. + +* The underlines should match the length of the text. + +* In general, the heading should be less than 80 characters, for conciseness. + +* As noted above: + + * Add 2 blank lines before each section header. + + * Add 1 blank line after each section header. + + +Lists +~~~~~ + +* Bullet lists should be formatted with a leading ``*`` as follows:: + + * Item one. + + * Item two is a long line that is wrapped and then indented to match + the start of the previous line. + + * One space character between the bullet and the text is preferred. 
+ +* Numbered lists can be formatted with a leading number but the preference is to use ``#.`` which will give automatic numbering. + This is more convenient when adding or removing items:: + + #. Item one. + + #. Item two is a long line that is wrapped and then indented to match + the start of the previous line. + + #. Item three. + +* Definition lists can be written with or without a bullet:: + + * Item one. + + Some text about item one. + + * Item two. + + Some text about item two. + +* All lists, and sub-lists, must be separated from the preceding text by a blank line. + This is a syntax requirement. + +* All list items should be separated by a blank line for readability. + + +Code and Literal block sections +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ + +* Inline text that is required to be rendered with a fixed width font should be enclosed in backquotes like this: + \`\`text\`\`, so that it appears like this: ``text``. + +* Fixed width, literal blocks of texts should be indented at least 3 spaces and prefixed with ``::`` like this:: + + Here is some fixed width text:: + + 0x0001 0x0001 0x00FF 0x00FF + +* It is also possible to specify an encoding for a literal block using the ``.. code-block::`` directive so that syntax + highlighting can be applied. + Examples of supported highlighting are:: + + .. code-block:: console + .. code-block:: c + .. code-block:: python + .. code-block:: diff + .. code-block:: none + + That can be applied as follows:: + + .. code-block:: c + + #include + + int main() { + + printf("Hello World\n"); + + return 0; + } + + Which would be rendered as: + + .. code-block:: c + + #include + + int main() { + + printf("Hello World\n"); + + return 0; + } + + +* The default encoding for a literal block using the simplified ``::`` + directive is ``none``. + +* Lines in literal blocks must be less than 80 characters since they can exceed the page width when converted to PDF documentation. + For long literal lines that exceed that limit try to wrap the text at sensible locations. + For example a long command line could be documented like this and still work if copied directly from the docs:: + + build/app/testpmd -l 0-2 -n3 --vdev=net_pcap0,iface=eth0 \ + --vdev=net_pcap1,iface=eth1 \ + -- -i --nb-cores=2 --nb-ports=2 \ + --total-num-mbufs=2048 + +* Long lines that cannot be wrapped, such as application output, should be truncated to be less than 80 characters. + + +Images +~~~~~~ + +* All images should be in SVG scalar graphics format. + They should be true SVG XML files and should not include binary formats embedded in a SVG wrapper. + +* The DPDK documentation contains some legacy images in PNG format. + These will be converted to SVG in time. + +* `Inkscape `_ is the recommended graphics editor for creating the images. + Use some of the older images in ``doc/guides/prog_guide/img/`` as a template, for example ``mbuf1.svg`` + or ``ring-enqueue1.svg``. + +* The SVG images should include a copyright notice, as an XML comment. + +* Images in the documentation should be formatted as follows: + + * The image should be preceded by a label in the format ``.. _figure_XXXX:`` with a leading underscore and + where ``XXXX`` is a unique descriptive name. + + * Images should be included using the ``.. figure::`` directive and the file type should be set to ``*`` (not ``.svg``). + This allows the format of the image to be changed if required, without updating the documentation. + + * Images must have a caption as part of the ``.. figure::`` directive. 
+ +* Here is an example of the previous three guidelines:: + + .. _figure_mempool: + + .. figure:: img/mempool.* + + A mempool in memory with its associated ring. + +.. _mock_label: + +* Images can then be linked to using the ``:numref:`` directive:: + + The mempool layout is shown in :numref:`figure_mempool`. + + This would be rendered as: *The mempool layout is shown in* :ref:`Fig 6.3 `. + + **Note**: The ``:numref:`` directive requires Sphinx 1.3.1 or later. + With earlier versions it will still be rendered as a link but won't have an automatically generated number. + +* The caption of the image can be generated, with a link, using the ``:ref:`` directive:: + + :ref:`figure_mempool` + + This would be rendered as: *A mempool in memory with its associated ring.* + +Tables +~~~~~~ + +* RST tables should be used sparingly. + They are hard to format and to edit, they are often rendered incorrectly in PDF format, and the same information + can usually be shown just as clearly with a definition or bullet list. + +* Tables in the documentation should be formatted as follows: + + * The table should be preceded by a label in the format ``.. _table_XXXX:`` with a leading underscore and where + ``XXXX`` is a unique descriptive name. + + * Tables should be included using the ``.. table::`` directive and must have a caption. + +* Here is an example of the previous two guidelines:: + + .. _table_qos_pipes: + + .. table:: Sample configuration for QOS pipes. + + +----------+----------+----------+ + | Header 1 | Header 2 | Header 3 | + | | | | + +==========+==========+==========+ + | Text | Text | Text | + +----------+----------+----------+ + | ... | ... | ... | + +----------+----------+----------+ + +* Tables can be linked to using the ``:numref:`` and ``:ref:`` directives, as shown in the previous section for images. + For example:: + + The QOS configuration is shown in :numref:`table_qos_pipes`. + +* Tables should not include merged cells since they are not supported by the PDF renderer. + + +.. _links: + +Hyperlinks +~~~~~~~~~~ + +* Links to external websites can be plain URLs. + The following is rendered as https://dpdk.org:: + + https://dpdk.org + +* They can contain alternative text. + The following is rendered as `Check out DPDK `_:: + + `Check out DPDK `_ + +* An internal link can be generated by placing labels in the document with the format ``.. _label_name``. + +* The following links to the top of this section: :ref:`links`:: + + .. _links: + + Hyperlinks + ~~~~~~~~~~ + + * The following links to the top of this section: :ref:`links`: + +.. Note:: + + The label must have a leading underscore but the reference to it must omit it. + This is a frequent cause of errors and warnings. + +* The use of a label is preferred since it works across files and will still work if the header text changes. + + +.. _doxygen_guidelines: + +Doxygen Guidelines +------------------ + +The DPDK API is documented using Doxygen comment annotations in the header files. +Doxygen is a very powerful tool, it is extremely configurable and with a little effort can be used to create expressive documents. +See the `Doxygen website `_ for full details on how to use it. + +The following are some guidelines for use of Doxygen in the DPDK API documentation: + +* New libraries that are documented with Doxygen should be added to the Doxygen configuration file: ``doc/api/doxy-api.conf``. + It is only required to add the directory that contains the files. 
+ It isn't necessary to explicitly name each file since the configuration matches all ``rte_*.h`` files in the directory. + +* Use proper capitalization and punctuation in the Doxygen comments since they will become sentences in the documentation. + This in particular applies to single line comments, which is the case the is most often forgotten. + +* Use ``@`` style Doxygen commands instead of ``\`` style commands. + +* Add a general description of each library at the head of the main header files: + + .. code-block:: c + + /** + * @file + * RTE Mempool. + * + * A memory pool is an allocator of fixed-size object. It is + * identified by its name, and uses a ring to store free objects. + * ... + */ + +* Document the purpose of a function, the parameters used and the return + value: + + .. code-block:: c + + /** + * Try to take the lock. + * + * @param sl + * A pointer to the spinlock. + * @return + * 1 if the lock is successfully taken; 0 otherwise. + */ + int rte_spinlock_trylock(rte_spinlock_t *sl); + +* Doxygen supports Markdown style syntax such as bold, italics, fixed width text and lists. + For example the second line in the ``devargs`` parameter in the previous example will be rendered as: + + The strings should be a pci address like ``0000:01:00.0`` or **virtual** device name like ``net_pcap0``. + +* Use ``-`` instead of ``*`` for lists within the Doxygen comment since the latter can get confused with the comment delimiter. + +* Add an empty line between the function description, the ``@params`` and ``@return`` for readability. + +* Place the ``@params`` description on separate line and indent it by 2 spaces. + (It would be better to use no indentation since this is more common and also because checkpatch complains about leading + whitespace in comments. + However this is the convention used in the existing DPDK code.) + +* Documented functions can be linked to simply by adding ``()`` to the function name: + + .. code-block:: c + + /** + * The functions exported by the application Ethernet API to setup + * a device designated by its port identifier must be invoked in + * the following order: + * - rte_eth_dev_configure() + * - rte_eth_tx_queue_setup() + * - rte_eth_rx_queue_setup() + * - rte_eth_dev_start() + */ + + In the API documentation the functions will be rendered as links, see the + `online section of the rte_ethdev.h docs `_ that contains the above text. + +* The ``@see`` keyword can be used to create a *see also* link to another file or library. + This directive should be placed on one line at the bottom of the documentation section. + + .. code-block:: c + + /** + * ... + * + * Some text that references mempools. + * + * @see eal_memzone.c + */ + +* Doxygen supports two types of comments for documenting variables, constants and members: prefix and postfix: + + .. code-block:: c + + /** This is a prefix comment. */ + #define RTE_FOO_ERROR 0x023. + + #define RTE_BAR_ERROR 0x024. /**< This is a postfix comment. */ + +* Postfix comments are preferred for struct members and constants if they can be documented in the same way: + + .. code-block:: c + + struct rte_eth_stats { + uint64_t ipackets; /**< Total number of received packets. */ + uint64_t opackets; /**< Total number of transmitted packets.*/ + uint64_t ibytes; /**< Total number of received bytes. */ + uint64_t obytes; /**< Total number of transmitted bytes. */ + uint64_t imissed; /**< Total of RX missed packets. */ + uint64_t ibadcrc; /**< Total of RX packets with CRC error. 
*/ + uint64_t ibadlen; /**< Total of RX packets with bad length. */ + } + + Note: postfix comments should be aligned with spaces not tabs in accordance + with the :ref:`coding_style`. + +* If a single comment type can't be used, due to line length limitations then + prefix comments should be preferred. + For example this section of the code contains prefix comments, postfix comments on the same line and postfix + comments on a separate line: + + .. code-block:: c + + /** Number of elements in the elt_pa array. */ + uint32_t pg_num __rte_cache_aligned; + uint32_t pg_shift; /**< LOG2 of the physical pages. */ + uintptr_t pg_mask; /**< Physical page mask value. */ + uintptr_t elt_va_start; + /**< Virtual address of the first mempool object. */ + uintptr_t elt_va_end; + /**< Virtual address of the mempool object. */ + phys_addr_t elt_pa[MEMPOOL_PG_NUM_DEFAULT]; + /**< Array of physical page addresses for the mempool buffer. */ + + This doesn't have an effect on the rendered documentation but it is confusing for the developer reading the code. + It this case it would be clearer to use prefix comments throughout: + + .. code-block:: c + + /** Number of elements in the elt_pa array. */ + uint32_t pg_num __rte_cache_aligned; + /** LOG2 of the physical pages. */ + uint32_t pg_shift; + /** Physical page mask value. */ + uintptr_t pg_mask; + /** Virtual address of the first mempool object. */ + uintptr_t elt_va_start; + /** Virtual address of the mempool object. */ + uintptr_t elt_va_end; + /** Array of physical page addresses for the mempool buffer. */ + phys_addr_t elt_pa[MEMPOOL_PG_NUM_DEFAULT]; + +* Check for Doxygen warnings in new code by checking the API documentation build:: + + make doc-api-html >/dev/null + +* Read the rendered section of the documentation that you have added for correctness, clarity and consistency + with the surrounding text. diff --git a/src/spdk/dpdk/doc/guides/contributing/img/abi_stability_policy.svg b/src/spdk/dpdk/doc/guides/contributing/img/abi_stability_policy.svg new file mode 100644 index 000000000..4fd400731 --- /dev/null +++ b/src/spdk/dpdk/doc/guides/contributing/img/abi_stability_policy.svg @@ -0,0 +1,1059 @@ + +image/svg+xmlv19.11 +v20.02 +v20.05 +v20.08 +V20.11 +v21.02 +v21.05 +v21.08 +v21.11 +v22.02 +V22.05 +V22.08 +V22.11 +v20 +v21 + + + + + + + + + + + + +ABI +Version +Compatibility +v20 +v21 +v22 +v23 + +v22 +v23 + + + + + + + + + +v20 ABI is declared aligned with v19.11 LTS +v21 symbols are added and v20 symbols are modified, support for v20 ABI continues. +v21 ABI is declared aligned with v20.11 LTS, remaining v20 symbols are removed. +ABI Versions +DPDK Releases +v22 symbols are added and v21 symbols are modified, support for v21 ABI continues….. +LTS Release +Minor Release +ABI Version + \ No newline at end of file diff --git a/src/spdk/dpdk/doc/guides/contributing/img/patch_cheatsheet.svg b/src/spdk/dpdk/doc/guides/contributing/img/patch_cheatsheet.svg new file mode 100644 index 000000000..85225923e --- /dev/null +++ b/src/spdk/dpdk/doc/guides/contributing/img/patch_cheatsheet.svg @@ -0,0 +1,1484 @@ + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + image/svg+xml + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + CHEATSHEET + PATCH SUBMIT + + + + + + Patch Pre-Checks + Commit Pre-Checks + Bugfix? 
+ Git send-email + + + Compile Pre-Checks + + + Include warning/error + + Fixes: line + + How to reproduce + + + + + build gcc icc clang + + make test doc + + make examples + + make shared-lib + + library ABI version + + build 32 and 64 bits + + + git send-email *.patch --annotate --to <maintainer> --cc dev@dpdk.org [ --cc other@participants.com --cover-letter -v[N] --in-reply-to <message ID> ] + harry.van.haaren@intel.com + + Suggestions / Updates? + + + Patch version ( eg: -v2 ) + + Patch version annotations + + Send --to maintainer + + Send --cc dev@dpdk.org + + Cover letter + + + Send --in-reply-to <message ID> + **** + + + v1.0 + + + + + + + + + Signed-off-by: + + Suggested-by: + + Reported-by: + + Tested-by: + + + Previous Acks + * + + + Commit message + + + Mailing List + + Acked-by: + + Reviewed-by: + Nack (refuse patch) + + + + Tested-by: + + Previous Acks only when fixing typos, rebased, or checkpatch issues. + * + + + The version.map function names must be in alphabetical order. + * + * + + + + + Rebase to git + + Checkpatch + + ABI breakage + + Maintainers file + + Release notes + + Documentation + + + ** + + + Update version.map + + + + Doxygen + + *** + + + + + + + + * + * + * + New header files must get a new page in the API docs. + + + Available from patchwork, or email header. Reply to Cover letters. + * + * + * + * + + + + git format-patch -[N] + // creates .patch files for final review + + diff --git a/src/spdk/dpdk/doc/guides/contributing/img/what_is_an_abi.svg b/src/spdk/dpdk/doc/guides/contributing/img/what_is_an_abi.svg new file mode 100644 index 000000000..fd3d993eb --- /dev/null +++ b/src/spdk/dpdk/doc/guides/contributing/img/what_is_an_abi.svg @@ -0,0 +1,382 @@ + +image/svg+xmlDPDK +Application +API +ABI +Programming +Language +Functions +Datatypes +Return Types +Constants + +Instruction set +Executable & Linker +Format +Calling Conventions. + +function calls + \ No newline at end of file diff --git a/src/spdk/dpdk/doc/guides/contributing/index.rst b/src/spdk/dpdk/doc/guides/contributing/index.rst new file mode 100644 index 000000000..2fefd9193 --- /dev/null +++ b/src/spdk/dpdk/doc/guides/contributing/index.rst @@ -0,0 +1,19 @@ +.. SPDX-License-Identifier: BSD-3-Clause + Copyright 2018 The DPDK contributors + +Contributor's Guidelines +======================== + +.. toctree:: + :maxdepth: 2 + :numbered: + + coding_style + design + abi_policy + abi_versioning + documentation + patches + vulnerability + stable + cheatsheet diff --git a/src/spdk/dpdk/doc/guides/contributing/patches.rst b/src/spdk/dpdk/doc/guides/contributing/patches.rst new file mode 100644 index 000000000..16b40225f --- /dev/null +++ b/src/spdk/dpdk/doc/guides/contributing/patches.rst @@ -0,0 +1,691 @@ +.. SPDX-License-Identifier: BSD-3-Clause + Copyright 2018 The DPDK contributors + +.. submitting_patches: + +Contributing Code to DPDK +========================= + +This document outlines the guidelines for submitting code to DPDK. + +The DPDK development process is modeled (loosely) on the Linux Kernel development model so it is worth reading the +Linux kernel guide on submitting patches: +`How to Get Your Change Into the Linux Kernel `_. +The rationale for many of the DPDK guidelines is explained in greater detail in the kernel guidelines. + + +The DPDK Development Process +---------------------------- + +The DPDK development process has the following features: + +* The code is hosted in a public git repository. +* There is a mailing list where developers submit patches. 
+* There are maintainers for hierarchical components. +* Patches are reviewed publicly on the mailing list. +* Successfully reviewed patches are merged to the repository. +* Patches should be sent to the target repository or sub-tree, see below. +* All sub-repositories are merged into main repository for ``-rc1`` and ``-rc2`` versions of the release. +* After the ``-rc2`` release all patches should target the main repository. + +The mailing list for DPDK development is `dev@dpdk.org `_. +Contributors will need to `register for the mailing list `_ in order to submit patches. +It is also worth registering for the DPDK `Patchwork `_ + +If you are using the GitHub service, you can link your repository to +the ``travis-ci.org`` build service. When you push patches to your GitHub +repository, the travis service will automatically build your changes. + +The development process requires some familiarity with the ``git`` version control system. +Refer to the `Pro Git Book `_ for further information. + +Source License +-------------- + +The DPDK uses the Open Source BSD-3-Clause license for the core libraries and +drivers. The kernel components are GPL-2.0 licensed. DPDK uses single line +reference to Unique License Identifiers in source files as defined by the Linux +Foundation's `SPDX project `_. + +DPDK uses first line of the file to be SPDX tag. In case of *#!* scripts, SPDX +tag can be placed in 2nd line of the file. + +For example, to label a file as subject to the BSD-3-Clause license, +the following text would be used: + +``SPDX-License-Identifier: BSD-3-Clause`` + +To label a file as dual-licensed with BSD-3-Clause and GPL-2.0 (e.g., for code +that is shared between the kernel and userspace), the following text would be +used: + +``SPDX-License-Identifier: (BSD-3-Clause OR GPL-2.0)`` + +Refer to ``licenses/README`` for more details. + +Maintainers and Sub-trees +------------------------- + +The DPDK maintenance hierarchy is divided into a main repository ``dpdk`` and sub-repositories ``dpdk-next-*``. + +There are maintainers for the trees and for components within the tree. + +Trees and maintainers are listed in the ``MAINTAINERS`` file. For example:: + + Crypto Drivers + -------------- + M: Some Name + T: git://dpdk.org/next/dpdk-next-crypto + + Intel AES-NI GCM PMD + M: Some One + F: drivers/crypto/aesni_gcm/ + F: doc/guides/cryptodevs/aesni_gcm.rst + +Where: + +* ``M`` is a tree or component maintainer. +* ``T`` is a repository tree. +* ``F`` is a maintained file or directory. + +Additional details are given in the ``MAINTAINERS`` file. + +The role of the component maintainers is to: + +* Review patches for the component or delegate the review. + The review should be done, ideally, within 1 week of submission to the mailing list. +* Add an ``acked-by`` to patches, or patchsets, that are ready for committing to a tree. +* Reply to questions asked about the component. + +Component maintainers can be added or removed by submitting a patch to the ``MAINTAINERS`` file. +Maintainers should have demonstrated a reasonable level of contributions or reviews to the component area. +The maintainer should be confirmed by an ``ack`` from an established contributor. +There can be more than one component maintainer if desired. + +The role of the tree maintainers is to: + +* Maintain the overall quality of their tree. + This can entail additional review, compilation checks or other tests deemed necessary by the maintainer. 
+* Commit patches that have been reviewed by component maintainers and/or other contributors. + The tree maintainer should determine if patches have been reviewed sufficiently. +* Ensure that patches are reviewed in a timely manner. +* Prepare the tree for integration. +* Ensure that there is a designated back-up maintainer and coordinate a handover for periods where the + tree maintainer can't perform their role. + +Tree maintainers can be added or removed by submitting a patch to the ``MAINTAINERS`` file. +The proposer should justify the need for a new sub-tree and should have demonstrated a sufficient level of contributions in the area or to a similar area. +The maintainer should be confirmed by an ``ack`` from an existing tree maintainer. +Disagreements on trees or maintainers can be brought to the Technical Board. + +The backup maintainer for the master tree should be selected from the existing sub-tree maintainers from the project. +The backup maintainer for a sub-tree should be selected from among the component maintainers within that sub-tree. + + +Getting the Source Code +----------------------- + +The source code can be cloned using either of the following: + +main repository:: + + git clone git://dpdk.org/dpdk + git clone https://dpdk.org/git/dpdk + +sub-repositories (`list `_):: + + git clone git://dpdk.org/next/dpdk-next-* + git clone https://dpdk.org/git/next/dpdk-next-* + +Make your Changes +----------------- + +Make your planned changes in the cloned ``dpdk`` repo. Here are some guidelines and requirements: + +* Follow the :ref:`coding_style` guidelines. + +* If you add new files or directories you should add your name to the ``MAINTAINERS`` file. + +* Initial submission of new PMDs should be prepared against a corresponding repo. + + * Thus, for example, initial submission of a new network PMD should be + prepared against dpdk-next-net repo. + + * Likewise, initial submission of a new crypto or compression PMD should be + prepared against dpdk-next-crypto repo. + + * For other PMDs and more info, refer to the ``MAINTAINERS`` file. + +* New external functions should be added to the local ``version.map`` file. See + the :doc:`ABI policy ` and :ref:`ABI versioning ` + guides. New external functions should also be added in alphabetical order. + +* Important changes will require an addition to the release notes in ``doc/guides/rel_notes/``. + See the :ref:`Release Notes section of the Documentation Guidelines ` for details. + +* Test the compilation works with different targets, compilers and options, see :ref:`contrib_check_compilation`. + +* Don't break compilation between commits with forward dependencies in a patchset. + Each commit should compile on its own to allow for ``git bisect`` and continuous integration testing. + +* Add tests to the ``app/test`` unit test framework where possible. + +* Add documentation, if relevant, in the form of Doxygen comments or a User Guide in RST format. + See the :ref:`Documentation Guidelines `. + +Once the changes have been made you should commit them to your local repo. + +For small changes, that do not require specific explanations, it is better to keep things together in the +same patch. +Larger changes that require different explanations should be separated into logical patches in a patchset. +A good way of thinking about whether a patch should be split is to consider whether the change could be +applied without dependencies as a backport. 
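+
+As a purely illustrative sketch (the file paths and the way the changes are
+divided are hypothetical), interactive staging can be used to separate
+unrelated changes into their own commits::
+
+    # Stage and commit only the hunks that belong to the fix.
+    git add -p lib/librte_foo/rte_foo.c
+    git commit --signoff
+
+    # Commit the remaining, unrelated cleanup as a separate patch.
+    git add lib/librte_foo/rte_foo.c
+    git commit --signoff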
+ +It is better to keep the related documentation changes in the same patch +file as the code, rather than one big documentation patch at the end of a +patchset. This makes it easier for future maintenance and development of the +code. + +As a guide to how patches should be structured run ``git log`` on similar files. + + +Commit Messages: Subject Line +----------------------------- + +The first, summary, line of the git commit message becomes the subject line of the patch email. +Here are some guidelines for the summary line: + +* The summary line must capture the area and the impact of the change. + +* The summary line should be around 50 characters. + +* The summary line should be lowercase apart from acronyms. + +* It should be prefixed with the component name (use git log to check existing components). + For example:: + + ixgbe: fix offload config option name + + config: increase max queues per port + +* Use the imperative of the verb (like instructions to the code base). + +* Don't add a period/full stop to the subject line or you will end up two in the patch name: ``dpdk_description..patch``. + +The actual email subject line should be prefixed by ``[PATCH]`` and the version, if greater than v1, +for example: ``PATCH v2``. +The is generally added by ``git send-email`` or ``git format-patch``, see below. + +If you are submitting an RFC draft of a feature you can use ``[RFC]`` instead of ``[PATCH]``. +An RFC patch doesn't have to be complete. +It is intended as a way of getting early feedback. + + +Commit Messages: Body +--------------------- + +Here are some guidelines for the body of a commit message: + +* The body of the message should describe the issue being fixed or the feature being added. + It is important to provide enough information to allow a reviewer to understand the purpose of the patch. + +* When the change is obvious the body can be blank, apart from the signoff. + +* The commit message must end with a ``Signed-off-by:`` line which is added using:: + + git commit --signoff # or -s + + The purpose of the signoff is explained in the + `Developer's Certificate of Origin `_ + section of the Linux kernel guidelines. + + .. Note:: + + All developers must ensure that they have read and understood the + Developer's Certificate of Origin section of the documentation prior + to applying the signoff and submitting a patch. + +* The signoff must be a real name and not an alias or nickname. + More than one signoff is allowed. + +* The text of the commit message should be wrapped at 72 characters. + +* When fixing a regression, it is required to reference the id of the commit + which introduced the bug, and put the original author of that commit on CC. + You can generate the required lines using the following git alias, which prints + the commit SHA and the author of the original code:: + + git config alias.fixline "log -1 --abbrev=12 --format='Fixes: %h (\"%s\")%nCc: %ae'" + + The output of ``git fixline `` must then be added to the commit message:: + + doc: fix some parameter description + + Update the docs, fixing description of some parameter. + + Fixes: abcdefgh1234 ("doc: add some parameter") + Cc: author@example.com + + Signed-off-by: Alex Smith + +* When fixing an error or warning it is useful to add the error message and instructions on how to reproduce it. + +* Use correct capitalization, punctuation and spelling. + +In addition to the ``Signed-off-by:`` name the commit messages can also have +tags for who reported, suggested, tested and reviewed the patch being +posted. 
Please refer to the `Tested, Acked and Reviewed by`_ section. + +Patch Fix Related Issues +~~~~~~~~~~~~~~~~~~~~~~~~ + +`Coverity `_ +is a tool for static code analysis. +It is used as a cloud-based service used to scan the DPDK source code, +and alert developers of any potential defects in the source code. +When fixing an issue found by Coverity, the patch must contain a Coverity issue ID +in the body of the commit message. For example:: + + + doc: fix some parameter description + + Update the docs, fixing description of some parameter. + + Coverity issue: 12345 + Fixes: abcdefgh1234 ("doc: add some parameter") + Cc: author@example.com + + Signed-off-by: Alex Smith + + +`Bugzilla `_ +is a bug- or issue-tracking system. +Bug-tracking systems allow individual or groups of developers +effectively to keep track of outstanding problems with their product. +When fixing an issue raised in Bugzilla, the patch must contain +a Bugzilla issue ID in the body of the commit message. +For example:: + + doc: fix some parameter description + + Update the docs, fixing description of some parameter. + + Bugzilla ID: 12345 + Fixes: abcdefgh1234 ("doc: add some parameter") + Cc: author@example.com + + Signed-off-by: Alex Smith + +Patch for Stable Releases +~~~~~~~~~~~~~~~~~~~~~~~~~ + +All fix patches to the master branch that are candidates for backporting +should also be CCed to the `stable@dpdk.org `_ +mailing list. +In the commit message body the Cc: stable@dpdk.org should be inserted as follows:: + + doc: fix some parameter description + + Update the docs, fixing description of some parameter. + + Fixes: abcdefgh1234 ("doc: add some parameter") + Cc: stable@dpdk.org + + Signed-off-by: Alex Smith + +For further information on stable contribution you can go to +:doc:`Stable Contribution Guide `. + + +Creating Patches +---------------- + +It is possible to send patches directly from git but for new contributors it is recommended to generate the +patches with ``git format-patch`` and then when everything looks okay, and the patches have been checked, to +send them with ``git send-email``. + +Here are some examples of using ``git format-patch`` to generate patches: + +.. code-block:: console + + # Generate a patch from the last commit. + git format-patch -1 + + # Generate a patch from the last 3 commits. + git format-patch -3 + + # Generate the patches in a directory. + git format-patch -3 -o ~/patch/ + + # Add a cover letter to explain a patchset. + git format-patch -3 -o ~/patch/ --cover-letter + + # Add a prefix with a version number. + git format-patch -3 -o ~/patch/ -v 2 + + +Cover letters are useful for explaining a patchset and help to generate a logical threading to the patches. +Smaller notes can be put inline in the patch after the ``---`` separator, for example:: + + Subject: [PATCH] fm10k/base: add FM10420 device ids + + Add the device ID for Boulder Rapids and Atwood Channel to enable + drivers to support those devices. + + Signed-off-by: Alex Smith + --- + + ADD NOTES HERE. + + drivers/net/fm10k/base/fm10k_api.c | 6 ++++++ + drivers/net/fm10k/base/fm10k_type.h | 6 ++++++ + 2 files changed, 12 insertions(+) + ... + +Version 2 and later of a patchset should also include a short log of the changes so the reviewer knows what has changed. +This can be added to the cover letter or the annotations. +For example:: + + --- + v3: + * Fixed issued with version.map. + + v2: + * Added i40e support. 
+ * Renamed ethdev functions from rte_eth_ieee15888_*() to rte_eth_timesync_*() + since 802.1AS can be supported through the same interfaces. + + +.. _contrib_checkpatch: + +Checking the Patches +-------------------- + +Patches should be checked for formatting and syntax issues using the ``checkpatches.sh`` script in the ``devtools`` +directory of the DPDK repo. +This uses the Linux kernel development tool ``checkpatch.pl`` which can be obtained by cloning, and periodically, +updating the Linux kernel sources. + +The path to the original Linux script must be set in the environment variable ``DPDK_CHECKPATCH_PATH``. + +Spell checking of commonly misspelled words +can be enabled by downloading the codespell dictionary:: + + https://raw.githubusercontent.com/codespell-project/codespell/master/codespell_lib/data/dictionary.txt + +The path to the downloaded ``dictionary.txt`` must be set +in the environment variable ``DPDK_CHECKPATCH_CODESPELL``. + +Environment variables required by the development tools, +are loaded from the following files, in order of preference:: + + .develconfig + ~/.config/dpdk/devel.config + /etc/dpdk/devel.config. + +Once the environment variable is set, the script can be run as follows:: + + devtools/checkpatches.sh ~/patch/ + +The script usage is:: + + checkpatches.sh [-h] [-q] [-v] [patch1 [patch2] ...]]" + +Where: + +* ``-h``: help, usage. +* ``-q``: quiet. Don't output anything for files without issues. +* ``-v``: verbose. +* ``patchX``: path to one or more patches. + +Then the git logs should be checked using the ``check-git-log.sh`` script. + +The script usage is:: + + check-git-log.sh [range] + +Where the range is a ``git log`` option. + + +.. _contrib_check_compilation: + +Checking Compilation +-------------------- + +Makefile System +~~~~~~~~~~~~~~~ + +Compilation of patches and changes should be tested using the ``test-build.sh`` script in the ``devtools`` +directory of the DPDK repo:: + + devtools/test-build.sh x86_64-native-linux-gcc+next+shared + +The script usage is:: + + test-build.sh [-h] [-jX] [-s] [config1 [config2] ...]] + +Where: + +* ``-h``: help, usage. +* ``-jX``: use X parallel jobs in "make". +* ``-s``: short test with only first config and without examples/doc. +* ``config``: default config name plus config switches delimited with a ``+`` sign. + +Examples of configs are:: + + x86_64-native-linux-gcc + x86_64-native-linux-gcc+next+shared + x86_64-native-linux-clang+shared + +The builds can be modified via the following environmental variables: + +* ``DPDK_BUILD_TEST_CONFIGS`` (target1+option1+option2 target2) +* ``DPDK_BUILD_TEST_DIR`` +* ``DPDK_DEP_CFLAGS`` +* ``DPDK_DEP_LDFLAGS`` +* ``DPDK_DEP_PCAP`` (y/[n]) +* ``DPDK_NOTIFY`` (notify-send) + +These can be set from the command line or in the config files shown above in the :ref:`contrib_checkpatch`. + +The recommended configurations and options to test compilation prior to submitting patches are:: + + x86_64-native-linux-gcc+shared+next + x86_64-native-linux-clang+shared + i686-native-linux-gcc + + export DPDK_DEP_ZLIB=y + export DPDK_DEP_PCAP=y + export DPDK_DEP_SSL=y + +Meson System +~~~~~~~~~~~~ + +Compilation of patches is to be tested with ``devtools/test-meson-builds.sh`` script. + +The script internally checks for dependencies, then builds for several +combinations of compilation configuration. +By default, each build will be put in a subfolder of the current working directory. 
+However, if it is preferred to place the builds in a different location, +the environment variable ``DPDK_BUILD_TEST_DIR`` can be set to that desired location. +For example, setting ``DPDK_BUILD_TEST_DIR=__builds`` will put all builds +in a single subfolder called "__builds" created in the current directory. +Setting ``DPDK_BUILD_TEST_DIR`` to an absolute directory path e.g. ``/tmp`` is also supported. + + +.. _integrated_abi_check: + +Checking ABI compatibility +-------------------------- + +By default, ABI compatibility checks are disabled. + +To enable them, a reference version must be selected via the environment +variable ``DPDK_ABI_REF_VERSION``. + +The ``devtools/test-build.sh`` and ``devtools/test-meson-builds.sh`` scripts +then build this reference version in a temporary directory and store the +results in a subfolder of the current working directory. +The environment variable ``DPDK_ABI_REF_DIR`` can be set so that the results go +to a different location. + + +Sending Patches +--------------- + +Patches should be sent to the mailing list using ``git send-email``. +You can configure an external SMTP with something like the following:: + + [sendemail] + smtpuser = name@domain.com + smtpserver = smtp.domain.com + smtpserverport = 465 + smtpencryption = ssl + +See the `Git send-email `_ documentation for more details. + +The patches should be sent to ``dev@dpdk.org``. +If the patches are a change to existing files then you should send them TO the maintainer(s) and CC ``dev@dpdk.org``. +The appropriate maintainer can be found in the ``MAINTAINERS`` file:: + + git send-email --to maintainer@some.org --cc dev@dpdk.org 000*.patch + +Script ``get-maintainer.sh`` can be used to select maintainers automatically:: + + git send-email --to-cmd ./devtools/get-maintainer.sh --cc dev@dpdk.org 000*.patch + +New additions can be sent without a maintainer:: + + git send-email --to dev@dpdk.org 000*.patch + +You can test the emails by sending it to yourself or with the ``--dry-run`` option. + +If the patch is in relation to a previous email thread you can add it to the same thread using the Message ID:: + + git send-email --to dev@dpdk.org --in-reply-to <1234-foo@bar.com> 000*.patch + +The Message ID can be found in the raw text of emails or at the top of each Patchwork patch, +`for example `_. +Shallow threading (``--thread --no-chain-reply-to``) is preferred for a patch series. + +Once submitted your patches will appear on the mailing list and in Patchwork. + +Experienced committers may send patches directly with ``git send-email`` without the ``git format-patch`` step. +The options ``--annotate`` and ``confirm = always`` are recommended for checking patches before sending. + + +Backporting patches for Stable Releases +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ + +Sometimes a maintainer or contributor wishes, or can be asked, to send a patch +for a stable release rather than mainline. +In this case the patch(es) should be sent to ``stable@dpdk.org``, +not to ``dev@dpdk.org``. + +Given that there are multiple stable releases being maintained at the same time, +please specify exactly which branch(es) the patch is for +using ``git send-email --subject-prefix='PATCH 16.11' ...`` +and also optionally in the cover letter or in the annotation. + + +The Review Process +------------------ + +Patches are reviewed by the community, relying on the experience and +collaboration of the members to double-check each other's work. 
There are a +number of ways to indicate that you have checked a patch on the mailing list. + + +Tested, Acked and Reviewed by +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ + +To indicate that you have interacted with a patch on the mailing list you +should respond to the patch in an email with one of the following tags: + + * Reviewed-by: + * Acked-by: + * Tested-by: + * Reported-by: + * Suggested-by: + +The tag should be on a separate line as follows:: + + tag-here: Name Surname + +Each of these tags has a specific meaning. In general, the DPDK community +follows the kernel usage of the tags. A short summary of the meanings of each +tag is given here for reference: + +.. _statement: https://www.kernel.org/doc/html/latest/process/submitting-patches.html#reviewer-s-statement-of-oversight + +``Reviewed-by:`` is a strong statement_ that the patch is an appropriate state +for merging without any remaining serious technical issues. Reviews from +community members who are known to understand the subject area and to perform +thorough reviews will increase the likelihood of the patch getting merged. + +``Acked-by:`` is a record that the person named was not directly involved in +the preparation of the patch but wishes to signify and record their acceptance +and approval of it. + +``Tested-by:`` indicates that the patch has been successfully tested (in some +environment) by the person named. + +``Reported-by:`` is used to acknowledge person who found or reported the bug. + +``Suggested-by:`` indicates that the patch idea was suggested by the named +person. + + + +Steps to getting your patch merged +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ + +The more work you put into the previous steps the easier it will be to get a +patch accepted. The general cycle for patch review and acceptance is: + +#. Submit the patch. + +#. Check the automatic test reports in the coming hours. + +#. Wait for review comments. While you are waiting review some other patches. + +#. Fix the review comments and submit a ``v n+1`` patchset:: + + git format-patch -3 -v 2 + +#. Update Patchwork to mark your previous patches as "Superseded". + +#. If the patch is deemed suitable for merging by the relevant maintainer(s) or other developers they will ``ack`` + the patch with an email that includes something like:: + + Acked-by: Alex Smith + + **Note**: When acking patches please remove as much of the text of the patch email as possible. + It is generally best to delete everything after the ``Signed-off-by:`` line. + +#. Having the patch ``Reviewed-by:`` and/or ``Tested-by:`` will also help the patch to be accepted. + +#. If the patch isn't deemed suitable based on being out of scope or conflicting with existing functionality + it may receive a ``nack``. + In this case you will need to make a more convincing technical argument in favor of your patches. + +#. In addition a patch will not be accepted if it doesn't address comments from a previous version with fixes or + valid arguments. + +#. It is the responsibility of a maintainer to ensure that patches are reviewed and to provide an ``ack`` or + ``nack`` of those patches as appropriate. + +#. Once a patch has been acked by the relevant maintainer, reviewers may still comment on it for a further + two weeks. After that time, the patch should be merged into the relevant git tree for the next release. + Additional notes and restrictions: + + * Patches should be acked by a maintainer at least two days before the release merge + deadline, in order to make that release. 
+ * For patches acked with less than two weeks to go to the merge deadline, all additional + comments should be made no later than two days before the merge deadline. + * After the appropriate time for additional feedback has passed, if the patch has not yet + been merged to the relevant tree by the committer, it should be treated as though it had, + in that any additional changes needed to it must be addressed by a follow-on patch, rather + than rework of the original. + * Trivial patches may be merged sooner than described above at the tree committer's + discretion. diff --git a/src/spdk/dpdk/doc/guides/contributing/stable.rst b/src/spdk/dpdk/doc/guides/contributing/stable.rst new file mode 100644 index 000000000..890bbeccc --- /dev/null +++ b/src/spdk/dpdk/doc/guides/contributing/stable.rst @@ -0,0 +1,125 @@ +.. SPDX-License-Identifier: BSD-3-Clause + Copyright 2018 The DPDK contributors + +.. _stable_lts_releases: + +DPDK Stable Releases and Long Term Support +========================================== + +This section sets out the guidelines for the DPDK Stable Releases and the DPDK +Long Term Support releases (LTS). + + +Introduction +------------ + +The purpose of the DPDK Stable Releases is to maintain releases of DPDK with +backported fixes over an extended period of time. This provides downstream +consumers of DPDK with a stable target on which to base applications or +packages. + +The Long Term Support release (LTS) is a designation applied to a Stable +Release to indicate longer term support. + + +Stable Releases +--------------- + +Any release of DPDK can be designated as a Stable Release if a +maintainer volunteers to maintain it and there is a commitment from major +contributors to validate it before releases. If a release is to be designated +as a Stable Release, it should be done by 1 month after the master release. + +A Stable Release is used to backport fixes from an ``N`` release back to an +``N-1`` release, for example, from 16.11 to 16.07. + +The duration of a stable is one complete release cycle (3 months). It can be +longer, up to 1 year, if a maintainer continues to support the stable branch, +or if users supply backported fixes, however the explicit commitment should be +for one release cycle. + +The release cadence is determined by the maintainer based on the number of +bugfixes and the criticality of the bugs. Releases should be coordinated with +the validation engineers to ensure that a tagged release has been tested. + + +LTS Release +----------- + +A stable release can be designated as an LTS release based on community +agreement and a commitment from a maintainer. The current policy is that each +year's November (X.11) release will be maintained as an LTS for 2 years. + +After the X.11 release, an LTS branch will be created for it at +https://git.dpdk.org/dpdk-stable where bugfixes will be backported to. + +A LTS release may align with the declaration of a new major ABI version, +please read the :doc:`abi_policy` for more information. + +It is anticipated that there will be at least 4 releases per year of the LTS +or approximately 1 every 3 months. However, the cadence can be shorter or +longer depending on the number and criticality of the backported +fixes. Releases should be coordinated with the validation engineers to ensure +that a tagged release has been tested. + +For a list of the currently maintained stable/LTS branches please see +the latest `stable roadmap `_. 
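+
+As an illustration, a stable branch can be obtained by cloning the stable
+tree and checking out the relevant branch (the URL follows the pattern used
+for the main repository and the branch name shown is only an example)::
+
+    git clone https://dpdk.org/git/dpdk-stable
+    cd dpdk-stable
+    git checkout 19.11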
+ +At the end of the 2 years, a final X.11.N release will be made and at that +point the LTS branch will no longer be maintained with no further releases. + + +What changes should be backported +--------------------------------- + +Backporting should be limited to bug fixes. All patches accepted on the master +branch with a Fixes: tag should be backported to the relevant stable/LTS +branches, unless the submitter indicates otherwise. If there are exceptions, +they will be discussed on the mailing lists. + +Fixes suitable for backport should have a ``Cc: stable@dpdk.org`` tag in the +commit message body as follows:: + + doc: fix some parameter description + + Update the docs, fixing description of some parameter. + + Fixes: abcdefgh1234 ("doc: add some parameter") + Cc: stable@dpdk.org + + Signed-off-by: Alex Smith + + +Fixes not suitable for backport should not include the ``Cc: stable@dpdk.org`` tag. + +Features should not be backported to stable releases. It may be acceptable, in +limited cases, to back port features for the LTS release where: + +* There is a justifiable use case (for example a new PMD). +* The change is non-invasive. +* The work of preparing the backport is done by the proposer. +* There is support within the community. + + +The Stable Mailing List +----------------------- + +The Stable and LTS release are coordinated on the stable@dpdk.org mailing +list. + +All fix patches to the master branch that are candidates for backporting +should also be CCed to the `stable@dpdk.org `_ +mailing list. + + +Releasing +--------- + +A Stable Release will be released by: + +* Tagging the release with YY.MM.n (year, month, number). +* Uploading a tarball of the release to dpdk.org. +* Sending an announcement to the `announce@dpdk.org `_ + list. + +Stable releases are available on the `dpdk.org download page `_. diff --git a/src/spdk/dpdk/doc/guides/contributing/vulnerability.rst b/src/spdk/dpdk/doc/guides/contributing/vulnerability.rst new file mode 100644 index 000000000..b6300252a --- /dev/null +++ b/src/spdk/dpdk/doc/guides/contributing/vulnerability.rst @@ -0,0 +1,325 @@ +.. SPDX-License-Identifier: BSD-3-Clause + Copyright 2019 The DPDK contributors + +DPDK Vulnerability Management Process +===================================== + +Scope +----- + +Only the main repositories (dpdk and dpdk-stable) of the core project +are in the scope of this security process (including experimental APIs). +If a stable branch is declared unmaintained (end of life), +no fix will be applied. + +All vulnerabilities are bugs, but not every bug is a vulnerability. +Vulnerabilities compromise one or more of: + +* Confidentiality (personal or corporate confidential data). +* Integrity (trustworthiness and correctness). +* Availability (uptime and service). + +If in doubt, please consider the vulnerability as security sensitive. +At worst, the response will be to report the bug through the usual channels. + + +Finding +------- + +There is no pro-active security engineering effort at the moment. + +Please report any security issue you find in DPDK as described below. + + +Report +------ + +Do not use Bugzilla (unsecured). +Instead, send GPG-encrypted emails +to `security@dpdk.org `_. +Anyone can post to this list. +In order to reduce the disclosure of a vulnerability in the early stages, +membership of this list is intentionally limited to a `small number of people +`_. + +It is additionally encouraged to GPG-sign one-on-one conversations +as part of the security process. 
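+
+As a minimal sketch (the report file name is hypothetical and the relevant
+public keys must already be imported into the local keyring), a report can
+be signed and encrypted with ``gpg`` before it is sent::
+
+    gpg --armor --sign --encrypt -r <recipient-key-id> report.txt
+
+This produces an ASCII-armored ``report.txt.asc`` file that can be attached
+to the email.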
+ +As it is with any bug, the more information provided, +the easier it will be to diagnose and fix. +If you already have a fix, please include it with your report, +as that can speed up the process considerably. + +In the report, please note how you would like to be credited +for discovering the issue +and the details of any embargo you would like to impose. + +If the vulnerability is not public yet, +no patch or information should be disclosed publicly. +If a fix is already published, +the reporting process must be followed anyway, as described below. + + +Confirmation +------------ + +Upon reception of the report, a security team member should reply +to the reporter acknowledging that the report has been received. + +The DPDK security team reviews the security vulnerability reported. +Area experts not members of the security team may be involved in the process. +In case the reported issue is not qualified as a security vulnerability, +the security team will request the submitter to report it +using the usual channel (Bugzilla). +If qualified, the security team will assess which DPDK version are affected. +A bugzilla ID (allocated in a `reserved pool +`_) +is assigned to the vulnerability, and kept empty until public disclosure. + +The security team calculates the severity score with +`CVSS calculator `_ +based on inputs from the reporter and its own assessment of the vulnerability, +and agrees on the score with the reporter. + +An embargo may be put in place depending on the severity of the vulnerability. +If an embargo is decided, its duration should be suggested by the security team +and negotiated with the reporter. +Embargo duration between vulnerability confirmation and public disclosure +should be between **one and ten weeks**. +If an embargo is not required, the vulnerability may be fixed +using the standard patch process, once a CVE number has been assigned. + +The confirmation mail should be sent within **3 business days**. + +Following information must be included in the mail: + +* Confirmation +* CVSS severity and score +* Embargo duration +* Reporter credit +* Bug ID (empty and restricted for future reference) + +CVE Request +----------- + +The security team develops a security advisory document. +The security team may, at its discretion, +include the reporter (via "CC") in developing the security advisory document, +but in any case should accept feedback +from the reporter before finalizing the document. +When the document is final, the security team needs to +request a CVE identifier from a CNA. + +The CVE request should be sent +to `secalert@redhat.com `_ +using GPG encrypted email +(see `contact details `_). + + +CVE Request Template with Embargo +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ + +:: + + A vulnerability was discovered in the DPDK project. + In order to ensure full traceability, we need a CVE number assigned + that we can attach to private and public notifications. + Please treat the following information as confidential during the embargo + until further public disclosure. + + [PRODUCT]: + [VERSION]: + [PROBLEMTYPE]: + [SEVERITY]: + [REFERENCES]: { bug_url } + [DESCRIPTION]: + + Thanks + { DPDK_security_team_member }, on behalf of the DPDK security team + + +CVE Request Template without Embargo +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ + +:: + + A vulnerability was discovered in the DPDK project. + In order to ensure full traceability, we need a CVE number assigned + that we can attach to private and public notifications. 
+ + [PRODUCT]: + [VERSION]: + [PROBLEMTYPE]: + [SEVERITY]: + [REFERENCES]: { bug_url } + [DESCRIPTION]: + + Thanks + { DPDK_security_team_member }, on behalf of the DPDK security team + + +Fix Development and Review +-------------------------- + +If the fix is already published, this step is skipped, +and the pre-release disclosure is replaced with the private disclosure, +as described below. It must not be considered as the standard process. + +This step may be started in parallel with CVE creation. +The patches fixing the vulnerability are developed and reviewed +by the security team and +by elected area experts that agree to maintain confidentiality. + +The CVE id and the bug id must be referenced in the patch. + +Backports to the identified affected versions are done once the fix is ready. + + +Pre-Release Disclosure +---------------------- + +When the fix is ready, the security advisory and patches are sent +to downstream stakeholders +(`security-prerelease@dpdk.org `_), +specifying the date and time of the end of the embargo. +The communicated public disclosure date should be **less than one week** + +Downstream stakeholders are expected not to deploy or disclose patches +until the embargo is passed, otherwise they will be removed from the list. + +Downstream stakeholders (in `security-prerelease list +`_), are: + +* Operating system vendors known to package DPDK +* Major DPDK users, considered trustworthy by the technical board, who + have made the request to `techboard@dpdk.org `_ + +The `OSS security private mailing list mailto:distros@vs.openwall.org>` will +also be contacted one week before the end of the embargo, as indicated by `the +OSS-security process ` +and using the PGP key listed on the same page, describing the details of the +vulnerability and sharing the patch[es]. Distributions and major vendors follow +this private mailing list, and it functions as a single point of contact for +embargoed advance notices for open source projects. + +The security advisory will be based on below template, +and will be sent signed with a security team's member GPG key. + + +Pre-Release Mail Template +~~~~~~~~~~~~~~~~~~~~~~~~~ + +:: + + This is an advance warning of a vulnerability discovered in DPDK, + to give you, as downstream stakeholders, a chance to coordinate + the release of fixes and reduce the vulnerability window. + Please treat the following information as confidential until + the proposed public disclosure date. + + { impact_description } + + Proposed patches are attached. + Unless a flaw is discovered in them, these patches will be merged + to { branches } on the public disclosure date. + + CVE: { cve_id } + Severity: { severity } + CVSS scores: { cvss_scores } + + Proposed public disclosure date/time: { disclosure_date } at 15:00 UTC. + Please do not make the issue public (or release public patches) + before this coordinated embargo date. + +If the issue is leaked during the embargo, the same procedure is followed +with only a few days delay between the pre-release and the public disclosure. + + +Private Disclosure +------------------ + +If a vulnerability is unintentionally already fixed in the public repository, +a security advisory is sent to downstream stakeholders +(`security-prerelease@dpdk.org `_), +giving few days to prepare for updating before the public disclosure. 
+ + +Private Disclosure Mail Template +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ + +:: + + This is a warning of a vulnerability discovered in DPDK, + to give you, as downstream stakeholders, a chance to coordinate + the deployment of fixes before a CVE is public. + + Please treat the following information as confidential until + the proposed public disclosure date. + + { impact_description } + + Commits: { commit_ids with branch number } + + CVE: { cve_id } + Severity: { severity } + CVSS scores: { cvss_scores } + + Proposed public disclosure date/time: { disclosure_date }. + Please do not make the vulnerability information public + before this coordinated embargo date. + + +Public Disclosure +----------------- + +On embargo expiration, following tasks will be done simultaneously: + +* The assigned bug is filled by a member of the security team, + with all relevant information, and it is made public. +* The patches are pushed to the appropriate branches. +* For long and short term stable branches fixed, + new versions should be released. + +Releases on Monday to Wednesday are preferred, so that system administrators +do not have to deal with security updates over the weekend. + +The security advisory is posted +to `announce@dpdk.org `_ and to `the public OSS-security +mailing list ` as soon as the patches +are pushed to the appropriate branches. + +Patches are then sent to `dev@dpdk.org `_ +and `stable@dpdk.org `_ accordingly. + + +Release Mail Template +~~~~~~~~~~~~~~~~~~~~~ + +:: + + A vulnerability was fixed in DPDK. + Some downstream stakeholders were warned in advance + in order to coordinate the release of fixes + and reduce the vulnerability window. + + { impact_description } + + Commits: { commit_ids with branch number } + + CVE: { cve_id } + Bugzilla: { bug_url } + Severity: { severity } + CVSS scores: { cvss_scores } + + +References +---------- + +* `A minimal security response process + `_ +* `fd.io Vulnerability Management + `_ +* `Open Daylight Vulnerability Management + `_ +* `CVE Assignment Information Format + `_ diff --git a/src/spdk/dpdk/doc/guides/cryptodevs/aesni_gcm.rst b/src/spdk/dpdk/doc/guides/cryptodevs/aesni_gcm.rst new file mode 100644 index 000000000..74e0de63a --- /dev/null +++ b/src/spdk/dpdk/doc/guides/cryptodevs/aesni_gcm.rst @@ -0,0 +1,121 @@ +.. SPDX-License-Identifier: BSD-3-Clause + Copyright(c) 2016-2020 Intel Corporation. + +AES-NI GCM Crypto Poll Mode Driver +================================== + + +The AES-NI GCM PMD (**librte_pmd_aesni_gcm**) provides poll mode crypto driver +support for utilizing Intel multi buffer library (see AES-NI Multi-buffer PMD documentation +to learn more about it, including installation). + +The AES-NI GCM PMD supports synchronous mode of operation with +``rte_cryptodev_sym_cpu_crypto_process`` function call for both AES-GCM and +GMAC, however GMAC support is limited to one segment per operation. Please +refer to ``rte_crypto`` programmer's guide for more detail. + +Features +-------- + +AESNI GCM PMD has support for: + +Authentication algorithms: + +* RTE_CRYPTO_AUTH_AES_GMAC + +AEAD algorithms: + +* RTE_CRYPTO_AEAD_AES_GCM + +Limitations +----------- + +* In out-of-place operations, chained destination mbufs are not supported. +* Chained mbufs are only supported by RTE_CRYPTO_AEAD_AES_GCM algorithm, + not RTE_CRYPTO_AUTH_AES_GMAC. +* Cipher only is not supported. 
+ + +Installation +------------ + +To build DPDK with the AESNI_GCM_PMD the user is required to download the multi-buffer +library from `here `_ +and compile it on their user system before building DPDK. +The latest version of the library supported by this PMD is v0.54, which +can be downloaded in ``_. + +.. code-block:: console + + make + make install + +The library requires NASM to be built. Depending on the library version, it might +require a minimum NASM version (e.g. v0.54 requires at least NASM 2.14). + +NASM is packaged for different OS. However, on some OS the version is too old, +so a manual installation is required. In that case, NASM can be downloaded from +`NASM website `_. +Once it is downloaded, extract it and follow these steps: + +.. code-block:: console + + ./configure + make + make install + +.. note:: + + Compilation of the Multi-Buffer library is broken when GCC < 5.0, if library <= v0.53. + If a lower GCC version than 5.0, the workaround proposed by the following link + should be used: ``_. + + +As a reference, the following table shows a mapping between the past DPDK versions +and the external crypto libraries supported by them: + +.. _table_aesni_gcm_versions: + +.. table:: DPDK and external crypto library version compatibility + + ============= ================================ + DPDK version Crypto library version + ============= ================================ + 16.04 - 16.11 Multi-buffer library 0.43 - 0.44 + 17.02 - 17.05 ISA-L Crypto v2.18 + 17.08 - 18.02 Multi-buffer library 0.46 - 0.48 + 18.05 - 19.02 Multi-buffer library 0.49 - 0.52 + 19.05+ Multi-buffer library 0.52 - 0.54 + ============= ================================ + + +Initialization +-------------- + +In order to enable this virtual crypto PMD, user must: + +* Build the multi buffer library (explained in Installation section). + +* Set CONFIG_RTE_LIBRTE_PMD_AESNI_GCM=y in config/common_base. + +To use the PMD in an application, user must: + +* Call rte_vdev_init("crypto_aesni_gcm") within the application. + +* Use --vdev="crypto_aesni_gcm" in the EAL options, which will call rte_vdev_init() internally. + +The following parameters (all optional) can be provided in the previous two calls: + +* socket_id: Specify the socket where the memory for the device is going to be allocated + (by default, socket_id will be the socket where the core that is creating the PMD is running on). + +* max_nb_queue_pairs: Specify the maximum number of queue pairs in the device (8 by default). + +* max_nb_sessions: Specify the maximum number of sessions that can be created (2048 by default). + +Example: + +.. code-block:: console + + ./l2fwd-crypto -l 1 -n 4 --vdev="crypto_aesni_gcm,socket_id=0,max_nb_sessions=128" \ + -- -p 1 --cdev SW --chain AEAD --aead_algo "aes-gcm" diff --git a/src/spdk/dpdk/doc/guides/cryptodevs/aesni_mb.rst b/src/spdk/dpdk/doc/guides/cryptodevs/aesni_mb.rst new file mode 100644 index 000000000..cc64f1243 --- /dev/null +++ b/src/spdk/dpdk/doc/guides/cryptodevs/aesni_mb.rst @@ -0,0 +1,161 @@ +.. SPDX-License-Identifier: BSD-3-Clause + Copyright(c) 2015-2018 Intel Corporation. + +AESN-NI Multi Buffer Crypto Poll Mode Driver +============================================ + + +The AESNI MB PMD (**librte_pmd_aesni_mb**) provides poll mode crypto driver +support for utilizing Intel multi buffer library, see the white paper +`Fast Multi-buffer IPsec Implementations on Intel® Architecture Processors +`_. + +The AES-NI MB PMD has current only been tested on Fedora 21 64-bit with gcc. 
+ +The AES-NI MB PMD supports synchronous mode of operation with +``rte_cryptodev_sym_cpu_crypto_process`` function call. + +Features +-------- + +AESNI MB PMD has support for: + +Cipher algorithms: + +* RTE_CRYPTO_CIPHER_AES128_CBC +* RTE_CRYPTO_CIPHER_AES192_CBC +* RTE_CRYPTO_CIPHER_AES256_CBC +* RTE_CRYPTO_CIPHER_AES128_CTR +* RTE_CRYPTO_CIPHER_AES192_CTR +* RTE_CRYPTO_CIPHER_AES256_CTR +* RTE_CRYPTO_CIPHER_AES_DOCSISBPI +* RTE_CRYPTO_CIPHER_DES_CBC +* RTE_CRYPTO_CIPHER_3DES_CBC +* RTE_CRYPTO_CIPHER_DES_DOCSISBPI + +Hash algorithms: + +* RTE_CRYPTO_HASH_MD5_HMAC +* RTE_CRYPTO_HASH_SHA1_HMAC +* RTE_CRYPTO_HASH_SHA224_HMAC +* RTE_CRYPTO_HASH_SHA256_HMAC +* RTE_CRYPTO_HASH_SHA384_HMAC +* RTE_CRYPTO_HASH_SHA512_HMAC +* RTE_CRYPTO_HASH_AES_XCBC_HMAC +* RTE_CRYPTO_HASH_AES_CMAC +* RTE_CRYPTO_HASH_AES_GMAC +* RTE_CRYPTO_HASH_SHA1 +* RTE_CRYPTO_HASH_SHA224 +* RTE_CRYPTO_HASH_SHA256 +* RTE_CRYPTO_HASH_SHA384 +* RTE_CRYPTO_HASH_SHA512 + +AEAD algorithms: + +* RTE_CRYPTO_AEAD_AES_CCM +* RTE_CRYPTO_AEAD_AES_GCM + +Limitations +----------- + +* Chained mbufs are not supported. + + +Installation +------------ + +To build DPDK with the AESNI_MB_PMD the user is required to download the multi-buffer +library from `here `_ +and compile it on their user system before building DPDK. +The latest version of the library supported by this PMD is v0.54, which +can be downloaded from ``_. + +.. code-block:: console + + make + make install + +The library requires NASM to be built. Depending on the library version, it might +require a minimum NASM version (e.g. v0.54 requires at least NASM 2.14). + +NASM is packaged for different OS. However, on some OS the version is too old, +so a manual installation is required. In that case, NASM can be downloaded from +`NASM website `_. +Once it is downloaded, extract it and follow these steps: + +.. code-block:: console + + ./configure + make + make install + +.. note:: + + Compilation of the Multi-Buffer library is broken when GCC < 5.0, if library <= v0.53. + If a lower GCC version than 5.0, the workaround proposed by the following link + should be used: ``_. + +As a reference, the following table shows a mapping between the past DPDK versions +and the Multi-Buffer library version supported by them: + +.. _table_aesni_mb_versions: + +.. table:: DPDK and Multi-Buffer library version compatibility + + ============== ============================ + DPDK version Multi-buffer library version + ============== ============================ + 2.2 - 16.11 0.43 - 0.44 + 17.02 0.44 + 17.05 - 17.08 0.45 - 0.48 + 17.11 0.47 - 0.48 + 18.02 0.48 + 18.05 - 19.02 0.49 - 0.52 + 19.05 - 19.08 0.52 + 19.11+ 0.52 - 0.54 + ============== ============================ + + +Initialization +-------------- + +In order to enable this virtual crypto PMD, user must: + +* Build the multi buffer library (explained in Installation section). + +* Set CONFIG_RTE_LIBRTE_PMD_AESNI_MB=y in config/common_base. + +To use the PMD in an application, user must: + +* Call rte_vdev_init("crypto_aesni_mb") within the application. + +* Use --vdev="crypto_aesni_mb" in the EAL options, which will call rte_vdev_init() internally. + +The following parameters (all optional) can be provided in the previous two calls: + +* socket_id: Specify the socket where the memory for the device is going to be allocated + (by default, socket_id will be the socket where the core that is creating the PMD is running on). + +* max_nb_queue_pairs: Specify the maximum number of queue pairs in the device (8 by default). 
+ +* max_nb_sessions: Specify the maximum number of sessions that can be created (2048 by default). + +Example: + +.. code-block:: console + + ./l2fwd-crypto -l 1 -n 4 --vdev="crypto_aesni_mb,socket_id=0,max_nb_sessions=128" \ + -- -p 1 --cdev SW --chain CIPHER_HASH --cipher_algo "aes-cbc" --auth_algo "sha1-hmac" + +Extra notes +----------- + +For AES Counter mode (AES-CTR), the library supports two different sizes for Initialization +Vector (IV): + +* 12 bytes: used mainly for IPsec, as it requires 12 bytes from the user, which internally + are appended the counter block (4 bytes), which is set to 1 for the first block + (no padding required from the user) + +* 16 bytes: when passing 16 bytes, the library will take them and use the last 4 bytes + as the initial counter block for the first block. diff --git a/src/spdk/dpdk/doc/guides/cryptodevs/armv8.rst b/src/spdk/dpdk/doc/guides/cryptodevs/armv8.rst new file mode 100644 index 000000000..fee85354b --- /dev/null +++ b/src/spdk/dpdk/doc/guides/cryptodevs/armv8.rst @@ -0,0 +1,69 @@ +.. SPDX-License-Identifier: BSD-3-Clause + Copyright(c) 2017 Cavium, Inc + +ARMv8 Crypto Poll Mode Driver +============================= + +This code provides the initial implementation of the ARMv8 crypto PMD. +The driver uses ARMv8 cryptographic extensions to process chained crypto +operations in an optimized way. The core functionality is provided by +a low-level library, written in the assembly code. + +Features +-------- + +ARMv8 Crypto PMD has support for the following algorithm pairs: + +Supported cipher algorithms: + +* ``RTE_CRYPTO_CIPHER_AES_CBC`` + +Supported authentication algorithms: + +* ``RTE_CRYPTO_AUTH_SHA1_HMAC`` +* ``RTE_CRYPTO_AUTH_SHA256_HMAC`` + +Installation +------------ + +In order to enable this virtual crypto PMD, user must: + +* Download AArch64 crypto library source code from + `here `_ + +* Export the environmental variable ARMV8_CRYPTO_LIB_PATH with + the path to ``AArch64cryptolib`` library. + +* Build the library by invoking: + +.. code-block:: console + + make -C $ARMV8_CRYPTO_LIB_PATH/ + +* Set CONFIG_RTE_LIBRTE_PMD_ARMV8_CRYPTO=y in + config/defconfig_arm64-armv8a-linux-gcc + +The corresponding device can be created only if the following features +are supported by the CPU: + +* ``RTE_CPUFLAG_AES`` +* ``RTE_CPUFLAG_SHA1`` +* ``RTE_CPUFLAG_SHA2`` +* ``RTE_CPUFLAG_NEON`` + +Initialization +-------------- + +User can use app/test application to check how to use this PMD and to verify +crypto processing. + +Test name is cryptodev_sw_armv8_autotest. + +Limitations +----------- + +* Maximum number of sessions is 2048. +* Only chained operations are supported. +* AES-128-CBC is the only supported cipher variant. +* Cipher input data has to be a multiple of 16 bytes. +* Digest input data has to be a multiple of 8 bytes. diff --git a/src/spdk/dpdk/doc/guides/cryptodevs/caam_jr.rst b/src/spdk/dpdk/doc/guides/cryptodevs/caam_jr.rst new file mode 100644 index 000000000..6d9daf736 --- /dev/null +++ b/src/spdk/dpdk/doc/guides/cryptodevs/caam_jr.rst @@ -0,0 +1,150 @@ +.. SPDX-License-Identifier: BSD-3-Clause + Copyright 2018 NXP + + +NXP CAAM JOB RING (caam_jr) +=========================== + +The caam_jr PMD provides poll mode crypto driver support for NXP SEC 4.x+ (CAAM) +hardware accelerator. More information is available at: + +`NXP Cryptographic Acceleration Technology `_. + +Architecture +------------ + +SEC is the SOC's security engine, which serves as NXP's latest cryptographic +acceleration and offloading hardware. 
It combines functions previously +implemented in separate modules to create a modular and scalable acceleration +and assurance engine. It also implements block encryption algorithms, stream +cipher algorithms, hashing algorithms, public key algorithms, run-time +integrity checking, and a hardware random number generator. SEC performs +higher-level cryptographic operations than previous NXP cryptographic +accelerators. This provides significant improvement to system level performance. + +SEC HW accelerator above 4.x+ version are also known as CAAM. + +caam_jr PMD is one of DPAA drivers which uses uio interface to interact with +Linux kernel for configure and destroy the device instance (ring). + + +Implementation +-------------- + +SEC provides platform assurance by working with SecMon, which is a companion +logic block that tracks the security state of the SOC. SEC is programmed by +means of descriptors (not to be confused with frame descriptors (FDs)) that +indicate the operations to be performed and link to the message and +associated data. SEC incorporates two DMA engines to fetch the descriptors, +read the message data, and write the results of the operations. The DMA +engine provides a scatter/gather capability so that SEC can read and write +data scattered in memory. SEC may be configured by means of software for +dynamic changes in byte ordering. The default configuration for this version +of SEC is little-endian mode. + +Note that one physical Job Ring represent one caam_jr device. + +Features +-------- + +The CAAM_JR PMD has support for: + +Cipher algorithms: + +* ``RTE_CRYPTO_CIPHER_3DES_CBC`` +* ``RTE_CRYPTO_CIPHER_AES128_CBC`` +* ``RTE_CRYPTO_CIPHER_AES192_CBC`` +* ``RTE_CRYPTO_CIPHER_AES256_CBC`` +* ``RTE_CRYPTO_CIPHER_AES128_CTR`` +* ``RTE_CRYPTO_CIPHER_AES192_CTR`` +* ``RTE_CRYPTO_CIPHER_AES256_CTR`` + +Hash algorithms: + +* ``RTE_CRYPTO_AUTH_SHA1_HMAC`` +* ``RTE_CRYPTO_AUTH_SHA224_HMAC`` +* ``RTE_CRYPTO_AUTH_SHA256_HMAC`` +* ``RTE_CRYPTO_AUTH_SHA384_HMAC`` +* ``RTE_CRYPTO_AUTH_SHA512_HMAC`` +* ``RTE_CRYPTO_AUTH_MD5_HMAC`` + +AEAD algorithms: + +* ``RTE_CRYPTO_AEAD_AES_GCM`` + +Supported DPAA SoCs +-------------------- + +* LS1046A/LS1026A +* LS1043A/LS1023A +* LS1028A +* LS1012A + +Limitations +----------- + +* Hash followed by Cipher mode is not supported +* Only supports the session-oriented API implementation (session-less APIs are not supported). + +Prerequisites +------------- + +caam_jr driver has following dependencies are not part of DPDK and must be installed separately: + +* **NXP Linux SDK** + + NXP Linux software development kit (SDK) includes support for the family + of QorIQ® ARM-Architecture-based system on chip (SoC) processors + and corresponding boards. + + It includes the Linux board support packages (BSPs) for NXP SoCs, + a fully operational tool chain, kernel and board specific modules. + + SDK and related information can be obtained from: `NXP QorIQ SDK `_. + +Currently supported by DPDK: + +* NXP SDK **18.09+**. +* Supported architectures: **arm64 LE**. + +* Follow the DPDK :ref:`Getting Started Guide for Linux ` to setup the basic DPDK environment. + +Pre-Installation Configuration +------------------------------ + +Config File Options +~~~~~~~~~~~~~~~~~~~ + +The following options can be modified in the ``config`` file +to enable caam_jr PMD. + +Please note that enabling debugging options may affect system performance. + +* ``CONFIG_RTE_LIBRTE_PMD_CAAM_JR`` (default ``n``) + By default it is only enabled in common_linux config. 
+ Toggle compilation of the ``librte_pmd_caam_jr`` driver. + +* ``CONFIG_RTE_LIBRTE_PMD_CAAM_JR_BE`` (default ``n``) + By default it is disabled. + It can be used when the underlying hardware supports the CAAM in BE mode. + LS1043A, LS1046A and LS1012A support CAAM in BE mode. + LS1028A supports CAAM in LE mode. + +Installations +------------- +To compile the caam_jr PMD for Linux arm64 gcc target, run the +following ``make`` command: + +.. code-block:: console + + cd + make config T=arm64-armv8a-linux-gcc install + +Enabling logs +------------- + +For enabling logs, use the following EAL parameter: + +.. code-block:: console + + ./your_crypto_application --log-level=pmd.crypto.caam, diff --git a/src/spdk/dpdk/doc/guides/cryptodevs/ccp.rst b/src/spdk/dpdk/doc/guides/cryptodevs/ccp.rst new file mode 100644 index 000000000..a43fe92de --- /dev/null +++ b/src/spdk/dpdk/doc/guides/cryptodevs/ccp.rst @@ -0,0 +1,140 @@ +.. SPDX-License-Identifier: BSD-3-Clause + Copyright(c) 2018 Advanced Micro Devices, Inc. All rights reserved. + +AMD CCP Poll Mode Driver +======================== + +This code provides the initial implementation of the ccp poll mode driver. +The CCP poll mode driver library (librte_pmd_ccp) implements support for +AMD’s cryptographic co-processor (CCP). The CCP PMD is a virtual crypto +poll mode driver which schedules crypto operations to one or more available +CCP hardware engines on the platform. The CCP PMD provides poll mode crypto +driver support for the following hardware accelerator devices:: + + AMD Cryptographic Co-processor (0x1456) + AMD Cryptographic Co-processor (0x1468) + +Features +-------- + +CCP crypto PMD has support for: + +Cipher algorithms: + +* ``RTE_CRYPTO_CIPHER_AES_CBC`` +* ``RTE_CRYPTO_CIPHER_AES_ECB`` +* ``RTE_CRYPTO_CIPHER_AES_CTR`` +* ``RTE_CRYPTO_CIPHER_3DES_CBC`` + +Hash algorithms: + +* ``RTE_CRYPTO_AUTH_SHA1`` +* ``RTE_CRYPTO_AUTH_SHA1_HMAC`` +* ``RTE_CRYPTO_AUTH_SHA224`` +* ``RTE_CRYPTO_AUTH_SHA224_HMAC`` +* ``RTE_CRYPTO_AUTH_SHA256`` +* ``RTE_CRYPTO_AUTH_SHA256_HMAC`` +* ``RTE_CRYPTO_AUTH_SHA384`` +* ``RTE_CRYPTO_AUTH_SHA384_HMAC`` +* ``RTE_CRYPTO_AUTH_SHA512`` +* ``RTE_CRYPTO_AUTH_SHA512_HMAC`` +* ``RTE_CRYPTO_AUTH_MD5_HMAC`` +* ``RTE_CRYPTO_AUTH_AES_CMAC`` +* ``RTE_CRYPTO_AUTH_SHA3_224`` +* ``RTE_CRYPTO_AUTH_SHA3_224_HMAC`` +* ``RTE_CRYPTO_AUTH_SHA3_256`` +* ``RTE_CRYPTO_AUTH_SHA3_256_HMAC`` +* ``RTE_CRYPTO_AUTH_SHA3_384`` +* ``RTE_CRYPTO_AUTH_SHA3_384_HMAC`` +* ``RTE_CRYPTO_AUTH_SHA3_512`` +* ``RTE_CRYPTO_AUTH_SHA3_512_HMAC`` + +AEAD algorithms: + +* ``RTE_CRYPTO_AEAD_AES_GCM`` + +Installation +------------ + +To compile ccp PMD, it has to be enabled in the config/common_base file and openssl +packages have to be installed in the build environment. + +* ``CONFIG_RTE_LIBRTE_PMD_CCP=y`` + +For Ubuntu 16.04 LTS use below to install openssl in the build system: + +.. code-block:: console + + sudo apt-get install openssl + +This code was verified on Ubuntu 16.04. + +Initialization +-------------- + +Bind the CCP devices to DPDK UIO driver module before running the CCP PMD stack. +e.g. for the 0x1456 device:: + + cd to the top-level DPDK directory + modprobe uio + insmod ./build/kmod/igb_uio.ko + echo "1022 1456" > /sys/bus/pci/drivers/igb_uio/new_id + +Another way to bind the CCP devices to DPDK UIO driver is by using the ``dpdk-devbind.py`` script. 
+The following command assumes ``BFD`` as ``0000:09:00.2``:: + + cd to the top-level DPDK directory + ./usertools/dpdk-devbind.py -b igb_uio 0000:09:00.2 + +In order to enable the ccp crypto PMD, user must set CONFIG_RTE_LIBRTE_PMD_CCP=y in config/common_base. + +To use the PMD in an application, user must: + +* Call rte_vdev_init("crypto_ccp") within the application. + +* Use --vdev="crypto_ccp" in the EAL options, which will call rte_vdev_init() internally. + +The following parameters (all optional) can be provided in the previous two calls: + +* socket_id: Specify the socket where the memory for the device is going to be allocated. + (by default, socket_id will be the socket where the core that is creating the PMD is running on). + +* max_nb_queue_pairs: Specify the maximum number of queue pairs in the device. + +* max_nb_sessions: Specify the maximum number of sessions that can be created (2048 by default). + +* ccp_auth_opt: Specify authentication operations to perform on CPU using openssl APIs. + +To validate ccp pmd, l2fwd-crypto example can be used with following command: + +.. code-block:: console + + sudo ./build/l2fwd-crypto -l 1 -n 4 --vdev "crypto_ccp" -- -p 0x1 + --chain CIPHER_HASH --cipher_op ENCRYPT --cipher_algo aes-cbc + --cipher_key 00:01:02:03:04:05:06:07:08:09:0a:0b:0c:0d:0e:0f + --cipher_iv 00:01:02:03:04:05:06:07:08:09:0a:0b:0c:0d:0e:ff + --auth_op GENERATE --auth_algo sha1-hmac + --auth_key 11:11:11:11:11:11:11:11:11:11:11:11:11:11:11:11:11:11:11:11:11:11 + :11:11:11:11:11:11:11:11:11:11:11:11:11:11:11:11:11:11:11:11:11:11:11:11:11 + :11:11:11:11:11:11:11:11:11:11:11:11:11:11:11:11:11 + +The CCP PMD also supports computing authentication over CPU with cipher offloaded to CCP. +To enable this feature, pass an additional argument as ccp_auth_opt=1 to --vdev parameters as +following: + +.. code-block:: console + + sudo ./build/l2fwd-crypto -l 1 -n 4 --vdev "crypto_ccp,ccp_auth_opt=1" -- -p 0x1 + --chain CIPHER_HASH --cipher_op ENCRYPT --cipher_algo aes-cbc + --cipher_key 00:01:02:03:04:05:06:07:08:09:0a:0b:0c:0d:0e:0f + --cipher_iv 00:01:02:03:04:05:06:07:08:09:0a:0b:0c:0d:0e:ff + --auth_op GENERATE --auth_algo sha1-hmac + --auth_key 11:11:11:11:11:11:11:11:11:11:11:11:11:11:11:11:11:11:11:11:11:11 + :11:11:11:11:11:11:11:11:11:11:11:11:11:11:11:11:11:11:11:11:11:11:11:11:11 + :11:11:11:11:11:11:11:11:11:11:11:11:11:11:11:11:11 + +Limitations +----------- + +* Chained mbufs are not supported. +* MD5_HMAC is supported only for CPU based authentication. diff --git a/src/spdk/dpdk/doc/guides/cryptodevs/dpaa2_sec.rst b/src/spdk/dpdk/doc/guides/cryptodevs/dpaa2_sec.rst new file mode 100644 index 000000000..3053636b8 --- /dev/null +++ b/src/spdk/dpdk/doc/guides/cryptodevs/dpaa2_sec.rst @@ -0,0 +1,206 @@ +.. SPDX-License-Identifier: BSD-3-Clause + Copyright 2016 NXP + + + +NXP DPAA2 CAAM (DPAA2_SEC) +========================== + +The DPAA2_SEC PMD provides poll mode crypto driver support for NXP DPAA2 CAAM +hardware accelerator. + +Architecture +------------ + +SEC is the SOC's security engine, which serves as NXP's latest cryptographic +acceleration and offloading hardware. It combines functions previously +implemented in separate modules to create a modular and scalable acceleration +and assurance engine. It also implements block encryption algorithms, stream +cipher algorithms, hashing algorithms, public key algorithms, run-time +integrity checking, and a hardware random number generator. 
SEC performs +higher-level cryptographic operations than previous NXP cryptographic +accelerators. This provides significant improvement to system level performance. + +DPAA2_SEC is one of the hardware resource in DPAA2 Architecture. More information +on DPAA2 Architecture is described in :ref:`dpaa2_overview`. + +DPAA2_SEC PMD is one of DPAA2 drivers which interacts with Management Complex (MC) +portal to access the hardware object - DPSECI. The MC provides access to create, +discover, connect, configure and destroy dpseci objects in DPAA2_SEC PMD. + +DPAA2_SEC PMD also uses some of the other hardware resources like buffer pools, +queues, queue portals to store and to enqueue/dequeue data to the hardware SEC. + +DPSECI objects are detected by PMD using a resource container called DPRC (like +in :ref:`dpaa2_overview`). + +For example: + +.. code-block:: console + + DPRC.1 (bus) + | + +--+--------+-------+-------+-------+---------+ + | | | | | | + DPMCP.1 DPIO.1 DPBP.1 DPNI.1 DPMAC.1 DPSECI.1 + DPMCP.2 DPIO.2 DPNI.2 DPMAC.2 DPSECI.2 + DPMCP.3 + +Implementation +-------------- + +SEC provides platform assurance by working with SecMon, which is a companion +logic block that tracks the security state of the SOC. SEC is programmed by +means of descriptors (not to be confused with frame descriptors (FDs)) that +indicate the operations to be performed and link to the message and +associated data. SEC incorporates two DMA engines to fetch the descriptors, +read the message data, and write the results of the operations. The DMA +engine provides a scatter/gather capability so that SEC can read and write +data scattered in memory. SEC may be configured by means of software for +dynamic changes in byte ordering. The default configuration for this version +of SEC is little-endian mode. + +A block diagram similar to dpaa2 NIC is shown below to show where DPAA2_SEC +fits in the DPAA2 Bus model + +.. code-block:: console + + + +----------------+ + | DPDK DPAA2_SEC | + | PMD | + +----------------+ +------------+ + | MC SEC object |.......| Mempool | + . . . . . . . . . | (DPSECI) | | (DPBP) | + . +---+---+--------+ +-----+------+ + . ^ | . + . | | . + . | | . + . +---+---V----+ . + . . . . . . . . . . .| DPIO driver| . + . . | (DPIO) | . + . . +-----+------+ . + . . | QBMAN | . + . . | Driver | . + +----+------+-------+ +-----+----- | . + | dpaa2 bus | | . + | VFIO fslmc-bus |....................|......................... + | | | + | /bus/fslmc | | + +-------------------+ | + | + ========================== HARDWARE =====|======================= + DPIO + | + DPSECI---DPBP + =========================================|======================== + + + +Features +-------- + +The DPAA2_SEC PMD has support for: + +Cipher algorithms: + +* ``RTE_CRYPTO_CIPHER_3DES_CBC`` +* ``RTE_CRYPTO_CIPHER_AES128_CBC`` +* ``RTE_CRYPTO_CIPHER_AES192_CBC`` +* ``RTE_CRYPTO_CIPHER_AES256_CBC`` +* ``RTE_CRYPTO_CIPHER_AES128_CTR`` +* ``RTE_CRYPTO_CIPHER_AES192_CTR`` +* ``RTE_CRYPTO_CIPHER_AES256_CTR`` + +Hash algorithms: + +* ``RTE_CRYPTO_AUTH_SHA1_HMAC`` +* ``RTE_CRYPTO_AUTH_SHA224_HMAC`` +* ``RTE_CRYPTO_AUTH_SHA256_HMAC`` +* ``RTE_CRYPTO_AUTH_SHA384_HMAC`` +* ``RTE_CRYPTO_AUTH_SHA512_HMAC`` +* ``RTE_CRYPTO_AUTH_MD5_HMAC`` + +AEAD algorithms: + +* ``RTE_CRYPTO_AEAD_AES_GCM`` + +Supported DPAA2 SoCs +-------------------- + +* LS2160A +* LS2084A/LS2044A +* LS2088A/LS2048A +* LS1088A/LS1048A + +Whitelisting & Blacklisting +--------------------------- + +For blacklisting a DPAA2 SEC device, following commands can be used. + + .. 
code-block:: console + + -b "fslmc:dpseci.x" -- ... + +Where x is the device object id as configured in resource container. + +Limitations +----------- + +* Hash followed by Cipher mode is not supported +* Only supports the session-oriented API implementation (session-less APIs are not supported). + +Prerequisites +------------- + +DPAA2_SEC driver has similar pre-requisites as described in :ref:`dpaa2_overview`. +The following dependencies are not part of DPDK and must be installed separately: + +See :doc:`../platform/dpaa2` for setup information + +Currently supported by DPDK: + +- NXP SDK **19.09+**. +- MC Firmware version **10.18.0** and higher. +- Supported architectures: **arm64 LE**. + +- Follow the DPDK :ref:`Getting Started Guide for Linux ` to setup the basic DPDK environment. + +Pre-Installation Configuration +------------------------------ + +Config File Options +~~~~~~~~~~~~~~~~~~~ + +Basic DPAA2 config file options are described in :ref:`dpaa2_overview`. +In addition to those, the following options can be modified in the ``config`` file +to enable DPAA2_SEC PMD. + +Please note that enabling debugging options may affect system performance. + +* ``CONFIG_RTE_LIBRTE_PMD_DPAA2_SEC`` (default ``n``) + By default it is only enabled in defconfig_arm64-dpaa-* config. + Toggle compilation of the ``librte_pmd_dpaa2_sec`` driver. + +Installations +------------- +To compile the DPAA2_SEC PMD for Linux arm64 gcc target, run the +following ``make`` command: + +.. code-block:: console + + cd + make config T=arm64-dpaa-linux-gcc install + +Enabling logs +------------- + +For enabling logs, use the following EAL parameter: + +.. code-block:: console + + ./your_crypto_application --log-level=pmd.crypto.dpaa2: + +Using ``crypto.dpaa2`` as log matching criteria, all Crypto PMD logs can be +enabled which are lower than logging ``level``. diff --git a/src/spdk/dpdk/doc/guides/cryptodevs/dpaa_sec.rst b/src/spdk/dpdk/doc/guides/cryptodevs/dpaa_sec.rst new file mode 100644 index 000000000..db3c8e918 --- /dev/null +++ b/src/spdk/dpdk/doc/guides/cryptodevs/dpaa_sec.rst @@ -0,0 +1,150 @@ +.. SPDX-License-Identifier: BSD-3-Clause + Copyright 2017 NXP + + + +NXP DPAA CAAM (DPAA_SEC) +======================== + +The DPAA_SEC PMD provides poll mode crypto driver support for NXP DPAA CAAM +hardware accelerator. + +Architecture +------------ + +SEC is the SOC's security engine, which serves as NXP's latest cryptographic +acceleration and offloading hardware. It combines functions previously +implemented in separate modules to create a modular and scalable acceleration +and assurance engine. It also implements block encryption algorithms, stream +cipher algorithms, hashing algorithms, public key algorithms, run-time +integrity checking, and a hardware random number generator. SEC performs +higher-level cryptographic operations than previous NXP cryptographic +accelerators. This provides significant improvement to system level performance. + +DPAA_SEC is one of the hardware resource in DPAA Architecture. More information +on DPAA Architecture is described in :ref:`dpaa_overview`. + +DPAA_SEC PMD is one of DPAA drivers which interacts with QBMAN to create, +configure and destroy the device instance using queue pair with CAAM portal. + +DPAA_SEC PMD also uses some of the other hardware resources like buffer pools, +queues, queue portals to store and to enqueue/dequeue data to the hardware SEC. 
+ +Implementation +-------------- + +SEC provides platform assurance by working with SecMon, which is a companion +logic block that tracks the security state of the SOC. SEC is programmed by +means of descriptors (not to be confused with frame descriptors (FDs)) that +indicate the operations to be performed and link to the message and +associated data. SEC incorporates two DMA engines to fetch the descriptors, +read the message data, and write the results of the operations. The DMA +engine provides a scatter/gather capability so that SEC can read and write +data scattered in memory. SEC may be configured by means of software for +dynamic changes in byte ordering. The default configuration for this version +of SEC is little-endian mode. + +Features +-------- + +The DPAA PMD has support for: + +Cipher algorithms: + +* ``RTE_CRYPTO_CIPHER_3DES_CBC`` +* ``RTE_CRYPTO_CIPHER_AES128_CBC`` +* ``RTE_CRYPTO_CIPHER_AES192_CBC`` +* ``RTE_CRYPTO_CIPHER_AES256_CBC`` +* ``RTE_CRYPTO_CIPHER_AES128_CTR`` +* ``RTE_CRYPTO_CIPHER_AES192_CTR`` +* ``RTE_CRYPTO_CIPHER_AES256_CTR`` +* ``RTE_CRYPTO_CIPHER_SNOW3G_UEA2`` +* ``RTE_CRYPTO_CIPHER_ZUC_EEA3`` + +Hash algorithms: + +* ``RTE_CRYPTO_AUTH_SHA1_HMAC`` +* ``RTE_CRYPTO_AUTH_SHA224_HMAC`` +* ``RTE_CRYPTO_AUTH_SHA256_HMAC`` +* ``RTE_CRYPTO_AUTH_SHA384_HMAC`` +* ``RTE_CRYPTO_AUTH_SHA512_HMAC`` +* ``RTE_CRYPTO_AUTH_SNOW3G_UIA2`` +* ``RTE_CRYPTO_AUTH_MD5_HMAC`` +* ``RTE_CRYPTO_AUTH_ZUC_EIA3`` + +AEAD algorithms: + +* ``RTE_CRYPTO_AEAD_AES_GCM`` + +Supported DPAA SoCs +-------------------- + +* LS1046A/LS1026A +* LS1043A/LS1023A + +Whitelisting & Blacklisting +--------------------------- + +For blacklisting a DPAA device, following commands can be used. + + .. code-block:: console + + -b "dpaa:dpaa_sec-X" -- ... + e.g. "dpaa:dpaa_sec-1" + + or to disable all 4 SEC devices + -b "dpaa:dpaa_sec-1" -b "dpaa:dpaa_sec-2" -b "dpaa:dpaa_sec-3" -b "dpaa:dpaa_sec-4" + +Limitations +----------- + +* Hash followed by Cipher mode is not supported +* Only supports the session-oriented API implementation (session-less APIs are not supported). + +Prerequisites +------------- + +DPAA_SEC driver has similar pre-requisites as described in :ref:`dpaa_overview`. + +See :doc:`../platform/dpaa` for setup information + + +- Follow the DPDK :ref:`Getting Started Guide for Linux ` to setup the basic DPDK environment. + +Pre-Installation Configuration +------------------------------ + +Config File Options +~~~~~~~~~~~~~~~~~~~ + +Basic DPAA config file options are described in :ref:`dpaa_overview`. +In addition to those, the following options can be modified in the ``config`` file +to enable DPAA_SEC PMD. + +Please note that enabling debugging options may affect system performance. + +* ``CONFIG_RTE_LIBRTE_PMD_DPAA_SEC`` (default ``n``) + By default it is only enabled in defconfig_arm64-dpaa-* config. + Toggle compilation of the ``librte_pmd_dpaa_sec`` driver. + +Installations +------------- +To compile the DPAA_SEC PMD for Linux arm64 gcc target, run the +following ``make`` command: + +.. code-block:: console + + cd + make config T=arm64-dpaa-linux-gcc install + +Enabling logs +------------- + +For enabling logs, use the following EAL parameter: + +.. code-block:: console + + ./your_crypto_application --log-level=pmd.crypto.dpaa: + +Using ``pmd.crypto.dpaa`` as log matching criteria, all Crypto PMD logs can be +enabled which are lower than logging ``level``. 
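As a programmatic alternative to the EAL option shown above, the same PMD log level can be raised from application code. The following is only a sketch, assuming the generic ``rte_log`` pattern API (it is not specific to this driver); the helper name is illustrative.

.. code-block:: c

   #include <rte_log.h>

   /* Roughly equivalent to passing --log-level=pmd.crypto.dpaa:8 on the
    * EAL command line: enable DPAA crypto PMD logs up to debug level. */
   static void
   enable_dpaa_sec_debug_logs(void)
   {
           rte_log_set_level_pattern("pmd.crypto.dpaa", RTE_LOG_DEBUG);
   }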
diff --git a/src/spdk/dpdk/doc/guides/cryptodevs/features/aesni_gcm.ini b/src/spdk/dpdk/doc/guides/cryptodevs/features/aesni_gcm.ini new file mode 100644 index 000000000..c59229866 --- /dev/null +++ b/src/spdk/dpdk/doc/guides/cryptodevs/features/aesni_gcm.ini @@ -0,0 +1,41 @@ +; +; Supported features of the 'aesni_gcm' crypto driver. +; +; Refer to default.ini for the full list of available PMD features. +; +[Features] +Symmetric crypto = Y +Sym operation chaining = Y +CPU AESNI = Y +CPU SSE = Y +CPU AVX = Y +CPU AVX2 = Y +CPU AVX512 = Y +In Place SGL = Y +OOP SGL In LB Out = Y +OOP LB In LB Out = Y +CPU crypto = Y +Symmetric sessionless = Y +; +; Supported crypto algorithms of the 'aesni_gcm' crypto driver. +; +[Cipher] + +; +; Supported authentication algorithms of the 'aesni_gcm' crypto driver. +; +[Auth] +AES GMAC = Y + +; +; Supported AEAD algorithms of the 'aesni_gcm' crypto driver. +; +[AEAD] +AES GCM (128) = Y +AES GCM (192) = Y +AES GCM (256) = Y + +; +; Supported Asymmetric algorithms of the 'aesni_gcm' crypto driver. +; +[Asymmetric] diff --git a/src/spdk/dpdk/doc/guides/cryptodevs/features/aesni_mb.ini b/src/spdk/dpdk/doc/guides/cryptodevs/features/aesni_mb.ini new file mode 100644 index 000000000..49cb82a39 --- /dev/null +++ b/src/spdk/dpdk/doc/guides/cryptodevs/features/aesni_mb.ini @@ -0,0 +1,64 @@ +; +; Supported features of the 'aesni_mb' crypto driver. +; +; Refer to default.ini for the full list of available PMD features. +; +[Features] +Symmetric crypto = Y +Sym operation chaining = Y +CPU SSE = Y +CPU AVX = Y +CPU AVX2 = Y +CPU AVX512 = Y +CPU AESNI = Y +OOP LB In LB Out = Y +CPU crypto = Y +Symmetric sessionless = Y + +; +; Supported crypto algorithms of the 'aesni_mb' crypto driver. +; +[Cipher] +AES CBC (128) = Y +AES CBC (192) = Y +AES CBC (256) = Y +AES CTR (128) = Y +AES CTR (192) = Y +AES CTR (256) = Y +AES DOCSIS BPI = Y +DES CBC = Y +3DES CBC = Y +DES DOCSIS BPI = Y + +; +; Supported authentication algorithms of the 'aesni_mb' crypto driver. +; +[Auth] +MD5 HMAC = Y +SHA1 = Y +SHA1 HMAC = Y +SHA224 = Y +SHA224 HMAC = Y +SHA256 = Y +SHA256 HMAC = Y +SHA384 = Y +SHA384 HMAC = Y +SHA512 = Y +SHA512 HMAC = Y +AES XCBC MAC = Y +AES CMAC (128) = Y +AES GMAC = Y + +; +; Supported AEAD algorithms of the 'aesni_mb' crypto driver. +; +[AEAD] +AES CCM (128) = Y +AES GCM (128) = Y +AES GCM (192) = Y +AES GCM (256) = Y + +; +; Supported Asymmetric algorithms of the 'aesni_mb' crypto driver. +; +[Asymmetric] diff --git a/src/spdk/dpdk/doc/guides/cryptodevs/features/armv8.ini b/src/spdk/dpdk/doc/guides/cryptodevs/features/armv8.ini new file mode 100644 index 000000000..beb40c057 --- /dev/null +++ b/src/spdk/dpdk/doc/guides/cryptodevs/features/armv8.ini @@ -0,0 +1,34 @@ +; +; Supported features of the 'armv8' crypto driver. +; +; Refer to default.ini for the full list of available PMD features. +; +[Features] +Symmetric crypto = Y +Sym operation chaining = Y +CPU NEON = Y +CPU ARM CE = Y +Symmetric sessionless = Y + +; +; Supported crypto algorithms of the 'armv8' crypto driver. +; +[Cipher] +AES CBC (128) = Y + +; +; Supported authentication algorithms of the 'armv8' crypto driver. +; +[Auth] +SHA1 HMAC = Y +SHA256 HMAC = Y + +; +; Supported AEAD algorithms of the 'armv8' crypto driver. +; +[AEAD] + +; +; Supported Asymmetric algorithms of the 'armv8' crypto driver. 
+; +[Asymmetric] diff --git a/src/spdk/dpdk/doc/guides/cryptodevs/features/caam_jr.ini b/src/spdk/dpdk/doc/guides/cryptodevs/features/caam_jr.ini new file mode 100644 index 000000000..8d55cdb6c --- /dev/null +++ b/src/spdk/dpdk/doc/guides/cryptodevs/features/caam_jr.ini @@ -0,0 +1,51 @@ +; +; Supported features of the 'caam_jr' crypto driver. +; +; Refer to default.ini for the full list of available PMD features. +; +[Features] +Symmetric crypto = Y +Sym operation chaining = Y +HW Accelerated = Y +Protocol offload = Y +In Place SGL = Y +OOP SGL In SGL Out = Y +OOP SGL In LB Out = Y +OOP LB In SGL Out = Y +OOP LB In LB Out = Y + +; +; Supported crypto algorithms of the 'dpaa2_sec' crypto driver. +; +[Cipher] +AES CBC (128) = Y +AES CBC (192) = Y +AES CBC (256) = Y +AES CTR (128) = Y +AES CTR (192) = Y +AES CTR (256) = Y +3DES CBC = Y + +; +; Supported authentication algorithms of the 'dpaa2_sec' crypto driver. +; +[Auth] +MD5 HMAC = Y +SHA1 HMAC = Y +SHA224 HMAC = Y +SHA256 HMAC = Y +SHA384 HMAC = Y +SHA512 HMAC = Y + +; +; Supported AEAD algorithms of the 'dpaa2_sec' crypto driver. +; +[AEAD] +AES GCM (128) = Y +AES GCM (192) = Y +AES GCM (256) = Y + +; +; Supported Asymmetric algorithms of the 'dpaa2_sec' crypto driver. +; +[Asymmetric] diff --git a/src/spdk/dpdk/doc/guides/cryptodevs/features/ccp.ini b/src/spdk/dpdk/doc/guides/cryptodevs/features/ccp.ini new file mode 100644 index 000000000..866d4c52e --- /dev/null +++ b/src/spdk/dpdk/doc/guides/cryptodevs/features/ccp.ini @@ -0,0 +1,65 @@ +; +; Supported features of the 'ccp' crypto poll mode driver. +; +; Refer to default.ini for the full list of available PMD features. +; +[Features] +Symmetric crypto = Y +Sym operation chaining = Y +HW Accelerated = Y +Symmetric sessionless = Y + +; +; Supported crypto algorithms of the 'ccp' crypto driver. +; +[Cipher] +AES CBC (128) = Y +AES CBC (192) = Y +AES CBC (256) = Y +AES ECB (128) = Y +AES ECB (192) = Y +AES ECB (256) = Y +AES CTR (128) = Y +AES CTR (192) = Y +AES CTR (256) = Y +3DES CBC = Y + +; +; Supported authentication algorithms of the 'ccp' crypto driver. +; +[Auth] +MD5 HMAC = Y +SHA1 = Y +SHA1 HMAC = Y +SHA224 = Y +SHA224 HMAC = Y +SHA256 = Y +SHA256 HMAC = Y +SHA384 = Y +SHA384 HMAC = Y +SHA512 = Y +SHA512 HMAC = Y +AES CMAC (128) = Y +AES CMAC (192) = Y +AES CMAC (256) = Y +SHA3_224 = Y +SHA3_224 HMAC = Y +SHA3_256 = Y +SHA3_256 HMAC = Y +SHA3_384 = Y +SHA3_384 HMAC = Y +SHA3_512 = Y +SHA3_512 HMAC = Y + +; +; Supported AEAD algorithms of the 'ccp' crypto driver. +; +[AEAD] +AES GCM (128) = Y +AES GCM (192) = Y +AES GCM (256) = Y + +; +; Supported Asymmetric algorithms of the 'ccp' crypto driver. +; +[Asymmetric] diff --git a/src/spdk/dpdk/doc/guides/cryptodevs/features/default.ini b/src/spdk/dpdk/doc/guides/cryptodevs/features/default.ini new file mode 100644 index 000000000..133a246ee --- /dev/null +++ b/src/spdk/dpdk/doc/guides/cryptodevs/features/default.ini @@ -0,0 +1,116 @@ +; +; Features of a default crypto driver. +; +; This file defines the features that are valid for inclusion in +; the other driver files and also the order that they appear in +; the features table in the documentation. 
+; +[Features] +Symmetric crypto = +Asymmetric crypto = +Sym operation chaining = +HW Accelerated = +Protocol offload = +CPU SSE = +CPU AVX = +CPU AVX2 = +CPU AVX512 = +CPU AESNI = +CPU NEON = +CPU ARM CE = +In Place SGL = +OOP SGL In SGL Out = +OOP SGL In LB Out = +OOP LB In SGL Out = +OOP LB In LB Out = +RSA PRIV OP KEY EXP = +RSA PRIV OP KEY QT = +Digest encrypted = +Asymmetric sessionless = +CPU crypto = +Symmetric sessionless = +Non-Byte aligned data = + +; +; Supported crypto algorithms of a default crypto driver. +; +[Cipher] +NULL = +AES CBC (128) = +AES CBC (192) = +AES CBC (256) = +AES ECB (128) = +AES ECB (192) = +AES ECB (256) = +AES CTR (128) = +AES CTR (192) = +AES CTR (256) = +AES XTS (128) = +AES XTS (192) = +AES XTS (256) = +AES DOCSIS BPI = +3DES CBC = +3DES CTR = +3DES ECB = +DES CBC = +DES DOCSIS BPI = +SNOW3G UEA2 = +KASUMI F8 = +ZUC EEA3 = + +; +; Supported authentication algorithms of a default crypto driver. +; +[Auth] +NULL = +MD5 = +MD5 HMAC = +SHA1 = +SHA1 HMAC = +SHA224 = +SHA224 HMAC = +SHA256 = +SHA256 HMAC = +SHA384 = +SHA384 HMAC = +SHA512 = +SHA512 HMAC = +AES XCBC MAC = +AES GMAC = +SNOW3G UIA2 = +KASUMI F9 = +ZUC EIA3 = +AES CMAC (128) = +AES CMAC (192) = +AES CMAC (256) = +SHA3_224 = +SHA3_224 HMAC = +SHA3_256 = +SHA3_256 HMAC = +SHA3_384 = +SHA3_384 HMAC = +SHA3_512 = +SHA3_512 HMAC = + +; +; Supported AEAD algorithms of a default crypto driver. +; +[AEAD] +AES GCM (128) = +AES GCM (192) = +AES GCM (256) = +AES CCM (128) = +AES CCM (192) = +AES CCM (256) = +CHACHA20-POLY1305 = +; +; Supported Asymmetric algorithms of a default crypto driver. +; +[Asymmetric] +RSA = +DSA = +Modular Exponentiation = +Modular Inversion = +Diffie-hellman = +ECDSA = +ECPM = diff --git a/src/spdk/dpdk/doc/guides/cryptodevs/features/dpaa2_sec.ini b/src/spdk/dpdk/doc/guides/cryptodevs/features/dpaa2_sec.ini new file mode 100644 index 000000000..3a88828b8 --- /dev/null +++ b/src/spdk/dpdk/doc/guides/cryptodevs/features/dpaa2_sec.ini @@ -0,0 +1,55 @@ +; +; Supported features of the 'dpaa2_sec' crypto driver. +; +; Refer to default.ini for the full list of available PMD features. +; +[Features] +Symmetric crypto = Y +Sym operation chaining = Y +HW Accelerated = Y +Protocol offload = Y +In Place SGL = Y +OOP SGL In SGL Out = Y +OOP SGL In LB Out = Y +OOP LB In SGL Out = Y +OOP LB In LB Out = Y + +; +; Supported crypto algorithms of the 'dpaa2_sec' crypto driver. +; +[Cipher] +AES CBC (128) = Y +AES CBC (192) = Y +AES CBC (256) = Y +AES CTR (128) = Y +AES CTR (192) = Y +AES CTR (256) = Y +3DES CBC = Y +SNOW3G UEA2 = Y +ZUC EEA3 = Y + +; +; Supported authentication algorithms of the 'dpaa2_sec' crypto driver. +; +[Auth] +MD5 HMAC = Y +SHA1 HMAC = Y +SHA224 HMAC = Y +SHA256 HMAC = Y +SHA384 HMAC = Y +SHA512 HMAC = Y +SNOW3G UIA2 = Y +ZUC EIA3 = Y + +; +; Supported AEAD algorithms of the 'dpaa2_sec' crypto driver. +; +[AEAD] +AES GCM (128) = Y +AES GCM (192) = Y +AES GCM (256) = Y + +; +; Supported Asymmetric algorithms of the 'dpaa2_sec' crypto driver. +; +[Asymmetric] \ No newline at end of file diff --git a/src/spdk/dpdk/doc/guides/cryptodevs/features/dpaa_sec.ini b/src/spdk/dpdk/doc/guides/cryptodevs/features/dpaa_sec.ini new file mode 100644 index 000000000..243f3e1d6 --- /dev/null +++ b/src/spdk/dpdk/doc/guides/cryptodevs/features/dpaa_sec.ini @@ -0,0 +1,55 @@ +; +; Supported features of the 'dpaa_sec' crypto driver. +; +; Refer to default.ini for the full list of available PMD features. 
+; +[Features] +Symmetric crypto = Y +Sym operation chaining = Y +HW Accelerated = Y +Protocol offload = Y +In Place SGL = Y +OOP SGL In SGL Out = Y +OOP SGL In LB Out = Y +OOP LB In SGL Out = Y +OOP LB In LB Out = Y + +; +; Supported crypto algorithms of the 'dpaa_sec' crypto driver. +; +[Cipher] +AES CBC (128) = Y +AES CBC (192) = Y +AES CBC (256) = Y +AES CTR (128) = Y +AES CTR (192) = Y +AES CTR (256) = Y +3DES CBC = Y +SNOW3G UEA2 = Y +ZUC EEA3 = Y + +; +; Supported authentication algorithms of the 'dpaa_sec' crypto driver. +; +[Auth] +MD5 HMAC = Y +SHA1 HMAC = Y +SHA224 HMAC = Y +SHA256 HMAC = Y +SHA384 HMAC = Y +SHA512 HMAC = Y +SNOW3G UIA2 = Y +ZUC EIA3 = Y + +; +; Supported AEAD algorithms of the 'dpaa_sec' crypto driver. +; +[AEAD] +AES GCM (128) = Y +AES GCM (192) = Y +AES GCM (256) = Y + +; +; Supported Asymmetric algorithms of the 'dpaa_sec' crypto driver. +; +[Asymmetric] \ No newline at end of file diff --git a/src/spdk/dpdk/doc/guides/cryptodevs/features/kasumi.ini b/src/spdk/dpdk/doc/guides/cryptodevs/features/kasumi.ini new file mode 100644 index 000000000..8380a5765 --- /dev/null +++ b/src/spdk/dpdk/doc/guides/cryptodevs/features/kasumi.ini @@ -0,0 +1,31 @@ +; +; Supported features of the 'kasumi' crypto driver. +; +; Refer to default.ini for the full list of available PMD features. +; +[Features] +Symmetric crypto = Y +Sym operation chaining = Y +Symmetric sessionless = Y +Non-Byte aligned data = Y + +; +; Supported crypto algorithms of the 'kasumi' crypto driver. +; +[Cipher] +KASUMI F8 = Y +; +; Supported authentication algorithms of the 'kasumi' crypto driver. +; +[Auth] +KASUMI F9 = Y + +; +; Supported AEAD algorithms of the 'kasumi' crypto driver. +; +[AEAD] + +; +; Supported Asymmetric algorithms of the 'kasumi' crypto driver. +; +[Asymmetric] diff --git a/src/spdk/dpdk/doc/guides/cryptodevs/features/mvsam.ini b/src/spdk/dpdk/doc/guides/cryptodevs/features/mvsam.ini new file mode 100644 index 000000000..829deff68 --- /dev/null +++ b/src/spdk/dpdk/doc/guides/cryptodevs/features/mvsam.ini @@ -0,0 +1,59 @@ +; Supported features of the 'mvsam' crypto driver. +; +; Refer to default.ini for the full list of available PMD features. +; +[Features] +Symmetric crypto = Y +Sym operation chaining = Y +HW Accelerated = Y +OOP SGL In LB Out = Y +OOP LB In LB Out = Y + +; +; Supported crypto algorithms of a default crypto driver. +; +[Cipher] +NULL = Y +AES CBC (128) = Y +AES CBC (192) = Y +AES CBC (256) = Y +AES CTR (128) = Y +AES CTR (192) = Y +AES CTR (256) = Y +AES ECB (128) = Y +AES ECB (192) = Y +AES ECB (256) = Y +3DES CBC = Y +3DES CTR = Y + +; +; Supported authentication algorithms of a default crypto driver. +; +[Auth] +NULL = Y +MD5 = Y +MD5 HMAC = Y +SHA1 = Y +SHA1 HMAC = Y +SHA224 = Y +SHA224 HMAC = Y +SHA256 = Y +SHA256 HMAC = Y +SHA384 = Y +SHA384 HMAC = Y +SHA512 = Y +SHA512 HMAC = Y +AES GMAC = Y + +; +; Supported AEAD algorithms of a default crypto driver. +; +[AEAD] +AES GCM (128) = Y +AES GCM (192) = Y +AES GCM (256) = Y + +; +; Supported Asymmetric algorithms of the 'mvsam' crypto driver. +; +[Asymmetric] \ No newline at end of file diff --git a/src/spdk/dpdk/doc/guides/cryptodevs/features/nitrox.ini b/src/spdk/dpdk/doc/guides/cryptodevs/features/nitrox.ini new file mode 100644 index 000000000..183494731 --- /dev/null +++ b/src/spdk/dpdk/doc/guides/cryptodevs/features/nitrox.ini @@ -0,0 +1,41 @@ +; +; Supported features of the 'nitrox' crypto driver. +; +; Refer to default.ini for the full list of available PMD features. 
+; +[Features] +Symmetric crypto = Y +Sym operation chaining = Y +HW Accelerated = Y +In Place SGL = Y +OOP SGL In SGL Out = Y +OOP SGL In LB Out = Y +OOP LB In SGL Out = Y +OOP LB In LB Out = Y + +; +; Supported crypto algorithms of the 'nitrox' crypto driver. +; +[Cipher] +AES CBC (128) = Y +AES CBC (192) = Y +AES CBC (256) = Y +3DES CBC = Y + +; +; Supported authentication algorithms of the 'nitrox' crypto driver. +; +[Auth] +SHA1 HMAC = Y +SHA224 HMAC = Y +SHA256 HMAC = Y + +; +; Supported AEAD algorithms of the 'nitrox' crypto driver. +; +[AEAD] + +; +; Supported Asymmetric algorithms of the 'nitrox' crypto driver. +; +[Asymmetric] diff --git a/src/spdk/dpdk/doc/guides/cryptodevs/features/null.ini b/src/spdk/dpdk/doc/guides/cryptodevs/features/null.ini new file mode 100644 index 000000000..a1c3e22a3 --- /dev/null +++ b/src/spdk/dpdk/doc/guides/cryptodevs/features/null.ini @@ -0,0 +1,31 @@ +; +; Supported features of the 'null' crypto driver. +; +; Refer to default.ini for the full list of available PMD features. +; +[Features] +Symmetric crypto = Y +Sym operation chaining = Y +In Place SGL = Y + +; +; Supported crypto algorithms of the 'null' crypto driver. +; +[Cipher] +NULL = Y + +; +; Supported authentication algorithms of the 'null' crypto driver. +; +[Auth] +NULL = Y + +; +; Supported AEAD algorithms of the 'null' crypto driver. +; +[AEAD] + +; +; Supported Asymmetric algorithms of the 'null' crypto driver. +; +[Asymmetric] \ No newline at end of file diff --git a/src/spdk/dpdk/doc/guides/cryptodevs/features/octeontx.ini b/src/spdk/dpdk/doc/guides/cryptodevs/features/octeontx.ini new file mode 100644 index 000000000..09affb21d --- /dev/null +++ b/src/spdk/dpdk/doc/guides/cryptodevs/features/octeontx.ini @@ -0,0 +1,78 @@ +; +; Supported features of the 'octeontx' crypto driver. +; +; Refer to default.ini for the full list of available PMD features. +; +[Features] +Symmetric crypto = Y +Asymmetric crypto = Y +Sym operation chaining = Y +HW Accelerated = Y +In Place SGL = Y +OOP SGL In LB Out = Y +OOP SGL In SGL Out = Y +RSA PRIV OP KEY QT = Y +Symmetric sessionless = Y +Non-Byte aligned data = Y + +; +; Supported crypto algorithms of 'octeontx' crypto driver. +; +[Cipher] +NULL = Y +3DES CBC = Y +3DES ECB = Y +AES CBC (128) = Y +AES CBC (192) = Y +AES CBC (256) = Y +AES CTR (128) = Y +AES CTR (192) = Y +AES CTR (256) = Y +AES XTS (128) = Y +AES XTS (256) = Y +DES CBC = Y +KASUMI F8 = Y +SNOW3G UEA2 = Y +ZUC EEA3 = Y + +; +; Supported authentication algorithms of 'octeontx' crypto driver. +; +[Auth] +NULL = Y +AES GMAC = Y +KASUMI F9 = Y +MD5 = Y +MD5 HMAC = Y +SHA1 = Y +SHA1 HMAC = Y +SHA224 = Y +SHA224 HMAC = Y +SHA256 = Y +SHA256 HMAC = Y +SHA384 = Y +SHA384 HMAC = Y +SHA512 = Y +SHA512 HMAC = Y +SNOW3G UIA2 = Y +ZUC EIA3 = Y + +; +; Supported AEAD algorithms of 'octeontx' crypto driver. +; +[AEAD] +AES GCM (128) = Y +AES GCM (192) = Y +AES GCM (256) = Y + +; +; Supported Asymmetric algorithms of the 'octeontx' crypto driver. +; +[Asymmetric] +RSA = Y +DSA = +Modular Exponentiation = Y +Modular Inversion = +Diffie-hellman = +ECDSA = Y +ECPM = Y diff --git a/src/spdk/dpdk/doc/guides/cryptodevs/features/octeontx2.ini b/src/spdk/dpdk/doc/guides/cryptodevs/features/octeontx2.ini new file mode 100644 index 000000000..cdcaf709d --- /dev/null +++ b/src/spdk/dpdk/doc/guides/cryptodevs/features/octeontx2.ini @@ -0,0 +1,78 @@ +; +; Supported features of the 'octeontx2' crypto driver. +; +; Refer to default.ini for the full list of available PMD features. 
+; +[Features] +Symmetric crypto = Y +Asymmetric crypto = Y +Sym operation chaining = Y +HW Accelerated = Y +In Place SGL = Y +OOP SGL In LB Out = Y +OOP SGL In SGL Out = Y +RSA PRIV OP KEY QT = Y +Symmetric sessionless = Y +Non-Byte aligned data = Y + +; +; Supported crypto algorithms of 'octeontx2' crypto driver. +; +[Cipher] +NULL = Y +3DES CBC = Y +3DES ECB = Y +AES CBC (128) = Y +AES CBC (192) = Y +AES CBC (256) = Y +AES CTR (128) = Y +AES CTR (192) = Y +AES CTR (256) = Y +AES XTS (128) = Y +AES XTS (256) = Y +DES CBC = Y +KASUMI F8 = Y +SNOW3G UEA2 = Y +ZUC EEA3 = Y + +; +; Supported authentication algorithms of 'octeontx2' crypto driver. +; +[Auth] +NULL = Y +AES GMAC = Y +KASUMI F9 = Y +MD5 = Y +MD5 HMAC = Y +SHA1 = Y +SHA1 HMAC = Y +SHA224 = Y +SHA224 HMAC = Y +SHA256 = Y +SHA256 HMAC = Y +SHA384 = Y +SHA384 HMAC = Y +SHA512 = Y +SHA512 HMAC = Y +SNOW3G UIA2 = Y +ZUC EIA3 = Y + +; +; Supported AEAD algorithms of 'octeontx2' crypto driver. +; +[AEAD] +AES GCM (128) = Y +AES GCM (192) = Y +AES GCM (256) = Y + +; +; Supported Asymmetric algorithms of the 'octeontx2' crypto driver. +; +[Asymmetric] +RSA = Y +DSA = +Modular Exponentiation = Y +Modular Inversion = +Diffie-hellman = +ECDSA = Y +ECPM = Y diff --git a/src/spdk/dpdk/doc/guides/cryptodevs/features/openssl.ini b/src/spdk/dpdk/doc/guides/cryptodevs/features/openssl.ini new file mode 100644 index 000000000..74bb987ec --- /dev/null +++ b/src/spdk/dpdk/doc/guides/cryptodevs/features/openssl.ini @@ -0,0 +1,67 @@ +; +; Supported features of the 'openssl' crypto driver. +; +; Refer to default.ini for the full list of available PMD features. +; +[Features] +Symmetric crypto = Y +Sym operation chaining = Y +In Place SGL = Y +OOP SGL In LB Out = Y +OOP LB In LB Out = Y +Asymmetric crypto = Y +RSA PRIV OP KEY EXP = Y +RSA PRIV OP KEY QT = Y +Symmetric sessionless = Y + +; +; Supported crypto algorithms of the 'openssl' crypto driver. +; +[Cipher] +AES CBC (128) = Y +AES CBC (192) = Y +AES CBC (256) = Y +AES CTR (128) = Y +AES CTR (192) = Y +AES CTR (256) = Y +3DES CBC = Y +3DES CTR = Y +DES DOCSIS BPI = Y +; +; Supported authentication algorithms of the 'openssl' crypto driver. +; +[Auth] +MD5 = Y +MD5 HMAC = Y +SHA1 = Y +SHA1 HMAC = Y +SHA224 = Y +SHA224 HMAC = Y +SHA256 = Y +SHA256 HMAC = Y +SHA384 = Y +SHA384 HMAC = Y +SHA512 = Y +SHA512 HMAC = Y +AES GMAC = Y + +; +; Supported AEAD algorithms of the 'openssl' crypto driver. +; +[AEAD] +AES GCM (128) = Y +AES GCM (192) = Y +AES GCM (256) = Y +AES CCM (128) = Y +AES CCM (192) = Y +AES CCM (256) = Y + +; +; Supported Asymmetric algorithms of the 'openssl' crypto driver. +; +[Asymmetric] +RSA = Y +DSA = Y +Modular Exponentiation = Y +Modular Inversion = Y +Diffie-hellman = Y diff --git a/src/spdk/dpdk/doc/guides/cryptodevs/features/qat.ini b/src/spdk/dpdk/doc/guides/cryptodevs/features/qat.ini new file mode 100644 index 000000000..a72241997 --- /dev/null +++ b/src/spdk/dpdk/doc/guides/cryptodevs/features/qat.ini @@ -0,0 +1,81 @@ +; +; Supported features of the 'qat' crypto driver. +; +; Refer to default.ini for the full list of available PMD features. +; +[Features] +Symmetric crypto = Y +Sym operation chaining = Y +HW Accelerated = Y +In Place SGL = Y +OOP SGL In SGL Out = Y +OOP SGL In LB Out = Y +OOP LB In SGL Out = Y +OOP LB In LB Out = Y +Digest encrypted = Y +Asymmetric sessionless = Y +RSA PRIV OP KEY EXP = Y +RSA PRIV OP KEY QT = Y + +; +; Supported crypto algorithms of the 'qat' crypto driver. 
+; +[Cipher] +NULL = Y +AES CBC (128) = Y +AES CBC (192) = Y +AES CBC (256) = Y +AES CTR (128) = Y +AES CTR (192) = Y +AES CTR (256) = Y +AES XTS (128) = Y +AES XTS (256) = Y +3DES CBC = Y +3DES CTR = Y +DES CBC = Y +SNOW3G UEA2 = Y +KASUMI F8 = Y +AES DOCSIS BPI = Y +DES DOCSIS BPI = Y +ZUC EEA3 = Y +; +; Supported authentication algorithms of the 'qat' crypto driver. +; +[Auth] +NULL = Y +MD5 HMAC = Y +SHA1 = Y +SHA1 HMAC = Y +SHA224 = Y +SHA224 HMAC = Y +SHA256 = Y +SHA256 HMAC = Y +SHA384 = Y +SHA384 HMAC = Y +SHA512 = Y +SHA512 HMAC = Y +AES GMAC = Y +SNOW3G UIA2 = Y +KASUMI F9 = Y +AES XCBC MAC = Y +ZUC EIA3 = Y +AES CMAC (128) = Y + +; +; Supported AEAD algorithms of the 'qat' crypto driver. +; +[AEAD] +AES GCM (128) = Y +AES GCM (192) = Y +AES GCM (256) = Y +AES CCM (128) = Y +AES CCM (192) = Y +AES CCM (256) = Y + +; +; Supported Asymmetric algorithms of the 'qat' crypto driver. +; +[Asymmetric] +Modular Exponentiation = Y +Modular Inversion = Y +RSA = Y diff --git a/src/spdk/dpdk/doc/guides/cryptodevs/features/snow3g.ini b/src/spdk/dpdk/doc/guides/cryptodevs/features/snow3g.ini new file mode 100644 index 000000000..b2caefe3a --- /dev/null +++ b/src/spdk/dpdk/doc/guides/cryptodevs/features/snow3g.ini @@ -0,0 +1,31 @@ +; +; Supported features of the 'snow3g' crypto driver. +; +; Refer to default.ini for the full list of available PMD features. +; +[Features] +Symmetric crypto = Y +Sym operation chaining = Y +Symmetric sessionless = Y +Non-Byte aligned data = Y + +; +; Supported crypto algorithms of the 'snow3g' crypto driver. +; +[Cipher] +SNOW3G UEA2 = Y +; +; Supported authentication algorithms of the 'snow3g' crypto driver. +; +[Auth] +SNOW3G UIA2 = Y + +; +; Supported AEAD algorithms of the 'snow3g' crypto driver. +; +[AEAD] + +; +; Supported Asymmetric algorithms of the 'snow3g' crypto driver. +; +[Asymmetric] diff --git a/src/spdk/dpdk/doc/guides/cryptodevs/features/virtio.ini b/src/spdk/dpdk/doc/guides/cryptodevs/features/virtio.ini new file mode 100644 index 000000000..5dff09482 --- /dev/null +++ b/src/spdk/dpdk/doc/guides/cryptodevs/features/virtio.ini @@ -0,0 +1,32 @@ +; Supported features of the 'virtio' crypto driver. +; +; Refer to default.ini for the full list of available PMD features. +; +[Features] +Symmetric crypto = Y +Sym operation chaining = Y +OOP LB In LB Out = Y + +; +; Supported crypto algorithms of the 'virtio' crypto driver. +; +[Cipher] +AES CBC (128) = Y +AES CBC (192) = Y +AES CBC (256) = Y + +; +; Supported authentication algorithms of the 'virtio' crypto driver. +; +[Auth] +SHA1 HMAC = Y + +; +; Supported AEAD algorithms of the 'virtio' crypto driver. +; +[AEAD] + +; +; Supported Asymmetric algorithms of the 'virtio' crypto driver. +; +[Asymmetric] diff --git a/src/spdk/dpdk/doc/guides/cryptodevs/features/zuc.ini b/src/spdk/dpdk/doc/guides/cryptodevs/features/zuc.ini new file mode 100644 index 000000000..21d074f9b --- /dev/null +++ b/src/spdk/dpdk/doc/guides/cryptodevs/features/zuc.ini @@ -0,0 +1,31 @@ +; +; Supported features of the 'zuc' crypto driver. +; +; Refer to default.ini for the full list of available PMD features. +; +[Features] +Symmetric crypto = Y +Sym operation chaining = Y +Symmetric sessionless = Y +Non-Byte aligned data = Y + +; +; Supported crypto algorithms of the 'zuc' crypto driver. +; +[Cipher] +ZUC EEA3 = Y +; +; Supported authentication algorithms of the 'zuc' crypto driver. +; +[Auth] +ZUC EIA3 = Y + +; +; Supported AEAD algorithms of the 'zuc' crypto driver. 
+; +[AEAD] + +; +; Supported Asymmetric algorithms of the 'zuc' crypto driver. +; +[Asymmetric] diff --git a/src/spdk/dpdk/doc/guides/cryptodevs/img/scheduler-overview.svg b/src/spdk/dpdk/doc/guides/cryptodevs/img/scheduler-overview.svg new file mode 100644 index 000000000..82bb775b6 --- /dev/null +++ b/src/spdk/dpdk/doc/guides/cryptodevs/img/scheduler-overview.svg @@ -0,0 +1,277 @@ + + + + + + + + + + + + + + + + + + + + + + + Page-1 + + + + Rounded Rectangle.55 + User Application + + + + + + + + + + + + + + + + + + + + + + + + + + User Application + + Rounded Rectangle.135 + Cryptodev + + + + + + + + + + + + + + + + + + + + + + + + + + Cryptodev + + Rounded Rectangle.136 + Cryptodev + + + + + + + + + + + + + + + + + + + + + + + + + + Cryptodev + + Rounded Rectangle.137 + Cryptodev + + + + + + + + + + + + + + + + + + + + + + + + + + Cryptodev + + Sheet.139 + + Rounded Rectangle.40 + Cryptodev Scheduler + + + + + + + + + + + + + + + + + + + + + + + + + + Cryptodev Scheduler + + Rounded Rectangle.138 + Crypto Op Distribution Mechanism + + + + + + + + + + + + + + + + + + + + + + + Crypto Op Distribution Mechanism + + + Dynamic connector.229 + + + + Dynamic connector.141 + + + + Dynamic connector.142 + + + + Dynamic connector.143 + + + + Dynamic connector.144 + + + + Dynamic connector.145 + + + + Dynamic connector.146 + + + + Dynamic connector.147 + + + + diff --git a/src/spdk/dpdk/doc/guides/cryptodevs/index.rst b/src/spdk/dpdk/doc/guides/cryptodevs/index.rst new file mode 100644 index 000000000..a67ed5a28 --- /dev/null +++ b/src/spdk/dpdk/doc/guides/cryptodevs/index.rst @@ -0,0 +1,31 @@ +.. SPDX-License-Identifier: BSD-3-Clause + Copyright(c) 2015 - 2017 Intel Corporation. + +Crypto Device Drivers +===================== + + +.. toctree:: + :maxdepth: 2 + :numbered: + + overview + aesni_mb + aesni_gcm + armv8 + caam_jr + ccp + dpaa2_sec + dpaa_sec + kasumi + octeontx + octeontx2 + openssl + mvsam + nitrox + null + scheduler + snow3g + qat + virtio + zuc diff --git a/src/spdk/dpdk/doc/guides/cryptodevs/kasumi.rst b/src/spdk/dpdk/doc/guides/cryptodevs/kasumi.rst new file mode 100644 index 000000000..09a538f81 --- /dev/null +++ b/src/spdk/dpdk/doc/guides/cryptodevs/kasumi.rst @@ -0,0 +1,138 @@ +.. SPDX-License-Identifier: BSD-3-Clause + Copyright(c) 2016-2019 Intel Corporation. + +KASUMI Crypto Poll Mode Driver +=============================== + +The KASUMI PMD (**librte_pmd_kasumi**) provides poll mode crypto driver support for +utilizing `Intel IPSec Multi-buffer library `_ +which implements F8 and F9 functions for KASUMI UEA1 cipher and UIA1 hash algorithms. + +Features +-------- + +KASUMI PMD has support for: + +Cipher algorithm: + +* RTE_CRYPTO_CIPHER_KASUMI_F8 + +Authentication algorithm: + +* RTE_CRYPTO_AUTH_KASUMI_F9 + +Limitations +----------- + +* Chained mbufs are not supported. +* KASUMI(F9) supported only if hash offset and length field is byte-aligned. +* In-place bit-level operations for KASUMI(F8) are not supported + (if length and/or offset of data to be ciphered is not byte-aligned). + + +Installation +------------ + +To build DPDK with the KASUMI_PMD the user is required to download the multi-buffer +library from `here `_ +and compile it on their user system before building DPDK. +The latest version of the library supported by this PMD is v0.54, which +can be downloaded from ``_. + +After downloading the library, the user needs to unpack and compile it +on their system before building DPDK: + +.. 
code-block:: console + + make + make install + +The library requires NASM to be built. Depending on the library version, it might +require a minimum NASM version (e.g. v0.54 requires at least NASM 2.14). + +NASM is packaged for different OS. However, on some OS the version is too old, +so a manual installation is required. In that case, NASM can be downloaded from +`NASM website `_. +Once it is downloaded, extract it and follow these steps: + +.. code-block:: console + + ./configure + make + make install + +.. note:: + + Compilation of the Multi-Buffer library is broken when GCC < 5.0, if library <= v0.53. + If a lower GCC version than 5.0, the workaround proposed by the following link + should be used: ``_. + +As a reference, the following table shows a mapping between the past DPDK versions +and the external crypto libraries supported by them: + +.. _table_kasumi_versions: + +.. table:: DPDK and external crypto library version compatibility + + ============= ================================ + DPDK version Crypto library version + ============= ================================ + 16.11 - 19.11 LibSSO KASUMI + 20.02+ Multi-buffer library 0.53 - 0.54 + ============= ================================ + + +Initialization +-------------- + +In order to enable this virtual crypto PMD, user must: + +* Build the multi buffer library (explained in Installation section). + +* Build DPDK as follows: + +.. code-block:: console + + make config T=x86_64-native-linux-gcc + sed -i 's,\(CONFIG_RTE_LIBRTE_PMD_KASUMI\)=n,\1=y,' build/.config + make + + +To use the PMD in an application, user must: + +* Call rte_vdev_init("crypto_kasumi") within the application. + +* Use --vdev="crypto_kasumi" in the EAL options, which will call rte_vdev_init() internally. + +The following parameters (all optional) can be provided in the previous two calls: + +* socket_id: Specify the socket where the memory for the device is going to be allocated + (by default, socket_id will be the socket where the core that is creating the PMD is running on). + +* max_nb_queue_pairs: Specify the maximum number of queue pairs in the device (8 by default). + +* max_nb_sessions: Specify the maximum number of sessions that can be created (2048 by default). + +Example: + +.. code-block:: console + + ./l2fwd-crypto -l 1 -n 4 --vdev="crypto_kasumi,socket_id=0,max_nb_sessions=128" \ + -- -p 1 --cdev SW --chain CIPHER_ONLY --cipher_algo "kasumi-f8" + +Extra notes on KASUMI F9 +------------------------ + +When using KASUMI F9 authentication algorithm, the input buffer must be +constructed according to the 3GPP KASUMI specifications (section 4.4, page 13): +``_. +Input buffer has to have COUNT (4 bytes), FRESH (4 bytes), MESSAGE and DIRECTION (1 bit) +concatenated. After the DIRECTION bit, a single '1' bit is appended, followed by +between 0 and 7 '0' bits, so that the total length of the buffer is multiple of 8 bits. +Note that the actual message can be any length, specified in bits. + +Once this buffer is passed this way, when creating the crypto operation, +length of data to authenticate (op.sym.auth.data.length) must be the length +of all the items described above, including the padding at the end. +Also, offset of data to authenticate (op.sym.auth.data.offset) +must be such that points at the start of the COUNT bytes. 
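As an illustration of this layout, the following minimal C sketch builds such an input buffer for the common case of a byte-aligned MESSAGE. It is an example only: the helper name is hypothetical, the big-endian placement of COUNT and FRESH is an assumption of this sketch, and non-byte-aligned messages (which need bit-level handling) are not covered.

.. code-block:: c

   #include <stdint.h>
   #include <stddef.h>
   #include <string.h>

   /*
    * Build the KASUMI F9 input described above for a byte-aligned MESSAGE:
    *   COUNT (4 bytes) || FRESH (4 bytes) || MESSAGE || DIRECTION (1 bit) ||
    *   '1' bit || '0' bits, padded to a multiple of 8 bits.
    * Returns the total buffer length in bytes; as noted above, the
    * authentication length and offset of the crypto operation must cover
    * this entire buffer, including the trailing padding.
    */
   static size_t
   build_kasumi_f9_input(uint8_t *out, uint32_t count, uint32_t fresh,
                         const uint8_t *msg, size_t msg_len_bytes,
                         uint8_t direction)
   {
           size_t off = 0;

           /* COUNT and FRESH as 32-bit big-endian words (sketch assumption). */
           out[off++] = (uint8_t)(count >> 24);
           out[off++] = (uint8_t)(count >> 16);
           out[off++] = (uint8_t)(count >> 8);
           out[off++] = (uint8_t)(count);
           out[off++] = (uint8_t)(fresh >> 24);
           out[off++] = (uint8_t)(fresh >> 16);
           out[off++] = (uint8_t)(fresh >> 8);
           out[off++] = (uint8_t)(fresh);

           /* MESSAGE (byte-aligned in this sketch). */
           memcpy(out + off, msg, msg_len_bytes);
           off += msg_len_bytes;

           /* DIRECTION bit, then the mandatory '1' bit, then six '0' bits. */
           out[off++] = (uint8_t)(((direction & 1) << 7) | 0x40);

           return off;
   }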
diff --git a/src/spdk/dpdk/doc/guides/cryptodevs/mvsam.rst b/src/spdk/dpdk/doc/guides/cryptodevs/mvsam.rst new file mode 100644 index 000000000..399fe37a6 --- /dev/null +++ b/src/spdk/dpdk/doc/guides/cryptodevs/mvsam.rst @@ -0,0 +1,112 @@ +.. SPDX-License-Identifier: BSD-3-Clause + Copyright(c) 2017 Marvell International Ltd. + Copyright(c) 2017 Semihalf. + +MVSAM Crypto Poll Mode Driver +============================= + +The MVSAM CRYPTO PMD (**librte_crypto_mvsam_pmd**) provides poll mode crypto driver +support by utilizing MUSDK library, which provides cryptographic operations +acceleration by using Security Acceleration Engine (EIP197) directly from +user-space with minimum overhead and high performance. + +Detailed information about SoCs that use MVSAM crypto driver can be obtained here: + +* https://www.marvell.com/embedded-processors/armada-70xx/ +* https://www.marvell.com/embedded-processors/armada-80xx/ +* https://www.marvell.com/embedded-processors/armada-3700/ + + +Features +-------- + +MVSAM CRYPTO PMD has support for: + +Cipher algorithms: + +* ``RTE_CRYPTO_CIPHER_NULL`` +* ``RTE_CRYPTO_CIPHER_AES_CBC`` +* ``RTE_CRYPTO_CIPHER_AES_CTR`` +* ``RTE_CRYPTO_CIPHER_AES_ECB`` +* ``RTE_CRYPTO_CIPHER_3DES_CBC`` +* ``RTE_CRYPTO_CIPHER_3DES_CTR`` +* ``RTE_CRYPTO_CIPHER_3DES_ECB`` + +Hash algorithms: + +* ``RTE_CRYPTO_AUTH_NULL`` +* ``RTE_CRYPTO_AUTH_MD5`` +* ``RTE_CRYPTO_AUTH_MD5_HMAC`` +* ``RTE_CRYPTO_AUTH_SHA1`` +* ``RTE_CRYPTO_AUTH_SHA1_HMAC`` +* ``RTE_CRYPTO_AUTH_SHA224`` +* ``RTE_CRYPTO_AUTH_SHA224_HMAC`` +* ``RTE_CRYPTO_AUTH_SHA256`` +* ``RTE_CRYPTO_AUTH_SHA256_HMAC`` +* ``RTE_CRYPTO_AUTH_SHA384`` +* ``RTE_CRYPTO_AUTH_SHA384_HMAC`` +* ``RTE_CRYPTO_AUTH_SHA512`` +* ``RTE_CRYPTO_AUTH_SHA512_HMAC`` +* ``RTE_CRYPTO_AUTH_AES_GMAC`` + +AEAD algorithms: + +* ``RTE_CRYPTO_AEAD_AES_GCM`` + +For supported feature flags please consult :doc:`overview`. + +Limitations +----------- + +* Hardware only supports scenarios where ICV (digest buffer) is placed just + after the authenticated data. Other placement will result in error. + +Installation +------------ + +MVSAM CRYPTO PMD driver compilation is disabled by default due to external dependencies. +Currently there are two driver specific compilation options in +``config/common_base`` available: + +- ``CONFIG_RTE_LIBRTE_PMD_MVSAM_CRYPTO`` (default: ``n``) + + Toggle compilation of the librte_pmd_mvsam driver. + +MVSAM CRYPTO PMD requires MUSDK built with EIP197 support thus following +extra option must be passed to the library configuration script: + +.. code-block:: console + + --enable-sam [--enable-sam-statistics] [--enable-sam-debug] + +For instructions how to build required kernel modules please refer +to `doc/musdk_get_started.txt`. + +Initialization +-------------- + +After successfully building MVSAM CRYPTO PMD, the following modules need to be +loaded: + +.. code-block:: console + + insmod musdk_cma.ko + insmod crypto_safexcel.ko rings=0,0 + insmod mv_sam_uio.ko + +The following parameters (all optional) are exported by the driver: + +- ``max_nb_queue_pairs``: maximum number of queue pairs in the device (default: 8 - A8K, 4 - A7K/A3K). +- ``max_nb_sessions``: maximum number of sessions that can be created (default: 2048). +- ``socket_id``: socket on which to allocate the device resources on. + +l2fwd-crypto example application can be used to verify MVSAM CRYPTO PMD +operation: + +.. 
code-block:: console
+
+   ./l2fwd-crypto --vdev=eth_mvpp2,iface=eth0 --vdev=crypto_mvsam -- \
+     --cipher_op ENCRYPT --cipher_algo aes-cbc \
+     --cipher_key 00:01:02:03:04:05:06:07:08:09:0a:0b:0c:0d:0e:0f \
+     --auth_op GENERATE --auth_algo sha1-hmac \
+     --auth_key 10:11:12:13:14:15:16:17:18:19:1a:1b:1c:1d:1e:1f
diff --git a/src/spdk/dpdk/doc/guides/cryptodevs/nitrox.rst b/src/spdk/dpdk/doc/guides/cryptodevs/nitrox.rst
new file mode 100644
index 000000000..85f5212b6
--- /dev/null
+++ b/src/spdk/dpdk/doc/guides/cryptodevs/nitrox.rst
@@ -0,0 +1,53 @@
+.. SPDX-License-Identifier: BSD-3-Clause
+   Copyright(C) 2019 Marvell International Ltd.
+
+Marvell NITROX Crypto Poll Mode Driver
+======================================
+
+The Nitrox crypto poll mode driver provides support for offloading
+cryptographic operations to the NITROX V security processor. Detailed
+information about the NITROX V security processor can be obtained here:
+
+* https://www.marvell.com/security-solutions/nitrox-security-processors/nitrox-v/
+
+Features
+--------
+
+Nitrox crypto PMD has support for:
+
+Cipher algorithms:
+
+* ``RTE_CRYPTO_CIPHER_AES_CBC``
+* ``RTE_CRYPTO_CIPHER_3DES_CBC``
+
+Hash algorithms:
+
+* ``RTE_CRYPTO_AUTH_SHA1_HMAC``
+* ``RTE_CRYPTO_AUTH_SHA224_HMAC``
+* ``RTE_CRYPTO_AUTH_SHA256_HMAC``
+
+Limitations
+-----------
+
+* AES_CBC Cipher Only combination is not supported.
+* 3DES Cipher Only combination is not supported.
+* Session-less APIs are not supported.
+
+Installation
+------------
+
+For compiling the Nitrox crypto PMD, please check that the
+CONFIG_RTE_LIBRTE_PMD_NITROX setting is set to `y` in the config/common_base file.
+
+* ``CONFIG_RTE_LIBRTE_PMD_NITROX=y``
+
+Initialization
+--------------
+
+The Nitrox crypto PMD depends on the Nitrox kernel PF driver being installed on the
+platform. The Nitrox PF driver is required to create the VF devices which will
+be used by the PMD. Each VF device can enable one cryptodev PMD.
+
+The Nitrox kernel PF driver is available as part of the CNN55XX-Driver SDK. The SDK
+and its installation instructions can be obtained from:
+`Marvell Technical Documentation Portal `_.
diff --git a/src/spdk/dpdk/doc/guides/cryptodevs/null.rst b/src/spdk/dpdk/doc/guides/cryptodevs/null.rst
new file mode 100644
index 000000000..c980e0ac8
--- /dev/null
+++ b/src/spdk/dpdk/doc/guides/cryptodevs/null.rst
@@ -0,0 +1,71 @@
+.. SPDX-License-Identifier: BSD-3-Clause
+   Copyright(c) 2016 Intel Corporation.
+
+Null Crypto Poll Mode Driver
+============================
+
+The Null Crypto PMD (**librte_pmd_null_crypto**) is a crypto poll mode driver
+which provides a minimal implementation of a software crypto device. As a null
+device it does not modify the data in the mbuf on which the crypto operation
+is to operate, and it only supports a single cipher and a single
+authentication algorithm.
+
+When a burst of mbufs is submitted to a Null Crypto PMD for processing, each
+mbuf in the burst will be enqueued in an internal buffer for collection on a
+dequeue call, as long as the mbuf has a valid rte_mbuf_offload operation with
+a valid rte_cryptodev_session or rte_crypto_xform chain of operations.
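+
+As an illustration of the minimal transform handling described above, a NULL
+cipher-then-auth chain might be declared as in the hedged C sketch below.
+Session creation, queue pair configuration and the enqueue path are assumed to
+be handled elsewhere, and the variable names are illustrative only.
+
+.. code-block:: c
+
+    #include <rte_crypto_sym.h>
+
+    /* NULL authentication transform, second link in the chain */
+    struct rte_crypto_sym_xform auth_xform = {
+        .next = NULL,
+        .type = RTE_CRYPTO_SYM_XFORM_AUTH,
+        .auth = {
+            .op = RTE_CRYPTO_AUTH_OP_GENERATE,
+            .algo = RTE_CRYPTO_AUTH_NULL,
+        },
+    };
+
+    /* NULL cipher transform, chained in front of the auth transform */
+    struct rte_crypto_sym_xform cipher_xform = {
+        .next = &auth_xform,
+        .type = RTE_CRYPTO_SYM_XFORM_CIPHER,
+        .cipher = {
+            .op = RTE_CRYPTO_CIPHER_OP_ENCRYPT,
+            .algo = RTE_CRYPTO_CIPHER_NULL,
+        },
+    };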
+ +Features +-------- + +Modes: + +* RTE_CRYPTO_XFORM_CIPHER ONLY +* RTE_CRYPTO_XFORM_AUTH ONLY +* RTE_CRYPTO_XFORM_CIPHER THEN RTE_CRYPTO_XFORM_AUTH +* RTE_CRYPTO_XFORM_AUTH THEN RTE_CRYPTO_XFORM_CIPHER + +Cipher algorithms: + +* RTE_CRYPTO_CIPHER_NULL + +Authentication algorithms: + +* RTE_CRYPTO_AUTH_NULL + +Limitations +----------- + +* Only in-place is currently supported (destination address is the same as + source address). + +Installation +------------ + +The Null Crypto PMD is enabled and built by default in both the Linux and +FreeBSD builds. + +Initialization +-------------- + +To use the PMD in an application, user must: + +* Call rte_vdev_init("crypto_null") within the application. + +* Use --vdev="crypto_null" in the EAL options, which will call rte_vdev_init() internally. + +The following parameters (all optional) can be provided in the previous two calls: + +* socket_id: Specify the socket where the memory for the device is going to be allocated + (by default, socket_id will be the socket where the core that is creating the PMD is running on). + +* max_nb_queue_pairs: Specify the maximum number of queue pairs in the device (8 by default). + +* max_nb_sessions: Specify the maximum number of sessions that can be created (2048 by default). + +Example: + +.. code-block:: console + + ./l2fwd-crypto -l 1 -n 4 --vdev="crypto_null,socket_id=0,max_nb_sessions=128" \ + -- -p 1 --cdev SW --chain CIPHER_ONLY --cipher_algo "null" diff --git a/src/spdk/dpdk/doc/guides/cryptodevs/octeontx.rst b/src/spdk/dpdk/doc/guides/cryptodevs/octeontx.rst new file mode 100644 index 000000000..4fa199e3c --- /dev/null +++ b/src/spdk/dpdk/doc/guides/cryptodevs/octeontx.rst @@ -0,0 +1,147 @@ +.. SPDX-License-Identifier: BSD-3-Clause + Copyright(c) 2018 Cavium, Inc + +Cavium OCTEON TX Crypto Poll Mode Driver +======================================== + +The OCTEON TX crypto poll mode driver provides support for offloading +cryptographic operations to cryptographic accelerator units on +**OCTEON TX** :sup:`®` family of processors (CN8XXX). The OCTEON TX crypto +poll mode driver enqueues the crypto request to this accelerator and dequeues +the response once the operation is completed. 
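+
+The enqueue/dequeue model described above follows the standard cryptodev burst
+API. As a point of reference only, a polling loop might look like the hedged C
+sketch below; ``process_burst()`` is a hypothetical helper name, and the
+device, queue pair and operations are assumed to be prepared elsewhere.
+
+.. code-block:: c
+
+    #include <rte_cryptodev.h>
+
+    static uint16_t
+    process_burst(uint8_t dev_id, uint16_t qp_id,
+                  struct rte_crypto_op **ops, uint16_t nb_ops)
+    {
+        uint16_t enq, deq = 0;
+
+        /* hand the requests to the accelerator... */
+        enq = rte_cryptodev_enqueue_burst(dev_id, qp_id, ops, nb_ops);
+
+        /* ...and poll until every enqueued response has been returned */
+        while (deq < enq)
+            deq += rte_cryptodev_dequeue_burst(dev_id, qp_id,
+                                               ops + deq, enq - deq);
+
+        return deq;
+    }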
+ +Supported Symmetric Crypto Algorithms +------------------------------------- + +Cipher Algorithms +~~~~~~~~~~~~~~~~~ + +* ``RTE_CRYPTO_CIPHER_NULL`` +* ``RTE_CRYPTO_CIPHER_3DES_CBC`` +* ``RTE_CRYPTO_CIPHER_3DES_ECB`` +* ``RTE_CRYPTO_CIPHER_AES_CBC`` +* ``RTE_CRYPTO_CIPHER_AES_CTR`` +* ``RTE_CRYPTO_CIPHER_AES_XTS`` +* ``RTE_CRYPTO_CIPHER_DES_CBC`` +* ``RTE_CRYPTO_CIPHER_KASUMI_F8`` +* ``RTE_CRYPTO_CIPHER_SNOW3G_UEA2`` +* ``RTE_CRYPTO_CIPHER_ZUC_EEA3`` + +Hash Algorithms +~~~~~~~~~~~~~~~ + +* ``RTE_CRYPTO_AUTH_NULL`` +* ``RTE_CRYPTO_AUTH_AES_GMAC`` +* ``RTE_CRYPTO_AUTH_KASUMI_F9`` +* ``RTE_CRYPTO_AUTH_MD5`` +* ``RTE_CRYPTO_AUTH_MD5_HMAC`` +* ``RTE_CRYPTO_AUTH_SHA1`` +* ``RTE_CRYPTO_AUTH_SHA1_HMAC`` +* ``RTE_CRYPTO_AUTH_SHA224`` +* ``RTE_CRYPTO_AUTH_SHA224_HMAC`` +* ``RTE_CRYPTO_AUTH_SHA256`` +* ``RTE_CRYPTO_AUTH_SHA256_HMAC`` +* ``RTE_CRYPTO_AUTH_SHA384`` +* ``RTE_CRYPTO_AUTH_SHA384_HMAC`` +* ``RTE_CRYPTO_AUTH_SHA512`` +* ``RTE_CRYPTO_AUTH_SHA512_HMAC`` +* ``RTE_CRYPTO_AUTH_SNOW3G_UIA2`` +* ``RTE_CRYPTO_AUTH_ZUC_EIA3`` + +AEAD Algorithms +~~~~~~~~~~~~~~~ + +* ``RTE_CRYPTO_AEAD_AES_GCM`` + +Supported Asymmetric Crypto Algorithms +-------------------------------------- + +* ``RTE_CRYPTO_ASYM_XFORM_RSA`` +* ``RTE_CRYPTO_ASYM_XFORM_MODEX`` + +Config flags +------------ + +For compiling the OCTEON TX crypto poll mode driver, please check if the +CONFIG_RTE_LIBRTE_PMD_OCTEONTX_CRYPTO setting is set to `y` in +config/common_base file. + +* ``CONFIG_RTE_LIBRTE_PMD_OCTEONTX_CRYPTO=y`` + +Compilation +----------- + +The OCTEON TX crypto poll mode driver can be compiled either natively on +**OCTEON TX** :sup:`®` board or cross-compiled on an x86 based platform. + +Refer :doc:`../platform/octeontx` for details about setting up the platform +and building DPDK applications. + +.. note:: + + OCTEON TX crypto PF driver needs microcode to be available at `/lib/firmware/` directory. + Refer SDK documents for further information. + +SDK and related information can be obtained from: `Cavium support site `_. + +Execution +--------- + +The number of crypto VFs to be enabled can be controlled by setting sysfs entry, +`sriov_numvfs`, for the corresponding PF driver. + +.. code-block:: console + + echo > /sys/bus/pci/devices//sriov_numvfs + +The device bus ID, `dev_bus_id`, to be used in the above step can be found out +by using dpdk-devbind.py script. The OCTEON TX crypto PF device need to be +identified and the corresponding device number can be used to tune various PF +properties. + + +Once the required VFs are enabled, dpdk-devbind.py script can be used to +identify the VFs. To be accessible from DPDK, VFs need to be bound to vfio-pci +driver: + +.. code-block:: console + + cd + ./usertools/dpdk-devbind.py -u + ./usertools/dpdk-devbind.py -b vfio-pci + +Appropriate huge page need to be setup in order to run the DPDK example +applications. + +.. code-block:: console + + echo 8 > /sys/kernel/mm/hugepages/hugepages-524288kB/nr_hugepages + mkdir /mnt/huge + mount -t hugetlbfs nodev /mnt/huge + +Example applications can now be executed with crypto operations offloaded to +OCTEON TX crypto PMD. + +.. code-block:: console + + ./build/ipsec-secgw --log-level=8 -c 0xff -- -P -p 0x3 -u 0x2 --config + "(1,0,0),(0,0,0)" -f ep1.cfg + +Testing +------- + +The symmetric crypto operations on OCTEON TX crypto PMD may be verified by running the test +application: + +.. 
code-block:: console + + ./test + RTE>>cryptodev_octeontx_autotest + +The asymmetric crypto operations on OCTEON TX crypto PMD may be verified by running the test +application: + +.. code-block:: console + + ./test + RTE>>cryptodev_octeontx_asym_autotest diff --git a/src/spdk/dpdk/doc/guides/cryptodevs/octeontx2.rst b/src/spdk/dpdk/doc/guides/cryptodevs/octeontx2.rst new file mode 100644 index 000000000..8bdb83f49 --- /dev/null +++ b/src/spdk/dpdk/doc/guides/cryptodevs/octeontx2.rst @@ -0,0 +1,159 @@ +.. SPDX-License-Identifier: BSD-3-Clause + Copyright(c) 2019 Marvell International Ltd. + + +Marvell OCTEON TX2 Crypto Poll Mode Driver +========================================== + +The OCTEON TX2 crypto poll mode driver provides support for offloading +cryptographic operations to cryptographic accelerator units on the +**OCTEON TX2** :sup:`®` family of processors (CN9XXX). + +More information about OCTEON TX2 SoCs may be obtained from ``_ + +Features +-------- + +The OCTEON TX2 crypto PMD has support for: + +Symmetric Crypto Algorithms +~~~~~~~~~~~~~~~~~~~~~~~~~~~ + +Cipher algorithms: + +* ``RTE_CRYPTO_CIPHER_NULL`` +* ``RTE_CRYPTO_CIPHER_3DES_CBC`` +* ``RTE_CRYPTO_CIPHER_3DES_ECB`` +* ``RTE_CRYPTO_CIPHER_AES_CBC`` +* ``RTE_CRYPTO_CIPHER_AES_CTR`` +* ``RTE_CRYPTO_CIPHER_AES_XTS`` +* ``RTE_CRYPTO_CIPHER_DES_CBC`` +* ``RTE_CRYPTO_CIPHER_KASUMI_F8`` +* ``RTE_CRYPTO_CIPHER_SNOW3G_UEA2`` +* ``RTE_CRYPTO_CIPHER_ZUC_EEA3`` + +Hash algorithms: + +* ``RTE_CRYPTO_AUTH_NULL`` +* ``RTE_CRYPTO_AUTH_AES_GMAC`` +* ``RTE_CRYPTO_AUTH_KASUMI_F9`` +* ``RTE_CRYPTO_AUTH_MD5`` +* ``RTE_CRYPTO_AUTH_MD5_HMAC`` +* ``RTE_CRYPTO_AUTH_SHA1`` +* ``RTE_CRYPTO_AUTH_SHA1_HMAC`` +* ``RTE_CRYPTO_AUTH_SHA224`` +* ``RTE_CRYPTO_AUTH_SHA224_HMAC`` +* ``RTE_CRYPTO_AUTH_SHA256`` +* ``RTE_CRYPTO_AUTH_SHA256_HMAC`` +* ``RTE_CRYPTO_AUTH_SHA384`` +* ``RTE_CRYPTO_AUTH_SHA384_HMAC`` +* ``RTE_CRYPTO_AUTH_SHA512`` +* ``RTE_CRYPTO_AUTH_SHA512_HMAC`` +* ``RTE_CRYPTO_AUTH_SNOW3G_UIA2`` +* ``RTE_CRYPTO_AUTH_ZUC_EIA3`` + +AEAD algorithms: + +* ``RTE_CRYPTO_AEAD_AES_GCM`` + +Asymmetric Crypto Algorithms +~~~~~~~~~~~~~~~~~~~~~~~~~~~~ + +* ``RTE_CRYPTO_ASYM_XFORM_RSA`` +* ``RTE_CRYPTO_ASYM_XFORM_MODEX`` + + +Installation +------------ + +The OCTEON TX2 crypto PMD may be compiled natively on an OCTEON TX2 platform or +cross-compiled on an x86 platform. + +Enable OCTEON TX2 crypto PMD in your config file: + +* ``CONFIG_RTE_LIBRTE_PMD_OCTEONTX2_CRYPTO=y`` + +Refer to :doc:`../platform/octeontx2` for instructions to build your DPDK +application. + +.. note:: + + The OCTEON TX2 crypto PMD uses services from the kernel mode OCTEON TX2 + crypto PF driver in linux. This driver is included in the OCTEON TX SDK. + +Initialization +-------------- + +List the CPT PF devices available on your OCTEON TX2 platform: + +.. code-block:: console + + lspci -d:a0fd + +``a0fd`` is the CPT PF device id. You should see output similar to: + +.. code-block:: console + + 0002:10:00.0 Class 1080: Device 177d:a0fd + +Set ``sriov_numvfs`` on the CPT PF device, to create a VF: + +.. code-block:: console + + echo 1 > /sys/bus/pci/drivers/octeontx2-cpt/0002:10:00.0/sriov_numvfs + +Bind the CPT VF device to the vfio_pci driver: + +.. code-block:: console + + echo '177d a0fe' > /sys/bus/pci/drivers/vfio-pci/new_id + echo 0002:10:00.1 > /sys/bus/pci/devices/0002:10:00.1/driver/unbind + echo 0002:10:00.1 > /sys/bus/pci/drivers/vfio-pci/bind + +Another way to bind the VF would be to use the ``dpdk-devbind.py`` script: + +.. 
code-block:: console + + cd + ./usertools/dpdk-devbind.py -u 0002:10:00.1 + ./usertools/dpdk-devbind.py -b vfio-pci 0002:10.00.1 + +.. note:: + + Ensure that sufficient huge pages are available for your application:: + + echo 8 > /sys/kernel/mm/hugepages/hugepages-524288kB/nr_hugepages + + Refer to :ref:`linux_gsg_hugepages` for more details. + +Debugging Options +----------------- + +.. _table_octeontx2_crypto_debug_options: + +.. table:: OCTEON TX2 crypto PMD debug options + + +---+------------+-------------------------------------------------------+ + | # | Component | EAL log command | + +===+============+=======================================================+ + | 1 | CPT | --log-level='pmd\.crypto\.octeontx2,8' | + +---+------------+-------------------------------------------------------+ + +Testing +------- + +The symmetric crypto operations on OCTEON TX2 crypto PMD may be verified by running the test +application: + +.. code-block:: console + + ./test + RTE>>cryptodev_octeontx2_autotest + +The asymmetric crypto operations on OCTEON TX2 crypto PMD may be verified by running the test +application: + +.. code-block:: console + + ./test + RTE>>cryptodev_octeontx2_asym_autotest diff --git a/src/spdk/dpdk/doc/guides/cryptodevs/openssl.rst b/src/spdk/dpdk/doc/guides/cryptodevs/openssl.rst new file mode 100644 index 000000000..740729481 --- /dev/null +++ b/src/spdk/dpdk/doc/guides/cryptodevs/openssl.rst @@ -0,0 +1,113 @@ +.. SPDX-License-Identifier: BSD-3-Clause + Copyright(c) 2016 Intel Corporation. + +OpenSSL Crypto Poll Mode Driver +=============================== + +This code provides the initial implementation of the openssl poll mode +driver. All cryptography operations are using Openssl library crypto API. +Each algorithm uses EVP interface from openssl API - which is recommended +by Openssl maintainers. + +For more details about openssl library please visit openssl webpage: +https://www.openssl.org/ + +Features +-------- + +OpenSSL PMD has support for: + +Supported cipher algorithms: + +* ``RTE_CRYPTO_CIPHER_3DES_CBC`` +* ``RTE_CRYPTO_CIPHER_AES_CBC`` +* ``RTE_CRYPTO_CIPHER_AES_CTR`` +* ``RTE_CRYPTO_CIPHER_3DES_CTR`` +* ``RTE_CRYPTO_CIPHER_DES_DOCSISBPI`` + +Supported authentication algorithms: + +* ``RTE_CRYPTO_AUTH_AES_GMAC`` +* ``RTE_CRYPTO_AUTH_MD5`` +* ``RTE_CRYPTO_AUTH_SHA1`` +* ``RTE_CRYPTO_AUTH_SHA224`` +* ``RTE_CRYPTO_AUTH_SHA256`` +* ``RTE_CRYPTO_AUTH_SHA384`` +* ``RTE_CRYPTO_AUTH_SHA512`` +* ``RTE_CRYPTO_AUTH_MD5_HMAC`` +* ``RTE_CRYPTO_AUTH_SHA1_HMAC`` +* ``RTE_CRYPTO_AUTH_SHA224_HMAC`` +* ``RTE_CRYPTO_AUTH_SHA256_HMAC`` +* ``RTE_CRYPTO_AUTH_SHA384_HMAC`` +* ``RTE_CRYPTO_AUTH_SHA512_HMAC`` + +Supported AEAD algorithms: + +* ``RTE_CRYPTO_AEAD_AES_GCM`` +* ``RTE_CRYPTO_AEAD_AES_CCM`` + +Supported Asymmetric Crypto algorithms: + +* ``RTE_CRYPTO_ASYM_XFORM_RSA`` +* ``RTE_CRYPTO_ASYM_XFORM_DSA`` +* ``RTE_CRYPTO_ASYM_XFORM_DH`` +* ``RTE_CRYPTO_ASYM_XFORM_MODINV`` +* ``RTE_CRYPTO_ASYM_XFORM_MODEX`` + + +Installation +------------ + +To compile openssl PMD, it has to be enabled in the config/common_base file +and appropriate openssl packages have to be installed in the build environment. + +The newest openssl library version is supported: + +* 1.0.2h-fips 3 May 2016. + +Older versions that were also verified: + +* 1.0.1f 6 Jan 2014 +* 1.0.1 14 Mar 2012 + +For Ubuntu 14.04 LTS these packages have to be installed in the build system: + +.. 
code-block:: console + + sudo apt-get install openssl + sudo apt-get install libc6-dev-i386 # for i686-native-linux-gcc target + +This code was also verified on Fedora 24. +This code has NOT been verified on FreeBSD yet. + +Initialization +-------------- + +User can use app/test application to check how to use this pmd and to verify +crypto processing. + +Test name is cryptodev_openssl_autotest. +For asymmetric crypto operations testing, run cryptodev_openssl_asym_autotest. + +To verify real traffic l2fwd-crypto example can be used with this command: + +.. code-block:: console + + sudo ./build/l2fwd-crypto -l 0-1 -n 4 --vdev "crypto_openssl" + --vdev "crypto_openssl"-- -p 0x3 --chain CIPHER_HASH + --cipher_op ENCRYPT --cipher_algo AES_CBC + --cipher_key 00:01:02:03:04:05:06:07:08:09:0a:0b:0c:0d:0e:0f + --iv 00:01:02:03:04:05:06:07:08:09:0a:0b:0c:0d:0e:ff + --auth_op GENERATE --auth_algo SHA1_HMAC + --auth_key 11:11:11:11:11:11:11:11:11:11:11:11:11:11:11:11:11:11:11:11:11:11 + :11:11:11:11:11:11:11:11:11:11:11:11:11:11:11:11:11:11:11:11:11:11:11:11:11 + :11:11:11:11:11:11:11:11:11:11:11:11:11:11:11:11:11 + +Limitations +----------- + +* Maximum number of sessions is 2048. +* Chained mbufs are supported only for source mbuf (destination must be + contiguous). +* Hash only is not supported for GCM and GMAC. +* Cipher only is not supported for GCM and GMAC. diff --git a/src/spdk/dpdk/doc/guides/cryptodevs/overview.rst b/src/spdk/dpdk/doc/guides/cryptodevs/overview.rst new file mode 100644 index 000000000..e2a1e08ec --- /dev/null +++ b/src/spdk/dpdk/doc/guides/cryptodevs/overview.rst @@ -0,0 +1,76 @@ +.. SPDX-License-Identifier: BSD-3-Clause + Copyright(c) 2016-2017 Intel Corporation. + +Crypto Device Supported Functionality Matrices +============================================== + +Supported Feature Flags +----------------------- + +.. _table_crypto_pmd_features: + +.. include:: overview_feature_table.txt + +.. Note:: + + - "In Place SGL" feature flag stands for "In place Scatter-gather list", + which means that an input buffer can consist of multiple segments, + being the operation in-place (input address = output address). + + - "OOP SGL In SGL Out" feature flag stands for + "Out-of-place Scatter-gather list Input, Scatter-gather list Output", + which means pmd supports different scatter-gather styled input and output buffers + (i.e. both can consists of multiple segments). + + - "OOP SGL In LB Out" feature flag stands for + "Out-of-place Scatter-gather list Input, Linear Buffers Output", + which means PMD supports input from scatter-gathered styled buffers, + outputting linear buffers (i.e. single segment). + + - "OOP LB In SGL Out" feature flag stands for + "Out-of-place Linear Buffers Input, Scatter-gather list Output", + which means PMD supports input from linear buffer, outputting + scatter-gathered styled buffers. + + - "OOP LB In LB Out" feature flag stands for + "Out-of-place Linear Buffers Input, Linear Buffers Output", + which means that Out-of-place operation is supported, + with linear input and output buffers. + + - "RSA PRIV OP KEY EXP" feature flag means PMD support RSA private key + operation (Sign and Decrypt) using exponent key type only. + + - "RSA PRIV OP KEY QT" feature flag means PMD support RSA private key + operation (Sign and Decrypt) using quintuple (crt) type key only. + + - "Digest encrypted" feature flag means PMD support hash-cipher cases, + where generated digest is appended to and encrypted with the data. 
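+
+These feature flags can be queried at run time through the cryptodev info API
+before deciding how to lay out input and output buffers. The following hedged
+C sketch shows one possible check; ``supports_sgl_in_place()`` is a
+hypothetical helper name used only for illustration.
+
+.. code-block:: c
+
+    #include <rte_cryptodev.h>
+
+    static int
+    supports_sgl_in_place(uint8_t dev_id)
+    {
+        struct rte_cryptodev_info dev_info;
+
+        rte_cryptodev_info_get(dev_id, &dev_info);
+
+        /* "In Place SGL": multi-segment input, output written in place */
+        if (dev_info.feature_flags & RTE_CRYPTODEV_FF_IN_PLACE_SGL)
+            return 1;
+
+        /* "OOP LB In LB Out": out-of-place with linear buffers only */
+        if (dev_info.feature_flags & RTE_CRYPTODEV_FF_OOP_LB_IN_LB_OUT)
+            return 0;
+
+        return -1; /* neither buffer layout is advertised */
+    }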
+ + +Supported Cipher Algorithms +--------------------------- + +.. _table_crypto_pmd_cipher_algos: + +.. include:: overview_cipher_table.txt + +Supported Authentication Algorithms +----------------------------------- + +.. _table_crypto_pmd_auth_algos: + +.. include:: overview_auth_table.txt + +Supported AEAD Algorithms +------------------------- + +.. _table_crypto_pmd_aead_algos: + +.. include:: overview_aead_table.txt + +Supported Asymmetric Algorithms +------------------------------- + +.. _table_crypto_pmd_asym_algos: + +.. include:: overview_asym_table.txt diff --git a/src/spdk/dpdk/doc/guides/cryptodevs/qat.rst b/src/spdk/dpdk/doc/guides/cryptodevs/qat.rst new file mode 100644 index 000000000..c2cc3d5ca --- /dev/null +++ b/src/spdk/dpdk/doc/guides/cryptodevs/qat.rst @@ -0,0 +1,698 @@ +.. SPDX-License-Identifier: BSD-3-Clause + Copyright(c) 2015-2019 Intel Corporation. + +Intel(R) QuickAssist (QAT) Crypto Poll Mode Driver +================================================== + +QAT documentation consists of three parts: + +* Details of the symmetric and asymmetric crypto services below. +* Details of the :doc:`compression service <../compressdevs/qat_comp>` + in the compressdev drivers section. +* Details of building the common QAT infrastructure and the PMDs to support the + above services. See :ref:`building_qat` below. + + +Symmetric Crypto Service on QAT +------------------------------- + +The QAT symmetric crypto PMD (hereafter referred to as `QAT SYM [PMD]`) provides +poll mode crypto driver support for the following hardware accelerator devices: + +* ``Intel QuickAssist Technology DH895xCC`` +* ``Intel QuickAssist Technology C62x`` +* ``Intel QuickAssist Technology C3xxx`` +* ``Intel QuickAssist Technology D15xx`` +* ``Intel QuickAssist Technology P5xxx`` + + +Features +~~~~~~~~ + +The QAT SYM PMD has support for: + +Cipher algorithms: + +* ``RTE_CRYPTO_CIPHER_3DES_CBC`` +* ``RTE_CRYPTO_CIPHER_3DES_CTR`` +* ``RTE_CRYPTO_CIPHER_AES128_CBC`` +* ``RTE_CRYPTO_CIPHER_AES192_CBC`` +* ``RTE_CRYPTO_CIPHER_AES256_CBC`` +* ``RTE_CRYPTO_CIPHER_AES128_CTR`` +* ``RTE_CRYPTO_CIPHER_AES192_CTR`` +* ``RTE_CRYPTO_CIPHER_AES256_CTR`` +* ``RTE_CRYPTO_CIPHER_AES_XTS`` +* ``RTE_CRYPTO_CIPHER_SNOW3G_UEA2`` +* ``RTE_CRYPTO_CIPHER_NULL`` +* ``RTE_CRYPTO_CIPHER_KASUMI_F8`` +* ``RTE_CRYPTO_CIPHER_DES_CBC`` +* ``RTE_CRYPTO_CIPHER_AES_DOCSISBPI`` +* ``RTE_CRYPTO_CIPHER_DES_DOCSISBPI`` +* ``RTE_CRYPTO_CIPHER_ZUC_EEA3`` + +Hash algorithms: + +* ``RTE_CRYPTO_AUTH_SHA1`` +* ``RTE_CRYPTO_AUTH_SHA1_HMAC`` +* ``RTE_CRYPTO_AUTH_SHA224`` +* ``RTE_CRYPTO_AUTH_SHA224_HMAC`` +* ``RTE_CRYPTO_AUTH_SHA256`` +* ``RTE_CRYPTO_AUTH_SHA256_HMAC`` +* ``RTE_CRYPTO_AUTH_SHA384`` +* ``RTE_CRYPTO_AUTH_SHA384_HMAC`` +* ``RTE_CRYPTO_AUTH_SHA512`` +* ``RTE_CRYPTO_AUTH_SHA512_HMAC`` +* ``RTE_CRYPTO_AUTH_AES_XCBC_MAC`` +* ``RTE_CRYPTO_AUTH_SNOW3G_UIA2`` +* ``RTE_CRYPTO_AUTH_MD5_HMAC`` +* ``RTE_CRYPTO_AUTH_NULL`` +* ``RTE_CRYPTO_AUTH_KASUMI_F9`` +* ``RTE_CRYPTO_AUTH_AES_GMAC`` +* ``RTE_CRYPTO_AUTH_ZUC_EIA3`` +* ``RTE_CRYPTO_AUTH_AES_CMAC`` + +Supported AEAD algorithms: + +* ``RTE_CRYPTO_AEAD_AES_GCM`` +* ``RTE_CRYPTO_AEAD_AES_CCM`` + + +Supported Chains +~~~~~~~~~~~~~~~~ + +All the usual chains are supported and also some mixed chains: + +.. 
table:: Supported hash-cipher chains for wireless digest-encrypted cases + + +------------------+-----------+-------------+----------+----------+ + | Cipher algorithm | NULL AUTH | SNOW3G UIA2 | ZUC EIA3 | AES CMAC | + +==================+===========+=============+==========+==========+ + | NULL CIPHER | Y | 2&3 | 2&3 | Y | + +------------------+-----------+-------------+----------+----------+ + | SNOW3G UEA2 | 2&3 | Y | 2&3 | 2&3 | + +------------------+-----------+-------------+----------+----------+ + | ZUC EEA3 | 2&3 | 2&3 | 2&3 | 2&3 | + +------------------+-----------+-------------+----------+----------+ + | AES CTR | Y | 2&3 | 2&3 | Y | + +------------------+-----------+-------------+----------+----------+ + +* The combinations marked as "Y" are supported on all QAT hardware versions. +* The combinations marked as "2&3" are supported on GEN2/GEN3 QAT hardware only. + + +Limitations +~~~~~~~~~~~ + +* Only supports the session-oriented API implementation (session-less APIs are not supported). +* SNOW 3G (UEA2), KASUMI (F8) and ZUC (EEA3) supported only if cipher length and offset fields are byte-multiple. +* SNOW 3G (UIA2) and ZUC (EIA3) supported only if hash length and offset fields are byte-multiple. +* No BSD support as BSD QAT kernel driver not available. +* ZUC EEA3/EIA3 is not supported by dh895xcc devices +* Maximum additional authenticated data (AAD) for GCM is 240 bytes long and must be passed to the device in a buffer rounded up to the nearest block-size multiple (x16) and padded with zeros. +* Queue-pairs are thread-safe on Intel CPUs but Queues are not (that is, within a single + queue-pair all enqueues to the TX queue must be done from one thread and all dequeues + from the RX queue must be done from one thread, but enqueues and dequeues may be done + in different threads.) +* A GCM limitation exists, but only in the case where there are multiple + generations of QAT devices on a single platform. + To optimise performance, the GCM crypto session should be initialised for the + device generation to which the ops will be enqueued. Specifically if a GCM + session is initialised on a GEN2 device, but then attached to an op enqueued + to a GEN3 device, it will work but cannot take advantage of hardware + optimisations in the GEN3 device. And if a GCM session is initialised on a + GEN3 device, then attached to an op sent to a GEN1/GEN2 device, it will not be + enqueued to the device and will be marked as failed. The simplest way to + mitigate this is to use the bdf whitelist to avoid mixing devices of different + generations in the same process if planning to use for GCM. +* The mixed algo feature on GEN2 is not supported by all kernel drivers. Check + the notes under the Available Kernel Drivers table below for specific details. + +Extra notes on KASUMI F9 +~~~~~~~~~~~~~~~~~~~~~~~~ + +When using KASUMI F9 authentication algorithm, the input buffer must be +constructed according to the +`3GPP KASUMI specification `_ +(section 4.4, page 13). The input buffer has to have COUNT (4 bytes), +FRESH (4 bytes), MESSAGE and DIRECTION (1 bit) concatenated. After the DIRECTION +bit, a single '1' bit is appended, followed by between 0 and 7 '0' bits, so that +the total length of the buffer is multiple of 8 bits. Note that the actual +message can be any length, specified in bits. 
+ +Once this buffer is passed this way, when creating the crypto operation, +length of data to authenticate "op.sym.auth.data.length" must be the length +of all the items described above, including the padding at the end. +Also, offset of data to authenticate "op.sym.auth.data.offset" +must be such that points at the start of the COUNT bytes. + +Asymmetric Crypto Service on QAT +-------------------------------- + +The QAT asymmetric crypto PMD (hereafter referred to as `QAT ASYM [PMD]`) provides +poll mode crypto driver support for the following hardware accelerator devices: + +* ``Intel QuickAssist Technology DH895xCC`` +* ``Intel QuickAssist Technology C62x`` +* ``Intel QuickAssist Technology C3xxx`` +* ``Intel QuickAssist Technology D15xx`` +* ``Intel QuickAssist Technology P5xxx`` + +The QAT ASYM PMD has support for: + +* ``RTE_CRYPTO_ASYM_XFORM_MODEX`` +* ``RTE_CRYPTO_ASYM_XFORM_MODINV`` + +Limitations +~~~~~~~~~~~ + +* Big integers longer than 4096 bits are not supported. +* Queue-pairs are thread-safe on Intel CPUs but Queues are not (that is, within a single + queue-pair all enqueues to the TX queue must be done from one thread and all dequeues + from the RX queue must be done from one thread, but enqueues and dequeues may be done + in different threads.) +* RSA-2560, RSA-3584 are not supported + +.. _building_qat: + +Building PMDs on QAT +-------------------- + +A QAT device can host multiple acceleration services: + +* symmetric cryptography +* data compression +* asymmetric cryptography + +These services are provided to DPDK applications via PMDs which register to +implement the corresponding cryptodev and compressdev APIs. The PMDs use +common QAT driver code which manages the QAT PCI device. They also depend on a +QAT kernel driver being installed on the platform, see :ref:`qat_kernel` below. + + +Configuring and Building the DPDK QAT PMDs +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ + + +Further information on configuring, building and installing DPDK is described +:doc:`here <../linux_gsg/build_dpdk>`. + + +Quick instructions for QAT cryptodev PMD are as follows: + +.. code-block:: console + + cd to the top-level DPDK directory + make defconfig + sed -i 's,\(CONFIG_RTE_LIBRTE_PMD_QAT_SYM\)=n,\1=y,' build/.config + or/and + sed -i 's,\(CONFIG_RTE_LIBRTE_PMD_QAT_ASYM\)=n,\1=y,' build/.config + make + +Quick instructions for QAT compressdev PMD are as follows: + +.. code-block:: console + + cd to the top-level DPDK directory + make defconfig + make + + +.. _building_qat_config: + +Build Configuration +~~~~~~~~~~~~~~~~~~~ + +These are the build configuration options affecting QAT, and their default values: + +.. code-block:: console + + CONFIG_RTE_LIBRTE_PMD_QAT=y + CONFIG_RTE_LIBRTE_PMD_QAT_SYM=n + CONFIG_RTE_LIBRTE_PMD_QAT_ASYM=n + CONFIG_RTE_PMD_QAT_MAX_PCI_DEVICES=48 + CONFIG_RTE_PMD_QAT_COMP_IM_BUFFER_SIZE=65536 + +CONFIG_RTE_LIBRTE_PMD_QAT must be enabled for any QAT PMD to be built. + +Both QAT SYM PMD and QAT ASYM PMD have an external dependency on libcrypto, so are not +built by default. CONFIG_RTE_LIBRTE_PMD_QAT_SYM/ASYM should be enabled to build them. + +The QAT compressdev PMD has no external dependencies, so needs no configuration +options and is built by default. + +The number of VFs per PF varies - see table below. If multiple QAT packages are +installed on a platform then CONFIG_RTE_PMD_QAT_MAX_PCI_DEVICES should be +adjusted to the number of VFs which the QAT common code will need to handle. + +.. 
Note:: + + There are separate config items (not QAT-specific) for max cryptodevs + CONFIG_RTE_CRYPTO_MAX_DEVS and max compressdevs CONFIG_RTE_COMPRESS_MAX_DEVS, + if necessary these should be adjusted to handle the total of QAT and other + devices which the process will use. In particular for crypto, where each + QAT VF may expose two crypto devices, sym and asym, it may happen that the + number of devices will be bigger than MAX_DEVS and the process will show an error + during PMD initialisation. To avoid this problem CONFIG_RTE_CRYPTO_MAX_DEVS may be + increased or -w, pci-whitelist domain:bus:devid:func option may be used. + + +QAT compression PMD needs intermediate buffers to support Deflate compression +with Dynamic Huffman encoding. CONFIG_RTE_PMD_QAT_COMP_IM_BUFFER_SIZE +specifies the size of a single buffer, the PMD will allocate a multiple of these, +plus some extra space for associated meta-data. For GEN2 devices, 20 buffers are +allocated while for GEN1 devices, 12 buffers are allocated, plus 1472 bytes overhead. + +.. Note:: + + If the compressed output of a Deflate operation using Dynamic Huffman + Encoding is too big to fit in an intermediate buffer, then the + operation will be split into smaller operations and their results will + be merged afterwards. + This is not possible if any checksum calculation was requested - in such + case the code falls back to fixed compression. + To avoid this less performant case, applications should configure + the intermediate buffer size to be larger than the expected input data size + (compressed output size is usually unknown, so the only option is to make + larger than the input size). + + +Running QAT PMD with minimum threshold for burst size +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ + +If only a small number or packets can be enqueued. Each enqueue causes an expensive MMIO write. +These MMIO write occurrences can be optimised by setting any of the following parameters: + +- qat_sym_enq_threshold +- qat_asym_enq_threshold +- qat_comp_enq_threshold + +When any of these parameters is set rte_cryptodev_enqueue_burst function will +return 0 (thereby avoiding an MMIO) if the device is congested and number of packets +possible to enqueue is smaller. +To use this feature the user must set the parameter on process start as a device additional parameter:: + + -w 03:01.1,qat_sym_enq_threshold=32,qat_comp_enq_threshold=16 + +All parameters can be used with the same device regardless of order. Parameters are separated +by comma. When the same parameter is used more than once first occurrence of the parameter +is used. +Maximum threshold that can be set is 32. + + +Device and driver naming +~~~~~~~~~~~~~~~~~~~~~~~~ + +* The qat cryptodev symmetric crypto driver name is "crypto_qat". +* The qat cryptodev asymmetric crypto driver name is "crypto_qat_asym". + +The "rte_cryptodev_devices_get()" returns the devices exposed by either of these drivers. + +* Each qat sym crypto device has a unique name, in format + "_", e.g. "0000:41:01.0_qat_sym". +* Each qat asym crypto device has a unique name, in format + "_", e.g. "0000:41:01.0_qat_asym". + This name can be passed to "rte_cryptodev_get_dev_id()" to get the device_id. + +.. Note:: + + The cryptodev driver name is passed to the dpdk-test-crypto-perf tool in the "-devtype" parameter. + + The qat crypto device name is in the format of the slave parameter passed to the crypto scheduler. + +* The qat compressdev driver name is "compress_qat". 
+ The rte_compressdev_devices_get() returns the devices exposed by this driver. + +* Each qat compression device has a unique name, in format + _, e.g. "0000:41:01.0_qat_comp". + This name can be passed to rte_compressdev_get_dev_id() to get the device_id. + +.. _qat_kernel: + +Dependency on the QAT kernel driver +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ + +To use QAT an SRIOV-enabled QAT kernel driver is required. The VF +devices created and initialised by this driver will be used by the QAT PMDs. + +Instructions for installation are below, but first an explanation of the +relationships between the PF/VF devices and the PMDs visible to +DPDK applications. + +Each QuickAssist PF device exposes a number of VF devices. Each VF device can +enable one symmetric cryptodev PMD and/or one asymmetric cryptodev PMD and/or +one compressdev PMD. +These QAT PMDs share the same underlying device and pci-mgmt code, but are +enumerated independently on their respective APIs and appear as independent +devices to applications. + +.. Note:: + + Each VF can only be used by one DPDK process. It is not possible to share + the same VF across multiple processes, even if these processes are using + different acceleration services. + + Conversely one DPDK process can use one or more QAT VFs and can expose both + cryptodev and compressdev instances on each of those VFs. + + +Available kernel drivers +~~~~~~~~~~~~~~~~~~~~~~~~ + +Kernel drivers for each device for each service are listed in the following table. (Scroll right +to see the full table) + + +.. _table_qat_pmds_drivers: + +.. table:: QAT device generations, devices and drivers + + +-----+-----+-----+-----+----------+---------------+---------------+------------+--------+------+--------+--------+ + | S | A | C | Gen | Device | Driver/ver | Kernel Module | Pci Driver | PF Did | #PFs | VF Did | VFs/PF | + +=====+=====+=====+=====+==========+===============+===============+============+========+======+========+========+ + | Yes | No | No | 1 | DH895xCC | linux/4.4+ | qat_dh895xcc | dh895xcc | 435 | 1 | 443 | 32 | + +-----+-----+-----+-----+----------+---------------+---------------+------------+--------+------+--------+--------+ + | Yes | Yes | No | " | " | 01.org/4.2.0+ | " | " | " | " | " | " | + +-----+-----+-----+-----+----------+---------------+---------------+------------+--------+------+--------+--------+ + | Yes | Yes | Yes | " | " | 01.org/4.3.0+ | " | " | " | " | " | " | + +-----+-----+-----+-----+----------+---------------+---------------+------------+--------+------+--------+--------+ + | Yes | No | No | 2 | C62x | linux/4.5+ | qat_c62x | c6xx | 37c8 | 3 | 37c9 | 16 | + +-----+-----+-----+-----+----------+---------------+---------------+------------+--------+------+--------+--------+ + | Yes | Yes | Yes | " | " | 01.org/4.2.0+ | " | " | " | " | " | " | + +-----+-----+-----+-----+----------+---------------+---------------+------------+--------+------+--------+--------+ + | Yes | No | No | 2 | C3xxx | linux/4.5+ | qat_c3xxx | c3xxx | 19e2 | 1 | 19e3 | 16 | + +-----+-----+-----+-----+----------+---------------+---------------+------------+--------+------+--------+--------+ + | Yes | Yes | Yes | " | " | 01.org/4.2.0+ | " | " | " | " | " | " | + +-----+-----+-----+-----+----------+---------------+---------------+------------+--------+------+--------+--------+ + | Yes | No | No | 2 | D15xx | p | qat_d15xx | d15xx | 6f54 | 1 | 6f55 | 16 | + +-----+-----+-----+-----+----------+---------------+---------------+------------+--------+------+--------+--------+ + | 
Yes | No | No | 3 | P5xxx | p | qat_p5xxx | p5xxx | 18a0 | 1 | 18a1 | 128 | + +-----+-----+-----+-----+----------+---------------+---------------+------------+--------+------+--------+--------+ + +* Note: Symmetric mixed crypto algorithms feature on Gen 2 works only with 01.org driver version 4.9.0+ + +The first 3 columns indicate the service: + +* S = Symmetric crypto service (via cryptodev API) +* A = Asymmetric crypto service (via cryptodev API) +* C = Compression service (via compressdev API) + +The ``Driver`` column indicates either the Linux kernel version in which +support for this device was introduced or a driver available on Intel's 01.org +website. There are both linux in-tree and 01.org kernel drivers available for some +devices. p = release pending. + +If you are running on a kernel which includes a driver for your device, see +`Installation using kernel.org driver`_ below. Otherwise see +`Installation using 01.org QAT driver`_. + + +Installation using kernel.org driver +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ + +The examples below are based on the C62x device, if you have a different device +use the corresponding values in the above table. + +In BIOS ensure that SRIOV is enabled and either: + +* Disable VT-d or +* Enable VT-d and set ``"intel_iommu=on iommu=pt"`` in the grub file. + +Check that the QAT driver is loaded on your system, by executing:: + + lsmod | grep qa + +You should see the kernel module for your device listed, e.g.:: + + qat_c62x 5626 0 + intel_qat 82336 1 qat_c62x + +Next, you need to expose the Virtual Functions (VFs) using the sysfs file system. + +First find the BDFs (Bus-Device-Function) of the physical functions (PFs) of +your device, e.g.:: + + lspci -d:37c8 + +You should see output similar to:: + + 1a:00.0 Co-processor: Intel Corporation Device 37c8 + 3d:00.0 Co-processor: Intel Corporation Device 37c8 + 3f:00.0 Co-processor: Intel Corporation Device 37c8 + +Enable the VFs for each PF by echoing the number of VFs per PF to the pci driver:: + + echo 16 > /sys/bus/pci/drivers/c6xx/0000:1a:00.0/sriov_numvfs + echo 16 > /sys/bus/pci/drivers/c6xx/0000:3d:00.0/sriov_numvfs + echo 16 > /sys/bus/pci/drivers/c6xx/0000:3f:00.0/sriov_numvfs + +Check that the VFs are available for use. For example ``lspci -d:37c9`` should +list 48 VF devices available for a ``C62x`` device. + +To complete the installation follow the instructions in +`Binding the available VFs to the DPDK UIO driver`_. + +.. Note:: + + If the QAT kernel modules are not loaded and you see an error like ``Failed + to load MMP firmware qat_895xcc_mmp.bin`` in kernel logs, this may be as a + result of not using a distribution, but just updating the kernel directly. + + Download firmware from the `kernel firmware repo + `_. + + Copy qat binaries to ``/lib/firmware``:: + + cp qat_895xcc.bin /lib/firmware + cp qat_895xcc_mmp.bin /lib/firmware + + Change to your linux source root directory and start the qat kernel modules:: + + insmod ./drivers/crypto/qat/qat_common/intel_qat.ko + insmod ./drivers/crypto/qat/qat_dh895xcc/qat_dh895xcc.ko + + +.. Note:: + + If you see the following warning in ``/var/log/messages`` it can be ignored: + ``IOMMU should be enabled for SR-IOV to work correctly``. + + +Installation using 01.org QAT driver +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ + +Download the latest QuickAssist Technology Driver from `01.org +`_. +Consult the *Getting Started Guide* at the same URL for further information. + +The steps below assume you are: + +* Building on a platform with one ``C62x`` device. 
+* Using package ``qat1.7.l.4.2.0-000xx.tar.gz``. +* On Fedora26 kernel ``4.11.11-300.fc26.x86_64``. + +In the BIOS ensure that SRIOV is enabled and VT-d is disabled. + +Uninstall any existing QAT driver, for example by running: + +* ``./installer.sh uninstall`` in the directory where originally installed. + + +Build and install the SRIOV-enabled QAT driver:: + + mkdir /QAT + cd /QAT + + # Copy the package to this location and unpack + tar zxof qat1.7.l.4.2.0-000xx.tar.gz + + ./configure --enable-icp-sriov=host + make install + +You can use ``cat /sys/kernel/debug/qat/version/fw`` to confirm the driver is correctly installed and is using firmware version 4.2.0. +You can use ``lspci -d:37c9`` to confirm the presence of the 16 VF devices available per ``C62x`` PF. + +Confirm the driver is correctly installed and is using firmware version 4.2.0:: + + cat /sys/kernel/debug/qat/version/fw + + +Confirm the presence of 48 VF devices - 16 per PF:: + + lspci -d:37c9 + + +To complete the installation - follow instructions in `Binding the available VFs to the DPDK UIO driver`_. + +.. Note:: + + If using a later kernel and the build fails with an error relating to + ``strict_stroul`` not being available apply the following patch: + + .. code-block:: diff + + /QAT/QAT1.6/quickassist/utilities/downloader/Target_CoreLibs/uclo/include/linux/uclo_platform.h + + #if LINUX_VERSION_CODE >= KERNEL_VERSION(3,18,5) + + #define STR_TO_64(str, base, num, endPtr) {endPtr=NULL; if (kstrtoul((str), (base), (num))) printk("Error strtoull convert %s\n", str); } + + #else + #if LINUX_VERSION_CODE >= KERNEL_VERSION(2,6,38) + #define STR_TO_64(str, base, num, endPtr) {endPtr=NULL; if (strict_strtoull((str), (base), (num))) printk("Error strtoull convert %s\n", str); } + #else + #if LINUX_VERSION_CODE >= KERNEL_VERSION(2,6,25) + #define STR_TO_64(str, base, num, endPtr) {endPtr=NULL; strict_strtoll((str), (base), (num));} + #else + #define STR_TO_64(str, base, num, endPtr) \ + do { \ + if (str[0] == '-') \ + { \ + *(num) = -(simple_strtoull((str+1), &(endPtr), (base))); \ + }else { \ + *(num) = simple_strtoull((str), &(endPtr), (base)); \ + } \ + } while(0) + + #endif + #endif + #endif + + +.. Note:: + + If the build fails due to missing header files you may need to do following:: + + sudo yum install zlib-devel + sudo yum install openssl-devel + sudo yum install libudev-devel + +.. Note:: + + If the build or install fails due to mismatching kernel sources you may need to do the following:: + + sudo yum install kernel-headers-`uname -r` + sudo yum install kernel-src-`uname -r` + sudo yum install kernel-devel-`uname -r` + + +Binding the available VFs to the DPDK UIO driver +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ + +Unbind the VFs from the stock driver so they can be bound to the uio driver. 
+ +For an Intel(R) QuickAssist Technology DH895xCC device +^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ + +The unbind command below assumes ``BDFs`` of ``03:01.00-03:04.07``, if your +VFs are different adjust the unbind command below:: + + for device in $(seq 1 4); do \ + for fn in $(seq 0 7); do \ + echo -n 0000:03:0${device}.${fn} > \ + /sys/bus/pci/devices/0000\:03\:0${device}.${fn}/driver/unbind; \ + done; \ + done + +For an Intel(R) QuickAssist Technology C62x device +^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ + +The unbind command below assumes ``BDFs`` of ``1a:01.00-1a:02.07``, +``3d:01.00-3d:02.07`` and ``3f:01.00-3f:02.07``, if your VFs are different +adjust the unbind command below:: + + for device in $(seq 1 2); do \ + for fn in $(seq 0 7); do \ + echo -n 0000:1a:0${device}.${fn} > \ + /sys/bus/pci/devices/0000\:1a\:0${device}.${fn}/driver/unbind; \ + + echo -n 0000:3d:0${device}.${fn} > \ + /sys/bus/pci/devices/0000\:3d\:0${device}.${fn}/driver/unbind; \ + + echo -n 0000:3f:0${device}.${fn} > \ + /sys/bus/pci/devices/0000\:3f\:0${device}.${fn}/driver/unbind; \ + done; \ + done + +For Intel(R) QuickAssist Technology C3xxx or D15xx device +^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ + +The unbind command below assumes ``BDFs`` of ``01:01.00-01:02.07``, if your +VFs are different adjust the unbind command below:: + + for device in $(seq 1 2); do \ + for fn in $(seq 0 7); do \ + echo -n 0000:01:0${device}.${fn} > \ + /sys/bus/pci/devices/0000\:01\:0${device}.${fn}/driver/unbind; \ + done; \ + done + +Bind to the DPDK uio driver +^^^^^^^^^^^^^^^^^^^^^^^^^^^ + +Install the DPDK igb_uio driver, bind the VF PCI Device id to it and use lspci +to confirm the VF devices are now in use by igb_uio kernel driver, +e.g. for the C62x device:: + + cd to the top-level DPDK directory + modprobe uio + insmod ./build/kmod/igb_uio.ko + echo "8086 37c9" > /sys/bus/pci/drivers/igb_uio/new_id + lspci -vvd:37c9 + + +Another way to bind the VFs to the DPDK UIO driver is by using the +``dpdk-devbind.py`` script:: + + cd to the top-level DPDK directory + ./usertools/dpdk-devbind.py -b igb_uio 0000:03:01.1 + +Testing +~~~~~~~ + +QAT SYM crypto PMD can be tested by running the test application:: + + make defconfig + make -j + cd ./build/app + ./test -l1 -n1 -w + RTE>>cryptodev_qat_autotest + +QAT ASYM crypto PMD can be tested by running the test application:: + + make defconfig + make -j + cd ./build/app + ./test -l1 -n1 -w + RTE>>cryptodev_qat_asym_autotest + +QAT compression PMD can be tested by running the test application:: + + make defconfig + sed -i 's,\(CONFIG_RTE_COMPRESSDEV_TEST\)=n,\1=y,' build/.config + make -j + cd ./build/app + ./test -l1 -n1 -w + RTE>>compressdev_autotest + + +Debugging +~~~~~~~~~ + +There are 2 sets of trace available via the dynamic logging feature: + +* pmd.qat_dp exposes trace on the data-path. +* pmd.qat_general exposes all other trace. + +pmd.qat exposes both sets of traces. +They can be enabled using the log-level option (where 8=maximum log level) on +the process cmdline, e.g. using any of the following:: + + --log-level="pmd.qat_general,8" + --log-level="pmd.qat_dp,8" + --log-level="pmd.qat,8" + +.. Note:: + + The global RTE_LOG_DP_LEVEL overrides data-path trace so must be set to + RTE_LOG_DEBUG to see all the trace. This variable is in config/rte_config.h + for meson build and config/common_base for gnu make. + Also the dynamic global log level overrides both sets of trace, so e.g. 
no + QAT trace would display in this case:: + + --log-level="7" --log-level="pmd.qat_general,8" diff --git a/src/spdk/dpdk/doc/guides/cryptodevs/scheduler.rst b/src/spdk/dpdk/doc/guides/cryptodevs/scheduler.rst new file mode 100644 index 000000000..7004ca431 --- /dev/null +++ b/src/spdk/dpdk/doc/guides/cryptodevs/scheduler.rst @@ -0,0 +1,182 @@ +.. SPDX-License-Identifier: BSD-3-Clause + Copyright(c) 2017 Intel Corporation. + +Cryptodev Scheduler Poll Mode Driver Library +============================================ + +Scheduler PMD is a software crypto PMD, which has the capabilities of +attaching hardware and/or software cryptodevs, and distributes ingress +crypto ops among them in a certain manner. + +.. figure:: img/scheduler-overview.* + + Cryptodev Scheduler Overview + + +The Cryptodev Scheduler PMD library (**librte_pmd_crypto_scheduler**) acts as +a software crypto PMD and shares the same API provided by librte_cryptodev. +The PMD supports attaching multiple crypto PMDs, software or hardware, as +slaves, and distributes the crypto workload to them with certain behavior. +The behaviors are categorizes as different "modes". Basically, a scheduling +mode defines certain actions for scheduling crypto ops to its slaves. + +The librte_pmd_crypto_scheduler library exports a C API which provides an API +for attaching/detaching slaves, set/get scheduling modes, and enable/disable +crypto ops reordering. + +Limitations +----------- + +* Sessionless crypto operation is not supported +* OOP crypto operation is not supported when the crypto op reordering feature + is enabled. + + +Installation +------------ + +To build DPDK with CRYTPO_SCHEDULER_PMD the user is required to set +CONFIG_RTE_LIBRTE_PMD_CRYPTO_SCHEDULER=y in config/common_base, and +recompile DPDK + + +Initialization +-------------- + +To use the PMD in an application, user must: + +* Call rte_vdev_init("crypto_scheduler") within the application. + +* Use --vdev="crypto_scheduler" in the EAL options, which will call + rte_vdev_init() internally. + + +The following parameters (all optional) can be provided in the previous +two calls: + +* socket_id: Specify the socket where the memory for the device is going + to be allocated (by default, socket_id will be the socket where the core + that is creating the PMD is running on). + +* max_nb_sessions: Specify the maximum number of sessions that can be + created. This value may be overwritten internally if there are too + many devices are attached. + +* slave: If a cryptodev has been initialized with specific name, it can be + attached to the scheduler using this parameter, simply filling the name + here. Multiple cryptodevs can be attached initially by presenting this + parameter multiple times. + +* mode: Specify the scheduling mode of the PMD. The supported scheduling + mode parameter values are specified in the "Cryptodev Scheduler Modes + Overview" section. + +* mode_param: Specify the mode-specific parameter. Some scheduling modes + may be initialized with specific parameters other than the default ones, + such as the **threshold** packet size of **packet-size-distr** mode. This + parameter fulfills the purpose. + +* ordering: Specify the status of the crypto operations ordering feature. + The value of this parameter can be "enable" or "disable". This feature + is disabled by default. + +Example: + +.. code-block:: console + + ... --vdev "crypto_aesni_mb0,name=aesni_mb_1" --vdev "crypto_aesni_mb1,name=aesni_mb_2" --vdev "crypto_scheduler,slave=aesni_mb_1,slave=aesni_mb_2" ... + +.. 
note:: + + * The scheduler cryptodev cannot be started unless the scheduling mode + is set and at least one slave is attached. Also, to configure the + scheduler in the run-time, like attach/detach slave(s), change + scheduling mode, or enable/disable crypto op ordering, one should stop + the scheduler first, otherwise an error will be returned. + + * The crypto op reordering feature requires using the userdata field of + every mbuf to be processed to store temporary data. By the end of + processing, the field is set to pointing to NULL, any previously + stored value of this field will be lost. + + +Cryptodev Scheduler Modes Overview +---------------------------------- + +Currently the Crypto Scheduler PMD library supports following modes of +operation: + +* **CDEV_SCHED_MODE_ROUNDROBIN:** + + *Initialization mode parameter*: **round-robin** + + Round-robin mode, which distributes the enqueued burst of crypto ops + among its slaves in a round-robin manner. This mode may help to fill + the throughput gap between the physical core and the existing cryptodevs + to increase the overall performance. + +* **CDEV_SCHED_MODE_PKT_SIZE_DISTR:** + + *Initialization mode parameter*: **packet-size-distr** + + Packet-size based distribution mode, which works with 2 slaves, the primary + slave and the secondary slave, and distributes the enqueued crypto + operations to them based on their data lengths. A crypto operation will be + distributed to the primary slave if its data length is equal to or bigger + than the designated threshold, otherwise it will be handled by the secondary + slave. + + A typical usecase in this mode is with the QAT cryptodev as the primary and + a software cryptodev as the secondary slave. This may help applications to + process additional crypto workload than what the QAT cryptodev can handle on + its own, by making use of the available CPU cycles to deal with smaller + crypto workloads. + + The threshold is set to 128 bytes by default. It can be updated by calling + function **rte_cryptodev_scheduler_option_set**. The parameter of + **option_type** must be **CDEV_SCHED_OPTION_THRESHOLD** and **option** should + point to a rte_cryptodev_scheduler_threshold_option structure filled with + appropriate threshold value. Please NOTE this threshold has be a power-of-2 + unsigned integer. It is possible to use **mode_param** initialization + parameter to achieve the same purpose. For example: + + ... --vdev "crypto_scheduler,mode=packet-size-distr,mode_param=threshold:512" ... + + The above parameter will overwrite the threshold value to 512. + +* **CDEV_SCHED_MODE_FAILOVER:** + + *Initialization mode parameter*: **fail-over** + + Fail-over mode, which works with 2 slaves, the primary slave and the + secondary slave. In this mode, the scheduler will enqueue the incoming + crypto operation burst to the primary slave. When one or more crypto + operations fail to be enqueued, then they will be enqueued to the secondary + slave. + +* **CDEV_SCHED_MODE_MULTICORE:** + + *Initialization mode parameter*: **multi-core** + + Multi-core mode, which distributes the workload with several (up to eight) + worker cores. The enqueued bursts are distributed among the worker cores in a + round-robin manner. If scheduler cannot enqueue entire burst to the same worker, + it will enqueue the remaining operations to the next available worker. 
+   For pure small packet size (64 bytes) traffic, however, the multi-core mode
+   is not an optimal solution, as it does not give a significant per-core
+   performance improvement. For mixed traffic (IMIX) the optimal number of
+   worker cores is around 2-3. For large packets (1.5 kbytes) the scheduler
+   shows linear scaling in performance up to eight cores.
+   Each worker uses its own slave cryptodev. Only software cryptodevs
+   are supported. Only the same type of cryptodevs should be used concurrently.
+
+   The multi-core mode uses one extra parameter:
+
+   * corelist: Semicolon-separated list of logical cores to be used as workers.
+     The number of worker cores should be equal to the number of slave cryptodevs.
+     These cores should be present in the EAL core list parameter and
+     should not be used by the application or any other process.
+
+   Example:
+    ... --vdev "crypto_aesni_mb1,name=aesni_mb_1" --vdev "crypto_aesni_mb_pmd2,name=aesni_mb_2" \
+    --vdev "crypto_scheduler,slave=aesni_mb_1,slave=aesni_mb_2,mode=multi-core,corelist=23;24" ...
diff --git a/src/spdk/dpdk/doc/guides/cryptodevs/snow3g.rst b/src/spdk/dpdk/doc/guides/cryptodevs/snow3g.rst
new file mode 100644
index 000000000..e0cddc2d7
--- /dev/null
+++ b/src/spdk/dpdk/doc/guides/cryptodevs/snow3g.rst
@@ -0,0 +1,119 @@
+.. SPDX-License-Identifier: BSD-3-Clause
+   Copyright(c) 2016-2019 Intel Corporation.
+
+SNOW 3G Crypto Poll Mode Driver
+===============================
+
+The SNOW 3G PMD (**librte_pmd_snow3g**) provides poll mode crypto driver support for
+utilizing `Intel IPSec Multi-buffer library `_
+which implements F8 and F9 functions for SNOW 3G UEA2 cipher and UIA2 hash algorithms.
+
+Features
+--------
+
+SNOW 3G PMD has support for:
+
+Cipher algorithm:
+
+* RTE_CRYPTO_CIPHER_SNOW3G_UEA2
+
+Authentication algorithm:
+
+* RTE_CRYPTO_AUTH_SNOW3G_UIA2
+
+Limitations
+-----------
+
+* Chained mbufs are not supported.
+* SNOW 3G (UIA2) supported only if hash offset field is byte-aligned.
+* In-place bit-level operations for SNOW 3G (UEA2) are not supported
+  (if length and/or offset of data to be ciphered is not byte-aligned).
+
+Installation
+------------
+
+To build DPDK with the SNOW3G_PMD the user is required to download the multi-buffer
+library from `here `_
+and compile it on their user system before building DPDK.
+The latest version of the library supported by this PMD is v0.54, which
+can be downloaded from ``_.
+
+After downloading the library, the user needs to unpack and compile it
+on their system before building DPDK:
+
+.. code-block:: console
+
+    make
+    make install
+
+The library requires NASM to be built. Depending on the library version, it might
+require a minimum NASM version (e.g. v0.54 requires at least NASM 2.14).
+
+NASM is packaged for different OS. However, on some OS the version is too old,
+so a manual installation is required. In that case, NASM can be downloaded from
+`NASM website `_.
+Once it is downloaded, extract it and follow these steps:
+
+.. code-block:: console
+
+    ./configure
+    make
+    make install
+
+.. note::
+
+   Compilation of the Multi-Buffer library is broken when GCC < 5.0, if library <= v0.53.
+   If a GCC version lower than 5.0 is used, the workaround proposed by the following link
+   should be used: ``_.
+
+As a reference, the following table shows a mapping between the past DPDK versions
+and the external crypto libraries supported by them:
+
+.. _table_snow3g_versions:
+
+.. 
table:: DPDK and external crypto library version compatibility + + ============= ================================ + DPDK version Crypto library version + ============= ================================ + 16.04 - 19.11 LibSSO SNOW3G + 20.02+ Multi-buffer library 0.53 - 0.54 + ============= ================================ + + +Initialization +-------------- + +In order to enable this virtual crypto PMD, user must: + +* Build the multi buffer library (explained in Installation section). + +* Build DPDK as follows: + +.. code-block:: console + + make config T=x86_64-native-linux-gcc + sed -i 's,\(CONFIG_RTE_LIBRTE_PMD_SNOW3G\)=n,\1=y,' build/.config + make + +To use the PMD in an application, user must: + +* Call rte_vdev_init("crypto_snow3g") within the application. + +* Use --vdev="crypto_snow3g" in the EAL options, which will call rte_vdev_init() internally. + +The following parameters (all optional) can be provided in the previous two calls: + +* socket_id: Specify the socket where the memory for the device is going to be allocated + (by default, socket_id will be the socket where the core that is creating the PMD is running on). + +* max_nb_queue_pairs: Specify the maximum number of queue pairs in the device (8 by default). + +* max_nb_sessions: Specify the maximum number of sessions that can be created (2048 by default). + +Example: + +.. code-block:: console + + ./l2fwd-crypto -l 1 -n 4 --vdev="crypto_snow3g,socket_id=0,max_nb_sessions=128" \ + -- -p 1 --cdev SW --chain CIPHER_ONLY --cipher_algo "snow3g-uea2" diff --git a/src/spdk/dpdk/doc/guides/cryptodevs/virtio.rst b/src/spdk/dpdk/doc/guides/cryptodevs/virtio.rst new file mode 100644 index 000000000..1496ec920 --- /dev/null +++ b/src/spdk/dpdk/doc/guides/cryptodevs/virtio.rst @@ -0,0 +1,117 @@ +.. SPDX-License-Identifier: BSD-3-Clause + Copyright(c) 2018 HUAWEI TECHNOLOGIES CO., LTD. + +Virtio Crypto Poll Mode Driver +============================== + +The virtio crypto PMD provides poll mode driver support for the virtio crypto +device. + +Features +-------- + +The virtio crypto PMD has support for: + +Cipher algorithms: + +* ``RTE_CRYPTO_CIPHER_AES_CBC`` + +Hash algorithms: + +* ``RTE_CRYPTO_AUTH_SHA1_HMAC`` + +Limitations +----------- + +* Only supports the session-oriented API implementation (session-less APIs are + not supported). +* Only supports modern mode since virtio crypto conforms to virtio-1.0. +* Only has two types of queues: data queue and control queue. These two queues + only support indirect buffers to communication with the virtio backend. +* Only supports AES_CBC cipher only algorithm and AES_CBC with HMAC_SHA1 + chaining algorithm since the vhost crypto backend only these algorithms + are supported. +* Does not support Link State interrupt. +* Does not support runtime configuration. + +Virtio crypto PMD Rx/Tx Callbacks +--------------------------------- + +Rx callbacks: + +* ``virtio_crypto_pkt_rx_burst`` + +Tx callbacks: + +* ``virtio_crypto_pkt_tx_burst`` + +Installation +------------ + +Quick instructions are as follows: + +Firstly run DPDK vhost crypto sample as a server side and build QEMU with +vhost crypto enabled. +QEMU can then be started using the following parameters: + +.. code-block:: console + + qemu-system-x86_64 \ + [...] \ + -chardev socket,id=charcrypto0,path=/path/to/your/socket \ + -object cryptodev-vhost-user,id=cryptodev0,chardev=charcrypto0 \ + -device virtio-crypto-pci,id=crypto0,cryptodev=cryptodev0 + [...] + +Secondly bind the uio_generic driver for the virtio-crypto device. 
+For example, 0000:00:04.0 is the domain, bus, device and function +number of the virtio-crypto device: + +.. code-block:: console + + modprobe uio_pci_generic + echo -n 0000:00:04.0 > /sys/bus/pci/drivers/virtio-pci/unbind + echo "1af4 1054" > /sys/bus/pci/drivers/uio_pci_generic/new_id + +Finally the front-end virtio crypto PMD driver can be installed: + +.. code-block:: console + + cd to the top-level DPDK directory + sed -i 's,\(CONFIG_RTE_LIBRTE_PMD_VIRTIO_CRYPTO\)=n,\1=y,' config/common_base + make config T=x86_64-native-linux-gcc + make install T=x86_64-native-linux-gcc + +Tests +----- + +The unit test cases can be tested as below: + +.. code-block:: console + + reserve enough huge pages + cd to the top-level DPDK directory + export RTE_TARGET=x86_64-native-linux-gcc + export RTE_SDK=`pwd` + cd to app/test + type the command "make" to compile + run the tests with "./test" + type the command "cryptodev_virtio_autotest" to test + +The performance can be tested as below: + +.. code-block:: console + + reserve enough huge pages + cd to the top-level DPDK directory + export RTE_TARGET=x86_64-native-linux-gcc + export RTE_SDK=`pwd` + cd to app/test-crypto-perf + type the command "make" to compile + run the tests with the following command: + + ./dpdk-test-crypto-perf -l 0,1 -- --devtype crypto_virtio \ + --ptest throughput --optype cipher-then-auth --cipher-algo aes-cbc \ + --cipher-op encrypt --cipher-key-sz 16 --auth-algo sha1-hmac \ + --auth-op generate --auth-key-sz 64 --digest-sz 12 \ + --total-ops 100000000 --burst-sz 64 --buffer-sz 2048 diff --git a/src/spdk/dpdk/doc/guides/cryptodevs/zuc.rst b/src/spdk/dpdk/doc/guides/cryptodevs/zuc.rst new file mode 100644 index 000000000..9b51ba141 --- /dev/null +++ b/src/spdk/dpdk/doc/guides/cryptodevs/zuc.rst @@ -0,0 +1,119 @@ +.. SPDX-License-Identifier: BSD-3-Clause + Copyright(c) 2016-2019 Intel Corporation. + +ZUC Crypto Poll Mode Driver +=========================== + +The ZUC PMD (**librte_pmd_zuc**) provides poll mode crypto driver support for +utilizing `Intel IPSec Multi-buffer library `_ +which implements F8 and F9 functions for ZUC EEA3 cipher and EIA3 hash algorithms. + +Features +-------- + +ZUC PMD has support for: + +Cipher algorithm: + +* RTE_CRYPTO_CIPHER_ZUC_EEA3 + +Authentication algorithm: + +* RTE_CRYPTO_AUTH_ZUC_EIA3 + +Limitations +----------- + +* Chained mbufs are not supported. +* ZUC (EIA3) supported only if hash offset field is byte-aligned. +* ZUC (EEA3) supported only if cipher length, cipher offset fields are byte-aligned. + + +Installation +------------ + +To build DPDK with the ZUC_PMD the user is required to download the multi-buffer +library from `here `_ +and compile it on their user system before building DPDK. +The latest version of the library supported by this PMD is v0.54, which +can be downloaded from ``_. + +After downloading the library, the user needs to unpack and compile it +on their system before building DPDK: + +.. code-block:: console + + make + make install + +The library requires NASM to be built. Depending on the library version, it might +require a minimum NASM version (e.g. v0.54 requires at least NASM 2.14). + +NASM is packaged for different OS. However, on some OS the version is too old, +so a manual installation is required. In that case, NASM can be downloaded from +`NASM website `_. +Once it is downloaded, extract it and follow these steps: + +.. code-block:: console + + ./configure + make + make install + +.. 
note:: + + Compilation of the Multi-Buffer library is broken when GCC < 5.0, if library <= v0.53. + If a lower GCC version than 5.0, the workaround proposed by the following link + should be used: ``_. + +As a reference, the following table shows a mapping between the past DPDK versions +and the external crypto libraries supported by them: + +.. _table_zuc_versions: + +.. table:: DPDK and external crypto library version compatibility + + ============= ================================ + DPDK version Crypto library version + ============= ================================ + 16.11 - 19.11 LibSSO ZUC + 20.02+ Multi-buffer library 0.53 - 0.54 + ============= ================================ + + +Initialization +-------------- + +In order to enable this virtual crypto PMD, user must: + +* Build the multi buffer library (explained in Installation section). + +* Build DPDK as follows: + +.. code-block:: console + + make config T=x86_64-native-linux-gcc + sed -i 's,\(CONFIG_RTE_LIBRTE_PMD_ZUC\)=n,\1=y,' build/.config + make + +To use the PMD in an application, user must: + +* Call rte_vdev_init("crypto_zuc") within the application. + +* Use --vdev="crypto_zuc" in the EAL options, which will call rte_vdev_init() internally. + +The following parameters (all optional) can be provided in the previous two calls: + +* socket_id: Specify the socket where the memory for the device is going to be allocated + (by default, socket_id will be the socket where the core that is creating the PMD is running on). + +* max_nb_queue_pairs: Specify the maximum number of queue pairs in the device (8 by default). + +* max_nb_sessions: Specify the maximum number of sessions that can be created (2048 by default). + +Example: + +.. code-block:: console + + ./l2fwd-crypto -l 1 -n 4 --vdev="crypto_zuc,socket_id=0,max_nb_sessions=128" \ + -- -p 1 --cdev SW --chain CIPHER_ONLY --cipher_algo "zuc-eea3" diff --git a/src/spdk/dpdk/doc/guides/custom.css b/src/spdk/dpdk/doc/guides/custom.css new file mode 100644 index 000000000..c87c60611 --- /dev/null +++ b/src/spdk/dpdk/doc/guides/custom.css @@ -0,0 +1,7 @@ +/* Override readthedocs theme */ + +/* Spacing before a list item must be bigger than spacing inside the item. + * Complex list items start with a p.first element. */ +.section li > .first { + margin-top: 18px; +} diff --git a/src/spdk/dpdk/doc/guides/eventdevs/dpaa.rst b/src/spdk/dpdk/doc/guides/eventdevs/dpaa.rst new file mode 100644 index 000000000..be68c6da6 --- /dev/null +++ b/src/spdk/dpdk/doc/guides/eventdevs/dpaa.rst @@ -0,0 +1,102 @@ +.. SPDX-License-Identifier: BSD-3-Clause + Copyright 2017 NXP + +NXP DPAA Eventdev Driver +========================= + +The dpaa eventdev is an implementation of the eventdev API, that provides a +wide range of the eventdev features. The eventdev relies on a dpaa based +platform to perform event scheduling. + +More information can be found at `NXP Official Website +`_. + +Features +-------- + +The DPAA EVENTDEV implements many features in the eventdev API; + +- Hardware based event scheduler +- 4 event ports +- 4 event queues +- Parallel flows +- Atomic flows + +Supported DPAA SoCs +-------------------- + +- LS1046A/LS1026A +- LS1043A/LS1023A + +Prerequisites +------------- + +See :doc:`../platform/dpaa` for setup information + +Currently supported by DPDK: + +- NXP SDK **2.0+** or LSDK **18.09+** +- Supported architectures: **arm64 LE**. + +- Follow the DPDK :ref:`Getting Started Guide for Linux ` to setup the basic DPDK environment. 
+ +Pre-Installation Configuration +------------------------------ + +Config File Options +~~~~~~~~~~~~~~~~~~~ + +The following options can be modified in the ``config`` file. +Please note that enabling debugging options may affect system performance. + +- ``CONFIG_RTE_LIBRTE_PMD_DPAA_EVENTDEV`` (default ``y``) + + Toggle compilation of the ``librte_pmd_dpaa_event`` driver. + +Driver Compilation +~~~~~~~~~~~~~~~~~~ + +To compile the DPAA EVENTDEV PMD for Linux arm64 gcc target, run the +following ``make`` command: + +.. code-block:: console + + cd + make config T=arm64-dpaa-linux-gcc install + +Initialization +-------------- + +The dpaa eventdev is exposed as a vdev device which consists of a set of channels +and queues. On EAL initialization, dpaa components will be +probed and then vdev device can be created from the application code by + +* Invoking ``rte_vdev_init("event_dpaa1")`` from the application + +* Using ``--vdev="event_dpaa1"`` in the EAL options, which will call + rte_vdev_init() internally + +Example: + +.. code-block:: console + + ./your_eventdev_application --vdev="event_dpaa1" + +* Use dev arg option ``disable_intr=1`` to disable the interrupt mode + +Limitations +----------- + +1. DPAA eventdev can not work with DPAA PUSH mode queues configured for ethdev. + Please configure export DPAA_NUM_PUSH_QUEUES=0 + +Platform Requirement +~~~~~~~~~~~~~~~~~~~~ + +DPAA drivers for DPDK can only work on NXP SoCs as listed in the +``Supported DPAA SoCs``. + +Port-core Binding +~~~~~~~~~~~~~~~~~ + +DPAA EVENTDEV driver requires event port 'x' to be used on core 'x'. diff --git a/src/spdk/dpdk/doc/guides/eventdevs/dpaa2.rst b/src/spdk/dpdk/doc/guides/eventdevs/dpaa2.rst new file mode 100644 index 000000000..835767f98 --- /dev/null +++ b/src/spdk/dpdk/doc/guides/eventdevs/dpaa2.rst @@ -0,0 +1,118 @@ +.. SPDX-License-Identifier: BSD-3-Clause + Copyright 2017 NXP + + +NXP DPAA2 Eventdev Driver +========================= + +The dpaa2 eventdev is an implementation of the eventdev API, that provides a +wide range of the eventdev features. The eventdev relies on a dpaa2 hw to +perform event scheduling. + +More information can be found at `NXP Official Website +`_. + +Features +-------- + +The DPAA2 EVENTDEV implements many features in the eventdev API; + +- Hardware based event scheduler +- 8 event ports +- 8 event queues +- Parallel flows +- Atomic flows + +Supported DPAA2 SoCs +-------------------- + +- LX2160A +- LS2084A/LS2044A +- LS2088A/LS2048A +- LS1088A/LS1048A + +Prerequisites +------------- + +See :doc:`../platform/dpaa2` for setup information + +Currently supported by DPDK: + +- NXP SDK **19.09+**. +- MC Firmware version **10.18.0** and higher. +- Supported architectures: **arm64 LE**. + +- Follow the DPDK :ref:`Getting Started Guide for Linux ` to setup the basic DPDK environment. + +.. note:: + + Some part of fslmc bus code (mc flib - object library) routines are + dual licensed (BSD & GPLv2). + +Pre-Installation Configuration +------------------------------ + +Config File Options +~~~~~~~~~~~~~~~~~~~ + +The following options can be modified in the ``config`` file. +Please note that enabling debugging options may affect system performance. + +- ``CONFIG_RTE_LIBRTE_PMD_DPAA2_EVENTDEV`` (default ``y``) + + Toggle compilation of the ``lrte_pmd_dpaa2_event`` driver. + +Driver Compilation +~~~~~~~~~~~~~~~~~~ + +To compile the DPAA2 EVENTDEV PMD for Linux arm64 gcc target, run the +following ``make`` command: + +.. 
code-block:: console + + cd + make config T=arm64-dpaa-linux-gcc install + +Initialization +-------------- + +The dpaa2 eventdev is exposed as a vdev device which consists of a set of dpcon +devices and dpci devices. On EAL initialization, dpcon and dpci devices will be +probed and then vdev device can be created from the application code by + +* Invoking ``rte_vdev_init("event_dpaa2")`` from the application + +* Using ``--vdev="event_dpaa2"`` in the EAL options, which will call + rte_vdev_init() internally + +Example: + +.. code-block:: console + + ./your_eventdev_application --vdev="event_dpaa2" + +Enabling logs +------------- + +For enabling logs, use the following EAL parameter: + +.. code-block:: console + + ./your_eventdev_application --log-level=pmd.event.dpaa2, + +Using ``eventdev.dpaa2`` as log matching criteria, all Event PMD logs can be +enabled which are lower than logging ``level``. + +Limitations +----------- + +Platform Requirement +~~~~~~~~~~~~~~~~~~~~ + +DPAA2 drivers for DPDK can only work on NXP SoCs as listed in the +``Supported DPAA2 SoCs``. + +Port-core binding +~~~~~~~~~~~~~~~~~ + +DPAA2 EVENTDEV can support only one eventport per core. diff --git a/src/spdk/dpdk/doc/guides/eventdevs/dsw.rst b/src/spdk/dpdk/doc/guides/eventdevs/dsw.rst new file mode 100644 index 000000000..6653f501c --- /dev/null +++ b/src/spdk/dpdk/doc/guides/eventdevs/dsw.rst @@ -0,0 +1,96 @@ +.. SPDX-License-Identifier: BSD-3-Clause + Copyright(c) 2018 Ericsson AB + +Distributed Software Eventdev Poll Mode Driver +============================================== + +The distributed software event device is an eventdev driver which +distributes the task of scheduling events among all the eventdev ports +and the lcore threads using them. + +Features +-------- + +Queues + * Atomic + * Parallel + * Single-Link + +Ports + * Load balanced (for Atomic, Ordered, Parallel queues) + * Single Link (for single-link queues) + +Configuration and Options +------------------------- + +The distributed software eventdev is a vdev device, and as such can be +created from the application code, or from the EAL command line: + +* Call ``rte_vdev_init("event_dsw0")`` from the application + +* Use ``--vdev="event_dsw0"`` in the EAL options, which will call + rte_vdev_init() internally + +Example: + +.. code-block:: console + + ./your_eventdev_application --vdev="event_dsw0" + +Limitations +----------- + +Unattended Ports +~~~~~~~~~~~~~~~~ + +The distributed software eventdev uses an internal signaling schema +between the ports to achieve load balancing. In order for this to +work, the application must perform enqueue and/or dequeue operations +on all ports. + +Producer-only ports which currently have no events to enqueue should +periodically call rte_event_enqueue_burst() with a zero-sized burst. + +Ports left unattended for longer periods of time will prevent load +balancing, and also cause traffic interruptions on the flows which +are in the process of being migrated. + +Output Buffering +~~~~~~~~~~~~~~~~ + +For efficiency reasons, the distributed software eventdev might not +send enqueued events immediately to the destination port, but instead +store them in an internal buffer in the source port. + +In case no more events are enqueued on a port with buffered events, +these events will be sent after the application has performed a number +of enqueue and/or dequeue operations. + +For explicit flushing, an application may call +rte_event_enqueue_burst() with a zero-sized burst. 
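+
+A minimal sketch of the enqueue pattern described above, for a
+producer-only thread. The ``fill_events()`` helper, ``BURST_SIZE`` and the
+``keep_running`` flag are hypothetical, application-defined names used only
+for illustration:
+
+.. code-block:: c
+
+    #include <stdbool.h>
+    #include <rte_eventdev.h>
+
+    #define BURST_SIZE 32
+
+    /* Application-defined helper that gathers new events to enqueue. */
+    uint16_t fill_events(struct rte_event *ev, uint16_t max);
+
+    static void
+    producer_loop(uint8_t dev_id, uint8_t port_id, volatile bool *keep_running)
+    {
+        struct rte_event events[BURST_SIZE];
+
+        while (*keep_running) {
+            uint16_t n = fill_events(events, BURST_SIZE);
+
+            if (n > 0)
+                rte_event_enqueue_burst(dev_id, port_id, events, n);
+            else
+                /* A zero-sized enqueue keeps the port attended, letting the
+                 * scheduler migrate flows and flush buffered events. */
+                rte_event_enqueue_burst(dev_id, port_id, NULL, 0);
+        }
+    }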
+ + +Priorities +~~~~~~~~~~ + +The distributed software eventdev does not support event priorities. + +Ordered Queues +~~~~~~~~~~~~~~ + +The distributed software eventdev does not support the ordered queue type. + + +"All Types" Queues +~~~~~~~~~~~~~~~~~~ + +The distributed software eventdev does not support queues of type +RTE_EVENT_QUEUE_CFG_ALL_TYPES, which allow both atomic, ordered, and +parallel events on the same queue. + +Dynamic Link/Unlink +~~~~~~~~~~~~~~~~~~~ + +The distributed software eventdev does not support calls to +rte_event_port_link() or rte_event_port_unlink() after +rte_event_dev_start() has been called. diff --git a/src/spdk/dpdk/doc/guides/eventdevs/index.rst b/src/spdk/dpdk/doc/guides/eventdevs/index.rst new file mode 100644 index 000000000..bb66a5eac --- /dev/null +++ b/src/spdk/dpdk/doc/guides/eventdevs/index.rst @@ -0,0 +1,20 @@ +.. SPDX-License-Identifier: BSD-3-Clause + Copyright(c) 2017 Intel Corporation. + +Event Device Drivers +==================== + +The following are a list of event device PMDs, which can be used from an +application through the eventdev API. + +.. toctree:: + :maxdepth: 2 + :numbered: + + dpaa + dpaa2 + dsw + sw + octeontx + octeontx2 + opdl diff --git a/src/spdk/dpdk/doc/guides/eventdevs/octeontx.rst b/src/spdk/dpdk/doc/guides/eventdevs/octeontx.rst new file mode 100644 index 000000000..9a3646db0 --- /dev/null +++ b/src/spdk/dpdk/doc/guides/eventdevs/octeontx.rst @@ -0,0 +1,148 @@ +.. SPDX-License-Identifier: BSD-3-Clause + Copyright(c) 2017 Cavium, Inc + +OCTEON TX SSOVF Eventdev Driver +=============================== + +The OCTEON TX SSOVF PMD (**librte_pmd_octeontx_ssovf**) provides poll mode +eventdev driver support for the inbuilt event device found in the **Cavium OCTEON TX** +SoC family as well as their virtual functions (VF) in SR-IOV context. + +More information can be found at `Cavium, Inc Official Website +`_. + +Features +-------- + +Features of the OCTEON TX SSOVF PMD are: + +- 64 Event queues +- 32 Event ports +- HW event scheduler +- Supports 1M flows per event queue +- Flow based event pipelining +- Flow pinning support in flow based event pipelining +- Queue based event pipelining +- Supports ATOMIC, ORDERED, PARALLEL schedule types per flow +- Event scheduling QoS based on event queue priority +- Open system with configurable amount of outstanding events +- HW accelerated dequeue timeout support to enable power management +- SR-IOV VF +- HW managed event timers support through TIMVF, with high precision and + time granularity of 1us. +- Up to 64 event timer adapters. + +Supported OCTEON TX SoCs +------------------------ +- CN83xx + +Prerequisites +------------- + +See :doc:`../platform/octeontx` for setup information. + +Pre-Installation Configuration +------------------------------ + +Config File Options +~~~~~~~~~~~~~~~~~~~ + +The following options can be modified in the ``config`` file. +Please note that enabling debugging options may affect system performance. + +- ``CONFIG_RTE_LIBRTE_PMD_OCTEONTX_SSOVF`` (default ``y``) + + Toggle compilation of the ``librte_pmd_octeontx_ssovf`` driver. + +Driver Compilation +~~~~~~~~~~~~~~~~~~ + +To compile the OCTEON TX SSOVF PMD for Linux arm64 gcc target, run the +following ``make`` command: + +.. code-block:: console + + cd + make config T=arm64-thunderx-linux-gcc install + + +Initialization +-------------- + +The OCTEON TX eventdev is exposed as a vdev device which consists of a set +of SSO group and work-slot PCIe VF devices. 
On EAL initialization, +SSO PCIe VF devices will be probed and then the vdev device can be created +from the application code, or from the EAL command line based on +the number of probed/bound SSO PCIe VF device to DPDK by + +* Invoking ``rte_vdev_init("event_octeontx")`` from the application + +* Using ``--vdev="event_octeontx"`` in the EAL options, which will call + rte_vdev_init() internally + +Example: + +.. code-block:: console + + ./your_eventdev_application --vdev="event_octeontx" + + +Selftest +-------- + +The functionality of OCTEON TX eventdev can be verified using this option, +various unit and functional tests are run to verify the sanity. +The tests are run once the vdev creation is successfully complete. + +.. code-block:: console + + --vdev="event_octeontx,selftest=1" + + +Enable TIMvf stats +------------------ +TIMvf stats can be enabled by using this option, by default the stats are +disabled. + +.. code-block:: console + + --vdev="event_octeontx,timvf_stats=1" + + +Limitations +----------- + +Burst mode support +~~~~~~~~~~~~~~~~~~ + +Burst mode is not supported. Dequeue and Enqueue functions accepts only single +event at a time. + +Rx adapter support +~~~~~~~~~~~~~~~~~~ + +When eth_octeontx is used as Rx adapter event schedule type +``RTE_SCHED_TYPE_PARALLEL`` is not supported. + +Event timer adapter support +~~~~~~~~~~~~~~~~~~~~~~~~~~~ + +When timvf is used as Event timer adapter the clock source mapping is as +follows: + +.. code-block:: console + + RTE_EVENT_TIMER_ADAPTER_CPU_CLK = TIM_CLK_SRC_SCLK + RTE_EVENT_TIMER_ADAPTER_EXT_CLK0 = TIM_CLK_SRC_GPIO + RTE_EVENT_TIMER_ADAPTER_EXT_CLK1 = TIM_CLK_SRC_GTI + RTE_EVENT_TIMER_ADAPTER_EXT_CLK2 = TIM_CLK_SRC_PTP + +When timvf is used as Event timer adapter event schedule type +``RTE_SCHED_TYPE_PARALLEL`` is not supported. + +Max mempool size +~~~~~~~~~~~~~~~~ + +Max mempool size when using OCTEON TX Eventdev (SSO) should be limited to 128K. +When running dpdk-test-eventdev on OCTEON TX the application can limit the +number of mbufs by using the option ``--pool_sz 131072`` diff --git a/src/spdk/dpdk/doc/guides/eventdevs/octeontx2.rst b/src/spdk/dpdk/doc/guides/eventdevs/octeontx2.rst new file mode 100644 index 000000000..6502f6415 --- /dev/null +++ b/src/spdk/dpdk/doc/guides/eventdevs/octeontx2.rst @@ -0,0 +1,174 @@ +.. SPDX-License-Identifier: BSD-3-Clause + Copyright(c) 2019 Marvell International Ltd. + +OCTEON TX2 SSO Eventdev Driver +=============================== + +The OCTEON TX2 SSO PMD (**librte_pmd_octeontx2_event**) provides poll mode +eventdev driver support for the inbuilt event device found in the **Marvell OCTEON TX2** +SoC family. + +More information about OCTEON TX2 SoC can be found at `Marvell Official Website +`_. + +Features +-------- + +Features of the OCTEON TX2 SSO PMD are: + +- 256 Event queues +- 26 (dual) and 52 (single) Event ports +- HW event scheduler +- Supports 1M flows per event queue +- Flow based event pipelining +- Flow pinning support in flow based event pipelining +- Queue based event pipelining +- Supports ATOMIC, ORDERED, PARALLEL schedule types per flow +- Event scheduling QoS based on event queue priority +- Open system with configurable amount of outstanding events limited only by + DRAM +- HW accelerated dequeue timeout support to enable power management +- HW managed event timers support through TIM, with high precision and + time granularity of 2.5us. +- Up to 256 TIM rings aka event timer adapters. +- Up to 8 rings traversed in parallel. 
+- HW managed packets enqueued from ethdev to eventdev exposed through event eth + RX adapter. +- N:1 ethernet device Rx queue to Event queue mapping. +- Lockfree Tx from event eth Tx adapter using ``DEV_TX_OFFLOAD_MT_LOCKFREE`` + capability while maintaining receive packet order. +- Full Rx/Tx offload support defined through ethdev queue config. + +Prerequisites and Compilation procedure +--------------------------------------- + + See :doc:`../platform/octeontx2` for setup information. + +Pre-Installation Configuration +------------------------------ + +Compile time Config Options +~~~~~~~~~~~~~~~~~~~~~~~~~~~ + +The following option can be modified in the ``config`` file. + +- ``CONFIG_RTE_LIBRTE_PMD_OCTEONTX2_EVENTDEV`` (default ``y``) + + Toggle compilation of the ``librte_pmd_octeontx2_event`` driver. + +Runtime Config Options +~~~~~~~~~~~~~~~~~~~~~~ + +- ``Maximum number of in-flight events`` (default ``8192``) + + In **Marvell OCTEON TX2** the max number of in-flight events are only limited + by DRAM size, the ``xae_cnt`` devargs parameter is introduced to provide + upper limit for in-flight events. + For example:: + + -w 0002:0e:00.0,xae_cnt=16384 + +- ``Force legacy mode`` + + The ``single_ws`` devargs parameter is introduced to force legacy mode i.e + single workslot mode in SSO and disable the default dual workslot mode. + For example:: + + -w 0002:0e:00.0,single_ws=1 + +- ``Event Group QoS support`` + + SSO GGRPs i.e. queue uses DRAM & SRAM buffers to hold in-flight + events. By default the buffers are assigned to the SSO GGRPs to + satisfy minimum HW requirements. SSO is free to assign the remaining + buffers to GGRPs based on a preconfigured threshold. + We can control the QoS of SSO GGRP by modifying the above mentioned + thresholds. GGRPs that have higher importance can be assigned higher + thresholds than the rest. The dictionary format is as follows + [Qx-XAQ-TAQ-IAQ][Qz-XAQ-TAQ-IAQ] expressed in percentages, 0 represents + default. + For example:: + + -w 0002:0e:00.0,qos=[1-50-50-50] + +- ``Selftest`` + + The functionality of OCTEON TX2 eventdev can be verified using this option, + various unit and functional tests are run to verify the sanity. + The tests are run once the vdev creation is successfully complete. + For example:: + + -w 0002:0e:00.0,selftest=1 + +- ``TIM disable NPA`` + + By default chunks are allocated from NPA then TIM can automatically free + them when traversing the list of chunks. The ``tim_disable_npa`` devargs + parameter disables NPA and uses software mempool to manage chunks + For example:: + + -w 0002:0e:00.0,tim_disable_npa=1 + +- ``TIM modify chunk slots`` + + The ``tim_chnk_slots`` devargs can be used to modify number of chunk slots. + Chunks are used to store event timers, a chunk can be visualised as an array + where the last element points to the next chunk and rest of them are used to + store events. TIM traverses the list of chunks and enqueues the event timers + to SSO. The default value is 255 and the max value is 4095. + For example:: + + -w 0002:0e:00.0,tim_chnk_slots=1023 + +- ``TIM enable arm/cancel statistics`` + + The ``tim_stats_ena`` devargs can be used to enable arm and cancel stats of + event timer adapter. + For example:: + + -w 0002:0e:00.0,tim_stats_ena=1 + +- ``TIM limit max rings reserved`` + + The ``tim_rings_lmt`` devargs can be used to limit the max number of TIM + rings i.e. event timer adapter reserved on probe. 
Since, TIM rings are HW + resources we can avoid starving other applications by not grabbing all the + rings. + For example:: + + -w 0002:0e:00.0,tim_rings_lmt=5 + +- ``TIM ring control internal parameters`` + + When using multiple TIM rings the ``tim_ring_ctl`` devargs can be used to + control each TIM rings internal parameters uniquely. The following dict + format is expected [ring-chnk_slots-disable_npa-stats_ena]. 0 represents + default values. + For Example:: + + -w 0002:0e:00.0,tim_ring_ctl=[2-1023-1-0] + +- ``Lock NPA contexts in NDC`` + + Lock NPA aura and pool contexts in NDC cache. + The device args take hexadecimal bitmask where each bit represent the + corresponding aura/pool id. + + For example:: + + -w 0002:0e:00.0,npa_lock_mask=0xf + +Debugging Options +~~~~~~~~~~~~~~~~~ + +.. _table_octeontx2_event_debug_options: + +.. table:: OCTEON TX2 event device debug options + + +---+------------+-------------------------------------------------------+ + | # | Component | EAL log command | + +===+============+=======================================================+ + | 1 | SSO | --log-level='pmd\.event\.octeontx2,8' | + +---+------------+-------------------------------------------------------+ + | 2 | TIM | --log-level='pmd\.event\.octeontx2\.timer,8' | + +---+------------+-------------------------------------------------------+ diff --git a/src/spdk/dpdk/doc/guides/eventdevs/opdl.rst b/src/spdk/dpdk/doc/guides/eventdevs/opdl.rst new file mode 100644 index 000000000..cbfd1f11b --- /dev/null +++ b/src/spdk/dpdk/doc/guides/eventdevs/opdl.rst @@ -0,0 +1,136 @@ +.. SPDX-License-Identifier: BSD-3-Clause + Copyright(c) 2017 Intel Corporation. + +OPDL Eventdev Poll Mode Driver +================================== + +The OPDL (Ordered Packet Distribution Library) eventdev is a specific\ +implementation of the eventdev API. It is particularly suited to packet\ +processing workloads that have high throughput and low latency requirements.\ +All packets follow the same path through the device. The order in which\ +packets follow is determined by the order in which queues are set up.\ +Events are left on the ring until they are transmitted. As a result packets\ +do not go out of order + + +Features +-------- + +The OPDL eventdev implements a subset of features of the eventdev API; + +Queues + * Atomic + * Ordered (Parallel is supported as parallel is a subset of Ordered) + * Single-Link + +Ports + * Load balanced (for Atomic, Ordered, Parallel queues) + * Single Link (for single-link queues) + + +Configuration and Options +------------------------- + +The software eventdev is a vdev device, and as such can be created from the +application code, or from the EAL command line: + +* Call ``rte_vdev_init("event_opdl0")`` from the application + +* Use ``--vdev="event_opdl0"`` in the EAL options, which will call + rte_vdev_init() internally + +Example: + +.. code-block:: console + + ./your_eventdev_application --vdev="event_opdl0" + + +Single Port Queue +~~~~~~~~~~~~~~~~~ + +It is possible to create a Single Port Queue ``RTE_EVENT_QUEUE_CFG_SINGLE_LINK``. +Packets dequeued from this queue do not need to be re-enqueued (as is the +case with an ordered queue). The purpose of this queue is to allow for +asynchronous handling of packets in the middle of a pipeline. Ordered +queues in the middle of a pipeline cannot delete packets. + + +Queue Dependencies +~~~~~~~~~~~~~~~~~~ + +As stated the order in which packets travel through queues is static in +nature. 
They go through the queues in the order the queues are setup at +initialisation ``rte_event_queue_setup()``. For example if an application +sets up 3 queues, Q0, Q1, Q2 and has 3 associated ports P0, P1, P2 and +P3 then packets must be + + * Enqueued onto Q0 (typically through P0), then + + * Dequeued from Q0 (typically through P1), then + + * Enqueued onto Q1 (also through P1), then + + * Dequeued from Q2 (typically through P2), then + + * Enqueued onto Q3 (also through P2), then + + * Dequeued from Q3 (typically through P3) and then transmitted on the relevant \ + eth port + + +Limitations +----------- + +The opdl implementation has a number of limitations. These limitations are +due to the static nature of the underlying queues. It is because of this +that the implementation can achieve such high throughput and low latency + +The following list is a comprehensive outline of the what is supported and +the limitations / restrictions imposed by the opdl pmd + + - The order in which packets moved between queues is static and fixed \ + (dynamic scheduling is not supported). + + - NEW, RELEASE are not explicitly supported. RX (first enqueue) implicitly \ + adds NEW event types, and TX (last dequeue) implicitly does RELEASE event types. + + - All packets follow the same path through device queues. + + - Flows within queues are NOT supported. + + - Event priority is NOT supported. + + - Once the device is stopped all inflight events are lost. Applications should \ + clear all inflight events before stopping it. + + - Each port can only be associated with one queue. + + - Each queue can have multiple ports associated with it. + + - Each worker core has to dequeue the maximum burst size for that port. + + - For performance, the rte_event flow_id should not be updated once packet\ + is enqueued on RX. + + + +Validation & Statistics +~~~~~~~~~~~~~~~~~~~~~~~ + +Validation can be turned on through a command line parameter + +.. code-block:: console + + --vdev="event_opdl0,do_validation=1,self_test=1" + +If validation is turned on every packet (as opposed to just the first in +each burst), is validated to have come from the right queue. Statistics +are also produced in this mode. The statistics are available through the +eventdev xstats API. Statistics are per port as follows: + + - claim_pkts_requested + - claim_pkts_granted + - claim_non_empty + - claim_empty + - total_cycles diff --git a/src/spdk/dpdk/doc/guides/eventdevs/sw.rst b/src/spdk/dpdk/doc/guides/eventdevs/sw.rst new file mode 100644 index 000000000..04c8b0305 --- /dev/null +++ b/src/spdk/dpdk/doc/guides/eventdevs/sw.rst @@ -0,0 +1,132 @@ +.. SPDX-License-Identifier: BSD-3-Clause + Copyright(c) 2017 Intel Corporation. + +Software Eventdev Poll Mode Driver +================================== + +The software eventdev is an implementation of the eventdev API, that provides a +wide range of the eventdev features. The eventdev relies on a CPU core to +perform event scheduling. This PMD can use the service core library to run the +scheduling function, allowing an application to utilize the power of service +cores to multiplex other work on the same core if required. 
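+
+The following sketch shows one possible way to map the software eventdev's
+scheduling service onto a dedicated service core, as mentioned above;
+``dev_id`` and ``service_lcore`` are placeholders chosen by the application:
+
+.. code-block:: c
+
+    #include <rte_eventdev.h>
+    #include <rte_service.h>
+
+    static int
+    setup_sched_service(uint8_t dev_id, uint32_t service_lcore)
+    {
+        uint32_t service_id;
+
+        /* The software eventdev exposes its scheduling function as a
+         * service; retrieve its service id. */
+        if (rte_event_dev_service_id_get(dev_id, &service_id) != 0)
+            return -1;
+
+        /* Run that service on a dedicated service core. */
+        rte_service_lcore_add(service_lcore);
+        rte_service_map_lcore_set(service_id, service_lcore, 1);
+        rte_service_runstate_set(service_id, 1);
+        return rte_service_lcore_start(service_lcore);
+    }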
+ + +Features +-------- + +The software eventdev implements many features in the eventdev API; + +Queues + * Atomic + * Ordered + * Parallel + * Single-Link + +Ports + * Load balanced (for Atomic, Ordered, Parallel queues) + * Single Link (for single-link queues) + +Event Priorities + * Each event has a priority, which can be used to provide basic QoS + + +Configuration and Options +------------------------- + +The software eventdev is a vdev device, and as such can be created from the +application code, or from the EAL command line: + +* Call ``rte_vdev_init("event_sw0")`` from the application + +* Use ``--vdev="event_sw0"`` in the EAL options, which will call + rte_vdev_init() internally + +Example: + +.. code-block:: console + + ./your_eventdev_application --vdev="event_sw0" + + +Scheduling Quanta +~~~~~~~~~~~~~~~~~ + +The scheduling quanta sets the number of events that the device attempts to +schedule in a single schedule call performed by the service core. Note that +is a *hint* only, and that fewer or more events may be scheduled in a given +iteration. + +The scheduling quanta can be set using a string argument to the vdev +create call: + +.. code-block:: console + + --vdev="event_sw0,sched_quanta=64" + + +Credit Quanta +~~~~~~~~~~~~~ + +The credit quanta is the number of credits that a port will fetch at a time from +the instance's credit pool. Higher numbers will cause less overhead in the +atomic credit fetch code, however it also reduces the overall number of credits +in the system faster. A balanced number (e.g. 32) ensures that only small numbers +of credits are pre-allocated at a time, while also mitigating performance impact +of the atomics. + +Experimentation with higher values may provide minor performance improvements, +at the cost of the whole system having less credits. On the other hand, +reducing the quanta may cause measurable performance impact but provide the +system with a higher number of credits at all times. + +A value of 32 seems a good balance however your specific application may +benefit from a higher or reduced quanta size, experimentation is required to +verify possible gains. + +.. code-block:: console + + --vdev="event_sw0,credit_quanta=64" + + +Limitations +----------- + +The software eventdev implementation has a few limitations. The reason for +these limitations is usually that the performance impact of supporting the +feature would be significant. + + +"All Types" Queues +~~~~~~~~~~~~~~~~~~ + +The software eventdev does not support creating queues that handle all types of +traffic. An eventdev with this capability allows enqueuing Atomic, Ordered and +Parallel traffic to the same queue, but scheduling each of them appropriately. + +The reason to not allow Atomic, Ordered and Parallel event types in the +same queue is that it causes excessive branching in the code to enqueue packets +to the queue, causing a significant performance impact. + +The ``RTE_EVENT_DEV_CAP_QUEUE_ALL_TYPES`` flag is not set in the +``event_dev_cap`` field of the ``rte_event_dev_info`` struct for the software +eventdev. + +Distributed Scheduler +~~~~~~~~~~~~~~~~~~~~~ + +The software eventdev is a centralized scheduler, requiring a service core to +perform the required event distribution. This is not really a limitation but +rather a design decision. + +The ``RTE_EVENT_DEV_CAP_DISTRIBUTED_SCHED`` flag is not set in the +``event_dev_cap`` field of the ``rte_event_dev_info`` struct for the software +eventdev. 
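+
+A short sketch of how an application might probe these capability flags at
+runtime before configuring queues; ``dev_id`` is a placeholder for the event
+device in use:
+
+.. code-block:: c
+
+    #include <stdio.h>
+    #include <rte_eventdev.h>
+
+    static void
+    report_eventdev_caps(uint8_t dev_id)
+    {
+        struct rte_event_dev_info info;
+
+        rte_event_dev_info_get(dev_id, &info);
+
+        /* Neither flag is set for the software eventdev. */
+        if (!(info.event_dev_cap & RTE_EVENT_DEV_CAP_DISTRIBUTED_SCHED))
+            printf("Centralized scheduler: run its service on a service core\n");
+
+        if (!(info.event_dev_cap & RTE_EVENT_DEV_CAP_QUEUE_ALL_TYPES))
+            printf("\"All types\" queues are not supported\n");
+    }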
+ +Dequeue Timeout +~~~~~~~~~~~~~~~ + +The eventdev API supports a timeout when dequeuing packets using the +``rte_event_dequeue_burst`` function. +This allows a core to wait for an event to arrive, or until ``timeout`` number +of ticks have passed. Timeout ticks is not supported by the software eventdev +for performance reasons. diff --git a/src/spdk/dpdk/doc/guides/faq/faq.rst b/src/spdk/dpdk/doc/guides/faq/faq.rst new file mode 100644 index 000000000..f19c1389b --- /dev/null +++ b/src/spdk/dpdk/doc/guides/faq/faq.rst @@ -0,0 +1,197 @@ +.. SPDX-License-Identifier: BSD-3-Clause + Copyright(c) 2010-2014 Intel Corporation. + +What does "EAL: map_all_hugepages(): open failed: Permission denied Cannot init memory" mean? +--------------------------------------------------------------------------------------------- + +This is most likely due to the test application not being run with sudo to promote the user to a superuser. +Alternatively, applications can also be run as regular user. +For more information, please refer to :ref:`DPDK Getting Started Guide `. + + +If I want to change the number of hugepages allocated, how do I remove the original pages allocated? +---------------------------------------------------------------------------------------------------- + +The number of pages allocated can be seen by executing the following command:: + + grep Huge /proc/meminfo + +Once all the pages are mmapped by an application, they stay that way. +If you start a test application with less than the maximum, then you have free pages. +When you stop and restart the test application, it looks to see if the pages are available in the ``/dev/huge`` directory and mmaps them. +If you look in the directory, you will see ``n`` number of 2M pages files. If you specified 1024, you will see 1024 page files. +These are then placed in memory segments to get contiguous memory. + +If you need to change the number of pages, it is easier to first remove the pages. The usertools/dpdk-setup.sh script provides an option to do this. +See the "Quick Start Setup Script" section in the :ref:`DPDK Getting Started Guide ` for more information. + + +If I execute "l2fwd -l 0-3 -m 64 -n 3 -- -p 3", I get the following output, indicating that there are no socket 0 hugepages to allocate the mbuf and ring structures to? +------------------------------------------------------------------------------------------------------------------------------------------------------------------------ + +I have set up a total of 1024 Hugepages (that is, allocated 512 2M pages to each NUMA node). + +The -m command line parameter does not guarantee that huge pages will be reserved on specific sockets. Therefore, allocated huge pages may not be on socket 0. +To request memory to be reserved on a specific socket, please use the --socket-mem command-line parameter instead of -m. + + +I am running a 32-bit DPDK application on a NUMA system, and sometimes the application initializes fine but cannot allocate memory. Why is that happening? +---------------------------------------------------------------------------------------------------------------------------------------------------------- + +32-bit applications have limitations in terms of how much virtual memory is available, hence the number of hugepages they are able to allocate is also limited (1 GB size). +If your system has a lot (>1 GB size) of hugepage memory, not all of it will be allocated. 
+Due to hugepages typically being allocated on a local NUMA node, the hugepages allocation the application gets during the initialization depends on which +NUMA node it is running on (the EAL does not affinitize cores until much later in the initialization process). +Sometimes, the Linux OS runs the DPDK application on a core that is located on a different NUMA node from DPDK master core and +therefore all the hugepages are allocated on the wrong socket. + +To avoid this scenario, either lower the amount of hugepage memory available to 1 GB size (or less), or run the application with taskset +affinitizing the application to a would-be master core. + +For example, if your EAL coremask is 0xff0, the master core will usually be the first core in the coremask (0x10); this is what you have to supply to taskset:: + + taskset 0x10 ./l2fwd -l 4-11 -n 2 + +.. Note: Instead of '-c 0xff0' use the '-l 4-11' as a cleaner way to define lcores. + +In this way, the hugepages have a greater chance of being allocated to the correct socket. +Additionally, a ``--socket-mem`` option could be used to ensure the availability of memory for each socket, so that if hugepages were allocated on +the wrong socket, the application simply will not start. + + +On application startup, there is a lot of EAL information printed. Is there any way to reduce this? +--------------------------------------------------------------------------------------------------- + +Yes, the option ``--log-level=`` accepts either symbolic names (or numbers): + +1. emergency +2. alert +3. critical +4. error +5. warning +6. notice +7. info +8. debug + +How can I tune my network application to achieve lower latency? +--------------------------------------------------------------- + +Traditionally, there is a trade-off between throughput and latency. An application can be tuned to achieve a high throughput, +but the end-to-end latency of an average packet typically increases as a result. +Similarly, the application can be tuned to have, on average, a low end-to-end latency at the cost of lower throughput. + +To achieve higher throughput, the DPDK attempts to aggregate the cost of processing each packet individually by processing packets in bursts. +Using the testpmd application as an example, the "burst" size can be set on the command line to a value of 32 (also the default value). +This allows the application to request 32 packets at a time from the PMD. +The testpmd application then immediately attempts to transmit all the packets that were received, in this case, all 32 packets. +The packets are not transmitted until the tail pointer is updated on the corresponding TX queue of the network port. +This behavior is desirable when tuning for high throughput because the cost of tail pointer updates to both the RX and TX queues +can be spread across 32 packets, effectively hiding the relatively slow MMIO cost of writing to the PCIe* device. + +However, this is not very desirable when tuning for low latency, because the first packet that was received must also wait for the other 31 packets to be received. +It cannot be transmitted until the other 31 packets have also been processed because the NIC will not know to transmit the packets until the TX tail pointer has been updated, +which is not done until all 32 packets have been processed for transmission. + +To consistently achieve low latency even under heavy system load, the application developer should avoid processing packets in bunches. 
+The testpmd application can be configured from the command line to use a burst value of 1. +This allows a single packet to be processed at a time, providing lower latency, but with the added cost of lower throughput. + + +Without NUMA enabled, my network throughput is low, why? +-------------------------------------------------------- + +I have a dual Intel® Xeon® E5645 processors 2.40 GHz with four Intel® 82599 10 Gigabit Ethernet NICs. +Using eight logical cores on each processor with RSS set to distribute network load from two 10 GbE interfaces to the cores on each processor. + +Without NUMA enabled, memory is allocated from both sockets, since memory is interleaved. +Therefore, each 64B chunk is interleaved across both memory domains. + +The first 64B chunk is mapped to node 0, the second 64B chunk is mapped to node 1, the third to node 0, the fourth to node 1. +If you allocated 256B, you would get memory that looks like this: + +.. code-block:: console + + 256B buffer + Offset 0x00 - Node 0 + Offset 0x40 - Node 1 + Offset 0x80 - Node 0 + Offset 0xc0 - Node 1 + +Therefore, packet buffers and descriptor rings are allocated from both memory domains, thus incurring QPI bandwidth accessing the other memory and much higher latency. +For best performance with NUMA disabled, only one socket should be populated. + + +I am getting errors about not being able to open files. Why? +------------------------------------------------------------ + +As the DPDK operates, it opens a lot of files, which can result in reaching the open files limits, which is set using the ulimit command or in the limits.conf file. +This is especially true when using a large number (>512) of 2 MB huge pages. Please increase the open file limit if your application is not able to open files. +This can be done either by issuing a ulimit command or editing the limits.conf file. Please consult Linux manpages for usage information. + + +VF driver for IXGBE devices cannot be initialized +------------------------------------------------- + +Some versions of Linux IXGBE driver do not assign a random MAC address to VF devices at initialization. +In this case, this has to be done manually on the VM host, using the following command: + +.. code-block:: console + + ip link set vf mac + +where being the interface providing the virtual functions for example, eth0, being the virtual function number, for example 0, +and being the desired MAC address. + + +Is it safe to add an entry to the hash table while running? +------------------------------------------------------------ +Currently the table implementation is not a thread safe implementation and assumes that locking between threads and processes is handled by the user's application. +This is likely to be supported in future releases. + + +What is the purpose of setting iommu=pt? +---------------------------------------- +DPDK uses a 1:1 mapping and does not support IOMMU. IOMMU allows for simpler VM physical address translation. +The second role of IOMMU is to allow protection from unwanted memory access by an unsafe device that has DMA privileges. +Unfortunately, the protection comes with an extremely high performance cost for high speed NICs. + +Setting ``iommu=pt`` disables IOMMU support for the hypervisor. + + +When trying to send packets from an application to itself, meaning smac==dmac, using Intel(R) 82599 VF packets are lost. 
+------------------------------------------------------------------------------------------------------------------------ + +Check on register ``LLE(PFVMTXSSW[n])``, which allows an individual pool to send traffic and have it looped back to itself. + + +Can I split packet RX to use DPDK and have an application's higher order functions continue using Linux pthread? +---------------------------------------------------------------------------------------------------------------- + +The DPDK's lcore threads are Linux pthreads bound onto specific cores. Configure the DPDK to do work on the same +cores and run the application's other work on other cores using the DPDK's "coremask" setting to specify which +cores it should launch itself on. + + +Is it possible to exchange data between DPDK processes and regular userspace processes via some shared memory or IPC mechanism? +------------------------------------------------------------------------------------------------------------------------------- + +Yes - DPDK processes are regular Linux/BSD processes, and can use all OS provided IPC mechanisms. + + +Can the multiple queues in Intel(R) I350 be used with DPDK? +----------------------------------------------------------- + +I350 has RSS support and 8 queue pairs can be used in RSS mode. It should work with multi-queue DPDK applications using RSS. + + +How can hugepage-backed memory be shared among multiple processes? +------------------------------------------------------------------ + +See the Primary and Secondary examples in the :ref:`multi-process sample application `. + + +Why can't my application receive packets on my system with UEFI Secure Boot enabled? +------------------------------------------------------------------------------------ + +If UEFI secure boot is enabled, the Linux kernel may disallow the use of UIO on the system. +Therefore, devices for use by DPDK should be bound to the ``vfio-pci`` kernel module rather than ``igb_uio`` or ``uio_pci_generic``. diff --git a/src/spdk/dpdk/doc/guides/faq/index.rst b/src/spdk/dpdk/doc/guides/faq/index.rst new file mode 100644 index 000000000..a9ae9bddd --- /dev/null +++ b/src/spdk/dpdk/doc/guides/faq/index.rst @@ -0,0 +1,13 @@ +.. SPDX-License-Identifier: BSD-3-Clause + Copyright(c) 2010-2014 Intel Corporation. + +FAQ +=== + +This document contains some Frequently Asked Questions that arise when working with DPDK. + +.. toctree:: + :maxdepth: 2 + :numbered: + + faq diff --git a/src/spdk/dpdk/doc/guides/freebsd_gsg/build_dpdk.rst b/src/spdk/dpdk/doc/guides/freebsd_gsg/build_dpdk.rst new file mode 100644 index 000000000..e31c966b9 --- /dev/null +++ b/src/spdk/dpdk/doc/guides/freebsd_gsg/build_dpdk.rst @@ -0,0 +1,252 @@ +.. SPDX-License-Identifier: BSD-3-Clause + Copyright(c) 2010-2014 Intel Corporation. + +.. _building_from_source: + +Compiling the DPDK Target from Source +===================================== + +Prerequisites +------------- + +The following FreeBSD packages are required to build DPDK: + +* meson +* ninja +* pkgconf + +These can be installed using (as root):: + + pkg install meson pkgconf + +To compile the required kernel modules for memory management and working +with physical NIC devices, the kernel sources for FreeBSD also +need to be installed. 
If not already present on the system, these can be +installed via commands like the following, for FreeBSD 12.1 on x86_64:: + + fetch http://ftp.freebsd.org/pub/FreeBSD/releases/amd64/12.1-RELEASE/src.txz + tar -C / -xJvf src.txz + +To enable the telemetry library in DPDK, the jansson library also needs to +be installed, and can be installed via:: + + pkg install jansson + +Individual drivers may have additional requirements. Consult the relevant +driver guide for any driver-specific requirements of interest. + +Building DPDK +------------- + +The following commands can be used to build and install DPDK on a system. +The final, install, step generally needs to be run as root:: + + meson build + cd build + ninja + ninja install + +This will install the DPDK libraries and drivers to `/usr/local/lib` with a +pkg-config file `libdpdk.pc` installed to `/usr/local/lib/pkgconfig`. The +DPDK test applications, such as `dpdk-testpmd` are installed to +`/usr/local/bin`. To use these applications, it is recommended that the +`contigmem` and `nic_uio` kernel modules be loaded first, as described in +the next section. + +.. note:: + + It is recommended that pkg-config be used to query information + about the compiler and linker flags needed to build applications + against DPDK. In some cases, the path `/usr/local/lib/pkgconfig` + may not be in the default search paths for `.pc` files, which means + that queries for DPDK information may fail. This can be fixed by + setting the appropriate path in `PKG_CONFIG_PATH` environment + variable. + + +.. _loading_contigmem: + +Loading the DPDK contigmem Module +--------------------------------- + +To run a DPDK application, physically contiguous memory is required. +In the absence of non-transparent superpages, the included sources for the +contigmem kernel module provides the ability to present contiguous blocks of +memory for the DPDK to use. The contigmem module must be loaded into the +running kernel before any DPDK is run. Once DPDK is installed on the +system, the module can be found in the `/boot/modules` directory. + +The amount of physically contiguous memory along with the number of physically +contiguous blocks to be reserved by the module can be set at runtime prior to +module loading using:: + + kenv hw.contigmem.num_buffers=n + kenv hw.contigmem.buffer_size=m + +The kernel environment variables can also be specified during boot by placing the +following in ``/boot/loader.conf``: + +.. code-block:: shell + + hw.contigmem.num_buffers=n + hw.contigmem.buffer_size=m + +The variables can be inspected using the following command:: + + sysctl -a hw.contigmem + +Where n is the number of blocks and m is the size in bytes of each area of +contiguous memory. A default of two buffers of size 1073741824 bytes (1 Gigabyte) +each is set during module load if they are not specified in the environment. + +The module can then be loaded using kldload:: + + kldload contigmem + +It is advisable to include the loading of the contigmem module during the boot +process to avoid issues with potential memory fragmentation during later system +up time. This can be achieved by placing lines similar to the following into +``/boot/loader.conf``: + +.. code-block:: shell + + hw.contigmem.num_buffers=1 + hw.contigmem.buffer_size=1073741824 + contigmem_load="YES" + +.. note:: + + The contigmem_load directive should be placed after any definitions of + ``hw.contigmem.num_buffers`` and ``hw.contigmem.buffer_size`` if the default values + are not to be used. 
+ +An error such as:: + + kldload: can't load ./x86_64-native-freebsd-gcc/kmod/contigmem.ko: + Exec format error + +is generally attributed to not having enough contiguous memory +available and can be verified via dmesg or ``/var/log/messages``:: + + kernel: contigmalloc failed for buffer + +To avoid this error, reduce the number of buffers or the buffer size. + +.. _loading_nic_uio: + +Loading the DPDK nic_uio Module +------------------------------- + +After loading the contigmem module, the ``nic_uio`` module must also be loaded into the +running kernel prior to running any DPDK application, e.g. using:: + + kldload nic_uio + +.. note:: + + If the ports to be used are currently bound to a existing kernel driver + then the ``hw.nic_uio.bdfs sysctl`` value will need to be set before loading the + module. Setting this value is described in the next section below. + +Currently loaded modules can be seen by using the ``kldstat`` command and a module +can be removed from the running kernel by using ``kldunload ``. + +To load the module during boot place the following into ``/boot/loader.conf``: + +.. code-block:: shell + + nic_uio_load="YES" + +.. note:: + + ``nic_uio_load="YES"`` must appear after the contigmem_load directive, if it exists. + +By default, the ``nic_uio`` module will take ownership of network ports if they are +recognized DPDK devices and are not owned by another module. However, since +the FreeBSD kernel includes support, either built-in, or via a separate driver +module, for most network card devices, it is likely that the ports to be used are +already bound to a driver other than ``nic_uio``. The following sub-section describe +how to query and modify the device ownership of the ports to be used by +DPDK applications. + +.. _binding_network_ports: + +Binding Network Ports to the nic_uio Module +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ + +Device ownership can be viewed using the pciconf -l command. The example below shows +four Intel® 82599 network ports under ``if_ixgbe`` module ownership. + +.. code-block:: none + + pciconf -l + ix0@pci0:1:0:0: class=0x020000 card=0x00038086 chip=0x10fb8086 rev=0x01 hdr=0x00 + ix1@pci0:1:0:1: class=0x020000 card=0x00038086 chip=0x10fb8086 rev=0x01 hdr=0x00 + ix2@pci0:2:0:0: class=0x020000 card=0x00038086 chip=0x10fb8086 rev=0x01 hdr=0x00 + ix3@pci0:2:0:1: class=0x020000 card=0x00038086 chip=0x10fb8086 rev=0x01 hdr=0x00 + +The first column constitutes three components: + +#. Device name: ``ixN`` + +#. Unit name: ``pci0`` + +#. Selector (Bus:Device:Function): ``1:0:0`` + +Where no driver is associated with a device, the device name will be ``none``. + +By default, the FreeBSD kernel will include built-in drivers for the most common +devices; a kernel rebuild would normally be required to either remove the drivers +or configure them as loadable modules. + +To avoid building a custom kernel, the ``nic_uio`` module can detach a network port +from its current device driver. This is achieved by setting the ``hw.nic_uio.bdfs`` +kernel environment variable prior to loading ``nic_uio``, as follows:: + + kenv hw.nic_uio.bdfs="b:d:f,b:d:f,..." + +Where a comma separated list of selectors is set, the list must not contain any +whitespace. 
+ +For example to re-bind ``ix2@pci0:2:0:0`` and ``ix3@pci0:2:0:1`` to the ``nic_uio`` module +upon loading, use the following command:: + + kenv hw.nic_uio.bdfs="2:0:0,2:0:1" + +The variable can also be specified during boot by placing the following into +``/boot/loader.conf``, before the previously-described ``nic_uio_load`` line - as +shown: + +.. code-block:: shell + + hw.nic_uio.bdfs="2:0:0,2:0:1" + nic_uio_load="YES" + +Binding Network Ports Back to their Original Kernel Driver +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ + +If the original driver for a network port has been compiled into the kernel, +it is necessary to reboot FreeBSD to restore the original device binding. Before +doing so, update or remove the ``hw.nic_uio.bdfs`` in ``/boot/loader.conf``. + +If rebinding to a driver that is a loadable module, the network port binding can +be reset without rebooting. To do so, unload both the target kernel module and the +``nic_uio`` module, modify or clear the ``hw.nic_uio.bdfs`` kernel environment (kenv) +value, and reload the two drivers - first the original kernel driver, and then +the ``nic_uio driver``. Note: the latter does not need to be reloaded unless there are +ports that are still to be bound to it. + +Example commands to perform these steps are shown below:: + + kldunload nic_uio + kldunload + + # To clear the value completely: + kenv -u hw.nic_uio.bdfs + + # To update the list of ports to bind: + kenv hw.nic_uio.bdfs="b:d:f,b:d:f,..." + + kldload + + kldload nic_uio # optional diff --git a/src/spdk/dpdk/doc/guides/freebsd_gsg/build_sample_apps.rst b/src/spdk/dpdk/doc/guides/freebsd_gsg/build_sample_apps.rst new file mode 100644 index 000000000..2a68f5fc3 --- /dev/null +++ b/src/spdk/dpdk/doc/guides/freebsd_gsg/build_sample_apps.rst @@ -0,0 +1,117 @@ +.. SPDX-License-Identifier: BSD-3-Clause + Copyright(c) 2010-2014 Intel Corporation. + +.. _compiling_sample_apps: + +Compiling and Running Sample Applications +========================================= + +The chapter describes how to compile and run applications in a DPDK +environment. It also provides a pointer to where sample applications are stored. + +Compiling a Sample Application +------------------------------ + +The DPDK example applications make use of the pkg-config file installed on +the system when DPDK is installed, and so can be built using GNU make. + +.. note:: + + BSD make cannot be used to compile the DPDK example applications. GNU + make can be installed using `pkg install gmake` if not already installed + on the FreeBSD system. 
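+
+As an illustration of what such an application contains, the sketch below
+approximates the helloworld example used in the remainder of this section.
+It is a simplified sketch rather than the exact source; see
+``examples/helloworld/main.c`` in the DPDK tree for the real code.
+
+.. code-block:: c
+
+    #include <stdio.h>
+
+    #include <rte_eal.h>
+    #include <rte_common.h>
+    #include <rte_lcore.h>
+    #include <rte_launch.h>
+    #include <rte_debug.h>
+
+    /* Function launched on each lcore: just identify the core it runs on. */
+    static int
+    lcore_hello(__rte_unused void *arg)
+    {
+        printf("hello from core %u\n", rte_lcore_id());
+        return 0;
+    }
+
+    int
+    main(int argc, char **argv)
+    {
+        /* EAL consumes its own arguments (core list, memory options, ...). */
+        int ret = rte_eal_init(argc, argv);
+        if (ret < 0)
+            rte_panic("Cannot init EAL\n");
+
+        /* Run lcore_hello() on every lcore, including the master lcore,
+         * then wait for all of them to finish. */
+        rte_eal_mp_remote_launch(lcore_hello, NULL, CALL_MASTER);
+        rte_eal_mp_wait_lcore();
+
+        return 0;
+    }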
+
+The following shows how to compile the helloworld example app, following
+the installation of DPDK using `ninja install` as described previously::
+
+        $ export PKG_CONFIG_PATH=/usr/local/lib/pkgconfig
+
+        $ cd examples/helloworld/
+
+        $ gmake
+        cc -O3 -I/usr/local/include -include rte_config.h -march=native
+        -D__BSD_VISIBLE main.c -o build/helloworld-shared
+        -L/usr/local/lib -lrte_telemetry -lrte_bpf -lrte_flow_classify
+        -lrte_pipeline -lrte_table -lrte_port -lrte_fib -lrte_ipsec
+        -lrte_stack -lrte_security -lrte_sched -lrte_reorder -lrte_rib
+        -lrte_rcu -lrte_rawdev -lrte_pdump -lrte_member -lrte_lpm
+        -lrte_latencystats -lrte_jobstats -lrte_ip_frag -lrte_gso -lrte_gro
+        -lrte_eventdev -lrte_efd -lrte_distributor -lrte_cryptodev
+        -lrte_compressdev -lrte_cfgfile -lrte_bitratestats -lrte_bbdev
+        -lrte_acl -lrte_timer -lrte_hash -lrte_metrics -lrte_cmdline
+        -lrte_pci -lrte_ethdev -lrte_meter -lrte_net -lrte_mbuf
+        -lrte_mempool -lrte_ring -lrte_eal -lrte_kvargs
+        ln -sf helloworld-shared build/helloworld
+
+
+.. _running_sample_app:
+
+Running a Sample Application
+----------------------------
+
+#. The ``contigmem`` and ``nic_uio`` modules must be set up prior to running an application.
+
+#. Any ports to be used by the application must be already bound to the ``nic_uio`` module,
+   as described in section :ref:`binding_network_ports`, prior to running the application.
+   The application is linked with the DPDK target environment's Environment
+   Abstraction Layer (EAL) library, which provides some options that are generic
+   to every DPDK application.
+
+A large number of options can be given to the EAL when running an
+application. A full list of options can be obtained by passing `--help` to a
+DPDK application. Some of the EAL options for FreeBSD are as follows:
+
+* ``-c COREMASK`` or ``-l CORELIST``:
+  A hexadecimal bit mask of the cores to run on. Note that core numbering
+  can change between platforms and should be determined beforehand. The corelist
+  is a list of cores to use instead of a core mask.
+
+* ``-b <domain:bus:devid.func>``:
+  Blacklisting of ports; prevent EAL from using the specified PCI device
+  (multiple ``-b`` options are allowed).
+
+* ``--use-device``:
+  Use the specified Ethernet device(s) only. Use comma-separated
+  ``[domain:]bus:devid.func`` values. Cannot be used with the ``-b`` option.
+
+* ``-v``:
+  Display version information on startup.
+
+* ``-m MB``:
+  Memory to allocate from hugepages, regardless of processor socket.
+
+Other options, which are specific to Linux and are not supported under
+FreeBSD, are as follows:
+
+* ``--socket-mem``:
+  Memory to allocate from hugepages on specific sockets.
+
+* ``--huge-dir``:
+  The directory where hugetlbfs is mounted.
+
+* ``--mbuf-pool-ops-name``:
+  Pool ops name for mbuf to use.
+
+* ``--file-prefix``:
+  The prefix text used for hugepage filenames.
+
+The ``-c`` or ``-l`` option is mandatory; the others are optional.
+
+.. _running_non_root:
+
+Running DPDK Applications Without Root Privileges
+-------------------------------------------------
+
+Although applications using the DPDK use network ports and other hardware
+resources directly, with a number of small permission adjustments, it is possible
+to run these applications as a user other than "root".
To do so, the ownership, +or permissions, on the following file system objects should be adjusted to ensure +that the user account being used to run the DPDK application has access +to them: + +* The userspace-io device files in ``/dev``, for example, ``/dev/uio0``, ``/dev/uio1``, and so on + +* The userspace contiguous memory device: ``/dev/contigmem`` + +.. note:: + + Please refer to the DPDK Release Notes for supported applications. diff --git a/src/spdk/dpdk/doc/guides/freebsd_gsg/freebsd_eal_parameters.rst b/src/spdk/dpdk/doc/guides/freebsd_gsg/freebsd_eal_parameters.rst new file mode 100644 index 000000000..fba467a2c --- /dev/null +++ b/src/spdk/dpdk/doc/guides/freebsd_gsg/freebsd_eal_parameters.rst @@ -0,0 +1,20 @@ +.. SPDX-License-Identifier: BSD-3-Clause + Copyright(c) 2018 Intel Corporation. + +EAL parameters +============== + +This document contains a list of all EAL parameters. These parameters can be +used by any DPDK application running on FreeBSD. + +Common EAL parameters +--------------------- + +The following EAL parameters are common to all platforms supported by DPDK. + +.. include:: ../linux_gsg/eal_args.include.rst + +FreeBSD-specific EAL parameters +------------------------------- + +There are currently no FreeBSD-specific EAL command-line parameters available. diff --git a/src/spdk/dpdk/doc/guides/freebsd_gsg/index.rst b/src/spdk/dpdk/doc/guides/freebsd_gsg/index.rst new file mode 100644 index 000000000..9af5988dc --- /dev/null +++ b/src/spdk/dpdk/doc/guides/freebsd_gsg/index.rst @@ -0,0 +1,17 @@ +.. SPDX-License-Identifier: BSD-3-Clause + Copyright(c) 2010-2014 Intel Corporation. + +.. _freebsd_gsg: + +Getting Started Guide for FreeBSD +================================= + +.. toctree:: + :maxdepth: 2 + :numbered: + + intro + install_from_ports + build_dpdk + build_sample_apps + freebsd_eal_parameters diff --git a/src/spdk/dpdk/doc/guides/freebsd_gsg/install_from_ports.rst b/src/spdk/dpdk/doc/guides/freebsd_gsg/install_from_ports.rst new file mode 100644 index 000000000..d946f3f3b --- /dev/null +++ b/src/spdk/dpdk/doc/guides/freebsd_gsg/install_from_ports.rst @@ -0,0 +1,125 @@ +.. SPDX-License-Identifier: BSD-3-Clause + Copyright(c) 2010-2014 Intel Corporation. + +.. _install_from_ports: + +Installing DPDK from the Ports Collection +========================================= + +The easiest way to get up and running with the DPDK on FreeBSD is to +install it using the FreeBSD `pkg` utility or from the ports collection. +Details of installing applications from packages or the ports collection are documented in the +`FreeBSD Handbook `_, +chapter `Installing Applications: Packages and Ports `_. + +.. note:: + + Please ensure that the latest patches are applied to third party libraries + and software to avoid any known vulnerabilities. + + +Installing the DPDK Package for FreeBSD +--------------------------------------- + +DPDK can be installed on FreeBSD using the command:: + + pkg install dpdk + +After the installation of the DPDK package, instructions will be printed on +how to install the kernel modules required to use the DPDK. A more +complete version of these instructions can be found in the sections +:ref:`loading_contigmem` and :ref:`loading_nic_uio`. Normally, lines like +those below would be added to the file ``/boot/loader.conf``. + +.. 
code-block:: shell + + # Reserve 2 x 1G blocks of contiguous memory using contigmem driver: + hw.contigmem.num_buffers=2 + hw.contigmem.buffer_size=1073741824 + contigmem_load="YES" + + # Identify NIC devices for DPDK apps to use and load nic_uio driver: + hw.nic_uio.bdfs="2:0:0,2:0:1" + nic_uio_load="YES" + + +Installing the DPDK FreeBSD Port +-------------------------------- + +If so desired, the user can install DPDK using the ports collection rather than from +a pre-compiled binary package. +On a system with the ports collection installed in ``/usr/ports``, the DPDK +can be installed using the commands:: + + cd /usr/ports/net/dpdk + + make install + + +Compiling and Running the Example Applications +---------------------------------------------- + +When the DPDK has been installed from the ports collection it installs +its example applications in ``/usr/local/share/dpdk/examples``. +These examples can be compiled and run as described in :ref:`compiling_sample_apps`. + +.. note:: + + DPDK example applications must be complied using `gmake` rather than + BSD `make`. To detect the installed DPDK libraries, `pkg-config` should + also be installed on the system. + +.. note:: + + To install a copy of the DPDK compiled using gcc, please download the + official DPDK package from https://core.dpdk.org/download/ and install manually using + the instructions given in the next chapter, :ref:`building_from_source` + +An example application can therefore be copied to a user's home directory and +compiled and run as below, where we have 2 memory blocks of size 1G reserved +via the contigmem module, and 4 NIC ports bound to the nic_uio module:: + + cp -r /usr/local/share/dpdk/examples/helloworld . + + cd helloworld/ + + gmake + cc -O3 -I/usr/local/include -include rte_config.h -march=corei7 -D__BSD_VISIBLE main.c -o build/helloworld-shared -L/usr/local/lib -lrte_bpf -lrte_flow_classify -lrte_pipeline -lrte_table -lrte_port -lrte_fib -lrte_ipsec -lrte_stack -lrte_security -lrte_sched -lrte_reorder -lrte_rib -lrte_rcu -lrte_rawdev -lrte_pdump -lrte_member -lrte_lpm -lrte_latencystats -lrte_jobstats -lrte_ip_frag -lrte_gso -lrte_gro -lrte_eventdev -lrte_efd -lrte_distributor -lrte_cryptodev -lrte_compressdev -lrte_cfgfile -lrte_bitratestats -lrte_bbdev -lrte_acl -lrte_timer -lrte_hash -lrte_metrics -lrte_cmdline -lrte_pci -lrte_ethdev -lrte_meter -lrte_net -lrte_mbuf -lrte_mempool -lrte_ring -lrte_eal -lrte_kvargs + ln -sf helloworld-shared build/helloworld + + sudo ./build/helloworld -l 0-3 + EAL: Sysctl reports 8 cpus + EAL: Detected 8 lcore(s) + EAL: Detected 1 NUMA nodes + EAL: Multi-process socket /var/run/dpdk/rte/mp_socket + EAL: Selected IOVA mode 'PA' + EAL: Contigmem driver has 2 buffers, each of size 1GB + EAL: Mapped memory segment 0 @ 0x1040000000: physaddr:0x180000000, len 1073741824 + EAL: Mapped memory segment 1 @ 0x1080000000: physaddr:0x1c0000000, len 1073741824 + EAL: PCI device 0000:00:19.0 on NUMA socket 0 + EAL: probe driver: 8086:153b net_e1000_em + EAL: 0000:00:19.0 not managed by UIO driver, skipping + EAL: PCI device 0000:01:00.0 on NUMA socket 0 + EAL: probe driver: 8086:1572 net_i40e + EAL: PCI device 0000:01:00.1 on NUMA socket 0 + EAL: probe driver: 8086:1572 net_i40e + EAL: PCI device 0000:01:00.2 on NUMA socket 0 + EAL: probe driver: 8086:1572 net_i40e + EAL: PCI device 0000:01:00.3 on NUMA socket 0 + EAL: probe driver: 8086:1572 net_i40e + hello from core 1 + hello from core 2 + hello from core 3 + hello from core 0 + + +.. 
note:: + + To run a DPDK process as a non-root user, adjust the permissions on + the ``/dev/contigmem`` and ``/dev/uio device`` nodes as described in section + :ref:`running_non_root` + +.. note:: + + For an explanation of the command-line parameters that can be passed to an + DPDK application, see section :ref:`running_sample_app`. diff --git a/src/spdk/dpdk/doc/guides/freebsd_gsg/intro.rst b/src/spdk/dpdk/doc/guides/freebsd_gsg/intro.rst new file mode 100644 index 000000000..63160ce64 --- /dev/null +++ b/src/spdk/dpdk/doc/guides/freebsd_gsg/intro.rst @@ -0,0 +1,55 @@ +.. SPDX-License-Identifier: BSD-3-Clause + Copyright(c) 2010-2014 Intel Corporation. + +Introduction +============ + +This document contains instructions for installing and configuring the +Data Plane Development Kit (DPDK) software. It is designed to get customers +up and running quickly and describes how to compile and run a +DPDK application in a FreeBSD application (freebsd) environment, without going +deeply into detail. + +For a comprehensive guide to installing and using FreeBSD, the following +handbook is available from the FreeBSD Documentation Project: +`FreeBSD Handbook `_. + +.. note:: + + DPDK is now available as part of the FreeBSD ports collection and as a pre-built package. + Installing via the ports collection or FreeBSD `pkg` infrastructure is now the recommended + way to install DPDK on FreeBSD, and is documented in the next chapter, :ref:`install_from_ports`. + +Documentation Roadmap +--------------------- + +The following is a list of DPDK documents in the suggested reading order: + +* **Release Notes** : Provides release-specific information, including supported + features, limitations, fixed issues, known issues and so on. Also, provides the + answers to frequently asked questions in FAQ format. + +* **Getting Started Guide** (this document): Describes how to install and + configure the DPDK; designed to get users up and running quickly with the + software. + +* **Programmer's Guide**: Describes: + + * The software architecture and how to use it (through examples), + specifically in a Linux* application (linux) environment + + * The content of the DPDK, the build system (including the commands + that can be used in the root DPDK Makefile to build the development + kit and an application) and guidelines for porting an application + + * Optimizations used in the software and those that should be considered + for new development + + A glossary of terms is also provided. + +* **API Reference**: Provides detailed information about DPDK functions, + data structures and other programming constructs. + +* **Sample Applications User Guide**: Describes a set of sample applications. + Each chapter describes a sample application that showcases specific functionality + and provides instructions on how to compile, run and use the sample application. diff --git a/src/spdk/dpdk/doc/guides/howto/debug_troubleshoot.rst b/src/spdk/dpdk/doc/guides/howto/debug_troubleshoot.rst new file mode 100644 index 000000000..cef016b2f --- /dev/null +++ b/src/spdk/dpdk/doc/guides/howto/debug_troubleshoot.rst @@ -0,0 +1,460 @@ +.. SPDX-License-Identifier: BSD-3-Clause + Copyright(c) 2018 Intel Corporation. + +Debug & Troubleshoot guide +========================== + +DPDK applications can be designed to have simple or complex pipeline processing +stages making use of single or multiple threads. Applications can use poll mode +hardware devices which helps in offloading CPU cycles too. 
It is common to find +solutions designed with + +* single or multiple primary processes + +* single primary and single secondary + +* single primary and multiple secondaries + +In all the above cases, it is tedious to isolate, debug, and understand various +behaviors which occur randomly or periodically. The goal of the guide is to +consolidate a few commonly seen issues for reference. Then, isolate to identify +the root cause through step by step debug at various stages. + +.. note:: + + It is difficult to cover all possible issues; in a single attempt. With + feedback and suggestions from the community, more cases can be covered. + + +Application Overview +-------------------- + +By making use of the application model as a reference, we can discuss multiple +causes of issues in the guide. Let us assume the sample makes use of a single +primary process, with various processing stages running on multiple cores. The +application may also make uses of Poll Mode Driver, and libraries like service +cores, mempool, mbuf, eventdev, cryptodev, QoS, and ethdev. + +The overview of an application modeled using PMD is shown in +:numref:`dtg_sample_app_model`. + +.. _dtg_sample_app_model: + +.. figure:: img/dtg_sample_app_model.* + + Overview of pipeline stage of an application + + +Bottleneck Analysis +------------------- + +A couple of factors that lead the design decision could be the platform, scale +factor, and target. This distinct preference leads to multiple combinations, +that are built using PMD and libraries of DPDK. While the compiler, library +mode, and optimization flags are the components are to be constant, that +affects the application too. + + +Is there mismatch in packet (received < desired) rate? +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ + +RX Port and associated core :numref:`dtg_rx_rate`. + +.. _dtg_rx_rate: + +.. figure:: img/dtg_rx_rate.* + + RX packet rate compared against received rate. + +#. Is the configuration for the RX setup correctly? + + * Identify if port Speed and Duplex is matching to desired values with + ``rte_eth_link_get``. + + * Check ``DEV_RX_OFFLOAD_JUMBO_FRAME`` is set with ``rte_eth_dev_info_get``. + + * Check promiscuous mode if the drops do not occur for unique MAC address + with ``rte_eth_promiscuous_get``. + +#. Is the drop isolated to certain NIC only? + + * Make use of ``rte_eth_dev_stats`` to identify the drops cause. + + * If there are mbuf drops, check nb_desc for RX descriptor as it might not + be sufficient for the application. + + * If ``rte_eth_dev_stats`` shows drops are on specific RX queues, ensure RX + lcore threads has enough cycles for ``rte_eth_rx_burst`` on the port queue + pair. + + * If there are redirect to a specific port queue pair with, ensure RX lcore + threads gets enough cycles. + + * Check the RSS configuration ``rte_eth_dev_rss_hash_conf_get`` if the + spread is not even and causing drops. + + * If PMD stats are not updating, then there might be offload or configuration + which is dropping the incoming traffic. + +#. Is there drops still seen? + + * If there are multiple port queue pair, it might be the RX thread, RX + distributor, or event RX adapter not having enough cycles. + + * If there are drops seen for RX adapter or RX distributor, try using + ``rte_prefetch_non_temporal`` which intimates the core that the mbuf in the + cache is temporary. + + +Is there packet drops at receive or transmit? +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ + +RX-TX port and associated cores :numref:`dtg_rx_tx_drop`. + +.. 
_dtg_rx_tx_drop: + +.. figure:: img/dtg_rx_tx_drop.* + + RX-TX drops + +#. At RX + + * Identify if there are multiple RX queue configured for port by + ``nb_rx_queues`` using ``rte_eth_dev_info_get``. + + * Using ``rte_eth_dev_stats`` fetch drops in q_errors, check if RX thread + is configured to fetch packets from the port queue pair. + + * Using ``rte_eth_dev_stats`` shows drops in ``rx_nombuf``, check if RX + thread has enough cycles to consume the packets from the queue. + +#. At TX + + * If the TX rate is falling behind the application fill rate, identify if + there are enough descriptors with ``rte_eth_dev_info_get`` for TX. + + * Check the ``nb_pkt`` in ``rte_eth_tx_burst`` is done for multiple packets. + + * Check ``rte_eth_tx_burst`` invokes the vector function call for the PMD. + + * If oerrors are getting incremented, TX packet validations are failing. + Check if there queue specific offload failures. + + * If the drops occur for large size packets, check MTU and multi-segment + support configured for NIC. + + +Is there object drops in producer point for the ring library? +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ + +Producer point for ring :numref:`dtg_producer_ring`. + +.. _dtg_producer_ring: + +.. figure:: img/dtg_producer_ring.* + + Producer point for Rings + +#. Performance issue isolation at producer + + * Use ``rte_ring_dump`` to validate for all single producer flag is set to + ``RING_F_SP_ENQ``. + + * There should be sufficient ``rte_ring_free_count`` at any point in time. + + * Extreme stalls in dequeue stage of the pipeline will cause + ``rte_ring_full`` to be true. + + +Is there object drops in consumer point for the ring library? +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ + +Consumer point for ring :numref:`dtg_consumer_ring`. + +.. _dtg_consumer_ring: + +.. figure:: img/dtg_consumer_ring.* + + Consumer point for Rings + +#. Performance issue isolation at consumer + + * Use ``rte_ring_dump`` to validate for all single consumer flag is set to + ``RING_F_SC_DEQ``. + + * If the desired burst dequeue falls behind the actual dequeue, the enqueue + stage is not filling up the ring as required. + + * Extreme stall in the enqueue will lead to ``rte_ring_empty`` to be true. + + +Is there a variance in packet or object processing rate in the pipeline? +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ + +Memory objects close to NUMA :numref:`dtg_mempool`. + +.. _dtg_mempool: + +.. figure:: img/dtg_mempool.* + + Memory objects have to be close to the device per NUMA. + +#. Stall in processing pipeline can be attributes of MBUF release delays. + These can be narrowed down to + + * Heavy processing cycles at single or multiple processing stages. + + * Cache is spread due to the increased stages in the pipeline. + + * CPU thread responsible for TX is not able to keep up with the burst of + traffic. + + * Extra cycles to linearize multi-segment buffer and software offload like + checksum, TSO, and VLAN strip. + + * Packet buffer copy in fast path also results in stalls in MBUF release if + not done selectively. + + * Application logic sets ``rte_pktmbuf_refcnt_set`` to higher than the + desired value and frequently uses ``rte_pktmbuf_prefree_seg`` and does + not release MBUF back to mempool. + +#. Lower performance between the pipeline processing stages can be + + * The NUMA instance for packets or objects from NIC, mempool, and ring + should be the same. 
+
+   * Drops on a specific socket are due to insufficient objects in the pool.
+     Use ``rte_mempool_get_count`` or ``rte_mempool_avail_count`` to monitor
+     when the drops occur.
+
+   * Try prefetching the content in the processing pipeline logic to minimize
+     the stalls.
+
+#. Performance issues can be due to special cases
+
+   * Check whether the MBUF is contiguous with ``rte_pktmbuf_is_contiguous``,
+     as certain offloads require this.
+
+   * Use ``rte_mempool_cache_create`` for user threads that require access to
+     mempool objects.
+
+   * If the variance is absent for larger huge pages, then try
+     ``rte_mem_lock_page`` on the objects, packets and lookup tables to
+     isolate the issue.
+
+
+Is there a variance in cryptodev performance?
+~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
+
+Crypto device and PMD :numref:`dtg_crypto`.
+
+.. _dtg_crypto:
+
+.. figure:: img/dtg_crypto.*
+
+   CRYPTO and interaction with PMD device.
+
+#. Performance issue isolation for enqueue
+
+   * Ensure the cryptodev, its resources and the enqueue stage are running on
+     the same NUMA node.
+
+   * Isolate the cause of the errors reported in err_count using
+     ``rte_cryptodev_stats``.
+
+   * Parallelize the enqueue threads across multiple queue pairs.
+
+#. Performance issue isolation for dequeue
+
+   * Ensure the cryptodev, its resources and the dequeue stage are running on
+     the same NUMA node.
+
+   * Isolate the cause of the errors reported in err_count using
+     ``rte_cryptodev_stats``.
+
+   * Parallelize the dequeue threads across multiple queue pairs.
+
+#. Performance issue isolation for crypto operation
+
+   * If a software-assisted cryptodev is in use, ensure the library is built
+     with the right (SIMD) flags, or check whether the queue pair uses the CPU
+     ISA (AVX|SSE|NEON) via feature_flags from ``rte_cryptodev_info_get``.
+
+   * If a hardware-assisted cryptodev is in use, ensure both the firmware and
+     the drivers are up to date.
+
+#. Configuration issue isolation
+
+   * Identify cryptodev instances with ``rte_cryptodev_count`` and
+     ``rte_cryptodev_info_get``.
+
+
+Is user function performance not as expected?
+~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
+
+Custom worker function :numref:`dtg_distributor_worker`.
+
+.. _dtg_distributor_worker:
+
+.. figure:: img/dtg_distributor_worker.*
+
+   Custom worker function performance drops.
+
+#. Performance issue isolation
+
+   * Functions running on CPU cores without context switches give the best
+     performance. Identify the lcore with ``rte_lcore`` and the lcore to CPU
+     index mapping with ``rte_lcore_index``.
+
+   * Use ``rte_thread_get_affinity`` to isolate functions running on the same
+     CPU core.
+
+#. Configuration issue isolation
+
+   * Identify the core role using ``rte_eal_lcore_role`` (RTE, OFF or
+     SERVICE). Check that performance-critical functions are mapped to run on
+     the intended cores.
+
+   * For high-performance execution logic, ensure it runs on the correct NUMA
+     node and on a non-master core.
+
+   * Analyze the run logic with ``rte_dump_stack``, ``rte_dump_registers`` and
+     ``rte_memdump`` for more insights.
+
+   * Make use of objdump to ensure the opcodes match the desired state.
+
+
+Are dynamic service functions not executed frequently enough?
+~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
+
+Service functions on service cores :numref:`dtg_service`.
+
+.. _dtg_service:
+
+.. figure:: img/dtg_service.*
+
+   Functions running on service cores
+
+#. Performance issue isolation
+
+   * For services configured to run in parallel, ``rte_service_lcore_count``
+     should be equal to ``rte_service_lcore_count_services``.
+
+   * A service intended to run in parallel on all cores should report
+     ``RTE_SERVICE_CAP_MT_SAFE`` via ``rte_service_probe_capability``, and
+     ``rte_service_map_lcore_get`` should return a unique lcore mapping.
+
+   * Check whether the execution cycles of the dynamic service functions are
+     less frequent than expected.
+
+   * If services share an lcore, the overall execution should fit within that
+     lcore's cycle budget.
+
+#. Configuration issue isolation
+
+   * Check whether the service is running with ``rte_service_runstate_get``.
+
+   * Generic debug via ``rte_service_dump``.
+
+
+Is there a bottleneck in the performance of eventdev?
+~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
+
+#. Check for generic configuration
+
+   * Ensure the event devices are created on the right NUMA node, using
+     ``rte_event_dev_count`` and ``rte_event_dev_socket_id``.
+
+   * For each event stage, check whether events are looped back into the same
+     queue.
+
+   * If the failure is on the enqueue stage for events, check the queue depth
+     with ``rte_event_dev_info_get``.
+
+#. If there are performance drops in the enqueue stage
+
+   * Use ``rte_event_dev_dump`` to dump the eventdev information.
+
+   * Periodically check the stats for queues and ports to identify starvation.
+
+   * Check the in-flight events for the desired queue for enqueue and dequeue.
+
+
+Is there a variance in the traffic manager?
+~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
+
+Traffic Manager on TX interface :numref:`dtg_qos_tx`.
+
+.. _dtg_qos_tx:
+
+.. figure:: img/dtg_qos_tx.*
+
+   Traffic Manager just before TX.
+
+#. Identify whether the variance from the expected behavior is due to
+   insufficient CPU cycles. Use ``rte_tm_capabilities_get`` to check which
+   hierarchies, WRED and priority schedulers can be offloaded to hardware.
+
+#. Undesired flow drops can be narrowed down to WRED, priority, and rate
+   limiters.
+
+#. Isolate the flow in which the undesired drops occur. Use
+   ``rte_tm_get_number_of_leaf_nodes`` and the flow table to pin down the leaf
+   where the drops occur.
+
+#. Check the stats using ``rte_tm_stats_update`` and ``rte_tm_node_stats_read``
+   for drops in the hierarchy, scheduler and WRED configurations.
+
+
+Is the packet in an unexpected format?
+~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
+
+Packet capture before and after processing :numref:`dtg_pdump`.
+
+.. _dtg_pdump:
+
+.. figure:: img/dtg_pdump.*
+
+   Capture points of Traffic at RX-TX.
+
+#. To isolate possible packet corruption in the processing pipeline,
+   carefully staged packet captures have to be implemented.
+
+   * First, isolate at NIC entry and exit.
+
+     Use pdump in the primary process to allow a secondary process to access
+     the port-queue pair. The packets are copied over in the RX|TX callbacks
+     and passed to the secondary process using ring buffers.
+
+   * Second, isolate at pipeline entry and exit.
+
+     Using hooks or callbacks, capture the packets in the middle of the
+     pipeline stages; these copies can be shared with the secondary debug
+     process via user-defined custom rings.
+
+.. note::
+
+   A similar analysis applies to object and metadata corruption.
+
+
+Does the issue still persist?
+~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
+
+The issue can be further narrowed down to the following causes.
+
+#. If there is vendor or application specific metadata, check for errors due
+   to metadata error flags. Dumping the private metadata in the objects can
+   give insight into details for debugging.
+
+#. If multi-process is used for either data or configuration, check for
+   possible errors in the secondary process where the configuration fails, and
+   for possible data corruption in the data plane.
+
+#.
Random drops in the RX or TX when opening other application is an indication + of the effect of a noisy neighbor. Try using the cache allocation technique + to minimize the effect between applications. + + +How to develop a custom code to debug? +-------------------------------------- + +#. For an application that runs as the primary process only, debug functionality + is added in the same process. These can be invoked by timer call-back, + service core and signal handler. + +#. For the application that runs as multiple processes. debug functionality in + a standalone secondary process. diff --git a/src/spdk/dpdk/doc/guides/howto/flow_bifurcation.rst b/src/spdk/dpdk/doc/guides/howto/flow_bifurcation.rst new file mode 100644 index 000000000..7ba66b900 --- /dev/null +++ b/src/spdk/dpdk/doc/guides/howto/flow_bifurcation.rst @@ -0,0 +1,70 @@ +.. SPDX-License-Identifier: BSD-3-Clause + Copyright(c) 2016 Intel Corporation. + +Flow Bifurcation How-to Guide +============================= + +Flow Bifurcation is a mechanism which uses hardware capable Ethernet devices +to split traffic between Linux user space and kernel space. Since it is a +hardware assisted feature this approach can provide line rate processing +capability. Other than :ref:`KNI `, the software is just required to +enable device configuration, there is no need to take care of the packet +movement during the traffic split. This can yield better performance with +less CPU overhead. + +The Flow Bifurcation splits the incoming data traffic to user space +applications (such as DPDK applications) and/or kernel space programs (such as +the Linux kernel stack). It can direct some traffic, for example data plane +traffic, to DPDK, while directing some other traffic, for example control +plane traffic, to the traditional Linux networking stack. + +There are a number of technical options to achieve this. A typical example is +to combine the technology of SR-IOV and packet classification filtering. + +SR-IOV is a PCI standard that allows the same physical adapter to be split as +multiple virtual functions. Each virtual function (VF) has separated queues +with physical functions (PF). The network adapter will direct traffic to a +virtual function with a matching destination MAC address. In a sense, SR-IOV +has the capability for queue division. + +Packet classification filtering is a hardware capability available on most +network adapters. Filters can be configured to direct specific flows to a +given receive queue by hardware. Different NICs may have different filter +types to direct flows to a Virtual Function or a queue that belong to it. + +In this way the Linux networking stack can receive specific traffic through +the kernel driver while a DPDK application can receive specific traffic +bypassing the Linux kernel by using drivers like VFIO or the DPDK ``igb_uio`` +module. + +.. _figure_flow_bifurcation_overview: + +.. figure:: img/flow_bifurcation_overview.* + + Flow Bifurcation Overview + + +Using Flow Bifurcation on Mellanox ConnectX +------------------------------------------- + +The Mellanox devices are :ref:`natively bifurcated `, +so there is no need to split into SR-IOV PF/VF +in order to get the flow bifurcation mechanism. +The full device is already shared with the kernel driver. + +The DPDK application can setup some flow steering rules, +and let the rest go to the kernel stack. +In order to define the filters strictly with flow rules, +the :ref:`flow_isolated_mode` can be configured. 
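+
+As a rough sketch of how an application might express the same behaviour
+through the ``rte_flow`` C API (the helper name is illustrative only, error
+handling is minimal, and the exact item and action fields may vary between
+DPDK versions), the following mirrors the testpmd commands shown below:
+
+.. code-block:: c
+
+    #include <stdint.h>
+    #include <rte_flow.h>
+
+    /* Illustrative helper (not part of DPDK): isolate a port and steer
+     * VXLAN VNI 42 to RX queues 0-3, leaving all other traffic to the kernel. */
+    static struct rte_flow *
+    steer_vxlan_to_queues(uint16_t port_id)
+    {
+        struct rte_flow_error err;
+        struct rte_flow_attr attr = { .ingress = 1 };
+
+        /* VNI 42 encoded as a 24-bit big-endian value. */
+        struct rte_flow_item_vxlan vxlan_spec = { .vni = { 0, 0, 42 } };
+        struct rte_flow_item_vxlan vxlan_mask = { .vni = { 0xff, 0xff, 0xff } };
+
+        struct rte_flow_item pattern[] = {
+            { .type = RTE_FLOW_ITEM_TYPE_ETH },
+            { .type = RTE_FLOW_ITEM_TYPE_IPV4 },
+            { .type = RTE_FLOW_ITEM_TYPE_UDP },
+            { .type = RTE_FLOW_ITEM_TYPE_VXLAN,
+              .spec = &vxlan_spec, .mask = &vxlan_mask },
+            { .type = RTE_FLOW_ITEM_TYPE_END },
+        };
+
+        uint16_t queues[] = { 0, 1, 2, 3 };
+        struct rte_flow_action_rss rss = {
+            .queue_num = 4,
+            .queue = queues,
+        };
+        struct rte_flow_action actions[] = {
+            { .type = RTE_FLOW_ACTION_TYPE_RSS, .conf = &rss },
+            { .type = RTE_FLOW_ACTION_TYPE_END },
+        };
+
+        /* Only flows explicitly requested below will reach the DPDK port. */
+        if (rte_flow_isolate(port_id, 1, &err) != 0)
+            return NULL;
+
+        return rte_flow_create(port_id, &attr, pattern, actions, &err);
+    }
+
+Depending on the PMD, isolated mode may need to be requested before the port
+is configured and started.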
+ +There is no specific instructions to follow. +The recommended reading is the :doc:`../prog_guide/rte_flow` guide. +Below is an example of testpmd commands +for receiving VXLAN 42 in 4 queues of the DPDK port 0, +while all other packets go to the kernel: + +.. code-block:: console + + testpmd> flow isolate 0 true + testpmd> flow create 0 ingress pattern eth / ipv4 / udp / vxlan vni is 42 / end \ + actions rss queues 0 1 2 3 end / end diff --git a/src/spdk/dpdk/doc/guides/howto/img/dtg_consumer_ring.svg b/src/spdk/dpdk/doc/guides/howto/img/dtg_consumer_ring.svg new file mode 100644 index 000000000..d8cf0c292 --- /dev/null +++ b/src/spdk/dpdk/doc/guides/howto/img/dtg_consumer_ring.svg @@ -0,0 +1,24 @@ + + + + + + + + consumer ring + + + + Layer 1 + + + + Stage 1 + Stage 2 + Stage 3 + + + + + + diff --git a/src/spdk/dpdk/doc/guides/howto/img/dtg_crypto.svg b/src/spdk/dpdk/doc/guides/howto/img/dtg_crypto.svg new file mode 100644 index 000000000..969f8b5d5 --- /dev/null +++ b/src/spdk/dpdk/doc/guides/howto/img/dtg_crypto.svg @@ -0,0 +1,21 @@ + + + + + + + + crypto + + + + Layer 1 + + Core 7 + + CRYPTO PMD + + Device + + + diff --git a/src/spdk/dpdk/doc/guides/howto/img/dtg_distributor_worker.svg b/src/spdk/dpdk/doc/guides/howto/img/dtg_distributor_worker.svg new file mode 100644 index 000000000..d2638ded8 --- /dev/null +++ b/src/spdk/dpdk/doc/guides/howto/img/dtg_distributor_worker.svg @@ -0,0 +1,36 @@ + + + + + + + + distributor and worker + + + + Layer 1 + + PKT classify + Distribute + + + + + + worker 1 + worker 2 + worker 3 + worker 4 + core 1 + core 2,3,4,5 + + diff --git a/src/spdk/dpdk/doc/guides/howto/img/dtg_mempool.svg b/src/spdk/dpdk/doc/guides/howto/img/dtg_mempool.svg new file mode 100644 index 000000000..2f53a3ca8 --- /dev/null +++ b/src/spdk/dpdk/doc/guides/howto/img/dtg_mempool.svg @@ -0,0 +1,27 @@ + + + + + + + + mempool + + + + Layer 1 + + + MBUF pool + + + + + + Payload + + Metadata + + struct mbuf + + diff --git a/src/spdk/dpdk/doc/guides/howto/img/dtg_pdump.svg b/src/spdk/dpdk/doc/guides/howto/img/dtg_pdump.svg new file mode 100644 index 000000000..4b14fb7ce --- /dev/null +++ b/src/spdk/dpdk/doc/guides/howto/img/dtg_pdump.svg @@ -0,0 +1,33 @@ + + + + + + + + pdump + + + + Layer 1 + + + + + + RX + TX + Q1 + Q2 + Q4 + Q3 + Primary + + Secondary + + + Ring BufferQ + + Core 0 + + diff --git a/src/spdk/dpdk/doc/guides/howto/img/dtg_producer_ring.svg b/src/spdk/dpdk/doc/guides/howto/img/dtg_producer_ring.svg new file mode 100644 index 000000000..e300234cd --- /dev/null +++ b/src/spdk/dpdk/doc/guides/howto/img/dtg_producer_ring.svg @@ -0,0 +1,24 @@ + + + + + + + + producer ring + + + + Layer 1 + + + + Stage 1 + Stage 2 + Stage 3 + + + + + + diff --git a/src/spdk/dpdk/doc/guides/howto/img/dtg_qos_tx.svg b/src/spdk/dpdk/doc/guides/howto/img/dtg_qos_tx.svg new file mode 100644 index 000000000..9f1d3b80d --- /dev/null +++ b/src/spdk/dpdk/doc/guides/howto/img/dtg_qos_tx.svg @@ -0,0 +1,29 @@ + + + + + + + + qos + + + + + + + Layer 1 + + + + + TX + Core1 + NIC1 + NIC2 + NIC3 + + QoS + core 10 + + diff --git a/src/spdk/dpdk/doc/guides/howto/img/dtg_rx_rate.svg b/src/spdk/dpdk/doc/guides/howto/img/dtg_rx_rate.svg new file mode 100644 index 000000000..02f8760c5 --- /dev/null +++ b/src/spdk/dpdk/doc/guides/howto/img/dtg_rx_rate.svg @@ -0,0 +1,25 @@ + + + + + + + + rx rate + + + + + + + Layer 1 + + + + RX + Core0 + NIC1 + NIC2 + NIC3 + + diff --git a/src/spdk/dpdk/doc/guides/howto/img/dtg_rx_tx_drop.svg b/src/spdk/dpdk/doc/guides/howto/img/dtg_rx_tx_drop.svg new file mode 100644 index 
000000000..ca1484862 --- /dev/null +++ b/src/spdk/dpdk/doc/guides/howto/img/dtg_rx_tx_drop.svg @@ -0,0 +1,33 @@ + + + + + + + + rx-tx drops + + + + + + + Layer 1 + + + + RX + Core0 + NIC1 + NIC2 + NIC3 + + + + TX + Core1 + NIC1 + NIC2 + NIC3 + + diff --git a/src/spdk/dpdk/doc/guides/howto/img/dtg_sample_app_model.svg b/src/spdk/dpdk/doc/guides/howto/img/dtg_sample_app_model.svg new file mode 100644 index 000000000..b876aa3b6 --- /dev/null +++ b/src/spdk/dpdk/doc/guides/howto/img/dtg_sample_app_model.svg @@ -0,0 +1,110 @@ + + + + + + + + sample application model + + + + Layer 1 + + + + + Health Check + core 7 + Stats Collector + + + RX + NIC 1 + NIC 2 + core0 + + + TX + core1 + NIC 1 + NIC 2 + + QoS + + Crypto + + + + Worker 1 + Worker 2 + Worker 3 + + + + Worker 1 + Worker 2 + Worker 3 + + Device + core 2,3,4 + core 5 + core 6 + core 2,3,4 + + PKT classify + Distribute + + + + + + + + + diff --git a/src/spdk/dpdk/doc/guides/howto/img/dtg_service.svg b/src/spdk/dpdk/doc/guides/howto/img/dtg_service.svg new file mode 100644 index 000000000..fa72de823 --- /dev/null +++ b/src/spdk/dpdk/doc/guides/howto/img/dtg_service.svg @@ -0,0 +1,20 @@ + + + + + + + + service + + + + Layer 1 + + + + Health Check + core 6 + Stats Collector + + diff --git a/src/spdk/dpdk/doc/guides/howto/img/flow_bifurcation_overview.svg b/src/spdk/dpdk/doc/guides/howto/img/flow_bifurcation_overview.svg new file mode 100644 index 000000000..4fa27648d --- /dev/null +++ b/src/spdk/dpdk/doc/guides/howto/img/flow_bifurcation_overview.svg @@ -0,0 +1,544 @@ + + + +image/svg+xmlPage-1Sheet.85NICNIC +NIC +Rounded Rectangle.20LINUXLINUX +LINUX +Rounded Rectangle.8Kernel pf driverKernel pf driver +Rounded RectangleFilters support traffic steering to VFFilters support traffic +steering to VF +Rectangle.3Rx Queues (0-N) PFRx Queues +( 0-N ) + PF +Rectangle.4Rx Queues (0-M) VF(vf 0)Rx Queues +( 0-M ) +VF(vf0) +Rectangle.5filtersfilters +Rounded Rectangle.6Tools to program filtersTools to +program filters +2-D word balloonDirector flows to queue index in specified VFinspecified VF +Director flows +to queue index +in specified VF +Rounded Rectangle.24DPDKDPDK +Rounded Rectangle.25SocketSocket +Simple Arrow.44Single arrowheadDynamic connector.70Dynamic connector.81Dynamic connector.83Dynamic connector.84Sheet.98Sheet.109Sheet.110 \ No newline at end of file diff --git a/src/spdk/dpdk/doc/guides/howto/img/lm_bond_virtio_sriov.svg b/src/spdk/dpdk/doc/guides/howto/img/lm_bond_virtio_sriov.svg new file mode 100644 index 000000000..d913ae012 --- /dev/null +++ b/src/spdk/dpdk/doc/guides/howto/img/lm_bond_virtio_sriov.svg @@ -0,0 +1,666 @@ + + + + + + + + + + + + + + + + + + + + + + + + + + + + + image/svg+xml + + + + + + + + + + + + + + + VM 1 + Switch with 10Gb ports + Server 1 + Server 2 + 10 Gb Traffic Generator + VM 2 + Linux, KVM, QEMU + Linux, KVM, QEMU + 10 Gb NIC + 10 Gb NIC + + + 10 Gb NIC + 10 Gb NIC + + DPDK Testpmd App. + bonded device withvirtio and VF slaves + + DPDK Testpmd App. 
+ bonded device withvirtio and VF slaves + Kernel PF driver + Kernel PF driver + SW bridge with Tapand PF connected + NFS ServerVM disk image + + + + + + + + + + + + + + + + SW bridge with Tapand PF connected + 10 Gb Migration Link + + diff --git a/src/spdk/dpdk/doc/guides/howto/img/lm_vhost_user.svg b/src/spdk/dpdk/doc/guides/howto/img/lm_vhost_user.svg new file mode 100644 index 000000000..3601cf115 --- /dev/null +++ b/src/spdk/dpdk/doc/guides/howto/img/lm_vhost_user.svg @@ -0,0 +1,644 @@ + + + + + + + + + + + + + + + + + + + + + + + + + + + + + image/svg+xml + + + + + + + + + + + + + + + VM 1 + Switch with 10Gb ports + Server 1 + Server 2 + 10 Gb Traffic Generator + VM 2 + Linux, KVM, QEMU 2.5 + 10 Gb NIC + 10 Gb NIC + + + 10 Gb NIC + 10 Gb NIC + + + DPDK Testpmd App + DPDK virtio PMD's + DPDK PF PMD and vhost_user + DPDK PF PMD and vhost_user + NFS ServerVM disk image + + + + + + + + + + + + + + + + 10 Gb Migration Link + DPDK Testpmd App + DPDK virtio PMD's + Linux, KVM, QEMU 2.5 + + diff --git a/src/spdk/dpdk/doc/guides/howto/img/packet_capture_framework.svg b/src/spdk/dpdk/doc/guides/howto/img/packet_capture_framework.svg new file mode 100644 index 000000000..a76baf71f --- /dev/null +++ b/src/spdk/dpdk/doc/guides/howto/img/packet_capture_framework.svg @@ -0,0 +1,471 @@ + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + image/svg+xml + + + + + + + + + DPDK Primary Application + + dpdk-pdumptool + + PCAP PMD + + dpdk_port0 + + librte_pdump + + capture.pcap + + Traffic Generator + + + + + + + diff --git a/src/spdk/dpdk/doc/guides/howto/img/pvp_2nics.svg b/src/spdk/dpdk/doc/guides/howto/img/pvp_2nics.svg new file mode 100644 index 000000000..517a80084 --- /dev/null +++ b/src/spdk/dpdk/doc/guides/howto/img/pvp_2nics.svg @@ -0,0 +1,556 @@ + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + image/svg+xml + + + + + + + + TE + + 10G NIC + Moongen + + DUT + + VM + + 10G NIC + + TestPMD(macswap) + + TestPMD (io) + + + + + + + + 10G NIC + + 10G NIC + + + + + + + + + + diff --git a/src/spdk/dpdk/doc/guides/howto/img/use_models_for_running_dpdk_in_containers.svg b/src/spdk/dpdk/doc/guides/howto/img/use_models_for_running_dpdk_in_containers.svg new file mode 100644 index 000000000..662c22660 --- /dev/null +++ b/src/spdk/dpdk/doc/guides/howto/img/use_models_for_running_dpdk_in_containers.svg @@ -0,0 +1,398 @@ + + + + + + + + + + + + + + + + + + + + + + + + + + Page-1 + + + + Rectangle + Container + + + + + + + + + + Container + + Rectangle.3 + + + + + + + + + + Rectangle.4 + + + + + + + + + + Sheet.5 + Host kernel + + + + Host kernel + + Sheet.6 + NIC + + + + NIC + + Rectangle.7 + PF + + + + + + + + + + PF + + Rectangle.8 + VF + + + + + + + + + + VF + + Rectangle.10 + Hardware virtual switch + + + + + + + + + + Hardware virtual switch + + Rectangle.14 + Container + + + + + + + + + + Container + + Rectangle.15 + VF + + + + + + + + + + VF + + Dynamic connector.16 + + + + Ellipse + PF driver + + + + + + + + + + PF driver + + Dynamic connector.19 + + + + Ellipse.20 + DPDK + + + + + + + + + + DPDK + + Dynamic connector.21 + + + + Ellipse.22 + DPDK + + + + + + + + + + DPDK + + Rectangle.23 + Virtual Appliance + + + + + + + + + + Virtual Appliance + + Rectangle.25 + VM + Container + + + + + + + + + + VM + Container + + Rectangle.27 + Container + + + + + + + + + + Container + + Ellipse.28 + DPDK + + + + + + + + + + DPDK + + Rectangle.29 + + + + + + + + + + Sheet.30 + 
Host kernel + + + + Host kernel + + Rectangle.31 + vSwitch or vRouter + + + + + + + + + + vSwitchorvRouter + + Ellipse.32 + DPDK + + + + + + + + + + DPDK + + Dynamic connector + + + + Dynamic connector.35 + + + + Dynamic connector.36 + + + + Rectangle.37 + + + + + + + + + + Rectangle.38 + + + + + + + + + + Sheet.39 + NIC + + + + NIC + + Dynamic connector.41 + + + + Dynamic connector.42 + + + + Sheet.43 + (1) Slicing + + + + (1) Slicing + + Sheet.44 + (2) Aggregation + + + + (2) Aggregation + + diff --git a/src/spdk/dpdk/doc/guides/howto/img/vf_daemon_overview.svg b/src/spdk/dpdk/doc/guides/howto/img/vf_daemon_overview.svg new file mode 100644 index 000000000..6a81f2fb8 --- /dev/null +++ b/src/spdk/dpdk/doc/guides/howto/img/vf_daemon_overview.svg @@ -0,0 +1,408 @@ + + + + + + +image/svg+xmlSimple Double Arrow.14VM +VF Application +DPDK +Virtual ethdev +VF driver +Simple Double Arrow.14Simple Double Arrow.14Host +PF Application +DPDK +Ethdev +PF driver + diff --git a/src/spdk/dpdk/doc/guides/howto/img/virtio_user_as_exceptional_path.svg b/src/spdk/dpdk/doc/guides/howto/img/virtio_user_as_exceptional_path.svg new file mode 100644 index 000000000..b231b709d --- /dev/null +++ b/src/spdk/dpdk/doc/guides/howto/img/virtio_user_as_exceptional_path.svg @@ -0,0 +1,207 @@ + + + + + + + + + + + + + + + + + + + + + + + + + + Page-1 + + + + Rectangle.23 + + + + + + + + + + Dynamic connector.42 + + + + Rectangle.45 + tap + + + + + + + + + + tap + + Rectangle.46 + vhost ko + + + + + + + + + + vhost ko + + Sheet.47 + Kernel space + + + + Kernel space + + Sheet.48 + User space + + + + User space + + Rectangle.49 + ETHDEV + + + + + + + + + + ETHDEV + + Rectangle.50 + virtio PMD + + + + + + + + + + virtio PMD + + Rectangle.51 + other PMDs + + + + + + + + + + other PMDs + + Rectangle.52 + virtio-user + + + + + + + + + + virtio-user + + Rectangle.53 + vhost adapter + + + + + + + + + + vhost adapter + + Dynamic connector + + + + Dynamic connector.55 + + + + Rectangle.38 + + + + + + + + + + Sheet.57 + NIC + + + + NIC + + Dynamic connector.41 + + + + diff --git a/src/spdk/dpdk/doc/guides/howto/img/virtio_user_for_container_networking.svg b/src/spdk/dpdk/doc/guides/howto/img/virtio_user_for_container_networking.svg new file mode 100644 index 000000000..de8080664 --- /dev/null +++ b/src/spdk/dpdk/doc/guides/howto/img/virtio_user_for_container_networking.svg @@ -0,0 +1,685 @@ + +image/svg+xmlPage-1Rectangle.23Rectangle.49ETHDEVethdev +Rectangle.50virtio PMDvirtio PMD +Rectangle.52virtio-user (virtual device)virtio-user(virtual device) +Rectangle.53vhost-user adapterRectangle.38Sheet.57NICNIC +Rectangle.59virtio (PCI device)virtio(PCI device) +Rectangle.60vSwitch or vRoutervSwitchorvRouter +Sheet.61DPDKDPDK +Rectangle.62Sheet.63Contanier/AppContainer/App +Rectangle.64Rectangle.65Sheet.66virtiovirtio +Sheet.67vhostvhost +Dynamic connectorDynamic connector.70Dynamic connector.72Rectangle.71unix socket fileunix socket file +vhost-user +adapter + \ No newline at end of file diff --git a/src/spdk/dpdk/doc/guides/howto/index.rst b/src/spdk/dpdk/doc/guides/howto/index.rst new file mode 100644 index 000000000..5a97ea508 --- /dev/null +++ b/src/spdk/dpdk/doc/guides/howto/index.rst @@ -0,0 +1,22 @@ +.. SPDX-License-Identifier: BSD-3-Clause + Copyright(c) 2016 Intel Corporation. + +HowTo Guides +============ + +.. 
toctree:: + :maxdepth: 2 + :numbered: + + lm_bond_virtio_sriov + lm_virtio_vhost_user + flow_bifurcation + rte_flow + pvp_reference_benchmark + vfd + virtio_user_for_container_networking + virtio_user_as_exceptional_path + packet_capture_framework + telemetry + debug_troubleshoot + openwrt diff --git a/src/spdk/dpdk/doc/guides/howto/lm_bond_virtio_sriov.rst b/src/spdk/dpdk/doc/guides/howto/lm_bond_virtio_sriov.rst new file mode 100644 index 000000000..07563b3e2 --- /dev/null +++ b/src/spdk/dpdk/doc/guides/howto/lm_bond_virtio_sriov.rst @@ -0,0 +1,686 @@ +.. SPDX-License-Identifier: BSD-3-Clause + Copyright(c) 2016 Intel Corporation. + +Live Migration of VM with SR-IOV VF +=================================== + +Overview +-------- + +It is not possible to migrate a Virtual Machine which has an SR-IOV Virtual Function (VF). + +To get around this problem the bonding PMD is used. + +The following sections show an example of how to do this. + +Test Setup +---------- + +A bonded device is created in the VM. +The virtio and VF PMD's are added as slaves to the bonded device. +The VF is set as the primary slave of the bonded device. + +A bridge must be set up on the Host connecting the tap device, which is the +backend of the Virtio device and the Physical Function (PF) device. + +To test the Live Migration two servers with identical operating systems installed are used. +KVM and Qemu 2.3 is also required on the servers. + +In this example, the servers have Niantic and or Fortville NIC's installed. +The NIC's on both servers are connected to a switch +which is also connected to the traffic generator. + +The switch is configured to broadcast traffic on all the NIC ports. +A :ref:`Sample switch configuration ` +can be found in this section. + +The host is running the Kernel PF driver (ixgbe or i40e). + +The ip address of host_server_1 is 10.237.212.46 + +The ip address of host_server_2 is 10.237.212.131 + +.. _figure_lm_bond_virtio_sriov: + +.. figure:: img/lm_bond_virtio_sriov.* + +Live Migration steps +-------------------- + +The sample scripts mentioned in the steps below can be found in the +:ref:`Sample host scripts ` and +:ref:`Sample VM scripts ` sections. + +On host_server_1: Terminal 1 +~~~~~~~~~~~~~~~~~~~~~~~~~~~~ + +.. code-block:: console + + cd /root/dpdk/host_scripts + ./setup_vf_on_212_46.sh + +For Fortville NIC + +.. code-block:: console + + ./vm_virtio_vf_i40e_212_46.sh + +For Niantic NIC + +.. code-block:: console + + ./vm_virtio_vf_one_212_46.sh + +On host_server_1: Terminal 2 +~~~~~~~~~~~~~~~~~~~~~~~~~~~~ + +.. code-block:: console + + cd /root/dpdk/host_scripts + ./setup_bridge_on_212_46.sh + ./connect_to_qemu_mon_on_host.sh + (qemu) + +On host_server_1: Terminal 1 +~~~~~~~~~~~~~~~~~~~~~~~~~~~~ + +**In VM on host_server_1:** + +.. code-block:: console + + cd /root/dpdk/vm_scripts + ./setup_dpdk_in_vm.sh + ./run_testpmd_bonding_in_vm.sh + + testpmd> show port info all + +The ``mac_addr`` command only works with kernel PF for Niantic + +.. code-block:: console + + testpmd> mac_addr add port 1 vf 0 AA:BB:CC:DD:EE:FF + +The syntax of the ``testpmd`` command is: + +Create bonded device (mode) (socket). + +Mode 1 is active backup. + +Virtio is port 0 (P0). + +VF is port 1 (P1). + +Bonding is port 2 (P2). + +.. code-block:: console + + testpmd> create bonded device 1 0 + Created new bonded device net_bond_testpmd_0 on (port 2). 
+ testpmd> add bonding slave 0 2 + testpmd> add bonding slave 1 2 + testpmd> show bonding config 2 + +The syntax of the ``testpmd`` command is: + +set bonding primary (slave id) (port id) + +Set primary to P1 before starting bonding port. + +.. code-block:: console + + testpmd> set bonding primary 1 2 + testpmd> show bonding config 2 + testpmd> port start 2 + Port 2: 02:09:C0:68:99:A5 + Checking link statuses... + Port 0 Link Up - speed 10000 Mbps - full-duplex + Port 1 Link Up - speed 10000 Mbps - full-duplex + Port 2 Link Up - speed 10000 Mbps - full-duplex + + testpmd> show bonding config 2 + +Primary is now P1. There are 2 active slaves. + +Use P2 only for forwarding. + +.. code-block:: console + + testpmd> set portlist 2 + testpmd> show config fwd + testpmd> set fwd mac + testpmd> start + testpmd> show bonding config 2 + +Primary is now P1. There are 2 active slaves. + +.. code-block:: console + + testpmd> show port stats all + +VF traffic is seen at P1 and P2. + +.. code-block:: console + + testpmd> clear port stats all + testpmd> set bonding primary 0 2 + testpmd> remove bonding slave 1 2 + testpmd> show bonding config 2 + +Primary is now P0. There is 1 active slave. + +.. code-block:: console + + testpmd> clear port stats all + testpmd> show port stats all + +No VF traffic is seen at P0 and P2, VF MAC address still present. + +.. code-block:: console + + testpmd> port stop 1 + testpmd> port close 1 + +Port close should remove VF MAC address, it does not remove perm_addr. + +The ``mac_addr`` command only works with the kernel PF for Niantic. + +.. code-block:: console + + testpmd> mac_addr remove 1 AA:BB:CC:DD:EE:FF + testpmd> port detach 1 + Port '0000:00:04.0' is detached. Now total ports is 2 + testpmd> show port stats all + +No VF traffic is seen at P0 and P2. + +On host_server_1: Terminal 2 +~~~~~~~~~~~~~~~~~~~~~~~~~~~~ + +.. code-block:: console + + (qemu) device_del vf1 + + +On host_server_1: Terminal 1 +~~~~~~~~~~~~~~~~~~~~~~~~~~~~ + +**In VM on host_server_1:** + +.. code-block:: console + + testpmd> show bonding config 2 + +Primary is now P0. There is 1 active slave. + +.. code-block:: console + + testpmd> show port info all + testpmd> show port stats all + +On host_server_2: Terminal 1 +~~~~~~~~~~~~~~~~~~~~~~~~~~~~ + +.. code-block:: console + + cd /root/dpdk/host_scripts + ./setup_vf_on_212_131.sh + ./vm_virtio_one_migrate.sh + +On host_server_2: Terminal 2 +~~~~~~~~~~~~~~~~~~~~~~~~~~~~ + +.. code-block:: console + + ./setup_bridge_on_212_131.sh + ./connect_to_qemu_mon_on_host.sh + (qemu) info status + VM status: paused (inmigrate) + (qemu) + +On host_server_1: Terminal 2 +~~~~~~~~~~~~~~~~~~~~~~~~~~~~ + +Check that the switch is up before migrating. + +.. code-block:: console + + (qemu) migrate tcp:10.237.212.131:5555 + (qemu) info status + VM status: paused (postmigrate) + +For the Niantic NIC. + +.. code-block:: console + + (qemu) info migrate + capabilities: xbzrle: off rdma-pin-all: off auto-converge: off zero-blocks: off + Migration status: completed + total time: 11834 milliseconds + downtime: 18 milliseconds + setup: 3 milliseconds + transferred ram: 389137 kbytes + throughput: 269.49 mbps + remaining ram: 0 kbytes + total ram: 1590088 kbytes + duplicate: 301620 pages + skipped: 0 pages + normal: 96433 pages + normal bytes: 385732 kbytes + dirty sync count: 2 + (qemu) quit + +For the Fortville NIC. + +.. 
code-block:: console + + (qemu) info migrate + capabilities: xbzrle: off rdma-pin-all: off auto-converge: off zero-blocks: off + Migration status: completed + total time: 11619 milliseconds + downtime: 5 milliseconds + setup: 7 milliseconds + transferred ram: 379699 kbytes + throughput: 267.82 mbps + remaining ram: 0 kbytes + total ram: 1590088 kbytes + duplicate: 303985 pages + skipped: 0 pages + normal: 94073 pages + normal bytes: 376292 kbytes + dirty sync count: 2 + (qemu) quit + +On host_server_2: Terminal 1 +~~~~~~~~~~~~~~~~~~~~~~~~~~~~ + +**In VM on host_server_2:** + + Hit Enter key. This brings the user to the testpmd prompt. + +.. code-block:: console + + testpmd> + +On host_server_2: Terminal 2 +~~~~~~~~~~~~~~~~~~~~~~~~~~~~ + +.. code-block:: console + + (qemu) info status + VM status: running + +For the Niantic NIC. + +.. code-block:: console + + (qemu) device_add pci-assign,host=06:10.0,id=vf1 + +For the Fortville NIC. + +.. code-block:: console + + (qemu) device_add pci-assign,host=03:02.0,id=vf1 + +On host_server_2: Terminal 1 +~~~~~~~~~~~~~~~~~~~~~~~~~~~~ + +**In VM on host_server_2:** + +.. code-block:: console + + testpmd> show port info all + testpmd> show port stats all + testpmd> show bonding config 2 + testpmd> port attach 0000:00:04.0 + Port 1 is attached. + Now total ports is 3 + Done + + testpmd> port start 1 + +The ``mac_addr`` command only works with the Kernel PF for Niantic. + +.. code-block:: console + + testpmd> mac_addr add port 1 vf 0 AA:BB:CC:DD:EE:FF + testpmd> show port stats all. + testpmd> show config fwd + testpmd> show bonding config 2 + testpmd> add bonding slave 1 2 + testpmd> set bonding primary 1 2 + testpmd> show bonding config 2 + testpmd> show port stats all + +VF traffic is seen at P1 (VF) and P2 (Bonded device). + +.. code-block:: console + + testpmd> remove bonding slave 0 2 + testpmd> show bonding config 2 + testpmd> port stop 0 + testpmd> port close 0 + testpmd> port detach 0 + Port '0000:00:03.0' is detached. Now total ports is 2 + + testpmd> show port info all + testpmd> show config fwd + testpmd> show port stats all + +VF traffic is seen at P1 (VF) and P2 (Bonded device). + +.. _lm_bond_virtio_sriov_host_scripts: + +Sample host scripts +------------------- + +setup_vf_on_212_46.sh +~~~~~~~~~~~~~~~~~~~~~ +Set up Virtual Functions on host_server_1 + +.. code-block:: sh + + #!/bin/sh + # This script is run on the host 10.237.212.46 to setup the VF + + # set up Niantic VF + cat /sys/bus/pci/devices/0000\:09\:00.0/sriov_numvfs + echo 1 > /sys/bus/pci/devices/0000\:09\:00.0/sriov_numvfs + cat /sys/bus/pci/devices/0000\:09\:00.0/sriov_numvfs + rmmod ixgbevf + + # set up Fortville VF + cat /sys/bus/pci/devices/0000\:02\:00.0/sriov_numvfs + echo 1 > /sys/bus/pci/devices/0000\:02\:00.0/sriov_numvfs + cat /sys/bus/pci/devices/0000\:02\:00.0/sriov_numvfs + rmmod i40evf + +vm_virtio_vf_one_212_46.sh +~~~~~~~~~~~~~~~~~~~~~~~~~~ + +Setup Virtual Machine on host_server_1 + +.. 
code-block:: sh + + #!/bin/sh + + # Path to KVM tool + KVM_PATH="/usr/bin/qemu-system-x86_64" + + # Guest Disk image + DISK_IMG="/home/username/disk_image/virt1_sml.disk" + + # Number of guest cpus + VCPUS_NR="4" + + # Memory + MEM=1536 + + taskset -c 1-5 $KVM_PATH \ + -enable-kvm \ + -m $MEM \ + -smp $VCPUS_NR \ + -cpu host \ + -name VM1 \ + -no-reboot \ + -net none \ + -vnc none -nographic \ + -hda $DISK_IMG \ + -netdev type=tap,id=net1,script=no,downscript=no,ifname=tap1 \ + -device virtio-net-pci,netdev=net1,mac=CC:BB:BB:BB:BB:BB \ + -device pci-assign,host=09:10.0,id=vf1 \ + -monitor telnet::3333,server,nowait + +setup_bridge_on_212_46.sh +~~~~~~~~~~~~~~~~~~~~~~~~~ + +Setup bridge on host_server_1 + +.. code-block:: sh + + #!/bin/sh + # This script is run on the host 10.237.212.46 to setup the bridge + # for the Tap device and the PF device. + # This enables traffic to go from the PF to the Tap to the Virtio PMD in the VM. + + # ens3f0 is the Niantic NIC + # ens6f0 is the Fortville NIC + + ifconfig ens3f0 down + ifconfig tap1 down + ifconfig ens6f0 down + ifconfig virbr0 down + + brctl show virbr0 + brctl addif virbr0 ens3f0 + brctl addif virbr0 ens6f0 + brctl addif virbr0 tap1 + brctl show virbr0 + + ifconfig ens3f0 up + ifconfig tap1 up + ifconfig ens6f0 up + ifconfig virbr0 up + +connect_to_qemu_mon_on_host.sh +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ + +.. code-block:: sh + + #!/bin/sh + # This script is run on both hosts when the VM is up, + # to connect to the Qemu Monitor. + + telnet 0 3333 + +setup_vf_on_212_131.sh +~~~~~~~~~~~~~~~~~~~~~~ + +Set up Virtual Functions on host_server_2 + +.. code-block:: sh + + #!/bin/sh + # This script is run on the host 10.237.212.131 to setup the VF + + # set up Niantic VF + cat /sys/bus/pci/devices/0000\:06\:00.0/sriov_numvfs + echo 1 > /sys/bus/pci/devices/0000\:06\:00.0/sriov_numvfs + cat /sys/bus/pci/devices/0000\:06\:00.0/sriov_numvfs + rmmod ixgbevf + + # set up Fortville VF + cat /sys/bus/pci/devices/0000\:03\:00.0/sriov_numvfs + echo 1 > /sys/bus/pci/devices/0000\:03\:00.0/sriov_numvfs + cat /sys/bus/pci/devices/0000\:03\:00.0/sriov_numvfs + rmmod i40evf + +vm_virtio_one_migrate.sh +~~~~~~~~~~~~~~~~~~~~~~~~ + +Setup Virtual Machine on host_server_2 + +.. code-block:: sh + + #!/bin/sh + # Start the VM on host_server_2 with the same parameters except without the VF + # parameters, as the VM on host_server_1, in migration-listen mode + # (-incoming tcp:0:5555) + + # Path to KVM tool + KVM_PATH="/usr/bin/qemu-system-x86_64" + + # Guest Disk image + DISK_IMG="/home/username/disk_image/virt1_sml.disk" + + # Number of guest cpus + VCPUS_NR="4" + + # Memory + MEM=1536 + + taskset -c 1-5 $KVM_PATH \ + -enable-kvm \ + -m $MEM \ + -smp $VCPUS_NR \ + -cpu host \ + -name VM1 \ + -no-reboot \ + -net none \ + -vnc none -nographic \ + -hda $DISK_IMG \ + -netdev type=tap,id=net1,script=no,downscript=no,ifname=tap1 \ + -device virtio-net-pci,netdev=net1,mac=CC:BB:BB:BB:BB:BB \ + -incoming tcp:0:5555 \ + -monitor telnet::3333,server,nowait + +setup_bridge_on_212_131.sh +~~~~~~~~~~~~~~~~~~~~~~~~~~ + +Setup bridge on host_server_2 + +.. code-block:: sh + + #!/bin/sh + # This script is run on the host to setup the bridge + # for the Tap device and the PF device. + # This enables traffic to go from the PF to the Tap to the Virtio PMD in the VM. 
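+   # Note: the PF interface names below (ens4f0, ens5f0) are specific to this
+   # example host; check the output of "ip link" or "ifconfig -a" on your own
+   # system and adjust them before running the script.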
+ + # ens4f0 is the Niantic NIC + # ens5f0 is the Fortville NIC + + ifconfig ens4f0 down + ifconfig tap1 down + ifconfig ens5f0 down + ifconfig virbr0 down + + brctl show virbr0 + brctl addif virbr0 ens4f0 + brctl addif virbr0 ens5f0 + brctl addif virbr0 tap1 + brctl show virbr0 + + ifconfig ens4f0 up + ifconfig tap1 up + ifconfig ens5f0 up + ifconfig virbr0 up + +.. _lm_bond_virtio_sriov_vm_scripts: + +Sample VM scripts +----------------- + +setup_dpdk_in_vm.sh +~~~~~~~~~~~~~~~~~~~ + +Set up DPDK in the Virtual Machine + +.. code-block:: sh + + #!/bin/sh + # this script matches the vm_virtio_vf_one script + # virtio port is 03 + # vf port is 04 + + cat /sys/kernel/mm/hugepages/hugepages-2048kB/nr_hugepages + echo 1024 > /sys/kernel/mm/hugepages/hugepages-2048kB/nr_hugepages + cat /sys/kernel/mm/hugepages/hugepages-2048kB/nr_hugepages + + ifconfig -a + /root/dpdk/usertools/dpdk-devbind.py --status + + rmmod virtio-pci ixgbevf + + modprobe uio + insmod /root/dpdk/x86_64-default-linux-gcc/kmod/igb_uio.ko + + /root/dpdk/usertools/dpdk-devbind.py -b igb_uio 0000:00:03.0 + /root/dpdk/usertools/dpdk-devbind.py -b igb_uio 0000:00:04.0 + + /root/dpdk/usertools/dpdk-devbind.py --status + +run_testpmd_bonding_in_vm.sh +~~~~~~~~~~~~~~~~~~~~~~~~~~~~ + +Run testpmd in the Virtual Machine. + +.. code-block:: sh + + #!/bin/sh + # Run testpmd in the VM + + # The test system has 8 cpus (0-7), use cpus 2-7 for VM + # Use taskset -pc + + # use for bonding of virtio and vf tests in VM + + /root/dpdk/x86_64-default-linux-gcc/app/testpmd \ + -l 0-3 -n 4 --socket-mem 350 -- --i --port-topology=chained + +.. _lm_bond_virtio_sriov_switch_conf: + +Sample switch configuration +--------------------------- + +The Intel switch is used to connect the traffic generator to the +NIC's on host_server_1 and host_server_2. + +In order to run the switch configuration two console windows are required. + +Log in as root in both windows. + +TestPointShared, run_switch.sh and load /root/switch_config must be executed +in the sequence below. + +On Switch: Terminal 1 +~~~~~~~~~~~~~~~~~~~~~ + +run TestPointShared + +.. code-block:: console + + /usr/bin/TestPointShared + +On Switch: Terminal 2 +~~~~~~~~~~~~~~~~~~~~~ + +execute run_switch.sh + +.. code-block:: console + + /root/run_switch.sh + +On Switch: Terminal 1 +~~~~~~~~~~~~~~~~~~~~~ + +load switch configuration + +.. code-block:: console + + load /root/switch_config + +Sample switch configuration script +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ + +The ``/root/switch_config`` script: + +.. code-block:: sh + + # TestPoint History + show port 1,5,9,13,17,21,25 + set port 1,5,9,13,17,21,25 up + show port 1,5,9,13,17,21,25 + del acl 1 + create acl 1 + create acl-port-set + create acl-port-set + add port port-set 1 0 + add port port-set 5,9,13,17,21,25 1 + create acl-rule 1 1 + add acl-rule condition 1 1 port-set 1 + add acl-rule action 1 1 redirect 1 + apply acl + create vlan 1000 + add vlan port 1000 1,5,9,13,17,21,25 + set vlan tagging 1000 1,5,9,13,17,21,25 tag + set switch config flood_ucast fwd + show port stats all 1,5,9,13,17,21,25 diff --git a/src/spdk/dpdk/doc/guides/howto/lm_virtio_vhost_user.rst b/src/spdk/dpdk/doc/guides/howto/lm_virtio_vhost_user.rst new file mode 100644 index 000000000..ecb783290 --- /dev/null +++ b/src/spdk/dpdk/doc/guides/howto/lm_virtio_vhost_user.rst @@ -0,0 +1,441 @@ +.. SPDX-License-Identifier: BSD-3-Clause + Copyright(c) 2016 Intel Corporation. 
+ +Live Migration of VM with Virtio on host running vhost_user +=========================================================== + +Overview +-------- + +Live Migration of a VM with DPDK Virtio PMD on a host which is +running the Vhost sample application (vhost-switch) and using the DPDK PMD (ixgbe or i40e). + +The Vhost sample application uses VMDQ so SRIOV must be disabled on the NIC's. + +The following sections show an example of how to do this migration. + +Test Setup +---------- + +To test the Live Migration two servers with identical operating systems installed are used. +KVM and QEMU is also required on the servers. + +QEMU 2.5 is required for Live Migration of a VM with vhost_user running on the hosts. + +In this example, the servers have Niantic and or Fortville NIC's installed. +The NIC's on both servers are connected to a switch +which is also connected to the traffic generator. + +The switch is configured to broadcast traffic on all the NIC ports. + +The ip address of host_server_1 is 10.237.212.46 + +The ip address of host_server_2 is 10.237.212.131 + +.. _figure_lm_vhost_user: + +.. figure:: img/lm_vhost_user.* + +Live Migration steps +-------------------- + +The sample scripts mentioned in the steps below can be found in the +:ref:`Sample host scripts ` and +:ref:`Sample VM scripts ` sections. + +On host_server_1: Terminal 1 +~~~~~~~~~~~~~~~~~~~~~~~~~~~~ + +Setup DPDK on host_server_1 + +.. code-block:: console + + cd /root/dpdk/host_scripts + ./setup_dpdk_on_host.sh + +On host_server_1: Terminal 2 +~~~~~~~~~~~~~~~~~~~~~~~~~~~~ + +Bind the Niantic or Fortville NIC to igb_uio on host_server_1. + +For Fortville NIC. + +.. code-block:: console + + cd /root/dpdk/usertools + ./dpdk-devbind.py -b igb_uio 0000:02:00.0 + +For Niantic NIC. + +.. code-block:: console + + cd /root/dpdk/usertools + ./dpdk-devbind.py -b igb_uio 0000:09:00.0 + +On host_server_1: Terminal 3 +~~~~~~~~~~~~~~~~~~~~~~~~~~~~ + +For Fortville and Niantic NIC's reset SRIOV and run the +vhost_user sample application (vhost-switch) on host_server_1. + +.. code-block:: console + + cd /root/dpdk/host_scripts + ./reset_vf_on_212_46.sh + ./run_vhost_switch_on_host.sh + +On host_server_1: Terminal 1 +~~~~~~~~~~~~~~~~~~~~~~~~~~~~ + +Start the VM on host_server_1 + +.. code-block:: console + + ./vm_virtio_vhost_user.sh + +On host_server_1: Terminal 4 +~~~~~~~~~~~~~~~~~~~~~~~~~~~~ + +Connect to the QEMU monitor on host_server_1. + +.. code-block:: console + + cd /root/dpdk/host_scripts + ./connect_to_qemu_mon_on_host.sh + (qemu) + +On host_server_1: Terminal 1 +~~~~~~~~~~~~~~~~~~~~~~~~~~~~ + +**In VM on host_server_1:** + +Setup DPDK in the VM and run testpmd in the VM. + +.. code-block:: console + + cd /root/dpdk/vm_scripts + ./setup_dpdk_in_vm.sh + ./run_testpmd_in_vm.sh + + testpmd> show port info all + testpmd> set fwd mac retry + testpmd> start tx_first + testpmd> show port stats all + +Virtio traffic is seen at P1 and P2. + +On host_server_2: Terminal 1 +~~~~~~~~~~~~~~~~~~~~~~~~~~~~ + +Set up DPDK on the host_server_2. + +.. code-block:: console + + cd /root/dpdk/host_scripts + ./setup_dpdk_on_host.sh + +On host_server_2: Terminal 2 +~~~~~~~~~~~~~~~~~~~~~~~~~~~~ + +Bind the Niantic or Fortville NIC to igb_uio on host_server_2. + +For Fortville NIC. + +.. code-block:: console + + cd /root/dpdk/usertools + ./dpdk-devbind.py -b igb_uio 0000:03:00.0 + +For Niantic NIC. + +.. 
code-block:: console + + cd /root/dpdk/usertools + ./dpdk-devbind.py -b igb_uio 0000:06:00.0 + +On host_server_2: Terminal 3 +~~~~~~~~~~~~~~~~~~~~~~~~~~~~ + +For Fortville and Niantic NIC's reset SRIOV, and run +the vhost_user sample application on host_server_2. + +.. code-block:: console + + cd /root/dpdk/host_scripts + ./reset_vf_on_212_131.sh + ./run_vhost_switch_on_host.sh + +On host_server_2: Terminal 1 +~~~~~~~~~~~~~~~~~~~~~~~~~~~~ + +Start the VM on host_server_2. + +.. code-block:: console + + ./vm_virtio_vhost_user_migrate.sh + +On host_server_2: Terminal 4 +~~~~~~~~~~~~~~~~~~~~~~~~~~~~ + +Connect to the QEMU monitor on host_server_2. + +.. code-block:: console + + cd /root/dpdk/host_scripts + ./connect_to_qemu_mon_on_host.sh + (qemu) info status + VM status: paused (inmigrate) + (qemu) + +On host_server_1: Terminal 4 +~~~~~~~~~~~~~~~~~~~~~~~~~~~~ + +Check that switch is up before migrating the VM. + +.. code-block:: console + + (qemu) migrate tcp:10.237.212.131:5555 + (qemu) info status + VM status: paused (postmigrate) + + (qemu) info migrate + capabilities: xbzrle: off rdma-pin-all: off auto-converge: off zero-blocks: off + Migration status: completed + total time: 11619 milliseconds + downtime: 5 milliseconds + setup: 7 milliseconds + transferred ram: 379699 kbytes + throughput: 267.82 mbps + remaining ram: 0 kbytes + total ram: 1590088 kbytes + duplicate: 303985 pages + skipped: 0 pages + normal: 94073 pages + normal bytes: 376292 kbytes + dirty sync count: 2 + (qemu) quit + +On host_server_2: Terminal 1 +~~~~~~~~~~~~~~~~~~~~~~~~~~~~ + +**In VM on host_server_2:** + + Hit Enter key. This brings the user to the testpmd prompt. + +.. code-block:: console + + testpmd> + +On host_server_2: Terminal 4 +~~~~~~~~~~~~~~~~~~~~~~~~~~~~ + +**In QEMU monitor on host_server_2** + +.. code-block:: console + + (qemu) info status + VM status: running + +On host_server_2: Terminal 1 +~~~~~~~~~~~~~~~~~~~~~~~~~~~~ + +**In VM on host_server_2:** + +.. code-block:: console + + testpmd> show port info all + testpmd> show port stats all + +Virtio traffic is seen at P0 and P1. + + +.. _lm_virtio_vhost_user_host_scripts: + +Sample host scripts +------------------- + +reset_vf_on_212_46.sh +~~~~~~~~~~~~~~~~~~~~~ + +.. code-block:: sh + + #!/bin/sh + # This script is run on the host 10.237.212.46 to reset SRIOV + + # BDF for Fortville NIC is 0000:02:00.0 + cat /sys/bus/pci/devices/0000\:02\:00.0/max_vfs + echo 0 > /sys/bus/pci/devices/0000\:02\:00.0/max_vfs + cat /sys/bus/pci/devices/0000\:02\:00.0/max_vfs + + # BDF for Niantic NIC is 0000:09:00.0 + cat /sys/bus/pci/devices/0000\:09\:00.0/max_vfs + echo 0 > /sys/bus/pci/devices/0000\:09\:00.0/max_vfs + cat /sys/bus/pci/devices/0000\:09\:00.0/max_vfs + +vm_virtio_vhost_user.sh +~~~~~~~~~~~~~~~~~~~~~~~ + +.. 
code-block:: sh + + #/bin/sh + # Script for use with vhost_user sample application + # The host system has 8 cpu's (0-7) + + # Path to KVM tool + KVM_PATH="/usr/bin/qemu-system-x86_64" + + # Guest Disk image + DISK_IMG="/home/user/disk_image/virt1_sml.disk" + + # Number of guest cpus + VCPUS_NR="6" + + # Memory + MEM=1024 + + VIRTIO_OPTIONS="csum=off,gso=off,guest_tso4=off,guest_tso6=off,guest_ecn=off" + + # Socket Path + SOCKET_PATH="/root/dpdk/host_scripts/usvhost" + + taskset -c 2-7 $KVM_PATH \ + -enable-kvm \ + -m $MEM \ + -smp $VCPUS_NR \ + -object memory-backend-file,id=mem,size=1024M,mem-path=/mnt/huge,share=on \ + -numa node,memdev=mem,nodeid=0 \ + -cpu host \ + -name VM1 \ + -no-reboot \ + -net none \ + -vnc none \ + -nographic \ + -hda $DISK_IMG \ + -chardev socket,id=chr0,path=$SOCKET_PATH \ + -netdev type=vhost-user,id=net1,chardev=chr0,vhostforce \ + -device virtio-net-pci,netdev=net1,mac=CC:BB:BB:BB:BB:BB,$VIRTIO_OPTIONS \ + -chardev socket,id=chr1,path=$SOCKET_PATH \ + -netdev type=vhost-user,id=net2,chardev=chr1,vhostforce \ + -device virtio-net-pci,netdev=net2,mac=DD:BB:BB:BB:BB:BB,$VIRTIO_OPTIONS \ + -monitor telnet::3333,server,nowait + +connect_to_qemu_mon_on_host.sh +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ + +.. code-block:: sh + + #!/bin/sh + # This script is run on both hosts when the VM is up, + # to connect to the Qemu Monitor. + + telnet 0 3333 + +reset_vf_on_212_131.sh +~~~~~~~~~~~~~~~~~~~~~~ + +.. code-block:: sh + + #!/bin/sh + # This script is run on the host 10.237.212.131 to reset SRIOV + + # BDF for Niantic NIC is 0000:06:00.0 + cat /sys/bus/pci/devices/0000\:06\:00.0/max_vfs + echo 0 > /sys/bus/pci/devices/0000\:06\:00.0/max_vfs + cat /sys/bus/pci/devices/0000\:06\:00.0/max_vfs + + # BDF for Fortville NIC is 0000:03:00.0 + cat /sys/bus/pci/devices/0000\:03\:00.0/max_vfs + echo 0 > /sys/bus/pci/devices/0000\:03\:00.0/max_vfs + cat /sys/bus/pci/devices/0000\:03\:00.0/max_vfs + +vm_virtio_vhost_user_migrate.sh +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ + +.. code-block:: sh + + #/bin/sh + # Script for use with vhost user sample application + # The host system has 8 cpu's (0-7) + + # Path to KVM tool + KVM_PATH="/usr/bin/qemu-system-x86_64" + + # Guest Disk image + DISK_IMG="/home/user/disk_image/virt1_sml.disk" + + # Number of guest cpus + VCPUS_NR="6" + + # Memory + MEM=1024 + + VIRTIO_OPTIONS="csum=off,gso=off,guest_tso4=off,guest_tso6=off,guest_ecn=off" + + # Socket Path + SOCKET_PATH="/root/dpdk/host_scripts/usvhost" + + taskset -c 2-7 $KVM_PATH \ + -enable-kvm \ + -m $MEM \ + -smp $VCPUS_NR \ + -object memory-backend-file,id=mem,size=1024M,mem-path=/mnt/huge,share=on \ + -numa node,memdev=mem,nodeid=0 \ + -cpu host \ + -name VM1 \ + -no-reboot \ + -net none \ + -vnc none \ + -nographic \ + -hda $DISK_IMG \ + -chardev socket,id=chr0,path=$SOCKET_PATH \ + -netdev type=vhost-user,id=net1,chardev=chr0,vhostforce \ + -device virtio-net-pci,netdev=net1,mac=CC:BB:BB:BB:BB:BB,$VIRTIO_OPTIONS \ + -chardev socket,id=chr1,path=$SOCKET_PATH \ + -netdev type=vhost-user,id=net2,chardev=chr1,vhostforce \ + -device virtio-net-pci,netdev=net2,mac=DD:BB:BB:BB:BB:BB,$VIRTIO_OPTIONS \ + -incoming tcp:0:5555 \ + -monitor telnet::3333,server,nowait + +.. _lm_virtio_vhost_user_vm_scripts: + +Sample VM scripts +----------------- + +setup_dpdk_virtio_in_vm.sh +~~~~~~~~~~~~~~~~~~~~~~~~~~ + +.. 
code-block:: sh + + #!/bin/sh + # this script matches the vm_virtio_vhost_user script + # virtio port is 03 + # virtio port is 04 + + cat /sys/kernel/mm/hugepages/hugepages-2048kB/nr_hugepages + echo 1024 > /sys/kernel/mm/hugepages/hugepages-2048kB/nr_hugepages + cat /sys/kernel/mm/hugepages/hugepages-2048kB/nr_hugepages + + ifconfig -a + /root/dpdk/usertools/dpdk-devbind.py --status + + rmmod virtio-pci + + modprobe uio + insmod /root/dpdk/x86_64-default-linux-gcc/kmod/igb_uio.ko + + /root/dpdk/usertools/dpdk-devbind.py -b igb_uio 0000:00:03.0 + /root/dpdk/usertools/dpdk-devbind.py -b igb_uio 0000:00:04.0 + + /root/dpdk/usertools/dpdk-devbind.py --status + +run_testpmd_in_vm.sh +~~~~~~~~~~~~~~~~~~~~ + +.. code-block:: sh + + #!/bin/sh + # Run testpmd for use with vhost_user sample app. + # test system has 8 cpus (0-7), use cpus 2-7 for VM + + /root/dpdk/x86_64-default-linux-gcc/app/testpmd \ + -l 0-5 -n 4 --socket-mem 350 -- --burst=64 --i diff --git a/src/spdk/dpdk/doc/guides/howto/openwrt.rst b/src/spdk/dpdk/doc/guides/howto/openwrt.rst new file mode 100644 index 000000000..6081f057b --- /dev/null +++ b/src/spdk/dpdk/doc/guides/howto/openwrt.rst @@ -0,0 +1,163 @@ +.. SPDX-License-Identifier: BSD-3-Clause + Copyright(c) 2019 Intel Corporation. + +Enable DPDK on OpenWrt +====================== + +This document describes how to enable Data Plane Development Kit (DPDK) on +OpenWrt in both a virtual and physical x86 environment. + +Introduction +------------ + +The OpenWrt project is a well-known source-based router OS which provides a +fully writable filesystem with package management. + +Build OpenWrt +------------- + +You can obtain OpenWrt image through https://downloads.openwrt.org/releases. +To fully customize your own OpenWrt, it is highly recommended to build it from +the source code. You can clone the OpenWrt source code as follows: + +.. code-block:: console + + git clone https://git.openwrt.org/openwrt/openwrt.git + +OpenWrt configuration +~~~~~~~~~~~~~~~~~~~~~ + +* Select ``x86`` in ``Target System`` +* Select ``x86_64`` in ``Subtarget`` +* Select ``Build the OpenWrt SDK`` for cross-compilation environment +* Select ``Use glibc`` in ``Advanced configuration options (for developers)`` + then ``ToolChain Options`` and ``C Library implementation`` + +Kernel configuration +~~~~~~~~~~~~~~~~~~~~ + +The following configurations should be enabled: + +* ``CONFIG_VFIO_IOMMU_TYPE1=y`` +* ``CONFIG_VFIO_VIRQFD=y`` +* ``CONFIG_VFIO=y`` +* ``CONFIG_VFIO_NOIOMMU=y`` +* ``CONFIG_VFIO_PCI=y`` +* ``CONFIG_VFIO_PCI_MMAP=y`` +* ``CONFIG_HUGETLBFS=y`` +* ``CONFIG_HUGETLB_PAGE=y`` +* ``CONFIG_PROC_PAGE_MONITOR=y`` + +Build steps +~~~~~~~~~~~ + +For detailed OpenWrt build steps and prerequisites, please refer to the +`OpenWrt build guide +`_. + +After the build is completed, you can find the images and SDK in +``/bin/targets/x86/64-glibc/``. + + +DPDK Cross Compilation for OpenWrt +---------------------------------- + +Pre-requisites +~~~~~~~~~~~~~~ + +NUMA is required to run DPDK in x86. + +.. note:: + + For compiling the NUMA lib, run ``libtool --version`` to ensure the libtool + version >= 2.2, otherwise the compilation will fail with errors. + +.. 
code-block:: console + + git clone https://github.com/numactl/numactl.git + cd numactl + git checkout v2.0.13 -b v2.0.13 + ./autogen.sh + autoconf -i + export PATH=/glibc/openwrt-sdk-x86-64_gcc-8.3.0_glibc.Linux-x86_64/staging_dir/toolchain-x86_64_gcc-8.3.0_glibc/bin/:$PATH + ./configure CC=x86_64-openwrt-linux-gnu-gcc --prefix= + make install + +The numa header files and lib file is generated in the include and lib folder +respectively under . + +Build DPDK +~~~~~~~~~~ + +To cross compile with meson build, you need to write a customized cross file +first. + +.. code-block:: console + + [binaries] + c = 'x86_64-openwrt-linux-gcc' + cpp = 'x86_64-openwrt-linux-cpp' + ar = 'x86_64-openwrt-linux-ar' + strip = 'x86_64-openwrt-linux-strip' + + meson builddir --cross-file openwrt-cross + ninja -C builddir + +.. note:: + + For compiling the igb_uio with the kernel version used in target machine, + you need to explicitly specify kernel_dir in meson_options.txt. + +Running DPDK application on OpenWrt +----------------------------------- + +Virtual machine +~~~~~~~~~~~~~~~ + +* Extract the boot image + +.. code-block:: console + + gzip -d openwrt-x86-64-combined-ext4.img.gz + +* Launch Qemu + +.. code-block:: console + + qemu-system-x86_64 \ + -cpu host \ + -smp 8 \ + -enable-kvm \ + -M q35 \ + -m 2048M \ + -object memory-backend-file,id=mem,size=2048M,mem-path=/tmp/hugepages,share=on \ + -drive file=/openwrt-x86-64-combined-ext4.img,id=d0,if=none,bus=0,unit=0 \ + -device ide-hd,drive=d0,bus=ide.0 \ + -net nic,vlan=0 \ + -net nic,vlan=1 \ + -net user,vlan=1 \ + -display none \ + + +Physical machine +~~~~~~~~~~~~~~~~ + +You can use the ``dd`` tool to write the OpenWrt image to the drive you +want to write the image on. + +.. code-block:: console + + dd if=openwrt-18.06.1-x86-64-combined-squashfs.img of=/dev/sdX + +Where sdX is name of the drive. (You can find it though ``fdisk -l``) + +Running DPDK +~~~~~~~~~~~~ + +More detailed info about how to run a DPDK application please refer to +``Running DPDK Applications`` section of :ref:`the DPDK documentation `. + +.. note:: + + You need to install pre-built NUMA libraries (including soft link) + to /usr/lib64 in OpenWrt. diff --git a/src/spdk/dpdk/doc/guides/howto/packet_capture_framework.rst b/src/spdk/dpdk/doc/guides/howto/packet_capture_framework.rst new file mode 100644 index 000000000..946a21c8e --- /dev/null +++ b/src/spdk/dpdk/doc/guides/howto/packet_capture_framework.rst @@ -0,0 +1,111 @@ +.. SPDX-License-Identifier: BSD-3-Clause + Copyright(c) 2017 Intel Corporation. + +DPDK pdump Library and pdump Tool +================================= + +This document describes how the Data Plane Development Kit (DPDK) Packet +Capture Framework is used for capturing packets on DPDK ports. It is intended +for users of DPDK who want to know more about the Packet Capture feature and +for those who want to monitor traffic on DPDK-controlled devices. + +The DPDK packet capture framework was introduced in DPDK v16.07. The DPDK +packet capture framework consists of the DPDK pdump library and DPDK pdump +tool. + + +Introduction +------------ + +The :ref:`librte_pdump ` library provides the APIs required to +allow users to initialize the packet capture framework and to enable or +disable packet capture. The library works on a client/server model and its +usage is recommended for debugging purposes. + +The :ref:`dpdk-pdump ` tool is developed based on the +``librte_pdump`` library. 
It runs as a DPDK secondary process and is capable +of enabling or disabling packet capture on DPDK ports. The ``dpdk-pdump`` tool +provides command-line options with which users can request enabling or +disabling of the packet capture on DPDK ports. + +The application which initializes the packet capture framework will act as a +server and the application that enables or disables the packet capture will +act as a client. The server sends the Rx and Tx packets from the DPDK ports +to the client. + +In DPDK the ``testpmd`` application can be used to initialize the packet +capture framework and act as a server, and the ``dpdk-pdump`` tool acts as a +client. To view Rx or Tx packets of ``testpmd``, the application should be +launched first, and then the ``dpdk-pdump`` tool. Packets from ``testpmd`` +will be sent to the tool, which then sends them on to the Pcap PMD device and +that device writes them to the Pcap file or to an external interface depending +on the command-line option used. + +Some things to note: + +* The ``dpdk-pdump`` tool can only be used in conjunction with a primary + application which has the packet capture framework initialized already. In + dpdk, only ``testpmd`` is modified to initialize packet capture framework, + other applications remain untouched. So, if the ``dpdk-pdump`` tool has to + be used with any application other than the testpmd, the user needs to + explicitly modify that application to call the packet capture framework + initialization code. Refer to the ``app/test-pmd/testpmd.c`` code and look + for ``pdump`` keyword to see how this is done. + +* The ``dpdk-pdump`` tool depends on the libpcap based PMD which is disabled + by default in the build configuration files, owing to an external dependency + on the libpcap development files. Once the libpcap development files are + installed, the libpcap based PMD can be enabled by setting + ``CONFIG_RTE_LIBRTE_PMD_PCAP=y`` and recompiling the DPDK. + + +Test Environment +---------------- + +The overview of using the Packet Capture Framework and the ``dpdk-pdump`` tool +for packet capturing on the DPDK port in +:numref:`figure_packet_capture_framework`. + +.. _figure_packet_capture_framework: + +.. figure:: img/packet_capture_framework.* + + Packet capturing on a DPDK port using the dpdk-pdump tool. + + +Configuration +------------- + +Modify the DPDK primary application to initialize the packet capture framework +as mentioned in the above notes and enable the following config options and +build DPDK:: + + CONFIG_RTE_LIBRTE_PMD_PCAP=y + CONFIG_RTE_LIBRTE_PDUMP=y + + +Running the Application +----------------------- + +The following steps demonstrate how to run the ``dpdk-pdump`` tool to capture +Rx side packets on dpdk_port0 in :numref:`figure_packet_capture_framework` and +inspect them using ``tcpdump``. + +#. Launch testpmd as the primary application:: + + sudo ./app/testpmd -c 0xf0 -n 4 -- -i --port-topology=chained + +#. Launch the pdump tool as follows:: + + sudo ./build/app/dpdk-pdump -- \ + --pdump 'port=0,queue=*,rx-dev=/tmp/capture.pcap' + +#. Send traffic to dpdk_port0 from traffic generator. 
+ Inspect packets captured in the file capture.pcap using a tool + that can interpret Pcap files, for example tcpdump:: + + $tcpdump -nr /tmp/capture.pcap + reading from file /tmp/capture.pcap, link-type EN10MB (Ethernet) + 11:11:36.891404 IP 4.4.4.4.whois++ > 3.3.3.3.whois++: UDP, length 18 + 11:11:36.891442 IP 4.4.4.4.whois++ > 3.3.3.3.whois++: UDP, length 18 + 11:11:36.891445 IP 4.4.4.4.whois++ > 3.3.3.3.whois++: UDP, length 18 diff --git a/src/spdk/dpdk/doc/guides/howto/pvp_reference_benchmark.rst b/src/spdk/dpdk/doc/guides/howto/pvp_reference_benchmark.rst new file mode 100644 index 000000000..64b1f4d8e --- /dev/null +++ b/src/spdk/dpdk/doc/guides/howto/pvp_reference_benchmark.rst @@ -0,0 +1,372 @@ +.. SPDX-License-Identifier: BSD-3-Clause + Copyright 2016 Red Hat, Inc. + + + +PVP reference benchmark setup using testpmd +=========================================== + +This guide lists the steps required to setup a PVP benchmark using testpmd as +a simple forwarder between NICs and Vhost interfaces. The goal of this setup +is to have a reference PVP benchmark without using external vSwitches (OVS, +VPP, ...) to make it easier to obtain reproducible results and to facilitate +continuous integration testing. + +The guide covers two ways of launching the VM, either by directly calling the +QEMU command line, or by relying on libvirt. It has been tested with DPDK +v16.11 using RHEL7 for both host and guest. + + +Setup overview +-------------- + +.. _figure_pvp_2nics: + +.. figure:: img/pvp_2nics.* + + PVP setup using 2 NICs + +In this diagram, each red arrow represents one logical core. This use-case +requires 6 dedicated logical cores. A forwarding configuration with a single +NIC is also possible, requiring 3 logical cores. + + +Host setup +---------- + +In this setup, we isolate 6 cores (from CPU2 to CPU7) on the same NUMA +node. Two cores are assigned to the VM vCPUs running testpmd and four are +assigned to testpmd on the host. + + +Host tuning +~~~~~~~~~~~ + +#. On BIOS, disable turbo-boost and hyper-threads. + +#. Append these options to Kernel command line: + + .. code-block:: console + + intel_pstate=disable mce=ignore_ce default_hugepagesz=1G hugepagesz=1G hugepages=6 isolcpus=2-7 rcu_nocbs=2-7 nohz_full=2-7 iommu=pt intel_iommu=on + +#. Disable hyper-threads at runtime if necessary or if BIOS is not accessible: + + .. code-block:: console + + cat /sys/devices/system/cpu/cpu*[0-9]/topology/thread_siblings_list \ + | sort | uniq \ + | awk -F, '{system("echo 0 > /sys/devices/system/cpu/cpu"$2"/online")}' + +#. Disable NMIs: + + .. code-block:: console + + echo 0 > /proc/sys/kernel/nmi_watchdog + +#. Exclude isolated CPUs from the writeback cpumask: + + .. code-block:: console + + echo ffffff03 > /sys/bus/workqueue/devices/writeback/cpumask + +#. Isolate CPUs from IRQs: + + .. code-block:: console + + clear_mask=0xfc #Isolate CPU2 to CPU7 from IRQs + for i in /proc/irq/*/smp_affinity + do + echo "obase=16;$(( 0x$(cat $i) & ~$clear_mask ))" | bc > $i + done + + +Qemu build +~~~~~~~~~~ + +Build Qemu: + + .. code-block:: console + + git clone git://git.qemu.org/qemu.git + cd qemu + mkdir bin + cd bin + ../configure --target-list=x86_64-softmmu + make + + +DPDK build +~~~~~~~~~~ + +Build DPDK: + + .. code-block:: console + + git clone git://dpdk.org/dpdk + cd dpdk + export RTE_SDK=$PWD + make install T=x86_64-native-linux-gcc DESTDIR=install + + +Testpmd launch +~~~~~~~~~~~~~~ + +#. Assign NICs to DPDK: + + .. 
code-block:: console + + modprobe vfio-pci + $RTE_SDK/install/sbin/dpdk-devbind -b vfio-pci 0000:11:00.0 0000:11:00.1 + + .. Note:: + + The Sandy Bridge family seems to have some IOMMU limitations giving poor + performance results. To achieve good performance on these machines + consider using UIO instead. + +#. Launch the testpmd application: + + .. code-block:: console + + $RTE_SDK/install/bin/testpmd -l 0,2,3,4,5 --socket-mem=1024 -n 4 \ + --vdev 'net_vhost0,iface=/tmp/vhost-user1' \ + --vdev 'net_vhost1,iface=/tmp/vhost-user2' -- \ + --portmask=f -i --rxq=1 --txq=1 \ + --nb-cores=4 --forward-mode=io + + With this command, isolated CPUs 2 to 5 will be used as lcores for PMD threads. + +#. In testpmd interactive mode, set the portlist to obtain the correct port + chaining: + + .. code-block:: console + + set portlist 0,2,1,3 + start + + +VM launch +~~~~~~~~~ + +The VM may be launched either by calling QEMU directly, or by using libvirt. + +Qemu way +^^^^^^^^ + +Launch QEMU with two Virtio-net devices paired to the vhost-user sockets +created by testpmd. Below example uses default Virtio-net options, but options +may be specified, for example to disable mergeable buffers or indirect +descriptors. + + .. code-block:: console + + /bin/x86_64-softmmu/qemu-system-x86_64 \ + -enable-kvm -cpu host -m 3072 -smp 3 \ + -chardev socket,id=char0,path=/tmp/vhost-user1 \ + -netdev type=vhost-user,id=mynet1,chardev=char0,vhostforce \ + -device virtio-net-pci,netdev=mynet1,mac=52:54:00:02:d9:01,addr=0x10 \ + -chardev socket,id=char1,path=/tmp/vhost-user2 \ + -netdev type=vhost-user,id=mynet2,chardev=char1,vhostforce \ + -device virtio-net-pci,netdev=mynet2,mac=52:54:00:02:d9:02,addr=0x11 \ + -object memory-backend-file,id=mem,size=3072M,mem-path=/dev/hugepages,share=on \ + -numa node,memdev=mem -mem-prealloc \ + -net user,hostfwd=tcp::1002$1-:22 -net nic \ + -qmp unix:/tmp/qmp.socket,server,nowait \ + -monitor stdio .qcow2 + +You can use this `qmp-vcpu-pin `_ +script to pin vCPUs. + +It can be used as follows, for example to pin 3 vCPUs to CPUs 1, 6 and 7, +where isolated CPUs 6 and 7 will be used as lcores for Virtio PMDs: + + .. code-block:: console + + export PYTHONPATH=$PYTHONPATH:/scripts/qmp + ./qmp-vcpu-pin -s /tmp/qmp.socket 1 6 7 + +Libvirt way +^^^^^^^^^^^ + +Some initial steps are required for libvirt to be able to connect to testpmd's +sockets. + +First, SELinux policy needs to be set to permissive, since testpmd is +generally run as root (note, as reboot is required): + + .. code-block:: console + + cat /etc/selinux/config + + # This file controls the state of SELinux on the system. + # SELINUX= can take one of these three values: + # enforcing - SELinux security policy is enforced. + # permissive - SELinux prints warnings instead of enforcing. + # disabled - No SELinux policy is loaded. + SELINUX=permissive + + # SELINUXTYPE= can take one of three two values: + # targeted - Targeted processes are protected, + # minimum - Modification of targeted policy. + # Only selected processes are protected. + # mls - Multi Level Security protection. + SELINUXTYPE=targeted + + +Also, Qemu needs to be run as root, which has to be specified in +``/etc/libvirt/qemu.conf``: + + .. code-block:: console + + user = "root" + +Once the domain created, the following snippet is an extract of he most +important information (hugepages, vCPU pinning, Virtio PCI devices): + + .. code-block:: xml + + + 3145728 + 3145728 + + + + + + + 3 + + + + + + + + + + + hvm + + + + + + + + + + + + + + +
+ + + + + +Guest setup +----------- + + +Guest tuning +~~~~~~~~~~~~ + +#. Append these options to the Kernel command line: + + .. code-block:: console + + default_hugepagesz=1G hugepagesz=1G hugepages=1 intel_iommu=on iommu=pt isolcpus=1,2 rcu_nocbs=1,2 nohz_full=1,2 + +#. Disable NMIs: + + .. code-block:: console + + echo 0 > /proc/sys/kernel/nmi_watchdog + +#. Exclude isolated CPU1 and CPU2 from the writeback cpumask: + + .. code-block:: console + + echo 1 > /sys/bus/workqueue/devices/writeback/cpumask + +#. Isolate CPUs from IRQs: + + .. code-block:: console + + clear_mask=0x6 #Isolate CPU1 and CPU2 from IRQs + for i in /proc/irq/*/smp_affinity + do + echo "obase=16;$(( 0x$(cat $i) & ~$clear_mask ))" | bc > $i + done + + +DPDK build +~~~~~~~~~~ + +Build DPDK: + + .. code-block:: console + + git clone git://dpdk.org/dpdk + cd dpdk + export RTE_SDK=$PWD + make install T=x86_64-native-linux-gcc DESTDIR=install + + +Testpmd launch +~~~~~~~~~~~~~~ + +Probe vfio module without iommu: + + .. code-block:: console + + modprobe -r vfio_iommu_type1 + modprobe -r vfio + modprobe vfio enable_unsafe_noiommu_mode=1 + cat /sys/module/vfio/parameters/enable_unsafe_noiommu_mode + modprobe vfio-pci + +Bind the virtio-net devices to DPDK: + + .. code-block:: console + + $RTE_SDK/usertools/dpdk-devbind.py -b vfio-pci 0000:00:10.0 0000:00:11.0 + +Start testpmd: + + .. code-block:: console + + $RTE_SDK/install/bin/testpmd -l 0,1,2 --socket-mem 1024 -n 4 \ + --proc-type auto --file-prefix pg -- \ + --portmask=3 --forward-mode=macswap --port-topology=chained \ + --disable-rss -i --rxq=1 --txq=1 \ + --rxd=256 --txd=256 --nb-cores=2 --auto-start + +Results template +---------------- + +Below template should be used when sharing results: + + .. code-block:: none + + Traffic Generator: + Acceptable Loss: % + Validation run time: min + Host DPDK version/commit: + Guest DPDK version/commit: + Patches applied: + QEMU version/commit: + Virtio features: + CPU: , + NIC: + Result: Mpps diff --git a/src/spdk/dpdk/doc/guides/howto/rte_flow.rst b/src/spdk/dpdk/doc/guides/howto/rte_flow.rst new file mode 100644 index 000000000..27d4f28f7 --- /dev/null +++ b/src/spdk/dpdk/doc/guides/howto/rte_flow.rst @@ -0,0 +1,305 @@ +.. SPDX-License-Identifier: BSD-3-Clause + Copyright 2017 Mellanox Technologies, Ltd + +Generic flow API - examples +=========================== + +This document demonstrates some concrete examples for programming flow rules +with the ``rte_flow`` APIs. + +* Detail of the rte_flow APIs can be found in the following link: + :doc:`../prog_guide/rte_flow`. + +* Details of the TestPMD commands to set the flow rules can be found in the + following link: :ref:`TestPMD Flow rules ` + +Simple IPv4 drop +---------------- + +Description +~~~~~~~~~~~ + +In this example we will create a simple rule that drops packets whose IPv4 +destination equals 192.168.3.2. This code is equivalent to the following +testpmd command (wrapped for clarity):: + + testpmd> flow create 0 ingress pattern eth / vlan / + ipv4 dst is 192.168.3.2 / end actions drop / end + +Code +~~~~ + +.. 
code-block:: c + + /* create the attribute structure */ + struct rte_flow_attr attr = { .ingress = 1 }; + struct rte_flow_item pattern[MAX_PATTERN_IN_FLOW]; + struct rte_flow_action actions[MAX_ACTIONS_IN_FLOW]; + struct rte_flow_item_eth eth; + struct rte_flow_item_vlan vlan; + struct rte_flow_item_ipv4 ipv4; + struct rte_flow *flow; + struct rte_flow_error error; + + /* setting the eth to pass all packets */ + pattern[0].type = RTE_FLOW_ITEM_TYPE_ETH; + pattern[0].spec = ð + + /* set the vlan to pass all packets */ + pattern[1] = RTE_FLOW_ITEM_TYPE_VLAN; + pattern[1].spec = &vlan; + + /* set the dst ipv4 packet to the required value */ + ipv4.hdr.dst_addr = htonl(0xc0a80302); + pattern[2].type = RTE_FLOW_ITEM_TYPE_IPV4; + pattern[2].spec = &ipv4; + + /* end the pattern array */ + pattern[3].type = RTE_FLOW_ITEM_TYPE_END; + + /* create the drop action */ + actions[0].type = RTE_FLOW_ACTION_TYPE_DROP; + actions[1].type = RTE_FLOW_ACTION_TYPE_END; + + /* validate and create the flow rule */ + if (!rte_flow_validate(port_id, &attr, pattern, actions, &error)) + flow = rte_flow_create(port_id, &attr, pattern, actions, &error); + +Output +~~~~~~ + +Terminal 1: running sample app with the flow rule disabled:: + + ./filter-program disable + [waiting for packets] + +Terminal 2: running scapy:: + + $scapy + welcome to Scapy + >> sendp(Ether()/Dot1Q()/IP(src='176.80.50.4', dst='192.168.3.1'), \ + iface='some interface', count=1) + >> sendp(Ether()/Dot1Q()/IP(src='176.80.50.5', dst='192.168.3.2'), \ + iface='some interface', count=1) + +Terminal 1: output log:: + + received packet with src ip = 176.80.50.4 + received packet with src ip = 176.80.50.5 + +Terminal 1: running sample the app flow rule enabled:: + + ./filter-program enabled + [waiting for packets] + +Terminal 2: running scapy:: + + $scapy + welcome to Scapy + >> sendp(Ether()/Dot1Q()/IP(src='176.80.50.4', dst='192.168.3.1'), \ + iface='some interface', count=1) + >> sendp(Ether()/Dot1Q()/IP(src='176.80.50.5', dst ='192.168.3.2'), \ + iface='some interface', count=1) + +Terminal 1: output log:: + + received packet with src ip = 176.80.50.4 + +Range IPv4 drop +---------------- + +Description +~~~~~~~~~~~ + +In this example we will create a simple rule that drops packets whose IPv4 +destination is in the range 192.168.3.0 to 192.168.3.255. This is done using +a mask. + +This code is equivalent to the following testpmd command (wrapped for +clarity):: + + testpmd> flow create 0 ingress pattern eth / vlan / + ipv4 dst spec 192.168.3.0 dst mask 255.255.255.0 / + end actions drop / end + +Code +~~~~ + +.. 
code-block:: c + + struct rte_flow_attr attr = {.ingress = 1}; + struct rte_flow_item pattern[MAX_PATTERN_IN_FLOW]; + struct rte_flow_action actions[MAX_ACTIONS_IN_FLOW]; + struct rte_flow_item_eth eth; + struct rte_flow_item_vlan vlan; + struct rte_flow_item_ipv4 ipv4; + struct rte_flow_item_ipv4 ipv4_mask; + struct rte_flow *flow; + struct rte_flow_error error; + + /* setting the eth to pass all packets */ + pattern[0].type = RTE_FLOW_ITEM_TYPE_ETH; + pattern[0].spec = ð + + /* set the vlan to pass all packets */ + pattern[1] = RTE_FLOW_ITEM_TYPE_VLAN; + pattern[1].spec = &vlan; + + /* set the dst ipv4 packet to the required value */ + ipv4.hdr.dst_addr = htonl(0xc0a80300); + ipv4_mask.hdr.dst_addr = htonl(0xffffff00); + pattern[2].type = RTE_FLOW_ITEM_TYPE_IPV4; + pattern[2].spec = &ipv4; + pattern[2].mask = &ipv4_mask; + + /* end the pattern array */ + pattern[3].type = RTE_FLOW_ITEM_TYPE_END; + + /* create the drop action */ + actions[0].type = RTE_FLOW_ACTION_TYPE_DROP; + actions[1].type = RTE_FLOW_ACTION_TYPE_END; + + /* validate and create the flow rule */ + if (!rte_flow_validate(port_id, &attr, pattern, actions, &error)) + flow = rte_flow_create(port_id, &attr, pattern, actions, &error); + +Output +~~~~~~ + +Terminal 1: running sample app flow rule disabled:: + + ./filter-program disable + [waiting for packets] + +Terminal 2: running scapy:: + + $scapy + welcome to Scapy + >> sendp(Ether()/Dot1Q()/IP(src='176.80.50.4', dst='192.168.3.1'), \ + iface='some interface', count=1) + >> sendp(Ether()/Dot1Q()/IP(src='176.80.50.5', dst='192.168.3.2'), \ + iface='some interface', count=1) + >> sendp(Ether()/Dot1Q()/IP(src='176.80.50.6', dst='192.168.5.2'), \ + iface='some interface', count=1) + +Terminal 1: output log:: + + received packet with src ip = 176.80.50.4 + received packet with src ip = 176.80.50.5 + received packet with src ip = 176.80.50.6 + +Terminal 1: running sample app flow rule enabled:: + + ./filter-program enabled + [waiting for packets] + +Terminal 2: running scapy:: + + $scapy + welcome to Scapy + >> sendp(Ether()/Dot1Q()/IP(src='176.80.50.4', dst='192.168.3.1'), \ + iface='some interface', count=1) + >> sendp(Ether()/Dot1Q()/IP(src='176.80.50.5', dst='192.168.3.2'), \ + iface='some interface', count=1) + >> sendp(Ether()/Dot1Q()/IP(src='176.80.50.6', dst='192.168.5.2'), \ + iface='some interface', count=1) + +Terminal 1: output log:: + + received packet with src ip = 176.80.50.6 + +Send vlan to queue +------------------ + +Description +~~~~~~~~~~~ + +In this example we will create a rule that routes all vlan id 123 to queue 3. + +This code is equivalent to the following testpmd command (wrapped for +clarity):: + + testpmd> flow create 0 ingress pattern eth / vlan vid spec 123 / + end actions queue index 3 / end + +Code +~~~~ + +.. 
code-block:: c + + struct rte_flow_attr attr = { .ingress = 1 }; + struct rte_flow_item pattern[MAX_PATTERN_IN_FLOW]; + struct rte_flow_action actions[MAX_ACTIONS_IN_FLOW]; + struct rte_flow_item_eth eth; + struct rte_flow_item_vlan vlan; + struct rte_flow_action_queue queue = { .index = 3 }; + struct rte_flow *flow; + struct rte_flow_error error; + + /* setting the eth to pass all packets */ + pattern[0].type = RTE_FLOW_ITEM_TYPE_ETH; + pattern[0].spec = ð + + /* set the vlan to pas all packets */ + vlan.vid = 123; + pattern[1] = RTE_FLOW_ITEM_TYPE_VLAN; + pattern[1].spec = &vlan; + + /* end the pattern array */ + pattern[2].type = RTE_FLOW_ITEM_TYPE_END; + + /* create the queue action */ + actions[0].type = RTE_FLOW_ACTION_TYPE_QUEUE; + actions[0].conf = &queue; + actions[1].type = RTE_FLOW_ACTION_TYPE_END; + + /* validate and create the flow rule */ + if (!rte_flow_validate(port_id, &attr, pattern, actions, &error)) + flow = rte_flow_create(port_id, &attr, pattern, actions, &error); + +Output +~~~~~~ + +Terminal 1: running sample app flow rule disabled:: + + ./filter-program disable + [waiting for packets] + +Terminal 2: running scapy:: + + $scapy + welcome to Scapy + >> sendp(Ether()/Dot1Q(vlan=123)/IP(src='176.80.50.4', dst='192.168.3.1'), \ + iface='some interface', count=1) + >> sendp(Ether()/Dot1Q(vlan=50)/IP(src='176.80.50.5', dst='192.168.3.2'), \ + iface='some interface', count=1) + >> sendp(Ether()/Dot1Q(vlan=123)/IP(src='176.80.50.6', dst='192.168.5.2'), \ + iface='some interface', count=1) + +Terminal 1: output log:: + + received packet with src ip = 176.80.50.4 sent to queue 2 + received packet with src ip = 176.80.50.5 sent to queue 1 + received packet with src ip = 176.80.50.6 sent to queue 0 + +Terminal 1: running sample app flow rule enabled:: + + ./filter-program enabled + [waiting for packets] + +Terminal 2: running scapy:: + + $scapy + welcome to Scapy + >> sendp(Ether()/Dot1Q(vlan=123)/IP(src='176.80.50.4', dst='192.168.3.1'), \ + iface='some interface', count=1) + >> sendp(Ether()/Dot1Q(vlan=50)/IP(src='176.80.50.5', dst='192.168.3.2'), \ + iface='some interface', count=1) + >> sendp(Ether()/Dot1Q(vlan=123)/IP(src='176.80.50.6', dst='192.168.5.2'), \ + iface='some interface', count=1) + +Terminal 1: output log:: + + received packet with src ip = 176.80.50.4 sent to queue 3 + received packet with src ip = 176.80.50.5 sent to queue 1 + received packet with src ip = 176.80.50.6 sent to queue 3 diff --git a/src/spdk/dpdk/doc/guides/howto/telemetry.rst b/src/spdk/dpdk/doc/guides/howto/telemetry.rst new file mode 100644 index 000000000..b4a34ed67 --- /dev/null +++ b/src/spdk/dpdk/doc/guides/howto/telemetry.rst @@ -0,0 +1,80 @@ +.. SPDX-License-Identifier: BSD-3-Clause + Copyright(c) 2020 Intel Corporation. + + +DPDK Telemetry User Guide +========================= + +The Telemetry library provides users with the ability to query DPDK for +telemetry information, currently including information such as ethdev stats, +ethdev port list, and eal parameters. + +.. Note:: + + This library is experimental and the output format may change in the future. + + +Telemetry Interface +------------------- + +The :doc:`../prog_guide/telemetry_lib` opens a socket with path +*/dpdk_telemetry.*. The version represents the +telemetry version, the latest is v2. For example, a client would connect to a +socket with path */var/run/dpdk/\*/dpdk_telemetry.v2* (when the primary process +is run by a root user). 
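+
+The interface is a plain ``AF_UNIX`` socket carrying JSON, so it can be
+queried by any client, not only the ``dpdk-telemetry.py`` script shown later
+in this guide. The fragment below is a minimal, hypothetical C client sketch;
+it assumes the v2 endpoint is a ``SOCK_SEQPACKET`` socket at the default
+root-user path given above, reads the banner sent on connect, issues a single
+command and prints the JSON reply.
+
+.. code-block:: c
+
+   #include <stdio.h>
+   #include <string.h>
+   #include <unistd.h>
+   #include <sys/socket.h>
+   #include <sys/un.h>
+
+   int main(void)
+   {
+       /* Default v2 socket path when the primary process runs as root. */
+       const char *path = "/var/run/dpdk/rte/dpdk_telemetry.v2";
+       struct sockaddr_un addr = { .sun_family = AF_UNIX };
+       char buf[16384];
+       ssize_t len;
+       int fd;
+
+       fd = socket(AF_UNIX, SOCK_SEQPACKET, 0);
+       if (fd < 0)
+           return 1;
+       strncpy(addr.sun_path, path, sizeof(addr.sun_path) - 1);
+       if (connect(fd, (struct sockaddr *)&addr, sizeof(addr)) < 0)
+           return 1;
+
+       /* The server sends a JSON banner as soon as the client connects. */
+       len = read(fd, buf, sizeof(buf) - 1);
+       if (len > 0) {
+           buf[len] = '\0';
+           printf("banner: %s\n", buf);
+       }
+
+       /* Send one command and print the JSON response. */
+       if (write(fd, "/ethdev/list", strlen("/ethdev/list")) < 0)
+           return 1;
+       len = read(fd, buf, sizeof(buf) - 1);
+       if (len > 0) {
+           buf[len] = '\0';
+           printf("reply: %s\n", buf);
+       }
+
+       close(fd);
+       return 0;
+   }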
+ + +Telemetry Initialization +------------------------ + +The library is enabled by default, however an EAL flag to enable the library +exists, to provide backward compatibility for the previous telemetry library +interface. + +.. code-block:: console + + --telemetry + +A flag exists to disable Telemetry also. + +.. code-block:: console + + --no-telemetry + + +Running Telemetry +----------------- + +The following steps show how to run an application with telemetry support, +and query information using the telemetry client python script. + +#. Launch testpmd as the primary application with telemetry. + + .. code-block:: console + + ./app/dpdk-testpmd + +#. Launch the telemetry client script. + + .. code-block:: console + + python usertools/dpdk-telemetry.py + +#. When connected, the script displays the following, waiting for user input. + + .. code-block:: console + + Connecting to /var/run/dpdk/rte/dpdk_telemetry.v2 + {"version": "DPDK 20.05.0-rc0", "pid": 60285, "max_output_len": 16384} + --> + +#. The user can now input commands to send across the socket, and receive the + response. + + .. code-block:: console + + --> / + {"/": ["/", "/eal/app_params", "/eal/params", "/ethdev/list", + "/ethdev/link_status", "/ethdev/xstats", "/help", "/info"]} + --> /ethdev/list + {"/ethdev/list": [0, 1]} diff --git a/src/spdk/dpdk/doc/guides/howto/vfd.rst b/src/spdk/dpdk/doc/guides/howto/vfd.rst new file mode 100644 index 000000000..0ec0a0478 --- /dev/null +++ b/src/spdk/dpdk/doc/guides/howto/vfd.rst @@ -0,0 +1,379 @@ +.. SPDX-License-Identifier: BSD-3-Clause + Copyright(c) 2017 Intel Corporation. + +VF daemon (VFd) +=============== + +VFd (the VF daemon) is a mechanism which can be used to configure features on +a VF (SR-IOV Virtual Function) without direct access to the PF (SR-IOV +Physical Function). VFd is an *EXPERIMENTAL* feature which can only be used in +the scenario of DPDK PF with a DPDK VF. If the PF port is driven by the Linux +kernel driver then the VFd feature will not work. Currently VFd is only +supported by the ixgbe and i40e drivers. + +In general VF features cannot be configured directly by an end user +application since they are under the control of the PF. The normal approach to +configuring a feature on a VF is that an application would call the APIs +provided by the VF driver. If the required feature cannot be configured by the +VF directly (the most common case) the VF sends a message to the PF through +the mailbox on ixgbe and i40e. This means that the availability of the feature +depends on whether the appropriate mailbox messages are defined. + +DPDK leverages the mailbox interface defined by the Linux kernel driver so +that compatibility with the kernel driver can be guaranteed. The downside of +this approach is that the availability of messages supported by the kernel +become a limitation when the user wants to configure features on the VF. + +VFd is a new method of controlling the features on a VF. The VF driver doesn't +talk directly to the PF driver when configuring a feature on the VF. When a VF +application (i.e., an application using the VF ports) wants to enable a VF +feature, it can send a message to the PF application (i.e., the application +using the PF port, which can be the same as the VF application). The PF +application will configure the feature for the VF. Obviously, the PF +application can also configure the VF features without a request from the VF +application. + +.. _VF_daemon_overview: + +.. 
figure:: img/vf_daemon_overview.* + + VF daemon (VFd) Overview + +Compared with the traditional approach the VFd moves the negotiation between +VF and PF from the driver level to application level. So the application +should define how the negotiation between the VF and PF works, or even if the +control should be limited to the PF. + +It is the application's responsibility to use VFd. Consider for example a KVM +migration, the VF application may transfer from one VM to another. It is +recommended in this case that the PF control the VF features without +participation from the VF. Then the VF application has no capability to +configure the features. So the user doesn't need to define the interface +between the VF application and the PF application. The service provider should +take the control of all the features. + +The following sections describe the VFd functionality. + +.. Note:: + + Although VFd is supported by both ixgbe and i40e, please be aware that + since the hardware capability is different, the functions supported by + ixgbe and i40e are not the same. + + +Preparing +--------- + +VFd only can be used in the scenario of DPDK PF + DPDK VF. Users should bind +the PF port to ``igb_uio``, then create the VFs based on the DPDK PF host. + +The typical procedure to achieve this is as follows: + +#. Boot the system without iommu, or with ``iommu=pt``. + +#. Bind the PF port to ``igb_uio``, for example:: + + dpdk-devbind.py -b igb_uio 01:00.0 + +#. Create a Virtual Function:: + + echo 1 > /sys/bus/pci/devices/0000:01:00.0/max_vfs + +#. Start a VM with the new VF port bypassed to it. + +#. Run a DPDK application on the PF in the host:: + + testpmd -l 0-7 -n 4 -- -i --txqflags=0 + +#. Bind the VF port to ``igb_uio`` in the VM:: + + dpdk-devbind.py -b igb_uio 03:00.0 + +#. Run a DPDK application on the VF in the VM:: + + testpmd -l 0-7 -n 4 -- -i --txqflags=0 + + +Common functions of IXGBE and I40E +---------------------------------- + +The following sections show how to enable PF/VF functionality based on the +above testpmd setup. + + +TX loopback +~~~~~~~~~~~ + +Run a testpmd runtime command on the PF to set TX loopback:: + + set tx loopback 0 on|off + +This sets whether the PF port and all the VF ports that belong to it are +allowed to send the packets to other virtual ports. + +Although it is a VFd function, it is the global setting for the whole +physical port. When using this function, the PF and all the VFs TX loopback +will be enabled/disabled. + + +VF MAC address setting +~~~~~~~~~~~~~~~~~~~~~~ + +Run a testpmd runtime command on the PF to set the MAC address for a VF port:: + + set vf mac addr 0 0 A0:36:9F:7B:C3:51 + +This testpmd runtime command will change the MAC address of the VF port to +this new address. If any other addresses are set before, they will be +overwritten. + + +VF MAC anti-spoofing +~~~~~~~~~~~~~~~~~~~~ + +Run a testpmd runtime command on the PF to enable/disable the MAC +anti-spoofing for a VF port:: + + set vf mac antispoof 0 0 on|off + +When enabling the MAC anti-spoofing, the port will not forward packets whose +source MAC address is not the same as the port. + + +VF VLAN anti-spoofing +~~~~~~~~~~~~~~~~~~~~~ + +Run a testpmd runtime command on the PF to enable/disable the VLAN +anti-spoofing for a VF port:: + + set vf vlan antispoof 0 0 on|off + +When enabling the VLAN anti-spoofing, the port will not send packets whose +VLAN ID does not belong to VLAN IDs that this port can receive. 
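+
+The runtime commands above are only a front end: a DPDK application running on
+the PF can apply the same per-VF settings itself through the PMD-specific API.
+The fragment below is an illustrative sketch for an ixgbe PF (the i40e driver
+exports similarly named ``rte_pmd_i40e_*`` helpers); it assumes the PF is DPDK
+port 0 and that VF 0 is being configured, and a real application should check
+the return codes, e.g. ``-ENOTSUP`` when a helper is not implemented.
+
+.. code-block:: c
+
+   #include <rte_ethdev.h>
+   #include <rte_pmd_ixgbe.h>
+
+   /* Illustrative helper: enable anti-spoofing for one VF from the PF app. */
+   static int
+   harden_vf(uint16_t pf_port, uint16_t vf)
+   {
+       int ret;
+
+       /* Drop packets whose source MAC does not match the VF's address. */
+       ret = rte_pmd_ixgbe_set_vf_mac_anti_spoof(pf_port, vf, 1);
+       if (ret != 0)
+           return ret;
+
+       /* Drop packets carrying a VLAN ID the VF is not allowed to use. */
+       return rte_pmd_ixgbe_set_vf_vlan_anti_spoof(pf_port, vf, 1);
+   }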
+ + +VF VLAN insertion +~~~~~~~~~~~~~~~~~ + +Run a testpmd runtime command on the PF to set the VLAN insertion for a VF +port:: + + set vf vlan insert 0 0 1 + +When using this testpmd runtime command, an assigned VLAN ID can be inserted +to the transmitted packets by the hardware. + +The assigned VLAN ID can be 0. It means disabling the VLAN insertion. + + +VF VLAN stripping +~~~~~~~~~~~~~~~~~ + +Run a testpmd runtime command on the PF to enable/disable the VLAN stripping +for a VF port:: + + set vf vlan stripq 0 0 on|off + +This testpmd runtime command is used to enable/disable the RX VLAN stripping +for a specific VF port. + + +VF VLAN filtering +~~~~~~~~~~~~~~~~~ + +Run a testpmd runtime command on the PF to set the VLAN filtering for a VF +port:: + + rx_vlan add 1 port 0 vf 1 + rx_vlan rm 1 port 0 vf 1 + +These two testpmd runtime commands can be used to add or remove the VLAN +filter for several VF ports. When the VLAN filters are added only the packets +that have the assigned VLAN IDs can be received. Other packets will be dropped +by hardware. + + +The IXGBE specific VFd functions +-------------------------------- + +The functions in this section are specific to the ixgbe driver. + + +All queues drop +~~~~~~~~~~~~~~~ + +Run a testpmd runtime command on the PF to enable/disable the all queues +drop:: + + set all queues drop on|off + +This is a global setting for the PF and all the VF ports of the physical port. + +Enabling the ``all queues drop`` feature means that when there is no available +descriptor for the received packets they are dropped. The ``all queues drop`` +feature should be enabled in SR-IOV mode to avoid one queue blocking others. + + +VF packet drop +~~~~~~~~~~~~~~ + +Run a testpmd runtime command on the PF to enable/disable the packet drop for +a specific VF:: + + set vf split drop 0 0 on|off + +This is a similar function as ``all queues drop``. The difference is that this +function is per VF setting and the previous function is a global setting. + + +VF rate limit +~~~~~~~~~~~~~ + +Run a testpmd runtime command on the PF to all queues' rate limit for a +specific VF:: + + set port 0 vf 0 rate 10 queue_mask 1 + +This is a function to set the rate limit for all the queues in the +``queue_mask`` bitmap. It is not used to set the summary of the rate +limit. The rate limit of every queue will be set equally to the assigned rate +limit. + + +VF RX enabling +~~~~~~~~~~~~~~ + +Run a testpmd runtime command on the PF to enable/disable packet receiving for +a specific VF:: + + set port 0 vf 0 rx on|off + +This function can be used to stop/start packet receiving on a VF. + + +VF TX enabling +~~~~~~~~~~~~~~ + +Run a testpmd runtime command on the PF to enable/disable packet transmitting +for a specific VF:: + + set port 0 vf 0 tx on|off + +This function can be used to stop/start packet transmitting on a VF. + + +VF RX mode setting +~~~~~~~~~~~~~~~~~~ + +Run a testpmd runtime command on the PF to set the RX mode for a specific VF:: + + set port 0 vf 0 rxmode AUPE|ROPE|BAM|MPE on|off + +This function can be used to enable/disable some RX modes on the VF, including: + +* If it accept untagged packets. +* If it accepts packets matching the MAC filters. +* If it accept MAC broadcast packets, +* If it enables MAC multicast promiscuous mode. + + +The I40E specific VFd functions +------------------------------- + +The functions in this section are specific to the i40e driver. + + +VF statistics +~~~~~~~~~~~~~ + +This provides an API to get the a specific VF's statistic from PF. 
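+
+As a reference, the sketch below shows how a PF application might read these
+counters. It assumes the ``rte_pmd_i40e_get_vf_stats()`` helper exported by
+the i40e PMD-specific API header and wraps it in a hypothetical
+``print_vf_stats()`` function that prints only a few of the
+``struct rte_eth_stats`` fields.
+
+.. code-block:: c
+
+   #include <inttypes.h>
+   #include <stdio.h>
+   #include <rte_ethdev.h>
+   #include <rte_pmd_i40e.h>
+
+   /* Illustrative snippet: the PF application reads one VF's counters. */
+   static void
+   print_vf_stats(uint16_t pf_port, uint16_t vf_id)
+   {
+       struct rte_eth_stats stats;
+
+       if (rte_pmd_i40e_get_vf_stats(pf_port, vf_id, &stats) != 0) {
+           printf("failed to read stats for VF %u\n", vf_id);
+           return;
+       }
+       printf("VF %u: rx %" PRIu64 " pkts / %" PRIu64 " bytes, "
+              "tx %" PRIu64 " pkts / %" PRIu64 " bytes\n",
+              vf_id, stats.ipackets, stats.ibytes,
+              stats.opackets, stats.obytes);
+   }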
+ + +VF statistics resetting +~~~~~~~~~~~~~~~~~~~~~~~ + +This provides an API to rest the a specific VF's statistic from PF. + + +VF link status change notification +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ + +This provide an API to let a specific VF know if the physical link status +changed. + +Normally if a VF received this notification, the driver should notify the +application to reset the VF port. + + +VF MAC broadcast setting +~~~~~~~~~~~~~~~~~~~~~~~~ + +Run a testpmd runtime command on the PF to enable/disable MAC broadcast packet +receiving for a specific VF:: + + set vf broadcast 0 0 on|off + + +VF MAC multicast promiscuous mode +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ + +Run a testpmd runtime command on the PF to enable/disable MAC multicast +promiscuous mode for a specific VF:: + + set vf allmulti 0 0 on|off + + +VF MAC unicast promiscuous mode +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ + +Run a testpmd runtime command on the PF to enable/disable MAC unicast +promiscuous mode for a specific VF:: + + set vf promisc 0 0 on|off + + +VF max bandwidth +~~~~~~~~~~~~~~~~ + +Run a testpmd runtime command on the PF to set the TX maximum bandwidth for a +specific VF:: + + set vf tx max-bandwidth 0 0 2000 + +The maximum bandwidth is an absolute value in Mbps. + + +VF TC bandwidth allocation +~~~~~~~~~~~~~~~~~~~~~~~~~~ + +Run a testpmd runtime command on the PF to set the TCs (traffic class) TX +bandwidth allocation for a specific VF:: + + set vf tc tx min-bandwidth 0 0 (20,20,20,40) + +The allocated bandwidth should be set for all the TCs. The allocated bandwidth +is a relative value as a percentage. The sum of all the bandwidth should +be 100. + + +VF TC max bandwidth +~~~~~~~~~~~~~~~~~~~ + +Run a testpmd runtime command on the PF to set the TCs TX maximum bandwidth +for a specific VF:: + + set vf tc tx max-bandwidth 0 0 0 10000 + +The maximum bandwidth is an absolute value in Mbps. + + +TC strict priority scheduling +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ + +Run a testpmd runtime command on the PF to enable/disable several TCs TX +strict priority scheduling:: + + set tx strict-link-priority 0 0x3 + +The 0 in the TC bitmap means disabling the strict priority scheduling for this +TC. To enable use a value of 1. diff --git a/src/spdk/dpdk/doc/guides/howto/virtio_user_as_exceptional_path.rst b/src/spdk/dpdk/doc/guides/howto/virtio_user_as_exceptional_path.rst new file mode 100644 index 000000000..ec021af39 --- /dev/null +++ b/src/spdk/dpdk/doc/guides/howto/virtio_user_as_exceptional_path.rst @@ -0,0 +1,118 @@ +.. SPDX-License-Identifier: BSD-3-Clause + Copyright(c) 2016 Intel Corporation. + +.. _virtio_user_as_exceptional_path: + +Virtio_user as Exceptional Path +=============================== + +The virtual device, virtio-user, was originally introduced with vhost-user +backend, as a high performance solution for IPC (Inter-Process Communication) +and user space container networking. + +Virtio_user with vhost-kernel backend is a solution for exceptional path, +such as KNI which exchanges packets with kernel networking stack. This +solution is very promising in: + +* Maintenance + + All kernel modules needed by this solution, vhost and vhost-net (kernel), + are upstreamed and extensively used kernel module. + +* Features + + vhost-net is born to be a networking solution, which has lots of networking + related features, like multi queue, tso, multi-seg mbuf, etc. 
+ +* Performance + + similar to KNI, this solution would use one or more kthreads to + send/receive packets to/from user space DPDK applications, which has little + impact on user space polling thread (except that it might enter into kernel + space to wake up those kthreads if necessary). + +The overview of an application using virtio-user as exceptional path is shown +in :numref:`figure_virtio_user_as_exceptional_path`. + +.. _figure_virtio_user_as_exceptional_path: + +.. figure:: img/virtio_user_as_exceptional_path.* + + Overview of a DPDK app using virtio-user as exceptional path + + +Sample Usage +------------ + +As a prerequisite, the vhost/vhost-net kernel CONFIG should be chosen before +compiling the kernel and those kernel modules should be inserted. + +#. Compile DPDK and bind a physical NIC to igb_uio/uio_pci_generic/vfio-pci. + + This physical NIC is for communicating with outside. + +#. Run testpmd. + + .. code-block:: console + + $(testpmd) -l 2-3 -n 4 \ + --vdev=virtio_user0,path=/dev/vhost-net,queue_size=1024 \ + -- -i --tx-offloads=0x0000002c --enable-lro \ + --txd=1024 --rxd=1024 + + This command runs testpmd with two ports, one physical NIC to communicate + with outside, and one virtio-user to communicate with kernel. + +* ``--enable-lro`` + + This is used to negotiate VIRTIO_NET_F_GUEST_TSO4 and + VIRTIO_NET_F_GUEST_TSO6 feature so that large packets from kernel can be + transmitted to DPDK application and further TSOed by physical NIC. + +* ``queue_size`` + + 256 by default. To avoid shortage of descriptors, we can increase it to 1024. + +* ``queues`` + + Number of multi-queues. Each queue will be served by a kthread. For example: + + .. code-block:: console + + $(testpmd) -l 2-3 -n 4 \ + --vdev=virtio_user0,path=/dev/vhost-net,queues=2,queue_size=1024 \ + -- -i --tx-offloads=0x0000002c --enable-lro \ + --txq=2 --rxq=2 --txd=1024 --rxd=1024 + +#. Enable Rx checksum offloads in testpmd: + + .. code-block:: console + + (testpmd) port stop 0 + (testpmd) port config 0 rx_offload tcp_cksum on + (testpmd) port config 0 rx_offload udp_cksum on + (testpmd) port start 0 + +#. Start testpmd: + + .. code-block:: console + + (testpmd) start + +#. Configure IP address and start tap: + + .. code-block:: console + + ifconfig tap0 1.1.1.1/24 up + +.. note:: + + The tap device will be named tap0, tap1, etc, by kernel. + +Then, all traffic from physical NIC can be forwarded into kernel stack, and all +traffic on the tap0 can be sent out from physical NIC. + +Limitations +----------- + +This solution is only available on Linux systems. diff --git a/src/spdk/dpdk/doc/guides/howto/virtio_user_for_container_networking.rst b/src/spdk/dpdk/doc/guides/howto/virtio_user_for_container_networking.rst new file mode 100644 index 000000000..f31d918bc --- /dev/null +++ b/src/spdk/dpdk/doc/guides/howto/virtio_user_for_container_networking.rst @@ -0,0 +1,118 @@ +.. SPDX-License-Identifier: BSD-3-Clause + Copyright(c) 2016 Intel Corporation. + +.. _virtio_user_for_container_networking: + +Virtio_user for Container Networking +==================================== + +Container becomes more and more popular for strengths, like low overhead, fast +boot-up time, and easy to deploy, etc. How to use DPDK to accelerate container +networking becomes a common question for users. There are two use models of +running DPDK inside containers, as shown in +:numref:`figure_use_models_for_running_dpdk_in_containers`. + +.. _figure_use_models_for_running_dpdk_in_containers: + +.. 
figure:: img/use_models_for_running_dpdk_in_containers.* + + Use models of running DPDK inside container + +This page will only cover aggregation model. + +Overview +-------- + +The virtual device, virtio-user, with unmodified vhost-user backend, is designed +for high performance user space container networking or inter-process +communication (IPC). + +The overview of accelerating container networking by virtio-user is shown +in :numref:`figure_virtio_user_for_container_networking`. + +.. _figure_virtio_user_for_container_networking: + +.. figure:: img/virtio_user_for_container_networking.* + + Overview of accelerating container networking by virtio-user + +Different virtio PCI devices we usually use as a para-virtualization I/O in the +context of QEMU/VM, the basic idea here is to present a kind of virtual devices, +which can be attached and initialized by DPDK. The device emulation layer by +QEMU in VM's context is saved by just registering a new kind of virtual device +in DPDK's ether layer. And to minimize the change, we reuse already-existing +virtio PMD code (driver/net/virtio/). + +Virtio, in essence, is a shm-based solution to transmit/receive packets. How is +memory shared? In VM's case, qemu always shares the whole physical layout of VM +to vhost backend. But it's not feasible for a container, as a process, to share +all virtual memory regions to backend. So only those virtual memory regions +(aka, hugepages initialized in DPDK) are sent to backend. It restricts that only +addresses in these areas can be used to transmit or receive packets. + +Sample Usage +------------ + +Here we use Docker as container engine. It also applies to LXC, Rocket with +some minor changes. + +#. Compile DPDK. + + .. code-block:: console + + make install RTE_SDK=`pwd` T=x86_64-native-linux-gcc + +#. Write a Dockerfile like below. + + .. code-block:: console + + cat <> Dockerfile + FROM ubuntu:latest + WORKDIR /usr/src/dpdk + COPY . /usr/src/dpdk + ENV PATH "$PATH:/usr/src/dpdk/x86_64-native-linux-gcc/app/" + EOT + +#. Build a Docker image. + + .. code-block:: console + + docker build -t dpdk-app-testpmd . + +#. Start a testpmd on the host with a vhost-user port. + + .. code-block:: console + + $(testpmd) -l 0-1 -n 4 --socket-mem 1024,1024 \ + --vdev 'eth_vhost0,iface=/tmp/sock0' \ + --file-prefix=host --no-pci -- -i + +#. Start a container instance with a virtio-user port. + + .. code-block:: console + + docker run -i -t -v /tmp/sock0:/var/run/usvhost \ + -v /dev/hugepages:/dev/hugepages \ + dpdk-app-testpmd testpmd -l 6-7 -n 4 -m 1024 --no-pci \ + --vdev=virtio_user0,path=/var/run/usvhost \ + --file-prefix=container \ + -- -i + +Note: If we run all above setup on the host, it's a shm-based IPC. + +Limitations +----------- + +We have below limitations in this solution: + * Cannot work with --huge-unlink option. As we need to reopen the hugepage + file to share with vhost backend. + * Cannot work with --no-huge option. Currently, DPDK uses anonymous mapping + under this option which cannot be reopened to share with vhost backend. + * Cannot work when there are more than VHOST_MEMORY_MAX_NREGIONS(8) hugepages. + If you have more regions (especially when 2MB hugepages are used), the option, + --single-file-segments, can help to reduce the number of shared files. + * Applications should not use file name like HUGEFILE_FMT ("%smap_%d"). That + will bring confusion when sharing hugepage files with backend by name. + * Root privilege is a must. 
DPDK resolves physical addresses of hugepages + which seems not necessary, and some discussions are going on to remove this + restriction. diff --git a/src/spdk/dpdk/doc/guides/index.rst b/src/spdk/dpdk/doc/guides/index.rst new file mode 100644 index 000000000..988c6ea87 --- /dev/null +++ b/src/spdk/dpdk/doc/guides/index.rst @@ -0,0 +1,29 @@ +.. SPDX-License-Identifier: BSD-3-Clause + Copyright(c) 2010-2019 Intel Corporation. + +DPDK documentation +================== + +.. toctree:: + :maxdepth: 1 + + linux_gsg/index + freebsd_gsg/index + windows_gsg/index + sample_app_ug/index + prog_guide/index + howto/index + tools/index + testpmd_app_ug/index + nics/index + bbdevs/index + cryptodevs/index + compressdevs/index + vdpadevs/index + eventdevs/index + rawdevs/index + mempool/index + platform/index + contributing/index + rel_notes/index + faq/index diff --git a/src/spdk/dpdk/doc/guides/linux_gsg/build_dpdk.rst b/src/spdk/dpdk/doc/guides/linux_gsg/build_dpdk.rst new file mode 100644 index 000000000..4aeb4697d --- /dev/null +++ b/src/spdk/dpdk/doc/guides/linux_gsg/build_dpdk.rst @@ -0,0 +1,249 @@ +.. SPDX-License-Identifier: BSD-3-Clause + Copyright(c) 2010-2015 Intel Corporation. + +.. _linux_gsg_compiling_dpdk: + +Compiling the DPDK Target from Source +===================================== + +.. note:: + + Parts of this process can also be done using the setup script described in + the :ref:`linux_setup_script` section of this document. + +Uncompress DPDK and Browse Sources +---------------------------------- + +First, uncompress the archive and move to the uncompressed DPDK source directory: + +.. code-block:: console + + tar xJf dpdk-.tar.xz + cd dpdk- + +The DPDK is composed of several directories: + +* lib: Source code of DPDK libraries + +* drivers: Source code of DPDK poll-mode drivers + +* app: Source code of DPDK applications (automatic tests) + +* examples: Source code of DPDK application examples + +* config, buildtools, mk: Framework-related makefiles, scripts and configuration + +Compiling and Installing DPDK System-wide +----------------------------------------- + +DPDK can be configured, built and installed on your system using the tools +``meson`` and ``ninja``. + +.. note:: + + The older makefile-based build system used in older DPDK releases is + still present and its use is described in section + `Installation of DPDK Target Environment using Make`_. + +DPDK Configuration +~~~~~~~~~~~~~~~~~~ + +To configure a DPDK build use: + +.. code-block:: console + + meson build + +where "build" is the desired output build directory, and "" can be +empty or one of a number of meson or DPDK-specific build options, described +later in this section. The configuration process will finish with a summary +of what DPDK libraries and drivers are to be built and installed, and for +each item disabled, a reason why that is the case. This information can be +used, for example, to identify any missing required packages for a driver. + +Once configured, to build and then install DPDK system-wide use: + +.. code-block:: console + + cd build + ninja + ninja install + ldconfig + +The last two commands above generally need to be run as root, +with the `ninja install` step copying the built objects to their final system-wide locations, +and the last step causing the dynamic loader `ld.so` to update its cache to take account of the new objects. + +.. note:: + + On some linux distributions, such as Fedora or Redhat, paths in `/usr/local` are + not in the default paths for the loader. 
Therefore, on these + distributions, `/usr/local/lib` and `/usr/local/lib64` should be added + to a file in `/etc/ld.so.conf.d/` before running `ldconfig`. + + +Adjusting Build Options +~~~~~~~~~~~~~~~~~~~~~~~ + +DPDK has a number of options that can be adjusted as part of the build configuration process. +These options can be listed by running ``meson configure`` inside a configured build folder. +Many of these options come from the "meson" tool itself and can be seen documented on the +`Meson Website `_. + +For example, to change the build-type from the default, "debugoptimized", +to a regular "debug" build, you can either: + +* pass ``-Dbuildtype=debug`` or ``--buildtype=debug`` to meson when configuring the build folder initially + +* run ``meson configure -Dbuildtype=debug`` inside the build folder after the initial meson run. + +Other options are specific to the DPDK project but can be adjusted similarly. +To set the "max_lcores" value to 256, for example, you can either: + +* pass ``-Dmax_lcores=256`` to meson when configuring the build folder initially + +* run ``meson configure -Dmax_lcores=256`` inside the build folder after the initial meson run. + +Some of the DPDK sample applications in the `examples` directory can be +automatically built as part of a meson build too. +To do so, pass a comma-separated list of the examples to build to the +`-Dexamples` meson option as below:: + + meson -Dexamples=l2fwd,l3fwd build + +As with other meson options, this can also be set post-initial-config using `meson configure` in the build directory. +There is also a special value "all" to request that all example applications whose +dependencies are met on the current system are built. +When `-Dexamples=all` is set as a meson option, meson will check each example application to see if it can be built, +and add all which can be built to the list of tasks in the ninja build configuration file. + +Building Applications Using Installed DPDK +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ + +When installed system-wide, DPDK provides a pkg-config file ``libdpdk.pc`` for applications to query as part of their build. +It's recommended that the pkg-config file be used, rather than hard-coding the parameters (cflags/ldflags) +for DPDK into the application build process. + +An example of how to query and use the pkg-config file can be found in the ``Makefile`` of each of the example applications included with DPDK. +A simplified example snippet is shown below, where the target binary name has been stored in the variable ``$(APP)`` +and the sources for that build are stored in ``$(SRCS-y)``. + +.. code-block:: makefile + + PKGCONF = pkg-config + + CFLAGS += -O3 $(shell $(PKGCONF) --cflags libdpdk) + LDFLAGS += $(shell $(PKGCONF) --libs libdpdk) + + $(APP): $(SRCS-y) Makefile + $(CC) $(CFLAGS) $(SRCS-y) -o $@ $(LDFLAGS) + +.. note:: + + Unlike with the older make build system, the meson system is not + designed to be used directly from a build directory. Instead it is + recommended that it be installed either system-wide or to a known + location in the user's home directory. The install location can be set + using the `--prefix` meson option (default: `/usr/local`). + +an equivalent build recipe for a simple DPDK application using meson as a +build system is shown below: + +.. 
code-block:: python + + project('dpdk-app', 'c') + + dpdk = dependency('libdpdk') + sources = files('main.c') + executable('dpdk-app', sources, dependencies: dpdk) + + +Installation of DPDK Target Environment using Make +-------------------------------------------------- + +.. note:: + + The building of DPDK using make will be deprecated in a future release. It + is therefore recommended that DPDK installation is done using meson and + ninja as described above. + +The format of a DPDK target is:: + + ARCH-MACHINE-EXECENV-TOOLCHAIN + +where: + +* ``ARCH`` can be: ``i686``, ``x86_64``, ``ppc_64``, ``arm64`` + +* ``MACHINE`` can be: ``native``, ``power8``, ``armv8a`` + +* ``EXECENV`` can be: ``linux``, ``freebsd`` + +* ``TOOLCHAIN`` can be: ``gcc``, ``icc`` + +The targets to be installed depend on the 32-bit and/or 64-bit packages and compilers installed on the host. +Available targets can be found in the DPDK/config directory. +The defconfig\_ prefix should not be used. + +.. note:: + + Configuration files are provided with the ``RTE_MACHINE`` optimization level set. + Within the configuration files, the ``RTE_MACHINE`` configuration value is set to native, + which means that the compiled software is tuned for the platform on which it is built. + For more information on this setting, and its possible values, see the *DPDK Programmers Guide*. + +When using the Intel® C++ Compiler (icc), one of the following commands should be invoked for 64-bit or 32-bit use respectively. +Notice that the shell scripts update the ``$PATH`` variable and therefore should not be performed in the same session. +Also, verify the compiler's installation directory since the path may be different: + +.. code-block:: console + + source /opt/intel/bin/iccvars.sh intel64 + source /opt/intel/bin/iccvars.sh ia32 + +To install and make targets, use the ``make install T=`` command in the top-level DPDK directory. + +For example, to compile a 64-bit target using icc, run: + +.. code-block:: console + + make install T=x86_64-native-linux-icc + +To compile a 32-bit build using gcc, the make command should be: + +.. code-block:: console + + make install T=i686-native-linux-gcc + +To prepare a target without building it, for example, if the configuration changes need to be made before compilation, +use the ``make config T=`` command: + +.. code-block:: console + + make config T=x86_64-native-linux-gcc + +.. warning:: + + Any kernel modules to be used, e.g. ``igb_uio``, ``kni``, must be compiled with the + same kernel as the one running on the target. + If the DPDK is not being built on the target machine, + the ``RTE_KERNELDIR`` environment variable should be used to point the compilation at a copy of the kernel version to be used on the target machine. + +Once the target environment is created, the user may move to the target environment directory and continue to make code changes and re-compile. +The user may also make modifications to the compile-time DPDK configuration by editing the .config file in the build directory. +(This is a build-local copy of the defconfig file from the top- level config directory). + +.. code-block:: console + + cd x86_64-native-linux-gcc + vi .config + make + +In addition, the make clean command can be used to remove any existing compiled files for a subsequent full, clean rebuild of the code. 
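+
+For example, a full clean rebuild of the 64-bit gcc target used in the
+examples above can be performed with:
+
+.. code-block:: console
+
+    cd x86_64-native-linux-gcc
+    make clean
+    make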
+ +Browsing the Installed DPDK Environment Target +---------------------------------------------- + +Once a target is created it contains all libraries, including poll-mode drivers, and header files for the DPDK environment that are required to build customer applications. +In addition, the test and testpmd applications are built under the build/app directory, which may be used for testing. +A kmod directory is also present that contains kernel modules which may be loaded if needed. diff --git a/src/spdk/dpdk/doc/guides/linux_gsg/build_sample_apps.rst b/src/spdk/dpdk/doc/guides/linux_gsg/build_sample_apps.rst new file mode 100644 index 000000000..2f606535c --- /dev/null +++ b/src/spdk/dpdk/doc/guides/linux_gsg/build_sample_apps.rst @@ -0,0 +1,250 @@ +.. SPDX-License-Identifier: BSD-3-Clause + Copyright(c) 2010-2014 Intel Corporation. + +Compiling and Running Sample Applications +========================================= + +The chapter describes how to compile and run applications in an DPDK environment. +It also provides a pointer to where sample applications are stored. + +.. note:: + + Parts of this process can also be done using the setup script described the + :ref:`linux_setup_script` section of this document. + +Compiling a Sample Application +------------------------------ + +Once an DPDK target environment directory has been created (such as ``x86_64-native-linux-gcc``), +it contains all libraries and header files required to build an application. + +When compiling an application in the Linux* environment on the DPDK, the following variables must be exported: + +* ``RTE_SDK`` - Points to the DPDK installation directory. + +* ``RTE_TARGET`` - Points to the DPDK target environment directory. + +The following is an example of creating the ``helloworld`` application, which runs in the DPDK Linux environment. +This example may be found in the ``${RTE_SDK}/examples`` directory. + +The directory contains the ``main.c`` file. This file, when combined with the libraries in the DPDK target environment, +calls the various functions to initialize the DPDK environment, +then launches an entry point (dispatch application) for each core to be utilized. +By default, the binary is generated in the build directory. + +.. code-block:: console + + cd examples/helloworld/ + export RTE_SDK=$HOME/DPDK + export RTE_TARGET=x86_64-native-linux-gcc + + make + CC main.o + LD helloworld + INSTALL-APP helloworld + INSTALL-MAP helloworld.map + + ls build/app + helloworld helloworld.map + +.. note:: + + In the above example, ``helloworld`` was in the directory structure of the DPDK. + However, it could have been located outside the directory structure to keep the DPDK structure intact. + In the following case, the ``helloworld`` application is copied to a new directory as a new starting point. + + .. code-block:: console + + export RTE_SDK=/home/user/DPDK + cp -r $(RTE_SDK)/examples/helloworld my_rte_app + cd my_rte_app/ + export RTE_TARGET=x86_64-native-linux-gcc + + make + CC main.o + LD helloworld + INSTALL-APP helloworld + INSTALL-MAP helloworld.map + +Running a Sample Application +---------------------------- + +.. warning:: + + Before running the application make sure: + + - Hugepages setup is done. + - Any kernel driver being used is loaded. + - In case needed, ports being used by the application should be + bound to the corresponding kernel driver. + + refer to :ref:`linux_gsg_linux_drivers` for more details. 
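+
+As a rough sketch of those prerequisites, the steps below show one possible
+setup; the hugepage count, mount point and PCI address are placeholders, and
+``uio_pci_generic`` is only one of the kernel drivers that may apply (see the
+drivers section referenced above):
+
+.. code-block:: console
+
+    # reserve 2 MB hugepages and mount hugetlbfs (values are examples)
+    echo 64 > /sys/kernel/mm/hugepages/hugepages-2048kB/nr_hugepages
+    mkdir -p /mnt/huge
+    mount -t hugetlbfs nodev /mnt/huge
+
+    # load a userspace I/O driver and bind the port to be used (example address)
+    modprobe uio_pci_generic
+    ./usertools/dpdk-devbind.py --bind=uio_pci_generic 0000:04:00.1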
+ +The application is linked with the DPDK target environment's Environmental Abstraction Layer (EAL) library, +which provides some options that are generic to every DPDK application. + +The following is the list of options that can be given to the EAL: + +.. code-block:: console + + ./rte-app [-c COREMASK | -l CORELIST] [-n NUM] [-b ] \ + [--socket-mem=MB,...] [-d LIB.so|DIR] [-m MB] [-r NUM] [-v] [--file-prefix] \ + [--proc-type ] + +The EAL options are as follows: + +* ``-c COREMASK`` or ``-l CORELIST``: + An hexadecimal bit mask of the cores to run on. Note that core numbering can + change between platforms and should be determined beforehand. The corelist is + a set of core numbers instead of a bitmap core mask. + +* ``-n NUM``: + Number of memory channels per processor socket. + +* ``-b ``: + Blacklisting of ports; prevent EAL from using specified PCI device + (multiple ``-b`` options are allowed). + +* ``--use-device``: + use the specified Ethernet device(s) only. Use comma-separate + ``[domain:]bus:devid.func`` values. Cannot be used with ``-b`` option. + +* ``--socket-mem``: + Memory to allocate from hugepages on specific sockets. In dynamic memory mode, + this memory will also be pinned (i.e. not released back to the system until + application closes). + +* ``--socket-limit``: + Limit maximum memory available for allocation on each socket. Does not support + legacy memory mode. + +* ``-d``: + Add a driver or driver directory to be loaded. + The application should use this option to load the pmd drivers + that are built as shared libraries. + +* ``-m MB``: + Memory to allocate from hugepages, regardless of processor socket. It is + recommended that ``--socket-mem`` be used instead of this option. + +* ``-r NUM``: + Number of memory ranks. + +* ``-v``: + Display version information on startup. + +* ``--huge-dir``: + The directory where hugetlbfs is mounted. + +* ``mbuf-pool-ops-name``: + Pool ops name for mbuf to use. + +* ``--file-prefix``: + The prefix text used for hugepage filenames. + +* ``--proc-type``: + The type of process instance. + +* ``--vmware-tsc-map``: + Use VMware TSC map instead of native RDTSC. + +* ``--base-virtaddr``: + Specify base virtual address. + +* ``--vfio-intr``: + Specify interrupt type to be used by VFIO (has no effect if VFIO is not used). + +* ``--legacy-mem``: + Run DPDK in legacy memory mode (disable memory reserve/unreserve at runtime, + but provide more IOVA-contiguous memory). + +* ``--single-file-segments``: + Store memory segments in fewer files (dynamic memory mode only - does not + affect legacy memory mode). + +The ``-c`` or ``-l`` and option is mandatory; the others are optional. + +Copy the DPDK application binary to your target, then run the application as follows +(assuming the platform has four memory channels per processor socket, +and that cores 0-3 are present and are to be used for running the application):: + + ./helloworld -l 0-3 -n 4 + +.. note:: + + The ``--proc-type`` and ``--file-prefix`` EAL options are used for running + multiple DPDK processes. See the "Multi-process Sample Application" + chapter in the *DPDK Sample Applications User Guide* and the *DPDK + Programmers Guide* for more details. + +Logical Core Use by Applications +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ + +The coremask (-c 0x0f) or corelist (-l 0-3) parameter is always mandatory for DPDK applications. +Each bit of the mask corresponds to the equivalent logical core number as reported by Linux. 
The preferred corelist option is a cleaner method to define cores to be used. +Since these logical core numbers, and their mapping to specific cores on specific NUMA sockets, can vary from platform to platform, +it is recommended that the core layout for each platform be considered when choosing the coremask/corelist to use in each case. + +On initialization of the EAL layer by an DPDK application, the logical cores to be used and their socket location are displayed. +This information can also be determined for all cores on the system by examining the ``/proc/cpuinfo`` file, for example, by running cat ``/proc/cpuinfo``. +The physical id attribute listed for each processor indicates the CPU socket to which it belongs. +This can be useful when using other processors to understand the mapping of the logical cores to the sockets. + +.. note:: + + A more graphical view of the logical core layout may be obtained using the ``lstopo`` Linux utility. + On Fedora Linux, this may be installed and run using the following command:: + + sudo yum install hwloc + ./lstopo + +.. warning:: + + The logical core layout can change between different board layouts and should be checked before selecting an application coremask/corelist. + +Hugepage Memory Use by Applications +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ + +When running an application, it is recommended to use the same amount of memory as that allocated for hugepages. +This is done automatically by the DPDK application at startup, +if no ``-m`` or ``--socket-mem`` parameter is passed to it when run. + +If more memory is requested by explicitly passing a ``-m`` or ``--socket-mem`` value, the application fails. +However, the application itself can also fail if the user requests less memory than the reserved amount of hugepage-memory, particularly if using the ``-m`` option. +The reason is as follows. +Suppose the system has 1024 reserved 2 MB pages in socket 0 and 1024 in socket 1. +If the user requests 128 MB of memory, the 64 pages may not match the constraints: + +* The hugepage memory by be given to the application by the kernel in socket 1 only. + In this case, if the application attempts to create an object, such as a ring or memory pool in socket 0, it fails. + To avoid this issue, it is recommended that the ``--socket-mem`` option be used instead of the ``-m`` option. + +* These pages can be located anywhere in physical memory, and, although the DPDK EAL will attempt to allocate memory in contiguous blocks, + it is possible that the pages will not be contiguous. In this case, the application is not able to allocate big memory pools. + +The socket-mem option can be used to request specific amounts of memory for specific sockets. +This is accomplished by supplying the ``--socket-mem`` flag followed by amounts of memory requested on each socket, +for example, supply ``--socket-mem=0,512`` to try and reserve 512 MB for socket 1 only. +Similarly, on a four socket system, to allocate 1 GB memory on each of sockets 0 and 2 only, the parameter ``--socket-mem=1024,0,1024`` can be used. +No memory will be reserved on any CPU socket that is not explicitly referenced, for example, socket 3 in this case. +If the DPDK cannot allocate enough memory on each socket, the EAL initialization fails. + +Additional Sample Applications +------------------------------ + +Additional sample applications are included in the ${RTE_SDK}/examples directory. +These sample applications may be built and run in a manner similar to that described in earlier sections in this manual. 
+In addition, see the *DPDK Sample Applications User Guide* for a description of the application, +specific instructions on compilation and execution and some explanation of the code. + +Additional Test Applications +---------------------------- + +In addition, there are two other applications that are built when the libraries are created. +The source files for these are in the DPDK/app directory and are called test and testpmd. +Once the libraries are created, they can be found in the build/app directory. + +* The test application provides a variety of specific tests for the various functions in the DPDK. + +* The testpmd application provides a number of different packet throughput tests and + examples of features such as how to use the Flow Director found in the Intel® 82599 10 Gigabit Ethernet Controller. diff --git a/src/spdk/dpdk/doc/guides/linux_gsg/cross_build_dpdk_for_arm64.rst b/src/spdk/dpdk/doc/guides/linux_gsg/cross_build_dpdk_for_arm64.rst new file mode 100644 index 000000000..c5875a6d5 --- /dev/null +++ b/src/spdk/dpdk/doc/guides/linux_gsg/cross_build_dpdk_for_arm64.rst @@ -0,0 +1,143 @@ +.. SPDX-License-Identifier: BSD-3-Clause + Copyright(c) 2018 ARM Corporation. + +Cross compile DPDK for ARM64 +============================ +This chapter describes how to cross compile DPDK for ARM64 from x86 build hosts. + +.. note:: + + Whilst it is recommended to natively build DPDK on ARM64 (just + like with x86), it is also possible to cross-build DPDK for ARM64. An + ARM64 cross compile GNU toolchain is used for this. + +Obtain the cross tool chain +--------------------------- +The latest cross compile tool chain can be downloaded from: +https://developer.arm.com/open-source/gnu-toolchain/gnu-a/downloads. + +It is always recommended to check and get the latest compiler tool from the page and use +it to generate better code. As of this writing 8.3-2019.03 is the newest, the following +description is an example of this version. + +.. code-block:: console + + wget https://developer.arm.com/-/media/Files/downloads/gnu-a/8.3-2019.03/binrel/gcc-arm-8.3-2019.03-x86_64-aarch64-linux-gnu.tar.xz + +Unzip and add into the PATH +--------------------------- + +.. code-block:: console + + tar -xvf gcc-arm-8.3-2019.03-x86_64-aarch64-linux-gnu.tar.xz + export PATH=$PATH:/gcc-arm-8.3-2019.03-x86_64-aarch64-linux-gnu/bin + +.. note:: + + For the host requirements and other info, refer to the release note section: https://releases.linaro.org/components/toolchain/binaries/ + +.. _arm_cross_build_getting_the_prerequisite_library: + +Getting the prerequisite library +-------------------------------- + +NUMA is required by most modern machines, not needed for non-NUMA architectures. + +.. note:: + + For compiling the NUMA lib, run libtool --version to ensure the libtool version >= 2.2, + otherwise the compilation will fail with errors. + +.. code-block:: console + + git clone https://github.com/numactl/numactl.git + cd numactl + git checkout v2.0.13 -b v2.0.13 + ./autogen.sh + autoconf -i + ./configure --host=aarch64-linux-gnu CC=aarch64-linux-gnu-gcc --prefix= + make install + +The numa header files and lib file is generated in the include and lib folder respectively under . + +.. _augment_the_cross_toolchain_with_numa_support: + +Augment the cross toolchain with NUMA support +--------------------------------------------- + +.. note:: + + This way is optional, an alternative is to use extra CFLAGS and LDFLAGS, depicted in :ref:`configure_and_cross_compile_dpdk_build` below. 
+ +Copy the NUMA header files and lib to the cross compiler's directories: + +.. code-block:: console + + cp /include/numa*.h /gcc-arm-8.3-2019.03-x86_64-aarch64-linux-gnu/aarch64-linux-gnu/libc/usr/include/ + cp /lib/libnuma.a /gcc-arm-8.3-2019.03-x86_64-aarch64-linux-gnu/lib/gcc/aarch64-linux-gnu/8.3.0/ + cp /lib/libnuma.so /gcc-arm-8.3-2019.03-x86_64-aarch64-linux-gnu/lib/gcc/aarch64-linux-gnu/8.3.0/ + +.. _configure_and_cross_compile_dpdk_build: + +Cross Compiling DPDK using Meson +-------------------------------- + +Meson depends on pkgconfig to find the dependencies. +The package ``pkg-config-aarch64-linux-gnu`` is required for aarch64. +To install it in Ubuntu:: + + sudo apt-get install pkg-config-aarch64-linux-gnu + +To cross-compile DPDK on a desired target machine we can use the following +command:: + + meson cross-build --cross-file + ninja -C cross-build + +For example if the target machine is arm64 we can use the following +command:: + + meson arm64-build --cross-file config/arm/arm64_armv8_linux_gcc + ninja -C arm64-build + +Configure and Cross Compile DPDK using Make +------------------------------------------- +To configure a build, choose one of the target configurations, like arm64-dpaa-linux-gcc and arm64-thunderx-linux-gcc. + +.. code-block:: console + + make config T=arm64-armv8a-linux-gcc + +To cross-compile, without compiling the kernel modules, use the following command: + +.. code-block:: console + + make -j CROSS=aarch64-linux-gnu- CONFIG_RTE_KNI_KMOD=n CONFIG_RTE_EAL_IGB_UIO=n + +To cross-compile, including the kernel modules, the kernel source tree needs to be specified by setting +RTE_KERNELDIR: + +.. code-block:: console + + make -j CROSS=aarch64-linux-gnu- RTE_KERNELDIR= CROSS_COMPILE=aarch64-linux-gnu- + +To compile for non-NUMA targets, without compiling the kernel modules, use the following command: + +.. code-block:: console + + make -j CROSS=aarch64-linux-gnu- CONFIG_RTE_KNI_KMOD=n CONFIG_RTE_EAL_IGB_UIO=n CONFIG_RTE_LIBRTE_VHOST_NUMA=n CONFIG_RTE_EAL_NUMA_AWARE_HUGEPAGES=n + +.. note:: + + 1. EXTRA_CFLAGS and EXTRA_LDFLAGS should be added to include the NUMA headers and link the library respectively, + if the above step :ref:`augment_the_cross_toolchain_with_numa_support` was skipped therefore the toolchain was not + augmented with NUMA support. + + 2. "-isystem /include" should be add to EXTRA_CFLAGS, otherwise the numa.h file will get a lot of compiling + errors of Werror=cast-qual, Werror=strict-prototypes and Werror=old-style-definition. + + An example is given below: + + .. code-block:: console + + make -j CROSS=aarch64-linux-gnu- CONFIG_RTE_KNI_KMOD=n CONFIG_RTE_EAL_IGB_UIO=n EXTRA_CFLAGS="-isystem /include" EXTRA_LDFLAGS="-L/lib -lnuma" diff --git a/src/spdk/dpdk/doc/guides/linux_gsg/eal_args.include.rst b/src/spdk/dpdk/doc/guides/linux_gsg/eal_args.include.rst new file mode 100644 index 000000000..0fe445796 --- /dev/null +++ b/src/spdk/dpdk/doc/guides/linux_gsg/eal_args.include.rst @@ -0,0 +1,212 @@ +.. SPDX-License-Identifier: BSD-3-Clause + Copyright(c) 2018 Intel Corporation. + +Lcore-related options +~~~~~~~~~~~~~~~~~~~~~ + +* ``-c `` + + Set the hexadecimal bitmask of the cores to run on. + +* ``-l `` + + List of cores to run on + + The argument format is ``[-c2][,c3[-c4],...]`` + where ``c1``, ``c2``, etc are core indexes between 0 and 128. + +* ``--lcores `` + + Map lcore set to physical cpu set + + The argument format is:: + + [<,lcores[@cpus]>...] + + Lcore and CPU lists are grouped by ``(`` and ``)`` Within the group. 
+ The ``-`` character is used as a range separator and ``,`` is used as a + single number separator. + The grouping ``()`` can be omitted for single element group. + The ``@`` can be omitted if cpus and lcores have the same value. + +.. Note:: + At a given instance only one core option ``--lcores``, ``-l`` or ``-c`` can + be used. + +* ``--master-lcore `` + + Core ID that is used as master. + +* ``-s `` + + Hexadecimal bitmask of cores to be used as service cores. + +Device-related options +~~~~~~~~~~~~~~~~~~~~~~ + +* ``-b, --pci-blacklist <[domain:]bus:devid.func>`` + + Blacklist a PCI device to prevent EAL from using it. Multiple -b options are + allowed. + +.. Note:: + PCI blacklist cannot be used with ``-w`` option. + +* ``-w, --pci-whitelist <[domain:]bus:devid.func>`` + + Add a PCI device in white list. + +.. Note:: + PCI whitelist cannot be used with ``-b`` option. + +* ``--vdev `` + + Add a virtual device using the format:: + + [,key=val, ...] + + For example:: + + --vdev 'net_pcap0,rx_pcap=input.pcap,tx_pcap=output.pcap' + +* ``-d `` + + Load external drivers. An argument can be a single shared object file, or a + directory containing multiple driver shared objects. Multiple -d options are + allowed. + +* ``--no-pci`` + + Disable PCI bus. + +Multiprocessing-related options +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ + +* ``--proc-type `` + + Set the type of the current process. + +* ``--base-virtaddr
`` + + Attempt to use a different starting address for all memory maps of the + primary DPDK process. This can be helpful if secondary processes cannot + start due to conflicts in address map. + +Memory-related options +~~~~~~~~~~~~~~~~~~~~~~ + +* ``-n `` + + Set the number of memory channels to use. + +* ``-r `` + + Set the number of memory ranks (auto-detected by default). + +* ``-m `` + + Amount of memory to preallocate at startup. + +* ``--in-memory`` + + Do not create any shared data structures and run entirely in memory. Implies + ``--no-shconf`` and (if applicable) ``--huge-unlink``. + +* ``--iova-mode `` + + Force IOVA mode to a specific value. + +Debugging options +~~~~~~~~~~~~~~~~~ + +* ``--no-shconf`` + + No shared files created (implies no secondary process support). + +* ``--no-huge`` + + Use anonymous memory instead of hugepages (implies no secondary process + support). + +* ``--log-level `` + + Specify log level for a specific component. For example:: + + --log-level lib.eal:debug + + Can be specified multiple times. + +* ``--trace=`` + + Enable trace based on regular expression trace name. By default, the trace is + disabled. User must specify this option to enable trace. + For example: + + Global trace configuration for EAL only:: + + --trace=eal + + Global trace configuration for ALL the components:: + + --trace=.* + + Can be specified multiple times up to 32 times. + +* ``--trace-dir=`` + + Specify trace directory for trace output. For example: + + Configuring ``/tmp/`` as a trace output directory:: + + --trace-dir=/tmp + + By default, trace output will created at ``home`` directory and parameter + must be specified once only. + +* ``--trace-bufsz=`` + + Specify maximum size of allocated memory for trace output for each thread. + Valid unit can be either ``B`` or ``K`` or ``M`` for ``Bytes``, ``KBytes`` + and ``MBytes`` respectively. For example: + + Configuring ``2MB`` as a maximum size for trace output file:: + + --trace-bufsz=2M + + By default, size of trace output file is ``1MB`` and parameter + must be specified once only. + +* ``--trace-mode=`` + + Specify the mode of update of trace output file. Either update on a file + can be wrapped or discarded when file size reaches its maximum limit. + For example: + + To ``discard`` update on trace output file:: + + --trace-mode=d or --trace-mode=discard + + Default mode is ``overwrite`` and parameter must be specified once only. + +Other options +~~~~~~~~~~~~~ + +* ``-h``, ``--help`` + + Display help message listing all EAL parameters. + +* ``-v`` + + Display the version information on startup. + +* ``mbuf-pool-ops-name``: + + Pool ops name for mbuf to use. + +* ``--telemetry``: + + Enable telemetry (enabled by default). + +* ``--no-telemetry``: + + Disable telemetry. diff --git a/src/spdk/dpdk/doc/guides/linux_gsg/enable_func.rst b/src/spdk/dpdk/doc/guides/linux_gsg/enable_func.rst new file mode 100644 index 000000000..b2bda80bb --- /dev/null +++ b/src/spdk/dpdk/doc/guides/linux_gsg/enable_func.rst @@ -0,0 +1,159 @@ +.. SPDX-License-Identifier: BSD-3-Clause + Copyright(c) 2010-2014 Intel Corporation. + +.. _Enabling_Additional_Functionality: + +Enabling Additional Functionality +================================= + +.. _High_Precision_Event_Timer: + +High Precision Event Timer (HPET) Functionality +----------------------------------------------- + +BIOS Support +~~~~~~~~~~~~ + +The High Precision Timer (HPET) must be enabled in the platform BIOS if the HPET is to be used. 
+Otherwise, the Time Stamp Counter (TSC) is used by default. +The BIOS is typically accessed by pressing F2 while the platform is starting up. +The user can then navigate to the HPET option. On the Crystal Forest platform BIOS, the path is: +**Advanced -> PCH-IO Configuration -> High Precision Timer ->** (Change from Disabled to Enabled if necessary). + +On a system that has already booted, the following command can be issued to check if HPET is enabled:: + + grep hpet /proc/timer_list + +If no entries are returned, HPET must be enabled in the BIOS (as per the instructions above) and the system rebooted. + +Linux Kernel Support +~~~~~~~~~~~~~~~~~~~~ + +The DPDK makes use of the platform HPET timer by mapping the timer counter into the process address space, and as such, +requires that the ``HPET_MMAP`` kernel configuration option be enabled. + +.. warning:: + + On Fedora, and other common distributions such as Ubuntu, the ``HPET_MMAP`` kernel option is not enabled by default. + To recompile the Linux kernel with this option enabled, please consult the distributions documentation for the relevant instructions. + +Enabling HPET in the DPDK +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ + +By default, HPET support is disabled in the DPDK build configuration files. +To use HPET, the ``CONFIG_RTE_LIBEAL_USE_HPET`` setting should be changed to ``y``, which will enable the HPET settings at compile time. + +For an application to use the ``rte_get_hpet_cycles()`` and ``rte_get_hpet_hz()`` API calls, +and optionally to make the HPET the default time source for the rte_timer library, +the new ``rte_eal_hpet_init()`` API call should be called at application initialization. +This API call will ensure that the HPET is accessible, returning an error to the application if it is not, +for example, if ``HPET_MMAP`` is not enabled in the kernel. +The application can then determine what action to take, if any, if the HPET is not available at run-time. + +.. note:: + + For applications that require timing APIs, but not the HPET timer specifically, + it is recommended that the ``rte_get_timer_cycles()`` and ``rte_get_timer_hz()`` API calls be used instead of the HPET-specific APIs. + These generic APIs can work with either TSC or HPET time sources, depending on what is requested by an application call to ``rte_eal_hpet_init()``, + if any, and on what is available on the system at runtime. + +Running DPDK Applications Without Root Privileges +-------------------------------------------------------- + +.. note:: + + The instructions below will allow running DPDK as non-root with older + Linux kernel versions. However, since version 4.0, the kernel does not allow + unprivileged processes to read the physical address information from + the pagemaps file, making it impossible for those processes to use HW + devices which require physical addresses + +Although applications using the DPDK use network ports and other hardware resources directly, +with a number of small permission adjustments it is possible to run these applications as a user other than "root". 
+To do so, the ownership, or permissions, on the following Linux file system objects should be adjusted to ensure that +the Linux user account being used to run the DPDK application has access to them: + +* All directories which serve as hugepage mount points, for example, ``/mnt/huge`` + +* The userspace-io device files in ``/dev``, for example, ``/dev/uio0``, ``/dev/uio1``, and so on + +* The userspace-io sysfs config and resource files, for example for ``uio0``:: + + /sys/class/uio/uio0/device/config + /sys/class/uio/uio0/device/resource* + +* If the HPET is to be used, ``/dev/hpet`` + +.. note:: + + On some Linux installations, ``/dev/hugepages`` is also a hugepage mount point created by default. + +Power Management and Power Saving Functionality +----------------------------------------------- + +Enhanced Intel SpeedStep® Technology must be enabled in the platform BIOS if the power management feature of DPDK is to be used. +Otherwise, the sys file folder ``/sys/devices/system/cpu/cpu0/cpufreq`` will not exist, and the CPU frequency- based power management cannot be used. +Consult the relevant BIOS documentation to determine how these settings can be accessed. + +For example, on some Intel reference platform BIOS variants, the path to Enhanced Intel SpeedStep® Technology is:: + + Advanced + -> Processor Configuration + -> Enhanced Intel SpeedStep® Tech + +In addition, C3 and C6 should be enabled as well for power management. The path of C3 and C6 on the same platform BIOS is:: + + Advanced + -> Processor Configuration + -> Processor C3 Advanced + -> Processor Configuration + -> Processor C6 + +Using Linux Core Isolation to Reduce Context Switches +----------------------------------------------------- + +While the threads used by an DPDK application are pinned to logical cores on the system, +it is possible for the Linux scheduler to run other tasks on those cores also. +To help prevent additional workloads from running on those cores, +it is possible to use the ``isolcpus`` Linux kernel parameter to isolate them from the general Linux scheduler. + +For example, if DPDK applications are to run on logical cores 2, 4 and 6, +the following should be added to the kernel parameter list: + +.. code-block:: console + + isolcpus=2,4,6 + +Loading the DPDK KNI Kernel Module +---------------------------------- + +To run the DPDK Kernel NIC Interface (KNI) sample application, an extra kernel module (the kni module) must be loaded into the running kernel. +The module is found in the kmod sub-directory of the DPDK target directory. +Similar to the loading of the ``igb_uio`` module, this module should be loaded using the insmod command as shown below +(assuming that the current directory is the DPDK target directory): + +.. code-block:: console + + insmod kmod/rte_kni.ko + +.. note:: + + See the "Kernel NIC Interface Sample Application" chapter in the *DPDK Sample Applications User Guide* for more details. + +Using Linux IOMMU Pass-Through to Run DPDK with Intel® VT-d +----------------------------------------------------------- + +To enable Intel® VT-d in a Linux kernel, a number of kernel configuration options must be set. These include: + +* ``IOMMU_SUPPORT`` + +* ``IOMMU_API`` + +* ``INTEL_IOMMU`` + +In addition, to run the DPDK with Intel® VT-d, the ``iommu=pt`` kernel parameter must be used when using ``igb_uio`` driver. +This results in pass-through of the DMAR (DMA Remapping) lookup in the host. 
+Also, if ``INTEL_IOMMU_DEFAULT_ON`` is not set in the kernel, the ``intel_iommu=on`` kernel parameter must be used too. +This ensures that the Intel IOMMU is being initialized as expected. + +Please note that while using ``iommu=pt`` is compulsory for ``igb_uio driver``, the ``vfio-pci`` driver can actually work with both ``iommu=pt`` and ``iommu=on``. diff --git a/src/spdk/dpdk/doc/guides/linux_gsg/index.rst b/src/spdk/dpdk/doc/guides/linux_gsg/index.rst new file mode 100644 index 000000000..0f9f6242c --- /dev/null +++ b/src/spdk/dpdk/doc/guides/linux_gsg/index.rst @@ -0,0 +1,22 @@ +.. SPDX-License-Identifier: BSD-3-Clause + Copyright(c) 2010-2015 Intel Corporation. + +.. _linux_gsg: + +Getting Started Guide for Linux +=============================== + +.. toctree:: + :maxdepth: 2 + :numbered: + + intro + sys_reqs + build_dpdk + cross_build_dpdk_for_arm64 + linux_drivers + build_sample_apps + linux_eal_parameters + enable_func + quick_start + nic_perf_intel_platform diff --git a/src/spdk/dpdk/doc/guides/linux_gsg/intro.rst b/src/spdk/dpdk/doc/guides/linux_gsg/intro.rst new file mode 100644 index 000000000..94877f4ae --- /dev/null +++ b/src/spdk/dpdk/doc/guides/linux_gsg/intro.rst @@ -0,0 +1,36 @@ +.. SPDX-License-Identifier: BSD-3-Clause + Copyright(c) 2010-2014 Intel Corporation. + +Introduction +============ + +This document contains instructions for installing and configuring the Data Plane Development Kit (DPDK) software. +It is designed to get customers up and running quickly. +The document describes how to compile and run a DPDK application in a Linux application (linux) environment, +without going deeply into detail. + +Documentation Roadmap +--------------------- + +The following is a list of DPDK documents in the suggested reading order: + +* Release Notes: Provides release-specific information, including supported features, limitations, fixed issues, known issues and so on. + Also, provides the answers to frequently asked questions in FAQ format. + +* Getting Started Guide (this document): Describes how to install and configure the DPDK; designed to get users up and running quickly with the software. + +* Programmer's Guide: Describes: + + * The software architecture and how to use it (through examples), specifically in a Linux application (linux) environment + + * The content of the DPDK, the build system (including the commands that can be used in the root DPDK Makefile to build the development kit and + an application) and guidelines for porting an application + + * Optimizations used in the software and those that should be considered for new development + + A glossary of terms is also provided. + +* API Reference: Provides detailed information about DPDK functions, data structures and other programming constructs. + +* Sample Applications User Guide: Describes a set of sample applications. + Each chapter describes a sample application that showcases specific functionality and provides instructions on how to compile, run and use the sample application. diff --git a/src/spdk/dpdk/doc/guides/linux_gsg/linux_drivers.rst b/src/spdk/dpdk/doc/guides/linux_gsg/linux_drivers.rst new file mode 100644 index 000000000..d40b495c1 --- /dev/null +++ b/src/spdk/dpdk/doc/guides/linux_gsg/linux_drivers.rst @@ -0,0 +1,196 @@ +.. SPDX-License-Identifier: BSD-3-Clause + Copyright(c) 2010-2015 Intel Corporation. + Copyright 2017 Mellanox Technologies, Ltd + All rights reserved. + +.. 
_linux_gsg_linux_drivers: + +Linux Drivers +============= + +Different PMDs may require different kernel drivers in order to work properly. +Depends on the PMD being used, a corresponding kernel driver should be load +and bind to the network ports. + +UIO +--- + +A small kernel module to set up the device, map device memory to user-space and register interrupts. +In many cases, the standard ``uio_pci_generic`` module included in the Linux kernel +can provide the uio capability. This module can be loaded using the command: + +.. code-block:: console + + sudo modprobe uio_pci_generic + +.. note:: + + ``uio_pci_generic`` module doesn't support the creation of virtual functions. + +As an alternative to the ``uio_pci_generic``, the DPDK also includes the igb_uio +module which can be found in the kmod subdirectory referred to above. It can +be loaded as shown below: + +.. code-block:: console + + sudo modprobe uio + sudo insmod kmod/igb_uio.ko + +.. note:: + + ``igb_uio`` module is disabled by default starting from ``DPDK v20.02``. + To build it, the config option ``CONFIG_RTE_EAL_IGB_UIO`` should be enabled. + It is planned to move ``igb_uio`` module to a different git repository. + +.. note:: + + For some devices which lack support for legacy interrupts, e.g. virtual function + (VF) devices, the ``igb_uio`` module may be needed in place of ``uio_pci_generic``. + +.. note:: + + If UEFI secure boot is enabled, the Linux kernel may disallow the use of + UIO on the system. Therefore, devices for use by DPDK should be bound to the + ``vfio-pci`` kernel module rather than ``igb_uio`` or ``uio_pci_generic``. + For more details see :ref:`linux_gsg_binding_kernel` below. + +.. note:: + + If the devices used for DPDK are bound to the ``uio_pci_generic`` kernel module, + please make sure that the IOMMU is disabled or passthrough. One can add + ``intel_iommu=off`` or ``amd_iommu=off`` or ``intel_iommu=on iommu=pt``in GRUB + command line on x86_64 systems, or add ``iommu.passthrough=1`` on arm64 system. + +Since DPDK release 1.7 onward provides VFIO support, use of UIO is optional +for platforms that support using VFIO. + +VFIO +---- + +A more robust and secure driver in compare to the ``UIO``, relying on IOMMU protection. +To make use of VFIO, the ``vfio-pci`` module must be loaded: + +.. code-block:: console + + sudo modprobe vfio-pci + +Note that in order to use VFIO, your kernel must support it. +VFIO kernel modules have been included in the Linux kernel since version 3.6.0 and are usually present by default, +however please consult your distributions documentation to make sure that is the case. + +Also, to use VFIO, both kernel and BIOS must support and be configured to use IO virtualization (such as Intel® VT-d). + +.. note:: + + ``vfio-pci`` module doesn't support the creation of virtual functions. + +For proper operation of VFIO when running DPDK applications as a non-privileged user, correct permissions should also be set up. +This can be done by using the DPDK setup script (called dpdk-setup.sh and located in the usertools directory). + +.. note:: + + VFIO can be used without IOMMU. While this is just as unsafe as using UIO, it does make it possible for the user to keep the degree of device access and programming that VFIO has, in situations where IOMMU is not available. + +.. _bifurcated_driver: + +Bifurcated Driver +----------------- + +PMDs which use the bifurcated driver co-exists with the device kernel driver. 
+On such model the NIC is controlled by the kernel, while the data +path is performed by the PMD directly on top of the device. + +Such model has the following benefits: + + - It is secure and robust, as the memory management and isolation + is done by the kernel. + - It enables the user to use legacy linux tools such as ``ethtool`` or + ``ifconfig`` while running DPDK application on the same network ports. + - It enables the DPDK application to filter only part of the traffic, + while the rest will be directed and handled by the kernel driver. + The flow bifurcation is performed by the NIC hardware. + As an example, using :ref:`flow_isolated_mode` allows to choose + strictly what is received in DPDK. + +More about the bifurcated driver can be found in +`Mellanox Bifurcated DPDK PMD +`__. + +.. _linux_gsg_binding_kernel: + +Binding and Unbinding Network Ports to/from the Kernel Modules +-------------------------------------------------------------- + +.. note:: + + PMDs Which use the bifurcated driver should not be unbind from their kernel drivers. this section is for PMDs which use the UIO or VFIO drivers. + +As of release 1.4, DPDK applications no longer automatically unbind all supported network ports from the kernel driver in use. +Instead, in case the PMD being used use the UIO or VFIO drivers, all ports that are to be used by an DPDK application must be bound to the +``uio_pci_generic``, ``igb_uio`` or ``vfio-pci`` module before the application is run. +For such PMDs, any network ports under Linux* control will be ignored and cannot be used by the application. + +To bind ports to the ``uio_pci_generic``, ``igb_uio`` or ``vfio-pci`` module for DPDK use, +and then subsequently return ports to Linux* control, +a utility script called dpdk-devbind.py is provided in the usertools subdirectory. +This utility can be used to provide a view of the current state of the network ports on the system, +and to bind and unbind those ports from the different kernel modules, including the uio and vfio modules. +The following are some examples of how the script can be used. +A full description of the script and its parameters can be obtained by calling the script with the ``--help`` or ``--usage`` options. +Note that the uio or vfio kernel modules to be used, should be loaded into the kernel before +running the ``dpdk-devbind.py`` script. + +.. warning:: + + Due to the way VFIO works, there are certain limitations to which devices can be used with VFIO. + Mainly it comes down to how IOMMU groups work. + Any Virtual Function device can be used with VFIO on its own, but physical devices will require either all ports bound to VFIO, + or some of them bound to VFIO while others not being bound to anything at all. + + If your device is behind a PCI-to-PCI bridge, the bridge will then be part of the IOMMU group in which your device is in. + Therefore, the bridge driver should also be unbound from the bridge PCI device for VFIO to work with devices behind the bridge. + +.. warning:: + + While any user can run the dpdk-devbind.py script to view the status of the network ports, + binding or unbinding network ports requires root privileges. + +To see the status of all network ports on the system: + +.. 
code-block:: console + + ./usertools/dpdk-devbind.py --status + + Network devices using DPDK-compatible driver + ============================================ + 0000:82:00.0 '82599EB 10-GbE NIC' drv=uio_pci_generic unused=ixgbe + 0000:82:00.1 '82599EB 10-GbE NIC' drv=uio_pci_generic unused=ixgbe + + Network devices using kernel driver + =================================== + 0000:04:00.0 'I350 1-GbE NIC' if=em0 drv=igb unused=uio_pci_generic *Active* + 0000:04:00.1 'I350 1-GbE NIC' if=eth1 drv=igb unused=uio_pci_generic + 0000:04:00.2 'I350 1-GbE NIC' if=eth2 drv=igb unused=uio_pci_generic + 0000:04:00.3 'I350 1-GbE NIC' if=eth3 drv=igb unused=uio_pci_generic + + Other network devices + ===================== + + +To bind device ``eth1``,``04:00.1``, to the ``uio_pci_generic`` driver: + +.. code-block:: console + + ./usertools/dpdk-devbind.py --bind=uio_pci_generic 04:00.1 + +or, alternatively, + +.. code-block:: console + + ./usertools/dpdk-devbind.py --bind=uio_pci_generic eth1 + +To restore device ``82:00.0`` to its original kernel binding: + +.. code-block:: console + + ./usertools/dpdk-devbind.py --bind=ixgbe 82:00.0 diff --git a/src/spdk/dpdk/doc/guides/linux_gsg/linux_eal_parameters.rst b/src/spdk/dpdk/doc/guides/linux_gsg/linux_eal_parameters.rst new file mode 100644 index 000000000..b2cc60e44 --- /dev/null +++ b/src/spdk/dpdk/doc/guides/linux_gsg/linux_eal_parameters.rst @@ -0,0 +1,116 @@ +.. SPDX-License-Identifier: BSD-3-Clause + Copyright(c) 2018 Intel Corporation. + +EAL parameters +============== + +This document contains a list of all EAL parameters. These parameters can be +used by any DPDK application running on Linux. + +Common EAL parameters +--------------------- + +The following EAL parameters are common to all platforms supported by DPDK. + +.. include:: eal_args.include.rst + +Linux-specific EAL parameters +----------------------------- + +In addition to common EAL parameters, there are also Linux-specific EAL +parameters. + +Device-related options +~~~~~~~~~~~~~~~~~~~~~~ + +* ``--create-uio-dev`` + + Create ``/dev/uioX`` files for devices bound to igb_uio kernel driver + (usually done by the igb_uio driver itself). + +* ``--vmware-tsc-map`` + + Use VMware TSC map instead of native RDTSC. + +* ``--no-hpet`` + + Do not use the HPET timer. + +* ``--vfio-intr `` + + Use specified interrupt mode for devices bound to VFIO kernel driver. + +Multiprocessing-related options +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ + +* ``--file-prefix `` + + Use a different shared data file prefix for a DPDK process. This option + allows running multiple independent DPDK primary/secondary processes under + different prefixes. + +Memory-related options +~~~~~~~~~~~~~~~~~~~~~~ + +* ``--legacy-mem`` + + Use legacy DPDK memory allocation mode. + +* ``--socket-mem `` + + Preallocate specified amounts of memory per socket. The parameter is a + comma-separated list of values. For example:: + + --socket-mem 1024,2048 + + This will allocate 1 gigabyte of memory on socket 0, and 2048 megabytes of + memory on socket 1. + +* ``--socket-limit `` + + Place a per-socket upper limit on memory use (non-legacy memory mode only). + 0 will disable the limit for a particular socket. + +* ``--single-file-segments`` + + Create fewer files in hugetlbfs (non-legacy mode only). + +* ``--huge-dir `` + + Use specified hugetlbfs directory instead of autodetected ones. + +* ``--huge-unlink`` + + Unlink hugepage files after creating them (implies no secondary process + support). 
+ +* ``--match-allocations`` + + Free hugepages back to system exactly as they were originally allocated. + +Other options +~~~~~~~~~~~~~ + +* ``--syslog `` + + Set syslog facility. Valid syslog facilities are:: + + auth + cron + daemon + ftp + kern + lpr + mail + news + syslog + user + uucp + local0 + local1 + local2 + local3 + local4 + local5 + local6 + local7 diff --git a/src/spdk/dpdk/doc/guides/linux_gsg/nic_perf_intel_platform.rst b/src/spdk/dpdk/doc/guides/linux_gsg/nic_perf_intel_platform.rst new file mode 100644 index 000000000..1dabbce24 --- /dev/null +++ b/src/spdk/dpdk/doc/guides/linux_gsg/nic_perf_intel_platform.rst @@ -0,0 +1,188 @@ +How to get best performance with NICs on Intel platforms +======================================================== + +This document is a step-by-step guide for getting high performance from DPDK applications on Intel platforms. + + +Hardware and Memory Requirements +-------------------------------- + +For best performance use an Intel Xeon class server system such as Ivy Bridge, Haswell or newer. + +Ensure that each memory channel has at least one memory DIMM inserted, and that the memory size for each is at least 4GB. +**Note**: this has one of the most direct effects on performance. + +You can check the memory configuration using ``dmidecode`` as follows:: + + dmidecode -t memory | grep Locator + + Locator: DIMM_A1 + Bank Locator: NODE 1 + Locator: DIMM_A2 + Bank Locator: NODE 1 + Locator: DIMM_B1 + Bank Locator: NODE 1 + Locator: DIMM_B2 + Bank Locator: NODE 1 + ... + Locator: DIMM_G1 + Bank Locator: NODE 2 + Locator: DIMM_G2 + Bank Locator: NODE 2 + Locator: DIMM_H1 + Bank Locator: NODE 2 + Locator: DIMM_H2 + Bank Locator: NODE 2 + +The sample output above shows a total of 8 channels, from ``A`` to ``H``, where each channel has 2 DIMMs. + +You can also use ``dmidecode`` to determine the memory frequency:: + + dmidecode -t memory | grep Speed + + Speed: 2133 MHz + Configured Clock Speed: 2134 MHz + Speed: Unknown + Configured Clock Speed: Unknown + Speed: 2133 MHz + Configured Clock Speed: 2134 MHz + Speed: Unknown + ... + Speed: 2133 MHz + Configured Clock Speed: 2134 MHz + Speed: Unknown + Configured Clock Speed: Unknown + Speed: 2133 MHz + Configured Clock Speed: 2134 MHz + Speed: Unknown + Configured Clock Speed: Unknown + +The output shows a speed of 2133 MHz (DDR4) and Unknown (not existing). +This aligns with the previous output which showed that each channel has one memory bar. + + +Network Interface Card Requirements +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ + +Use a `DPDK supported `_ high end NIC such as the Intel XL710 40GbE. + +Make sure each NIC has been flashed the latest version of NVM/firmware. + +Use PCIe Gen3 slots, such as Gen3 ``x8`` or Gen3 ``x16`` because PCIe Gen2 slots don't provide enough bandwidth +for 2 x 10GbE and above. +You can use ``lspci`` to check the speed of a PCI slot using something like the following:: + + lspci -s 03:00.1 -vv | grep LnkSta + + LnkSta: Speed 8GT/s, Width x8, TrErr- Train- SlotClk+ DLActive- ... + LnkSta2: Current De-emphasis Level: -6dB, EqualizationComplete+ ... + +When inserting NICs into PCI slots always check the caption, such as CPU0 or CPU1 to indicate which socket it is connected to. + +Care should be take with NUMA. +If you are using 2 or more ports from different NICs, it is best to ensure that these NICs are on the same CPU socket. +An example of how to determine this is shown further below. + + +BIOS Settings +~~~~~~~~~~~~~ + +The following are some recommendations on BIOS settings. 
Different platforms will have different BIOS naming,
+so the following is mainly for reference:
+
+#. Establish the steady state for the system; consider reviewing the BIOS settings desired for the best performance characteristic, e.g. optimized for performance or for energy efficiency.
+
+#. Match the BIOS settings to the needs of the application you are testing.
+
+#. Typically, **Performance** as the CPU Power and Performance policy is a reasonable starting point.
+
+#. Consider using Turbo Boost to increase the frequency on cores.
+
+#. Disable all virtualization options when you test the physical function of the NIC, and turn on VT-d if you want to use VFIO.
+
+
+Linux boot command line
+~~~~~~~~~~~~~~~~~~~~~~~
+
+The following are some recommendations on GRUB boot settings:
+
+#. Use the default grub file as a starting point.
+
+#. Reserve 1G huge pages via grub configurations. For example, to reserve 8 huge pages of 1G size::
+
+      default_hugepagesz=1G hugepagesz=1G hugepages=8
+
+#. Isolate CPU cores which will be used for DPDK. For example::
+
+      isolcpus=2,3,4,5,6,7,8
+
+#. If you want to use VFIO, use the following additional grub parameters::
+
+      iommu=pt intel_iommu=on
+
+
+Configurations before running DPDK
+----------------------------------
+
+1. Build the DPDK target and reserve huge pages.
+   See the earlier section on :ref:`linux_gsg_hugepages` for more details.
+
+   The following shell commands may help with building and configuration:
+
+   .. code-block:: console
+
+      # Build DPDK target.
+      cd dpdk_folder
+      make install T=x86_64-native-linux-gcc -j
+
+      # Get the hugepage size.
+      awk '/Hugepagesize/ {print $2}' /proc/meminfo
+
+      # Get the total number of huge pages.
+      awk '/HugePages_Total/ {print $2} ' /proc/meminfo
+
+      # Unmount the hugepages.
+      umount `awk '/hugetlbfs/ {print $2}' /proc/mounts`
+
+      # Create the hugepage mount folder.
+      mkdir -p /mnt/huge
+
+      # Mount to the specific folder.
+      mount -t hugetlbfs nodev /mnt/huge
+
+2. Check the CPU layout using the DPDK ``cpu_layout`` utility:
+
+   .. code-block:: console
+
+      cd dpdk_folder
+
+      usertools/cpu_layout.py
+
+   Or run ``lscpu`` to check the cores on each socket.
+
+3. Check your NIC id and related socket id:
+
+   .. code-block:: console
+
+      # List all the NICs with PCI address and device IDs.
+      lspci -nn | grep Eth
+
+   For example, suppose your output was as follows::
+
+      82:00.0 Ethernet [0200]: Intel XL710 for 40GbE QSFP+ [8086:1583]
+      82:00.1 Ethernet [0200]: Intel XL710 for 40GbE QSFP+ [8086:1583]
+      85:00.0 Ethernet [0200]: Intel XL710 for 40GbE QSFP+ [8086:1583]
+      85:00.1 Ethernet [0200]: Intel XL710 for 40GbE QSFP+ [8086:1583]
+
+   Check the NUMA node id related to the PCI device:
+
+   .. code-block:: console
+
+      cat /sys/bus/pci/devices/0000\:xx\:00.x/numa_node
+
+   Usually ``0x:00.x`` is on socket 0 and ``8x:00.x`` is on socket 1.
+   **Note**: To get the best performance, ensure that the core and NICs are in the same socket.
+   In the example above ``85:00.0`` is on socket 1 and should be used by cores on socket 1 for the best performance.
+
+4. Check which kernel drivers need to be loaded and whether there is a need to unbind the network ports from their kernel drivers; an example binding sequence is sketched below.
+   For more details about DPDK setup and Linux kernel requirements, see :ref:`linux_gsg_compiling_dpdk` and :ref:`linux_gsg_linux_drivers`.
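+
+As a worked example of step 4, the following sketch shows how a port might be moved from its
+kernel driver to a DPDK-compatible module using the ``dpdk-devbind.py`` script described in
+:ref:`linux_gsg_binding_kernel`. The PCI address ``0000:85:00.0`` and the choice of the
+``vfio-pci`` module are assumptions made for illustration only; substitute the address and
+module appropriate for your system.
+
+.. code-block:: console
+
+   # Load the VFIO kernel module before binding (a UIO module can be used instead).
+   modprobe vfio-pci
+
+   # Check which driver each network port is currently bound to.
+   ./usertools/dpdk-devbind.py --status
+
+   # Bind the chosen port to vfio-pci for DPDK use.
+   ./usertools/dpdk-devbind.py --bind=vfio-pci 0000:85:00.0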
diff --git a/src/spdk/dpdk/doc/guides/linux_gsg/quick_start.rst b/src/spdk/dpdk/doc/guides/linux_gsg/quick_start.rst new file mode 100644 index 000000000..d7b04ae01 --- /dev/null +++ b/src/spdk/dpdk/doc/guides/linux_gsg/quick_start.rst @@ -0,0 +1,304 @@ +.. SPDX-License-Identifier: BSD-3-Clause + Copyright(c) 2010-2014 Intel Corporation. + +.. _linux_setup_script: + +Quick Start Setup Script +======================== + +The dpdk-setup.sh script, found in the usertools subdirectory, allows the user to perform the following tasks: + +* Build the DPDK libraries + +* Insert and remove the DPDK IGB_UIO kernel module + +* Insert and remove VFIO kernel modules + +* Insert and remove the DPDK KNI kernel module + +* Create and delete hugepages for NUMA and non-NUMA cases + +* View network port status and reserve ports for DPDK application use + +* Set up permissions for using VFIO as a non-privileged user + +* Run the test and testpmd applications + +* Look at hugepages in the meminfo + +* List hugepages in ``/mnt/huge`` + +* Remove built DPDK libraries + +Once these steps have been completed for one of the EAL targets, +the user may compile their own application that links in the EAL libraries to create the DPDK image. + +Script Organization +------------------- + +The dpdk-setup.sh script is logically organized into a series of steps that a user performs in sequence. +Each step provides a number of options that guide the user to completing the desired task. +The following is a brief synopsis of each step. + +**Step 1: Build DPDK Libraries** + +Initially, the user must select a DPDK target to choose the correct target type and compiler options to use when building the libraries. + +The user must have all libraries, modules, updates and compilers installed in the system prior to this, +as described in the earlier chapters in this Getting Started Guide. + +**Step 2: Setup Environment** + +The user configures the Linux* environment to support the running of DPDK applications. +Hugepages can be set up for NUMA or non-NUMA systems. Any existing hugepages will be removed. +The DPDK kernel module that is needed can also be inserted in this step, +and network ports may be bound to this module for DPDK application use. + +**Step 3: Run an Application** + +The user may run the test application once the other steps have been performed. +The test application allows the user to run a series of functional tests for the DPDK. +The testpmd application, which supports the receiving and sending of packets, can also be run. + +**Step 4: Examining the System** + +This step provides some tools for examining the status of hugepage mappings. + +**Step 5: System Cleanup** + +The final step has options for restoring the system to its original state. + +Use Cases +--------- + +The following are some example of how to use the dpdk-setup.sh script. +The script should be run using the source command. +Some options in the script prompt the user for further data before proceeding. + +.. warning:: + + The dpdk-setup.sh script should be run with root privileges. + +.. 
code-block:: console + + source usertools/dpdk-setup.sh + + ------------------------------------------------------------------------ + + RTE_SDK exported as /home/user/rte + + ------------------------------------------------------------------------ + + Step 1: Select the DPDK environment to build + + ------------------------------------------------------------------------ + + [1] i686-native-linux-gcc + + [2] i686-native-linux-icc + + [3] ppc_64-power8-linux-gcc + + [4] x86_64-native-freebsd-clang + + [5] x86_64-native-freebsd-gcc + + [6] x86_64-native-linux-clang + + [7] x86_64-native-linux-gcc + + [8] x86_64-native-linux-icc + + ------------------------------------------------------------------------ + + Step 2: Setup linux environment + + ------------------------------------------------------------------------ + + [11] Insert IGB UIO module + + [12] Insert VFIO module + + [13] Insert KNI module + + [14] Setup hugepage mappings for non-NUMA systems + + [15] Setup hugepage mappings for NUMA systems + + [16] Display current Ethernet device settings + + [17] Bind Ethernet device to IGB UIO module + + [18] Bind Ethernet device to VFIO module + + [19] Setup VFIO permissions + + ------------------------------------------------------------------------ + + Step 3: Run test application for linux environment + + ------------------------------------------------------------------------ + + [20] Run test application ($RTE_TARGET/app/test) + + [21] Run testpmd application in interactive mode ($RTE_TARGET/app/testpmd) + + ------------------------------------------------------------------------ + + Step 4: Other tools + + ------------------------------------------------------------------------ + + [22] List hugepage info from /proc/meminfo + + ------------------------------------------------------------------------ + + Step 5: Uninstall and system cleanup + + ------------------------------------------------------------------------ + + [23] Uninstall all targets + + [24] Unbind NICs from IGB UIO driver + + [25] Remove IGB UIO module + + [26] Remove VFIO module + + [27] Remove KNI module + + [28] Remove hugepage mappings + + [29] Exit Script + +Option: + +The following selection demonstrates the creation of the ``x86_64-native-linux-gcc`` DPDK library. + +.. code-block:: console + + Option: 9 + + ================== Installing x86_64-native-linux-gcc + + Configuration done + == Build lib + ... + Build complete + RTE_TARGET exported as x86_64-native-linux-gcc + +The following selection demonstrates the starting of the DPDK UIO driver. + +.. code-block:: console + + Option: 25 + + Unloading any existing DPDK UIO module + Loading DPDK UIO module + +The following selection demonstrates the creation of hugepages in a NUMA system. +1024 2 MByte pages are assigned to each node. +The result is that the application should use -m 4096 for starting the application to access both memory areas +(this is done automatically if the -m option is not provided). + +.. note:: + + If prompts are displayed to remove temporary files, type 'y'. + +.. 
code-block:: console + + Option: 15 + + Removing currently reserved hugepages + mounting /mnt/huge and removing directory + Input the number of 2MB pages for each node + Example: to have 128MB of hugepages available per node, + enter '64' to reserve 64 * 2MB pages on each node + Number of pages for node0: 1024 + Number of pages for node1: 1024 + Reserving hugepages + Creating /mnt/huge and mounting as hugetlbfs + +The following selection demonstrates the launch of the test application to run on a single core. + +.. code-block:: console + + Option: 20 + + Enter hex bitmask of cores to execute test app on + Example: to execute app on cores 0 to 7, enter 0xff + bitmask: 0x01 + Launching app + EAL: coremask set to 1 + EAL: Detected lcore 0 on socket 0 + ... + EAL: Master core 0 is ready (tid=1b2ad720) + RTE>> + +Applications +------------ + +Once the user has run the dpdk-setup.sh script, built one of the EAL targets and set up hugepages (if using one of the Linux EAL targets), +the user can then move on to building and running their application or one of the examples provided. + +The examples in the /examples directory provide a good starting point to gain an understanding of the operation of the DPDK. +The following command sequence shows how the helloworld sample application is built and run. +As recommended in Section 4.2.1 , "Logical Core Use by Applications", +the logical core layout of the platform should be determined when selecting a core mask to use for an application. + +.. code-block:: console + + cd helloworld/ + make + CC main.o + LD helloworld + INSTALL-APP helloworld + INSTALL-MAP helloworld.map + + sudo ./build/app/helloworld -l 0-3 -n 3 + [sudo] password for rte: + + EAL: coremask set to f + EAL: Detected lcore 0 as core 0 on socket 0 + EAL: Detected lcore 1 as core 0 on socket 1 + EAL: Detected lcore 2 as core 1 on socket 0 + EAL: Detected lcore 3 as core 1 on socket 1 + EAL: Setting up hugepage memory... 
+ EAL: Ask a virtual area of 0x200000 bytes + EAL: Virtual area found at 0x7f0add800000 (size = 0x200000) + EAL: Ask a virtual area of 0x3d400000 bytes + EAL: Virtual area found at 0x7f0aa0200000 (size = 0x3d400000) + EAL: Ask a virtual area of 0x400000 bytes + EAL: Virtual area found at 0x7f0a9fc00000 (size = 0x400000) + EAL: Ask a virtual area of 0x400000 bytes + EAL: Virtual area found at 0x7f0a9f600000 (size = 0x400000) + EAL: Ask a virtual area of 0x400000 bytes + EAL: Virtual area found at 0x7f0a9f000000 (size = 0x400000) + EAL: Ask a virtual area of 0x800000 bytes + EAL: Virtual area found at 0x7f0a9e600000 (size = 0x800000) + EAL: Ask a virtual area of 0x800000 bytes + EAL: Virtual area found at 0x7f0a9dc00000 (size = 0x800000) + EAL: Ask a virtual area of 0x400000 bytes + EAL: Virtual area found at 0x7f0a9d600000 (size = 0x400000) + EAL: Ask a virtual area of 0x400000 bytes + EAL: Virtual area found at 0x7f0a9d000000 (size = 0x400000) + EAL: Ask a virtual area of 0x400000 bytes + EAL: Virtual area found at 0x7f0a9ca00000 (size = 0x400000) + EAL: Ask a virtual area of 0x200000 bytes + EAL: Virtual area found at 0x7f0a9c600000 (size = 0x200000) + EAL: Ask a virtual area of 0x200000 bytes + EAL: Virtual area found at 0x7f0a9c200000 (size = 0x200000) + EAL: Ask a virtual area of 0x3fc00000 bytes + EAL: Virtual area found at 0x7f0a5c400000 (size = 0x3fc00000) + EAL: Ask a virtual area of 0x200000 bytes + EAL: Virtual area found at 0x7f0a5c000000 (size = 0x200000) + EAL: Requesting 1024 pages of size 2MB from socket 0 + EAL: Requesting 1024 pages of size 2MB from socket 1 + EAL: Master core 0 is ready (tid=de25b700) + EAL: Core 1 is ready (tid=5b7fe700) + EAL: Core 3 is ready (tid=5a7fc700) + EAL: Core 2 is ready (tid=5affd700) + hello from core 1 + hello from core 2 + hello from core 3 + hello from core 0 diff --git a/src/spdk/dpdk/doc/guides/linux_gsg/sys_reqs.rst b/src/spdk/dpdk/doc/guides/linux_gsg/sys_reqs.rst new file mode 100644 index 000000000..a124656bc --- /dev/null +++ b/src/spdk/dpdk/doc/guides/linux_gsg/sys_reqs.rst @@ -0,0 +1,219 @@ +.. SPDX-License-Identifier: BSD-3-Clause + Copyright(c) 2010-2014 Intel Corporation. + +System Requirements +=================== + +This chapter describes the packages required to compile the DPDK. + +.. note:: + + If the DPDK is being used on an Intel® Communications Chipset 89xx Series platform, + please consult the *Intel® Communications Chipset 89xx Series Software for Linux Getting Started Guide*. + +BIOS Setting Prerequisite on x86 +-------------------------------- + +For the majority of platforms, no special BIOS settings are needed to use basic DPDK functionality. +However, for additional HPET timer and power management functionality, +and high performance of small packets, BIOS setting changes may be needed. +Consult the section on :ref:`Enabling Additional Functionality ` +for more information on the required changes. + +.. note:: + + If UEFI secure boot is enabled, the Linux kernel may disallow the use of + UIO on the system. Therefore, devices for use by DPDK should be bound to the + ``vfio-pci`` kernel module rather than ``igb_uio`` or ``uio_pci_generic``. + For more details see :ref:`linux_gsg_binding_kernel`. + +Compilation of the DPDK +----------------------- + +**Required Tools and Libraries:** + +.. note:: + + The setup commands and installed packages needed on various systems may be different. + For details on Linux distributions and the versions tested, please consult the DPDK Release Notes. 
+ +* General development tools including ``make``, and a supported C compiler such as ``gcc`` (version 4.9+) or ``clang`` (version 3.4+). + + * For RHEL/Fedora systems these can be installed using ``dnf groupinstall "Development Tools"`` + + * For Ubuntu/Debian systems these can be installed using ``apt install build-essential`` + +* Python, recommended version 3.5+. + + * Python v3.5+ is needed to build DPDK using meson and ninja + + * Python 2.7+ or 3.2+, to use various helper scripts included in the DPDK package. + +* Meson (version 0.47.1+) and ninja + + * ``meson`` & ``ninja-build`` packages in most Linux distributions + + * If the packaged version is below the minimum version, the latest versions + can be installed from Python's "pip" repository: ``pip3 install meson ninja`` + +* Library for handling NUMA (Non Uniform Memory Access). + + * ``numactl-devel`` in RHEL/Fedora; + + * ``libnuma-dev`` in Debian/Ubuntu; + +* Linux kernel headers or sources required to build kernel modules. + +.. note:: + + Please ensure that the latest patches are applied to third party libraries + and software to avoid any known vulnerabilities. + + +**Optional Tools:** + +* Intel® C++ Compiler (icc). For installation, additional libraries may be required. + See the icc Installation Guide found in the Documentation directory under the compiler installation. + +* IBM® Advance ToolChain for Powerlinux. This is a set of open source development tools and runtime libraries + which allows users to take leading edge advantage of IBM's latest POWER hardware features on Linux. To install + it, see the IBM official installation document. + +**Additional Libraries** + +A number of DPDK components, such as libraries and poll-mode drivers (PMDs) have additional dependencies. +For DPDK builds using meson, the presence or absence of these dependencies will be +automatically detected enabling or disabling the relevant components appropriately. + +For builds using make, these components are disabled in the default configuration and +need to be enabled manually by changing the relevant setting to "y" in the build configuration file +i.e. the ``.config`` file in the build folder. + +In each case, the relevant library development package (``-devel`` or ``-dev``) is needed to build the DPDK components. + +For libraries the additional dependencies include: + +* libarchive: for some unit tests using tar to get their resources. + +* libelf: to compile and use the bpf library. + +For poll-mode drivers, the additional dependencies for each driver can be +found in that driver's documentation in the relevant DPDK guide document, +e.g. :doc:`../nics/index` + + +Running DPDK Applications +------------------------- + +To run an DPDK application, some customization may be required on the target machine. + +System Software +~~~~~~~~~~~~~~~ + +**Required:** + +* Kernel version >= 3.16 + + The kernel version required is based on the oldest long term stable kernel available + at kernel.org when the DPDK version is in development. + Compatibility for recent distribution kernels will be kept, notably RHEL/CentOS 7. + + The kernel version in use can be checked using the command:: + + uname -r + +* glibc >= 2.7 (for features related to cpuset) + + The version can be checked using the ``ldd --version`` command. + +* Kernel configuration + + In the Fedora OS and other common distributions, such as Ubuntu, or Red Hat Enterprise Linux, + the vendor supplied kernel configurations can be used to run most DPDK applications. 
+ + For other kernel builds, options which should be enabled for DPDK include: + + * HUGETLBFS + + * PROC_PAGE_MONITOR support + + * HPET and HPET_MMAP configuration options should also be enabled if HPET support is required. + See the section on :ref:`High Precision Event Timer (HPET) Functionality ` for more details. + +.. _linux_gsg_hugepages: + +Use of Hugepages in the Linux Environment +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ + +Hugepage support is required for the large memory pool allocation used for packet buffers +(the HUGETLBFS option must be enabled in the running kernel as indicated the previous section). +By using hugepage allocations, performance is increased since fewer pages are needed, +and therefore less Translation Lookaside Buffers (TLBs, high speed translation caches), +which reduce the time it takes to translate a virtual page address to a physical page address. +Without hugepages, high TLB miss rates would occur with the standard 4k page size, slowing performance. + +Reserving Hugepages for DPDK Use +^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ + +The allocation of hugepages should be done at boot time or as soon as possible after system boot +to prevent memory from being fragmented in physical memory. +To reserve hugepages at boot time, a parameter is passed to the Linux kernel on the kernel command line. + +For 2 MB pages, just pass the hugepages option to the kernel. For example, to reserve 1024 pages of 2 MB, use:: + + hugepages=1024 + +For other hugepage sizes, for example 1G pages, the size must be specified explicitly and +can also be optionally set as the default hugepage size for the system. +For example, to reserve 4G of hugepage memory in the form of four 1G pages, the following options should be passed to the kernel:: + + default_hugepagesz=1G hugepagesz=1G hugepages=4 + +.. note:: + + The hugepage sizes that a CPU supports can be determined from the CPU flags on Intel architecture. + If pse exists, 2M hugepages are supported; if pdpe1gb exists, 1G hugepages are supported. + On IBM Power architecture, the supported hugepage sizes are 16MB and 16GB. + +.. note:: + + For 64-bit applications, it is recommended to use 1 GB hugepages if the platform supports them. + +In the case of a dual-socket NUMA system, +the number of hugepages reserved at boot time is generally divided equally between the two sockets +(on the assumption that sufficient memory is present on both sockets). + +See the Documentation/admin-guide/kernel-parameters.txt file in your Linux source tree for further details of these and other kernel options. + +**Alternative:** + +For 2 MB pages, there is also the option of allocating hugepages after the system has booted. +This is done by echoing the number of hugepages required to a nr_hugepages file in the ``/sys/devices/`` directory. +For a single-node system, the command to use is as follows (assuming that 1024 pages are required):: + + echo 1024 > /sys/kernel/mm/hugepages/hugepages-2048kB/nr_hugepages + +On a NUMA machine, pages should be allocated explicitly on separate nodes:: + + echo 1024 > /sys/devices/system/node/node0/hugepages/hugepages-2048kB/nr_hugepages + echo 1024 > /sys/devices/system/node/node1/hugepages/hugepages-2048kB/nr_hugepages + +.. note:: + + For 1G pages, it is not possible to reserve the hugepage memory after the system has booted. 
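+
+As a quick sanity check, the reservation can be verified before mounting the hugetlbfs
+filesystem. The commands below are a minimal sketch; the per-node paths assume a
+two-socket NUMA system using 2 MB pages::
+
+    grep -i huge /proc/meminfo
+    cat /sys/devices/system/node/node0/hugepages/hugepages-2048kB/nr_hugepages
+    cat /sys/devices/system/node/node1/hugepages/hugepages-2048kB/nr_hugepages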
+ +Using Hugepages with the DPDK +^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ + +Once the hugepage memory is reserved, to make the memory available for DPDK use, perform the following steps:: + + mkdir /mnt/huge + mount -t hugetlbfs nodev /mnt/huge + +The mount point can be made permanent across reboots, by adding the following line to the ``/etc/fstab`` file:: + + nodev /mnt/huge hugetlbfs defaults 0 0 + +For 1GB pages, the page size must be specified as a mount option:: + + nodev /mnt/huge_1GB hugetlbfs pagesize=1GB 0 0 diff --git a/src/spdk/dpdk/doc/guides/mempool/index.rst b/src/spdk/dpdk/doc/guides/mempool/index.rst new file mode 100644 index 000000000..756610264 --- /dev/null +++ b/src/spdk/dpdk/doc/guides/mempool/index.rst @@ -0,0 +1,15 @@ +.. SPDX-License-Identifier: BSD-3-Clause + Copyright(c) 2017 Cavium, Inc + +Mempool Device Driver +===================== + +The following are a list of mempool PMDs, which can be used from an +application through the mempool API. + +.. toctree:: + :maxdepth: 2 + :numbered: + + octeontx + octeontx2 diff --git a/src/spdk/dpdk/doc/guides/mempool/octeontx.rst b/src/spdk/dpdk/doc/guides/mempool/octeontx.rst new file mode 100644 index 000000000..dfa1993e1 --- /dev/null +++ b/src/spdk/dpdk/doc/guides/mempool/octeontx.rst @@ -0,0 +1,74 @@ +.. SPDX-License-Identifier: BSD-3-Clause + Copyright(c) 2017 Cavium, Inc + +OCTEON TX FPAVF Mempool Driver +============================== + +The OCTEON TX FPAVF PMD (**librte_mempool_octeontx**) is a mempool +driver for offload mempool device found in **Cavium OCTEON TX** SoC +family. + +More information can be found at `Cavium, Inc Official Website +`_. + +Features +-------- + +Features of the OCTEON TX FPAVF PMD are: + +- 32 SR-IOV Virtual functions +- 32 Pools +- HW mempool manager + +Supported OCTEON TX SoCs +------------------------ + +- CN83xx + +Prerequisites +------------- + +See :doc: `../platform/octeontx.rst` for setup information. + +Pre-Installation Configuration +------------------------------ + +Config File Options +~~~~~~~~~~~~~~~~~~~ + +The following options can be modified in the ``config`` file. +Please note that enabling debugging options may affect system performance. + +- ``CONFIG_RTE_MBUF_DEFAULT_MEMPOOL_OPS`` ( set to ``octeontx_fpavf``) + + Set default mempool ops to octeontx_fpavf. + +- ``CONFIG_RTE_LIBRTE_OCTEONTX_MEMPOOL`` (default ``y``) + + Toggle compilation of the ``librte_mempool_octeontx`` driver. + +Driver Compilation +~~~~~~~~~~~~~~~~~~ + +To compile the OCTEON TX FPAVF MEMPOOL PMD for Linux arm64 gcc target, run the +following ``make`` command: + +.. code-block:: console + + cd + make config T=arm64-thunderx-linux-gcc + + +Initialization +-------------- + +The OCTEON TX fpavf mempool initialization similar to other mempool +drivers like ring. However user need to pass --base-virtaddr as +command line input to application example test_mempool.c application. + +Example: + +.. code-block:: console + + ./build/app/test -c 0xf --base-virtaddr=0x100000000000 \ + --mbuf-pool-ops-name="octeontx_fpavf" diff --git a/src/spdk/dpdk/doc/guides/mempool/octeontx2.rst b/src/spdk/dpdk/doc/guides/mempool/octeontx2.rst new file mode 100644 index 000000000..49b45a04e --- /dev/null +++ b/src/spdk/dpdk/doc/guides/mempool/octeontx2.rst @@ -0,0 +1,100 @@ +.. SPDX-License-Identifier: BSD-3-Clause + Copyright(c) 2019 Marvell International Ltd. 
+ +OCTEON TX2 NPA Mempool Driver +============================= + +The OCTEON TX2 NPA PMD (**librte_mempool_octeontx2**) provides mempool +driver support for the integrated mempool device found in **Marvell OCTEON TX2** SoC family. + +More information about OCTEON TX2 SoC can be found at `Marvell Official Website +`_. + +Features +-------- + +OCTEON TX2 NPA PMD supports: + +- Up to 128 NPA LFs +- 1M Pools per LF +- HW mempool manager +- Ethdev Rx buffer allocation in HW to save CPU cycles in the Rx path. +- Ethdev Tx buffer recycling in HW to save CPU cycles in the Tx path. + +Prerequisites and Compilation procedure +--------------------------------------- + + See :doc:`../platform/octeontx2` for setup information. + +Pre-Installation Configuration +------------------------------ + +Compile time Config Options +~~~~~~~~~~~~~~~~~~~~~~~~~~~ + +The following option can be modified in the ``config`` file. + +- ``CONFIG_RTE_LIBRTE_OCTEONTX2_MEMPOOL`` (default ``y``) + + Toggle compilation of the ``librte_mempool_octeontx2`` driver. + +Runtime Config Options +~~~~~~~~~~~~~~~~~~~~~~ + +- ``Maximum number of mempools per application`` (default ``128``) + + The maximum number of mempools per application needs to be configured on + HW during mempool driver initialization. HW can support up to 1M mempools, + Since each mempool costs set of HW resources, the ``max_pools`` ``devargs`` + parameter is being introduced to configure the number of mempools required + for the application. + For example:: + + -w 0002:02:00.0,max_pools=512 + + With the above configuration, the driver will set up only 512 mempools for + the given application to save HW resources. + +.. note:: + + Since this configuration is per application, the end user needs to + provide ``max_pools`` parameter to the first PCIe device probed by the given + application. + +- ``Lock NPA contexts in NDC`` + + Lock NPA aura and pool contexts in NDC cache. + The device args take hexadecimal bitmask where each bit represent the + corresponding aura/pool id. + + For example:: + + -w 0002:02:00.0,npa_lock_mask=0xf + +Debugging Options +~~~~~~~~~~~~~~~~~ + +.. _table_octeontx2_mempool_debug_options: + +.. table:: OCTEON TX2 mempool debug options + + +---+------------+-------------------------------------------------------+ + | # | Component | EAL log command | + +===+============+=======================================================+ + | 1 | NPA | --log-level='pmd\.mempool.octeontx2,8' | + +---+------------+-------------------------------------------------------+ + +Standalone mempool device +~~~~~~~~~~~~~~~~~~~~~~~~~ + + The ``usertools/dpdk-devbind.py`` script shall enumerate all the mempool devices + available in the system. In order to avoid, the end user to bind the mempool + device prior to use ethdev and/or eventdev device, the respective driver + configures an NPA LF and attach to the first probed ethdev or eventdev device. 
+ In case, if end user need to run mempool as a standalone device + (without ethdev or eventdev), end user needs to bind a mempool device using + ``usertools/dpdk-devbind.py`` + + Example command to run ``mempool_autotest`` test with standalone OCTEONTX2 NPA device:: + + echo "mempool_autotest" | build/app/test -c 0xf0 --mbuf-pool-ops-name="octeontx2_npa" diff --git a/src/spdk/dpdk/doc/guides/meson.build b/src/spdk/dpdk/doc/guides/meson.build new file mode 100644 index 000000000..732e7ad3a --- /dev/null +++ b/src/spdk/dpdk/doc/guides/meson.build @@ -0,0 +1,24 @@ +# SPDX-License-Identifier: BSD-3-Clause +# Copyright(c) 2018 Intel Corporation + +sphinx = find_program('sphinx-build', required: get_option('enable_docs')) + +if not sphinx.found() + subdir_done() +endif + +htmldir = join_paths(get_option('datadir'), 'doc', 'dpdk') +html_guides = custom_target('html_guides', + input: files('index.rst'), + output: 'html', + command: [sphinx_wrapper, sphinx, meson.current_source_dir(), meson.current_build_dir()], + depfile: '.html.d', + build_by_default: get_option('enable_docs'), + install: get_option('enable_docs'), + install_dir: htmldir) + +install_data(files('custom.css'), + install_dir: join_paths(htmldir,'_static', 'css')) + +doc_targets += html_guides +doc_target_names += 'HTML_Guides' diff --git a/src/spdk/dpdk/doc/guides/nics/af_packet.rst b/src/spdk/dpdk/doc/guides/nics/af_packet.rst new file mode 100644 index 000000000..efd6f1ca7 --- /dev/null +++ b/src/spdk/dpdk/doc/guides/nics/af_packet.rst @@ -0,0 +1,67 @@ +.. SPDX-License-Identifier: BSD-3-Clause + Copyright(c) 2018 Intel Corporation. + +AF_PACKET Poll Mode Driver +========================== + +The AF_PACKET socket in Linux allows an application to receive and send raw +packets. This Linux-specific PMD driver binds to an AF_PACKET socket and allows +a DPDK application to send and receive raw packets through the Kernel. + +In order to improve Rx and Tx performance this implementation makes use of +PACKET_MMAP, which provides a mmap'ed ring buffer, shared between user space +and kernel, that's used to send and receive packets. This helps reducing system +calls and the copies needed between user space and Kernel. + +The PACKET_FANOUT_HASH behavior of AF_PACKET is used for frame reception. + +Options and inherent limitations +-------------------------------- + +The following options can be provided to set up an af_packet port in DPDK. +Some of these, in turn, will be used to configure the PACKET_MMAP settings. + +* ``iface`` - name of the Kernel interface to attach to (required); +* ``qpairs`` - number of Rx and Tx queues (optional, default 1); +* ``qdisc_bypass`` - set PACKET_QDISC_BYPASS option in AF_PACKET (optional, + disabled by default); +* ``blocksz`` - PACKET_MMAP block size (optional, default 4096); +* ``framesz`` - PACKET_MMAP frame size (optional, default 2048B; Note: multiple + of 16B); +* ``framecnt`` - PACKET_MMAP frame count (optional, default 512). + +Because this implementation is based on PACKET_MMAP, and PACKET_MMAP has its +own pre-requisites, it should be noted that the inner workings of PACKET_MMAP +should be carefully considered before modifying some of these options (namely, +``blocksz``, ``framesz`` and ``framecnt`` above). + +As an example, if one changes ``framesz`` to be 1024B, it is expected that +``blocksz`` is set to at least 1024B as well (although 2048B in this case would +allow two "frames" per "block"). 
+ +This restriction happens because PACKET_MMAP expects each single "frame" to fit +inside of a "block". And although multiple "frames" can fit inside of a single +"block", a "frame" may not span across two "blocks". + +For the full details behind PACKET_MMAP's structures and settings, consider +reading the `PACKET_MMAP documentation in the Kernel +`_. + +Prerequisites +------------- + +This is a Linux-specific PMD, thus the following prerequisites apply: + +* A Linux Kernel; +* A Kernel bound interface to attach to (e.g. a tap interface). + +Set up an af_packet interface +----------------------------- + +The following example will set up an af_packet interface in DPDK with the +default options described above (blocksz=4096B, framesz=2048B and +framecnt=512): + +.. code-block:: console + + --vdev=eth_af_packet0,iface=tap0,blocksz=4096,framesz=2048,framecnt=512,qpairs=1,qdisc_bypass=0 diff --git a/src/spdk/dpdk/doc/guides/nics/af_xdp.rst b/src/spdk/dpdk/doc/guides/nics/af_xdp.rst new file mode 100644 index 000000000..07bdd29e2 --- /dev/null +++ b/src/spdk/dpdk/doc/guides/nics/af_xdp.rst @@ -0,0 +1,79 @@ +.. SPDX-License-Identifier: BSD-3-Clause + Copyright(c) 2019 Intel Corporation. + +AF_XDP Poll Mode Driver +========================== + +AF_XDP is an address family that is optimized for high performance +packet processing. AF_XDP sockets enable the possibility for XDP program to +redirect packets to a memory buffer in userspace. + +For the full details behind AF_XDP socket, you can refer to +`AF_XDP documentation in the Kernel +`_. + +This Linux-specific PMD driver creates the AF_XDP socket and binds it to a +specific netdev queue, it allows a DPDK application to send and receive raw +packets through the socket which would bypass the kernel network stack. +Current implementation only supports single queue, multi-queues feature will +be added later. + +AF_XDP PMD enables need_wakeup flag by default if it is supported. This +need_wakeup feature is used to support executing application and driver on the +same core efficiently. This feature not only has a large positive performance +impact for the one core case, but also does not degrade 2 core performance and +actually improves it for Tx heavy workloads. + +Options +------- + +The following options can be provided to set up an af_xdp port in DPDK. + +* ``iface`` - name of the Kernel interface to attach to (required); +* ``start_queue`` - starting netdev queue id (optional, default 0); +* ``queue_count`` - total netdev queue number (optional, default 1); + +Prerequisites +------------- + +This is a Linux-specific PMD, thus the following prerequisites apply: + +* A Linux Kernel (version > v4.18) with XDP sockets configuration enabled; +* libbpf (within kernel version > v5.1-rc4) with latest af_xdp support installed, + User can install libbpf via `make install_lib` && `make install_headers` in + /tools/lib/bpf; +* A Kernel bound interface to attach to; +* For need_wakeup feature, it requires kernel version later than v5.3-rc1; +* For PMD zero copy, it requires kernel version later than v5.4-rc1; + +Set up an af_xdp interface +----------------------------- + +The following example will set up an af_xdp interface in DPDK: + +.. code-block:: console + + --vdev net_af_xdp,iface=ens786f1 + +Limitations +----------- + +- **MTU** + + The MTU of the AF_XDP PMD is limited due to the XDP requirement of one packet + per page. 
In the PMD we report the maximum MTU for zero copy to be equal + to the page size less the frame overhead introduced by AF_XDP (XDP HR = 256) + and DPDK (frame headroom = 320). With a 4K page size this works out at 3520. + However in practice this value may be even smaller, due to differences between + the supported RX buffer sizes of the underlying kernel netdev driver. + + For example, the largest RX buffer size supported by the underlying kernel driver + which is less than the page size (4096B) may be 3072B. In this case, the maximum + MTU value will be at most 3072, but likely even smaller than this, once relevant + headers are accounted for eg. Ethernet and VLAN. + + To determine the actual maximum MTU value of the interface you are using with the + AF_XDP PMD, consult the documentation for the kernel driver. + + Note: The AF_XDP PMD will fail to initialise if an MTU which violates the driver's + conditions as above is set prior to launching the application. diff --git a/src/spdk/dpdk/doc/guides/nics/ark.rst b/src/spdk/dpdk/doc/guides/nics/ark.rst new file mode 100644 index 000000000..06e8c3374 --- /dev/null +++ b/src/spdk/dpdk/doc/guides/nics/ark.rst @@ -0,0 +1,231 @@ +.. SPDX-License-Identifier: BSD-3-Clause + Copyright (c) 2015-2017 Atomic Rules LLC + All rights reserved. + +ARK Poll Mode Driver +==================== + +The ARK PMD is a DPDK poll-mode driver for the Atomic Rules Arkville +(ARK) family of devices. + +More information can be found at the `Atomic Rules website +`_. + +Overview +-------- + +The Atomic Rules Arkville product is DPDK and AXI compliant product +that marshals packets across a PCIe conduit between host DPDK mbufs and +FPGA AXI streams. + +The ARK PMD, and the spirit of the overall Arkville product, +has been to take the DPDK API/ABI as a fixed specification; +then implement much of the business logic in FPGA RTL circuits. +The approach of *working backwards* from the DPDK API/ABI and having +the GPP host software *dictate*, while the FPGA hardware *copes*, +results in significant performance gains over a naive implementation. + +While this document describes the ARK PMD software, it is helpful to +understand what the FPGA hardware is and is not. The Arkville RTL +component provides a single PCIe Physical Function (PF) supporting +some number of RX/Ingress and TX/Egress Queues. The ARK PMD controls +the Arkville core through a dedicated opaque Core BAR (CBAR). +To allow users full freedom for their own FPGA application IP, +an independent FPGA Application BAR (ABAR) is provided. + +One popular way to imagine Arkville's FPGA hardware aspect is as the +FPGA PCIe-facing side of a so-called Smart NIC. The Arkville core does +not contain any MACs, and is link-speed independent, as well as +agnostic to the number of physical ports the application chooses to +use. The ARK driver exposes the familiar PMD interface to allow packet +movement to and from mbufs across multiple queues. + +However FPGA RTL applications could contain a universe of added +functionality that an Arkville RTL core does not provide or can +not anticipate. To allow for this expectation of user-defined +innovation, the ARK PMD provides a dynamic mechanism of adding +capabilities without having to modify the ARK PMD. + +The ARK PMD is intended to support all instances of the Arkville +RTL Core, regardless of configuration, FPGA vendor, or target +board. 
While specific capabilities such as number of physical +hardware queue-pairs are negotiated; the driver is designed to +remain constant over a broad and extendable feature set. + +Intentionally, Arkville by itself DOES NOT provide common NIC +capabilities such as offload or receive-side scaling (RSS). +These capabilities would be viewed as a gate-level "tax" on +Green-box FPGA applications that do not require such function. +Instead, they can be added as needed with essentially no +overhead to the FPGA Application. + +The ARK PMD also supports optional user extensions, through dynamic linking. +The ARK PMD user extensions are a feature of Arkville’s DPDK +net/ark poll mode driver, allowing users to add their +own code to extend the net/ark functionality without +having to make source code changes to the driver. One motivation for +this capability is that while DPDK provides a rich set of functions +to interact with NIC-like capabilities (e.g. MAC addresses and statistics), +the Arkville RTL IP does not include a MAC. Users can supply their +own MAC or custom FPGA applications, which may require control from +the PMD. The user extension is the means providing the control +between the user's FPGA application and the existing DPDK features via +the PMD. + +Device Parameters +------------------- + +The ARK PMD supports device parameters that are used for packet +routing and for internal packet generation and packet checking. This +section describes the supported parameters. These features are +primarily used for diagnostics, testing, and performance verification +under the guidance of an Arkville specialist. The nominal use of +Arkville does not require any configuration using these parameters. + +"Pkt_dir" + +The Packet Director controls connectivity between Arkville's internal +hardware components. The features of the Pkt_dir are only used for +diagnostics and testing; it is not intended for nominal use. The full +set of features are not published at this level. + +Format: +Pkt_dir=0x00110F10 + +"Pkt_gen" + +The packet generator parameter takes a file as its argument. The file +contains configuration parameters used internally for regression +testing and are not intended to be published at this level. The +packet generator is an internal Arkville hardware component. + +Format: +Pkt_gen=./config/pg.conf + +"Pkt_chkr" + +The packet checker parameter takes a file as its argument. The file +contains configuration parameters used internally for regression +testing and are not intended to be published at this level. The +packet checker is an internal Arkville hardware component. + +Format: +Pkt_chkr=./config/pc.conf + + +Data Path Interface +------------------- + +Ingress RX and Egress TX operation is by the nominal DPDK API . +The driver supports single-port, multi-queue for both RX and TX. + +Configuration Information +------------------------- + +**DPDK Configuration Parameters** + + The following configuration options are available for the ARK PMD: + + * **CONFIG_RTE_LIBRTE_ARK_PMD** (default y): Enables or disables inclusion + of the ARK PMD driver in the DPDK compilation. + + * **CONFIG_RTE_LIBRTE_ARK_PAD_TX** (default y): When enabled TX + packets are padded to 60 bytes to support downstream MACS. + + * **CONFIG_RTE_LIBRTE_ARK_DEBUG_RX** (default n): Enables or disables debug + logging and internal checking of RX ingress logic within the ARK PMD driver. 
+ + * **CONFIG_RTE_LIBRTE_ARK_DEBUG_TX** (default n): Enables or disables debug + logging and internal checking of TX egress logic within the ARK PMD driver. + + * **CONFIG_RTE_LIBRTE_ARK_DEBUG_STATS** (default n): Enables or disables debug + logging of detailed packet and performance statistics gathered in + the PMD and FPGA. + + * **CONFIG_RTE_LIBRTE_ARK_DEBUG_TRACE** (default n): Enables or disables debug + logging of detailed PMD events and status. + + +Building DPDK +------------- + +See the :ref:`DPDK Getting Started Guide for Linux ` for +instructions on how to build DPDK. + +By default the ARK PMD library will be built into the DPDK library. + +For configuring and using UIO and VFIO frameworks, please also refer :ref:`the +documentation that comes with DPDK suite `. + +Supported ARK RTL PCIe Instances +-------------------------------- + +ARK PMD supports the following Arkville RTL PCIe instances including: + +* ``1d6c:100d`` - AR-ARKA-FX0 [Arkville 32B DPDK Data Mover] +* ``1d6c:100e`` - AR-ARKA-FX1 [Arkville 64B DPDK Data Mover] + +Supported Operating Systems +--------------------------- + +Any Linux distribution fulfilling the conditions described in ``System Requirements`` +section of :ref:`the DPDK documentation ` or refer to *DPDK +Release Notes*. ARM and PowerPC architectures are not supported at this time. + + +Supported Features +------------------ + +* Dynamic ARK PMD extensions +* Multiple receive and transmit queues +* Jumbo frames up to 9K +* Hardware Statistics + +Unsupported Features +-------------------- + +Features that may be part of, or become part of, the Arkville RTL IP that are +not currently supported or exposed by the ARK PMD include: + +* PCIe SR-IOV Virtual Functions (VFs) +* Arkville's Packet Generator Control and Status +* Arkville's Packet Director Control and Status +* Arkville's Packet Checker Control and Status +* Arkville's Timebase Management + +Pre-Requisites +-------------- + +#. Prepare the system as recommended by DPDK suite. This includes environment + variables, hugepages configuration, tool-chains and configuration + +#. Insert igb_uio kernel module using the command 'modprobe igb_uio' + +#. Bind the intended ARK device to igb_uio module + +At this point the system should be ready to run DPDK applications. Once the +application runs to completion, the ARK PMD can be detached from igb_uio if necessary. + +Usage Example +------------- + +Follow instructions available in the document +:ref:`compiling and testing a PMD for a NIC ` to launch +**testpmd** with Atomic Rules ARK devices managed by librte_pmd_ark. + +Example output: + +.. code-block:: console + + [...] + EAL: PCI device 0000:01:00.0 on NUMA socket -1 + EAL: probe driver: 1d6c:100e rte_ark_pmd + EAL: PCI memory mapped at 0x7f9b6c400000 + PMD: eth_ark_dev_init(): Initializing 0:2:0.1 + ARKP PMD CommitID: 378f3a67 + Configuring Port 0 (socket 0) + Port 0: DC:3C:F6:00:00:01 + Checking link statuses... + Port 0 Link Up - speed 100000 Mbps - full-duplex + Done + testpmd> diff --git a/src/spdk/dpdk/doc/guides/nics/atlantic.rst b/src/spdk/dpdk/doc/guides/nics/atlantic.rst new file mode 100644 index 000000000..3f3f2949f --- /dev/null +++ b/src/spdk/dpdk/doc/guides/nics/atlantic.rst @@ -0,0 +1,53 @@ +.. SPDX-License-Identifier: BSD-3-Clause + Copyright(c) 2018 Aquantia Corporation. 
+ +Aquantia Atlantic DPDK Driver +============================= + +Atlantic DPDK driver provides DPDK support for Aquantia's AQtion family of chipsets: AQC107/AQC108/AQC109 + +More information can be found at `Aquantia Official Website +`_. + +Supported features +^^^^^^^^^^^^^^^^^^ + +- Base L2 features +- Promiscuous mode +- Multicast mode +- Port statistics +- RSS (Receive Side Scaling) +- Checksum offload +- Jumbo Frame up to 16K +- MACSEC offload + +Experimental API features +^^^^^^^^^^^^^^^^^^^^^^^^^ + +- MACSEC PMD API is considered as experimental and is subject to change/removal in next DPDK releases. + +Configuration Information +^^^^^^^^^^^^^^^^^^^^^^^^^ + +- ``CONFIG_RTE_LIBRTE_ATLANTIC_PMD`` (default ``y``) + +Application Programming Interface +--------------------------------- + +Limitations or Known issues +--------------------------- + +Statistics +~~~~~~~~~~ + +MTU setting +~~~~~~~~~~~ + +Atlantic NIC supports up to 16K jumbo frame size + +Supported Chipsets and NICs +--------------------------- + +- Aquantia AQtion AQC107 10 Gigabit Ethernet Controller +- Aquantia AQtion AQC108 5 Gigabit Ethernet Controller +- Aquantia AQtion AQC109 2.5 Gigabit Ethernet Controller diff --git a/src/spdk/dpdk/doc/guides/nics/avp.rst b/src/spdk/dpdk/doc/guides/nics/avp.rst new file mode 100644 index 000000000..1a194fc23 --- /dev/null +++ b/src/spdk/dpdk/doc/guides/nics/avp.rst @@ -0,0 +1,85 @@ +.. SPDX-License-Identifier: BSD-3-Clause + Copyright(c) 2017 Wind River Systems, Inc. + All rights reserved. + +AVP Poll Mode Driver +================================================================= + +The Accelerated Virtual Port (AVP) device is a shared memory based device +only available on `virtualization platforms `_ +from Wind River Systems. The Wind River Systems virtualization platform +currently uses QEMU/KVM as its hypervisor and as such provides support for all +of the QEMU supported virtual and/or emulated devices (e.g., virtio, e1000, +etc.). The platform offers the virtio device type as the default device when +launching a virtual machine or creating a virtual machine port. The AVP device +is a specialized device available to customers that require increased +throughput and decreased latency to meet the demands of their performance +focused applications. + +The AVP driver binds to any AVP PCI devices that have been exported by the Wind +River Systems QEMU/KVM hypervisor. As a user of the DPDK driver API it +supports a subset of the full Ethernet device API to enable the application to +use the standard device configuration functions and packet receive/transmit +functions. + +These devices enable optimized packet throughput by bypassing QEMU and +delivering packets directly to the virtual switch via a shared memory +mechanism. This provides DPDK applications running in virtual machines with +significantly improved throughput and latency over other device types. + +The AVP device implementation is integrated with the QEMU/KVM live-migration +mechanism to allow applications to seamlessly migrate from one hypervisor node +to another with minimal packet loss. + + +Features and Limitations of the AVP PMD +--------------------------------------- + +The AVP PMD driver provides the following functionality. 
+ +* Receive and transmit of both simple and chained mbuf packets, + +* Chained mbufs may include up to 5 chained segments, + +* Up to 8 receive and transmit queues per device, + +* Only a single MAC address is supported, + +* The MAC address cannot be modified, + +* The maximum receive packet length is 9238 bytes, + +* VLAN header stripping and inserting, + +* Promiscuous mode + +* VM live-migration + +* PCI hotplug insertion and removal + + +Prerequisites +------------- + +The following prerequisites apply: + +* A virtual machine running in a Wind River Systems virtualization + environment and configured with at least one neutron port defined with a + vif-model set to "avp". + + +Launching a VM with an AVP type network attachment +-------------------------------------------------- + +The following example will launch a VM with three network attachments. The +first attachment will have a default vif-model of "virtio". The next two +network attachments will have a vif-model of "avp" and may be used with a DPDK +application which is built to include the AVP PMD driver. + +.. code-block:: console + + nova boot --flavor small --image my-image \ + --nic net-id=${NETWORK1_UUID} \ + --nic net-id=${NETWORK2_UUID},vif-model=avp \ + --nic net-id=${NETWORK3_UUID},vif-model=avp \ + --security-group default my-instance1 diff --git a/src/spdk/dpdk/doc/guides/nics/axgbe.rst b/src/spdk/dpdk/doc/guides/nics/axgbe.rst new file mode 100644 index 000000000..9b270a422 --- /dev/null +++ b/src/spdk/dpdk/doc/guides/nics/axgbe.rst @@ -0,0 +1,89 @@ +.. SPDX-License-Identifier: BSD-3-Clause + Copyright (c) 2018 Advanced Micro Devices, Inc. All rights reserved. + +AXGBE Poll Mode Driver +====================== + +The AXGBE poll mode driver library (**librte_pmd_axgbe**) implements support +for AMD 10 Gbps family of adapters. It is compiled and tested in standard linux distro like Ubuntu. + +Detailed information about SoCs that use these devices can be found here: + +- `AMD EPYC™ EMBEDDED 3000 family `_. + + +Supported Features +------------------ + +AXGBE PMD has support for: + +- Base L2 features +- TSS (Transmit Side Scaling) +- Promiscuous mode +- Port statistics +- Multicast mode +- RSS (Receive Side Scaling) +- Checksum offload +- Jumbo Frame up to 9K + + +Configuration Information +------------------------- + +The following options can be modified in the ``.config`` file. Please note that +enabling debugging options may affect system performance. + +- ``CONFIG_RTE_LIBRTE_AXGBE_PMD`` (default **y**) + + Toggle compilation of axgbe PMD. + +- ``CONFIG_RTE_LIBRTE_AXGBE_PMD_DEBUG`` (default **n**) + + Toggle display for PMD debug related messages. + + +Building DPDK +------------- + +See the :ref:`DPDK Getting Started Guide for Linux ` for +instructions on how to build DPDK. + +By default the AXGBE PMD library will be built into the DPDK library. + +For configuring and using UIO frameworks, please also refer :ref:`the +documentation that comes with DPDK suite `. + + +Prerequisites and Pre-conditions +-------------------------------- +- Prepare the system as recommended by DPDK suite. + +- Bind the intended AMD device to ``igb_uio`` or ``vfio-pci`` module. + +Now system is ready to run DPDK application. + + +Usage Example +------------- + +Refer to the document :ref:`compiling and testing a PMD for a NIC ` +for details. + +Example output: + +.. code-block:: console + + [...] 
+ EAL: PCI device 0000:02:00.4 on NUMA socket 0 + EAL: probe driver: 1022:1458 net_axgbe + Interactive-mode selected + USER1: create a new mbuf pool : n=171456, size=2176, socket=0 + USER1: create a new mbuf pool : n=171456, size=2176, socket=1 + USER1: create a new mbuf pool : n=171456, size=2176, socket=2 + USER1: create a new mbuf pool : n=171456, size=2176, socket=3 + Configuring Port 0 (socket 0) + Port 0: 00:00:1A:1C:6A:17 + Checking link statuses... + Port 0 Link Up - speed 10000 Mbps - full-duplex + Done + testpmd> diff --git a/src/spdk/dpdk/doc/guides/nics/bnx2x.rst b/src/spdk/dpdk/doc/guides/nics/bnx2x.rst new file mode 100644 index 000000000..ab90d8ae5 --- /dev/null +++ b/src/spdk/dpdk/doc/guides/nics/bnx2x.rst @@ -0,0 +1,234 @@ +.. SPDX-License-Identifier: BSD-3-Clause + Copyright (c) 2015 QLogic Corporation + +BNX2X Poll Mode Driver +====================== + +The BNX2X poll mode driver library (**librte_pmd_bnx2x**) implements support +for **QLogic 578xx** 10/20 Gbps family of adapters as well as their virtual +functions (VF) in SR-IOV context. It is supported on several standard Linux +distros like RHEL and SLES. It is compile-tested under FreeBSD OS. + +More information can be found at `QLogic Corporation's Official Website +`_. + +Supported Features +------------------ + +BNX2X PMD has support for: + +- Base L2 features +- Unicast/multicast filtering +- Promiscuous mode +- Port hardware statistics +- SR-IOV VF + +Non-supported Features +---------------------- + +The features not yet supported include: + +- TSS (Transmit Side Scaling) +- RSS (Receive Side Scaling) +- LRO/TSO offload +- Checksum offload +- SR-IOV PF +- Rx TX scatter gather + +Co-existence considerations +--------------------------- + +- QLogic 578xx CNAs support Ethernet, iSCSI and FCoE functionalities. + These functionalities are supported using QLogic Linux kernel + drivers bnx2x, cnic, bnx2i and bnx2fc. DPDK is supported on these + adapters using bnx2x PMD. + +- When SR-IOV is not enabled on the adapter, + QLogic Linux kernel drivers (bnx2x, cnic, bnx2i and bnx2fc) and bnx2x + PMD can’t be attached to different PFs on a given QLogic 578xx + adapter. + A given adapter needs to be completely used by DPDK or Linux drivers. + Before binding DPDK driver to one or more PFs on the adapter, + please make sure to unbind Linux drivers from all PFs of the adapter. + If there are multiple adapters on the system, one or more adapters + can be used by DPDK driver completely and other adapters can be used + by Linux drivers completely. + +- When SR-IOV is enabled on the adapter, + Linux kernel drivers (bnx2x, cnic, bnx2i and bnx2fc) can be bound + to the PFs of a given adapter and either bnx2x PMD or Linux drivers + bnx2x can be bound to the VFs of the adapter. + +Supported QLogic NICs +--------------------- + +- 578xx + +Prerequisites +------------- + +- Requires firmware version **7.13.11.0**. It is included in most of the + standard Linux distros. If it is not available visit + `linux-firmware git repository `_ + to get the required firmware. + +Pre-Installation Configuration +------------------------------ + +Config File Options +~~~~~~~~~~~~~~~~~~~ + +The following options can be modified in the ``.config`` file. Please note that +enabling debugging options may affect system performance. + +- ``CONFIG_RTE_LIBRTE_BNX2X_PMD`` (default **n**) + + Toggle compilation of bnx2x driver. To use bnx2x PMD set this config parameter + to 'y'. 
Also, in order for the firmware binary to load, the zlib development
+  package must be installed.
+
+- ``CONFIG_RTE_LIBRTE_BNX2X_DEBUG_TX`` (default **n**)
+
+  Toggle display of transmit fast path run-time messages.
+
+- ``CONFIG_RTE_LIBRTE_BNX2X_DEBUG_RX`` (default **n**)
+
+  Toggle display of receive fast path run-time messages.
+
+- ``CONFIG_RTE_LIBRTE_BNX2X_DEBUG_PERIODIC`` (default **n**)
+
+  Toggle display of register reads and writes.
+
+
+.. _bnx2x_driver-compilation:
+
+Driver compilation and testing
+------------------------------
+
+Refer to the document :ref:`compiling and testing a PMD for a NIC <pmd_build_and_test>`
+for details.
+
+Jumbo: Limitation
+-----------------
+
+The Rx descriptor limit for the number of segments per MTU is set to 1, and the
+PMD does not support Jumbo Rx scatter gather. Some applications can adjust
+mbuf_size based on this parameter and max_pkt_len.
+
+For others, the PMD detects the condition where the Rx packet length cannot
+be held by the configured mbuf size and logs a message.
+
+Example output:
+
+   .. code-block:: console
+
+      [...]
+      [bnx2x_recv_pkts:397(04:00.0:dpdk-port-0)] mbuf size 2048 is not enough to hold Rx packet length more than 2046
+
+SR-IOV: Prerequisites and Sample Application Notes
+--------------------------------------------------
+
+This section provides instructions to configure SR-IOV with Linux OS.
+
+#. Verify SR-IOV and ARI capabilities are enabled on the adapter using ``lspci``:
+
+   .. code-block:: console
+
+      lspci -s <pci_device_address> -vvv
+
+   Example output:
+
+   .. code-block:: console
+
+      [...]
+      Capabilities: [1b8 v1] Alternative Routing-ID Interpretation (ARI)
+      [...]
+      Capabilities: [1c0 v1] Single Root I/O Virtualization (SR-IOV)
+      [...]
+      Kernel driver in use: igb_uio
+
+#. Load the kernel module:
+
+   .. code-block:: console
+
+      modprobe bnx2x
+
+   Example output:
+
+   .. code-block:: console
+
+      systemd-udevd[4848]: renamed network interface eth0 to ens5f0
+      systemd-udevd[4848]: renamed network interface eth1 to ens5f1
+
+#. Bring up the PF ports:
+
+   .. code-block:: console
+
+      ifconfig ens5f0 up
+      ifconfig ens5f1 up
+
+#. Create VF device(s):
+
+   Echo the number of VFs to be created into the "sriov_numvfs" sysfs entry
+   of the parent PF.
+
+   For example:
+
+   .. code-block:: console
+
+      echo 2 > /sys/devices/pci0000:00/0000:00:03.0/0000:81:00.0/sriov_numvfs
+
+#. Assign VF MAC address:
+
+   Assign a MAC address to the VF using the iproute2 utility. The syntax is:
+   ip link set <pf_interface> vf <vf_index> mac <mac_address>
+
+   For example:
+
+   .. code-block:: console
+
+      ip link set ens5f0 vf 0 mac 52:54:00:2f:9d:e8
+
+#. PCI Passthrough:
+
+   The VF devices may be passed through to the guest VM using virt-manager,
+   virsh, etc. The bnx2x PMD should be used to bind the VF devices in the guest
+   VM using the instructions outlined in the Application notes below.
+
+#. Running testpmd:
+   (Supply ``--log-level="pmd.net.bnx2x.driver",7`` to view informational messages):
+
+   Follow instructions available in the document
+   :ref:`compiling and testing a PMD for a NIC <pmd_build_and_test>`
+   to run testpmd.
+
+   Example output:
+
+   .. code-block:: console
+
+      [...]
+ EAL: PCI device 0000:84:00.0 on NUMA socket 1 + EAL: probe driver: 14e4:168e rte_bnx2x_pmd + EAL: PCI memory mapped at 0x7f14f6fe5000 + EAL: PCI memory mapped at 0x7f14f67e5000 + EAL: PCI memory mapped at 0x7f15fbd9b000 + EAL: PCI device 0000:84:00.1 on NUMA socket 1 + EAL: probe driver: 14e4:168e rte_bnx2x_pmd + EAL: PCI memory mapped at 0x7f14f5fe5000 + EAL: PCI memory mapped at 0x7f14f57e5000 + EAL: PCI memory mapped at 0x7f15fbd4f000 + Interactive-mode selected + Configuring Port 0 (socket 0) + PMD: bnx2x_dev_tx_queue_setup(): fp[00] req_bd=512, thresh=512, + usable_bd=1020, total_bd=1024, + tx_pages=4 + PMD: bnx2x_dev_rx_queue_setup(): fp[00] req_bd=128, thresh=0, + usable_bd=510, total_bd=512, + rx_pages=1, cq_pages=8 + PMD: bnx2x_print_adapter_info(): + [...] + Checking link statuses... + Port 0 Link Up - speed 10000 Mbps - full-duplex + Port 1 Link Up - speed 10000 Mbps - full-duplex + Done + testpmd> diff --git a/src/spdk/dpdk/doc/guides/nics/bnxt.rst b/src/spdk/dpdk/doc/guides/nics/bnxt.rst new file mode 100644 index 000000000..ed650187e --- /dev/null +++ b/src/spdk/dpdk/doc/guides/nics/bnxt.rst @@ -0,0 +1,897 @@ +.. SPDX-License-Identifier: BSD-3-Clause + Copyright 2020 Broadcom Inc. + +BNXT Poll Mode Driver +===================== + +The Broadcom BNXT PMD (**librte_pmd_bnxt**) implements support for adapters +based on Ethernet controllers and SoCs belonging to the Broadcom +BCM574XX/BCM575XX NetXtreme-E® Family of Ethernet Network Controllers, +the Broadcom BCM588XX Stingray Family of Smart NIC Adapters, and the Broadcom +StrataGX® BCM5873X Series of Communications Processors. + +A complete list with links to reference material is in the Appendix section. + +CPU Support +----------- + +BNXT PMD supports multiple CPU architectures, including x86-32, x86-64, and ARMv8. + +Kernel Dependency +----------------- + +BNXT PMD requires a kernel module (VFIO or UIO) for setting up a device, mapping +device memory to userspace, registering interrupts, etc. +VFIO is more secure than UIO, relying on IOMMU protection. +UIO requires the IOMMU disabled or configured to pass-through mode. + +Operating Systems supported: + +* Red Hat Enterprise Linux release 8.1 (Ootpa) +* Red Hat Enterprise Linux release 8.0 (Ootpa) +* Red Hat Enterprise Linux Server release 7.7 (Maipo) +* Red Hat Enterprise Linux Server release 7.6 (Maipo) +* Red Hat Enterprise Linux Server release 7.5 (Maipo) +* Red Hat Enterprise Linux Server release 7.4 (Maipo) +* Red Hat Enterprise Linux Server release 7.3 (Maipo) +* Red Hat Enterprise Linux Server release 7.2 (Maipo) +* CentOS Linux release 8.0 +* CentOS Linux release 7.7 +* CentOS Linux release 7.6.1810 +* CentOS Linux release 7.5.1804 +* CentOS Linux release 7.4.1708 +* Fedora 31 +* FreeBSD 12.1 +* Suse 15SP1 +* Ubuntu 19.04 +* Ubuntu 18.04 +* Ubuntu 16.10 +* Ubuntu 16.04 +* Ubuntu 14.04 + +The BNXT PMD supports operating with: + +* Linux vfio-pci +* Linux uio_pci_generic +* Linux igb_uio +* BSD nic_uio + +Compiling BNXT PMD +------------------ + +To compile the BNXT PMD: + +.. code-block:: console + + make config T=x86_64-native-linux-gcc && make // for x86-64 + make config T=x86_32-native-linux-gcc && make // for x86-32 + make config T=armv8a-linux-gcc && make // for ARMv8 + +Bind the device to one of the kernel modules listed above + +.. code-block:: console + + ./dpdk-devbind.py -b vfio-pci|igb_uio|uio_pci_generic bus_id:device_id.function_id + +Load an application (e.g. testpmd) with a default configuration (e.g. a single +TX /RX queue): + +.. 
code-block:: console
+
+   ./testpmd -c 0xF -n 4 -- -i --portmask=0x1 --nb-cores=2
+
+Running BNXT PMD
+----------------
+
+The BNXT PMD can run on PF or VF.
+
+PCI-SIG Single Root I/O Virtualization (SR-IOV) involves the direct assignment
+of part of the network port resources to guest operating systems using the
+SR-IOV standard.
+The NIC is logically distributed among multiple virtual machines (VMs), while
+still having global data in common to share with the PF and other VFs.
+
+A sysadmin can create and configure VFs:
+
+.. code-block:: console
+
+   echo num_vfs > /sys/bus/pci/devices/domain_id:bus_id:device_id:function_id/sriov_numvfs
+   (ex) echo 4 > /sys/bus/pci/devices/0000:82:00.0/sriov_numvfs
+
+A sysadmin can also change VF properties such as the MAC address, transparent
+VLAN, TX rate limit, and trusted VF:
+
+.. code-block:: console
+
+   ip link set pf_id vf vf_id mac (mac_address) vlan (vlan_id) txrate (rate_value) trust (on|off)
+   (ex) ip link set 0 vf 0 mac 00:11:22:33:44:55 vlan 0x100 txrate 100 trust off
+
+Running on VF
+~~~~~~~~~~~~~
+
+Flow Bifurcation
+^^^^^^^^^^^^^^^^
+
+Flow bifurcation splits the incoming data traffic between user space
+applications (such as DPDK applications) and/or kernel space programs (such as
+the Linux kernel stack).
+It can direct some traffic, for example data plane traffic, to DPDK.
+The rest of the traffic, for example control plane traffic, is redirected to
+the traditional Linux networking stack.
+
+Refer to https://doc.dpdk.org/guides/howto/flow_bifurcation.html
+
+Benefits of the flow bifurcation include:
+
+* Better performance with less CPU overhead, as the user application can
+  directly access the NIC for the data path
+* The NIC is still controlled by the kernel, as control traffic is forwarded
+  only to the kernel driver
+* Control commands, e.g. ethtool, will work as usual
+
+Running on a VF, the BNXT PMD supports flow bifurcation with a combination
+of SR-IOV and packet classification and/or forwarding capability.
+In the simplest case of flow bifurcation, a PF driver configures the NIC to
+forward all user traffic directly to VFs with a matching destination MAC
+address, while the rest of the traffic is forwarded to the PF.
+Note that broadcast packets will be forwarded to both PF and VF.
+
+.. code-block:: console
+
+   (ex) ethtool --config-ntuple ens2f0 flow-type ether dst 00:01:02:03:00:01 vlan 10 vlan-mask 0xf000 action 0x100000000
+
+Trusted VF
+^^^^^^^^^^
+
+By default, VFs are *not* allowed to perform privileged operations, such as
+modifying the VF's MAC address in the guest. These security measures are
+designed to prevent possible attacks.
+However, when a DPDK application can be trusted (e.g., OVS-DPDK), these
+operations performed by a VF would be legitimate and can be allowed.
+
+To allow a VF to request "trusted mode", the trusted VF concept was introduced
+in Linux kernel 4.4, allowing VFs to become "trusted" and perform some
+privileged operations.
+
+The BNXT PMD supports the trusted VF mode of operation. Only a PF can enable
+the trusted attribute on the VF. It is preferable to enable the Trusted setting
+on a VF before starting applications.
+However, the BNXT PMD handles dynamic changes in trusted settings as well.
+
+Note that control commands, e.g., ethtool, will work via the kernel PF driver,
+*not* via the trusted VF driver.
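+
+For illustration, the trusted attribute is typically granted from the host side
+using the iproute2 utility on the PF network interface; the interface name
+``ens2f0`` and VF index ``0`` below are placeholder values, not names mandated
+by the BNXT PMD:
+
+.. code-block:: console
+
+   // on the host, mark VF 0 of PF interface ens2f0 as trusted
+   ip link set ens2f0 vf 0 trust on
+
+   // verify the VF attributes, including the trust setting
+   ip link show ens2f0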
+ +Operations supported by trusted VF: + +* MAC address configuration +* Flow rule creation + +Operations *not* supported by trusted VF: + +* Firmware upgrade +* Promiscuous mode setting + +Running on PF +~~~~~~~~~~~~~ + +Unlike the VF when BNXT PMD runs on a PF there are no restrictions placed on the +features which the PF can enable or request. In a multiport NIC, each port will +have a corresponding PF. Also depending on the configuration of the NIC there +can be more than one PF associated per port. +A sysadmin can load the kernel driver on one PF, and run BNXT PMD on the other +PF or run the PMD on both the PFs. In such cases, the firmware picks one of the +PFs as a master PF. + +Much like in the trusted VF, the DPDK application must be *trusted* and expected +to be *well-behaved*. + +Features +-------- + +The BNXT PMD supports the following features: + +* Port Control + * Port MTU + * LED + * Flow Control and Autoneg +* Packet Filtering + * Unicast MAC Filter + * Multicast MAC Filter + * VLAN Filtering + * Allmulticast Mode + * Promiscuous Mode +* Stateless Offloads + * CRC Offload + * Checksum Offload (IPv4, TCP, and UDP) + * Multi-Queue (TSS and RSS) + * Segmentation and Reassembly (TSO and LRO) +* VLAN insert strip +* Stats Collection +* Generic Flow Offload + +Port Control +~~~~~~~~~~~~ + +**Port MTU**: BNXT PMD supports the MTU (Maximum Transmission Unit) up to 9,574 +bytes: + +.. code-block:: console + + testpmd> port config mtu (port_id) mtu_value + testpmd> show port info (port_id) + +**LED**: Application tunes on (or off) a port LED, typically for a port +identification: + +.. code-block:: console + + int rte_eth_led_on (uint16_t port_id) + int rte_eth_led_off (uint16_t port_id) + +**Flow Control and Autoneg**: Application tunes on (or off) flow control and/or +auto-negotiation on a port: + +.. code-block:: console + + testpmd> set flow_ctrl rx (on|off) (port_id) + testpmd> set flow_ctrl tx (on|off) (port_id) + testpmd> set flow_ctrl autoneg (on|off) (port_id) + +Note that the BNXT PMD does *not* support some options and ignores them when +requested: + +* high_water +* low_water +* pause_time +* mac_ctrl_frame_fwd +* send_xon + +Packet Filtering +~~~~~~~~~~~~~~~~ + +Applications control the packet-forwarding behaviors with packet filters. + +The BNXT PMD supports hardware-based packet filtering: + +* UC (Unicast) MAC Filters + * No unicast packets are forwarded to an application except the one with + DMAC address added to the port + * At initialization, the station MAC address is added to the port +* MC (Multicast) MAC Filters + * No multicast packets are forwarded to an application except the one with + MC address added to the port + * When the application listens to a multicast group, it adds the MC address + to the port +* VLAN Filtering Mode + * When enabled, no packets are forwarded to an application except the ones + with the VLAN tag assigned to the port +* Allmulticast Mode + * When enabled, every multicast packet received on the port is forwarded to + the application + * Typical usage is routing applications +* Promiscuous Mode + * When enabled, every packet received on the port is forwarded to the + application + +Unicast MAC Filter +^^^^^^^^^^^^^^^^^^ + +The application adds (or removes) MAC addresses to enable (or disable) +whitelist filtering to accept packets. + +.. 
code-block:: console
+
+   testpmd> show port (port_id) macs
+   testpmd> mac_addr (add|remove) (port_id) (XX:XX:XX:XX:XX:XX)
+
+Multicast MAC Filter
+^^^^^^^^^^^^^^^^^^^^
+
+The application adds (or removes) multicast addresses to enable (or disable)
+whitelist filtering to accept packets.
+
+.. code-block:: console
+
+   testpmd> show port (port_id) mcast_macs
+   testpmd> mcast_addr (add|remove) (port_id) (XX:XX:XX:XX:XX:XX)
+
+Note that the BNXT PMD supports up to 16 MC MAC filters. If the user adds more
+than 16 MC MACs, the BNXT PMD puts the port into the Allmulticast mode.
+
+VLAN Filtering
+^^^^^^^^^^^^^^
+
+The application enables (or disables) VLAN filtering mode. When the mode is
+enabled, no packets are forwarded to an application except the ones with the
+VLAN tag assigned for the application.
+
+.. code-block:: console
+
+   testpmd> vlan set filter (on|off) (port_id)
+   testpmd> rx_vlan (add|rm) (vlan_id) (port_id)
+
+Allmulticast Mode
+^^^^^^^^^^^^^^^^^
+
+The application enables (or disables) the allmulticast mode. When the mode is
+enabled, every multicast packet received is forwarded to the application.
+
+.. code-block:: console
+
+   testpmd> show port info (port_id)
+   testpmd> set allmulti (port_id) (on|off)
+
+Promiscuous Mode
+^^^^^^^^^^^^^^^^
+
+The application enables (or disables) the promiscuous mode. When the mode is
+enabled on a port, every packet received on the port is forwarded to the
+application.
+
+.. code-block:: console
+
+   testpmd> show port info (port_id)
+   testpmd> set promisc (port_id) (on|off)
+
+Stateless Offloads
+~~~~~~~~~~~~~~~~~~
+
+Like Linux, DPDK allows hardware offload of some stateless processing (such as
+checksum calculation), relieving the CPU from having to burn cycles on every
+packet.
+
+Listed below are the stateless offloads supported by the BNXT PMD:
+
+* CRC offload (for both TX and RX packets)
+* Checksum Offload (for both TX and RX packets)
+  * IPv4 Checksum Offload
+  * TCP Checksum Offload
+  * UDP Checksum Offload
+* Segmentation/Reassembly Offloads
+  * TCP Segmentation Offload (TSO)
+  * Large Receive Offload (LRO)
+* Multi-Queue
+  * Transmit Side Scaling (TSS)
+  * Receive Side Scaling (RSS)
+
+Also, the BNXT PMD supports stateless offloads on inner frames for tunneled
+packets. Listed below are the tunneling protocols supported by the BNXT PMD:
+
+* VXLAN
+* GRE
+* NVGRE
+
+Note that enabling (or disabling) stateless offloads requires applications to
+stop DPDK before changing configuration.
+
+CRC Offload
+^^^^^^^^^^^
+
+The FCS (Frame Check Sequence) in the Ethernet frame is a four-octet CRC (Cyclic
+Redundancy Check) that allows detection of corrupted data within the entire
+frame as received on the receiver side.
+
+The BNXT PMD supports hardware-based CRC offload:
+
+* TX: calculate and insert CRC
+* RX: check and remove CRC, notify the application on CRC error
+
+Note that the CRC offload is always turned on.
+
+Checksum Offload
+^^^^^^^^^^^^^^^^
+
+The application enables hardware checksum calculation for IPv4, TCP, and UDP.
+
+..
code-block:: console + + testpmd> port stop (port_id) + testpmd> csum set (ip|tcp|udp|outer-ip|outer-udp) (sw|hw) (port_id) + testpmd> set fwd csum + +Multi-Queue +^^^^^^^^^^^ + +Multi-Queue, also known as TSS (Transmit Side Scaling) or RSS (Receive Side +Scaling), is a common networking technique that allows for more efficient load +balancing across multiple CPU cores. + +The application enables multiple TX and RX queues when it is started. + +.. code-block:: console + +   testpmd -l 1,3,5 --master-lcore 1 --txq=2 –rxq=2 --nb-cores=2 + +**TSS** + +TSS distributes network transmit processing across several hardware-based +transmit queues, allowing outbound network traffic to be processed by multiple +CPU cores. + +**RSS** + +RSS distributes network receive processing across several hardware-based receive +queues, allowing inbound network traffic to be processed by multiple CPU cores. + +The application can select the RSS mode, i.e. select the header fields that are +included for hash calculation. The BNXT PMD supports the RSS mode of +``default|ip|tcp|udp|none``, where default mode is L3 and L4. + +For tunneled packets, RSS hash is calculated over inner frame header fields. +Applications may want to select the tunnel header fields for hash calculation, +and it will be supported in 20.08 using RSS level. + +.. code-block:: console + +   testpmd> port config (port_id) rss (all|default|ip|tcp|udp|none) + + // note that the testpmd defaults the RSS mode to ip + // ensure to issue the command below to enable L4 header (TCP or UDP) along with IPv4 header +   testpmd> port config (port_id) rss default + + // to check the current RSS configuration, such as RSS function and RSS key +   testpmd> show port (port_id) rss-hash key + + // RSS is enabled by default. However, application can disable RSS as follows +   testpmd> port config (port_id) rss none + +Application can change the flow distribution, i.e. remap the received traffic to +CPU cores, using RSS RETA (Redirection Table). + +.. code-block:: console + + // application queries the current RSS RETA configuration + testpmd> show port (port_id) rss reta size (mask0, mask1) + + // application changes the RSS RETA configuration + testpmd> port config (port_id) rss reta (hash, queue) [, (hash, queue)] + +TSO +^^^ + +TSO (TCP Segmentation Offload), also known as LSO (Large Send Offload), enables +the TCP/IP stack to pass to the NIC a larger datagram than the MTU (Maximum +Transmit Unit). NIC breaks it into multiple segments before sending it to the +network. + +The BNXT PMD supports hardware-based TSO. + +.. code-block:: console + + // display the status of TSO +   testpmd> tso show (port_id) + + // enable/disable TSO +   testpmd> port config (port_id) tx_offload tcp_tso (on|off) + + // set TSO segment size +   testpmd> tso set segment_size (port_id) + +The BNXT PMD also supports hardware-based tunneled TSO. + +.. code-block:: console + + // display the status of tunneled TSO + testpmd> tunnel_tso show (port_id) + + // enable/disable tunneled TSO + testpmd> port config (port_id) tx_offload vxlan_tnl_tso|gre_tnl_tso (on|off) + + // set tunneled TSO segment size + testpmd> tunnel_tso set segment_size (port_id) + +Note that the checksum offload is always assumed to be enabled for TSO. + +LRO +^^^ + +LRO (Large Receive Offload) enables NIC to aggregate multiple incoming TCP/IP +packets from a single stream into a larger buffer, before passing to the +networking stack. + +The BNXT PMD supports hardware-based LRO. + +.. 
code-block:: console + + // display the status of LRO +   testpmd> show port (port_id) rx_offload capabilities +   testpmd> show port (port_id) rx_offload configuration + + // enable/disable LRO +   testpmd> port config (port_id) rx_offload tcp_lro (on|off) + + // set max LRO packet (datagram) size +   testpmd> port config (port_id) max-lro-pkt-size (max_size) + +The BNXT PMD also supports tunneled LRO. + +Some applications, such as routing, should *not* change the packet headers as +they pass through (i.e. received from and sent back to the network). In such a +case, GRO (Generic Receive Offload) should be used instead of LRO. + +VLAN Insert/Strip +~~~~~~~~~~~~~~~~~ + +DPDK application offloads VLAN insert/strip to improve performance. The BNXT PMD +supports hardware-based VLAN insert/strip offload for both single and double +VLAN packets. + + +VLAN Insert +^^^^^^^^^^^ + +Application configures the VLAN TPID (Tag Protocol ID). By default, the TPID is +0x8100. + +.. code-block:: console + + // configure outer TPID value for a port + testpmd> vlan set outer tpid (tpid_value) (port_id) + +The inner TPID set will be rejected as the BNXT PMD supports inserting only an +outer VLAN. Note that when a packet has a single VLAN, the tag is considered as +outer, i.e. the inner VLAN is relevant only when a packet is double-tagged. + +The BNXT PMD supports various TPID values shown below. Any other values will be +rejected. + +* ``0x8100`` +* ``0x88a8`` +* ``0x9100`` +* ``0x9200`` +* ``0x9300`` + +The BNXT PMD supports the VLAN insert offload per-packet basis. The application +provides the TCI (Tag Control Info) for a packet via mbuf. In turn, the BNXT PMD +inserts the VLAN tag (via hardware) using the provided TCI along with the +configured TPID. + +.. code-block:: console + + // enable VLAN insert offload + testpmd> port config (port_id) rx_offload vlan_insert|qinq_insert (on|off) + + if (mbuf->ol_flags && PKT_TX_QINQ) // case-1: insert VLAN to single-tagged packet +     tci_value = mbuf->vlan_tci_outer + else if (mbuf->ol_flags && PKT_TX_VLAN) // case-2: insert VLAN to untagged packet +     tci_value = mbuf->vlan_tci + +VLAN Strip +^^^^^^^^^^ + +The application configures the per-port VLAN strip offload. + +.. code-block:: console + + // enable VLAN strip on a port + testpmd> port config (port_id) tx_offload vlan_strip (on|off) + + // notify application VLAN strip via mbuf + mbuf->ol_flags |= PKT_RX_VLAN | PKT_RX_STRIPPED // outer VLAN is found and stripped + mbuf->vlan_tci = tci_value // TCI of the stripped VLAN + +Time Synchronization +~~~~~~~~~~~~~~~~~~~~ + +System operators may run a PTP (Precision Time Protocol) client application to +synchronize the time on the NIC (and optionally, on the system) to a PTP master. + +The BNXT PMD supports a PTP client application to communicate with a PTP master +clock using DPDK IEEE1588 APIs. Note that the PTP client application needs to +run on PF and vector mode needs to be disabled. + +For the PTP time synchronization support, the BNXT PMD must be compiled with +``CONFIG_RTE_LIBRTE_IEEE1588=y`` (this compilation flag is currently pending). + +.. code-block:: console + + testpmd> set fwd ieee1588 // enable IEEE 1588 mode + +When enabled, the BNXT PMD configures hardware to insert IEEE 1588 timestamps to +the outgoing PTP packets and reports IEEE 1588 timestamps from the incoming PTP +packets to application via mbuf. + +.. 
code-block:: console
+
+   // RX packet completion will indicate whether the packet is PTP
+   mbuf->ol_flags |= PKT_RX_IEEE1588_PTP
+
+Statistics Collection
+~~~~~~~~~~~~~~~~~~~~~
+
+In Linux, *ethtool -S* is used to query NIC stats. DPDK provides similar
+functionality via rte_eth_stats and rte_eth_xstats.
+
+The BNXT PMD supports both basic and extended stats collection:
+
+* Basic stats
+* Extended stats
+
+Basic Stats
+^^^^^^^^^^^
+
+The application collects per-port and per-queue stats using rte_eth_stats APIs.
+
+.. code-block:: console
+
+   testpmd> show port stats (port_id)
+
+Basic stats include:
+
+* ipackets
+* ibytes
+* opackets
+* obytes
+* imissed
+* ierrors
+* oerrors
+
+By default, per-queue stats for 16 queues are supported. For more than 16
+queues, the BNXT PMD should be compiled with ``CONFIG_RTE_ETHDEV_QUEUE_STAT_CNTRS``
+set to the desired number of queues.
+
+Extended Stats
+^^^^^^^^^^^^^^
+
+Unlike basic stats, the extended stats are vendor-specific, i.e. each vendor
+provides its own set of counters.
+
+The BNXT PMD provides a rich set of counters, including per-flow counters,
+per-cos counters, per-priority counters, etc.
+
+.. code-block:: console
+
+   testpmd> show port xstats (port_id)
+
+Shown below is the elaborated sequence to retrieve extended stats:
+
+.. code-block:: console
+
+   // application queries the number of xstats
+   len = rte_eth_xstats_get(port_id, NULL, 0);
+   // BNXT PMD returns the size of xstats array (i.e. the number of entries)
+   // BNXT PMD returns 0, if the feature is compiled out or disabled
+
+   // application allocates memory for xstats
+   struct rte_eth_xstat_name *names; // each name is up to 64 characters
+   struct rte_eth_xstat *xstats;
+   names = calloc(len, sizeof(*names));
+   xstats = calloc(len, sizeof(*xstats));
+
+   // application retrieves xstats // names and values
+   ret = rte_eth_xstats_get_names(port_id, names, len);
+   ret = rte_eth_xstats_get(port_id, xstats, len);
+
+   // application checks the xstats
+   // application may repeat the below:
+   rte_eth_xstats_reset(port_id); // reset the xstats
+
+   // reset can be skipped, if application wants to see accumulated stats
+   // run traffic
+   // probably stop the traffic
+   // retrieve xstats // no need to retrieve xstats names again
+   // check xstats
+
+Generic Flow Offload
+~~~~~~~~~~~~~~~~~~~~
+
+Applications can benefit by offloading all or part of flow processing to
+hardware. For example, applications can offload packet classification only
+(partial offload) or whole match-action (full offload).
+
+DPDK offers the Generic Flow API (rte_flow API) to configure hardware to
+perform flow processing.
+
+Listed below are the rte_flow APIs BNXT PMD supports:
+
+* rte_flow_validate
+* rte_flow_create
+* rte_flow_destroy
+* rte_flow_flush
+
+Host Based Flow Table Management
+^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
+
+Starting with DPDK 20.05, the BNXT PMD supports host based flow table
+management. This is a new mechanism that should allow higher flow scalability
+than what is currently supported. This new approach also defines a new rte_flow
+parser and mapper, which currently supports basic packet classification in the
+receive path.
+
+The feature uses a newly implemented control-plane firmware interface which
+optimizes flow insertions and deletions.
+
+This is a tech preview feature, and is disabled by default. It can be enabled
+using bnxt devargs. For example: "-w 0000:0d:00.0,host-based-truflow=1".
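+
+As a quick sanity check of the rte_flow support described above, a simple
+classification rule can be exercised from testpmd. The port number, IPv4
+address, and queue index below are example values only, and whether a given
+rule is accepted depends on the adapter and firmware capabilities:
+
+.. code-block:: console
+
+   // validate and create a rule steering IPv4 traffic for 192.168.1.1 to queue 1
+   testpmd> flow validate 0 ingress pattern eth / ipv4 dst is 192.168.1.1 / end actions queue index 1 / end
+   testpmd> flow create 0 ingress pattern eth / ipv4 dst is 192.168.1.1 / end actions queue index 1 / end
+
+   // list and remove the rules on port 0
+   testpmd> flow list 0
+   testpmd> flow flush 0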
+
+Application Support
+-------------------
+
+Firmware
+~~~~~~~~
+
+The BNXT PMD allows the application to retrieve the firmware version.
+
+.. code-block:: console
+
+   testpmd> show port info (port_id)
+
+Note that applications cannot update the firmware using the BNXT PMD.
+
+Multiple Processes
+~~~~~~~~~~~~~~~~~~
+
+When two or more DPDK applications (e.g., testpmd and dpdk-pdump) share a single
+instance of DPDK, the BNXT PMD supports a single primary application and one or
+more secondary applications. Note that the DPDK-layer (not the PMD) ensures
+there is only one primary application.
+
+There are two modes:
+
+Manual mode
+
+* Application notifies whether it is primary or secondary using *proc-type* flag
+* The first process should be spawned with ``--proc-type=primary``
+* All subsequent processes should be spawned with ``--proc-type=secondary``
+
+Auto detection mode
+
+* Application is spawned with the ``--proc-type=auto`` flag
+* A process is spawned as a secondary if a primary is already running
+
+The BNXT PMD uses this info to skip device initialization, i.e. it performs
+device initialization only when being brought up by a primary application.
+
+Runtime Queue Setup
+~~~~~~~~~~~~~~~~~~~
+
+Typically, a DPDK application allocates TX and RX queues statically, i.e. queues
+are allocated at start. However, an application may want to increase (or
+decrease) the number of queues dynamically for various reasons, e.g. power
+savings.
+
+The BNXT PMD allows applications to increase or decrease the number of queues
+at runtime.
+
+.. code-block:: console
+
+   testpmd> port config all (rxq|txq) (num_queues)
+
+Note that a DPDK application must allocate default queues (one for TX and one
+for RX at minimum) at initialization.
+
+Descriptor Status
+~~~~~~~~~~~~~~~~~
+
+Applications may use the descriptor status for various reasons, e.g. for power
+savings. For example, an application may stop polling and change to interrupt
+mode when the descriptor status shows no packets to service for a while.
+
+The BNXT PMD allows the application to retrieve both TX and RX descriptor
+status.
+
+.. code-block:: console
+
+   testpmd> show port (port_id) (rxq|txq) (queue_id) desc (desc_id) status
+
+Bonding
+~~~~~~~
+
+DPDK implements a light-weight library to allow PMDs to be bonded together and
+provide a single logical PMD to the application.
+
+.. code-block:: console
+
+   testpmd -l 0-3 -n4 --vdev 'net_bonding0,mode=0,slave=<pci_bdf_1>,slave=<pci_bdf_2>,mac=XX:XX:XX:XX:XX:XX' -- --socket-num=1 -i --port-topology=chained
+   (ex) testpmd -l 1,3,5,7,9 -n4 --vdev 'net_bonding0,mode=0,slave=0000:82:00.0,slave=0000:82:00.1,mac=00:1e:67:1d:fd:1d' -- --socket-num=1 -i --port-topology=chained
+
+Vector Processing
+-----------------
+
+Vector processing provides significantly improved performance over scalar
+processing.
+
+The BNXT PMD supports vector processing using SSE (Streaming SIMD Extensions)
+instructions on x86 platforms. The BNXT vPMD (vector mode PMD) is currently
+limited to Intel/AMD CPU architecture. Support for ARM is *not* currently
+implemented.
+
+This improved performance comes from several optimizations:
+
+* Batching
+  * TX: processing completions in bulk
+  * RX: allocating mbufs in bulk
+* Chained mbufs are *not* supported, i.e.
a packet should fit a single mbuf +* Some stateless offloads are *not* supported with vector processing +  * TX: no offloads will be supported +  * RX: reduced RX offloads (listed below) will be supported:: + +   DEV_RX_OFFLOAD_VLAN_STRIP +   DEV_RX_OFFLOAD_KEEP_CRC +   DEV_RX_OFFLOAD_JUMBO_FRAME +   DEV_RX_OFFLOAD_IPV4_CKSUM +   DEV_RX_OFFLOAD_UDP_CKSUM +   DEV_RX_OFFLOAD_TCP_CKSUM +   DEV_RX_OFFLOAD_OUTER_IPV4_CKSUM +   DEV_RX_OFFLOAD_RSS_HASH +   DEV_RX_OFFLOAD_VLAN_FILTER + +The BNXT Vector PMD is enabled in DPDK builds by default. + +However, a decision to enable vector mode will be made when the port transitions +from stopped to started. Any TX offloads or some RX offloads (other than listed +above) will disable the vector mode. +Offload configuration changes that impact vector mode must be made when the port +is stopped. + +Note that TX (or RX) vector mode can be enabled independently from RX (or TX) +vector mode. + +Appendix +-------- + +Supported Chipsets and Adapters +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ + +BCM5730x NetXtreme-C® Family of Ethernet Network Controllers +^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ + +Information about Ethernet adapters in the NetXtreme family of adapters can be +found in the `NetXtreme® Brand section `_ of the `Broadcom website `_. + +* ``M150c ... Single-port 40/50 Gigabit Ethernet Adapter`` +* ``P150c ... Single-port 40/50 Gigabit Ethernet Adapter`` +* ``P225c ... Dual-port 10/25 Gigabit Ethernet Adapter`` + +BCM574xx/575xx NetXtreme-E® Family of Ethernet Network Controllers +^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ + +Information about Ethernet adapters in the NetXtreme family of adapters can be +found in the `NetXtreme® Brand section `_ of the `Broadcom website `_. + +* ``M125P .... Single-port OCP 2.0 10/25 Gigabit Ethernet Adapter`` +* ``M150P .... Single-port OCP 2.0 50 Gigabit Ethernet Adapter`` +* ``M150PM ... Single-port OCP 2.0 Multi-Host 50 Gigabit Ethernet Adapter`` +* ``M210P .... Dual-port OCP 2.0 10 Gigabit Ethernet Adapter`` +* ``M210TP ... Dual-port OCP 2.0 10 Gigabit Ethernet Adapter`` +* ``M1100G ... Single-port OCP 2.0 10/25/50/100 Gigabit Ethernet Adapter`` +* ``N150G .... Single-port OCP 3.0 50 Gigabit Ethernet Adapter`` +* ``M225P .... Dual-port OCP 2.0 10/25 Gigabit Ethernet Adapter`` +* ``N210P .... Dual-port OCP 3.0 10 Gigabit Ethernet Adapter`` +* ``N210TP ... Dual-port OCP 3.0 10 Gigabit Ethernet Adapter`` +* ``N225P .... Dual-port OCP 3.0 10/25 Gigabit Ethernet Adapter`` +* ``N250G .... Dual-port OCP 3.0 50 Gigabit Ethernet Adapter`` +* ``N410SG ... Quad-port OCP 3.0 10 Gigabit Ethernet Adapter`` +* ``N410SGBT . Quad-port OCP 3.0 10 Gigabit Ethernet Adapter`` +* ``N425G .... Quad-port OCP 3.0 10/25 Gigabit Ethernet Adapter`` +* ``N1100G ... Single-port OCP 3.0 10/25/50/100 Gigabit Ethernet Adapter`` +* ``N2100G ... Dual-port OCP 3.0 10/25/50/100 Gigabit Ethernet Adapter`` +* ``N2200G ... Dual-port OCP 3.0 10/25/50/100/200 Gigabit Ethernet Adapter`` +* ``P150P .... Single-port 50 Gigabit Ethernet Adapter`` +* ``P210P .... Dual-port 10 Gigabit Ethernet Adapter`` +* ``P210TP ... Dual-port 10 Gigabit Ethernet Adapter`` +* ``P225P .... Dual-port 10/25 Gigabit Ethernet Adapter`` +* ``P410SG ... Quad-port 10 Gigabit Ethernet Adapter`` +* ``P410SGBT . Quad-port 10 Gigabit Ethernet Adapter`` +* ``P425G .... Quad-port 10/25 Gigabit Ethernet Adapter`` +* ``P1100G ... Single-port 10/25/50/100 Gigabit Ethernet Adapter`` +* ``P2100G ... 
Dual-port 10/25/50/100 Gigabit Ethernet Adapter`` +* ``P2200G ... Dual-port 10/25/50/100/200 Gigabit Ethernet Adapter`` + +BCM588xx NetXtreme-S® Family of SmartNIC Network Controllers +^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ + +Information about the Stingray family of SmartNIC adapters can be found in the +`Stingray® Brand section `_ of the `Broadcom website `_. + +* ``PS225 ... Dual-port 25 Gigabit Ethernet SmartNIC`` + +BCM5873x StrataGX® Family of Communications Processors +^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ + +These ARM-based processors target a broad range of networking applications, +including virtual CPE (vCPE) and NFV appliances, 10G service routers and +gateways, control plane processing for Ethernet switches, and network-attached +storage (NAS). + +* ``StrataGX BCM58732 ... Octal-Core 3.0GHz 64-bit ARM®v8 Cortex®-A72 based SoC`` diff --git a/src/spdk/dpdk/doc/guides/nics/build_and_test.rst b/src/spdk/dpdk/doc/guides/nics/build_and_test.rst new file mode 100644 index 000000000..f99e019ff --- /dev/null +++ b/src/spdk/dpdk/doc/guides/nics/build_and_test.rst @@ -0,0 +1,157 @@ +.. SPDX-License-Identifier: BSD-3-Clause + Copyright(c) 2017 Cavium, Inc + +.. _pmd_build_and_test: + +Compiling and testing a PMD for a NIC +===================================== + +This section demonstrates how to compile and run a Poll Mode Driver (PMD) for +the available Network Interface Cards in DPDK using TestPMD. + +TestPMD is one of the reference applications distributed with the DPDK. Its main +purpose is to forward packets between Ethernet ports on a network interface and +as such is the best way to test a PMD. + +Refer to the :ref:`testpmd application user guide ` for detailed +information on how to build and run testpmd. + +Driver Compilation +------------------ + +To compile a PMD for a platform, run make with appropriate target as shown below. +Use "make" command in Linux and "gmake" in FreeBSD. This will also build testpmd. + +To check available targets: + +.. code-block:: console + + cd + make showconfigs + +Example output: + +.. code-block:: console + + arm-armv7a-linux-gcc + arm64-armv8a-linux-gcc + arm64-dpaa-linux-gcc + arm64-thunderx-linux-gcc + arm64-xgene1-linux-gcc + i686-native-linux-gcc + i686-native-linux-icc + ppc_64-power8-linux-gcc + x86_64-native-freebsd-clang + x86_64-native-freebsd-gcc + x86_64-native-linux-clang + x86_64-native-linux-gcc + x86_64-native-linux-icc + x86_x32-native-linux-gcc + +To compile a PMD for Linux x86_64 gcc target, run the following "make" command: + +.. code-block:: console + + make install T=x86_64-native-linux-gcc + +Use ARM (ThunderX, DPAA, X-Gene) or PowerPC target for respective platform. + +For more information, refer to the :ref:`Getting Started Guide for Linux ` +or :ref:`Getting Started Guide for FreeBSD ` depending on your platform. + +Running testpmd in Linux +------------------------ + +This section demonstrates how to setup and run ``testpmd`` in Linux. + +#. Mount huge pages: + + .. code-block:: console + + mkdir /mnt/huge + mount -t hugetlbfs nodev /mnt/huge + +#. Request huge pages: + + Hugepage memory should be reserved as per application requirement. Check + hugepage size configured in the system and calculate the number of pages + required. + + To reserve 1024 pages of 2MB: + + .. code-block:: console + + echo 1024 > /sys/kernel/mm/hugepages/hugepages-2048kB/nr_hugepages + + .. note:: + + Check ``/proc/meminfo`` to find system hugepage size: + + .. 
code-block:: console + + grep "Hugepagesize:" /proc/meminfo + + Example output: + + .. code-block:: console + + Hugepagesize: 2048 kB + +#. Load ``igb_uio`` or ``vfio-pci`` driver: + + .. code-block:: console + + modprobe uio + insmod ./x86_64-native-linux-gcc/kmod/igb_uio.ko + + or + + .. code-block:: console + + modprobe vfio-pci + +#. Setup VFIO permissions for regular users before binding to ``vfio-pci``: + + .. code-block:: console + + sudo chmod a+x /dev/vfio + + sudo chmod 0666 /dev/vfio/* + +#. Bind the adapters to ``igb_uio`` or ``vfio-pci`` loaded in the previous step: + + .. code-block:: console + + ./usertools/dpdk-devbind.py --bind igb_uio DEVICE1 DEVICE2 ... + + Or setup VFIO permissions for regular users and then bind to ``vfio-pci``: + + .. code-block:: console + + ./usertools/dpdk-devbind.py --bind vfio-pci DEVICE1 DEVICE2 ... + + .. note:: + + DEVICE1, DEVICE2 are specified via PCI "domain:bus:slot.func" syntax or + "bus:slot.func" syntax. + +#. Start ``testpmd`` with basic parameters: + + .. code-block:: console + + ./x86_64-native-linux-gcc/app/testpmd -l 0-3 -n 4 -- -i + + Successful execution will show initialization messages from EAL, PMD and + testpmd application. A prompt will be displayed at the end for user commands + as interactive mode (``-i``) is on. + + .. code-block:: console + + testpmd> + + Refer to the :ref:`testpmd runtime functions ` for a list + of available commands. + + .. note:: + When ``testpmd`` is built with shared library, use option ``-d`` to load + the dynamic PMD for ``rte_eal_init``. diff --git a/src/spdk/dpdk/doc/guides/nics/cxgbe.rst b/src/spdk/dpdk/doc/guides/nics/cxgbe.rst new file mode 100644 index 000000000..54a4c1389 --- /dev/null +++ b/src/spdk/dpdk/doc/guides/nics/cxgbe.rst @@ -0,0 +1,856 @@ +.. SPDX-License-Identifier: BSD-3-Clause + Copyright(c) 2014-2018 Chelsio Communications. + All rights reserved. + +CXGBE Poll Mode Driver +====================== + +The CXGBE PMD (**librte_pmd_cxgbe**) provides poll mode driver support +for **Chelsio Terminator** 10/25/40/100 Gbps family of adapters. CXGBE PMD +has support for the latest Linux and FreeBSD operating systems. + +CXGBEVF PMD provides poll mode driver support for SR-IOV Virtual functions +and has support for the latest Linux operating systems. + +More information can be found at `Chelsio Communications Official Website +`_. + +Features +-------- + +CXGBE and CXGBEVF PMD has support for: + +- Multiple queues for TX and RX +- Receiver Side Steering (RSS) + Receiver Side Steering (RSS) on IPv4, IPv6, IPv4-TCP/UDP, IPv6-TCP/UDP. + For 4-tuple, enabling 'RSS on TCP' and 'RSS on TCP + UDP' is supported. +- VLAN filtering +- Checksum offload +- Promiscuous mode +- All multicast mode +- Port hardware statistics +- Jumbo frames +- Flow API - Support for both Wildcard (LE-TCAM) and Exact (HASH) match filters. + +Limitations +----------- + +The Chelsio Terminator series of devices provide two/four ports but +expose a single PCI bus address, thus, librte_pmd_cxgbe registers +itself as a PCI driver that allocates one Ethernet device per detected +port. + +For this reason, one cannot whitelist/blacklist a single port without +whitelisting/blacklisting the other ports on the same device. + +.. _t5-nics: + +Supported Chelsio T5 NICs +------------------------- + +- 1G NICs: T502-BT +- 10G NICs: T520-BT, T520-CR, T520-LL-CR, T520-SO-CR, T540-CR +- 40G NICs: T580-CR, T580-LP-CR, T580-SO-CR +- Other T5 NICs: T522-CR + +.. 
_t6-nics: + +Supported Chelsio T6 NICs +------------------------- + +- 25G NICs: T6425-CR, T6225-CR, T6225-LL-CR, T6225-SO-CR +- 100G NICs: T62100-CR, T62100-LP-CR, T62100-SO-CR + +Supported SR-IOV Chelsio NICs +----------------------------- + +SR-IOV virtual functions are supported on all the Chelsio NICs listed +in :ref:`t5-nics` and :ref:`t6-nics`. + +Prerequisites +------------- + +- Requires firmware version **1.24.11.0** and higher. Visit + `Chelsio Download Center `_ to get latest firmware + bundled with the latest Chelsio Unified Wire package. + + For Linux, installing and loading the latest cxgb4 kernel driver from the + Chelsio Unified Wire package should get you the latest firmware. More + information can be obtained from the User Guide that is bundled with the + Chelsio Unified Wire package. + + For FreeBSD, the latest firmware obtained from the Chelsio Unified Wire + package must be manually flashed via cxgbetool available in FreeBSD source + repository. + + Instructions on how to manually flash the firmware are given in section + :ref:`linux-installation` for Linux and section :ref:`freebsd-installation` + for FreeBSD. + +Pre-Installation Configuration +------------------------------ + +Config File Options +~~~~~~~~~~~~~~~~~~~ + +The following options can be modified in the ``.config`` file. Please note that +enabling debugging options may affect system performance. + +- ``CONFIG_RTE_LIBRTE_CXGBE_PMD`` (default **y**) + + Toggle compilation of librte_pmd_cxgbe driver. + + .. note:: + + This controls compilation of both CXGBE and CXGBEVF PMD. + +Runtime Options +~~~~~~~~~~~~~~~ + +The following ``devargs`` options can be enabled at runtime. They must +be passed as part of EAL arguments. For example, + +.. code-block:: console + + testpmd -w 02:00.4,keep_ovlan=1 -- -i + +Common Runtime Options +^^^^^^^^^^^^^^^^^^^^^^ + +- ``keep_ovlan`` (default **0**) + + Toggle behavior to keep/strip outer VLAN in Q-in-Q packets. If + enabled, the outer VLAN tag is preserved in Q-in-Q packets. Otherwise, + the outer VLAN tag is stripped in Q-in-Q packets. + +- ``tx_mode_latency`` (default **0**) + + When set to 1, Tx doesn't wait for max number of packets to get + coalesced and sends the packets immediately at the end of the + current Tx burst. When set to 0, Tx waits across multiple Tx bursts + until the max number of packets have been coalesced. In this case, + Tx only sends the coalesced packets to hardware once the max + coalesce limit has been reached. + +CXGBE VF Only Runtime Options +^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ + +- ``force_link_up`` (default **0**) + + When set to 1, CXGBEVF PMD always forces link as up for all VFs on + underlying Chelsio NICs. This enables multiple VFs on the same NIC + to send traffic to each other even when the physical link is down. + +CXGBE PF Only Runtime Options +^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ + +- ``filtermode`` (default **0**) + + Apart from the 4-tuple (IP src/dst addresses and TCP/UDP src/dst port + addresses), there are only 40-bits available to match other fields in + packet headers. So, ``filtermode`` devarg allows user to dynamically + select a 40-bit supported match field combination for LETCAM (wildcard) + filters. + + Default value of **0** makes driver pick the combination configured in + the firmware configuration file on the adapter. + + The supported flags and their corresponding values are shown in table below. + These flags can be OR'd to create 1 of the multiple supported combinations + for LETCAM filters. 
+ + ================== ====== + FLAG VALUE + ================== ====== + Physical Port 0x1 + PFVF 0x2 + Destination MAC 0x4 + Ethertype 0x8 + Inner VLAN 0x10 + Outer VLAN 0x20 + IP TOS 0x40 + IP Protocol 0x80 + ================== ====== + + The supported ``filtermode`` combinations and their corresponding OR'd + values are shown in table below. + + +-----------------------------------+-----------+ + | FILTERMODE COMBINATIONS | VALUE | + +===================================+===========+ + | Protocol, TOS, Outer VLAN, Port | 0xE1 | + +-----------------------------------+-----------+ + | Protocol, TOS, Outer VLAN | 0xE0 | + +-----------------------------------+-----------+ + | Protocol, TOS, Inner VLAN, Port | 0xD1 | + +-----------------------------------+-----------+ + | Protocol, TOS, Inner VLAN | 0xD0 | + +-----------------------------------+-----------+ + | Protocol, TOS, PFVF, Port | 0xC3 | + +-----------------------------------+-----------+ + | Protocol, TOS, PFVF | 0xC2 | + +-----------------------------------+-----------+ + | Protocol, TOS, Port | 0xC1 | + +-----------------------------------+-----------+ + | Protocol, TOS | 0xC0 | + +-----------------------------------+-----------+ + | Protocol, Outer VLAN, Port | 0xA1 | + +-----------------------------------+-----------+ + | Protocol, Outer VLAN | 0xA0 | + +-----------------------------------+-----------+ + | Protocol, Inner VLAN, Port | 0x91 | + +-----------------------------------+-----------+ + | Protocol, Inner VLAN | 0x90 | + +-----------------------------------+-----------+ + | Protocol, Ethertype, DstMAC, Port | 0x8D | + +-----------------------------------+-----------+ + | Protocol, Ethertype, DstMAC | 0x8C | + +-----------------------------------+-----------+ + | Protocol, Ethertype, Port | 0x89 | + +-----------------------------------+-----------+ + | Protocol, Ethertype | 0x88 | + +-----------------------------------+-----------+ + | Protocol, DstMAC, PFVF, Port | 0x87 | + +-----------------------------------+-----------+ + | Protocol, DstMAC, PFVF | 0x86 | + +-----------------------------------+-----------+ + | Protocol, DstMAC, Port | 0x85 | + +-----------------------------------+-----------+ + | Protocol, DstMAC | 0x84 | + +-----------------------------------+-----------+ + | Protocol, PFVF, Port | 0x83 | + +-----------------------------------+-----------+ + | Protocol, PFVF | 0x82 | + +-----------------------------------+-----------+ + | Protocol, Port | 0x81 | + +-----------------------------------+-----------+ + | Protocol | 0x80 | + +-----------------------------------+-----------+ + | TOS, Outer VLAN, Port | 0x61 | + +-----------------------------------+-----------+ + | TOS, Outer VLAN | 0x60 | + +-----------------------------------+-----------+ + | TOS, Inner VLAN, Port | 0x51 | + +-----------------------------------+-----------+ + | TOS, Inner VLAN | 0x50 | + +-----------------------------------+-----------+ + | TOS, Ethertype, DstMAC, Port | 0x4D | + +-----------------------------------+-----------+ + | TOS, Ethertype, DstMAC | 0x4C | + +-----------------------------------+-----------+ + | TOS, Ethertype, Port | 0x49 | + +-----------------------------------+-----------+ + | TOS, Ethertype | 0x48 | + +-----------------------------------+-----------+ + | TOS, DstMAC, PFVF, Port | 0x47 | + +-----------------------------------+-----------+ + | TOS, DstMAC, PFVF | 0x46 | + +-----------------------------------+-----------+ + | TOS, DstMAC, Port | 0x45 | + +-----------------------------------+-----------+ + | TOS, 
DstMAC | 0x44 | + +-----------------------------------+-----------+ + | TOS, PFVF, Port | 0x43 | + +-----------------------------------+-----------+ + | TOS, PFVF | 0x42 | + +-----------------------------------+-----------+ + | TOS, Port | 0x41 | + +-----------------------------------+-----------+ + | TOS | 0x40 | + +-----------------------------------+-----------+ + | Outer VLAN, Inner VLAN, Port | 0x31 | + +-----------------------------------+-----------+ + | Outer VLAN, Ethertype, Port | 0x29 | + +-----------------------------------+-----------+ + | Outer VLAN, Ethertype | 0x28 | + +-----------------------------------+-----------+ + | Outer VLAN, DstMAC, Port | 0x25 | + +-----------------------------------+-----------+ + | Outer VLAN, DstMAC | 0x24 | + +-----------------------------------+-----------+ + | Outer VLAN, Port | 0x21 | + +-----------------------------------+-----------+ + | Outer VLAN | 0x20 | + +-----------------------------------+-----------+ + | Inner VLAN, Ethertype, Port | 0x19 | + +-----------------------------------+-----------+ + | Inner VLAN, Ethertype | 0x18 | + +-----------------------------------+-----------+ + | Inner VLAN, DstMAC, Port | 0x15 | + +-----------------------------------+-----------+ + | Inner VLAN, DstMAC | 0x14 | + +-----------------------------------+-----------+ + | Inner VLAN, Port | 0x11 | + +-----------------------------------+-----------+ + | Inner VLAN | 0x10 | + +-----------------------------------+-----------+ + | Ethertype, DstMAC, Port | 0xD | + +-----------------------------------+-----------+ + | Ethertype, DstMAC | 0xC | + +-----------------------------------+-----------+ + | Ethertype, PFVF, Port | 0xB | + +-----------------------------------+-----------+ + | Ethertype, PFVF | 0xA | + +-----------------------------------+-----------+ + | Ethertype, Port | 0x9 | + +-----------------------------------+-----------+ + | Ethertype | 0x8 | + +-----------------------------------+-----------+ + | DstMAC, PFVF, Port | 0x7 | + +-----------------------------------+-----------+ + | DstMAC, PFVF | 0x6 | + +-----------------------------------+-----------+ + | DstMAC, Port | 0x5 | + +-----------------------------------+-----------+ + | Destination MAC | 0x4 | + +-----------------------------------+-----------+ + | PFVF, Port | 0x3 | + +-----------------------------------+-----------+ + | PFVF | 0x2 | + +-----------------------------------+-----------+ + | Physical Port | 0x1 + + +-----------------------------------+-----------+ + + For example, to enable matching ``ethertype`` field in Ethernet + header, and ``protocol`` field in IPv4 header, the ``filtermode`` + combination must be given as: + + .. code-block:: console + + testpmd -w 02:00.4,filtermode=0x88 -- -i + +- ``filtermask`` (default **0**) + + ``filtermask`` devarg works similar to ``filtermode``, but is used + to configure a filter mode combination for HASH (exact-match) filters. + + .. note:: + + The combination chosen for ``filtermask`` devarg **must be a subset** of + the combination chosen for ``filtermode`` devarg. + + Default value of **0** makes driver pick the combination configured in + the firmware configuration file on the adapter. + + Note that the filter rule will only be inserted in HASH region, if the + rule contains **all** the fields specified in the ``filtermask`` combination. + Otherwise, the filter rule will get inserted in LETCAM region. 
+ + The same combination list explained in the tables in ``filtermode`` devarg + section earlier applies for ``filtermask`` devarg, as well. + + For example, to enable matching only protocol field in IPv4 header, the + ``filtermask`` combination must be given as: + + .. code-block:: console + + testpmd -w 02:00.4,filtermode=0x88,filtermask=0x80 -- -i + +.. _driver-compilation: + +Driver compilation and testing +------------------------------ + +Refer to the document :ref:`compiling and testing a PMD for a NIC ` +for details. + +Linux +----- + +.. _linux-installation: + +Linux Installation +~~~~~~~~~~~~~~~~~~ + +Steps to manually install the latest firmware from the downloaded Chelsio +Unified Wire package for Linux operating system are as follows: + +#. Load the kernel module: + + .. code-block:: console + + modprobe cxgb4 + +#. Use ifconfig to get the interface name assigned to Chelsio card: + + .. code-block:: console + + ifconfig -a | grep "00:07:43" + + Example output: + + .. code-block:: console + + p1p1 Link encap:Ethernet HWaddr 00:07:43:2D:EA:C0 + p1p2 Link encap:Ethernet HWaddr 00:07:43:2D:EA:C8 + +#. Install cxgbtool: + + .. code-block:: console + + cd /tools/cxgbtool + make install + +#. Use cxgbtool to load the firmware config file onto the card: + + .. code-block:: console + + cxgbtool p1p1 loadcfg /src/network/firmware/t5-config.txt + +#. Use cxgbtool to load the firmware image onto the card: + + .. code-block:: console + + cxgbtool p1p1 loadfw /src/network/firmware/t5fw-*.bin + +#. Unload and reload the kernel module: + + .. code-block:: console + + modprobe -r cxgb4 + modprobe cxgb4 + +#. Verify with ethtool: + + .. code-block:: console + + ethtool -i p1p1 | grep "firmware" + + Example output: + + .. code-block:: console + + firmware-version: 1.24.11.0, TP 0.1.23.2 + +Running testpmd +~~~~~~~~~~~~~~~ + +This section demonstrates how to launch **testpmd** with Chelsio +devices managed by librte_pmd_cxgbe in Linux operating system. + +#. Load the kernel module: + + .. code-block:: console + + modprobe cxgb4 + +#. Get the PCI bus addresses of the interfaces bound to cxgb4 driver: + + .. code-block:: console + + dmesg | tail -2 + + Example output: + + .. code-block:: console + + cxgb4 0000:02:00.4 p1p1: renamed from eth0 + cxgb4 0000:02:00.4 p1p2: renamed from eth1 + + .. note:: + + Both the interfaces of a Chelsio 2-port adapter are bound to the + same PCI bus address. + +#. Unload the kernel module: + + .. code-block:: console + + modprobe -ar cxgb4 csiostor + +#. Running testpmd + + Follow instructions available in the document + :ref:`compiling and testing a PMD for a NIC ` + to run testpmd. + + .. note:: + + Currently, CXGBE PMD only supports the binding of PF4 for Chelsio NICs. + + Example output: + + .. code-block:: console + + [...] + EAL: PCI device 0000:02:00.4 on NUMA socket -1 + EAL: probe driver: 1425:5401 rte_cxgbe_pmd + EAL: PCI memory mapped at 0x7fd7c0200000 + EAL: PCI memory mapped at 0x7fd77cdfd000 + EAL: PCI memory mapped at 0x7fd7c10b7000 + PMD: rte_cxgbe_pmd: fw: 1.24.11.0, TP: 0.1.23.2 + PMD: rte_cxgbe_pmd: Coming up as MASTER: Initializing adapter + Interactive-mode selected + Configuring Port 0 (socket 0) + Port 0: 00:07:43:2D:EA:C0 + Configuring Port 1 (socket 0) + Port 1: 00:07:43:2D:EA:C8 + Checking link statuses... 
+ PMD: rte_cxgbe_pmd: Port0: passive DA port module inserted + PMD: rte_cxgbe_pmd: Port1: passive DA port module inserted + Port 0 Link Up - speed 10000 Mbps - full-duplex + Port 1 Link Up - speed 10000 Mbps - full-duplex + Done + testpmd> + + .. note:: + + Flow control pause TX/RX is disabled by default and can be enabled via + testpmd. Refer section :ref:`flow-control` for more details. + +Configuring SR-IOV Virtual Functions +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ + +This section demonstrates how to enable SR-IOV virtual functions +on Chelsio NICs and demonstrates how to run testpmd with SR-IOV +virtual functions. + +#. Load the kernel module: + + .. code-block:: console + + modprobe cxgb4 + +#. Get the PCI bus addresses of the interfaces bound to cxgb4 driver: + + .. code-block:: console + + dmesg | tail -2 + + Example output: + + .. code-block:: console + + cxgb4 0000:02:00.4 p1p1: renamed from eth0 + cxgb4 0000:02:00.4 p1p2: renamed from eth1 + + .. note:: + + Both the interfaces of a Chelsio 2-port adapter are bound to the + same PCI bus address. + +#. Use ifconfig to get the interface name assigned to Chelsio card: + + .. code-block:: console + + ifconfig -a | grep "00:07:43" + + Example output: + + .. code-block:: console + + p1p1 Link encap:Ethernet HWaddr 00:07:43:2D:EA:C0 + p1p2 Link encap:Ethernet HWaddr 00:07:43:2D:EA:C8 + +#. Bring up the interfaces: + + .. code-block:: console + + ifconfig p1p1 up + ifconfig p1p2 up + +#. Instantiate SR-IOV Virtual Functions. PF0..3 can be used for + SR-IOV VFs. Multiple VFs can be instantiated on each of PF0..3. + To instantiate one SR-IOV VF on each PF0 and PF1: + + .. code-block:: console + + echo 1 > /sys/bus/pci/devices/0000\:02\:00.0/sriov_numvfs + echo 1 > /sys/bus/pci/devices/0000\:02\:00.1/sriov_numvfs + +#. Get the PCI bus addresses of the virtual functions: + + .. code-block:: console + + lspci | grep -i "Chelsio" | grep -i "VF" + + Example output: + + .. code-block:: console + + 02:01.0 Ethernet controller: Chelsio Communications Inc T540-CR Unified Wire Ethernet Controller [VF] + 02:01.1 Ethernet controller: Chelsio Communications Inc T540-CR Unified Wire Ethernet Controller [VF] + +#. Running testpmd + + Follow instructions available in the document + :ref:`compiling and testing a PMD for a NIC ` + to bind virtual functions and run testpmd. + + Example output: + + .. code-block:: console + + [...] + EAL: PCI device 0000:02:01.0 on NUMA socket 0 + EAL: probe driver: 1425:5803 net_cxgbevf + PMD: rte_cxgbe_pmd: Firmware version: 1.24.11.0 + PMD: rte_cxgbe_pmd: TP Microcode version: 0.1.23.2 + PMD: rte_cxgbe_pmd: Chelsio rev 0 + PMD: rte_cxgbe_pmd: No bootstrap loaded + PMD: rte_cxgbe_pmd: No Expansion ROM loaded + PMD: rte_cxgbe_pmd: 0000:02:01.0 Chelsio rev 0 1G/10GBASE-SFP + EAL: PCI device 0000:02:01.1 on NUMA socket 0 + EAL: probe driver: 1425:5803 net_cxgbevf + PMD: rte_cxgbe_pmd: Firmware version: 1.24.11.0 + PMD: rte_cxgbe_pmd: TP Microcode version: 0.1.23.2 + PMD: rte_cxgbe_pmd: Chelsio rev 0 + PMD: rte_cxgbe_pmd: No bootstrap loaded + PMD: rte_cxgbe_pmd: No Expansion ROM loaded + PMD: rte_cxgbe_pmd: 0000:02:01.1 Chelsio rev 0 1G/10GBASE-SFP + Configuring Port 0 (socket 0) + Port 0: 06:44:29:44:40:00 + Configuring Port 1 (socket 0) + Port 1: 06:44:29:44:40:10 + Checking link statuses... + Done + testpmd> + +FreeBSD +------- + +.. 
_freebsd-installation: + +FreeBSD Installation +~~~~~~~~~~~~~~~~~~~~ + +Steps to manually install the latest firmware from the downloaded Chelsio +Unified Wire package for FreeBSD operating system are as follows: + +#. Load the kernel module: + + .. code-block:: console + + kldload if_cxgbe + +#. Use dmesg to get the t5nex instance assigned to the Chelsio card: + + .. code-block:: console + + dmesg | grep "t5nex" + + Example output: + + .. code-block:: console + + t5nex0: irq 16 at device 0.4 on pci2 + cxl0: on t5nex0 + cxl1: on t5nex0 + t5nex0: PCIe x8, 2 ports, 14 MSI-X interrupts, 31 eq, 13 iq + + In the example above, a Chelsio T520-CR card is bound to a t5nex0 instance. + +#. Install cxgbetool from FreeBSD source repository: + + .. code-block:: console + + cd /tools/tools/cxgbetool/ + make && make install + +#. Use cxgbetool to load the firmware image onto the card: + + .. code-block:: console + + cxgbetool t5nex0 loadfw /src/network/firmware/t5fw-*.bin + +#. Unload and reload the kernel module: + + .. code-block:: console + + kldunload if_cxgbe + kldload if_cxgbe + +#. Verify with sysctl: + + .. code-block:: console + + sysctl -a | grep "t5nex" | grep "firmware" + + Example output: + + .. code-block:: console + + dev.t5nex.0.firmware_version: 1.24.11.0 + +Running testpmd +~~~~~~~~~~~~~~~ + +This section demonstrates how to launch **testpmd** with Chelsio +devices managed by librte_pmd_cxgbe in FreeBSD operating system. + +#. Change to DPDK source directory where the target has been compiled in + section :ref:`driver-compilation`: + + .. code-block:: console + + cd + +#. Copy the contigmem kernel module to /boot/kernel directory: + + .. code-block:: console + + cp x86_64-native-freebsd-clang/kmod/contigmem.ko /boot/kernel/ + +#. Add the following lines to /boot/loader.conf: + + .. code-block:: console + + # reserve 2 x 1G blocks of contiguous memory using contigmem driver + hw.contigmem.num_buffers=2 + hw.contigmem.buffer_size=1073741824 + # load contigmem module during boot process + contigmem_load="YES" + + The above lines load the contigmem kernel module during boot process and + allocate 2 x 1G blocks of contiguous memory to be used for DPDK later on. + This is to avoid issues with potential memory fragmentation during later + system up time, which may result in failure of allocating the contiguous + memory required for the contigmem kernel module. + +#. Restart the system and ensure the contigmem module is loaded successfully: + + .. code-block:: console + + reboot + kldstat | grep "contigmem" + + Example output: + + .. code-block:: console + + 2 1 0xffffffff817f1000 3118 contigmem.ko + +#. Repeat step 1 to ensure that you are in the DPDK source directory. + +#. Load the cxgbe kernel module: + + .. code-block:: console + + kldload if_cxgbe + +#. Get the PCI bus addresses of the interfaces bound to t5nex driver: + + .. code-block:: console + + pciconf -l | grep "t5nex" + + Example output: + + .. code-block:: console + + t5nex0@pci0:2:0:4: class=0x020000 card=0x00001425 chip=0x54011425 rev=0x00 + + In the above example, the t5nex0 is bound to 2:0:4 bus address. + + .. note:: + + Both the interfaces of a Chelsio 2-port adapter are bound to the + same PCI bus address. + +#. Unload the kernel module: + + .. code-block:: console + + kldunload if_cxgbe + +#. Set the PCI bus addresses to hw.nic_uio.bdfs kernel environment parameter: + + .. 
code-block:: console + + kenv hw.nic_uio.bdfs="2:0:4" + + This automatically binds 2:0:4 to nic_uio kernel driver when it is loaded in + the next step. + + .. note:: + + Currently, CXGBE PMD only supports the binding of PF4 for Chelsio NICs. + +#. Load nic_uio kernel driver: + + .. code-block:: console + + kldload ./x86_64-native-freebsd-clang/kmod/nic_uio.ko + +#. Start testpmd with basic parameters: + + .. code-block:: console + + ./x86_64-native-freebsd-clang/app/testpmd -l 0-3 -n 4 -w 0000:02:00.4 -- -i + + Example output: + + .. code-block:: console + + [...] + EAL: PCI device 0000:02:00.4 on NUMA socket 0 + EAL: probe driver: 1425:5401 rte_cxgbe_pmd + EAL: PCI memory mapped at 0x8007ec000 + EAL: PCI memory mapped at 0x842800000 + EAL: PCI memory mapped at 0x80086c000 + PMD: rte_cxgbe_pmd: fw: 1.24.11.0, TP: 0.1.23.2 + PMD: rte_cxgbe_pmd: Coming up as MASTER: Initializing adapter + Interactive-mode selected + Configuring Port 0 (socket 0) + Port 0: 00:07:43:2D:EA:C0 + Configuring Port 1 (socket 0) + Port 1: 00:07:43:2D:EA:C8 + Checking link statuses... + PMD: rte_cxgbe_pmd: Port0: passive DA port module inserted + PMD: rte_cxgbe_pmd: Port1: passive DA port module inserted + Port 0 Link Up - speed 10000 Mbps - full-duplex + Port 1 Link Up - speed 10000 Mbps - full-duplex + Done + testpmd> + +.. note:: + + Flow control pause TX/RX is disabled by default and can be enabled via + testpmd. Refer section :ref:`flow-control` for more details. + +Sample Application Notes +------------------------ + +.. _flow-control: + +Enable/Disable Flow Control +~~~~~~~~~~~~~~~~~~~~~~~~~~~ + +Flow control pause TX/RX is disabled by default and can be enabled via +testpmd as follows: + +.. code-block:: console + + testpmd> set flow_ctrl rx on tx on 0 0 0 0 mac_ctrl_frame_fwd off autoneg on 0 + testpmd> set flow_ctrl rx on tx on 0 0 0 0 mac_ctrl_frame_fwd off autoneg on 1 + +To disable again, run: + +.. code-block:: console + + testpmd> set flow_ctrl rx off tx off 0 0 0 0 mac_ctrl_frame_fwd off autoneg off 0 + testpmd> set flow_ctrl rx off tx off 0 0 0 0 mac_ctrl_frame_fwd off autoneg off 1 + +Jumbo Mode +~~~~~~~~~~ + +There are two ways to enable sending and receiving of jumbo frames via testpmd. +One method involves using the **mtu** command, which changes the mtu of an +individual port without having to stop the selected port. Another method +involves stopping all the ports first and then running **max-pkt-len** command +to configure the mtu of all the ports with a single command. + +- To configure each port individually, run the mtu command as follows: + + .. code-block:: console + + testpmd> port config mtu 0 9000 + testpmd> port config mtu 1 9000 + +- To configure all the ports at once, stop all the ports first and run the + max-pkt-len command as follows: + + .. code-block:: console + + testpmd> port stop all + testpmd> port config all max-pkt-len 9000 diff --git a/src/spdk/dpdk/doc/guides/nics/dpaa.rst b/src/spdk/dpdk/doc/guides/nics/dpaa.rst new file mode 100644 index 000000000..17839a920 --- /dev/null +++ b/src/spdk/dpdk/doc/guides/nics/dpaa.rst @@ -0,0 +1,310 @@ +.. SPDX-License-Identifier: BSD-3-Clause + Copyright 2017 NXP + + +DPAA Poll Mode Driver +===================== + +The DPAA NIC PMD (**librte_pmd_dpaa**) provides poll mode driver +support for the inbuilt NIC found in the **NXP DPAA** SoC family. + +More information can be found at `NXP Official Website +`_. 
+ +NXP DPAA (Data Path Acceleration Architecture - Gen 1) +------------------------------------------------------ + +This section provides an overview of the NXP DPAA architecture +and how it is integrated into the DPDK. + +Contents summary + +- DPAA overview +- DPAA driver architecture overview + +.. _dpaa_overview: + +DPAA Overview +~~~~~~~~~~~~~ + +Reference: `FSL DPAA Architecture `_. + +The QorIQ Data Path Acceleration Architecture (DPAA) is a set of hardware +components on specific QorIQ series multicore processors. This architecture +provides the infrastructure to support simplified sharing of networking +interfaces and accelerators by multiple CPU cores, and the accelerators +themselves. + +DPAA includes: + +- Cores +- Network and packet I/O +- Hardware offload accelerators +- Infrastructure required to facilitate flow of packets between the components above + +Infrastructure components are: + +- The Queue Manager (QMan) is a hardware accelerator that manages frame queues. + It allows CPUs and other accelerators connected to the SoC datapath to + enqueue and dequeue ethernet frames, thus providing the infrastructure for + data exchange among CPUs and datapath accelerators. +- The Buffer Manager (BMan) is a hardware buffer pool management block that + allows software and accelerators on the datapath to acquire and release + buffers in order to build frames. + +Hardware accelerators are: + +- SEC - Cryptographic accelerator +- PME - Pattern matching engine + +The Network and packet I/O component: + +- The Frame Manager (FMan) is a key component in the DPAA and makes use of the + DPAA infrastructure (QMan and BMan). FMan is responsible for packet + distribution and policing. Each frame can be parsed, classified and results + may be attached to the frame. This meta data can be used to select + particular QMan queue, which the packet is forwarded to. + + +DPAA DPDK - Poll Mode Driver Overview +------------------------------------- + +This section provides an overview of the drivers for DPAA: + +* Bus driver and associated "DPAA infrastructure" drivers +* Functional object drivers (such as Ethernet). + +Brief description of each driver is provided in layout below as well as +in the following sections. + +.. code-block:: console + + +------------+ + | DPDK DPAA | + | PMD | + +-----+------+ + | + +-----+------+ +---------------+ + : Ethernet :.......| DPDK DPAA | + . . . . . . . . . : (FMAN) : | Mempool driver| + . +---+---+----+ | (BMAN) | + . ^ | +-----+---------+ + . | | . + . | | . + . +---+---V----+ . + . . . . . . . . . . .: Portal drv : . + . . : : . + . . +-----+------+ . + . . : QMAN : . + . . : Driver : . + +----+------+-------+ +-----+------+ . + | DPDK DPAA Bus | | . + | driver |....................|..................... + | /bus/dpaa | | + +-------------------+ | + | + ========================== HARDWARE =====|======================== + PHY + =========================================|======================== + +In the above representation, solid lines represent components which interface +with DPDK RTE Framework and dotted lines represent DPAA internal components. + +DPAA Bus driver +~~~~~~~~~~~~~~~ + +The DPAA bus driver is a ``rte_bus`` driver which scans the platform like bus. +Key functions include: + +- Scanning and parsing the various objects and adding them to their respective + device list. 
+- Performing probe for available drivers against each scanned device +- Creating necessary ethernet instance before passing control to the PMD + +DPAA NIC Driver (PMD) +~~~~~~~~~~~~~~~~~~~~~ + +DPAA PMD is traditional DPDK PMD which provides necessary interface between +RTE framework and DPAA internal components/drivers. + +- Once devices have been identified by DPAA Bus, each device is associated + with the PMD +- PMD is responsible for implementing necessary glue layer between RTE APIs + and lower level QMan and FMan blocks. + The Ethernet driver is bound to a FMAN port and implements the interfaces + needed to connect the DPAA network interface to the network stack. + Each FMAN Port corresponds to a DPDK network interface. + + +Features +^^^^^^^^ + + Features of the DPAA PMD are: + + - Multiple queues for TX and RX + - Receive Side Scaling (RSS) + - Packet type information + - Checksum offload + - Promiscuous mode + +DPAA Mempool Driver +~~~~~~~~~~~~~~~~~~~ + +DPAA has a hardware offloaded buffer pool manager, called BMan, or Buffer +Manager. + +- Using standard Mempools operations RTE API, the mempool driver interfaces + with RTE to service each mempool creation, deletion, buffer allocation and + deallocation requests. +- Each FMAN instance has a BMan pool attached to it during initialization. + Each Tx frame can be automatically released by hardware, if allocated from + this pool. + + +Whitelisting & Blacklisting +--------------------------- + +For blacklisting a DPAA device, following commands can be used. + + .. code-block:: console + + -b "dpaa_bus:fmX-macY" -- ... + e.g. "dpaa_bus:fm1-mac4" + +Supported DPAA SoCs +------------------- + +- LS1043A/LS1023A +- LS1046A/LS1026A + +Prerequisites +------------- + +See :doc:`../platform/dpaa` for setup information + + +- Follow the DPDK :ref:`Getting Started Guide for Linux ` + to setup the basic DPDK environment. + +.. note:: + + Some part of dpaa bus code (qbman and fman - library) routines are + dual licensed (BSD & GPLv2), however they are used as BSD in DPDK in userspace. + +Pre-Installation Configuration +------------------------------ + +Config File Options +~~~~~~~~~~~~~~~~~~~ + +The following options can be modified in the ``config`` file. +Please note that enabling debugging options may affect system performance. + +- ``CONFIG_RTE_LIBRTE_DPAA_BUS`` (default ``y``) + + Toggle compilation of the ``librte_bus_dpaa`` driver. + +- ``CONFIG_RTE_LIBRTE_DPAA_PMD`` (default ``y``) + + Toggle compilation of the ``librte_pmd_dpaa`` driver. + +- ``CONFIG_RTE_LIBRTE_DPAA_DEBUG_DRIVER`` (default ``n``) + + Toggles display of bus configurations and enables a debugging queue + to fetch error (Rx/Tx) packets to driver. By default, packets with errors + (like wrong checksum) are dropped by the hardware. + +- ``CONFIG_RTE_LIBRTE_DPAA_HWDEBUG`` (default ``n``) + + Enables debugging of the Queue and Buffer Manager layer which interacts + with the DPAA hardware. + + +Environment Variables +~~~~~~~~~~~~~~~~~~~~~ + +DPAA drivers uses the following environment variables to configure its +state during application initialization: + +- ``DPAA_NUM_RX_QUEUES`` (default 1) + + This defines the number of Rx queues configured for an application, per + port. Hardware would distribute across these many number of queues on Rx + of packets. + In case the application is configured to use lesser number of queues than + configured above, it might result in packet loss (because of distribution). 
+ +- ``DPAA_PUSH_QUEUES_NUMBER`` (default 4) + + This defines the number of High performance queues to be used for ethdev Rx. + These queues use one private HW portal per queue configured, so they are + limited in the system. The first configured ethdev queues will be + automatically be assigned from the these high perf PUSH queues. Any queue + configuration beyond that will be standard Rx queues. The application can + choose to change their number if HW portals are limited. + The valid values are from '0' to '4'. The values shall be set to '0' if the + application want to use eventdev with DPAA device. + Currently these queues are not used for LS1023/LS1043 platform by default. + + +Driver compilation and testing +------------------------------ + +Refer to the document :ref:`compiling and testing a PMD for a NIC ` +for details. + +#. Running testpmd: + + Follow instructions available in the document + :ref:`compiling and testing a PMD for a NIC ` + to run testpmd. + + Example output: + + .. code-block:: console + + ./arm64-dpaa-linux-gcc/testpmd -c 0xff -n 1 \ + -- -i --portmask=0x3 --nb-cores=1 --no-flush-rx + + ..... + EAL: Registered [pci] bus. + EAL: Registered [dpaa] bus. + EAL: Detected 4 lcore(s) + ..... + EAL: dpaa: Bus scan completed + ..... + Configuring Port 0 (socket 0) + Port 0: 00:00:00:00:00:01 + Configuring Port 1 (socket 0) + Port 1: 00:00:00:00:00:02 + ..... + Checking link statuses... + Port 0 Link Up - speed 10000 Mbps - full-duplex + Port 1 Link Up - speed 10000 Mbps - full-duplex + Done + testpmd> + +Limitations +----------- + +Platform Requirement +~~~~~~~~~~~~~~~~~~~~ + +DPAA drivers for DPDK can only work on NXP SoCs as listed in the +``Supported DPAA SoCs``. + +Maximum packet length +~~~~~~~~~~~~~~~~~~~~~ + +The DPAA SoC family support a maximum of a 10240 jumbo frame. The value +is fixed and cannot be changed. So, even when the ``rxmode.max_rx_pkt_len`` +member of ``struct rte_eth_conf`` is set to a value lower than 10240, frames +up to 10240 bytes can still reach the host interface. + +Multiprocess Support +~~~~~~~~~~~~~~~~~~~~ + +Current version of DPAA driver doesn't support multi-process applications +where I/O is performed using secondary processes. This feature would be +implemented in subsequent versions. diff --git a/src/spdk/dpdk/doc/guides/nics/dpaa2.rst b/src/spdk/dpdk/doc/guides/nics/dpaa2.rst new file mode 100644 index 000000000..fdfa6fdd5 --- /dev/null +++ b/src/spdk/dpdk/doc/guides/nics/dpaa2.rst @@ -0,0 +1,561 @@ +.. SPDX-License-Identifier: BSD-3-Clause + Copyright 2016 NXP + + +DPAA2 Poll Mode Driver +====================== + +The DPAA2 NIC PMD (**librte_pmd_dpaa2**) provides poll mode driver +support for the inbuilt NIC found in the **NXP DPAA2** SoC family. + +More information can be found at `NXP Official Website +`_. + +NXP DPAA2 (Data Path Acceleration Architecture Gen2) +---------------------------------------------------- + +This section provides an overview of the NXP DPAA2 architecture +and how it is integrated into the DPDK. + +Contents summary + +- DPAA2 overview +- Overview of DPAA2 objects +- DPAA2 driver architecture overview + +.. _dpaa2_overview: + +DPAA2 Overview +~~~~~~~~~~~~~~ + +Reference: `FSL MC BUS in Linux Kernel `_. + +DPAA2 is a hardware architecture designed for high-speed network +packet processing. DPAA2 consists of sophisticated mechanisms for +processing Ethernet packets, queue management, buffer management, +autonomous L2 switching, virtual Ethernet bridging, and accelerator +(e.g. crypto) sharing. 
+ +A DPAA2 hardware component called the Management Complex (or MC) manages the +DPAA2 hardware resources. The MC provides an object-based abstraction for +software drivers to use the DPAA2 hardware. + +The MC uses DPAA2 hardware resources such as queues, buffer pools, and +network ports to create functional objects/devices such as network +interfaces, an L2 switch, or accelerator instances. + +The MC provides memory-mapped I/O command interfaces (MC portals) +which DPAA2 software drivers use to operate on DPAA2 objects: + +The diagram below shows an overview of the DPAA2 resource management +architecture: + +.. code-block:: console + + +--------------------------------------+ + | OS | + | DPAA2 drivers | + | | | + +-----------------------------|--------+ + | + | (create,discover,connect + | config,use,destroy) + | + DPAA2 | + +------------------------| mc portal |-+ + | | | + | +- - - - - - - - - - - - -V- - -+ | + | | | | + | | Management Complex (MC) | | + | | | | + | +- - - - - - - - - - - - - - - -+ | + | | + | Hardware Hardware | + | Resources Objects | + | --------- ------- | + | -queues -DPRC | + | -buffer pools -DPMCP | + | -Eth MACs/ports -DPIO | + | -network interface -DPNI | + | profiles -DPMAC | + | -queue portals -DPBP | + | -MC portals ... | + | ... | + | | + +--------------------------------------+ + +The MC mediates operations such as create, discover, +connect, configuration, and destroy. Fast-path operations +on data, such as packet transmit/receive, are not mediated by +the MC and are done directly using memory mapped regions in +DPIO objects. + +Overview of DPAA2 Objects +~~~~~~~~~~~~~~~~~~~~~~~~~ + +The section provides a brief overview of some key DPAA2 objects. +A simple scenario is described illustrating the objects involved +in creating a network interfaces. + +DPRC (Datapath Resource Container) + + A DPRC is a container object that holds all the other + types of DPAA2 objects. In the example diagram below there + are 8 objects of 5 types (DPMCP, DPIO, DPBP, DPNI, and DPMAC) + in the container. + +.. code-block:: console + + +---------------------------------------------------------+ + | DPRC | + | | + | +-------+ +-------+ +-------+ +-------+ +-------+ | + | | DPMCP | | DPIO | | DPBP | | DPNI | | DPMAC | | + | +-------+ +-------+ +-------+ +---+---+ +---+---+ | + | | DPMCP | | DPIO | | + | +-------+ +-------+ | + | | DPMCP | | + | +-------+ | + | | + +---------------------------------------------------------+ + +From the point of view of an OS, a DPRC behaves similar to a plug and +play bus, like PCI. DPRC commands can be used to enumerate the contents +of the DPRC, discover the hardware objects present (including mappable +regions and interrupts). + +.. code-block:: console + + DPRC.1 (bus) + | + +--+--------+-------+-------+-------+ + | | | | | + DPMCP.1 DPIO.1 DPBP.1 DPNI.1 DPMAC.1 + DPMCP.2 DPIO.2 + DPMCP.3 + +Hardware objects can be created and destroyed dynamically, providing +the ability to hot plug/unplug objects in and out of the DPRC. + +A DPRC has a mappable MMIO region (an MC portal) that can be used +to send MC commands. It has an interrupt for status events (like +hotplug). + +All objects in a container share the same hardware "isolation context". +This means that with respect to an IOMMU the isolation granularity +is at the DPRC (container) level, not at the individual object +level. + +DPRCs can be defined statically and populated with objects +via a config file passed to the MC when firmware starts +it. 
There is also a Linux user space tool called "restool" +that can be used to create/destroy containers and objects +dynamically. + +DPAA2 Objects for an Ethernet Network Interface +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ + +A typical Ethernet NIC is monolithic-- the NIC device contains TX/RX +queuing mechanisms, configuration mechanisms, buffer management, +physical ports, and interrupts. DPAA2 uses a more granular approach +utilizing multiple hardware objects. Each object provides specialized +functions. Groups of these objects are used by software to provide +Ethernet network interface functionality. This approach provides +efficient use of finite hardware resources, flexibility, and +performance advantages. + +The diagram below shows the objects needed for a simple +network interface configuration on a system with 2 CPUs. + +.. code-block:: console + + +---+---+ +---+---+ + CPU0 CPU1 + +---+---+ +---+---+ + | | + +---+---+ +---+---+ + DPIO DPIO + +---+---+ +---+---+ + \ / + \ / + \ / + +---+---+ + DPNI --- DPBP,DPMCP + +---+---+ + | + | + +---+---+ + DPMAC + +---+---+ + | + port/PHY + +Below the objects are described. For each object a brief description +is provided along with a summary of the kinds of operations the object +supports and a summary of key resources of the object (MMIO regions +and IRQs). + +DPMAC (Datapath Ethernet MAC): represents an Ethernet MAC, a +hardware device that connects to an Ethernet PHY and allows +physical transmission and reception of Ethernet frames. + +- MMIO regions: none +- IRQs: DPNI link change +- commands: set link up/down, link config, get stats, IRQ config, enable, reset + +DPNI (Datapath Network Interface): contains TX/RX queues, +network interface configuration, and RX buffer pool configuration +mechanisms. The TX/RX queues are in memory and are identified by +queue number. + +- MMIO regions: none +- IRQs: link state +- commands: port config, offload config, queue config, parse/classify config, IRQ config, enable, reset + +DPIO (Datapath I/O): provides interfaces to enqueue and dequeue +packets and do hardware buffer pool management operations. The DPAA2 +architecture separates the mechanism to access queues (the DPIO object) +from the queues themselves. The DPIO provides an MMIO interface to +enqueue/dequeue packets. To enqueue something a descriptor is written +to the DPIO MMIO region, which includes the target queue number. +There will typically be one DPIO assigned to each CPU. This allows all +CPUs to simultaneously perform enqueue/dequeued operations. DPIOs are +expected to be shared by different DPAA2 drivers. + +- MMIO regions: queue operations, buffer management +- IRQs: data availability, congestion notification, buffer pool depletion +- commands: IRQ config, enable, reset + +DPBP (Datapath Buffer Pool): represents a hardware buffer +pool. + +- MMIO regions: none +- IRQs: none +- commands: enable, reset + +DPMCP (Datapath MC Portal): provides an MC command portal. +Used by drivers to send commands to the MC to manage +objects. + +- MMIO regions: MC command portal +- IRQs: command completion +- commands: IRQ config, enable, reset + +Object Connections +~~~~~~~~~~~~~~~~~~ + +Some objects have explicit relationships that must +be configured: + +- DPNI <--> DPMAC +- DPNI <--> DPNI +- DPNI <--> L2-switch-port + +A DPNI must be connected to something such as a DPMAC, +another DPNI, or L2 switch port. The DPNI connection +is made via a DPRC command. + +.. 
code-block:: console + + +-------+ +-------+ + | DPNI | | DPMAC | + +---+---+ +---+---+ + | | + +==========+ + +- DPNI <--> DPBP + +A network interface requires a 'buffer pool' (DPBP object) which provides +a list of pointers to memory where received Ethernet data is to be copied. +The Ethernet driver configures the DPBPs associated with the network +interface. + +Interrupts +~~~~~~~~~~ + +All interrupts generated by DPAA2 objects are message +interrupts. At the hardware level message interrupts +generated by devices will normally have 3 components-- +1) a non-spoofable 'device-id' expressed on the hardware +bus, 2) an address, 3) a data value. + +In the case of DPAA2 devices/objects, all objects in the +same container/DPRC share the same 'device-id'. +For ARM-based SoC this is the same as the stream ID. + + +DPAA2 DPDK - Poll Mode Driver Overview +-------------------------------------- + +This section provides an overview of the drivers for +DPAA2-- 1) the bus driver and associated "DPAA2 infrastructure" +drivers and 2) functional object drivers (such as Ethernet). + +As described previously, a DPRC is a container that holds the other +types of DPAA2 objects. It is functionally similar to a plug-and-play +bus controller. + +Each object in the DPRC is a Linux "device" and is bound to a driver. +The diagram below shows the dpaa2 drivers involved in a networking +scenario and the objects bound to each driver. A brief description +of each driver follows. + +.. code-block: console + + + +------------+ + | DPDK DPAA2 | + | PMD | + +------------+ +------------+ + | Ethernet |.......| Mempool | + . . . . . . . . . | (DPNI) | | (DPBP) | + . +---+---+----+ +-----+------+ + . ^ | . + . | | . + . | | . + . +---+---V----+ . + . . . . . . . . . . .| DPIO driver| . + . . | (DPIO) | . + . . +-----+------+ . + . . | QBMAN | . + . . | Driver | . + +----+------+-------+ +-----+----- | . + | dpaa2 bus | | . + | VFIO fslmc-bus |....................|..................... + | | | + | /bus/fslmc | | + +-------------------+ | + | + ========================== HARDWARE =====|======================= + DPIO + | + DPNI---DPBP + | + DPMAC + | + PHY + =========================================|======================== + + +A brief description of each driver is provided below. + +DPAA2 bus driver +~~~~~~~~~~~~~~~~ + +The DPAA2 bus driver is a rte_bus driver which scans the fsl-mc bus. +Key functions include: + +- Reading the container and setting up vfio group +- Scanning and parsing the various MC objects and adding them to + their respective device list. + +Additionally, it also provides the object driver for generic MC objects. + +DPIO driver +~~~~~~~~~~~ + +The DPIO driver is bound to DPIO objects and provides services that allow +other drivers such as the Ethernet driver to enqueue and dequeue data for +their respective objects. +Key services include: + +- Data availability notifications +- Hardware queuing operations (enqueue and dequeue of data) +- Hardware buffer pool management + +To transmit a packet the Ethernet driver puts data on a queue and +invokes a DPIO API. For receive, the Ethernet driver registers +a data availability notification callback. To dequeue a packet +a DPIO API is used. + +There is typically one DPIO object per physical CPU for optimum +performance, allowing different CPUs to simultaneously enqueue +and dequeue data. + +The DPIO driver operates on behalf of all DPAA2 drivers +active -- Ethernet, crypto, compression, etc. 
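+
+From the application's point of view, these DPIO-backed queue operations are
+reached through the standard DPDK ethdev burst API; the PMD performs the
+corresponding DPIO enqueue/dequeue work underneath. The sketch below is only
+illustrative (the function name and port/queue numbers are assumptions, and
+the port is assumed to be already configured and started):
+
+.. code-block:: c
+
+    #include <rte_ethdev.h>
+    #include <rte_mbuf.h>
+
+    #define BURST_SIZE 32
+
+    /* Illustrative forwarding loop: rte_eth_rx_burst()/rte_eth_tx_burst()
+     * are the only calls the application makes; on DPAA2 the PMD services
+     * them through the DPIO driver. */
+    static void
+    forward_loop(uint16_t port_id)
+    {
+        struct rte_mbuf *pkts[BURST_SIZE];
+
+        for (;;) {
+            /* Dequeue up to BURST_SIZE packets from Rx queue 0. */
+            uint16_t nb_rx = rte_eth_rx_burst(port_id, 0, pkts, BURST_SIZE);
+            if (nb_rx == 0)
+                continue;
+
+            /* Enqueue the received packets on Tx queue 0 of the same port. */
+            uint16_t nb_tx = rte_eth_tx_burst(port_id, 0, pkts, nb_rx);
+
+            /* Free any packets the Tx queue could not accept. */
+            for (uint16_t i = nb_tx; i < nb_rx; i++)
+                rte_pktmbuf_free(pkts[i]);
+        }
+    }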
+ +DPBP based Mempool driver +~~~~~~~~~~~~~~~~~~~~~~~~~ + +The DPBP driver is bound to a DPBP objects and provides services to +create a hardware offloaded packet buffer mempool. + +DPAA2 NIC Driver +~~~~~~~~~~~~~~~~ +The Ethernet driver is bound to a DPNI and implements the kernel +interfaces needed to connect the DPAA2 network interface to +the network stack. + +Each DPNI corresponds to a DPDK network interface. + +Features +^^^^^^^^ + +Features of the DPAA2 PMD are: + +- Multiple queues for TX and RX +- Receive Side Scaling (RSS) +- MAC/VLAN filtering +- Packet type information +- Checksum offload +- Promiscuous mode +- Multicast mode +- Port hardware statistics +- Jumbo frames +- Link flow control +- Scattered and gather for TX and RX + +Supported DPAA2 SoCs +-------------------- +- LX2160A +- LS2084A/LS2044A +- LS2088A/LS2048A +- LS1088A/LS1048A + +Prerequisites +------------- + +See :doc:`../platform/dpaa2` for setup information + +Currently supported by DPDK: + +- NXP LSDK **19.08+**. +- MC Firmware version **10.18.0** and higher. +- Supported architectures: **arm64 LE**. + +- Follow the DPDK :ref:`Getting Started Guide for Linux ` to setup the basic DPDK environment. + +.. note:: + + Some part of fslmc bus code (mc flib - object library) routines are + dual licensed (BSD & GPLv2), however they are used as BSD in DPDK in userspace. + +Pre-Installation Configuration +------------------------------ + +Config File Options +~~~~~~~~~~~~~~~~~~~ + +The following options can be modified in the ``config`` file. +Please note that enabling debugging options may affect system performance. + +- ``CONFIG_RTE_LIBRTE_FSLMC_BUS`` (default ``y``) + + Toggle compilation of the ``librte_bus_fslmc`` driver. + +- ``CONFIG_RTE_LIBRTE_DPAA2_PMD`` (default ``y``) + + Toggle compilation of the ``librte_pmd_dpaa2`` driver. + +- ``CONFIG_RTE_LIBRTE_DPAA2_DEBUG_DRIVER`` (default ``n``) + + Toggle display of debugging messages/logic + +- ``CONFIG_RTE_LIBRTE_DPAA2_USE_PHYS_IOVA`` (default ``n``) + + Toggle to use physical address vs virtual address for hardware accelerators. + +Driver compilation and testing +------------------------------ + +Refer to the document :ref:`compiling and testing a PMD for a NIC ` +for details. + +#. Running testpmd: + + Follow instructions available in the document + :ref:`compiling and testing a PMD for a NIC ` + to run testpmd. + + Example output: + + .. code-block:: console + + ./testpmd -c 0xff -n 1 -- -i --portmask=0x3 --nb-cores=1 --no-flush-rx + + ..... + EAL: Registered [pci] bus. + EAL: Registered [fslmc] bus. + EAL: Detected 8 lcore(s) + EAL: Probing VFIO support... + EAL: VFIO support initialized + ..... + PMD: DPAA2: Processing Container = dprc.2 + EAL: fslmc: DPRC contains = 51 devices + EAL: fslmc: Bus scan completed + ..... + Configuring Port 0 (socket 0) + Port 0: 00:00:00:00:00:01 + Configuring Port 1 (socket 0) + Port 1: 00:00:00:00:00:02 + ..... + Checking link statuses... + Port 0 Link Up - speed 10000 Mbps - full-duplex + Port 1 Link Up - speed 10000 Mbps - full-duplex + Done + testpmd> + + +* Use dev arg option ``drv_loopback=1`` to loopback packets at + driver level. Any packet received will be reflected back by the + driver on same port. e.g. ``fslmc:dpni.1,drv_loopback=1`` + +* Use dev arg option ``drv_no_prefetch=1`` to disable prefetching + of the packet pull command which is issued in the previous cycle. + e.g. 
``fslmc:dpni.1,drv_no_prefetch=1`` + +Enabling logs +------------- + +For enabling logging for DPAA2 PMD, following log-level prefix can be used: + + .. code-block:: console + + --log-level=bus.fslmc: -- ... + +Using ``bus.fslmc`` as log matching criteria, all FSLMC bus logs can be enabled +which are lower than logging ``level``. + + Or + + .. code-block:: console + + --log-level=pmd.net.dpaa2: -- ... + +Using ``pmd.net.dpaa2`` as log matching criteria, all PMD logs can be enabled +which are lower than logging ``level``. + +Whitelisting & Blacklisting +--------------------------- + +For blacklisting a DPAA2 device, following commands can be used. + + .. code-block:: console + + -b "fslmc:dpni.x" -- ... + +Where x is the device object id as configured in resource container. + +Limitations +----------- + +Platform Requirement +~~~~~~~~~~~~~~~~~~~~ +DPAA2 drivers for DPDK can only work on NXP SoCs as listed in the +``Supported DPAA2 SoCs``. + +Maximum packet length +~~~~~~~~~~~~~~~~~~~~~ + +The DPAA2 SoC family support a maximum of a 10240 jumbo frame. The value +is fixed and cannot be changed. So, even when the ``rxmode.max_rx_pkt_len`` +member of ``struct rte_eth_conf`` is set to a value lower than 10240, frames +up to 10240 bytes can still reach the host interface. + +Other Limitations +~~~~~~~~~~~~~~~~~ + +- RSS hash key cannot be modified. +- RSS RETA cannot be configured. diff --git a/src/spdk/dpdk/doc/guides/nics/e1000em.rst b/src/spdk/dpdk/doc/guides/nics/e1000em.rst new file mode 100644 index 000000000..b6a2534e3 --- /dev/null +++ b/src/spdk/dpdk/doc/guides/nics/e1000em.rst @@ -0,0 +1,155 @@ +.. SPDX-License-Identifier: BSD-3-Clause + Copyright(c) 2010-2014 Intel Corporation. + +Driver for VM Emulated Devices +============================== + +The DPDK EM poll mode driver supports the following emulated devices: + +* qemu-kvm emulated Intel® 82540EM Gigabit Ethernet Controller (qemu e1000 device) + +* VMware* emulated Intel® 82545EM Gigabit Ethernet Controller + +* VMware emulated Intel® 8274L Gigabit Ethernet Controller. + +Validated Hypervisors +--------------------- + +The validated hypervisors are: + +* KVM (Kernel Virtual Machine) with Qemu, version 0.14.0 + +* KVM (Kernel Virtual Machine) with Qemu, version 0.15.1 + +* VMware ESXi 5.0, Update 1 + +Recommended Guest Operating System in Virtual Machine +----------------------------------------------------- + +The recommended guest operating system in a virtualized environment is: + +* Fedora* 18 (64-bit) + +For supported kernel versions, refer to the *DPDK Release Notes*. + +Setting Up a KVM Virtual Machine +-------------------------------- + +The following describes a target environment: + +* Host Operating System: Fedora 14 + +* Hypervisor: KVM (Kernel Virtual Machine) with Qemu version, 0.14.0 + +* Guest Operating System: Fedora 14 + +* Linux Kernel Version: Refer to the DPDK Getting Started Guide + +* Target Applications: testpmd + +The setup procedure is as follows: + +#. Download qemu-kvm-0.14.0 from + `http://sourceforge.net/projects/kvm/files/qemu-kvm/ `_ + and install it in the Host OS using the following steps: + + When using a recent kernel (2.6.25+) with kvm modules included: + + .. 
code-block:: console + + tar xzf qemu-kvm-release.tar.gz cd qemu-kvm-release + ./configure --prefix=/usr/local/kvm + make + sudo make install + sudo /sbin/modprobe kvm-intel + + When using an older kernel or a kernel from a distribution without the kvm modules, + you must download (from the same link), compile and install the modules yourself: + + .. code-block:: console + + tar xjf kvm-kmod-release.tar.bz2 + cd kvm-kmod-release + ./configure + make + sudo make install + sudo /sbin/modprobe kvm-intel + + Note that qemu-kvm installs in the /usr/local/bin directory. + + For more details about KVM configuration and usage, please refer to: + `http://www.linux-kvm.org/page/HOWTO1 `_. + +#. Create a Virtual Machine and install Fedora 14 on the Virtual Machine. + This is referred to as the Guest Operating System (Guest OS). + +#. Start the Virtual Machine with at least one emulated e1000 device. + + .. note:: + + The Qemu provides several choices for the emulated network device backend. + Most commonly used is a TAP networking backend that uses a TAP networking device in the host. + For more information about Qemu supported networking backends and different options for configuring networking at Qemu, + please refer to: + + — `http://www.linux-kvm.org/page/Networking `_ + + — `http://wiki.qemu.org/Documentation/Networking `_ + + — `http://qemu.weilnetz.de/qemu-doc.html `_ + + For example, to start a VM with two emulated e1000 devices, issue the following command: + + .. code-block:: console + + /usr/local/kvm/bin/qemu-system-x86_64 -cpu host -smp 4 -hda qemu1.raw -m 1024 + -net nic,model=e1000,vlan=1,macaddr=DE:AD:1E:00:00:01 + -net tap,vlan=1,ifname=tapvm01,script=no,downscript=no + -net nic,model=e1000,vlan=2,macaddr=DE:AD:1E:00:00:02 + -net tap,vlan=2,ifname=tapvm02,script=no,downscript=no + + where: + + — -m = memory to assign + + — -smp = number of smp cores + + — -hda = virtual disk image + + This command starts a new virtual machine with two emulated 82540EM devices, + backed up with two TAP networking host interfaces, tapvm01 and tapvm02. + + .. code-block:: console + + # ip tuntap show + tapvm01: tap + tapvm02: tap + +#. Configure your TAP networking interfaces using ip/ifconfig tools. + +#. Log in to the guest OS and check that the expected emulated devices exist: + + .. code-block:: console + + # lspci -d 8086:100e + 00:04.0 Ethernet controller: Intel Corporation 82540EM Gigabit Ethernet Controller (rev 03) + 00:05.0 Ethernet controller: Intel Corporation 82540EM Gigabit Ethernet Controller (rev 03) + +#. Install the DPDK and run testpmd. + +Known Limitations of Emulated Devices +------------------------------------- + +The following are known limitations: + +#. The Qemu e1000 RX path does not support multiple descriptors/buffers per packet. + Therefore, rte_mbuf should be big enough to hold the whole packet. + For example, to allow testpmd to receive jumbo frames, use the following: + + testpmd [options] -- --mbuf-size= + +#. Qemu e1000 does not validate the checksum of incoming packets. + +#. Qemu e1000 only supports one interrupt source, so link and Rx interrupt should be exclusive. + +#. Qemu e1000 does not support interrupt auto-clear, application should disable interrupt immediately when woken up. diff --git a/src/spdk/dpdk/doc/guides/nics/ena.rst b/src/spdk/dpdk/doc/guides/nics/ena.rst new file mode 100644 index 000000000..bec97c332 --- /dev/null +++ b/src/spdk/dpdk/doc/guides/nics/ena.rst @@ -0,0 +1,265 @@ +.. 
SPDX-License-Identifier: BSD-3-Clause + Copyright (c) 2015-2020 Amazon.com, Inc. or its affiliates. + All rights reserved. + +ENA Poll Mode Driver +==================== + +The ENA PMD is a DPDK poll-mode driver for the Amazon Elastic +Network Adapter (ENA) family. + +Overview +-------- + +The ENA driver exposes a lightweight management interface with a +minimal set of memory mapped registers and an extendable command set +through an Admin Queue. + +The driver supports a wide range of ENA adapters, is link-speed +independent (i.e., the same driver is used for 10GbE, 25GbE, 40GbE, +etc.), and it negotiates and supports an extendable feature set. + +ENA adapters allow high speed and low overhead Ethernet traffic +processing by providing a dedicated Tx/Rx queue pair per CPU core. + +The ENA driver supports industry standard TCP/IP offload features such +as checksum offload and TCP transmit segmentation offload (TSO). + +Receive-side scaling (RSS) is supported for multi-core scaling. + +Some of the ENA devices support a working mode called Low-latency +Queue (LLQ), which saves several more microseconds. + +Management Interface +-------------------- + +ENA management interface is exposed by means of: + +* Device Registers +* Admin Queue (AQ) and Admin Completion Queue (ACQ) + +ENA device memory-mapped PCIe space for registers (MMIO registers) +are accessed only during driver initialization and are not involved +in further normal device operation. + +AQ is used for submitting management commands, and the +results/responses are reported asynchronously through ACQ. + +ENA introduces a very small set of management commands with room for +vendor-specific extensions. Most of the management operations are +framed in a generic Get/Set feature command. + +The following admin queue commands are supported: + +* Create I/O submission queue +* Create I/O completion queue +* Destroy I/O submission queue +* Destroy I/O completion queue +* Get feature +* Set feature +* Get statistics + +Refer to ``ena_admin_defs.h`` for the list of supported Get/Set Feature +properties. + +Data Path Interface +------------------- + +I/O operations are based on Tx and Rx Submission Queues (Tx SQ and Rx +SQ correspondingly). Each SQ has a completion queue (CQ) associated +with it. + +The SQs and CQs are implemented as descriptor rings in contiguous +physical memory. + +Refer to ``ena_eth_io_defs.h`` for the detailed structure of the descriptor + +The driver supports multi-queue for both Tx and Rx. + +Configuration information +------------------------- + +**DPDK Configuration Parameters** + + The following configuration options are available for the ENA PMD: + + * **CONFIG_RTE_LIBRTE_ENA_PMD** (default y): Enables or disables inclusion + of the ENA PMD driver in the DPDK compilation. + + * **CONFIG_RTE_LIBRTE_ENA_DEBUG_RX** (default n): Enables or disables debug + logging of RX logic within the ENA PMD driver. + + * **CONFIG_RTE_LIBRTE_ENA_DEBUG_TX** (default n): Enables or disables debug + logging of TX logic within the ENA PMD driver. + + * **CONFIG_RTE_LIBRTE_ENA_COM_DEBUG** (default n): Enables or disables debug + logging of low level tx/rx logic in ena_com(base) within the ENA PMD driver. + +**Runtime Configuration Parameters** + + * **large_llq_hdr** (default 0) + + Enables or disables usage of large LLQ headers. This option will have + effect only if the device also supports large LLQ headers. Otherwise, the + default value will be used. 
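+
+     For example, a hypothetical testpmd invocation (the PCI address
+     ``0000:00:06.0`` is only illustrative) that requests large LLQ headers
+     through this devarg could look as follows:
+
+     .. code-block:: console
+
+        ./testpmd -w 0000:00:06.0,large_llq_hdr=1 -- -i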
+ +**ENA Configuration Parameters** + + * **Number of Queues** + + This is the requested number of queues upon initialization, however, the actual + number of receive and transmit queues to be created will be the minimum between + the maximal number supported by the device and number of queues requested. + + * **Size of Queues** + + This is the requested size of receive/transmit queues, while the actual size + will be the minimum between the requested size and the maximal receive/transmit + supported by the device. + +Building DPDK +------------- + +See the :ref:`DPDK Getting Started Guide for Linux ` for +instructions on how to build DPDK. + +By default the ENA PMD library will be built into the DPDK library. + +For configuring and using UIO and VFIO frameworks, please also refer :ref:`the +documentation that comes with DPDK suite `. + +Supported ENA adapters +---------------------- + +Current ENA PMD supports the following ENA adapters including: + +* ``1d0f:ec20`` - ENA VF +* ``1d0f:ec21`` - ENA VF with LLQ support + +Supported Operating Systems +--------------------------- + +Any Linux distribution fulfilling the conditions described in ``System Requirements`` +section of :ref:`the DPDK documentation ` or refer to *DPDK Release Notes*. + +Supported features +------------------ + +* MTU configuration +* Jumbo frames up to 9K +* IPv4/TCP/UDP checksum offload +* TSO offload +* Multiple receive and transmit queues +* RSS hash +* RSS indirection table configuration +* Low Latency Queue for Tx +* Basic and extended statistics +* LSC event notification +* Watchdog (requires handling of timers in the application) +* Device reset upon failure + +Prerequisites +------------- + +#. Prepare the system as recommended by DPDK suite. This includes environment + variables, hugepages configuration, tool-chains and configuration. + +#. ENA PMD can operate with ``vfio-pci``(*) or ``igb_uio`` driver. + + (*) ENAv2 hardware supports Low Latency Queue v2 (LLQv2). This feature + reduces the latency of the packets by pushing the header directly through + the PCI to the device, before the DMA is even triggered. For proper work + kernel PCI driver must support write combining (WC). In mainline version of + ``igb_uio`` (in DPDK repo) it must be enabled by loading module with + ``wc_activate=1`` flag (example below). However, mainline's vfio-pci + driver in kernel doesn't have WC support yet (planed to be added). + If vfio-pci used user should be either turn off ENAv2 (to avoid performance + impact) or recompile vfio-pci driver with patch provided in + `amzn-github `_. + +#. Insert ``vfio-pci`` or ``igb_uio`` kernel module using the command + ``modprobe vfio-pci`` or ``modprobe uio; insmod igb_uio.ko wc_activate=1`` + respectively. + +#. For ``vfio-pci`` users only: + Please make sure that ``IOMMU`` is enabled in your system, + or use ``vfio`` driver in ``noiommu`` mode:: + + echo 1 > /sys/module/vfio/parameters/enable_unsafe_noiommu_mode + + To use ``noiommu`` mode, the ``vfio-pci`` must be built with flag + ``CONFIG_VFIO_NOIOMMU``. + +#. Bind the intended ENA device to ``vfio-pci`` or ``igb_uio`` module. + +At this point the system should be ready to run DPDK applications. Once the +application runs to completion, the ENA can be detached from attached module if +necessary. + +**Note about usage on \*.metal instances** + +On AWS, the metal instances are supporting IOMMU for both arm64 and x86_64 +hosts. + +* x86_64 (e.g. c5.metal, i3.metal): + IOMMU should be disabled by default. 
In that situation, the ``igb_uio`` can + be used as it is but ``vfio-pci`` should be working in no-IOMMU mode (please + see above). + + When IOMMU is enabled, ``igb_uio`` cannot be used as it's not supporting this + feature, while ``vfio-pci`` should work without any changes. + To enable IOMMU on those hosts, please update ``GRUB_CMDLINE_LINUX`` in file + ``/etc/default/grub`` with the below extra boot arguments:: + + iommu=1 intel_iommu=on + + Then, make the changes live by executing as a root:: + + # grub2-mkconfig > /boot/grub2/grub.cfg + + Finally, reboot should result in IOMMU being enabled. + +* arm64 (a1.metal): + IOMMU should be enabled by default. Unfortunately, ``vfio-pci`` isn't + supporting SMMU, which is implementation of IOMMU for arm64 architecture and + ``igb_uio`` isn't supporting IOMMU at all, so to use DPDK with ENA on those + hosts, one must disable IOMMU. This can be done by updating + ``GRUB_CMDLINE_LINUX`` in file ``/etc/default/grub`` with the extra boot + argument:: + + iommu.passthrough=1 + + Then, make the changes live by executing as a root:: + + # grub2-mkconfig > /boot/grub2/grub.cfg + + Finally, reboot should result in IOMMU being disabled. + Without IOMMU, ``igb_uio`` can be used as it is but ``vfio-pci`` should be + working in no-IOMMU mode (please see above). + +Usage example +------------- + +Follow instructions available in the document +:ref:`compiling and testing a PMD for a NIC ` to launch +**testpmd** with Amazon ENA devices managed by librte_pmd_ena. + +Example output: + +.. code-block:: console + + [...] + EAL: PCI device 0000:00:06.0 on NUMA socket -1 + EAL: Invalid NUMA socket, default to 0 + EAL: probe driver: 1d0f:ec20 net_ena + + Interactive-mode selected + testpmd: create a new mbuf pool : n=171456, size=2176, socket=0 + testpmd: preferred mempool ops selected: ring_mp_mc + Warning! port-topology=paired and odd forward ports number, the last port will pair with itself. + Configuring Port 0 (socket 0) + Port 0: 00:00:00:11:00:01 + Checking link statuses... + + Done + testpmd> diff --git a/src/spdk/dpdk/doc/guides/nics/enetc.rst b/src/spdk/dpdk/doc/guides/nics/enetc.rst new file mode 100644 index 000000000..3c896eeff --- /dev/null +++ b/src/spdk/dpdk/doc/guides/nics/enetc.rst @@ -0,0 +1,117 @@ +.. SPDX-License-Identifier: BSD-3-Clause + Copyright 2018 NXP + +ENETC Poll Mode Driver +====================== + +The ENETC NIC PMD (**librte_pmd_enetc**) provides poll mode driver +support for the inbuilt NIC found in the **NXP LS1028** SoC. + +More information can be found at `NXP Official Website +`_. + +ENETC +----- + +This section provides an overview of the NXP ENETC +and how it is integrated into the DPDK. + +Contents summary + +- ENETC overview +- ENETC features +- PCI bus driver +- NIC driver +- Supported ENETC SoCs +- Prerequisites +- Driver compilation and testing + +ENETC Overview +~~~~~~~~~~~~~~ + +ENETC is a PCI Integrated End Point(IEP). IEP implements +peripheral devices in an SoC such that software sees them as PCIe device. +ENETC is an evolution of BDR(Buffer Descriptor Ring) based networking +IPs. + +This infrastructure simplifies adding support for IEP and facilitates in following: + +- Device discovery and location +- Resource requirement discovery and allocation (e.g. 
interrupt assignment, + device register address) +- Event reporting + +ENETC Features +~~~~~~~~~~~~~~ + +- Link Status +- Packet type information +- Basic stats +- Promiscuous +- Multicast +- Jumbo packets +- Queue Start/Stop +- Deferred Queue Start +- CRC offload + +NIC Driver (PMD) +~~~~~~~~~~~~~~~~ + +ENETC PMD is traditional DPDK PMD which provides necessary interface between +RTE framework and ENETC internal drivers. + +- Driver registers the device vendor table in PCI subsystem. +- RTE framework scans the PCI bus for connected devices. +- This scanning will invoke the probe function of ENETC driver. +- The probe function will set the basic device registers and also setups BD rings. +- On packet Rx the respective BD Ring status bit is set which is then used for + packet processing. +- Then Tx is done first followed by Rx. + +Supported ENETC SoCs +~~~~~~~~~~~~~~~~~~~~ + +- LS1028 + +Prerequisites +~~~~~~~~~~~~~ + +There are three main pre-requisites for executing ENETC PMD on a ENETC +compatible board: + +1. **ARM 64 Tool Chain** + + For example, the `*aarch64* Linaro Toolchain `_. + +2. **Linux Kernel** + + It can be obtained from `NXP's Github hosting `_. + +3. **Rootfile system** + + Any *aarch64* supporting filesystem can be used. For example, + Ubuntu 16.04 LTS (Xenial) or 18.04 (Bionic) userland which can be obtained + from `here `_. + +The following dependencies are not part of DPDK and must be installed +separately: + +- **NXP Linux LSDK** + + NXP Layerscape software development kit (LSDK) includes support for family + of QorIQ® ARM-Architecture-based system on chip (SoC) processors + and corresponding boards. + + It includes the Linux board support packages (BSPs) for NXP SoCs, + a fully operational tool chain, kernel and board specific modules. + + LSDK and related information can be obtained from: `LSDK `_ + +Driver compilation and testing +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ + +Follow instructions available in the document +:ref:`compiling and testing a PMD for a NIC ` +to launch **testpmd** + +To compile in performance mode, please set ``CONFIG_RTE_CACHE_LINE_SIZE=64`` diff --git a/src/spdk/dpdk/doc/guides/nics/enic.rst b/src/spdk/dpdk/doc/guides/nics/enic.rst new file mode 100644 index 000000000..a28a7f4e4 --- /dev/null +++ b/src/spdk/dpdk/doc/guides/nics/enic.rst @@ -0,0 +1,604 @@ +.. SPDX-License-Identifier: BSD-3-Clause + Copyright (c) 2017, Cisco Systems, Inc. + All rights reserved. + +ENIC Poll Mode Driver +===================== + +ENIC PMD is the DPDK poll-mode driver for the Cisco System Inc. VIC Ethernet +NICs. These adapters are also referred to as vNICs below. If you are running +or would like to run DPDK software applications on Cisco UCS servers using +Cisco VIC adapters the following documentation is relevant. + +How to obtain ENIC PMD integrated DPDK +-------------------------------------- + +ENIC PMD support is integrated into the DPDK suite. dpdk-.tar.gz +should be downloaded from https://core.dpdk.org/download/ + + +Configuration information +------------------------- + +- **DPDK Configuration Parameters** + + The following configuration options are available for the ENIC PMD: + + - **CONFIG_RTE_LIBRTE_ENIC_PMD** (default y): Enables or disables inclusion + of the ENIC PMD driver in the DPDK compilation. + +- **vNIC Configuration Parameters** + + - **Number of Queues** + + The maximum number of receive queues (RQs), work queues (WQs) and + completion queues (CQs) are configurable on a per vNIC basis + through the Cisco UCS Manager (CIMC or UCSM). 
+ + These values should be configured as follows: + + - The number of WQs should be greater or equal to the value of the + expected nb_tx_q parameter in the call to + rte_eth_dev_configure() + + - The number of RQs configured in the vNIC should be greater or + equal to *twice* the value of the expected nb_rx_q parameter in + the call to rte_eth_dev_configure(). With the addition of Rx + scatter, a pair of RQs on the vnic is needed for each receive + queue used by DPDK, even if Rx scatter is not being used. + Having a vNIC with only 1 RQ is not a valid configuration, and + will fail with an error message. + + - The number of CQs should set so that there is one CQ for each + WQ, and one CQ for each pair of RQs. + + For example: If the application requires 3 Rx queues, and 3 Tx + queues, the vNIC should be configured to have at least 3 WQs, 6 + RQs (3 pairs), and 6 CQs (3 for use by WQs + 3 for use by the 3 + pairs of RQs). + + - **Size of Queues** + + Likewise, the number of receive and transmit descriptors are configurable on + a per-vNIC basis via the UCS Manager and should be greater than or equal to + the nb_rx_desc and nb_tx_desc parameters expected to be used in the calls + to rte_eth_rx_queue_setup() and rte_eth_tx_queue_setup() respectively. + An application requesting more than the set size will be limited to that + size. + + Unless there is a lack of resources due to creating many vNICs, it + is recommended that the WQ and RQ sizes be set to the maximum. This + gives the application the greatest amount of flexibility in its + queue configuration. + + - *Note*: Since the introduction of Rx scatter, for performance + reasons, this PMD uses two RQs on the vNIC per receive queue in + DPDK. One RQ holds descriptors for the start of a packet, and the + second RQ holds the descriptors for the rest of the fragments of + a packet. This means that the nb_rx_desc parameter to + rte_eth_rx_queue_setup() can be a greater than 4096. The exact + amount will depend on the size of the mbufs being used for + receives, and the MTU size. + + For example: If the mbuf size is 2048, and the MTU is 9000, then + receiving a full size packet will take 5 descriptors, 1 from the + start-of-packet queue, and 4 from the second queue. Assuming + that the RQ size was set to the maximum of 4096, then the + application can specify up to 1024 + 4096 as the nb_rx_desc + parameter to rte_eth_rx_queue_setup(). + + - **Interrupts** + + At least one interrupt per vNIC interface should be configured in the UCS + manager regardless of the number receive/transmit queues. The ENIC PMD + uses this interrupt to get information about link status and errors + in the fast path. + + In addition to the interrupt for link status and errors, when using Rx queue + interrupts, increase the number of configured interrupts so that there is at + least one interrupt for each Rx queue. For example, if the app uses 3 Rx + queues and wants to use per-queue interrupts, configure 4 (3 + 1) interrupts. + + - **Receive Side Scaling** + + In order to fully utilize RSS in DPDK, enable all RSS related settings in + CIMC or UCSM. These include the following items listed under + Receive Side Scaling: + TCP, IPv4, TCP-IPv4, IPv6, TCP-IPv6, IPv6 Extension, TCP-IPv6 Extension. + + +SR-IOV mode utilization +----------------------- + +UCS blade servers configured with dynamic vNIC connection policies in UCSM +are capable of supporting SR-IOV. SR-IOV virtual functions (VFs) are +specialized vNICs, distinct from regular Ethernet vNICs. 
These VFs can be +directly assigned to virtual machines (VMs) as 'passthrough' devices. + +In UCS, SR-IOV VFs require the use of the Cisco Virtual Machine Fabric Extender +(VM-FEX), which gives the VM a dedicated +interface on the Fabric Interconnect (FI). Layer 2 switching is done at +the FI. This may eliminate the requirement for software switching on the +host to route intra-host VM traffic. + +Please refer to `Creating a Dynamic vNIC Connection Policy +`_ +for information on configuring SR-IOV adapter policies and port profiles +using UCSM. + +Once the policies are in place and the host OS is rebooted, VFs should be +visible on the host, E.g.: + +.. code-block:: console + + # lspci | grep Cisco | grep Ethernet + 0d:00.0 Ethernet controller: Cisco Systems Inc VIC Ethernet NIC (rev a2) + 0d:00.1 Ethernet controller: Cisco Systems Inc VIC SR-IOV VF (rev a2) + 0d:00.2 Ethernet controller: Cisco Systems Inc VIC SR-IOV VF (rev a2) + 0d:00.3 Ethernet controller: Cisco Systems Inc VIC SR-IOV VF (rev a2) + 0d:00.4 Ethernet controller: Cisco Systems Inc VIC SR-IOV VF (rev a2) + 0d:00.5 Ethernet controller: Cisco Systems Inc VIC SR-IOV VF (rev a2) + 0d:00.6 Ethernet controller: Cisco Systems Inc VIC SR-IOV VF (rev a2) + 0d:00.7 Ethernet controller: Cisco Systems Inc VIC SR-IOV VF (rev a2) + +Enable Intel IOMMU on the host and install KVM and libvirt, and reboot again as +required. Then, using libvirt, create a VM instance with an assigned device. +Below is an example ``interface`` block (part of the domain configuration XML) +that adds the host VF 0d:00:01 to the VM. ``profileid='pp-vlan-25'`` indicates +the port profile that has been configured in UCSM. + +.. code-block:: console + + + + + +
+ + + + + + + +Alternatively, the configuration can be done in a separate file using the +``network`` keyword. These methods are described in the libvirt documentation for +`Network XML format `_. + +When the VM instance is started, libvirt will bind the host VF to +vfio, complete provisioning on the FI and bring up the link. + +.. note:: + + It is not possible to use a VF directly from the host because it is not + fully provisioned until libvirt brings up the VM that it is assigned + to. + +In the VM instance, the VF will now be visible. E.g., here the VF 00:04.0 is +seen on the VM instance and should be available for binding to a DPDK. + +.. code-block:: console + + # lspci | grep Ether + 00:04.0 Ethernet controller: Cisco Systems Inc VIC SR-IOV VF (rev a2) + +Follow the normal DPDK install procedure, binding the VF to either ``igb_uio`` +or ``vfio`` in non-IOMMU mode. + +In the VM, the kernel enic driver may be automatically bound to the VF during +boot. Unbinding it currently hangs due to a known issue with the driver. To +work around the issue, blacklist the enic module as follows. +Please see :ref:`Limitations ` for limitations in +the use of SR-IOV. + +.. code-block:: console + + # cat /etc/modprobe.d/enic.conf + blacklist enic + + # dracut --force + +.. note:: + + Passthrough does not require SR-IOV. If VM-FEX is not desired, the user + may create as many regular vNICs as necessary and assign them to VMs as + passthrough devices. Since these vNICs are not SR-IOV VFs, using them as + passthrough devices do not require libvirt, port profiles, and VM-FEX. + + +.. _enic-generic-flow-api: + +Generic Flow API support +------------------------ + +Generic Flow API (also called "rte_flow" API) is supported. More advanced +capabilities are available when "Advanced Filtering" is enabled on the adapter. +Advanced filtering was added to 1300 series VIC firmware starting with version +2.0.13 for C-series UCS servers and version 3.1.2 for UCSM managed blade +servers. Advanced filtering is available on 1400 series adapters and beyond. +To enable advanced filtering, the 'Advanced filter' radio button should be +selected via CIMC or UCSM followed by a reboot of the server. + +- **1200 series VICs** + + 5-tuple exact flow support for 1200 series adapters. This allows: + + - Attributes: ingress + - Items: ipv4, ipv6, udp, tcp (must exactly match src/dst IP + addresses and ports and all must be specified) + - Actions: queue and void + - Selectors: 'is' + +- **1300 and later series VICS with advanced filters disabled** + + With advanced filters disabled, an IPv4 or IPv6 item must be specified + in the pattern. + + - Attributes: ingress + - Items: eth, vlan, ipv4, ipv6, udp, tcp, vxlan, inner eth, vlan, ipv4, ipv6, udp, tcp + - Actions: queue and void + - Selectors: 'is', 'spec' and 'mask'. 'last' is not supported + - In total, up to 64 bytes of mask is allowed across all headers + +- **1300 and later series VICS with advanced filters enabled** + + - Attributes: ingress + - Items: eth, vlan, ipv4, ipv6, udp, tcp, vxlan, raw, inner eth, vlan, ipv4, ipv6, udp, tcp + - Actions: queue, mark, drop, flag, rss, passthru, and void + - Selectors: 'is', 'spec' and 'mask'. 
'last' is not supported + - In total, up to 64 bytes of mask is allowed across all headers + +- **1400 and later series VICs with Flow Manager API enabled** + + - Attributes: ingress, egress + - Items: eth, vlan, ipv4, ipv6, sctp, udp, tcp, vxlan, raw, inner eth, vlan, ipv4, ipv6, sctp, udp, tcp + - Ingress Actions: count, drop, flag, jump, mark, port_id, passthru, queue, rss, vxlan_decap, vxlan_encap, and void + - Egress Actions: count, drop, jump, passthru, vxlan_encap, and void + - Selectors: 'is', 'spec' and 'mask'. 'last' is not supported + - In total, up to 64 bytes of mask is allowed across all headers + +The VIC performs packet matching after applying VLAN strip. If VLAN +stripping is enabled, EtherType in the ETH item corresponds to the +stripped VLAN header's EtherType. Stripping does not affect the VLAN +item. TCI and EtherType in the VLAN item are matched against those in +the (stripped) VLAN header whether stripping is enabled or disabled. + +More features may be added in future firmware and new versions of the VIC. +Please refer to the release notes. + +.. _overlay_offload: + +Overlay Offload +--------------- + +Recent hardware models support overlay offload. When enabled, the NIC performs +the following operations for VXLAN, NVGRE, and GENEVE packets. In all cases, +inner and outer packets can be IPv4 or IPv6. + +- TSO for VXLAN and GENEVE packets. + + Hardware supports NVGRE TSO, but DPDK currently has no NVGRE offload flags. + +- Tx checksum offloads. + + The NIC fills in IPv4/UDP/TCP checksums for both inner and outer packets. + +- Rx checksum offloads. + + The NIC validates IPv4/UDP/TCP checksums of both inner and outer packets. + Good checksum flags (e.g. ``PKT_RX_L4_CKSUM_GOOD``) indicate that the inner + packet has the correct checksum, and if applicable, the outer packet also + has the correct checksum. Bad checksum flags (e.g. ``PKT_RX_L4_CKSUM_BAD``) + indicate that the inner and/or outer packets have invalid checksum values. + +- Inner Rx packet type classification + + PMD sets inner L3/L4 packet types (e.g. ``RTE_PTYPE_INNER_L4_TCP``), and + ``RTE_PTYPE_TUNNEL_GRENAT`` to indicate that the packet is tunneled. + PMD does not set L3/L4 packet types for outer packets. + +- Inner RSS + + RSS hash calculation, therefore queue selection, is done on inner packets. + +In order to enable overlay offload, the 'Enable VXLAN' box should be checked +via CIMC or UCSM followed by a reboot of the server. When PMD successfully +enables overlay offload, it prints the following message on the console. + +.. code-block:: console + + Overlay offload is enabled + +By default, PMD enables overlay offload if hardware supports it. To disable +it, set ``devargs`` parameter ``disable-overlay=1``. For example:: + + -w 12:00.0,disable-overlay=1 + +By default, the NIC uses 4789 as the VXLAN port. The user may change +it through ``rte_eth_dev_udp_tunnel_port_{add,delete}``. However, as +the current NIC has a single VXLAN port number, the user cannot +configure multiple port numbers. + +Geneve headers with non-zero options are not supported by default. To +use Geneve with options, update the VIC firmware to the latest version +and then set ``devargs`` parameter ``geneve-opt=1``. When Geneve with +options is enabled, flow API cannot be used as the features are +currently mutually exclusive. When this feature is successfully +enabled, PMD prints the following message. + +.. 
code-block:: console + + Geneve with options is enabled + + +Ingress VLAN Rewrite +-------------------- + +VIC adapters can tag, untag, or modify the VLAN headers of ingress +packets. The ingress VLAN rewrite mode controls this behavior. By +default, it is set to pass-through, where the NIC does not modify the +VLAN header in any way so that the application can see the original +header. This mode is sufficient for many applications, but may not be +suitable for others. Such applications may change the mode by setting +``devargs`` parameter ``ig-vlan-rewrite`` to one of the following. + +- ``pass``: Pass-through mode. The NIC does not modify the VLAN + header. This is the default mode. + +- ``priority``: Priority-tag default VLAN mode. If the ingress packet + is tagged with the default VLAN, the NIC replaces its VLAN header + with the priority tag (VLAN ID 0). + +- ``trunk``: Default trunk mode. The NIC tags untagged ingress packets + with the default VLAN. Tagged ingress packets are not modified. To + the application, every packet appears as tagged. + +- ``untag``: Untag default VLAN mode. If the ingress packet is tagged + with the default VLAN, the NIC removes or untags its VLAN header so + that the application sees an untagged packet. As a result, the + default VLAN becomes `untagged`. This mode can be useful for + applications such as OVS-DPDK performance benchmarks that utilize + only the default VLAN and want to see only untagged packets. + + +Vectorized Rx Handler +--------------------- + +ENIC PMD includes a version of the receive handler that is vectorized using +AVX2 SIMD instructions. It is meant for bulk, throughput oriented workloads +where reducing cycles/packet in PMD is a priority. In order to use the +vectorized handler, take the following steps. + +- Use a recent version of gcc, icc, or clang and build 64-bit DPDK. If + the compiler is known to support AVX2, DPDK build system + automatically compiles the vectorized handler. Otherwise, the + handler is not available. + +- Set ``devargs`` parameter ``enable-avx2-rx=1`` to explicitly request that + PMD consider the vectorized handler when selecting the receive handler. + For example:: + + -w 12:00.0,enable-avx2-rx=1 + + As the current implementation is intended for field trials, by default, the + vectorized handler is not considered (``enable-avx2-rx=0``). + +- Run on a UCS M4 or later server with CPUs that support AVX2. + +PMD selects the vectorized handler when the handler is compiled into +the driver, the user requests its use via ``enable-avx2-rx=1``, CPU +supports AVX2, and scatter Rx is not used. To verify that the +vectorized handler is selected, enable debug logging +(``--log-level=pmd,debug``) and check the following message. + +.. code-block:: console + + enic_use_vector_rx_handler use the non-scatter avx2 Rx handler + +.. _enic_limitations: + +Limitations +----------- + +- **VLAN 0 Priority Tagging** + + If a vNIC is configured in TRUNK mode by the UCS manager, the adapter will + priority tag egress packets according to 802.1Q if they were not already + VLAN tagged by software. If the adapter is connected to a properly configured + switch, there will be no unexpected behavior. + + In test setups where an Ethernet port of a Cisco adapter in TRUNK mode is + connected point-to-point to another adapter port or connected though a router + instead of a switch, all ingress packets will be VLAN tagged. Programs such + as l3fwd may not account for VLAN tags in packets and may misbehave. 
One + solution is to enable VLAN stripping on ingress so the VLAN tag is removed + from the packet and put into the mbuf->vlan_tci field. Here is an example + of how to accomplish this: + +.. code-block:: console + + vlan_offload = rte_eth_dev_get_vlan_offload(port); + vlan_offload |= ETH_VLAN_STRIP_OFFLOAD; + rte_eth_dev_set_vlan_offload(port, vlan_offload); + +Another alternative is modify the adapter's ingress VLAN rewrite mode so that +packets with the default VLAN tag are stripped by the adapter and presented to +DPDK as untagged packets. In this case mbuf->vlan_tci and the PKT_RX_VLAN and +PKT_RX_VLAN_STRIPPED mbuf flags would not be set. This mode is enabled with the +``devargs`` parameter ``ig-vlan-rewrite=untag``. For example:: + + -w 12:00.0,ig-vlan-rewrite=untag + +- **SR-IOV** + + - KVM hypervisor support only. VMware has not been tested. + - Requires VM-FEX, and so is only available on UCS managed servers connected + to Fabric Interconnects. It is not on standalone C-Series servers. + - VF devices are not usable directly from the host. They can only be used + as assigned devices on VM instances. + - Currently, unbind of the ENIC kernel mode driver 'enic.ko' on the VM + instance may hang. As a workaround, enic.ko should be blacklisted or removed + from the boot process. + - pci_generic cannot be used as the uio module in the VM. igb_uio or + vfio in non-IOMMU mode can be used. + - The number of RQs in UCSM dynamic vNIC configurations must be at least 2. + - The number of SR-IOV devices is limited to 256. Components on target system + might limit this number to fewer than 256. + +- **Flow API** + + - The number of filters that can be specified with the Generic Flow API is + dependent on how many header fields are being masked. Use 'flow create' in + a loop to determine how many filters your VIC will support (not more than + 1000 for 1300 series VICs). Filters are checked for matching in the order they + were added. Since there currently is no grouping or priority support, + 'catch-all' filters should be added last. + - The supported range of IDs for the 'MARK' action is 0 - 0xFFFD. + - RSS and PASSTHRU actions only support "receive normally". They are limited + to supporting MARK + RSS and PASSTHRU + MARK to allow the application to mark + packets and then receive them normally. These require 1400 series VIC adapters + and latest firmware. + - RAW items are limited to matching UDP tunnel headers like VXLAN. + - For 1400 VICs, all flows using the RSS action on a port use same hash + configuration. The RETA is ignored. The queues used in the RSS group must be + sequential. There is a performance hit if the number of queues is not a power of 2. + Only level 0 (outer header) RSS is allowed. + +- **Statistics** + + - ``rx_good_bytes`` (ibytes) always includes VLAN header (4B) and CRC bytes (4B). + This behavior applies to 1300 and older series VIC adapters. + 1400 series VICs do not count CRC bytes, and count VLAN header only when VLAN + stripping is disabled. + - When the NIC drops a packet because the Rx queue has no free buffers, + ``rx_good_bytes`` still increments by 4B if the packet is not VLAN tagged or + VLAN stripping is disabled, or by 8B if the packet is VLAN tagged and stripping + is enabled. + This behavior applies to 1300 and older series VIC adapters. 1400 series VICs + do not increment this byte counter when packets are dropped. + +- **RSS Hashing** + + - Hardware enables and disables UDP and TCP RSS hashing together. 
The driver + cannot control UDP and TCP hashing individually. + +How to build the suite +---------------------- + +The build instructions for the DPDK suite should be followed. By default +the ENIC PMD library will be built into the DPDK library. + +Refer to the document :ref:`compiling and testing a PMD for a NIC +` for details. + +For configuring and using UIO and VFIO frameworks, please refer to the +documentation that comes with DPDK suite. + +Supported Cisco VIC adapters +---------------------------- + +ENIC PMD supports all recent generations of Cisco VIC adapters including: + +- VIC 1200 series +- VIC 1300 series +- VIC 1400 series + +Supported Operating Systems +--------------------------- + +Any Linux distribution fulfilling the conditions described in Dependencies +section of DPDK documentation. + +Supported features +------------------ + +- Unicast, multicast and broadcast transmission and reception +- Receive queue polling +- Port Hardware Statistics +- Hardware VLAN acceleration +- IP checksum offload +- Receive side VLAN stripping +- Multiple receive and transmit queues +- Promiscuous mode +- Setting RX VLAN (supported via UCSM/CIMC only) +- VLAN filtering (supported via UCSM/CIMC only) +- Execution of application by unprivileged system users +- IPV4, IPV6 and TCP RSS hashing +- UDP RSS hashing (1400 series and later adapters) +- Scattered Rx +- MTU update +- SR-IOV on UCS managed servers connected to Fabric Interconnects +- Flow API +- Overlay offload + + - Rx/Tx checksum offloads for VXLAN, NVGRE, GENEVE + - TSO for VXLAN and GENEVE packets + - Inner RSS + +Known bugs and unsupported features in this release +--------------------------------------------------- + +- Signature or flex byte based flow direction +- Drop feature of flow direction +- VLAN based flow direction +- Non-IPV4 flow direction +- Setting of extended VLAN +- MTU update only works if Scattered Rx mode is disabled +- Maximum receive packet length is ignored if Scattered Rx mode is used + +Prerequisites +------------- + +- Prepare the system as recommended by DPDK suite. This includes environment + variables, hugepages configuration, tool-chains and configuration. +- Insert vfio-pci kernel module using the command 'modprobe vfio-pci' if the + user wants to use VFIO framework. +- Insert uio kernel module using the command 'modprobe uio' if the user wants + to use UIO framework. +- DPDK suite should be configured based on the user's decision to use VFIO or + UIO framework. +- If the vNIC device(s) to be used is bound to the kernel mode Ethernet driver + use 'ip' to bring the interface down. The dpdk-devbind.py tool can + then be used to unbind the device's bus id from the ENIC kernel mode driver. +- Bind the intended vNIC to vfio-pci in case the user wants ENIC PMD to use + VFIO framework using dpdk-devbind.py. +- Bind the intended vNIC to igb_uio in case the user wants ENIC PMD to use + UIO framework using dpdk-devbind.py. + +At this point the system should be ready to run DPDK applications. Once the +application runs to completion, the vNIC can be detached from vfio-pci or +igb_uio if necessary. + +Root privilege is required to bind and unbind vNICs to/from VFIO/UIO. +VFIO framework helps an unprivileged user to run the applications. +For an unprivileged user to run the applications on DPDK and ENIC PMD, +it may be necessary to increase the maximum locked memory of the user. +The following command could be used to do this. + +.. 
code-block:: console + + sudo sh -c "ulimit -l " + +The value depends on the memory configuration of the application, DPDK and +PMD. Typically, the limit has to be raised to higher than 2GB. +e.g., 2621440 + +The compilation of any unused drivers can be disabled using the +configuration file in config/ directory (e.g., config/common_linux). +This would help in bringing down the time taken for building the +libraries and the initialization time of the application. + +Additional Reference +-------------------- + +- https://www.cisco.com/c/en/us/products/servers-unified-computing/index.html +- https://www.cisco.com/c/en/us/products/interfaces-modules/unified-computing-system-adapters/index.html + +Contact Information +------------------- + +Any questions or bugs should be reported to DPDK community and to the ENIC PMD +maintainers: + +- John Daley +- Hyong Youb Kim diff --git a/src/spdk/dpdk/doc/guides/nics/fail_safe.rst b/src/spdk/dpdk/doc/guides/nics/fail_safe.rst new file mode 100644 index 000000000..b4a92f663 --- /dev/null +++ b/src/spdk/dpdk/doc/guides/nics/fail_safe.rst @@ -0,0 +1,246 @@ +.. SPDX-License-Identifier: BSD-3-Clause + Copyright 2017 6WIND S.A. + +Fail-safe poll mode driver library +================================== + +The Fail-safe poll mode driver library (**librte_pmd_failsafe**) implements a +virtual device that allows using device supporting hotplug, without modifying +other components relying on such device (application, other PMDs). +In this context, hotplug support is meant as plugging or removing a device +from its bus suddenly. + +Additionally to the Seamless Hotplug feature, the Fail-safe PMD offers the +ability to redirect operations to a secondary device when the primary has been +removed from the system. + +.. note:: + + The library is enabled by default. You can enable it or disable it manually + by setting the ``CONFIG_RTE_LIBRTE_PMD_FAILSAFE`` configuration option. + +Features +-------- + +The Fail-safe PMD only supports a limited set of features. If you plan to use a +device underneath the Fail-safe PMD with a specific feature, this feature must +also be supported by the Fail-safe PMD. + +A notable exception is the device removal feature. The fail-safe PMD is not +meant to be removed itself, unlike its sub-devices which should support it. +If a sub-device supports hotplugging, the fail-safe PMD will enable its use +automatically by detecting capable devices and registering the relevant handler. + +Check the feature matrix for the complete set of supported features. + +Compilation option +------------------ + +Available options within the ``$RTE_TARGET/build/.config`` file: + +- ``CONFIG_RTE_LIBRTE_PMD_FAILSAFE`` (default **y**) + + This option enables or disables compiling librte_pmd_failsafe. + +Using the Fail-safe PMD from the EAL command line +------------------------------------------------- + +The Fail-safe PMD can be used like most other DPDK virtual devices, by passing a +``--vdev`` parameter to the EAL when starting the application. The device name +must start with the *net_failsafe* prefix, followed by numbers or letters. This +name must be unique for each device. Each fail-safe instance must have at least one +sub-device, and at most two. + +A sub-device can be any DPDK device, including possibly another fail-safe device. + +Fail-safe command line parameters +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ + +- **dev()** parameter + + This parameter allows the user to define a sub-device. The ```` part of + this parameter must be a valid device definition. 
It follows the same format + provided to any ``-w`` or ``--vdev`` options. + + Enclosing the device definition within parentheses here allows using + additional sub-device parameters if need be. They will be passed on to the + sub-device. + +.. note:: + + In case where the sub-device is also used as a whitelist device, using ``-w`` + on the EAL command line, the fail-safe PMD will use the device with the + options provided to the EAL instead of its own parameters. + + When trying to use a PCI device automatically probed by the blacklist mode, + the name for the fail-safe sub-device must be the full PCI id: + Domain:Bus:Device.Function, *i.e.* ``00:00:00.0`` instead of ``00:00.0``, + as the second form is historically accepted by the DPDK. + +- **exec()** parameter + + This parameter allows the user to provide a command to the fail-safe PMD to + execute and define a sub-device. + It is done within a regular shell context. + The first line of its output is read by the fail-safe PMD and otherwise + interpreted as if passed to a **dev** parameter. + Any other line is discarded. + If the command fails or output an incorrect string, the sub-device is not + initialized. + All commas within the ``shell command`` are replaced by spaces before + executing the command. This helps using scripts to specify devices. + +- **fd()** parameter + + This parameter reads a device definition from an arbitrary file descriptor + number in ```` format as described above. + + The file descriptor is read in non-blocking mode and is never closed in + order to take only the last line into account (unlike ``exec()``) at every + probe attempt. + +- **mac** parameter [MAC address] + + This parameter allows the user to set a default MAC address to the fail-safe + and all of its sub-devices. + If no default mac address is provided, the fail-safe PMD will read the MAC + address of the first of its sub-device to be successfully probed and use it as + its default MAC address, trying to set it to all of its other sub-devices. + If no sub-device was successfully probed at initialization, then a random MAC + address is generated, that will be subsequently applied to all sub-devices once + they are probed. + +- **hotplug_poll** parameter [UINT64] (default **2000**) + + This parameter allows the user to configure the amount of time in milliseconds + between two sub-device upkeep round. + +Usage example +~~~~~~~~~~~~~ + +This section shows some example of using **testpmd** with a fail-safe PMD. + +#. To build a PMD and configure DPDK, refer to the document + :ref:`compiling and testing a PMD for a NIC `. + +#. Start testpmd. The sub-device ``84:00.0`` should be blacklisted from normal EAL + operations to avoid probing it twice, as the PCI bus is in blacklist mode. + + .. code-block:: console + + $RTE_TARGET/build/app/testpmd -c 0xff -n 4 \ + --vdev 'net_failsafe0,mac=de:ad:be:ef:01:02,dev(84:00.0),dev(net_ring0)' \ + -b 84:00.0 -b 00:04.0 -- -i + + If the sub-device ``84:00.0`` is not blacklisted, it will be probed by the + EAL first. When the fail-safe then tries to initialize it the probe operation + fails. + + Note that PCI blacklist mode is the default PCI operating mode. + +#. Alternatively, it can be used alongside any other device in whitelist mode. + + .. code-block:: console + + $RTE_TARGET/build/app/testpmd -c 0xff -n 4 \ + --vdev 'net_failsafe0,mac=de:ad:be:ef:01:02,dev(84:00.0),dev(net_ring0)' \ + -w 81:00.0 -- -i + +#. Start testpmd using a flexible device definition + + .. 
code-block:: console + + $RTE_TARGET/build/app/testpmd -c 0xff -n 4 -w ff:ff.f \ + --vdev='net_failsafe0,exec(echo 84:00.0)' -- -i + +#. Start testpmd, automatically probing the device 84:00.0 and using it with + the fail-safe. + + .. code-block:: console + + $RTE_TARGET/build/app/testpmd -c 0xff -n 4 \ + --vdev 'net_failsafe0,dev(0000:84:00.0),dev(net_ring0)' -- -i + + +Using the Fail-safe PMD from an application +------------------------------------------- + +This driver strives to be as seamless as possible to existing applications, in +order to propose the hotplug functionality in the easiest way possible. + +Care must be taken, however, to respect the **ether** API concerning device +access, and in particular, using the ``RTE_ETH_FOREACH_DEV`` macro to iterate +over ethernet devices, instead of directly accessing them or by writing one's +own device iterator. + + .. code-block:: C + + unsigned int i; + + /* VALID iteration over eth-dev. */ + RTE_ETH_FOREACH_DEV(i) { + [...] + } + + /* INVALID iteration over eth-dev. */ + for (i = 0; i < RTE_MAX_ETHPORTS; i++) { + [...] + } + +Plug-in feature +--------------- + +A sub-device can be defined without existing on the system when the fail-safe +PMD is initialized. Upon probing this device, the fail-safe PMD will detect its +absence and postpone its use. It will then register for a periodic check on any +missing sub-device. + +During this time, the fail-safe PMD can be used normally, configured and told to +emit and receive packets. It will store any applied configuration but will fail +to emit anything, returning ``0`` from its TX function. Any unsent packet must +be freed. + +Upon the probing of its missing sub-device, the current stored configuration +will be applied. After this configuration pass, the new sub-device will be +synchronized with other sub-devices, i.e. be started if the fail-safe PMD has +been started by the user before. + +Plug-out feature +---------------- + +A sub-device supporting the device removal event can be removed from its bus at +any time. The fail-safe PMD will register a callback for such event and react +accordingly. It will try to safely stop, close and uninit the sub-device having +emitted this event, allowing it to free its eventual resources. + +Fail-safe glossary +------------------ + +Fallback device + Also called **Secondary device**. + + The fail-safe will fail-over onto this device when the preferred device is + absent. + +Preferred device + Also called **Primary device**. + + The first declared sub-device in the fail-safe parameters. + When this device is plugged, it is always used as emitting device. + It is the main sub-device and is used as target for configuration + operations if there is any ambiguity. + +Upkeep round + Periodical event during which sub-devices are serviced. Each devices having a state + different to that of the fail-safe device itself, is synchronized with it + (brought down or up accordingly). Additionally, any sub-device marked for + removal is cleaned-up. + +Slave + In the context of the fail-safe PMD, synonymous to sub-device. + +Sub-device + A device being utilized by the fail-safe PMD. + This is another PMD running underneath the fail-safe PMD. + Any sub-device can disappear at any time. The fail-safe will ensure + that the device removal happens gracefully. 
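+
+As a short, illustrative follow-up to the Plug-in feature section above (a
+sketch only; ``port_id``, ``pkts`` and ``nb_pkts`` are assumed to come from
+the application), any packets left unsent while a sub-device is missing must
+be freed by the caller:
+
+.. code-block:: C
+
+   #include <rte_ethdev.h>
+   #include <rte_mbuf.h>
+
+   uint16_t sent = rte_eth_tx_burst(port_id, 0, pkts, nb_pkts);
+
+   /* While all sub-devices are absent, the fail-safe may return 0 here. */
+   for (uint16_t i = sent; i < nb_pkts; i++)
+       rte_pktmbuf_free(pkts[i]);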
diff --git a/src/spdk/dpdk/doc/guides/nics/features.rst b/src/spdk/dpdk/doc/guides/nics/features.rst new file mode 100644 index 000000000..edd21c4d8 --- /dev/null +++ b/src/spdk/dpdk/doc/guides/nics/features.rst @@ -0,0 +1,934 @@ +.. SPDX-License-Identifier: BSD-3-Clause + Copyright(c) 2017 Intel Corporation. + +Features Overview +================= + +This section explains the supported features that are listed in the +:doc:`overview`. + +As a guide to implementers it also shows the structs where the features are +defined and the APIs that can be use to get/set the values. + +Following tags used for feature details, these are from driver point of view: + +``[uses]`` : Driver uses some kind of input from the application. + +``[implements]`` : Driver implements a functionality. + +``[provides]`` : Driver provides some kind of data to the application. It is possible +to provide data by implementing some function, but "provides" is used +for cases where provided data can't be represented simply by a function. + +``[related]`` : Related API with that feature. + + +.. _nic_features_speed_capabilities: + +Speed capabilities +------------------ + +Supports getting the speed capabilities that the current device is capable of. + +* **[provides] rte_eth_dev_info**: ``speed_capa:ETH_LINK_SPEED_*``. +* **[related] API**: ``rte_eth_dev_info_get()``. + + +.. _nic_features_link_status: + +Link status +----------- + +Supports getting the link speed, duplex mode and link state (up/down). + +* **[implements] eth_dev_ops**: ``link_update``. +* **[implements] rte_eth_dev_data**: ``dev_link``. +* **[related] API**: ``rte_eth_link_get()``, ``rte_eth_link_get_nowait()``. + + +.. _nic_features_link_status_event: + +Link status event +----------------- + +Supports Link Status Change interrupts. + +* **[uses] user config**: ``dev_conf.intr_conf.lsc``. +* **[uses] rte_eth_dev_data**: ``dev_flags:RTE_ETH_DEV_INTR_LSC``. +* **[uses] rte_eth_event_type**: ``RTE_ETH_EVENT_INTR_LSC``. +* **[implements] rte_eth_dev_data**: ``dev_link``. +* **[provides] rte_pci_driver.drv_flags**: ``RTE_PCI_DRV_INTR_LSC``. +* **[related] API**: ``rte_eth_link_get()``, ``rte_eth_link_get_nowait()``. + + +.. _nic_features_removal_event: + +Removal event +------------- + +Supports device removal interrupts. + +* **[uses] user config**: ``dev_conf.intr_conf.rmv``. +* **[uses] rte_eth_dev_data**: ``dev_flags:RTE_ETH_DEV_INTR_RMV``. +* **[uses] rte_eth_event_type**: ``RTE_ETH_EVENT_INTR_RMV``. +* **[provides] rte_pci_driver.drv_flags**: ``RTE_PCI_DRV_INTR_RMV``. + + +.. _nic_features_queue_status_event: + +Queue status event +------------------ + +Supports queue enable/disable events. + +* **[uses] rte_eth_event_type**: ``RTE_ETH_EVENT_QUEUE_STATE``. + + +.. _nic_features_rx_interrupt: + +Rx interrupt +------------ + +Supports Rx interrupts. + +* **[uses] user config**: ``dev_conf.intr_conf.rxq``. +* **[implements] eth_dev_ops**: ``rx_queue_intr_enable``, ``rx_queue_intr_disable``. +* **[related] API**: ``rte_eth_dev_rx_intr_enable()``, ``rte_eth_dev_rx_intr_disable()``. + + +.. _nic_features_lock-free_tx_queue: + +Lock-free Tx queue +------------------ + +If a PMD advertises DEV_TX_OFFLOAD_MT_LOCKFREE capable, multiple threads can +invoke rte_eth_tx_burst() concurrently on the same Tx queue without SW lock. + +* **[uses] rte_eth_txconf,rte_eth_txmode**: ``offloads:DEV_TX_OFFLOAD_MT_LOCKFREE``. +* **[provides] rte_eth_dev_info**: ``tx_offload_capa,tx_queue_offload_capa:DEV_TX_OFFLOAD_MT_LOCKFREE``. +* **[related] API**: ``rte_eth_tx_burst()``. 
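+
+The fragment below is an illustrative sketch (not taken from any PMD; the
+function name and queue counts are arbitrary) of how an application might
+request this offload when the capability is advertised:
+
+.. code-block:: c
+
+   #include <rte_ethdev.h>
+
+   /* Sketch: request lock-free Tx if the port's PMD advertises it. */
+   static void
+   configure_lockfree_tx(uint16_t port_id, uint16_t nb_rxq, uint16_t nb_txq)
+   {
+       struct rte_eth_conf port_conf = { 0 };
+       struct rte_eth_dev_info dev_info;
+
+       rte_eth_dev_info_get(port_id, &dev_info);
+       if (dev_info.tx_offload_capa & DEV_TX_OFFLOAD_MT_LOCKFREE) {
+           /* Threads may then call rte_eth_tx_burst() concurrently on
+            * the same Tx queue without a software lock. */
+           port_conf.txmode.offloads |= DEV_TX_OFFLOAD_MT_LOCKFREE;
+       }
+       rte_eth_dev_configure(port_id, nb_rxq, nb_txq, &port_conf);
+   }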
+ + +.. _nic_features_fast_mbuf_free: + +Fast mbuf free +-------------- + +Supports optimization for fast release of mbufs following successful Tx. +Requires that per queue, all mbufs come from the same mempool and has refcnt = 1. + +* **[uses] rte_eth_txconf,rte_eth_txmode**: ``offloads:DEV_TX_OFFLOAD_MBUF_FAST_FREE``. +* **[provides] rte_eth_dev_info**: ``tx_offload_capa,tx_queue_offload_capa:DEV_TX_OFFLOAD_MBUF_FAST_FREE``. + + +.. _nic_features_free_tx_mbuf_on_demand: + +Free Tx mbuf on demand +---------------------- + +Supports freeing consumed buffers on a Tx ring. + +* **[implements] eth_dev_ops**: ``tx_done_cleanup``. +* **[related] API**: ``rte_eth_tx_done_cleanup()``. + + +.. _nic_features_queue_start_stop: + +Queue start/stop +---------------- + +Supports starting/stopping a specific Rx/Tx queue of a port. + +* **[implements] eth_dev_ops**: ``rx_queue_start``, ``rx_queue_stop``, ``tx_queue_start``, + ``tx_queue_stop``. +* **[related] API**: ``rte_eth_dev_rx_queue_start()``, ``rte_eth_dev_rx_queue_stop()``, + ``rte_eth_dev_tx_queue_start()``, ``rte_eth_dev_tx_queue_stop()``. + + +.. _nic_features_mtu_update: + +MTU update +---------- + +Supports updating port MTU. + +* **[implements] eth_dev_ops**: ``mtu_set``. +* **[implements] rte_eth_dev_data**: ``mtu``. +* **[provides] rte_eth_dev_info**: ``max_rx_pktlen``. +* **[related] API**: ``rte_eth_dev_set_mtu()``, ``rte_eth_dev_get_mtu()``. + + +.. _nic_features_jumbo_frame: + +Jumbo frame +----------- + +Supports Rx jumbo frames. + +* **[uses] rte_eth_rxconf,rte_eth_rxmode**: ``offloads:DEV_RX_OFFLOAD_JUMBO_FRAME``. + ``dev_conf.rxmode.max_rx_pkt_len``. +* **[related] rte_eth_dev_info**: ``max_rx_pktlen``. +* **[related] API**: ``rte_eth_dev_set_mtu()``. + + +.. _nic_features_scattered_rx: + +Scattered Rx +------------ + +Supports receiving segmented mbufs. + +* **[uses] rte_eth_rxconf,rte_eth_rxmode**: ``offloads:DEV_RX_OFFLOAD_SCATTER``. +* **[implements] datapath**: ``Scattered Rx function``. +* **[implements] rte_eth_dev_data**: ``scattered_rx``. +* **[provides] eth_dev_ops**: ``rxq_info_get:scattered_rx``. +* **[related] eth_dev_ops**: ``rx_pkt_burst``. + + +.. _nic_features_lro: + +LRO +--- + +Supports Large Receive Offload. + +* **[uses] rte_eth_rxconf,rte_eth_rxmode**: ``offloads:DEV_RX_OFFLOAD_TCP_LRO``. + ``dev_conf.rxmode.max_lro_pkt_size``. +* **[implements] datapath**: ``LRO functionality``. +* **[implements] rte_eth_dev_data**: ``lro``. +* **[provides] mbuf**: ``mbuf.ol_flags:PKT_RX_LRO``, ``mbuf.tso_segsz``. +* **[provides] rte_eth_dev_info**: ``rx_offload_capa,rx_queue_offload_capa:DEV_RX_OFFLOAD_TCP_LRO``. +* **[provides] rte_eth_dev_info**: ``max_lro_pkt_size``. + + +.. _nic_features_tso: + +TSO +--- + +Supports TCP Segmentation Offloading. + +* **[uses] rte_eth_txconf,rte_eth_txmode**: ``offloads:DEV_TX_OFFLOAD_TCP_TSO``. +* **[uses] rte_eth_desc_lim**: ``nb_seg_max``, ``nb_mtu_seg_max``. +* **[uses] mbuf**: ``mbuf.ol_flags:`` ``PKT_TX_TCP_SEG``, ``PKT_TX_IPV4``, ``PKT_TX_IPV6``, ``PKT_TX_IP_CKSUM``. +* **[uses] mbuf**: ``mbuf.tso_segsz``, ``mbuf.l2_len``, ``mbuf.l3_len``, ``mbuf.l4_len``. +* **[implements] datapath**: ``TSO functionality``. +* **[provides] rte_eth_dev_info**: ``tx_offload_capa,tx_queue_offload_capa:DEV_TX_OFFLOAD_TCP_TSO,DEV_TX_OFFLOAD_UDP_TSO``. + + +.. _nic_features_promiscuous_mode: + +Promiscuous mode +---------------- + +Supports enabling/disabling promiscuous mode for a port. + +* **[implements] eth_dev_ops**: ``promiscuous_enable``, ``promiscuous_disable``. 
+* **[implements] rte_eth_dev_data**: ``promiscuous``. +* **[related] API**: ``rte_eth_promiscuous_enable()``, ``rte_eth_promiscuous_disable()``, + ``rte_eth_promiscuous_get()``. + + +.. _nic_features_allmulticast_mode: + +Allmulticast mode +----------------- + +Supports enabling/disabling receiving multicast frames. + +* **[implements] eth_dev_ops**: ``allmulticast_enable``, ``allmulticast_disable``. +* **[implements] rte_eth_dev_data**: ``all_multicast``. +* **[related] API**: ``rte_eth_allmulticast_enable()``, + ``rte_eth_allmulticast_disable()``, ``rte_eth_allmulticast_get()``. + + +.. _nic_features_unicast_mac_filter: + +Unicast MAC filter +------------------ + +Supports adding MAC addresses to enable whitelist filtering to accept packets. + +* **[implements] eth_dev_ops**: ``mac_addr_set``, ``mac_addr_add``, ``mac_addr_remove``. +* **[implements] rte_eth_dev_data**: ``mac_addrs``. +* **[related] API**: ``rte_eth_dev_default_mac_addr_set()``, + ``rte_eth_dev_mac_addr_add()``, ``rte_eth_dev_mac_addr_remove()``, + ``rte_eth_macaddr_get()``. + + +.. _nic_features_multicast_mac_filter: + +Multicast MAC filter +-------------------- + +Supports setting multicast addresses to filter. + +* **[implements] eth_dev_ops**: ``set_mc_addr_list``. +* **[related] API**: ``rte_eth_dev_set_mc_addr_list()``. + + +.. _nic_features_rss_hash: + +RSS hash +-------- + +Supports RSS hashing on RX. + +* **[uses] user config**: ``dev_conf.rxmode.mq_mode`` = ``ETH_MQ_RX_RSS_FLAG``. +* **[uses] user config**: ``dev_conf.rx_adv_conf.rss_conf``. +* **[uses] rte_eth_rxconf,rte_eth_rxmode**: ``offloads:DEV_RX_OFFLOAD_RSS_HASH``. +* **[provides] rte_eth_dev_info**: ``flow_type_rss_offloads``. +* **[provides] mbuf**: ``mbuf.ol_flags:PKT_RX_RSS_HASH``, ``mbuf.rss``. + + +.. _nic_features_inner_rss: + +Inner RSS +--------- + +Supports RX RSS hashing on Inner headers. + +* **[uses] rte_flow_action_rss**: ``level``. +* **[uses] rte_eth_rxconf,rte_eth_rxmode**: ``offloads:DEV_RX_OFFLOAD_RSS_HASH``. +* **[provides] mbuf**: ``mbuf.ol_flags:PKT_RX_RSS_HASH``, ``mbuf.rss``. + + +.. _nic_features_rss_key_update: + +RSS key update +-------------- + +Supports configuration of Receive Side Scaling (RSS) hash computation. Updating +Receive Side Scaling (RSS) hash key. + +* **[implements] eth_dev_ops**: ``rss_hash_update``, ``rss_hash_conf_get``. +* **[provides] rte_eth_dev_info**: ``hash_key_size``. +* **[related] API**: ``rte_eth_dev_rss_hash_update()``, + ``rte_eth_dev_rss_hash_conf_get()``. + + +.. _nic_features_rss_reta_update: + +RSS reta update +--------------- + +Supports updating Redirection Table of the Receive Side Scaling (RSS). + +* **[implements] eth_dev_ops**: ``reta_update``, ``reta_query``. +* **[provides] rte_eth_dev_info**: ``reta_size``. +* **[related] API**: ``rte_eth_dev_rss_reta_update()``, ``rte_eth_dev_rss_reta_query()``. + + +.. _nic_features_vmdq: + +VMDq +---- + +Supports Virtual Machine Device Queues (VMDq). + +* **[uses] user config**: ``dev_conf.rxmode.mq_mode`` = ``ETH_MQ_RX_VMDQ_FLAG``. +* **[uses] user config**: ``dev_conf.rx_adv_conf.vmdq_dcb_conf``. +* **[uses] user config**: ``dev_conf.rx_adv_conf.vmdq_rx_conf``. +* **[uses] user config**: ``dev_conf.tx_adv_conf.vmdq_dcb_tx_conf``. +* **[uses] user config**: ``dev_conf.tx_adv_conf.vmdq_tx_conf``. + + +.. _nic_features_sriov: + +SR-IOV +------ + +Driver supports creating Virtual Functions. + +* **[implements] rte_eth_dev_data**: ``sriov``. + +.. _nic_features_dcb: + +DCB +--- + +Supports Data Center Bridging (DCB). 
+ +* **[uses] user config**: ``dev_conf.rxmode.mq_mode`` = ``ETH_MQ_RX_DCB_FLAG``. +* **[uses] user config**: ``dev_conf.rx_adv_conf.vmdq_dcb_conf``. +* **[uses] user config**: ``dev_conf.rx_adv_conf.dcb_rx_conf``. +* **[uses] user config**: ``dev_conf.tx_adv_conf.vmdq_dcb_tx_conf``. +* **[uses] user config**: ``dev_conf.tx_adv_conf.vmdq_tx_conf``. +* **[implements] eth_dev_ops**: ``get_dcb_info``. +* **[related] API**: ``rte_eth_dev_get_dcb_info()``. + + +.. _nic_features_vlan_filter: + +VLAN filter +----------- + +Supports filtering of a VLAN Tag identifier. + +* **[uses] rte_eth_rxconf,rte_eth_rxmode**: ``offloads:DEV_RX_OFFLOAD_VLAN_FILTER``. +* **[implements] eth_dev_ops**: ``vlan_filter_set``. +* **[related] API**: ``rte_eth_dev_vlan_filter()``. + + +.. _nic_features_flow_control: + +Flow control +------------ + +Supports configuring link flow control. + +* **[implements] eth_dev_ops**: ``flow_ctrl_get``, ``flow_ctrl_set``, + ``priority_flow_ctrl_set``. +* **[related] API**: ``rte_eth_dev_flow_ctrl_get()``, ``rte_eth_dev_flow_ctrl_set()``, + ``rte_eth_dev_priority_flow_ctrl_set()``. + + +.. _nic_features_flow_api: + +Flow API +-------- + +Supports the DPDK Flow API for generic filtering. + +* **[implements] eth_dev_ops**: ``filter_ctrl:RTE_ETH_FILTER_GENERIC``. +* **[implements] rte_flow_ops**: ``All``. + + +.. _nic_features_rate_limitation: + +Rate limitation +--------------- + +Supports Tx rate limitation for a queue. + +* **[implements] eth_dev_ops**: ``set_queue_rate_limit``. +* **[related] API**: ``rte_eth_set_queue_rate_limit()``. + + +.. _nic_features_traffic_mirroring: + +Traffic mirroring +----------------- + +Supports adding traffic mirroring rules. + +* **[implements] eth_dev_ops**: ``mirror_rule_set``, ``mirror_rule_reset``. +* **[related] API**: ``rte_eth_mirror_rule_set()``, ``rte_eth_mirror_rule_reset()``. + + +.. _nic_features_inline_crypto_doc: + +Inline crypto +------------- + +Supports inline crypto processing defined by rte_security library to perform crypto +operations of security protocol while packet is received in NIC. NIC is not aware +of protocol operations. See Security library and PMD documentation for more details. + +* **[uses] rte_eth_rxconf,rte_eth_rxmode**: ``offloads:DEV_RX_OFFLOAD_SECURITY``, +* **[uses] rte_eth_txconf,rte_eth_txmode**: ``offloads:DEV_TX_OFFLOAD_SECURITY``. +* **[implements] rte_security_ops**: ``session_create``, ``session_update``, + ``session_stats_get``, ``session_destroy``, ``set_pkt_metadata``, ``capabilities_get``. +* **[provides] rte_eth_dev_info**: ``rx_offload_capa,rx_queue_offload_capa:DEV_RX_OFFLOAD_SECURITY``, + ``tx_offload_capa,tx_queue_offload_capa:DEV_TX_OFFLOAD_SECURITY``. +* **[provides] mbuf**: ``mbuf.ol_flags:PKT_RX_SEC_OFFLOAD``, + ``mbuf.ol_flags:PKT_TX_SEC_OFFLOAD``, ``mbuf.ol_flags:PKT_RX_SEC_OFFLOAD_FAILED``. +* **[provides] rte_security_ops, capabilities_get**: ``action: RTE_SECURITY_ACTION_TYPE_INLINE_CRYPTO`` + + +.. _nic_features_inline_protocol_doc: + +Inline protocol +--------------- + +Supports inline protocol processing defined by rte_security library to perform +protocol processing for the security protocol (e.g. IPsec, MACSEC) while the +packet is received at NIC. The NIC is capable of understanding the security +protocol operations. See security library and PMD documentation for more details. + +* **[uses] rte_eth_rxconf,rte_eth_rxmode**: ``offloads:DEV_RX_OFFLOAD_SECURITY``, +* **[uses] rte_eth_txconf,rte_eth_txmode**: ``offloads:DEV_TX_OFFLOAD_SECURITY``. 
+* **[implements] rte_security_ops**: ``session_create``, ``session_update``, + ``session_stats_get``, ``session_destroy``, ``set_pkt_metadata``, ``get_userdata``, + ``capabilities_get``. +* **[provides] rte_eth_dev_info**: ``rx_offload_capa,rx_queue_offload_capa:DEV_RX_OFFLOAD_SECURITY``, + ``tx_offload_capa,tx_queue_offload_capa:DEV_TX_OFFLOAD_SECURITY``. +* **[provides] mbuf**: ``mbuf.ol_flags:PKT_RX_SEC_OFFLOAD``, + ``mbuf.ol_flags:PKT_TX_SEC_OFFLOAD``, ``mbuf.ol_flags:PKT_RX_SEC_OFFLOAD_FAILED``. +* **[provides] rte_security_ops, capabilities_get**: ``action: RTE_SECURITY_ACTION_TYPE_INLINE_PROTOCOL`` + + +.. _nic_features_crc_offload: + +CRC offload +----------- + +Supports CRC stripping by hardware. +A PMD assumed to support CRC stripping by default. PMD should advertise if it supports keeping CRC. + +* **[uses] rte_eth_rxconf,rte_eth_rxmode**: ``offloads:DEV_RX_OFFLOAD_KEEP_CRC``. + + +.. _nic_features_vlan_offload: + +VLAN offload +------------ + +Supports VLAN offload to hardware. + +* **[uses] rte_eth_rxconf,rte_eth_rxmode**: ``offloads:DEV_RX_OFFLOAD_VLAN_STRIP,DEV_RX_OFFLOAD_VLAN_FILTER,DEV_RX_OFFLOAD_VLAN_EXTEND``. +* **[uses] rte_eth_txconf,rte_eth_txmode**: ``offloads:DEV_TX_OFFLOAD_VLAN_INSERT``. +* **[uses] mbuf**: ``mbuf.ol_flags:PKT_TX_VLAN``, ``mbuf.vlan_tci``. +* **[implements] eth_dev_ops**: ``vlan_offload_set``. +* **[provides] mbuf**: ``mbuf.ol_flags:PKT_RX_VLAN_STRIPPED``, ``mbuf.ol_flags:PKT_RX_VLAN`` ``mbuf.vlan_tci``. +* **[provides] rte_eth_dev_info**: ``rx_offload_capa,rx_queue_offload_capa:DEV_RX_OFFLOAD_VLAN_STRIP``, + ``tx_offload_capa,tx_queue_offload_capa:DEV_TX_OFFLOAD_VLAN_INSERT``. +* **[related] API**: ``rte_eth_dev_set_vlan_offload()``, + ``rte_eth_dev_get_vlan_offload()``. + + +.. _nic_features_qinq_offload: + +QinQ offload +------------ + +Supports QinQ (queue in queue) offload. + +* **[uses] rte_eth_rxconf,rte_eth_rxmode**: ``offloads:DEV_RX_OFFLOAD_QINQ_STRIP``. +* **[uses] rte_eth_txconf,rte_eth_txmode**: ``offloads:DEV_TX_OFFLOAD_QINQ_INSERT``. +* **[uses] mbuf**: ``mbuf.ol_flags:PKT_TX_QINQ``, ``mbuf.vlan_tci_outer``. +* **[provides] mbuf**: ``mbuf.ol_flags:PKT_RX_QINQ_STRIPPED``, ``mbuf.ol_flags:PKT_RX_QINQ``, + ``mbuf.ol_flags:PKT_RX_VLAN_STRIPPED``, ``mbuf.ol_flags:PKT_RX_VLAN`` + ``mbuf.vlan_tci``, ``mbuf.vlan_tci_outer``. +* **[provides] rte_eth_dev_info**: ``rx_offload_capa,rx_queue_offload_capa:DEV_RX_OFFLOAD_QINQ_STRIP``, + ``tx_offload_capa,tx_queue_offload_capa:DEV_TX_OFFLOAD_QINQ_INSERT``. + + +.. _nic_features_l3_checksum_offload: + +L3 checksum offload +------------------- + +Supports L3 checksum offload. + +* **[uses] rte_eth_rxconf,rte_eth_rxmode**: ``offloads:DEV_RX_OFFLOAD_IPV4_CKSUM``. +* **[uses] rte_eth_txconf,rte_eth_txmode**: ``offloads:DEV_TX_OFFLOAD_IPV4_CKSUM``. +* **[uses] mbuf**: ``mbuf.ol_flags:PKT_TX_IP_CKSUM``, + ``mbuf.ol_flags:PKT_TX_IPV4`` | ``PKT_TX_IPV6``. +* **[uses] mbuf**: ``mbuf.l2_len``, ``mbuf.l3_len``. +* **[provides] mbuf**: ``mbuf.ol_flags:PKT_RX_IP_CKSUM_UNKNOWN`` | + ``PKT_RX_IP_CKSUM_BAD`` | ``PKT_RX_IP_CKSUM_GOOD`` | + ``PKT_RX_IP_CKSUM_NONE``. +* **[provides] rte_eth_dev_info**: ``rx_offload_capa,rx_queue_offload_capa:DEV_RX_OFFLOAD_IPV4_CKSUM``, + ``tx_offload_capa,tx_queue_offload_capa:DEV_TX_OFFLOAD_IPV4_CKSUM``. + + +.. _nic_features_l4_checksum_offload: + +L4 checksum offload +------------------- + +Supports L4 checksum offload. + +* **[uses] rte_eth_rxconf,rte_eth_rxmode**: ``offloads:DEV_RX_OFFLOAD_UDP_CKSUM,DEV_RX_OFFLOAD_TCP_CKSUM,DEV_RX_OFFLOAD_SCTP_CKSUM``. 
+* **[uses] rte_eth_txconf,rte_eth_txmode**: ``offloads:DEV_TX_OFFLOAD_UDP_CKSUM,DEV_TX_OFFLOAD_TCP_CKSUM,DEV_TX_OFFLOAD_SCTP_CKSUM``. +* **[uses] mbuf**: ``mbuf.ol_flags:PKT_TX_IPV4`` | ``PKT_TX_IPV6``, + ``mbuf.ol_flags:PKT_TX_L4_NO_CKSUM`` | ``PKT_TX_TCP_CKSUM`` | + ``PKT_TX_SCTP_CKSUM`` | ``PKT_TX_UDP_CKSUM``. +* **[uses] mbuf**: ``mbuf.l2_len``, ``mbuf.l3_len``. +* **[provides] mbuf**: ``mbuf.ol_flags:PKT_RX_L4_CKSUM_UNKNOWN`` | + ``PKT_RX_L4_CKSUM_BAD`` | ``PKT_RX_L4_CKSUM_GOOD`` | + ``PKT_RX_L4_CKSUM_NONE``. +* **[provides] rte_eth_dev_info**: ``rx_offload_capa,rx_queue_offload_capa:DEV_RX_OFFLOAD_UDP_CKSUM,DEV_RX_OFFLOAD_TCP_CKSUM,DEV_RX_OFFLOAD_SCTP_CKSUM``, + ``tx_offload_capa,tx_queue_offload_capa:DEV_TX_OFFLOAD_UDP_CKSUM,DEV_TX_OFFLOAD_TCP_CKSUM,DEV_TX_OFFLOAD_SCTP_CKSUM``. + +.. _nic_features_hw_timestamp: + +Timestamp offload +----------------- + +Supports Timestamp. + +* **[uses] rte_eth_rxconf,rte_eth_rxmode**: ``offloads:DEV_RX_OFFLOAD_TIMESTAMP``. +* **[provides] mbuf**: ``mbuf.ol_flags:PKT_RX_TIMESTAMP``. +* **[provides] mbuf**: ``mbuf.timestamp``. +* **[provides] rte_eth_dev_info**: ``rx_offload_capa,rx_queue_offload_capa: DEV_RX_OFFLOAD_TIMESTAMP``. +* **[related] eth_dev_ops**: ``read_clock``. + +.. _nic_features_macsec_offload: + +MACsec offload +-------------- + +Supports MACsec. + +* **[uses] rte_eth_rxconf,rte_eth_rxmode**: ``offloads:DEV_RX_OFFLOAD_MACSEC_STRIP``. +* **[uses] rte_eth_txconf,rte_eth_txmode**: ``offloads:DEV_TX_OFFLOAD_MACSEC_INSERT``. +* **[uses] mbuf**: ``mbuf.ol_flags:PKT_TX_MACSEC``. +* **[provides] rte_eth_dev_info**: ``rx_offload_capa,rx_queue_offload_capa:DEV_RX_OFFLOAD_MACSEC_STRIP``, + ``tx_offload_capa,tx_queue_offload_capa:DEV_TX_OFFLOAD_MACSEC_INSERT``. + + +.. _nic_features_inner_l3_checksum: + +Inner L3 checksum +----------------- + +Supports inner packet L3 checksum. + +* **[uses] rte_eth_rxconf,rte_eth_rxmode**: ``offloads:DEV_RX_OFFLOAD_OUTER_IPV4_CKSUM``. +* **[uses] rte_eth_txconf,rte_eth_txmode**: ``offloads:DEV_TX_OFFLOAD_OUTER_IPV4_CKSUM``. +* **[uses] mbuf**: ``mbuf.ol_flags:PKT_TX_IP_CKSUM``, + ``mbuf.ol_flags:PKT_TX_IPV4`` | ``PKT_TX_IPV6``, + ``mbuf.ol_flags:PKT_TX_OUTER_IP_CKSUM``, + ``mbuf.ol_flags:PKT_TX_OUTER_IPV4`` | ``PKT_TX_OUTER_IPV6``. +* **[uses] mbuf**: ``mbuf.outer_l2_len``, ``mbuf.outer_l3_len``. +* **[provides] mbuf**: ``mbuf.ol_flags:PKT_RX_EIP_CKSUM_BAD``. +* **[provides] rte_eth_dev_info**: ``rx_offload_capa,rx_queue_offload_capa:DEV_RX_OFFLOAD_OUTER_IPV4_CKSUM``, + ``tx_offload_capa,tx_queue_offload_capa:DEV_TX_OFFLOAD_OUTER_IPV4_CKSUM``. + + +.. _nic_features_inner_l4_checksum: + +Inner L4 checksum +----------------- + +Supports inner packet L4 checksum. + +* **[uses] rte_eth_rxconf,rte_eth_rxmode**: ``offloads:DEV_RX_OFFLOAD_OUTER_UDP_CKSUM``. +* **[provides] mbuf**: ``mbuf.ol_flags:PKT_RX_OUTER_L4_CKSUM_UNKNOWN`` | + ``PKT_RX_OUTER_L4_CKSUM_BAD`` | ``PKT_RX_OUTER_L4_CKSUM_GOOD`` | ``PKT_RX_OUTER_L4_CKSUM_INVALID``. +* **[uses] rte_eth_txconf,rte_eth_txmode**: ``offloads:DEV_TX_OFFLOAD_OUTER_UDP_CKSUM``. +* **[uses] mbuf**: ``mbuf.ol_flags:PKT_TX_OUTER_IPV4`` | ``PKT_TX_OUTER_IPV6``. + ``mbuf.ol_flags:PKT_TX_OUTER_UDP_CKSUM``. +* **[uses] mbuf**: ``mbuf.outer_l2_len``, ``mbuf.outer_l3_len``. +* **[provides] rte_eth_dev_info**: ``rx_offload_capa,rx_queue_offload_capa:DEV_RX_OFFLOAD_OUTER_UDP_CKSUM``, + ``tx_offload_capa,tx_queue_offload_capa:DEV_TX_OFFLOAD_OUTER_UDP_CKSUM``. + + +.. 
_nic_features_packet_type_parsing: + +Packet type parsing +------------------- + +Supports packet type parsing and returns a list of supported types. +Allows application to set ptypes it is interested in. + +* **[implements] eth_dev_ops**: ``dev_supported_ptypes_get``, +* **[related] API**: ``rte_eth_dev_get_supported_ptypes()``, + ``rte_eth_dev_set_ptypes()``, ``dev_ptypes_set``. +* **[provides] mbuf**: ``mbuf.packet_type``. + + +.. _nic_features_timesync: + +Timesync +-------- + +Supports IEEE1588/802.1AS timestamping. + +* **[implements] eth_dev_ops**: ``timesync_enable``, ``timesync_disable`` + ``timesync_read_rx_timestamp``, ``timesync_read_tx_timestamp``, + ``timesync_adjust_time``, ``timesync_read_time``, ``timesync_write_time``. +* **[related] API**: ``rte_eth_timesync_enable()``, ``rte_eth_timesync_disable()``, + ``rte_eth_timesync_read_rx_timestamp()``, + ``rte_eth_timesync_read_tx_timestamp``, ``rte_eth_timesync_adjust_time()``, + ``rte_eth_timesync_read_time()``, ``rte_eth_timesync_write_time()``. + + +.. _nic_features_rx_descriptor_status: + +Rx descriptor status +-------------------- + +Supports check the status of a Rx descriptor. When ``rx_descriptor_status`` is +used, status can be "Available", "Done" or "Unavailable". When +``rx_descriptor_done`` is used, status can be "DD bit is set" or "DD bit is +not set". + +* **[implements] eth_dev_ops**: ``rx_descriptor_status``. +* **[related] API**: ``rte_eth_rx_descriptor_status()``. +* **[implements] eth_dev_ops**: ``rx_descriptor_done``. +* **[related] API**: ``rte_eth_rx_descriptor_done()``. + + +.. _nic_features_tx_descriptor_status: + +Tx descriptor status +-------------------- + +Supports checking the status of a Tx descriptor. Status can be "Full", "Done" +or "Unavailable." + +* **[implements] eth_dev_ops**: ``tx_descriptor_status``. +* **[related] API**: ``rte_eth_tx_descriptor_status()``. + + +.. _nic_features_basic_stats: + +Basic stats +----------- + +Support basic statistics such as: ipackets, opackets, ibytes, obytes, +imissed, ierrors, oerrors, rx_nombuf. + +And per queue stats: q_ipackets, q_opackets, q_ibytes, q_obytes, q_errors. + +These apply to all drivers. + +* **[implements] eth_dev_ops**: ``stats_get``, ``stats_reset``. +* **[related] API**: ``rte_eth_stats_get``, ``rte_eth_stats_reset()``. + + +.. _nic_features_extended_stats: + +Extended stats +-------------- + +Supports Extended Statistics, changes from driver to driver. + +* **[implements] eth_dev_ops**: ``xstats_get``, ``xstats_reset``, ``xstats_get_names``. +* **[implements] eth_dev_ops**: ``xstats_get_by_id``, ``xstats_get_names_by_id``. +* **[related] API**: ``rte_eth_xstats_get()``, ``rte_eth_xstats_reset()``, + ``rte_eth_xstats_get_names``, ``rte_eth_xstats_get_by_id()``, + ``rte_eth_xstats_get_names_by_id()``, ``rte_eth_xstats_get_id_by_name()``. + + +.. _nic_features_stats_per_queue: + +Stats per queue +--------------- + +Supports configuring per-queue stat counter mapping. + +* **[implements] eth_dev_ops**: ``queue_stats_mapping_set``. +* **[related] API**: ``rte_eth_dev_set_rx_queue_stats_mapping()``, + ``rte_eth_dev_set_tx_queue_stats_mapping()``. + + +.. _nic_features_fw_version: + +FW version +---------- + +Supports getting device hardware firmware information. + +* **[implements] eth_dev_ops**: ``fw_version_get``. +* **[related] API**: ``rte_eth_dev_fw_version_get()``. + + +.. _nic_features_eeprom_dump: + +EEPROM dump +----------- + +Supports getting/setting device eeprom data. 
+ +* **[implements] eth_dev_ops**: ``get_eeprom_length``, ``get_eeprom``, ``set_eeprom``. +* **[related] API**: ``rte_eth_dev_get_eeprom_length()``, ``rte_eth_dev_get_eeprom()``, + ``rte_eth_dev_set_eeprom()``. + + +.. _nic_features_module_eeprom_dump: + +Module EEPROM dump +------------------ + +Supports getting information and data of plugin module eeprom. + +* **[implements] eth_dev_ops**: ``get_module_info``, ``get_module_eeprom``. +* **[related] API**: ``rte_eth_dev_get_module_info()``, ``rte_eth_dev_get_module_eeprom()``. + + +.. _nic_features_register_dump: + +Registers dump +-------------- + +Supports retrieving device registers and registering attributes (number of +registers and register size). + +* **[implements] eth_dev_ops**: ``get_reg``. +* **[related] API**: ``rte_eth_dev_get_reg_info()``. + + +.. _nic_features_led: + +LED +--- + +Supports turning on/off a software controllable LED on a device. + +* **[implements] eth_dev_ops**: ``dev_led_on``, ``dev_led_off``. +* **[related] API**: ``rte_eth_led_on()``, ``rte_eth_led_off()``. + + +.. _nic_features_multiprocess_aware: + +Multiprocess aware +------------------ + +Driver can be used for primary-secondary process model. + + +.. _nic_features_bsd_nic_uio: + +BSD nic_uio +----------- + +BSD ``nic_uio`` module supported. + + +.. _nic_features_linux_uio: + +Linux UIO +--------- + +Works with ``igb_uio`` kernel module. + +* **[provides] RTE_PMD_REGISTER_KMOD_DEP**: ``igb_uio``. + +.. _nic_features_linux_vfio: + +Linux VFIO +---------- + +Works with ``vfio-pci`` kernel module. + +* **[provides] RTE_PMD_REGISTER_KMOD_DEP**: ``vfio-pci``. + +.. _nic_features_other_kdrv: + +Other kdrv +---------- + +Kernel module other than above ones supported. + + +.. _nic_features_armv7: + +ARMv7 +----- + +Support armv7 architecture. + +Use ``defconfig_arm-armv7a-*-*``. + + +.. _nic_features_armv8: + +ARMv8 +----- + +Support armv8a (64bit) architecture. + +Use ``defconfig_arm64-armv8a-*-*`` + + +.. _nic_features_power8: + +Power8 +------ + +Support PowerPC architecture. + +Use ``defconfig_ppc_64-power8-*-*`` + +.. _nic_features_x86-32: + +x86-32 +------ + +Support 32bits x86 architecture. + +Use ``defconfig_x86_x32-native-*-*`` and ``defconfig_i686-native-*-*``. + + +.. _nic_features_x86-64: + +x86-64 +------ + +Support 64bits x86 architecture. + +Use ``defconfig_x86_64-native-*-*``. + + +.. _nic_features_usage_doc: + +Usage doc +--------- + +Documentation describes usage. + +See ``doc/guides/nics/*.rst`` + + +.. _nic_features_design_doc: + +Design doc +---------- + +Documentation describes design. + +See ``doc/guides/nics/*.rst``. + + +.. _nic_features_perf_doc: + +Perf doc +-------- + +Documentation describes performance values. + +See ``dpdk.org/doc/perf/*``. + +.. _nic_features_runtime_rx_queue_setup: + +Runtime Rx queue setup +---------------------- + +Supports Rx queue setup after device started. + +* **[provides] rte_eth_dev_info**: ``dev_capa:RTE_ETH_DEV_CAPA_RUNTIME_RX_QUEUE_SETUP``. +* **[related] API**: ``rte_eth_dev_info_get()``. + +.. _nic_features_runtime_tx_queue_setup: + +Runtime Tx queue setup +---------------------- + +Supports Tx queue setup after device started. + +* **[provides] rte_eth_dev_info**: ``dev_capa:RTE_ETH_DEV_CAPA_RUNTIME_TX_QUEUE_SETUP``. +* **[related] API**: ``rte_eth_dev_info_get()``. + +.. _nic_features_burst_mode_info: + +Burst mode info +--------------- + +Supports to get Rx/Tx packet burst mode information. + +* **[implements] eth_dev_ops**: ``rx_burst_mode_get``, ``tx_burst_mode_get``. 
+* **[related] API**: ``rte_eth_rx_burst_mode_get()``, ``rte_eth_tx_burst_mode_get()``. + +.. _nic_features_other: + +Other dev ops not represented by a Feature +------------------------------------------ + +* ``rxq_info_get`` +* ``txq_info_get`` +* ``vlan_tpid_set`` +* ``vlan_strip_queue_set`` +* ``vlan_pvid_set`` +* ``rx_queue_count`` +* ``l2_tunnel_offload_set`` +* ``uc_hash_table_set`` +* ``uc_all_hash_table_set`` +* ``udp_tunnel_port_add`` +* ``udp_tunnel_port_del`` +* ``l2_tunnel_eth_type_conf`` +* ``l2_tunnel_offload_set`` +* ``tx_pkt_prepare`` diff --git a/src/spdk/dpdk/doc/guides/nics/features/af_xdp.ini b/src/spdk/dpdk/doc/guides/nics/features/af_xdp.ini new file mode 100644 index 000000000..36953c2de --- /dev/null +++ b/src/spdk/dpdk/doc/guides/nics/features/af_xdp.ini @@ -0,0 +1,11 @@ +; +; Supported features of the 'af_xdp' network poll mode driver. +; +; Refer to default.ini for the full list of available PMD features. +; +[Features] +Link status = Y +MTU update = Y +Promiscuous mode = Y +Stats per queue = Y +x86-64 = Y diff --git a/src/spdk/dpdk/doc/guides/nics/features/afpacket.ini b/src/spdk/dpdk/doc/guides/nics/features/afpacket.ini new file mode 100644 index 000000000..99f87ab6e --- /dev/null +++ b/src/spdk/dpdk/doc/guides/nics/features/afpacket.ini @@ -0,0 +1,6 @@ +; +; Supported features of the 'afpacket' network poll mode driver. +; +; Refer to default.ini for the full list of available PMD features. +; +[Features] diff --git a/src/spdk/dpdk/doc/guides/nics/features/ark.ini b/src/spdk/dpdk/doc/guides/nics/features/ark.ini new file mode 100644 index 000000000..ec8a2b998 --- /dev/null +++ b/src/spdk/dpdk/doc/guides/nics/features/ark.ini @@ -0,0 +1,15 @@ +; +; Supported features of the 'ark' poll mode driver. +; +; Refer to default.ini for the full list of available PMD features. +; +[Features] +Speed capabilities = Y +Queue start/stop = Y +Jumbo frame = Y +Scattered Rx = Y +Basic stats = Y +Stats per queue = Y +Linux UIO = Y +x86-64 = Y +Usage doc = Y diff --git a/src/spdk/dpdk/doc/guides/nics/features/atlantic.ini b/src/spdk/dpdk/doc/guides/nics/features/atlantic.ini new file mode 100644 index 000000000..2bb8ecc01 --- /dev/null +++ b/src/spdk/dpdk/doc/guides/nics/features/atlantic.ini @@ -0,0 +1,38 @@ +; +; Supported features of the 'atlantic' network poll mode driver. +; +; Refer to default.ini for the full list of available PMD features. +; +[Features] +Speed capabilities = Y +Link status = Y +Link status event = Y +Queue start/stop = Y +MTU update = Y +Jumbo frame = Y +Promiscuous mode = Y +Allmulticast mode = Y +Unicast MAC filter = Y +RSS hash = Y +RSS key update = Y +RSS reta update = Y +VLAN filter = Y +Flow control = Y +CRC offload = Y +VLAN offload = Y +MACsec offload = Y +L3 checksum offload = Y +L4 checksum offload = Y +Packet type parsing = Y +Rx descriptor status = Y +Tx descriptor status = Y +Basic stats = Y +Extended stats = Y +Stats per queue = Y +FW version = Y +EEPROM dump = Y +Registers dump = Y +Linux UIO = Y +ARMv8 = Y +x86-32 = Y +x86-64 = Y diff --git a/src/spdk/dpdk/doc/guides/nics/features/avp.ini b/src/spdk/dpdk/doc/guides/nics/features/avp.ini new file mode 100644 index 000000000..ceb69939b --- /dev/null +++ b/src/spdk/dpdk/doc/guides/nics/features/avp.ini @@ -0,0 +1,16 @@ +; +; Supported features of the 'AVP' network poll mode driver. +; +; Refer to default.ini for the full list of available PMD features. 
+; +[Features] +Link status = Y +Jumbo frame = Y +Scattered Rx = Y +Promiscuous mode = Y +Unicast MAC filter = Y +VLAN offload = Y +Basic stats = Y +Stats per queue = Y +Linux UIO = Y +x86-64 = Y diff --git a/src/spdk/dpdk/doc/guides/nics/features/axgbe.ini b/src/spdk/dpdk/doc/guides/nics/features/axgbe.ini new file mode 100644 index 000000000..0becaa097 --- /dev/null +++ b/src/spdk/dpdk/doc/guides/nics/features/axgbe.ini @@ -0,0 +1,20 @@ +; +; Supported features of the 'axgbe' network poll mode driver. +; +; Refer to default.ini for the full list of available PMD features. +; +[Features] +Speed capabilities = Y +Link status = Y +Jumbo frame = Y +Scattered Rx = Y +Promiscuous mode = Y +Allmulticast mode = Y +RSS hash = Y +CRC offload = Y +L3 checksum offload = Y +L4 checksum offload = Y +Basic stats = Y +Linux UIO = Y +x86-32 = Y +x86-64 = Y diff --git a/src/spdk/dpdk/doc/guides/nics/features/bnx2x.ini b/src/spdk/dpdk/doc/guides/nics/features/bnx2x.ini new file mode 100644 index 000000000..e17bf6068 --- /dev/null +++ b/src/spdk/dpdk/doc/guides/nics/features/bnx2x.ini @@ -0,0 +1,18 @@ +; +; Supported features of the 'bnx2x' network poll mode driver. +; +; Refer to default.ini for the full list of available PMD features. +; +[Features] +Speed capabilities = P +Link status = Y +Link status event = Y +Promiscuous mode = Y +Unicast MAC filter = Y +Multicast MAC filter = Y +SR-IOV = Y +Basic stats = Y +Extended stats = Y +Linux UIO = Y +x86-64 = Y +Usage doc = Y diff --git a/src/spdk/dpdk/doc/guides/nics/features/bnxt.ini b/src/spdk/dpdk/doc/guides/nics/features/bnxt.ini new file mode 100644 index 000000000..37a99e336 --- /dev/null +++ b/src/spdk/dpdk/doc/guides/nics/features/bnxt.ini @@ -0,0 +1,50 @@ +; +; Supported features of the 'bnxt' network poll mode driver. +; +; Refer to default.ini for the full list of available PMD features. +; +[Features] +Speed capabilities = Y +Link status = Y +Link status event = Y +Rx interrupt = Y +Queue start/stop = Y +MTU update = Y +Jumbo frame = Y +Scattered Rx = Y +LRO = Y +TSO = Y +Promiscuous mode = Y +Allmulticast mode = Y +Unicast MAC filter = Y +Multicast MAC filter = Y +RSS hash = Y +RSS key update = Y +RSS reta update = Y +VMDq = Y +SR-IOV = Y +VLAN filter = Y +Flow control = Y +Flow API = Y +CRC offload = Y +L3 checksum offload = Y +L4 checksum offload = Y +Inner L3 checksum = Y +Inner L4 checksum = Y +Packet type parsing = Y +Timesync = Y +VLAN offload = Y +Rx descriptor status = Y +Tx descriptor status = Y +Basic stats = Y +Extended stats = Y +Stats per queue = Y +FW version = Y +EEPROM dump = Y +LED = Y +Multiprocess aware = Y +Linux UIO = Y +Linux VFIO = Y +ARMv8 = Y +x86-32 = Y +x86-64 = Y diff --git a/src/spdk/dpdk/doc/guides/nics/features/cxgbe.ini b/src/spdk/dpdk/doc/guides/nics/features/cxgbe.ini new file mode 100644 index 000000000..88f2f92b7 --- /dev/null +++ b/src/spdk/dpdk/doc/guides/nics/features/cxgbe.ini @@ -0,0 +1,35 @@ +; +; Supported features of the 'cxgbe' network poll mode driver. +; +; Refer to default.ini for the full list of available PMD features. 
+; +[Features] +Speed capabilities = Y +Link status = Y +Queue start/stop = Y +MTU update = Y +Jumbo frame = Y +Scattered Rx = Y +TSO = Y +Promiscuous mode = Y +Allmulticast mode = Y +RSS hash = Y +RSS key update = Y +Flow control = Y +Flow API = Y +CRC offload = Y +VLAN offload = Y +L3 checksum offload = Y +L4 checksum offload = Y +Packet type parsing = Y +Basic stats = Y +Stats per queue = Y +EEPROM dump = Y +Registers dump = Y +Multiprocess aware = Y +BSD nic_uio = Y +Linux UIO = Y +Linux VFIO = Y +x86-32 = Y +x86-64 = Y +Usage doc = Y diff --git a/src/spdk/dpdk/doc/guides/nics/features/cxgbevf.ini b/src/spdk/dpdk/doc/guides/nics/features/cxgbevf.ini new file mode 100644 index 000000000..b41fc3655 --- /dev/null +++ b/src/spdk/dpdk/doc/guides/nics/features/cxgbevf.ini @@ -0,0 +1,29 @@ +; +; Supported features of the 'cxgbevf' network poll mode driver. +; +; Refer to default.ini for the full list of available PMD features. +; +[Features] +Speed capabilities = Y +Link status = Y +Queue start/stop = Y +MTU update = Y +Jumbo frame = Y +Scattered Rx = Y +TSO = Y +Promiscuous mode = Y +Allmulticast mode = Y +RSS hash = Y +CRC offload = Y +VLAN offload = Y +L3 checksum offload = Y +L4 checksum offload = Y +Packet type parsing = Y +Basic stats = Y +Stats per queue = Y +Multiprocess aware = Y +Linux UIO = Y +Linux VFIO = Y +x86-32 = Y +x86-64 = Y +Usage doc = Y diff --git a/src/spdk/dpdk/doc/guides/nics/features/default.ini b/src/spdk/dpdk/doc/guides/nics/features/default.ini new file mode 100644 index 000000000..4d0ad324e --- /dev/null +++ b/src/spdk/dpdk/doc/guides/nics/features/default.ini @@ -0,0 +1,79 @@ +; +; Features of a default network driver. +; +; This file defines the features that are valid for inclusion in +; the other driver files and also the order that they appear in +; the features table in the documentation. The feature description +; string should not exceed feature_str_len defined in conf.py. +; +[Features] +Speed capabilities = +Link status = +Link status event = +Removal event = +Queue status event = +Rx interrupt = +Lock-free Tx queue = +Fast mbuf free = +Free Tx mbuf on demand = +Queue start/stop = +Runtime Rx queue setup = +Runtime Tx queue setup = +Burst mode info = +MTU update = +Jumbo frame = +Scattered Rx = +LRO = +TSO = +Promiscuous mode = +Allmulticast mode = +Unicast MAC filter = +Multicast MAC filter = +RSS hash = +RSS key update = +RSS reta update = +Inner RSS = +VMDq = +SR-IOV = +DCB = +VLAN filter = +Flow control = +Flow API = +Rate limitation = +Traffic mirroring = +Inline crypto = +Inline protocol = +CRC offload = +VLAN offload = +QinQ offload = +L3 checksum offload = +L4 checksum offload = +Timestamp offload = +MACsec offload = +Inner L3 checksum = +Inner L4 checksum = +Packet type parsing = +Timesync = +Rx descriptor status = +Tx descriptor status = +Basic stats = +Extended stats = +Stats per queue = +FW version = +EEPROM dump = +Module EEPROM dump = +Registers dump = +LED = +Multiprocess aware = +BSD nic_uio = +Linux UIO = +Linux VFIO = +Other kdrv = +ARMv7 = +ARMv8 = +Power8 = +x86-32 = +x86-64 = +Usage doc = +Design doc = +Perf doc = diff --git a/src/spdk/dpdk/doc/guides/nics/features/dpaa.ini b/src/spdk/dpdk/doc/guides/nics/features/dpaa.ini new file mode 100644 index 000000000..24cfd8566 --- /dev/null +++ b/src/spdk/dpdk/doc/guides/nics/features/dpaa.ini @@ -0,0 +1,24 @@ +; +; Supported features of the 'dpaa' network poll mode driver. +; +; Refer to default.ini for the full list of available PMD features. 
+; +[Features] +Speed capabilities = P +Link status = Y +Jumbo frame = Y +MTU update = Y +Scattered Rx = Y +Promiscuous mode = Y +Allmulticast mode = Y +Unicast MAC filter = Y +RSS hash = Y +Flow control = Y +L3 checksum offload = Y +L4 checksum offload = Y +Packet type parsing = Y +Basic stats = Y +Extended stats = Y +FW version = Y +ARMv8 = Y +Usage doc = Y diff --git a/src/spdk/dpdk/doc/guides/nics/features/dpaa2.ini b/src/spdk/dpdk/doc/guides/nics/features/dpaa2.ini new file mode 100644 index 000000000..c2214fbd5 --- /dev/null +++ b/src/spdk/dpdk/doc/guides/nics/features/dpaa2.ini @@ -0,0 +1,28 @@ +; +; Supported features of the 'dpaa2' network poll mode driver. +; +; Refer to default.ini for the full list of available PMD features. +; +[Features] +Speed capabilities = Y +Link status = Y +Link status event = Y +Queue start/stop = Y +Jumbo frame = Y +MTU update = Y +Promiscuous mode = Y +Allmulticast mode = Y +Unicast MAC filter = Y +RSS hash = Y +VLAN filter = Y +Flow control = Y +VLAN offload = Y +L3 checksum offload = Y +L4 checksum offload = Y +Packet type parsing = Y +Basic stats = Y +Extended stats = Y +FW version = Y +Linux VFIO = Y +ARMv8 = Y +Usage doc = Y diff --git a/src/spdk/dpdk/doc/guides/nics/features/e1000.ini b/src/spdk/dpdk/doc/guides/nics/features/e1000.ini new file mode 100644 index 000000000..51ca580f5 --- /dev/null +++ b/src/spdk/dpdk/doc/guides/nics/features/e1000.ini @@ -0,0 +1,32 @@ +; +; Supported features of the 'e1000' network poll mode driver. +; +; Refer to default.ini for the full list of available PMD features. +; +[Features] +Speed capabilities = P +Link status = Y +Link status event = Y +Rx interrupt = Y +Free Tx mbuf on demand = Y +MTU update = Y +Jumbo frame = Y +Scattered Rx = Y +Promiscuous mode = Y +Allmulticast mode = Y +Unicast MAC filter = Y +VLAN filter = Y +Flow control = Y +CRC offload = Y +VLAN offload = Y +QinQ offload = Y +L3 checksum offload = Y +L4 checksum offload = Y +Rx descriptor status = Y +Tx descriptor status = Y +Basic stats = Y +BSD nic_uio = Y +Linux UIO = Y +Linux VFIO = Y +x86-32 = Y +x86-64 = Y diff --git a/src/spdk/dpdk/doc/guides/nics/features/ena.ini b/src/spdk/dpdk/doc/guides/nics/features/ena.ini new file mode 100644 index 000000000..4300dd012 --- /dev/null +++ b/src/spdk/dpdk/doc/guides/nics/features/ena.ini @@ -0,0 +1,24 @@ +; +; Supported features of the 'ena' network poll mode driver. +; +; Refer to default.ini for the full list of available PMD features. +; +[Features] +Link status = Y +Link status event = Y +MTU update = Y +Jumbo frame = Y +Scattered Rx = Y +TSO = Y +RSS hash = Y +RSS reta update = Y +L3 checksum offload = Y +L4 checksum offload = Y +Basic stats = Y +Extended stats = Y +Linux UIO = Y +Linux VFIO = Y +x86-32 = Y +x86-64 = Y +Usage doc = Y +Design doc = Y diff --git a/src/spdk/dpdk/doc/guides/nics/features/enetc.ini b/src/spdk/dpdk/doc/guides/nics/features/enetc.ini new file mode 100644 index 000000000..39a520172 --- /dev/null +++ b/src/spdk/dpdk/doc/guides/nics/features/enetc.ini @@ -0,0 +1,20 @@ +; +; Supported features of the 'enetc' network poll mode driver. +; +; Refer to default.ini for the full list of available PMD features. 
+; +[Features] +Packet type parsing = Y +Link status = Y +Basic stats = Y +Promiscuous mode = Y +Allmulticast mode = Y +MTU update = Y +Jumbo frame = Y +Queue start/stop = Y +CRC offload = Y +L3 checksum offload = P +L4 checksum offload = P +Linux VFIO = Y +ARMv8 = Y +Usage doc = Y diff --git a/src/spdk/dpdk/doc/guides/nics/features/enic.ini b/src/spdk/dpdk/doc/guides/nics/features/enic.ini new file mode 100644 index 000000000..1a065a84f --- /dev/null +++ b/src/spdk/dpdk/doc/guides/nics/features/enic.ini @@ -0,0 +1,41 @@ +; +; Supported features of the 'enic' network poll mode driver. +; +; Refer to default.ini for the full list of available PMD features. +; +[Features] +Speed capabilities = Y +Link status = Y +Link status event = Y +Rx interrupt = Y +Queue start/stop = Y +MTU update = Y +Jumbo frame = Y +Scattered Rx = Y +TSO = Y +Promiscuous mode = Y +Allmulticast mode = Y +Unicast MAC filter = Y +Multicast MAC filter = Y +RSS hash = Y +RSS key update = Y +RSS reta update = Y +Inner RSS = Y +SR-IOV = Y +CRC offload = Y +VLAN offload = Y +Flow API = Y +L3 checksum offload = Y +L4 checksum offload = Y +Inner L3 checksum = Y +Inner L4 checksum = Y +Packet type parsing = Y +Basic stats = Y +FW version = Y +Multiprocess aware = Y +BSD nic_uio = Y +Linux UIO = Y +Linux VFIO = Y +x86-32 = Y +x86-64 = Y +Usage doc = Y diff --git a/src/spdk/dpdk/doc/guides/nics/features/failsafe.ini b/src/spdk/dpdk/doc/guides/nics/features/failsafe.ini new file mode 100644 index 000000000..b6f3dcee6 --- /dev/null +++ b/src/spdk/dpdk/doc/guides/nics/features/failsafe.ini @@ -0,0 +1,31 @@ +; +; Supported features of the 'fail-safe' poll mode driver. +; +; Refer to default.ini for the full list of available PMD features. +; +[Features] +Link status = Y +Link status event = Y +Rx interrupt = Y +Fast mbuf free = Y +Queue start/stop = Y +Runtime Rx queue setup = Y +Runtime Tx queue setup = Y +MTU update = Y +Jumbo frame = Y +Promiscuous mode = Y +Allmulticast mode = Y +Unicast MAC filter = Y +Multicast MAC filter = Y +VLAN filter = Y +Flow control = Y +Flow API = Y +Packet type parsing = Y +Basic stats = Y +Stats per queue = Y +ARMv7 = Y +ARMv8 = Y +Power8 = Y +x86-32 = Y +x86-64 = Y +Usage doc = Y diff --git a/src/spdk/dpdk/doc/guides/nics/features/fm10k.ini b/src/spdk/dpdk/doc/guides/nics/features/fm10k.ini new file mode 100644 index 000000000..0acdf0d33 --- /dev/null +++ b/src/spdk/dpdk/doc/guides/nics/features/fm10k.ini @@ -0,0 +1,39 @@ +; +; Supported features of the 'fm10k' network poll mode driver. +; +; Refer to default.ini for the full list of available PMD features. 
+; +[Features] +Speed capabilities = P +Link status = Y +Link status event = Y +Rx interrupt = Y +Queue start/stop = Y +Jumbo frame = Y +Scattered Rx = Y +TSO = Y +Promiscuous mode = Y +Allmulticast mode = Y +Unicast MAC filter = Y +Multicast MAC filter = Y +RSS hash = Y +RSS key update = Y +RSS reta update = Y +VMDq = Y +VLAN filter = Y +CRC offload = Y +VLAN offload = Y +L3 checksum offload = Y +L4 checksum offload = Y +Packet type parsing = Y +Rx descriptor status = Y +Tx descriptor status = Y +Basic stats = Y +Extended stats = Y +Stats per queue = Y +Multiprocess aware = Y +BSD nic_uio = Y +Linux UIO = Y +Linux VFIO = Y +x86-32 = Y +x86-64 = Y diff --git a/src/spdk/dpdk/doc/guides/nics/features/fm10k_vf.ini b/src/spdk/dpdk/doc/guides/nics/features/fm10k_vf.ini new file mode 100644 index 000000000..44b50faa1 --- /dev/null +++ b/src/spdk/dpdk/doc/guides/nics/features/fm10k_vf.ini @@ -0,0 +1,31 @@ +; +; Supported features of the 'fm10k_vf' network poll mode driver. +; +; Refer to default.ini for the full list of available PMD features. +; +[Features] +Speed capabilities = P +Link status = Y +Link status event = Y +Rx interrupt = Y +Queue start/stop = Y +Jumbo frame = Y +Scattered Rx = Y +TSO = Y +RSS hash = Y +RSS key update = Y +RSS reta update = Y +CRC offload = Y +VLAN offload = Y +L3 checksum offload = Y +L4 checksum offload = Y +Packet type parsing = Y +Basic stats = Y +Extended stats = Y +Stats per queue = Y +Multiprocess aware = Y +BSD nic_uio = Y +Linux UIO = Y +Linux VFIO = Y +x86-32 = Y +x86-64 = Y diff --git a/src/spdk/dpdk/doc/guides/nics/features/hinic.ini b/src/spdk/dpdk/doc/guides/nics/features/hinic.ini new file mode 100644 index 000000000..5be05d36f --- /dev/null +++ b/src/spdk/dpdk/doc/guides/nics/features/hinic.ini @@ -0,0 +1,42 @@ +; +; Supported features of the 'hinic' network poll mode driver. +; +; Refer to default.ini for the full list of available PMD features. +; +[Features] +Speed capabilities = Y +Link status = Y +Link status event = Y +Queue start/stop = Y +MTU update = Y +Jumbo frame = Y +Scattered Rx = Y +TSO = Y +LRO = Y +Promiscuous mode = Y +Allmulticast mode = Y +Unicast MAC filter = Y +Multicast MAC filter = Y +RSS hash = Y +RSS key update = Y +RSS reta update = Y +Inner RSS = Y +SR-IOV = Y +VLAN filter = Y +VLAN offload = Y +CRC offload = Y +L3 checksum offload = Y +L4 checksum offload = Y +Inner L3 checksum = Y +Inner L4 checksum = Y +Basic stats = Y +Extended stats = Y +Stats per queue = Y +Flow API = Y +Flow control = Y +FW version = Y +Multiprocess aware = Y +Linux UIO = Y +Linux VFIO = Y +x86-64 = Y +ARMv8 = Y diff --git a/src/spdk/dpdk/doc/guides/nics/features/hns3.ini b/src/spdk/dpdk/doc/guides/nics/features/hns3.ini new file mode 100644 index 000000000..c3a8544bc --- /dev/null +++ b/src/spdk/dpdk/doc/guides/nics/features/hns3.ini @@ -0,0 +1,35 @@ +; +; Supported features of the 'hns3' network poll mode driver. +; +; Refer to default.ini for the full list of available PMD features. 
+; +[Features] +Link status = Y +Rx interrupt = Y +MTU update = Y +Jumbo frame = Y +TSO = Y +Promiscuous mode = Y +Allmulticast mode = Y +Unicast MAC filter = Y +Multicast MAC filter = Y +RSS hash = Y +RSS key update = Y +RSS reta update = Y +DCB = Y +VLAN filter = Y +Flow control = Y +Flow API = Y +CRC offload = Y +VLAN offload = Y +L3 checksum offload = Y +L4 checksum offload = Y +Inner L3 checksum = Y +Inner L4 checksum = Y +Basic stats = Y +Extended stats = Y +Stats per queue = Y +FW version = Y +Linux UIO = Y +Linux VFIO = Y +ARMv8 = Y diff --git a/src/spdk/dpdk/doc/guides/nics/features/hns3_vf.ini b/src/spdk/dpdk/doc/guides/nics/features/hns3_vf.ini new file mode 100644 index 000000000..80773ac90 --- /dev/null +++ b/src/spdk/dpdk/doc/guides/nics/features/hns3_vf.ini @@ -0,0 +1,32 @@ +; +; Supported features of the 'hns3' network poll mode driver. +; +; Refer to default.ini for the full list of available PMD features. +; +[Features] +Link status = Y +Rx interrupt = Y +MTU update = Y +Jumbo frame = Y +TSO = Y +Promiscuous mode = Y +Allmulticast mode = Y +Unicast MAC filter = Y +Multicast MAC filter = Y +RSS hash = Y +RSS key update = Y +RSS reta update = Y +VLAN filter = Y +Flow API = Y +CRC offload = Y +VLAN offload = Y +L3 checksum offload = Y +L4 checksum offload = Y +Inner L3 checksum = Y +Inner L4 checksum = Y +Basic stats = Y +Extended stats = Y +Stats per queue = Y +Linux UIO = Y +Linux VFIO = Y +ARMv8 = Y diff --git a/src/spdk/dpdk/doc/guides/nics/features/i40e.ini b/src/spdk/dpdk/doc/guides/nics/features/i40e.ini new file mode 100644 index 000000000..2e89079f5 --- /dev/null +++ b/src/spdk/dpdk/doc/guides/nics/features/i40e.ini @@ -0,0 +1,54 @@ +; +; Supported features of the 'i40e' network poll mode driver. +; +; Refer to default.ini for the full list of available PMD features. +; +[Features] +Speed capabilities = Y +Link status = Y +Link status event = Y +Rx interrupt = Y +Fast mbuf free = P +Queue start/stop = Y +Runtime Rx queue setup = Y +Runtime Tx queue setup = Y +Burst mode info = Y +Jumbo frame = Y +Scattered Rx = Y +TSO = Y +Promiscuous mode = Y +Allmulticast mode = Y +Unicast MAC filter = Y +RSS hash = Y +RSS key update = Y +RSS reta update = Y +VMDq = Y +SR-IOV = Y +DCB = Y +VLAN filter = Y +Flow control = Y +Flow API = Y +Traffic mirroring = Y +CRC offload = Y +VLAN offload = Y +QinQ offload = P +L3 checksum offload = P +L4 checksum offload = P +Inner L3 checksum = P +Inner L4 checksum = P +Packet type parsing = Y +Timesync = Y +Rx descriptor status = Y +Tx descriptor status = Y +Basic stats = Y +Extended stats = Y +FW version = Y +Module EEPROM dump = Y +Multiprocess aware = Y +BSD nic_uio = Y +Linux UIO = Y +Linux VFIO = Y +x86-32 = Y +x86-64 = Y +ARMv8 = Y +Power8 = Y diff --git a/src/spdk/dpdk/doc/guides/nics/features/i40e_vf.ini b/src/spdk/dpdk/doc/guides/nics/features/i40e_vf.ini new file mode 100644 index 000000000..9f95063d2 --- /dev/null +++ b/src/spdk/dpdk/doc/guides/nics/features/i40e_vf.ini @@ -0,0 +1,39 @@ +; +; Supported features of the 'i40e_vf' network poll mode driver. +; +; Refer to default.ini for the full list of available PMD features. 
+; +[Features] +Rx interrupt = Y +Link status = Y +Fast mbuf free = P +Queue start/stop = Y +Jumbo frame = Y +Scattered Rx = Y +TSO = Y +Promiscuous mode = Y +Allmulticast mode = Y +Unicast MAC filter = Y +Multicast MAC filter = Y +RSS hash = Y +RSS key update = Y +RSS reta update = Y +VLAN filter = Y +CRC offload = Y +VLAN offload = Y +QinQ offload = P +L3 checksum offload = P +L4 checksum offload = P +Inner L3 checksum = P +Inner L4 checksum = P +Packet type parsing = Y +Rx descriptor status = Y +Tx descriptor status = Y +Basic stats = Y +Extended stats = Y +Multiprocess aware = Y +BSD nic_uio = Y +Linux UIO = Y +Linux VFIO = Y +x86-32 = Y +x86-64 = Y diff --git a/src/spdk/dpdk/doc/guides/nics/features/iavf.ini b/src/spdk/dpdk/doc/guides/nics/features/iavf.ini new file mode 100644 index 000000000..2e7cea38d --- /dev/null +++ b/src/spdk/dpdk/doc/guides/nics/features/iavf.ini @@ -0,0 +1,36 @@ +; +; Supported features of the 'iavf' network poll mode driver. +; +; Refer to default.ini for the full list of available PMD features. +; +[Features] +Speed capabilities = Y +Link status = Y +Rx interrupt = Y +Queue start/stop = Y +MTU update = Y +Jumbo frame = Y +Scattered Rx = Y +TSO = Y +Promiscuous mode = Y +Allmulticast mode = Y +Unicast MAC filter = Y +RSS hash = Y +RSS key update = Y +RSS reta update = Y +Flow API = Y +VLAN filter = Y +CRC offload = Y +VLAN offload = Y +L3 checksum offload = P +L4 checksum offload = P +Packet type parsing = Y +Rx descriptor status = Y +Tx descriptor status = Y +Basic stats = Y +Multiprocess aware = Y +BSD nic_uio = Y +Linux UIO = Y +Linux VFIO = Y +x86-32 = Y +x86-64 = Y diff --git a/src/spdk/dpdk/doc/guides/nics/features/ice.ini b/src/spdk/dpdk/doc/guides/nics/features/ice.ini new file mode 100644 index 000000000..3ee1f6ea4 --- /dev/null +++ b/src/spdk/dpdk/doc/guides/nics/features/ice.ini @@ -0,0 +1,45 @@ +; +; Supported features of the 'ice' network poll mode driver. +; +; Refer to default.ini for the full list of available PMD features. +; +[Features] +Speed capabilities = Y +Link status = Y +Link status event = Y +Rx interrupt = Y +Fast mbuf free = P +Queue start/stop = Y +Burst mode info = Y +MTU update = Y +Jumbo frame = Y +Scattered Rx = Y +TSO = Y +Promiscuous mode = Y +Allmulticast mode = Y +Unicast MAC filter = Y +RSS hash = Y +RSS key update = Y +RSS reta update = Y +Flow API = Y +VLAN filter = Y +CRC offload = Y +VLAN offload = Y +QinQ offload = P +L3 checksum offload = P +L4 checksum offload = P +Inner L3 checksum = P +Inner L4 checksum = P +Packet type parsing = Y +Rx descriptor status = Y +Tx descriptor status = Y +Basic stats = Y +Extended stats = Y +FW version = Y +Module EEPROM dump = Y +Multiprocess aware = Y +BSD nic_uio = Y +Linux UIO = Y +Linux VFIO = Y +x86-32 = Y +x86-64 = Y diff --git a/src/spdk/dpdk/doc/guides/nics/features/igb.ini b/src/spdk/dpdk/doc/guides/nics/features/igb.ini new file mode 100644 index 000000000..167c0cabe --- /dev/null +++ b/src/spdk/dpdk/doc/guides/nics/features/igb.ini @@ -0,0 +1,47 @@ +; +; Supported features of the 'igb' network poll mode driver. +; +; Refer to default.ini for the full list of available PMD features. 
+; +[Features] +Speed capabilities = P +Link status = Y +Link status event = Y +Rx interrupt = Y +MTU update = Y +Jumbo frame = Y +Scattered Rx = Y +TSO = Y +Promiscuous mode = Y +Allmulticast mode = Y +Unicast MAC filter = Y +Multicast MAC filter = Y +RSS hash = Y +RSS key update = Y +RSS reta update = Y +VMDq = Y +SR-IOV = Y +DCB = Y +VLAN filter = Y +Flow control = Y +Flow API = Y +CRC offload = Y +VLAN offload = Y +QinQ offload = Y +L3 checksum offload = Y +L4 checksum offload = Y +Packet type parsing = Y +Timesync = Y +Rx descriptor status = Y +Tx descriptor status = Y +Basic stats = Y +Extended stats = Y +FW version = Y +EEPROM dump = Y +Module EEPROM dump = Y +Registers dump = Y +BSD nic_uio = Y +Linux UIO = Y +Linux VFIO = Y +x86-32 = Y +x86-64 = Y diff --git a/src/spdk/dpdk/doc/guides/nics/features/igb_vf.ini b/src/spdk/dpdk/doc/guides/nics/features/igb_vf.ini new file mode 100644 index 000000000..d9653234b --- /dev/null +++ b/src/spdk/dpdk/doc/guides/nics/features/igb_vf.ini @@ -0,0 +1,30 @@ +; +; Supported features of the 'igb_vf' network poll mode driver. +; +; Refer to default.ini for the full list of available PMD features. +; +[Features] +Link status = Y +Rx interrupt = Y +Scattered Rx = Y +TSO = Y +Promiscuous mode = Y +Allmulticast mode = Y +Unicast MAC filter = Y +VLAN filter = Y +CRC offload = Y +VLAN offload = Y +QinQ offload = Y +L3 checksum offload = Y +L4 checksum offload = Y +Packet type parsing = Y +Rx descriptor status = Y +Tx descriptor status = Y +Basic stats = Y +Extended stats = Y +Registers dump = Y +BSD nic_uio = Y +Linux UIO = Y +Linux VFIO = Y +x86-32 = Y +x86-64 = Y diff --git a/src/spdk/dpdk/doc/guides/nics/features/igc.ini b/src/spdk/dpdk/doc/guides/nics/features/igc.ini new file mode 100644 index 000000000..09300eb36 --- /dev/null +++ b/src/spdk/dpdk/doc/guides/nics/features/igc.ini @@ -0,0 +1,38 @@ +; Supported features of the 'igc' network poll mode driver. +; +; Refer to default.ini for the full list of available PMD features. +; +[Features] +Speed capabilities = Y +Link status = Y +Link status event = Y +FW version = Y +LED = Y +Packet type parsing = Y +Rx descriptor status = Y +Tx descriptor status = Y +MTU update = Y +Jumbo frame = Y +Scattered Rx = Y +TSO = Y +Promiscuous mode = Y +Allmulticast mode = Y +Unicast MAC filter = Y +Multicast MAC filter = Y +RSS hash = Y +CRC offload = Y +L3 checksum offload = Y +L4 checksum offload = Y +Basic stats = Y +Extended stats = Y +Stats per queue = Y +Rx interrupt = Y +Flow control = Y +RSS key update = Y +RSS reta update = Y +VLAN filter = Y +VLAN offload = Y +Flow API = P +Linux UIO = Y +Linux VFIO = Y +x86-64 = Y diff --git a/src/spdk/dpdk/doc/guides/nics/features/ionic.ini b/src/spdk/dpdk/doc/guides/nics/features/ionic.ini new file mode 100644 index 000000000..083c7bd99 --- /dev/null +++ b/src/spdk/dpdk/doc/guides/nics/features/ionic.ini @@ -0,0 +1,36 @@ +; +; Supported features of the 'ionic' network poll mode driver. +; +; Refer to default.ini for the full list of available PMD features. 
+; +[Features] +Speed capabilities = Y +Link status = Y +Link status event = Y +Queue start/stop = Y +MTU update = Y +Jumbo frame = Y +Scattered Rx = Y +LRO = Y +TSO = Y +Promiscuous mode = Y +Allmulticast mode = Y +Unicast MAC filter = Y +RSS hash = Y +RSS key update = Y +RSS reta update = Y +VLAN filter = Y +VLAN offload = Y +Flow control = Y +CRC offload = Y +L3 checksum offload = Y +L4 checksum offload = Y +Packet type parsing = Y +Basic stats = Y +Extended stats = Y +Stats per queue = Y +FW version = Y +Linux UIO = Y +Linux VFIO = Y +x86-64 = Y +Usage doc = Y diff --git a/src/spdk/dpdk/doc/guides/nics/features/ipn3ke.ini b/src/spdk/dpdk/doc/guides/nics/features/ipn3ke.ini new file mode 100644 index 000000000..47a6526be --- /dev/null +++ b/src/spdk/dpdk/doc/guides/nics/features/ipn3ke.ini @@ -0,0 +1,51 @@ +; +; Supported features of the 'ipn3ke' network poll mode driver. +; +; Refer to default.ini for the full list of available PMD features. +; +[Features] +Speed capabilities = Y +Link status = Y +Link status event = Y +Rx interrupt = Y +Queue start/stop = Y +Runtime Rx queue setup = Y +Runtime Tx queue setup = Y +Jumbo frame = Y +Scattered Rx = Y +TSO = Y +Promiscuous mode = Y +Allmulticast mode = Y +Unicast MAC filter = Y +Multicast MAC filter = Y +RSS hash = Y +RSS key update = Y +RSS reta update = Y +VMDq = Y +SR-IOV = Y +DCB = Y +VLAN filter = Y +Flow control = Y +Flow API = Y +Traffic mirroring = Y +CRC offload = Y +VLAN offload = Y +QinQ offload = Y +L3 checksum offload = Y +L4 checksum offload = Y +Inner L3 checksum = Y +Inner L4 checksum = Y +Packet type parsing = Y +Timesync = Y +Rx descriptor status = Y +Tx descriptor status = Y +Basic stats = Y +Extended stats = Y +FW version = Y +Module EEPROM dump = Y +Multiprocess aware = Y +BSD nic_uio = Y +Linux UIO = Y +Linux VFIO = Y +x86-32 = Y +x86-64 = Y diff --git a/src/spdk/dpdk/doc/guides/nics/features/ixgbe.ini b/src/spdk/dpdk/doc/guides/nics/features/ixgbe.ini new file mode 100644 index 000000000..f817c93b8 --- /dev/null +++ b/src/spdk/dpdk/doc/guides/nics/features/ixgbe.ini @@ -0,0 +1,58 @@ +; +; Supported features of the 'ixgbe' network poll mode driver. +; +; Refer to default.ini for the full list of available PMD features. +; +[Features] +Speed capabilities = Y +Link status = Y +Link status event = Y +Rx interrupt = Y +Queue start/stop = Y +MTU update = Y +Jumbo frame = Y +Scattered Rx = Y +LRO = Y +TSO = Y +Promiscuous mode = Y +Allmulticast mode = Y +Unicast MAC filter = Y +Multicast MAC filter = Y +RSS hash = Y +RSS key update = Y +RSS reta update = Y +VMDq = Y +SR-IOV = Y +DCB = Y +VLAN filter = Y +Flow control = Y +Flow API = Y +Rate limitation = Y +Traffic mirroring = Y +Inline crypto = Y +CRC offload = P +VLAN offload = P +QinQ offload = P +L3 checksum offload = P +L4 checksum offload = P +MACsec offload = P +Inner L3 checksum = P +Inner L4 checksum = P +Packet type parsing = Y +Timesync = Y +Rx descriptor status = Y +Tx descriptor status = Y +Basic stats = Y +Extended stats = Y +Stats per queue = Y +FW version = Y +EEPROM dump = Y +Module EEPROM dump = Y +Registers dump = Y +Multiprocess aware = Y +BSD nic_uio = Y +Linux UIO = Y +Linux VFIO = Y +ARMv8 = Y +x86-32 = Y +x86-64 = Y diff --git a/src/spdk/dpdk/doc/guides/nics/features/ixgbe_vf.ini b/src/spdk/dpdk/doc/guides/nics/features/ixgbe_vf.ini new file mode 100644 index 000000000..b75d7089a --- /dev/null +++ b/src/spdk/dpdk/doc/guides/nics/features/ixgbe_vf.ini @@ -0,0 +1,41 @@ +; +; Supported features of the 'ixgbe_vf' network poll mode driver. 
+; +; Refer to default.ini for the full list of available PMD features. +; +[Features] +Link status = Y +Rx interrupt = Y +MTU update = Y +Jumbo frame = Y +Scattered Rx = Y +LRO = Y +TSO = Y +Promiscuous mode = Y +Allmulticast mode = Y +Unicast MAC filter = Y +RSS hash = Y +RSS key update = Y +RSS reta update = Y +VLAN filter = Y +Inline crypto = Y +CRC offload = P +VLAN offload = P +QinQ offload = P +L3 checksum offload = P +L4 checksum offload = P +Inner L3 checksum = P +Inner L4 checksum = P +Packet type parsing = Y +Rx descriptor status = Y +Tx descriptor status = Y +Basic stats = Y +Extended stats = Y +Registers dump = Y +Multiprocess aware = Y +BSD nic_uio = Y +Linux UIO = Y +Linux VFIO = Y +ARMv8 = Y +x86-32 = Y +x86-64 = Y diff --git a/src/spdk/dpdk/doc/guides/nics/features/liquidio.ini b/src/spdk/dpdk/doc/guides/nics/features/liquidio.ini new file mode 100644 index 000000000..f628b764b --- /dev/null +++ b/src/spdk/dpdk/doc/guides/nics/features/liquidio.ini @@ -0,0 +1,31 @@ +; +; Supported features of the 'LiquidIO' network poll mode driver. +; +; Refer to default.ini for the full list of available PMD features. +; +[Features] +Speed capabilities = Y +Link status = Y +Link status event = Y +MTU update = Y +Jumbo frame = Y +Scattered Rx = Y +Promiscuous mode = Y +Allmulticast mode = Y +RSS hash = Y +RSS key update = Y +RSS reta update = Y +VLAN filter = Y +CRC offload = Y +VLAN offload = P +L3 checksum offload = Y +L4 checksum offload = Y +Inner L3 checksum = Y +Inner L4 checksum = Y +Basic stats = Y +Extended stats = Y +Multiprocess aware = Y +Linux UIO = Y +Linux VFIO = Y +x86-64 = Y +Usage doc = Y diff --git a/src/spdk/dpdk/doc/guides/nics/features/memif.ini b/src/spdk/dpdk/doc/guides/nics/features/memif.ini new file mode 100644 index 000000000..807d9ecdc --- /dev/null +++ b/src/spdk/dpdk/doc/guides/nics/features/memif.ini @@ -0,0 +1,14 @@ +; +; Supported features of the 'memif' network poll mode driver. +; +; Refer to default.ini for the full list of available PMD features. +; +[Features] +Link status = Y +Basic stats = Y +Jumbo frame = Y +ARMv8 = Y +Power8 = Y +x86-32 = Y +x86-64 = Y +Usage doc = Y diff --git a/src/spdk/dpdk/doc/guides/nics/features/mlx4.ini b/src/spdk/dpdk/doc/guides/nics/features/mlx4.ini new file mode 100644 index 000000000..9b70089df --- /dev/null +++ b/src/spdk/dpdk/doc/guides/nics/features/mlx4.ini @@ -0,0 +1,40 @@ +; +; Supported features of the 'mlx4' network poll mode driver. +; +; Refer to default.ini for the full list of available PMD features. +; +[Features] +Speed capabilities = P +Link status = Y +Link status event = Y +Removal event = Y +Rx interrupt = Y +Queue start/stop = Y +MTU update = Y +Jumbo frame = Y +Scattered Rx = Y +TSO = Y +Promiscuous mode = Y +Allmulticast mode = Y +Unicast MAC filter = Y +Multicast MAC filter = Y +RSS hash = Y +SR-IOV = Y +VLAN filter = Y +Flow control = Y +Flow API = Y +CRC offload = Y +L3 checksum offload = Y +L4 checksum offload = Y +Inner L3 checksum = Y +Inner L4 checksum = Y +Packet type parsing = Y +Basic stats = Y +Stats per queue = Y +FW version = Y +Multiprocess aware = Y +Other kdrv = Y +Power8 = Y +x86-32 = Y +x86-64 = Y +Usage doc = Y diff --git a/src/spdk/dpdk/doc/guides/nics/features/mlx5.ini b/src/spdk/dpdk/doc/guides/nics/features/mlx5.ini new file mode 100644 index 000000000..54ec95db7 --- /dev/null +++ b/src/spdk/dpdk/doc/guides/nics/features/mlx5.ini @@ -0,0 +1,52 @@ +; +; Supported features of the 'mlx5' network poll mode driver. 
+; +; Refer to default.ini for the full list of available PMD features. +; +[Features] +Speed capabilities = Y +Link status = Y +Link status event = Y +Removal event = Y +Rx interrupt = Y +Queue start/stop = Y +Burst mode info = Y +MTU update = Y +Jumbo frame = Y +Scattered Rx = Y +LRO = Y +TSO = Y +Promiscuous mode = Y +Allmulticast mode = Y +Unicast MAC filter = Y +Multicast MAC filter = Y +RSS hash = Y +RSS key update = Y +RSS reta update = Y +Inner RSS = Y +SR-IOV = Y +VLAN filter = Y +Flow control = Y +Flow API = Y +CRC offload = Y +VLAN offload = Y +L3 checksum offload = Y +L4 checksum offload = Y +Timestamp offload = Y +Inner L3 checksum = Y +Inner L4 checksum = Y +Packet type parsing = Y +Rx descriptor status = Y +Tx descriptor status = Y +Basic stats = Y +Extended stats = Y +Stats per queue = Y +FW version = Y +Module EEPROM dump = Y +Multiprocess aware = Y +Other kdrv = Y +ARMv8 = Y +Power8 = Y +x86-32 = Y +x86-64 = Y +Usage doc = Y diff --git a/src/spdk/dpdk/doc/guides/nics/features/mvneta.ini b/src/spdk/dpdk/doc/guides/nics/features/mvneta.ini new file mode 100644 index 000000000..701eb03d8 --- /dev/null +++ b/src/spdk/dpdk/doc/guides/nics/features/mvneta.ini @@ -0,0 +1,19 @@ +; +; Supported features of the 'mvneta' network poll mode driver. +; +; Refer to default.ini for the full list of available PMD features. +; +[Features] +Speed capabilities = Y +Link status = Y +MTU update = Y +Jumbo frame = Y +Promiscuous mode = Y +Unicast MAC filter = Y +CRC offload = Y +L3 checksum offload = Y +L4 checksum offload = Y +Packet type parsing = Y +Basic stats = Y +ARMv8 = Y +Usage doc = Y diff --git a/src/spdk/dpdk/doc/guides/nics/features/mvpp2.ini b/src/spdk/dpdk/doc/guides/nics/features/mvpp2.ini new file mode 100644 index 000000000..ef47546d1 --- /dev/null +++ b/src/spdk/dpdk/doc/guides/nics/features/mvpp2.ini @@ -0,0 +1,25 @@ +; +; Supported features of the 'mvpp2' network poll mode driver. +; +; Refer to default.ini for the full list of available PMD features. +; +[Features] +Speed capabilities = Y +Link status = Y +MTU update = Y +Jumbo frame = Y +Promiscuous mode = Y +Allmulticast mode = Y +Unicast MAC filter = Y +Multicast MAC filter = Y +RSS hash = Y +Flow control = Y +VLAN filter = Y +CRC offload = Y +L3 checksum offload = Y +L4 checksum offload = Y +Packet type parsing = Y +Basic stats = Y +Extended stats = Y +ARMv8 = Y +Usage doc = Y diff --git a/src/spdk/dpdk/doc/guides/nics/features/netvsc.ini b/src/spdk/dpdk/doc/guides/nics/features/netvsc.ini new file mode 100644 index 000000000..f5dc1e784 --- /dev/null +++ b/src/spdk/dpdk/doc/guides/nics/features/netvsc.ini @@ -0,0 +1,24 @@ +; +; Supported features of the 'netvsc' network poll mode driver. +; +; Refer to default.ini for the full list of available PMD features. +; +[Features] +Speed capabilities = P +Link status = Y +Free Tx mbuf on demand = Y +Queue start/stop = Y +Scattered Rx = Y +Promiscuous mode = Y +Allmulticast mode = Y +Basic stats = Y +Stats per queue = Y +Extended stats = Y +Multiprocess aware = Y +Other kdrv = Y +ARMv7 = Y +ARMv8 = Y +x86-32 = Y +x86-64 = Y +Usage doc = Y +MTU update = Y diff --git a/src/spdk/dpdk/doc/guides/nics/features/nfb.ini b/src/spdk/dpdk/doc/guides/nics/features/nfb.ini new file mode 100644 index 000000000..6174d65cc --- /dev/null +++ b/src/spdk/dpdk/doc/guides/nics/features/nfb.ini @@ -0,0 +1,17 @@ +; +; Supported features of the 'nfb' network poll mode driver. +; +; Refer to default.ini for the full list of available PMD features. 
+; +[Features] +Speed capabilities = P +Link status = Y +Queue start/stop = Y +Promiscuous mode = Y +Allmulticast mode = Y +Basic stats = Y +Extended stats = Y +Stats per queue = Y +Other kdrv = Y +x86-64 = Y +Usage doc = Y diff --git a/src/spdk/dpdk/doc/guides/nics/features/nfp.ini b/src/spdk/dpdk/doc/guides/nics/features/nfp.ini new file mode 100644 index 000000000..70297b090 --- /dev/null +++ b/src/spdk/dpdk/doc/guides/nics/features/nfp.ini @@ -0,0 +1,29 @@ +; +; Supported features of the 'nfp' network poll mode driver. +; +; Refer to default.ini for the full list of available PMD features. +; +[Features] +Speed capabilities = Y +Link status = Y +Link status event = Y +Rx interrupt = Y +Queue start/stop = Y +MTU update = Y +Jumbo frame = Y +Promiscuous mode = Y +TSO = Y +RSS hash = Y +RSS key update = Y +RSS reta update = Y +Flow control = Y +VLAN offload = Y +L3 checksum offload = Y +L4 checksum offload = Y +Basic stats = Y +Stats per queue = Y +Linux UIO = Y +Linux VFIO = Y +Multiprocess aware = Y +x86-64 = Y +Usage doc = Y diff --git a/src/spdk/dpdk/doc/guides/nics/features/octeontx.ini b/src/spdk/dpdk/doc/guides/nics/features/octeontx.ini new file mode 100644 index 000000000..8a95c216c --- /dev/null +++ b/src/spdk/dpdk/doc/guides/nics/features/octeontx.ini @@ -0,0 +1,29 @@ +; +; Supported features of the 'octeontx' network poll mode driver. +; +; Refer to default.ini for the full list of available PMD features. +; +[Features] +Speed capabilities = Y +Link status = Y +Link status event = Y +Lock-free Tx queue = Y +Queue start/stop = P +MTU update = Y +Jumbo frame = Y +Scattered Rx = Y +Promiscuous mode = Y +Unicast MAC filter = Y +VLAN filter = Y +VLAN offload = P +CRC offload = Y +L3 checksum offload = Y +L4 checksum offload = Y +Inner L3 checksum = Y +Inner L4 checksum = Y +Packet type parsing = Y +Flow control = Y +Basic stats = Y +Linux VFIO = Y +ARMv8 = Y +Usage doc = Y diff --git a/src/spdk/dpdk/doc/guides/nics/features/octeontx2.ini b/src/spdk/dpdk/doc/guides/nics/features/octeontx2.ini new file mode 100644 index 000000000..fb1351782 --- /dev/null +++ b/src/spdk/dpdk/doc/guides/nics/features/octeontx2.ini @@ -0,0 +1,56 @@ +; +; Supported features of the 'octeontx2' network poll mode driver. +; +; Refer to default.ini for the full list of available PMD features. 
+; +[Features] +Speed capabilities = Y +Rx interrupt = Y +Lock-free Tx queue = Y +SR-IOV = Y +Multiprocess aware = Y +Link status = Y +Link status event = Y +Runtime Rx queue setup = Y +Runtime Tx queue setup = Y +Burst mode info = Y +Fast mbuf free = Y +Free Tx mbuf on demand = Y +Queue start/stop = Y +MTU update = Y +TSO = Y +Promiscuous mode = Y +Allmulticast mode = Y +Unicast MAC filter = Y +Multicast MAC filter = Y +RSS hash = Y +RSS key update = Y +RSS reta update = Y +Inner RSS = Y +Inline protocol = Y +VLAN filter = Y +Flow control = Y +Flow API = Y +Rate limitation = Y +Jumbo frame = Y +Scattered Rx = Y +VLAN offload = Y +QinQ offload = Y +L3 checksum offload = Y +L4 checksum offload = Y +Inner L3 checksum = Y +Inner L4 checksum = Y +Packet type parsing = Y +Timesync = Y +Timestamp offload = Y +Rx descriptor status = Y +Tx descriptor status = Y +Basic stats = Y +Stats per queue = Y +Extended stats = Y +FW version = Y +Module EEPROM dump = Y +Registers dump = Y +Linux VFIO = Y +ARMv8 = Y +Usage doc = Y diff --git a/src/spdk/dpdk/doc/guides/nics/features/octeontx2_vec.ini b/src/spdk/dpdk/doc/guides/nics/features/octeontx2_vec.ini new file mode 100644 index 000000000..66e233aba --- /dev/null +++ b/src/spdk/dpdk/doc/guides/nics/features/octeontx2_vec.ini @@ -0,0 +1,50 @@ +; +; Supported features of the 'octeontx2_vec' network poll mode driver. +; +; Refer to default.ini for the full list of available PMD features. +; +[Features] +Speed capabilities = Y +Lock-free Tx queue = Y +SR-IOV = Y +Multiprocess aware = Y +Link status = Y +Link status event = Y +Runtime Rx queue setup = Y +Runtime Tx queue setup = Y +Burst mode info = Y +Fast mbuf free = Y +Free Tx mbuf on demand = Y +Queue start/stop = Y +MTU update = Y +Promiscuous mode = Y +Allmulticast mode = Y +Unicast MAC filter = Y +Multicast MAC filter = Y +RSS hash = Y +RSS key update = Y +RSS reta update = Y +Inner RSS = Y +VLAN filter = Y +Flow control = Y +Flow API = Y +Rate limitation = Y +Jumbo frame = Y +VLAN offload = Y +QinQ offload = Y +L3 checksum offload = Y +L4 checksum offload = Y +Inner L3 checksum = Y +Inner L4 checksum = Y +Packet type parsing = Y +Rx descriptor status = Y +Tx descriptor status = Y +Basic stats = Y +Extended stats = Y +Stats per queue = Y +FW version = Y +Module EEPROM dump = Y +Registers dump = Y +Linux VFIO = Y +ARMv8 = Y +Usage doc = Y diff --git a/src/spdk/dpdk/doc/guides/nics/features/octeontx2_vf.ini b/src/spdk/dpdk/doc/guides/nics/features/octeontx2_vf.ini new file mode 100644 index 000000000..c04c0e906 --- /dev/null +++ b/src/spdk/dpdk/doc/guides/nics/features/octeontx2_vf.ini @@ -0,0 +1,47 @@ +; +; Supported features of the 'octeontx2_vf' network poll mode driver. +; +; Refer to default.ini for the full list of available PMD features. 
+; +[Features] +Speed capabilities = Y +Lock-free Tx queue = Y +Multiprocess aware = Y +Rx interrupt = Y +Link status = Y +Link status event = Y +Runtime Rx queue setup = Y +Runtime Tx queue setup = Y +Burst mode info = Y +Fast mbuf free = Y +Free Tx mbuf on demand = Y +Queue start/stop = Y +TSO = Y +RSS hash = Y +RSS key update = Y +RSS reta update = Y +Inner RSS = Y +Inline protocol = Y +VLAN filter = Y +Flow API = Y +Rate limitation = Y +Jumbo frame = Y +Scattered Rx = Y +VLAN offload = Y +QinQ offload = Y +L3 checksum offload = Y +L4 checksum offload = Y +Inner L3 checksum = Y +Inner L4 checksum = Y +Packet type parsing = Y +Rx descriptor status = Y +Tx descriptor status = Y +Basic stats = Y +Extended stats = Y +Stats per queue = Y +FW version = Y +Module EEPROM dump = Y +Registers dump = Y +Linux VFIO = Y +ARMv8 = Y +Usage doc = Y diff --git a/src/spdk/dpdk/doc/guides/nics/features/pcap.ini b/src/spdk/dpdk/doc/guides/nics/features/pcap.ini new file mode 100644 index 000000000..28e648807 --- /dev/null +++ b/src/spdk/dpdk/doc/guides/nics/features/pcap.ini @@ -0,0 +1,15 @@ +; +; Supported features of the 'pcap' network poll mode driver. +; +; Refer to default.ini for the full list of available PMD features. +; +[Features] +Jumbo frame = Y +Basic stats = Y +Multiprocess aware = Y +ARMv7 = Y +ARMv8 = Y +Power8 = Y +x86-32 = Y +x86-64 = Y +Usage doc = Y diff --git a/src/spdk/dpdk/doc/guides/nics/features/pfe.ini b/src/spdk/dpdk/doc/guides/nics/features/pfe.ini new file mode 100644 index 000000000..5b6a367d7 --- /dev/null +++ b/src/spdk/dpdk/doc/guides/nics/features/pfe.ini @@ -0,0 +1,17 @@ +; +; Supported features of the 'pfe' network poll mode driver. +; +; Refer to default.ini for the full list of available PMD features. +; +[Features] +Link status = Y +L3 checksum offload = Y +L4 checksum offload = Y +Packet type parsing = Y +Basic stats = Y +MTU update = Y +Promiscuous mode = Y +Allmulticast mode = Y +Linux VFIO = Y +ARMv8 = Y +Usage doc = Y diff --git a/src/spdk/dpdk/doc/guides/nics/features/qede.ini b/src/spdk/dpdk/doc/guides/nics/features/qede.ini new file mode 100644 index 000000000..20c90e626 --- /dev/null +++ b/src/spdk/dpdk/doc/guides/nics/features/qede.ini @@ -0,0 +1,40 @@ +; +; Supported features of the 'qede' network poll mode driver. +; +; Refer to default.ini for the full list of available PMD features. +; +[Features] +Speed capabilities = Y +Link status = Y +MTU update = Y +Jumbo frame = Y +Scattered Rx = Y +LRO = Y +TSO = Y +Promiscuous mode = Y +Allmulticast mode = Y +Unicast MAC filter = Y +Multicast MAC filter = Y +RSS hash = Y +RSS key update = Y +RSS reta update = Y +VLAN filter = Y +Flow control = Y +Flow API = Y +CRC offload = Y +VLAN offload = Y +L3 checksum offload = Y +L4 checksum offload = Y +Inner L3 checksum = Y +Inner L4 checksum = Y +Packet type parsing = Y +Basic stats = Y +Extended stats = Y +Stats per queue = Y +Multiprocess aware = Y +Linux UIO = Y +Linux VFIO = Y +ARMv8 = Y +x86-32 = Y +x86-64 = Y +Usage doc = Y diff --git a/src/spdk/dpdk/doc/guides/nics/features/qede_vf.ini b/src/spdk/dpdk/doc/guides/nics/features/qede_vf.ini new file mode 100644 index 000000000..e796b3131 --- /dev/null +++ b/src/spdk/dpdk/doc/guides/nics/features/qede_vf.ini @@ -0,0 +1,38 @@ +; +; Supported features of the 'qede_vf' network poll mode driver. +; +; Refer to default.ini for the full list of available PMD features. 
+; +[Features] +Speed capabilities = Y +Link status = Y +MTU update = Y +Jumbo frame = Y +Scattered Rx = Y +Promiscuous mode = Y +Allmulticast mode = Y +Unicast MAC filter = Y +Multicast MAC filter = Y +RSS hash = Y +RSS key update = Y +RSS reta update = Y +SR-IOV = Y +VLAN filter = Y +Flow control = Y +CRC offload = Y +VLAN offload = Y +L3 checksum offload = Y +L4 checksum offload = Y +Packet type parsing = Y +Basic stats = Y +Extended stats = Y +Stats per queue = Y +Multiprocess aware = Y +Linux UIO = Y +Linux VFIO = Y +ARMv8 = Y +x86-32 = Y +x86-64 = Y +LRO = Y +TSO = Y +Usage doc = Y diff --git a/src/spdk/dpdk/doc/guides/nics/features/sfc_efx.ini b/src/spdk/dpdk/doc/guides/nics/features/sfc_efx.ini new file mode 100644 index 000000000..eca14270e --- /dev/null +++ b/src/spdk/dpdk/doc/guides/nics/features/sfc_efx.ini @@ -0,0 +1,43 @@ +; +; Supported features of the 'sfc_efx' network poll mode driver. +; +; Refer to default.ini for the full list of available PMD features. +; +[Features] +Speed capabilities = Y +Link status = Y +Link status event = Y +Rx interrupt = Y +Fast mbuf free = Y +Queue start/stop = Y +Runtime Rx queue setup = Y +Runtime Tx queue setup = Y +MTU update = Y +Jumbo frame = Y +Scattered Rx = Y +TSO = Y +Promiscuous mode = Y +Allmulticast mode = Y +Multicast MAC filter = Y +RSS hash = Y +RSS key update = Y +RSS reta update = Y +SR-IOV = Y +Flow control = Y +Flow API = Y +VLAN offload = P +L3 checksum offload = Y +L4 checksum offload = Y +Inner L3 checksum = Y +Inner L4 checksum = Y +Packet type parsing = Y +Rx descriptor status = Y +Tx descriptor status = Y +Basic stats = Y +Extended stats = Y +FW version = Y +Multiprocess aware = Y +BSD nic_uio = Y +Linux UIO = Y +Linux VFIO = Y +x86-64 = Y diff --git a/src/spdk/dpdk/doc/guides/nics/features/szedata2.ini b/src/spdk/dpdk/doc/guides/nics/features/szedata2.ini new file mode 100644 index 000000000..a0e6f6e87 --- /dev/null +++ b/src/spdk/dpdk/doc/guides/nics/features/szedata2.ini @@ -0,0 +1,18 @@ +; +; Supported features of the 'szedata2' network poll mode driver. +; +; Refer to default.ini for the full list of available PMD features. +; +[Features] +Speed capabilities = P +Link status = Y +Queue start/stop = Y +Scattered Rx = Y +Promiscuous mode = Y +Allmulticast mode = Y +Basic stats = Y +Extended stats = Y +Stats per queue = Y +Other kdrv = Y +x86-64 = Y +Usage doc = Y diff --git a/src/spdk/dpdk/doc/guides/nics/features/tap.ini b/src/spdk/dpdk/doc/guides/nics/features/tap.ini new file mode 100644 index 000000000..519cad924 --- /dev/null +++ b/src/spdk/dpdk/doc/guides/nics/features/tap.ini @@ -0,0 +1,28 @@ +; +; Supported features of the 'tap' driver. +; +; Refer to default.ini for the full list of available PMD features. +; +[Features] +Speed capabilities = P +Link status = Y +Link status event = Y +Rx interrupt = Y +Promiscuous mode = Y +Allmulticast mode = Y +Basic stats = Y +Flow API = Y +L3 checksum offload = Y +L4 checksum offload = Y +MTU update = Y +Multicast MAC filter = Y +Unicast MAC filter = Y +Packet type parsing = Y +Flow control = Y +Other kdrv = Y +ARMv7 = Y +ARMv8 = Y +Power8 = Y +x86-32 = Y +x86-64 = Y +Usage doc = Y diff --git a/src/spdk/dpdk/doc/guides/nics/features/thunderx.ini b/src/spdk/dpdk/doc/guides/nics/features/thunderx.ini new file mode 100644 index 000000000..626858971 --- /dev/null +++ b/src/spdk/dpdk/doc/guides/nics/features/thunderx.ini @@ -0,0 +1,31 @@ +; +; Supported features of the 'thunderx' network poll mode driver. 
+; +; Refer to default.ini for the full list of available PMD features. +; +[Features] +Speed capabilities = Y +Link status = Y +Link status event = Y +Queue start/stop = Y +MTU update = Y +Jumbo frame = Y +Scattered Rx = Y +Promiscuous mode = Y +Allmulticast mode = Y +RSS hash = Y +RSS key update = Y +RSS reta update = Y +SR-IOV = Y +CRC offload = Y +VLAN offload = P +L3 checksum offload = Y +L4 checksum offload = Y +Packet type parsing = Y +Basic stats = Y +Stats per queue = Y +Registers dump = Y +Multiprocess aware = Y +Linux VFIO = Y +ARMv8 = Y +Usage doc = Y diff --git a/src/spdk/dpdk/doc/guides/nics/features/vhost.ini b/src/spdk/dpdk/doc/guides/nics/features/vhost.ini new file mode 100644 index 000000000..ef81abb43 --- /dev/null +++ b/src/spdk/dpdk/doc/guides/nics/features/vhost.ini @@ -0,0 +1,13 @@ +; +; Supported features of the 'vhost' network poll mode driver. +; +; Refer to default.ini for the full list of available PMD features. +; +[Features] +Link status = Y +Free Tx mbuf on demand = Y +Queue status event = Y +Basic stats = Y +Extended stats = Y +x86-32 = Y +x86-64 = Y diff --git a/src/spdk/dpdk/doc/guides/nics/features/virtio.ini b/src/spdk/dpdk/doc/guides/nics/features/virtio.ini new file mode 100644 index 000000000..bbf973031 --- /dev/null +++ b/src/spdk/dpdk/doc/guides/nics/features/virtio.ini @@ -0,0 +1,30 @@ +; +; Supported features of the 'virtio' network poll mode driver. +; +; Refer to default.ini for the full list of available PMD features. +; +[Features] +Speed capabilities = P +Link status = Y +Link status event = Y +Rx interrupt = Y +Queue start/stop = Y +Scattered Rx = P +Promiscuous mode = Y +Allmulticast mode = Y +Unicast MAC filter = Y +Multicast MAC filter = Y +VLAN filter = Y +Basic stats = Y +Stats per queue = Y +Extended stats = P +Multiprocess aware = Y +BSD nic_uio = Y +Linux UIO = Y +Linux VFIO = Y +ARMv7 = Y +ARMv8 = Y +x86-32 = Y +x86-64 = Y +Usage doc = Y +MTU update = Y diff --git a/src/spdk/dpdk/doc/guides/nics/features/vmxnet3.ini b/src/spdk/dpdk/doc/guides/nics/features/vmxnet3.ini new file mode 100644 index 000000000..9a1151380 --- /dev/null +++ b/src/spdk/dpdk/doc/guides/nics/features/vmxnet3.ini @@ -0,0 +1,30 @@ +; +; Supported features of the 'vmxnet3' network poll mode driver. +; +; Refer to default.ini for the full list of available PMD features. +; +[Features] +Speed capabilities = P +Link status = Y +Link status event = Y +Queue start/stop = Y +MTU update = Y +Jumbo frame = Y +LRO = Y +TSO = Y +Promiscuous mode = Y +Allmulticast mode = Y +Unicast MAC filter = Y +RSS hash = Y +VLAN filter = Y +VLAN offload = Y +L4 checksum offload = Y +Packet type parsing = Y +Basic stats = Y +Extended stats = Y +Stats per queue = Y +Linux UIO = Y +Linux VFIO = Y +x86-32 = Y +x86-64 = Y +Usage doc = Y diff --git a/src/spdk/dpdk/doc/guides/nics/fm10k.rst b/src/spdk/dpdk/doc/guides/nics/fm10k.rst new file mode 100644 index 000000000..4e178c2cc --- /dev/null +++ b/src/spdk/dpdk/doc/guides/nics/fm10k.rst @@ -0,0 +1,174 @@ +.. SPDX-License-Identifier: BSD-3-Clause + Copyright(c) 2015-2016 Intel Corporation. + +FM10K Poll Mode Driver +====================== + +The FM10K poll mode driver library provides support for the Intel FM10000 +(FM10K) family of 40GbE/100GbE adapters. + +FTAG Based Forwarding of FM10K +------------------------------ + +FTAG Based Forwarding is a unique feature of FM10K. The FM10K family of NICs +support the addition of a Fabric Tag (FTAG) to carry special information. 
+The FTAG is placed at the beginning of the frame; it contains information
+such as where the packet comes from and goes, and the VLAN tag. In FTAG based
+forwarding mode, the switch logic forwards packets according to glort (global
+resource tag) information, rather than the MAC and VLAN tables. Currently this
+feature works only on the PF.
+
+To enable this feature, the user should pass a devargs parameter to the EAL,
+for example "-w 84:00.0,enable_ftag=1", and the application should make sure an
+appropriate FTAG is inserted for every frame on the TX side.
+
+Vector PMD for FM10K
+--------------------
+
+Vector PMD (vPMD) uses Intel® SIMD instructions to optimize packet I/O.
+It improves load/store bandwidth efficiency of the L1 data cache by using a
+wider SSE/AVX register (see note 1 below).
+The wider register gives space to hold multiple packet buffers so as to save
+on the number of instructions when bulk processing packets.
+
+There is no change to the PMD API. The RX/TX handlers are the only two entries for
+vPMD packet I/O. They are transparently registered at runtime for RX/TX execution
+if all required conditions are met.
+
+1. To date, only an SSE version of FM10K vPMD is available.
+   To ensure that vPMD is in the binary code, set
+   ``CONFIG_RTE_LIBRTE_FM10K_INC_VECTOR=y`` in the configure file.
+
+Some constraints apply as pre-conditions for specific optimizations on bulk
+packet transfers. The following sections explain RX and TX constraints in the
+vPMD.
+
+
+RX Constraints
+~~~~~~~~~~~~~~
+
+
+Prerequisites and Pre-conditions
+^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
+
+For Vector RX it is assumed that the number of descriptor rings will be a power
+of 2. With this pre-condition, the ring pointer can easily scroll back to the
+head after hitting the tail without a conditional check. In addition, Vector RX
+can use this assumption to do a bit mask using ``ring_size - 1``.
+
+
+Features not Supported by Vector RX PMD
+^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
+
+Some features are not supported when trying to increase the throughput in
+vPMD. They are:
+
+* IEEE1588
+
+* Flow director
+
+* Header split
+
+* RX checksum offload
+
+Other features are supported using optional macro configuration. They include:
+
+* HW VLAN strip
+
+* L3/L4 packet type
+
+To enable via ``RX_OLFLAGS``, use ``RTE_LIBRTE_FM10K_RX_OLFLAGS_ENABLE=y``.
+
+To guarantee the constraint, the following capabilities in ``dev_conf.rxmode.offloads``
+will be checked:
+
+* ``DEV_RX_OFFLOAD_VLAN_EXTEND``
+
+* ``DEV_RX_OFFLOAD_CHECKSUM``
+
+* ``DEV_RX_OFFLOAD_HEADER_SPLIT``
+
+* ``fdir_conf->mode``
+
+
+RX Burst Size
+^^^^^^^^^^^^^
+
+As vPMD is focused on high throughput, it processes 4 packets at a time. So it assumes
+that the RX burst size should be at least 4 packets per burst. It returns zero if using
+``nb_pkt`` < 4 in the receive handler. If ``nb_pkt`` is not a multiple of 4, a
+floor alignment will be applied.
+
+
+TX Constraint
+~~~~~~~~~~~~~
+
+Features not Supported by TX Vector PMD
+^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
+
+TX vPMD only works when offloads is set to 0.
+
+This means that it does not support any TX offload.
+
+Limitations
+-----------
+
+
+Switch manager
+~~~~~~~~~~~~~~
+
+The Intel FM10000 family of NICs integrates a hardware switch and multiple host
+interfaces. The FM10000 PMD driver only manages host interfaces. For the
+switch component, another switch driver has to be loaded prior to the
+FM10000 PMD driver. The switch driver can be acquired from Intel support.
+Only Testpoint is validated with DPDK; the latest version that has been
+validated with DPDK is 4.1.6.
+
+Support for Switch Restart
+~~~~~~~~~~~~~~~~~~~~~~~~~~
+
+For the FM10000 multi host based design, a DPDK app running in the VM or host needs
+to be aware of the switch's state since it may undergo a quit-restart. When
+the switch goes down, the DPDK app will receive an LSC event indicating link
+status down, and the app should stop the worker threads that are polling on
+the Rx/Tx queues. When the switch comes up, an LSC event indicating ``LINK_UP`` is
+sent to the app, which can then restart the FM10000 port to resume network
+processing.
+
+CRC stripping
+~~~~~~~~~~~~~
+
+The FM10000 family of NICs strips the CRC from every packet coming into the
+host interface. So, keeping the CRC is not supported.
+
+Maximum packet length
+~~~~~~~~~~~~~~~~~~~~~
+
+The FM10000 family of NICs supports a maximum jumbo frame size of 15K. The value
+is fixed and cannot be changed. So, even when the ``rxmode.max_rx_pkt_len``
+member of ``struct rte_eth_conf`` is set to a value lower than 15364, frames
+up to 15364 bytes can still reach the host interface.
+
+Statistic Polling Frequency
+~~~~~~~~~~~~~~~~~~~~~~~~~~~
+
+The FM10000 NICs expose a set of statistics via the PCI BARs. These statistics
+are read from the hardware registers when ``rte_eth_stats_get()`` or
+``rte_eth_xstats_get()`` is called. The packet counting registers are 32 bits
+while the byte counting registers are 48 bits. As a result, the statistics must
+be polled regularly in order to ensure the consistency of the returned reads.
+
+Given the PCIe Gen3 x8 interface, about 50 Gbps of traffic can occur. With 64 byte
+packets this gives almost 100 million packets/second, causing 32-bit integer
+overflow after approximately 40 seconds. To ensure these overflows are detected
+and accounted for in the statistics, it is necessary to read the statistics
+regularly. It is suggested to read stats every 20 seconds, which will ensure the
+statistics are accurate.
+
+
+Interrupt mode
+~~~~~~~~~~~~~~
+
+The FM10000 family of NICs needs one separate interrupt for the mailbox. So only
+drivers which support multiple interrupt vectors, e.g. vfio-pci, can work
+for fm10k interrupt mode.
diff --git a/src/spdk/dpdk/doc/guides/nics/hinic.rst b/src/spdk/dpdk/doc/guides/nics/hinic.rst
new file mode 100644
index 000000000..f76506064
--- /dev/null
+++ b/src/spdk/dpdk/doc/guides/nics/hinic.rst
@@ -0,0 +1,68 @@
+.. SPDX-License-Identifier: BSD-3-Clause
+   Copyright(c) 2017 Huawei Technologies Co., Ltd
+
+
+HINIC Poll Mode Driver
+======================
+
+The hinic PMD (librte_pmd_hinic) provides poll mode driver support
+for 25Gbps Huawei Intelligent PCIe Network Adapters based on the
+Huawei Ethernet Controller Hi1822.
+
+
+Features
+--------
+
+- Multi arch support: x86_64, ARMv8.
+- Multiple queues for TX and RX
+- Receiver Side Scaling (RSS)
+- MAC/VLAN filtering
+- Checksum offload
+- TSO offload
+- Promiscuous mode
+- Port hardware statistics
+- Link state information
+- Link flow control
+- Scattered and gather for TX and RX
+- SR-IOV - Partially supported at this point, VFIO only
+- VLAN filter and VLAN offload
+- Allmulticast mode
+- MTU update
+- Unicast MAC filter
+- Multicast MAC filter
+- Flow API
+- Set Link down or up
+- FW version
+- LRO
+
+Prerequisites
+-------------
+
+- Learning about Huawei Hi1822 IN200 Series Intelligent NICs using
+  ``_.
+
+- Getting the latest product documents and software support using
+  ``_.
+
+- Follow the DPDK :ref:`Getting Started Guide for Linux ` to set up the basic DPDK environment.
+
+Pre-Installation Configuration
+------------------------------
+
+Config File Options
+~~~~~~~~~~~~~~~~~~~
+
+The following options can be modified in the ``config`` file.
+
+- ``CONFIG_RTE_LIBRTE_HINIC_PMD`` (default ``y``)
+
+Driver compilation and testing
+------------------------------
+
+Refer to the document :ref:`compiling and testing a PMD for a NIC `
+for details.
+
+Limitations or Known issues
+---------------------------
+Build with ICC is not supported yet.
+X86-32, Power8, ARMv7 and BSD are not supported yet.
diff --git a/src/spdk/dpdk/doc/guides/nics/hns3.rst b/src/spdk/dpdk/doc/guides/nics/hns3.rst
new file mode 100644
index 000000000..05dbe4174
--- /dev/null
+++ b/src/spdk/dpdk/doc/guides/nics/hns3.rst
@@ -0,0 +1,62 @@
+.. SPDX-License-Identifier: BSD-3-Clause
+   Copyright(c) 2018-2019 Hisilicon Limited.
+
+HNS3 Poll Mode Driver
+===============================
+
+The hns3 PMD (librte_pmd_hns3) provides poll mode driver support
+for the inbuilt Hisilicon Network Subsystem (HNS) network engine
+found in the Hisilicon Kunpeng 920 SoC.
+
+Features
+--------
+
+Features of the HNS3 PMD are:
+
+- Multiple queues for TX and RX
+- Receive Side Scaling (RSS)
+- Packet type information
+- Checksum offload
+- TSO offload
+- Promiscuous mode
+- Multicast mode
+- Port hardware statistics
+- Jumbo frames
+- Link state information
+- Interrupt mode for RX
+- VLAN stripping
+- NUMA support
+
+Prerequisites
+-------------
+- Get the information about the Kunpeng 920 chip using
+  ``_.
+
+- Follow the DPDK :ref:`Getting Started Guide for Linux ` to set up the basic DPDK environment.
+
+Pre-Installation Configuration
+------------------------------
+
+Config File Options
+~~~~~~~~~~~~~~~~~~~
+
+The following options can be modified in the ``config`` file.
+Please note that enabling debugging options may affect system performance.
+
+- ``CONFIG_RTE_LIBRTE_HNS3_PMD`` (default ``y``)
+
+Driver compilation and testing
+------------------------------
+
+Refer to the document :ref:`compiling and testing a PMD for a NIC `
+for details.
+
+Limitations or Known issues
+---------------------------
+Currently, a VF device is only supported when it is bound to vfio_pci or
+igb_uio and driven by the DPDK driver while the PF is driven by the kernel
+mode hns3 ethdev driver; a VF is not supported when the PF is driven by the
+DPDK driver.
+
+Build with ICC is not supported yet.
+X86-32, Power8, ARMv7 and BSD are not supported yet.
diff --git a/src/spdk/dpdk/doc/guides/nics/i40e.rst b/src/spdk/dpdk/doc/guides/nics/i40e.rst
new file mode 100644
index 000000000..00c3042d5
--- /dev/null
+++ b/src/spdk/dpdk/doc/guides/nics/i40e.rst
@@ -0,0 +1,821 @@
+.. SPDX-License-Identifier: BSD-3-Clause
+   Copyright(c) 2016 Intel Corporation.
+
+I40E Poll Mode Driver
+======================
+
+The i40e PMD (librte_pmd_i40e) provides poll mode driver support for
+10/25/40 Gbps Intel® Ethernet 700 Series Network Adapters based on
+the Intel Ethernet Controller X710/XL710/XXV710 and Intel Ethernet
+Connection X722 (which supports only a subset of the features).
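+
+Like other DPDK PMDs, an i40e port is driven through the generic ethdev API
+rather than a driver-specific interface. The sketch below is an illustrative,
+minimal example only (it is not taken from the i40e sources); it assumes port 0
+is already bound to a DPDK-compatible kernel module and uses a single Rx/Tx
+queue pair with default offloads:
+
+.. code-block:: c
+
+   #include <stdlib.h>
+
+   #include <rte_eal.h>
+   #include <rte_debug.h>
+   #include <rte_lcore.h>
+   #include <rte_ethdev.h>
+   #include <rte_mbuf.h>
+
+   int main(int argc, char **argv)
+   {
+       struct rte_eth_conf port_conf = {0};   /* default port configuration */
+       struct rte_mempool *pool;
+       struct rte_mbuf *bufs[32];
+       uint16_t port_id = 0, nb_rx, i;
+
+       if (rte_eal_init(argc, argv) < 0)
+           rte_exit(EXIT_FAILURE, "EAL init failed\n");
+
+       /* mbuf pool backing the Rx queue */
+       pool = rte_pktmbuf_pool_create("mbuf_pool", 8191, 256, 0,
+                                      RTE_MBUF_DEFAULT_BUF_SIZE,
+                                      rte_socket_id());
+       if (pool == NULL)
+           rte_exit(EXIT_FAILURE, "mbuf pool creation failed\n");
+
+       /* one Rx queue and one Tx queue, then start the port */
+       if (rte_eth_dev_configure(port_id, 1, 1, &port_conf) < 0 ||
+           rte_eth_rx_queue_setup(port_id, 0, 1024, rte_socket_id(),
+                                  NULL, pool) < 0 ||
+           rte_eth_tx_queue_setup(port_id, 0, 1024, rte_socket_id(),
+                                  NULL) < 0 ||
+           rte_eth_dev_start(port_id) < 0)
+           rte_exit(EXIT_FAILURE, "port setup failed\n");
+
+       for (;;) {
+           nb_rx = rte_eth_rx_burst(port_id, 0, bufs, 32);
+           for (i = 0; i < nb_rx; i++)
+               rte_pktmbuf_free(bufs[i]);  /* just drop packets in this sketch */
+       }
+       return 0;
+   }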
+ + +Features +-------- + +Features of the i40e PMD are: + +- Multiple queues for TX and RX +- Receiver Side Scaling (RSS) +- MAC/VLAN filtering +- Packet type information +- Flow director +- Cloud filter +- Checksum offload +- VLAN/QinQ stripping and inserting +- TSO offload +- Promiscuous mode +- Multicast mode +- Port hardware statistics +- Jumbo frames +- Link state information +- Link flow control +- Mirror on port, VLAN and VSI +- Interrupt mode for RX +- Scattered and gather for TX and RX +- Vector Poll mode driver +- DCB +- VMDQ +- SR-IOV VF +- Hot plug +- IEEE1588/802.1AS timestamping +- VF Daemon (VFD) - EXPERIMENTAL +- Dynamic Device Personalization (DDP) +- Queue region configuration +- Virtual Function Port Representors +- Malicious Device Drive event catch and notify +- Generic flow API + +Prerequisites +------------- + +- Identifying your adapter using `Intel Support + `_ and get the latest NVM/FW images. + +- Follow the DPDK :ref:`Getting Started Guide for Linux ` to setup the basic DPDK environment. + +- To get better performance on Intel platforms, please follow the "How to get best performance with NICs on Intel platforms" + section of the :ref:`Getting Started Guide for Linux `. + +- Upgrade the NVM/FW version following the `Intel® Ethernet NVM Update Tool Quick Usage Guide for Linux + `_ and `Intel® Ethernet NVM Update Tool: Quick Usage Guide for EFI `_ if needed. + +- For information about supported media, please refer to this document: `Intel® Ethernet Controller X710/XXV710/XL710 Feature Support Matrix + `_. + + .. Note:: + + * Some adapters based on the Intel(R) Ethernet Controller 700 Series only + support Intel Ethernet Optics modules. On these adapters, other modules are not + supported and will not function. + + * For connections based on Intel(R) Ethernet Controller 700 Series, + support is dependent on your system board. Please see your vendor for details. + + * In all cases Intel recommends using Intel Ethernet Optics; other modules + may function but are not validated by Intel. Contact Intel for supported media types. + +Recommended Matching List +------------------------- + +It is highly recommended to upgrade the i40e kernel driver and firmware to +avoid the compatibility issues with i40e PMD. Here is the suggested matching +list which has been tested and verified. The detailed information can refer +to chapter Tested Platforms/Tested NICs in release notes. 
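+
+To check which firmware version a port is actually running, and compare it
+against the tables below, the generic ethdev API can be used. The following is
+a minimal sketch; the exact format of the returned string is driver specific.
+
+.. code-block:: c
+
+   #include <stdio.h>
+   #include <rte_ethdev.h>
+
+   /* Print the firmware version reported by the port. */
+   static void
+   print_fw_version(uint16_t port_id)
+   {
+       char fw_version[64];
+
+       if (rte_eth_dev_fw_version_get(port_id, fw_version,
+                                      sizeof(fw_version)) == 0)
+           printf("Port %u firmware: %s\n", port_id, fw_version);
+   }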
+ +For X710/XL710/XXV710, + + +--------------+-----------------------+------------------+ + | DPDK version | Kernel driver version | Firmware version | + +==============+=======================+==================+ + | 20.05 | 2.11.27 | 7.30 | + +--------------+-----------------------+------------------+ + | 20.02 | 2.10.19 | 7.20 | + +--------------+-----------------------+------------------+ + | 19.11 | 2.9.21 | 7.00 | + +--------------+-----------------------+------------------+ + | 19.08 | 2.8.43 | 7.00 | + +--------------+-----------------------+------------------+ + | 19.05 | 2.7.29 | 6.80 | + +--------------+-----------------------+------------------+ + | 19.02 | 2.7.26 | 6.80 | + +--------------+-----------------------+------------------+ + | 18.11 | 2.4.6 | 6.01 | + +--------------+-----------------------+------------------+ + | 18.08 | 2.4.6 | 6.01 | + +--------------+-----------------------+------------------+ + | 18.05 | 2.4.6 | 6.01 | + +--------------+-----------------------+------------------+ + | 18.02 | 2.4.3 | 6.01 | + +--------------+-----------------------+------------------+ + | 17.11 | 2.1.26 | 6.01 | + +--------------+-----------------------+------------------+ + | 17.08 | 2.0.19 | 6.01 | + +--------------+-----------------------+------------------+ + | 17.05 | 1.5.23 | 5.05 | + +--------------+-----------------------+------------------+ + | 17.02 | 1.5.23 | 5.05 | + +--------------+-----------------------+------------------+ + | 16.11 | 1.5.23 | 5.05 | + +--------------+-----------------------+------------------+ + | 16.07 | 1.4.25 | 5.04 | + +--------------+-----------------------+------------------+ + | 16.04 | 1.4.25 | 5.02 | + +--------------+-----------------------+------------------+ + + +For X722, + + +--------------+-----------------------+------------------+ + | DPDK version | Kernel driver version | Firmware version | + +==============+=======================+==================+ + | 20.05 | 2.11.27 | 4.11 | + +--------------+-----------------------+------------------+ + | 20.02 | 2.10.19 | 4.11 | + +--------------+-----------------------+------------------+ + | 19.11 | 2.9.21 | 4.10 | + +--------------+-----------------------+------------------+ + | 19.08 | 2.9.21 | 4.10 | + +--------------+-----------------------+------------------+ + | 19.05 | 2.7.29 | 3.33 | + +--------------+-----------------------+------------------+ + | 19.02 | 2.7.26 | 3.33 | + +--------------+-----------------------+------------------+ + | 18.11 | 2.4.6 | 3.33 | + +--------------+-----------------------+------------------+ + + +Pre-Installation Configuration +------------------------------ + +Config File Options +~~~~~~~~~~~~~~~~~~~ + +The following options can be modified in the ``config`` file. +Please note that enabling debugging options may affect system performance. + +- ``CONFIG_RTE_LIBRTE_I40E_PMD`` (default ``y``) + + Toggle compilation of the ``librte_pmd_i40e`` driver. + +- ``CONFIG_RTE_LIBRTE_I40E_DEBUG_*`` (default ``n``) + + Toggle display of generic debugging messages. + +- ``CONFIG_RTE_LIBRTE_I40E_RX_ALLOW_BULK_ALLOC`` (default ``y``) + + Toggle bulk allocation for RX. + +- ``CONFIG_RTE_LIBRTE_I40E_INC_VECTOR`` (default ``n``) + + Toggle the use of Vector PMD instead of normal RX/TX path. + To enable vPMD for RX, bulk allocation for Rx must be allowed. + +- ``CONFIG_RTE_LIBRTE_I40E_16BYTE_RX_DESC`` (default ``n``) + + Toggle to use a 16-byte RX descriptor, by default the RX descriptor is 32 byte. 
+ +- ``CONFIG_RTE_LIBRTE_I40E_QUEUE_NUM_PER_PF`` (default ``64``) + + Number of queues reserved for PF. + +- ``CONFIG_RTE_LIBRTE_I40E_QUEUE_NUM_PER_VM`` (default ``4``) + + Number of queues reserved for each VMDQ Pool. + +Runtime Config Options +~~~~~~~~~~~~~~~~~~~~~~ + +- ``Reserved number of Queues per VF`` (default ``4``) + + The number of reserved queue per VF is determined by its host PF. If the + PCI address of an i40e PF is aaaa:bb.cc, the number of reserved queues per + VF can be configured with EAL parameter like -w aaaa:bb.cc,queue-num-per-vf=n. + The value n can be 1, 2, 4, 8 or 16. If no such parameter is configured, the + number of reserved queues per VF is 4 by default. If VF request more than + reserved queues per VF, PF will able to allocate max to 16 queues after a VF + reset. + + +- ``Support multiple driver`` (default ``disable``) + + There was a multiple driver support issue during use of 700 series Ethernet + Adapter with both Linux kernel and DPDK PMD. To fix this issue, ``devargs`` + parameter ``support-multi-driver`` is introduced, for example:: + + -w 84:00.0,support-multi-driver=1 + + With the above configuration, DPDK PMD will not change global registers, and + will switch PF interrupt from IntN to Int0 to avoid interrupt conflict between + DPDK and Linux Kernel. + +- ``Support VF Port Representor`` (default ``not enabled``) + + The i40e PF PMD supports the creation of VF port representors for the control + and monitoring of i40e virtual function devices. Each port representor + corresponds to a single virtual function of that device. Using the ``devargs`` + option ``representor`` the user can specify which virtual functions to create + port representors for on initialization of the PF PMD by passing the VF IDs of + the VFs which are required.:: + + -w DBDF,representor=[0,1,4] + + Currently hot-plugging of representor ports is not supported so all required + representors must be specified on the creation of the PF. + +- ``Use latest supported vector`` (default ``disable``) + + Latest supported vector path may not always get the best perf so vector path was + recommended to use only on later platform. But users may want the latest vector path + since it can get better perf in some real work loading cases. So ``devargs`` param + ``use-latest-supported-vec`` is introduced, for example:: + + -w 84:00.0,use-latest-supported-vec=1 + +- ``Enable validation for VF message`` (default ``not enabled``) + + The PF counts messages from each VF. If in any period of seconds the message + statistic from a VF exceeds maximal limitation, the PF will ignore any new message + from that VF for some seconds. + Format -- "maximal-message@period-seconds:ignore-seconds" + For example:: + + -w 84:00.0,vf_msg_cfg=80@120:180 + +Vector RX Pre-conditions +~~~~~~~~~~~~~~~~~~~~~~~~ +For Vector RX it is assumed that the number of descriptor rings will be a power +of 2. With this pre-condition, the ring pointer can easily scroll back to the +head after hitting the tail without a conditional check. In addition Vector RX +can use this assumption to do a bit mask using ``ring_size - 1``. + +Driver compilation and testing +------------------------------ + +Refer to the document :ref:`compiling and testing a PMD for a NIC ` +for details. + + +SR-IOV: Prerequisites and sample Application Notes +-------------------------------------------------- + +#. Load the kernel module: + + .. code-block:: console + + modprobe i40e + + Check the output in dmesg: + + .. 
code-block:: console + + i40e 0000:83:00.1 ens802f0: renamed from eth0 + +#. Bring up the PF ports: + + .. code-block:: console + + ifconfig ens802f0 up + +#. Create VF device(s): + + Echo the number of VFs to be created into the ``sriov_numvfs`` sysfs entry + of the parent PF. + + Example: + + .. code-block:: console + + echo 2 > /sys/devices/pci0000:00/0000:00:03.0/0000:81:00.0/sriov_numvfs + + +#. Assign VF MAC address: + + Assign MAC address to the VF using iproute2 utility. The syntax is: + + .. code-block:: console + + ip link set vf mac + + Example: + + .. code-block:: console + + ip link set ens802f0 vf 0 mac a0:b0:c0:d0:e0:f0 + +#. Assign VF to VM, and bring up the VM. + Please see the documentation for the *I40E/IXGBE/IGB Virtual Function Driver*. + +#. Running testpmd: + + Follow instructions available in the document + :ref:`compiling and testing a PMD for a NIC ` + to run testpmd. + + Example output: + + .. code-block:: console + + ... + EAL: PCI device 0000:83:00.0 on NUMA socket 1 + EAL: probe driver: 8086:1572 rte_i40e_pmd + EAL: PCI memory mapped at 0x7f7f80000000 + EAL: PCI memory mapped at 0x7f7f80800000 + PMD: eth_i40e_dev_init(): FW 5.0 API 1.5 NVM 05.00.02 eetrack 8000208a + Interactive-mode selected + Configuring Port 0 (socket 0) + ... + + PMD: i40e_dev_rx_queue_setup(): Rx Burst Bulk Alloc Preconditions are + satisfied.Rx Burst Bulk Alloc function will be used on port=0, queue=0. + + ... + Port 0: 68:05:CA:26:85:84 + Checking link statuses... + Port 0 Link Up - speed 10000 Mbps - full-duplex + Done + + testpmd> + + +Sample Application Notes +------------------------ + +Vlan filter +~~~~~~~~~~~ + +Vlan filter only works when Promiscuous mode is off. + +To start ``testpmd``, and add vlan 10 to port 0: + +.. code-block:: console + + ./app/testpmd -l 0-15 -n 4 -- -i --forward-mode=mac + ... + + testpmd> set promisc 0 off + testpmd> rx_vlan add 10 0 + + +Flow Director +~~~~~~~~~~~~~ + +The Flow Director works in receive mode to identify specific flows or sets of flows and route them to specific queues. +The Flow Director filters can match the different fields for different type of packet: flow type, specific input set per flow type and the flexible payload. + +The default input set of each flow type is:: + + ipv4-other : src_ip_address, dst_ip_address + ipv4-frag : src_ip_address, dst_ip_address + ipv4-tcp : src_ip_address, dst_ip_address, src_port, dst_port + ipv4-udp : src_ip_address, dst_ip_address, src_port, dst_port + ipv4-sctp : src_ip_address, dst_ip_address, src_port, dst_port, + verification_tag + ipv6-other : src_ip_address, dst_ip_address + ipv6-frag : src_ip_address, dst_ip_address + ipv6-tcp : src_ip_address, dst_ip_address, src_port, dst_port + ipv6-udp : src_ip_address, dst_ip_address, src_port, dst_port + ipv6-sctp : src_ip_address, dst_ip_address, src_port, dst_port, + verification_tag + l2_payload : ether_type + +The flex payload is selected from offset 0 to 15 of packet's payload by default, while it is masked out from matching. + +Start ``testpmd`` with ``--disable-rss`` and ``--pkt-filter-mode=perfect``: + +.. code-block:: console + + ./app/testpmd -l 0-15 -n 4 -- -i --disable-rss --pkt-filter-mode=perfect \ + --rxq=8 --txq=8 --nb-cores=8 --nb-ports=1 + +Add a rule to direct ``ipv4-udp`` packet whose ``dst_ip=2.2.2.5, src_ip=2.2.2.3, src_port=32, dst_port=32`` to queue 1: + +.. 
code-block:: console + + testpmd> flow_director_filter 0 mode IP add flow ipv4-udp \ + src 2.2.2.3 32 dst 2.2.2.5 32 vlan 0 flexbytes () \ + fwd pf queue 1 fd_id 1 + +Check the flow director status: + +.. code-block:: console + + testpmd> show port fdir 0 + + ######################## FDIR infos for port 0 #################### + MODE: PERFECT + SUPPORTED FLOW TYPE: ipv4-frag ipv4-tcp ipv4-udp ipv4-sctp ipv4-other + ipv6-frag ipv6-tcp ipv6-udp ipv6-sctp ipv6-other + l2_payload + FLEX PAYLOAD INFO: + max_len: 16 payload_limit: 480 + payload_unit: 2 payload_seg: 3 + bitmask_unit: 2 bitmask_num: 2 + MASK: + vlan_tci: 0x0000, + src_ipv4: 0x00000000, + dst_ipv4: 0x00000000, + src_port: 0x0000, + dst_port: 0x0000 + src_ipv6: 0x00000000,0x00000000,0x00000000,0x00000000, + dst_ipv6: 0x00000000,0x00000000,0x00000000,0x00000000 + FLEX PAYLOAD SRC OFFSET: + L2_PAYLOAD: 0 1 2 3 4 5 6 ... + L3_PAYLOAD: 0 1 2 3 4 5 6 ... + L4_PAYLOAD: 0 1 2 3 4 5 6 ... + FLEX MASK CFG: + ipv4-udp: 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 + ipv4-tcp: 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 + ipv4-sctp: 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 + ipv4-other: 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 + ipv4-frag: 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 + ipv6-udp: 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 + ipv6-tcp: 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 + ipv6-sctp: 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 + ipv6-other: 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 + ipv6-frag: 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 + l2_payload: 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 + guarant_count: 1 best_count: 0 + guarant_space: 512 best_space: 7168 + collision: 0 free: 0 + maxhash: 0 maxlen: 0 + add: 0 remove: 0 + f_add: 0 f_remove: 0 + + +Delete all flow director rules on a port: + +.. code-block:: console + + testpmd> flush_flow_director 0 + +Floating VEB +~~~~~~~~~~~~~ + +The Intel® Ethernet 700 Series support a feature called +"Floating VEB". + +A Virtual Ethernet Bridge (VEB) is an IEEE Edge Virtual Bridging (EVB) term +for functionality that allows local switching between virtual endpoints within +a physical endpoint and also with an external bridge/network. + +A "Floating" VEB doesn't have an uplink connection to the outside world so all +switching is done internally and remains within the host. As such, this +feature provides security benefits. + +In addition, a Floating VEB overcomes a limitation of normal VEBs where they +cannot forward packets when the physical link is down. Floating VEBs don't need +to connect to the NIC port so they can still forward traffic from VF to VF +even when the physical link is down. + +Therefore, with this feature enabled VFs can be limited to communicating with +each other but not an outside network, and they can do so even when there is +no physical uplink on the associated NIC port. + +To enable this feature, the user should pass a ``devargs`` parameter to the +EAL, for example:: + + -w 84:00.0,enable_floating_veb=1 + +In this configuration the PMD will use the floating VEB feature for all the +VFs created by this PF device. + +Alternatively, the user can specify which VFs need to connect to this floating +VEB using the ``floating_veb_list`` argument:: + + -w 84:00.0,enable_floating_veb=1,floating_veb_list=1;3-4 + +In this example ``VF1``, ``VF3`` and ``VF4`` connect to the floating VEB, +while other VFs connect to the normal VEB. + +The current implementation only supports one floating VEB and one regular +VEB. 
VFs can connect to a floating VEB or a regular VEB according to the +configuration passed on the EAL command line. + +The floating VEB functionality requires a NIC firmware version of 5.0 +or greater. + +Dynamic Device Personalization (DDP) +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ + +The Intel® Ethernet 700 Series except for the Intel Ethernet Connection +X722 support a feature called "Dynamic Device Personalization (DDP)", +which is used to configure hardware by downloading a profile to support +protocols/filters which are not supported by default. The DDP +functionality requires a NIC firmware version of 6.0 or greater. + +Current implementation supports GTP-C/GTP-U/PPPoE/PPPoL2TP/ESP, +steering can be used with rte_flow API. + +GTPv1 package is released, and it can be downloaded from +https://downloadcenter.intel.com/download/27587. + +PPPoE package is released, and it can be downloaded from +https://downloadcenter.intel.com/download/28040. + +ESP-AH package is released, and it can be downloaded from +https://downloadcenter.intel.com/download/29446. + +Load a profile which supports GTP and store backup profile: + +.. code-block:: console + + testpmd> ddp add 0 ./gtp.pkgo,./backup.pkgo + +Delete a GTP profile and restore backup profile: + +.. code-block:: console + + testpmd> ddp del 0 ./backup.pkgo + +Get loaded DDP package info list: + +.. code-block:: console + + testpmd> ddp get list 0 + +Display information about a GTP profile: + +.. code-block:: console + + testpmd> ddp get info ./gtp.pkgo + +Input set configuration +~~~~~~~~~~~~~~~~~~~~~~~ +Input set for any PCTYPE can be configured with user defined configuration, +For example, to use only 48bit prefix for IPv6 src address for IPv6 TCP RSS: + +.. code-block:: console + + testpmd> port config 0 pctype 43 hash_inset clear all + testpmd> port config 0 pctype 43 hash_inset set field 13 + testpmd> port config 0 pctype 43 hash_inset set field 14 + testpmd> port config 0 pctype 43 hash_inset set field 15 + +Queue region configuration +~~~~~~~~~~~~~~~~~~~~~~~~~~~ +The Intel® Ethernet 700 Series supports a feature of queue regions +configuration for RSS in the PF, so that different traffic classes or +different packet classification types can be separated to different +queues in different queue regions. There is an API for configuration +of queue regions in RSS with a command line. It can parse the parameters +of the region index, queue number, queue start index, user priority, traffic +classes and so on. Depending on commands from the command line, it will call +i40e private APIs and start the process of setting or flushing the queue +region configuration. As this feature is specific for i40e only private +APIs are used. These new ``test_pmd`` commands are as shown below. For +details please refer to :doc:`../testpmd_app_ug/index`. + +.. code-block:: console + + testpmd> set port (port_id) queue-region region_id (value) \ + queue_start_index (value) queue_num (value) + testpmd> set port (port_id) queue-region region_id (value) flowtype (value) + testpmd> set port (port_id) queue-region UP (value) region_id (value) + testpmd> set port (port_id) queue-region flush (on|off) + testpmd> show port (port_id) queue-region + +Generic flow API +~~~~~~~~~~~~~~~~~~~ + +- ``RSS Flow`` + + RSS Flow supports to set hash input set, hash function, enable hash + and configure queue region. + For example: + Configure queue region as queue 0, 1, 2, 3. + + .. 
code-block:: console + + testpmd> flow create 0 ingress pattern end actions rss types end \ + queues 0 1 2 3 end / end + + Enable hash and set input set for ipv4-tcp. + + .. code-block:: console + + testpmd> flow create 0 ingress pattern eth / ipv4 / tcp / end \ + actions rss types ipv4-tcp l3-src-only end queues end / end + + Set symmetric hash enable for flow type ipv4-tcp. + + .. code-block:: console + + testpmd> flow create 0 ingress pattern eth / ipv4 / tcp / end \ + actions rss types ipv4-tcp end queues end func symmetric_toeplitz / end + + Set hash function as simple xor. + + .. code-block:: console + + testpmd> flow create 0 ingress pattern end actions rss types end \ + queues end func simple_xor / end + +Limitations or Known issues +--------------------------- + +MPLS packet classification +~~~~~~~~~~~~~~~~~~~~~~~~~~ + +For firmware versions prior to 5.0, MPLS packets are not recognized by the NIC. +The L2 Payload flow type in flow director can be used to classify MPLS packet +by using a command in testpmd like: + + testpmd> flow_director_filter 0 mode IP add flow l2_payload ether \ + 0x8847 flexbytes () fwd pf queue fd_id + +With the NIC firmware version 5.0 or greater, some limited MPLS support +is added: Native MPLS (MPLS in Ethernet) skip is implemented, while no +new packet type, no classification or offload are possible. With this change, +L2 Payload flow type in flow director cannot be used to classify MPLS packet +as with previous firmware versions. Meanwhile, the Ethertype filter can be +used to classify MPLS packet by using a command in testpmd like: + + testpmd> ethertype_filter 0 add mac_ignr 00:00:00:00:00:00 ethertype \ + 0x8847 fwd queue + +16 Byte RX Descriptor setting on DPDK VF +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ + +Currently the VF's RX descriptor mode is decided by PF. There's no PF-VF +interface for VF to request the RX descriptor mode, also no interface to notify +VF its own RX descriptor mode. +For all available versions of the i40e driver, these drivers don't support 16 +byte RX descriptor. If the Linux i40e kernel driver is used as host driver, +while DPDK i40e PMD is used as the VF driver, DPDK cannot choose 16 byte receive +descriptor. The reason is that the RX descriptor is already set to 32 byte by +the i40e kernel driver. That is to say, user should keep +``CONFIG_RTE_LIBRTE_I40E_16BYTE_RX_DESC=n`` in config file. +In the future, if the Linux i40e driver supports 16 byte RX descriptor, user +should make sure the DPDK VF uses the same RX descriptor mode, 16 byte or 32 +byte, as the PF driver. + +The same rule for DPDK PF + DPDK VF. The PF and VF should use the same RX +descriptor mode. Or the VF RX will not work. + +Receive packets with Ethertype 0x88A8 +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ + +Due to the FW limitation, PF can receive packets with Ethertype 0x88A8 +only when floating VEB is disabled. + +Incorrect Rx statistics when packet is oversize +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ + +When a packet is over maximum frame size, the packet is dropped. +However, the Rx statistics, when calling `rte_eth_stats_get` incorrectly +shows it as received. + +VF & TC max bandwidth setting +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ + +The per VF max bandwidth and per TC max bandwidth cannot be enabled in parallel. +The behavior is different when handling per VF and per TC max bandwidth setting. +When enabling per VF max bandwidth, SW will check if per TC max bandwidth is +enabled. If so, return failure. 
+When enabling per TC max bandwidth, SW will check if per VF max bandwidth +is enabled. If so, disable per VF max bandwidth and continue with per TC max +bandwidth setting. + +TC TX scheduling mode setting +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ + +There are 2 TX scheduling modes for TCs, round robin and strict priority mode. +If a TC is set to strict priority mode, it can consume unlimited bandwidth. +It means if APP has set the max bandwidth for that TC, it comes to no +effect. +It's suggested to set the strict priority mode for a TC that is latency +sensitive but no consuming much bandwidth. + +VF performance is impacted by PCI extended tag setting +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ + +To reach maximum NIC performance in the VF the PCI extended tag must be +enabled. The DPDK i40e PF driver will set this feature during initialization, +but the kernel PF driver does not. So when running traffic on a VF which is +managed by the kernel PF driver, a significant NIC performance downgrade has +been observed (for 64 byte packets, there is about 25% line-rate downgrade for +a 25GbE device and about 35% for a 40GbE device). + +For kernel version >= 4.11, the kernel's PCI driver will enable the extended +tag if it detects that the device supports it. So by default, this is not an +issue. For kernels <= 4.11 or when the PCI extended tag is disabled it can be +enabled using the steps below. + +#. Get the current value of the PCI configure register:: + + setpci -s a8.w + +#. Set bit 8:: + + value = value | 0x100 + +#. Set the PCI configure register with new value:: + + setpci -s a8.w= + +Vlan strip of VF +~~~~~~~~~~~~~~~~ + +The VF vlan strip function is only supported in the i40e kernel driver >= 2.1.26. + +DCB function +~~~~~~~~~~~~ + +DCB works only when RSS is enabled. + +Global configuration warning +~~~~~~~~~~~~~~~~~~~~~~~~~~~~ + +I40E PMD will set some global registers to enable some function or set some +configure. Then when using different ports of the same NIC with Linux kernel +and DPDK, the port with Linux kernel will be impacted by the port with DPDK. +For example, register I40E_GL_SWT_L2TAGCTRL is used to control L2 tag, i40e +PMD uses I40E_GL_SWT_L2TAGCTRL to set vlan TPID. If setting TPID in port A +with DPDK, then the configuration will also impact port B in the NIC with +kernel driver, which don't want to use the TPID. +So PMD reports warning to clarify what is changed by writing global register. + +High Performance of Small Packets on 40GbE NIC +---------------------------------------------- + +As there might be firmware fixes for performance enhancement in latest version +of firmware image, the firmware update might be needed for getting high performance. +Check the Intel support website for the latest firmware updates. +Users should consult the release notes specific to a DPDK release to identify +the validated firmware version for a NIC using the i40e driver. + +Use 16 Bytes RX Descriptor Size +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ + +As i40e PMD supports both 16 and 32 bytes RX descriptor sizes, and 16 bytes size can provide helps to high performance of small packets. +Configuration of ``CONFIG_RTE_LIBRTE_I40E_16BYTE_RX_DESC`` in config files can be changed to use 16 bytes size RX descriptors. + +Input set requirement of each pctype for FDIR +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ + +Each PCTYPE can only have one specific FDIR input set at one time. 
+For example, if creating 2 rte_flow rules with different input set for one PCTYPE, +it will fail and return the info "Conflict with the first rule's input set", +which means the current rule's input set conflicts with the first rule's. +Remove the first rule if want to change the input set of the PCTYPE. + +Example of getting best performance with l3fwd example +------------------------------------------------------ + +The following is an example of running the DPDK ``l3fwd`` sample application to get high performance with a +server with Intel Xeon processors and Intel Ethernet CNA XL710. + +The example scenario is to get best performance with two Intel Ethernet CNA XL710 40GbE ports. +See :numref:`figure_intel_perf_test_setup` for the performance test setup. + +.. _figure_intel_perf_test_setup: + +.. figure:: img/intel_perf_test_setup.* + + Performance Test Setup + + +1. Add two Intel Ethernet CNA XL710 to the platform, and use one port per card to get best performance. + The reason for using two NICs is to overcome a PCIe v3.0 limitation since it cannot provide 80GbE bandwidth + for two 40GbE ports, but two different PCIe v3.0 x8 slot can. + Refer to the sample NICs output above, then we can select ``82:00.0`` and ``85:00.0`` as test ports:: + + 82:00.0 Ethernet [0200]: Intel XL710 for 40GbE QSFP+ [8086:1583] + 85:00.0 Ethernet [0200]: Intel XL710 for 40GbE QSFP+ [8086:1583] + +2. Connect the ports to the traffic generator. For high speed testing, it's best to use a hardware traffic generator. + +3. Check the PCI devices numa node (socket id) and get the cores number on the exact socket id. + In this case, ``82:00.0`` and ``85:00.0`` are both in socket 1, and the cores on socket 1 in the referenced platform + are 18-35 and 54-71. + Note: Don't use 2 logical cores on the same core (e.g core18 has 2 logical cores, core18 and core54), instead, use 2 logical + cores from different cores (e.g core18 and core19). + +4. Bind these two ports to igb_uio. + +5. As to Intel Ethernet CNA XL710 40GbE port, we need at least two queue pairs to achieve best performance, then two queues per port + will be required, and each queue pair will need a dedicated CPU core for receiving/transmitting packets. + +6. The DPDK sample application ``l3fwd`` will be used for performance testing, with using two ports for bi-directional forwarding. + Compile the ``l3fwd sample`` with the default lpm mode. + +7. The command line of running l3fwd would be something like the following:: + + ./l3fwd -l 18-21 -n 4 -w 82:00.0 -w 85:00.0 \ + -- -p 0x3 --config '(0,0,18),(0,1,19),(1,0,20),(1,1,21)' + + This means that the application uses core 18 for port 0, queue pair 0 forwarding, core 19 for port 0, queue pair 1 forwarding, + core 20 for port 1, queue pair 0 forwarding, and core 21 for port 1, queue pair 1 forwarding. + +8. Configure the traffic at a traffic generator. + + * Start creating a stream on packet generator. + + * Set the Ethernet II type to 0x0800. + +Tx bytes affected by the link status change +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ + +For firmware versions prior to 6.01 for X710 series and 3.33 for X722 series, the tx_bytes statistics data is affected by +the link down event. Each time the link status changes to down, the tx_bytes decreases 110 bytes. diff --git a/src/spdk/dpdk/doc/guides/nics/ice.rst b/src/spdk/dpdk/doc/guides/nics/ice.rst new file mode 100644 index 000000000..9a9f4a6bb --- /dev/null +++ b/src/spdk/dpdk/doc/guides/nics/ice.rst @@ -0,0 +1,317 @@ +.. 
SPDX-License-Identifier: BSD-3-Clause + Copyright(c) 2018 Intel Corporation. + +ICE Poll Mode Driver +====================== + +The ice PMD (librte_pmd_ice) provides poll mode driver support for +10/25/50/100 Gbps Intel® Ethernet 810 Series Network Adapters based on +the Intel Ethernet Controller E810. + + +Prerequisites +------------- + +- The E810 is currently in sampling state only. To obtain early samples and/or get further information + about kernel drivers, firmware and DDP support, please speak to your Intel representative. + +- Follow the DPDK :ref:`Getting Started Guide for Linux ` to setup the basic DPDK environment. + +- To get better performance on Intel platforms, please follow the "How to get best performance with NICs on Intel platforms" + section of the :ref:`Getting Started Guide for Linux `. + +Pre-Installation Configuration +------------------------------ + +Config File Options +~~~~~~~~~~~~~~~~~~~ + +The following options can be modified in the ``config`` file. +Please note that enabling debugging options may affect system performance. + +- ``CONFIG_RTE_LIBRTE_ICE_PMD`` (default ``y``) + + Toggle compilation of the ``librte_pmd_ice`` driver. + +- ``CONFIG_RTE_LIBRTE_ICE_DEBUG_*`` (default ``n``) + + Toggle display of generic debugging messages. + +- ``CONFIG_RTE_LIBRTE_ICE_16BYTE_RX_DESC`` (default ``n``) + + Toggle to use a 16-byte RX descriptor, by default the RX descriptor is 32 byte. + +Runtime Config Options +~~~~~~~~~~~~~~~~~~~~~~ + +- ``Safe Mode Support`` (default ``0``) + + If driver failed to load OS package, by default driver's initialization failed. + But if user intend to use the device without OS package, user can take ``devargs`` + parameter ``safe-mode-support``, for example:: + + -w 80:00.0,safe-mode-support=1 + + Then the driver will be initialized successfully and the device will enter Safe Mode. + NOTE: In Safe mode, only very limited features are available, features like RSS, + checksum, fdir, tunneling ... are all disabled. + +- ``Generic Flow Pipeline Mode Support`` (default ``0``) + + In pipeline mode, a flow can be set at one specific stage by setting parameter + ``priority``. Currently, we support two stages: priority = 0 or !0. Flows with + priority 0 located at the first pipeline stage which typically be used as a firewall + to drop the packet on a blacklist(we called it permission stage). At this stage, + flow rules are created for the device's exact match engine: switch. Flows with priority + !0 located at the second stage, typically packets are classified here and be steered to + specific queue or queue group (we called it distribution stage), At this stage, flow + rules are created for device's flow director engine. + For none-pipeline mode, ``priority`` is ignored, a flow rule can be created as a flow director + rule or a switch rule depends on its pattern/action and the resource allocation situation, + all flows are virtually at the same pipeline stage. + By default, generic flow API is enabled in none-pipeline mode, user can choose to + use pipeline mode by setting ``devargs`` parameter ``pipeline-mode-support``, + for example:: + + -w 80:00.0,pipeline-mode-support=1 + +- ``Flow Mark Support`` (default ``0``) + + This is a hint to the driver to select the data path that supports flow mark extraction + by default. + NOTE: This is an experimental devarg, it will be removed when any of below conditions + is ready. 
+ 1) all data paths support flow mark (currently vPMD does not) + 2) a new offload like RTE_DEV_RX_OFFLOAD_FLOW_MARK be introduced as a standard way to hint. + Example:: + + -w 80:00.0,flow-mark-support=1 + +- ``Protocol extraction for per queue`` + + Configure the RX queues to do protocol extraction into mbuf for protocol + handling acceleration, like checking the TCP SYN packets quickly. + + The argument format is:: + + -w 18:00.0,proto_xtr=[...] + -w 18:00.0,proto_xtr= + + Queues are grouped by ``(`` and ``)`` within the group. The ``-`` character + is used as a range separator and ``,`` is used as a single number separator. + The grouping ``()`` can be omitted for single element group. If no queues are + specified, PMD will use this protocol extraction type for all queues. + + Protocol is : ``vlan, ipv4, ipv6, ipv6_flow, tcp``. + + .. code-block:: console + + testpmd -w 18:00.0,proto_xtr='[(1,2-3,8-9):tcp,10-13:vlan]' + + This setting means queues 1, 2-3, 8-9 are TCP extraction, queues 10-13 are + VLAN extraction, other queues run with no protocol extraction. + + .. code-block:: console + + testpmd -w 18:00.0,proto_xtr=vlan,proto_xtr='[(1,2-3,8-9):tcp,10-23:ipv6]' + + This setting means queues 1, 2-3, 8-9 are TCP extraction, queues 10-23 are + IPv6 extraction, other queues use the default VLAN extraction. + + The extraction metadata is copied into the registered dynamic mbuf field, and + the related dynamic mbuf flags is set. + + .. table:: Protocol extraction : ``vlan`` + + +----------------------------+----------------------------+ + | VLAN2 | VLAN1 | + +======+===+=================+======+===+=================+ + | PCP | D | VID | PCP | D | VID | + +------+---+-----------------+------+---+-----------------+ + + VLAN1 - single or EVLAN (first for QinQ). + + VLAN2 - C-VLAN (second for QinQ). + + .. table:: Protocol extraction : ``ipv4`` + + +----------------------------+----------------------------+ + | IPHDR2 | IPHDR1 | + +======+=======+=============+==============+=============+ + | Ver |Hdr Len| ToS | TTL | Protocol | + +------+-------+-------------+--------------+-------------+ + + IPHDR1 - IPv4 header word 4, "TTL" and "Protocol" fields. + + IPHDR2 - IPv4 header word 0, "Ver", "Hdr Len" and "Type of Service" fields. + + .. table:: Protocol extraction : ``ipv6`` + + +----------------------------+----------------------------+ + | IPHDR2 | IPHDR1 | + +=====+=============+========+=============+==============+ + | Ver |Traffic class| Flow | Next Header | Hop Limit | + +-----+-------------+--------+-------------+--------------+ + + IPHDR1 - IPv6 header word 3, "Next Header" and "Hop Limit" fields. + + IPHDR2 - IPv6 header word 0, "Ver", "Traffic class" and high 4 bits of + "Flow Label" fields. + + .. table:: Protocol extraction : ``ipv6_flow`` + + +----------------------------+----------------------------+ + | IPHDR2 | IPHDR1 | + +=====+=============+========+============================+ + | Ver |Traffic class| Flow Label | + +-----+-------------+-------------------------------------+ + + IPHDR1 - IPv6 header word 1, 16 low bits of the "Flow Label" field. + + IPHDR2 - IPv6 header word 0, "Ver", "Traffic class" and high 4 bits of + "Flow Label" fields. + + .. 
table:: Protocol extraction : ``tcp`` + + +----------------------------+----------------------------+ + | TCPHDR2 | TCPHDR1 | + +============================+======+======+==============+ + | Reserved |Offset| RSV | Flags | + +----------------------------+------+------+--------------+ + + TCPHDR1 - TCP header word 6, "Data Offset" and "Flags" fields. + + TCPHDR2 - Reserved + + Use ``rte_net_ice_dynf_proto_xtr_metadata_get`` to access the protocol + extraction metadata, and use ``RTE_PKT_RX_DYNF_PROTO_XTR_*`` to get the + metadata type of ``struct rte_mbuf::ol_flags``. + + The ``rte_net_ice_dump_proto_xtr_metadata`` routine shows how to + access the protocol extraction result in ``struct rte_mbuf``. + +Driver compilation and testing +------------------------------ + +Refer to the document :ref:`compiling and testing a PMD for a NIC ` +for details. + +Features +-------- + +Vector PMD +~~~~~~~~~~ + +Vector PMD for RX and TX path are selected automatically. The paths +are chosen based on 2 conditions. + +- ``CPU`` + On the X86 platform, the driver checks if the CPU supports AVX2. + If it's supported, AVX2 paths will be chosen. If not, SSE is chosen. + +- ``Offload features`` + The supported HW offload features are described in the document ice_vec.ini. + If any not supported features are used, ICE vector PMD is disabled and the + normal paths are chosen. + +Malicious driver detection (MDD) +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ + +It's not appropriate to send a packet, if this packet's destination MAC address +is just this port's MAC address. If SW tries to send such packets, HW will +report a MDD event and drop the packets. + +The APPs based on DPDK should avoid providing such packets. + +Device Config Function (DCF) +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ + +This section demonstrates ICE DCF PMD, which shares the core module with ICE +PMD and iAVF PMD. + +A DCF (Device Config Function) PMD bounds to the device's trusted VF with ID 0, +it can act as a sole controlling entity to exercise advance functionality (such +as switch, ACL) for the rest VFs. + +The DCF PMD needs to advertise and acquire DCF capability which allows DCF to +send AdminQ commands that it would like to execute over to the PF and receive +responses for the same from PF. + +.. _figure_ice_dcf: + +.. figure:: img/ice_dcf.* + + DCF Communication flow. + +#. Create the VFs:: + + echo 4 > /sys/bus/pci/devices/0000\:18\:00.0/sriov_numvfs + +#. Enable the VF0 trust on:: + + ip link set dev enp24s0f0 vf 0 trust on + +#. Bind the VF0, and run testpmd with 'cap=dcf' devarg:: + + testpmd -l 22-25 -n 4 -w 18:01.0,cap=dcf -- -i + +#. Monitor the VF2 interface network traffic:: + + tcpdump -e -nn -i enp24s1f2 + +#. Create one flow to redirect the traffic to VF2 by DCF:: + + flow create 0 priority 0 ingress pattern eth / ipv4 src is 192.168.0.2 \ + dst is 192.168.0.3 / end actions vf id 2 / end + +#. Send the packet, and it should be displayed on tcpdump:: + + sendp(Ether(src='3c:fd:fe:aa:bb:78', dst='00:00:00:01:02:03')/IP(src=' \ + 192.168.0.2', dst="192.168.0.3")/TCP(flags='S')/Raw(load='XXXXXXXXXX'), \ + iface="enp24s0f0", count=10) + +Sample Application Notes +------------------------ + +Vlan filter +~~~~~~~~~~~ + +Vlan filter only works when Promiscuous mode is off. + +To start ``testpmd``, and add vlan 10 to port 0: + +.. code-block:: console + + ./app/testpmd -l 0-15 -n 4 -- -i + ... 
+
+   testpmd> rx_vlan add 10 0
+
+Limitations or Known issues
+---------------------------
+
+The Intel E810 requires a programmable pipeline package to be downloaded
+by the driver to support normal operations. The E810 has limited
+functionality built in to allow PXE boot and other use cases, but the
+driver must download a package file during the driver initialization
+stage.
+
+The default DDP package file name is ice.pkg. For a specific NIC, the
+DDP package to be loaded can have a device-specific filename: ice-xxxxxx.pkg,
+where 'xxxxxx' is the 64-bit PCIe Device Serial Number of the NIC. For
+example, if the NIC's device serial number is 00-CC-BB-FF-FF-AA-05-68,
+the device-specific DDP package filename is ice-00ccbbffffaa0568.pkg
+(in hex and all lower case). During initialization, the driver searches
+in the following paths in order: /lib/firmware/updates/intel/ice/ddp
+and /lib/firmware/intel/ice/ddp. The corresponding device-specific DDP
+package will be downloaded first if the file exists. If not, then the
+driver tries to load the default package. The type of loaded package
+is stored in ``ice_adapter->active_pkg_type``.
+
+A symbolic link to the DDP package file is also acceptable. The same package
+file is used by both the kernel driver and the DPDK PMD.
+
+Limitation
+~~~~~~~~~~
+
+The released ice code is currently for evaluation only.
diff --git a/src/spdk/dpdk/doc/guides/nics/igb.rst b/src/spdk/dpdk/doc/guides/nics/igb.rst
new file mode 100644
index 000000000..0171622f4
--- /dev/null
+++ b/src/spdk/dpdk/doc/guides/nics/igb.rst
@@ -0,0 +1,38 @@
+.. SPDX-License-Identifier: BSD-3-Clause
+   Copyright(c) 2017 Intel Corporation.
+
+IGB Poll Mode Driver
+====================
+
+The IGB PMD (``librte_pmd_e1000``) provides poll mode driver
+support for Intel 1GbE NICs.
+
+Features
+--------
+
+Features of the IGB PMD are:
+
+* Multiple queues for TX and RX
+* Receive Side Scaling (RSS)
+* MAC/VLAN filtering
+* Packet type information
+* Double VLAN
+* IEEE 1588
+* TSO offload
+* Checksum offload
+* TCP segmentation offload
+* Jumbo frames supported
+
+
+Limitations or Known issues
+---------------------------
+
+
+Supported Chipsets and NICs
+---------------------------
+
+- Intel 82576EB Gigabit Ethernet Controller
+- Intel 82580EB Gigabit Ethernet Controller
+- Intel 82580DB Gigabit Ethernet Controller
+- Intel Ethernet Controller I210
+- Intel Ethernet Controller I350
diff --git a/src/spdk/dpdk/doc/guides/nics/igc.rst b/src/spdk/dpdk/doc/guides/nics/igc.rst
new file mode 100644
index 000000000..4c5e62675
--- /dev/null
+++ b/src/spdk/dpdk/doc/guides/nics/igc.rst
@@ -0,0 +1,121 @@
+.. SPDX-License-Identifier: BSD-3-Clause
+   Copyright(c) 2020 Intel Corporation.
+
+IGC Poll Mode Driver
+======================
+
+The IGC PMD (librte_pmd_igc) provides poll mode driver support for Foxville
+I225 Series Network Adapters.
+
+- For information about I225, please refer to:
+  `https://ark.intel.com/content/www/us/en/ark/products/series/184686/
+  intel-ethernet-controller-i225-series.html`
+
+Config File Options
+~~~~~~~~~~~~~~~~~~~
+
+The following options can be modified in the ``config`` file.
+Please note that enabling debugging options may affect system performance.
+
+- ``CONFIG_RTE_LIBRTE_IGC_PMD`` (default ``y``)
+
+   Toggle compilation of the ``librte_pmd_igc`` driver.
+
+- ``CONFIG_RTE_LIBRTE_IGC_DEBUG_*`` (default ``n``)
+
+   Toggle display of generic debugging messages.
+ + +Driver compilation and testing +------------------------------ + +Refer to the document :ref:`compiling and testing a PMD for a NIC ` +for details. + + +Supported Chipsets and NICs +--------------------------- + +Foxville LM (I225 LM): Client 2.5G LAN vPro Corporate +Foxville V (I225 V): Client 2.5G LAN Consumer +Foxville I (I225 I): Client 2.5G Industrial Temp +Foxville V (I225 K): Client 2.5G LAN Consumer + + +Sample Application Notes +------------------------ + +Vlan filter +~~~~~~~~~~~ + +VLAN stripping off only works with inner vlan. +Only the outer VLAN TPID can be set to a vlan other than 0x8100. + +If extend VLAN is enabled: + +- The VLAN header in a packet that carries a single VLAN header is treated as the external VLAN. + +- Foxville expects that any transmitted packet to have at least the external VLAN added by the + software. For those packets where an external VLAN is not present, any offload that relates to + inner fields to the EtherType might not be provided. + +- If VLAN TX-OFFLOAD is enabled and the packet does not contain an external VLAN, the packet is + dropped, and if configured, the queue from which the packet was sent is disabled. + +To start ``testpmd``, add vlan 10 to port, set vlan stripping off on, set extend on, set TPID of +outer VLAN to 0x9100: + +.. code-block:: console + + ./app/testpmd -l 4-8 -- -i + ... + + testpmd> vlan set filter on 0 + testpmd> rx_vlan add 10 0 + testpmd> vlan set strip off 0 + testpmd> vlan set extend on 0 + testpmd> vlan set outer tpid 0x9100 0 + + +Flow Director +~~~~~~~~~~~~~ + +The Flow Director works in receive mode to identify specific flows or sets of flows and route +them to specific queues. + +The Flow Director filters includes the following types: + +- ether-type filter +- 2-tuple filter(destination L4 protocol and destination L4 port) +- TCP SYN filter +- RSS filter + +Start ``testpmd``: + +.. code-block:: console + + ./testpmd -l 4-8 -- i --rxq=4 --txq=4 --pkt-filter-mode=perfect --disable-rss + +Add a rule to direct packet whose ``ether-type=0x801`` to queue 1: + +.. code-block:: console + + testpmd> flow create 0 ingress pattern eth type is 0x801 / end actions queue index 1 / end + +Add a rule to direct packet whose ``ip-protocol=0x6(TCP), tcp_port=0x80`` to queue 1: + +.. code-block:: console + + testpmd> flow create 0 ingress pattern eth / ipv4 proto is 6 / tcp dst is 0x80 / end actions queue index 1 / end + +Add a rule to direct packet whose ``ip-protocol=0x6(TCP), SYN flag is set`` to queue 1: + +.. code-block:: console + + testpmd> flow validate 0 ingress pattern tcp flags spec 0x02 flags mask 0x02 / end actions queue index 1 / end + +Add a rule to enable ipv4-udp RSS: + +.. 
code-block:: console + + testpmd> flow create 0 ingress pattern end actions rss types ipv4-udp end / end diff --git a/src/spdk/dpdk/doc/guides/nics/img/console.png b/src/spdk/dpdk/doc/guides/nics/img/console.png new file mode 100644 index 000000000..99423340d Binary files /dev/null and b/src/spdk/dpdk/doc/guides/nics/img/console.png differ diff --git a/src/spdk/dpdk/doc/guides/nics/img/fast_pkt_proc.png b/src/spdk/dpdk/doc/guides/nics/img/fast_pkt_proc.png new file mode 100644 index 000000000..48d57e5c4 Binary files /dev/null and b/src/spdk/dpdk/doc/guides/nics/img/fast_pkt_proc.png differ diff --git a/src/spdk/dpdk/doc/guides/nics/img/forward_stats.png b/src/spdk/dpdk/doc/guides/nics/img/forward_stats.png new file mode 100644 index 000000000..23e35325c Binary files /dev/null and b/src/spdk/dpdk/doc/guides/nics/img/forward_stats.png differ diff --git a/src/spdk/dpdk/doc/guides/nics/img/host_vm_comms.png b/src/spdk/dpdk/doc/guides/nics/img/host_vm_comms.png new file mode 100644 index 000000000..4e0b3c96c Binary files /dev/null and b/src/spdk/dpdk/doc/guides/nics/img/host_vm_comms.png differ diff --git a/src/spdk/dpdk/doc/guides/nics/img/host_vm_comms_qemu.png b/src/spdk/dpdk/doc/guides/nics/img/host_vm_comms_qemu.png new file mode 100644 index 000000000..391a4eac2 Binary files /dev/null and b/src/spdk/dpdk/doc/guides/nics/img/host_vm_comms_qemu.png differ diff --git a/src/spdk/dpdk/doc/guides/nics/img/ice_dcf.svg b/src/spdk/dpdk/doc/guides/nics/img/ice_dcf.svg new file mode 100644 index 000000000..c6de820a0 --- /dev/null +++ b/src/spdk/dpdk/doc/guides/nics/img/ice_dcf.svg @@ -0,0 +1,516 @@ + + + + + + + + + + image/svg+xml + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + diff --git a/src/spdk/dpdk/doc/guides/nics/img/intel_perf_test_setup.svg b/src/spdk/dpdk/doc/guides/nics/img/intel_perf_test_setup.svg new file mode 100644 index 000000000..27c3c1cd7 --- /dev/null +++ b/src/spdk/dpdk/doc/guides/nics/img/intel_perf_test_setup.svg @@ -0,0 +1,507 @@ + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + image/svg+xml + + + + + + + + + Traffic Generator + + + Dest MAC: Port 0Dest IP: 2.1.1.1Src IP: Random + Port A + + + Dest MAC: Port 1Dest IP: 1.1.1.1Src IP: Random + Port B + + + + Intel XL 71040G Ethernet + Port 0 + Flow 2 + Flow 1 + + Port X + + + + + + Intel XL 71040G Ethernet + + Port 1 + + Port X + + Port 0 to Port 1Port 1 to Port 0 + Forwarding + IA Platform(Socket 1) + + diff --git a/src/spdk/dpdk/doc/guides/nics/img/inter_vm_comms.png b/src/spdk/dpdk/doc/guides/nics/img/inter_vm_comms.png new file mode 100644 index 000000000..6d85ece73 Binary files /dev/null and b/src/spdk/dpdk/doc/guides/nics/img/inter_vm_comms.png differ diff --git a/src/spdk/dpdk/doc/guides/nics/img/mvpp2_tm.svg b/src/spdk/dpdk/doc/guides/nics/img/mvpp2_tm.svg new file mode 100644 index 000000000..4aa927219 --- /dev/null +++ b/src/spdk/dpdk/doc/guides/nics/img/mvpp2_tm.svg @@ -0,0 +1,71 @@ + + + + + + + + + + + + + + + + + + + Port N + + + + + + + + + + Txq 0 + + + + + + + Txq 1 + + + + + + + Txq M + + + + + + Level 0: + + + Level 1: + + + + + + + + + + + + + + + + + + + diff --git a/src/spdk/dpdk/doc/guides/nics/img/perf_benchmark.png b/src/spdk/dpdk/doc/guides/nics/img/perf_benchmark.png new file mode 100644 index 000000000..aba818c30 Binary files /dev/null and b/src/spdk/dpdk/doc/guides/nics/img/perf_benchmark.png differ diff --git 
a/src/spdk/dpdk/doc/guides/nics/img/single_port_nic.png b/src/spdk/dpdk/doc/guides/nics/img/single_port_nic.png new file mode 100644 index 000000000..8f39d73b2 Binary files /dev/null and b/src/spdk/dpdk/doc/guides/nics/img/single_port_nic.png differ diff --git a/src/spdk/dpdk/doc/guides/nics/img/szedata2_nfb200g_architecture.svg b/src/spdk/dpdk/doc/guides/nics/img/szedata2_nfb200g_architecture.svg new file mode 100644 index 000000000..e152e4a8f --- /dev/null +++ b/src/spdk/dpdk/doc/guides/nics/img/szedata2_nfb200g_architecture.svg @@ -0,0 +1,214 @@ + + + + + + image/svg+xml + + + + + + + + + + + + + + + + + + + + + + + + + + ETH 0 + ETH 1 + NFB-200G2QL card + PCI-E master slot + PCI-E slave slot + QUEUE 0 + QUEUE 15 + QUEUE 16 + QUEUE 31 + CPU 0 + CPU 1 + + diff --git a/src/spdk/dpdk/doc/guides/nics/img/vm_vm_comms.png b/src/spdk/dpdk/doc/guides/nics/img/vm_vm_comms.png new file mode 100644 index 000000000..2bf1cd272 Binary files /dev/null and b/src/spdk/dpdk/doc/guides/nics/img/vm_vm_comms.png differ diff --git a/src/spdk/dpdk/doc/guides/nics/img/vmxnet3_int.png b/src/spdk/dpdk/doc/guides/nics/img/vmxnet3_int.png new file mode 100644 index 000000000..6541feba7 Binary files /dev/null and b/src/spdk/dpdk/doc/guides/nics/img/vmxnet3_int.png differ diff --git a/src/spdk/dpdk/doc/guides/nics/img/vswitch_vm.png b/src/spdk/dpdk/doc/guides/nics/img/vswitch_vm.png new file mode 100644 index 000000000..ac817aaae Binary files /dev/null and b/src/spdk/dpdk/doc/guides/nics/img/vswitch_vm.png differ diff --git a/src/spdk/dpdk/doc/guides/nics/index.rst b/src/spdk/dpdk/doc/guides/nics/index.rst new file mode 100644 index 000000000..da5d85bcd --- /dev/null +++ b/src/spdk/dpdk/doc/guides/nics/index.rst @@ -0,0 +1,65 @@ +.. SPDX-License-Identifier: BSD-3-Clause + Copyright(c) 2010-2014 Intel Corporation. + +Network Interface Controller Drivers +==================================== + +.. toctree:: + :maxdepth: 3 + :numbered: + + overview + features + build_and_test + af_packet + af_xdp + ark + atlantic + avp + axgbe + bnx2x + bnxt + cxgbe + dpaa + dpaa2 + e1000em + ena + enetc + enic + fm10k + hinic + hns3 + i40e + ice + igb + igc + ionic + ipn3ke + ixgbe + intel_vf + kni + liquidio + memif + mlx4 + mlx5 + mvneta + mvpp2 + netvsc + nfb + nfp + null + octeontx + octeontx2 + pfe + qede + sfc_efx + softnic + szedata2 + tap + thunderx + vdev_netvsc + virtio + vhost + vmxnet3 + pcap_ring + fail_safe diff --git a/src/spdk/dpdk/doc/guides/nics/intel_vf.rst b/src/spdk/dpdk/doc/guides/nics/intel_vf.rst new file mode 100644 index 000000000..ade515259 --- /dev/null +++ b/src/spdk/dpdk/doc/guides/nics/intel_vf.rst @@ -0,0 +1,617 @@ +.. SPDX-License-Identifier: BSD-3-Clause + Copyright(c) 2010-2014 Intel Corporation. + +Intel Virtual Function Driver +============================= + +Supported Intel® Ethernet Controllers (see the *DPDK Release Notes* for details) +support the following modes of operation in a virtualized environment: + +* **SR-IOV mode**: Involves direct assignment of part of the port resources to different guest operating systems + using the PCI-SIG Single Root I/O Virtualization (SR IOV) standard, + also known as "native mode" or "pass-through" mode. + In this chapter, this mode is referred to as IOV mode. + +* **VMDq mode**: Involves central management of the networking resources by an IO Virtual Machine (IOVM) or + a Virtual Machine Monitor (VMM), also known as software switch acceleration mode. + In this chapter, this mode is referred to as the Next Generation VMDq mode. 
+ +SR-IOV Mode Utilization in a DPDK Environment +--------------------------------------------- + +The DPDK uses the SR-IOV feature for hardware-based I/O sharing in IOV mode. +Therefore, it is possible to partition SR-IOV capability on Ethernet controller NIC resources logically and +expose them to a virtual machine as a separate PCI function called a "Virtual Function". +Refer to :numref:`figure_single_port_nic`. + +Therefore, a NIC is logically distributed among multiple virtual machines (as shown in :numref:`figure_single_port_nic`), +while still having global data in common to share with the Physical Function and other Virtual Functions. +The DPDK fm10kvf, i40evf, igbvf or ixgbevf as a Poll Mode Driver (PMD) serves for the Intel® 82576 Gigabit Ethernet Controller, +Intel® Ethernet Controller I350 family, Intel® 82599 10 Gigabit Ethernet Controller NIC, +Intel® Fortville 10/40 Gigabit Ethernet Controller NIC's virtual PCI function, or PCIe host-interface of the Intel Ethernet Switch +FM10000 Series. +Meanwhile the DPDK Poll Mode Driver (PMD) also supports "Physical Function" of such NIC's on the host. + +The DPDK PF/VF Poll Mode Driver (PMD) supports the Layer 2 switch on Intel® 82576 Gigabit Ethernet Controller, +Intel® Ethernet Controller I350 family, Intel® 82599 10 Gigabit Ethernet Controller, +and Intel® Fortville 10/40 Gigabit Ethernet Controller NICs so that guest can choose it for inter virtual machine traffic in SR-IOV mode. + +For more detail on SR-IOV, please refer to the following documents: + +* `SR-IOV provides hardware based I/O sharing `_ + +* `PCI-SIG-Single Root I/O Virtualization Support on IA + `_ + +* `Scalable I/O Virtualized Servers `_ + +.. _figure_single_port_nic: + +.. figure:: img/single_port_nic.* + + Virtualization for a Single Port NIC in SR-IOV Mode + + +Physical and Virtual Function Infrastructure +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ + +The following describes the Physical Function and Virtual Functions infrastructure for the supported Ethernet Controller NICs. + +Virtual Functions operate under the respective Physical Function on the same NIC Port and therefore have no access +to the global NIC resources that are shared between other functions for the same NIC port. + +A Virtual Function has basic access to the queue resources and control structures of the queues assigned to it. +For global resource access, a Virtual Function has to send a request to the Physical Function for that port, +and the Physical Function operates on the global resources on behalf of the Virtual Function. +For this out-of-band communication, an SR-IOV enabled NIC provides a memory buffer for each Virtual Function, +which is called a "Mailbox". + +Intel® Ethernet Adaptive Virtual Function +^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ +Adaptive Virtual Function (IAVF) is a SR-IOV Virtual Function with the same device id (8086:1889) on different Intel Ethernet Controller. +IAVF Driver is VF driver which supports for all future Intel devices without requiring a VM update. And since this happens to be an adaptive VF driver, +every new drop of the VF driver would add more and more advanced features that can be turned on in the VM if the underlying HW device supports those +advanced features based on a device agnostic way without ever compromising on the base functionality. IAVF provides generic hardware interface and +interface between IAVF driver and a compliant PF driver is specified. 
+ +Intel products starting Ethernet Controller 700 Series to support Adaptive Virtual Function. + +The way to generate Virtual Function is like normal, and the resource of VF assignment depends on the NIC Infrastructure. + +For more detail on SR-IOV, please refer to the following documents: + +* `Intel® IAVF HAS `_ + +.. note:: + + To use DPDK IAVF PMD on Intel® 700 Series Ethernet Controller, the device id (0x1889) need to specified during device + assignment in hypervisor. Take qemu for example, the device assignment should carry the IAVF device id (0x1889) like + ``-device vfio-pci,x-pci-device-id=0x1889,host=03:0a.0``. + +The PCIE host-interface of Intel Ethernet Switch FM10000 Series VF infrastructure +^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ + +In a virtualized environment, the programmer can enable a maximum of *64 Virtual Functions (VF)* +globally per PCIE host-interface of the Intel Ethernet Switch FM10000 Series device. +Each VF can have a maximum of 16 queue pairs. +The Physical Function in host could be only configured by the Linux* fm10k driver +(in the case of the Linux Kernel-based Virtual Machine [KVM]), DPDK PMD PF driver doesn't support it yet. + +For example, + +* Using Linux* fm10k driver: + + .. code-block:: console + + rmmod fm10k (To remove the fm10k module) + insmod fm0k.ko max_vfs=2,2 (To enable two Virtual Functions per port) + +Virtual Function enumeration is performed in the following sequence by the Linux* pci driver for a dual-port NIC. +When you enable the four Virtual Functions with the above command, the four enabled functions have a Function# +represented by (Bus#, Device#, Function#) in sequence starting from 0 to 3. +However: + +* Virtual Functions 0 and 2 belong to Physical Function 0 + +* Virtual Functions 1 and 3 belong to Physical Function 1 + +.. note:: + + The above is an important consideration to take into account when targeting specific packets to a selected port. + +Intel® X710/XL710 Gigabit Ethernet Controller VF Infrastructure +^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ + +In a virtualized environment, the programmer can enable a maximum of *128 Virtual Functions (VF)* +globally per Intel® X710/XL710 Gigabit Ethernet Controller NIC device. +The number of queue pairs of each VF can be configured by ``CONFIG_RTE_LIBRTE_I40E_QUEUE_NUM_PER_VF`` in ``config`` file. +The Physical Function in host could be either configured by the Linux* i40e driver +(in the case of the Linux Kernel-based Virtual Machine [KVM]) or by DPDK PMD PF driver. +When using both DPDK PMD PF/VF drivers, the whole NIC will be taken over by DPDK based application. + +For example, + +* Using Linux* i40e driver: + + .. code-block:: console + + rmmod i40e (To remove the i40e module) + insmod i40e.ko max_vfs=2,2 (To enable two Virtual Functions per port) + +* Using the DPDK PMD PF i40e driver: + + Kernel Params: iommu=pt, intel_iommu=on + + .. code-block:: console + + modprobe uio + insmod igb_uio + ./dpdk-devbind.py -b igb_uio bb:ss.f + echo 2 > /sys/bus/pci/devices/0000\:bb\:ss.f/max_vfs (To enable two VFs on a specific PCI device) + + Launch the DPDK testpmd/example or your own host daemon application using the DPDK PMD library. + +Virtual Function enumeration is performed in the following sequence by the Linux* pci driver for a dual-port NIC. 
+When you enable the four Virtual Functions with the above command, the four enabled functions have a Function# +represented by (Bus#, Device#, Function#) in sequence starting from 0 to 3. +However: + +* Virtual Functions 0 and 2 belong to Physical Function 0 + +* Virtual Functions 1 and 3 belong to Physical Function 1 + +.. note:: + + The above is an important consideration to take into account when targeting specific packets to a selected port. + + For Intel® X710/XL710 Gigabit Ethernet Controller, queues are in pairs. One queue pair means one receive queue and + one transmit queue. The default number of queue pairs per VF is 4, and can be 16 in maximum. + +Intel® 82599 10 Gigabit Ethernet Controller VF Infrastructure +^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ + +The programmer can enable a maximum of *63 Virtual Functions* and there must be *one Physical Function* per Intel® 82599 +10 Gigabit Ethernet Controller NIC port. +The reason for this is that the device allows for a maximum of 128 queues per port and a virtual/physical function has to +have at least one queue pair (RX/TX). +The current implementation of the DPDK ixgbevf driver supports a single queue pair (RX/TX) per Virtual Function. +The Physical Function in host could be either configured by the Linux* ixgbe driver +(in the case of the Linux Kernel-based Virtual Machine [KVM]) or by DPDK PMD PF driver. +When using both DPDK PMD PF/VF drivers, the whole NIC will be taken over by DPDK based application. + +For example, + +* Using Linux* ixgbe driver: + + .. code-block:: console + + rmmod ixgbe (To remove the ixgbe module) + insmod ixgbe max_vfs=2,2 (To enable two Virtual Functions per port) + +* Using the DPDK PMD PF ixgbe driver: + + Kernel Params: iommu=pt, intel_iommu=on + + .. code-block:: console + + modprobe uio + insmod igb_uio + ./dpdk-devbind.py -b igb_uio bb:ss.f + echo 2 > /sys/bus/pci/devices/0000\:bb\:ss.f/max_vfs (To enable two VFs on a specific PCI device) + + Launch the DPDK testpmd/example or your own host daemon application using the DPDK PMD library. + +* Using the DPDK PMD PF ixgbe driver to enable VF RSS: + + Same steps as above to install the modules of uio, igb_uio, specify max_vfs for PCI device, and + launch the DPDK testpmd/example or your own host daemon application using the DPDK PMD library. + + The available queue number (at most 4) per VF depends on the total number of pool, which is + determined by the max number of VF at PF initialization stage and the number of queue specified + in config: + + * If the max number of VFs (max_vfs) is set in the range of 1 to 32: + + If the number of Rx queues is specified as 4 (``--rxq=4`` in testpmd), then there are totally 32 + pools (ETH_32_POOLS), and each VF could have 4 Rx queues; + + If the number of Rx queues is specified as 2 (``--rxq=2`` in testpmd), then there are totally 32 + pools (ETH_32_POOLS), and each VF could have 2 Rx queues; + + * If the max number of VFs (max_vfs) is in the range of 33 to 64: + + If the number of Rx queues in specified as 4 (``--rxq=4`` in testpmd), then error message is expected + as ``rxq`` is not correct at this case; + + If the number of rxq is 2 (``--rxq=2`` in testpmd), then there is totally 64 pools (ETH_64_POOLS), + and each VF have 2 Rx queues; + + On host, to enable VF RSS functionality, rx mq mode should be set as ETH_MQ_RX_VMDQ_RSS + or ETH_MQ_RX_RSS mode, and SRIOV mode should be activated (max_vfs >= 1). + It also needs config VF RSS information like hash function, RSS key, RSS key length. 
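+
+As a rough illustration of the host-side configuration just described, the
+following hedged sketch enables VMDq+RSS receive mode and supplies RSS
+information through the standard ``rte_ethdev`` API. The key contents, key
+length and hash functions are placeholder values, not a definitive
+recommendation:
+
+.. code-block:: c
+
+   #include <rte_ethdev.h>
+
+   /* Illustrative 40-byte RSS key; real deployments should supply their own. */
+   static uint8_t rss_key[40];
+
+   static int
+   configure_pf_for_vf_rss(uint16_t port_id, uint16_t nb_rxq, uint16_t nb_txq)
+   {
+       struct rte_eth_conf conf = {
+           .rxmode = { .mq_mode = ETH_MQ_RX_VMDQ_RSS },
+           .rx_adv_conf = {
+               .rss_conf = {
+                   .rss_key = rss_key,
+                   .rss_key_len = sizeof(rss_key),
+                   .rss_hf = ETH_RSS_IP | ETH_RSS_UDP,
+               },
+           },
+       };
+
+       /* SR-IOV must already be active on the port (max_vfs >= 1). */
+       return rte_eth_dev_configure(port_id, nb_rxq, nb_txq, &conf);
+   }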
+ +.. note:: + + The limitation for VF RSS on Intel® 82599 10 Gigabit Ethernet Controller is: + The hash and key are shared among PF and all VF, the RETA table with 128 entries is also shared + among PF and all VF; So it could not to provide a method to query the hash and reta content per + VF on guest, while, if possible, please query them on host for the shared RETA information. + +Virtual Function enumeration is performed in the following sequence by the Linux* pci driver for a dual-port NIC. +When you enable the four Virtual Functions with the above command, the four enabled functions have a Function# +represented by (Bus#, Device#, Function#) in sequence starting from 0 to 3. +However: + +* Virtual Functions 0 and 2 belong to Physical Function 0 + +* Virtual Functions 1 and 3 belong to Physical Function 1 + +.. note:: + + The above is an important consideration to take into account when targeting specific packets to a selected port. + +Intel® 82576 Gigabit Ethernet Controller and Intel® Ethernet Controller I350 Family VF Infrastructure +^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ + +In a virtualized environment, an Intel® 82576 Gigabit Ethernet Controller serves up to eight virtual machines (VMs). +The controller has 16 TX and 16 RX queues. +They are generally referred to (or thought of) as queue pairs (one TX and one RX queue). +This gives the controller 16 queue pairs. + +A pool is a group of queue pairs for assignment to the same VF, used for transmit and receive operations. +The controller has eight pools, with each pool containing two queue pairs, that is, two TX and two RX queues assigned to each VF. + +In a virtualized environment, an Intel® Ethernet Controller I350 family device serves up to eight virtual machines (VMs) per port. +The eight queues can be accessed by eight different VMs if configured correctly (the i350 has 4x1GbE ports each with 8T X and 8 RX queues), +that means, one Transmit and one Receive queue assigned to each VF. + +For example, + +* Using Linux* igb driver: + + .. code-block:: console + + rmmod igb (To remove the igb module) + insmod igb max_vfs=2,2 (To enable two Virtual Functions per port) + +* Using DPDK PMD PF igb driver: + + Kernel Params: iommu=pt, intel_iommu=on modprobe uio + + .. code-block:: console + + insmod igb_uio + ./dpdk-devbind.py -b igb_uio bb:ss.f + echo 2 > /sys/bus/pci/devices/0000\:bb\:ss.f/max_vfs (To enable two VFs on a specific pci device) + + Launch DPDK testpmd/example or your own host daemon application using the DPDK PMD library. + +Virtual Function enumeration is performed in the following sequence by the Linux* pci driver for a four-port NIC. +When you enable the four Virtual Functions with the above command, the four enabled functions have a Function# +represented by (Bus#, Device#, Function#) in sequence, starting from 0 to 7. +However: + +* Virtual Functions 0 and 4 belong to Physical Function 0 + +* Virtual Functions 1 and 5 belong to Physical Function 1 + +* Virtual Functions 2 and 6 belong to Physical Function 2 + +* Virtual Functions 3 and 7 belong to Physical Function 3 + +.. note:: + + The above is an important consideration to take into account when targeting specific packets to a selected port. 
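+
+For illustration only, the enumeration pattern above can be expressed as a
+simple modulo over the number of ports, assuming the same number of VFs is
+enabled on every port (a hypothetical helper, not part of any DPDK API):
+
+.. code-block:: c
+
+   /* With a four-port NIC and max_vfs=2,2,2,2, Virtual Functions are
+    * enumerated round-robin across the Physical Functions, so e.g.
+    * VF 5 belongs to PF 1 and VF 6 to PF 2. */
+   static inline unsigned int
+   owning_pf(unsigned int vf_index, unsigned int nb_ports)
+   {
+       return vf_index % nb_ports;
+   }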
+ +Validated Hypervisors +~~~~~~~~~~~~~~~~~~~~~ + +The validated hypervisor is: + +* KVM (Kernel Virtual Machine) with Qemu, version 0.14.0 + +However, the hypervisor is bypassed to configure the Virtual Function devices using the Mailbox interface, +the solution is hypervisor-agnostic. +Xen* and VMware* (when SR- IOV is supported) will also be able to support the DPDK with Virtual Function driver support. + +Expected Guest Operating System in Virtual Machine +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ + +The expected guest operating systems in a virtualized environment are: + +* Fedora* 14 (64-bit) + +* Ubuntu* 10.04 (64-bit) + +For supported kernel versions, refer to the *DPDK Release Notes*. + +Setting Up a KVM Virtual Machine Monitor +---------------------------------------- + +The following describes a target environment: + +* Host Operating System: Fedora 14 + +* Hypervisor: KVM (Kernel Virtual Machine) with Qemu version 0.14.0 + +* Guest Operating System: Fedora 14 + +* Linux Kernel Version: Refer to the *DPDK Getting Started Guide* + +* Target Applications: l2fwd, l3fwd-vf + +The setup procedure is as follows: + +#. Before booting the Host OS, open **BIOS setup** and enable **Intel® VT features**. + +#. While booting the Host OS kernel, pass the intel_iommu=on kernel command line argument using GRUB. + When using DPDK PF driver on host, pass the iommu=pt kernel command line argument in GRUB. + +#. Download qemu-kvm-0.14.0 from + `http://sourceforge.net/projects/kvm/files/qemu-kvm/ `_ + and install it in the Host OS using the following steps: + + When using a recent kernel (2.6.25+) with kvm modules included: + + .. code-block:: console + + tar xzf qemu-kvm-release.tar.gz + cd qemu-kvm-release + ./configure --prefix=/usr/local/kvm + make + sudo make install + sudo /sbin/modprobe kvm-intel + + When using an older kernel, or a kernel from a distribution without the kvm modules, + you must download (from the same link), compile and install the modules yourself: + + .. code-block:: console + + tar xjf kvm-kmod-release.tar.bz2 + cd kvm-kmod-release + ./configure + make + sudo make install + sudo /sbin/modprobe kvm-intel + + qemu-kvm installs in the /usr/local/bin directory. + + For more details about KVM configuration and usage, please refer to: + + `http://www.linux-kvm.org/page/HOWTO1 `_. + +#. Create a Virtual Machine and install Fedora 14 on the Virtual Machine. + This is referred to as the Guest Operating System (Guest OS). + +#. Download and install the latest ixgbe driver from: + + `http://downloadcenter.intel.com/Detail_Desc.aspx?agr=Y&DwnldID=14687 `_ + +#. In the Host OS + + When using Linux kernel ixgbe driver, unload the Linux ixgbe driver and reload it with the max_vfs=2,2 argument: + + .. code-block:: console + + rmmod ixgbe + modprobe ixgbe max_vfs=2,2 + + When using DPDK PMD PF driver, insert DPDK kernel module igb_uio and set the number of VF by sysfs max_vfs: + + .. code-block:: console + + modprobe uio + insmod igb_uio + ./dpdk-devbind.py -b igb_uio 02:00.0 02:00.1 0e:00.0 0e:00.1 + echo 2 > /sys/bus/pci/devices/0000\:02\:00.0/max_vfs + echo 2 > /sys/bus/pci/devices/0000\:02\:00.1/max_vfs + echo 2 > /sys/bus/pci/devices/0000\:0e\:00.0/max_vfs + echo 2 > /sys/bus/pci/devices/0000\:0e\:00.1/max_vfs + + .. note:: + + You need to explicitly specify number of vfs for each port, for example, + in the command above, it creates two vfs for the first two ixgbe ports. 
+ + Let say we have a machine with four physical ixgbe ports: + + + 0000:02:00.0 + + 0000:02:00.1 + + 0000:0e:00.0 + + 0000:0e:00.1 + + The command above creates two vfs for device 0000:02:00.0: + + .. code-block:: console + + ls -alrt /sys/bus/pci/devices/0000\:02\:00.0/virt* + lrwxrwxrwx. 1 root root 0 Apr 13 05:40 /sys/bus/pci/devices/0000:02:00.0/virtfn1 -> ../0000:02:10.2 + lrwxrwxrwx. 1 root root 0 Apr 13 05:40 /sys/bus/pci/devices/0000:02:00.0/virtfn0 -> ../0000:02:10.0 + + It also creates two vfs for device 0000:02:00.1: + + .. code-block:: console + + ls -alrt /sys/bus/pci/devices/0000\:02\:00.1/virt* + lrwxrwxrwx. 1 root root 0 Apr 13 05:51 /sys/bus/pci/devices/0000:02:00.1/virtfn1 -> ../0000:02:10.3 + lrwxrwxrwx. 1 root root 0 Apr 13 05:51 /sys/bus/pci/devices/0000:02:00.1/virtfn0 -> ../0000:02:10.1 + +#. List the PCI devices connected and notice that the Host OS shows two Physical Functions (traditional ports) + and four Virtual Functions (two for each port). + This is the result of the previous step. + +#. Insert the pci_stub module to hold the PCI devices that are freed from the default driver using the following command + (see http://www.linux-kvm.org/page/How_to_assign_devices_with_VT-d_in_KVM Section 4 for more information): + + .. code-block:: console + + sudo /sbin/modprobe pci-stub + + Unbind the default driver from the PCI devices representing the Virtual Functions. + A script to perform this action is as follows: + + .. code-block:: console + + echo "8086 10ed" > /sys/bus/pci/drivers/pci-stub/new_id + echo 0000:08:10.0 > /sys/bus/pci/devices/0000:08:10.0/driver/unbind + echo 0000:08:10.0 > /sys/bus/pci/drivers/pci-stub/bind + + where, 0000:08:10.0 belongs to the Virtual Function visible in the Host OS. + +#. Now, start the Virtual Machine by running the following command: + + .. code-block:: console + + /usr/local/kvm/bin/qemu-system-x86_64 -m 4096 -smp 4 -boot c -hda lucid.qcow2 -device pci-assign,host=08:10.0 + + where: + + — -m = memory to assign + + — -smp = number of smp cores + + — -boot = boot option + + — -hda = virtual disk image + + — -device = device to attach + + .. note:: + + — The pci-assign,host=08:10.0 value indicates that you want to attach a PCI device + to a Virtual Machine and the respective (Bus:Device.Function) + numbers should be passed for the Virtual Function to be attached. + + — qemu-kvm-0.14.0 allows a maximum of four PCI devices assigned to a VM, + but this is qemu-kvm version dependent since qemu-kvm-0.14.1 allows a maximum of five PCI devices. + + — qemu-system-x86_64 also has a -cpu command line option that is used to select the cpu_model + to emulate in a Virtual Machine. Therefore, it can be used as: + + .. code-block:: console + + /usr/local/kvm/bin/qemu-system-x86_64 -cpu ? + + (to list all available cpu_models) + + /usr/local/kvm/bin/qemu-system-x86_64 -m 4096 -cpu host -smp 4 -boot c -hda lucid.qcow2 -device pci-assign,host=08:10.0 + + (to use the same cpu_model equivalent to the host cpu) + + For more information, please refer to: `http://wiki.qemu.org/Features/CPUModels `_. + +#. If use vfio-pci to pass through device instead of pci-assign, steps 8 and 9 need to be updated to bind device to vfio-pci and + replace pci-assign with vfio-pci when start virtual machine. + + .. 
code-block:: console + + sudo /sbin/modprobe vfio-pci + + echo "8086 10ed" > /sys/bus/pci/drivers/vfio-pci/new_id + echo 0000:08:10.0 > /sys/bus/pci/devices/0000:08:10.0/driver/unbind + echo 0000:08:10.0 > /sys/bus/pci/drivers/vfio-pci/bind + + /usr/local/kvm/bin/qemu-system-x86_64 -m 4096 -smp 4 -boot c -hda lucid.qcow2 -device vfio-pci,host=08:10.0 + +#. Install and run DPDK host app to take over the Physical Function. Eg. + + .. code-block:: console + + make install T=x86_64-native-linux-gcc + ./x86_64-native-linux-gcc/app/testpmd -l 0-3 -n 4 -- -i + +#. Finally, access the Guest OS using vncviewer with the localhost:5900 port and check the lspci command output in the Guest OS. + The virtual functions will be listed as available for use. + +#. Configure and install the DPDK with an x86_64-native-linux-gcc configuration on the Guest OS as normal, + that is, there is no change to the normal installation procedure. + + .. code-block:: console + + make config T=x86_64-native-linux-gcc O=x86_64-native-linux-gcc + cd x86_64-native-linux-gcc + make + +.. note:: + + If you are unable to compile the DPDK and you are getting "error: CPU you selected does not support x86-64 instruction set", + power off the Guest OS and start the virtual machine with the correct -cpu option in the qemu- system-x86_64 command as shown in step 9. + You must select the best x86_64 cpu_model to emulate or you can select host option if available. + +.. note:: + + Run the DPDK l2fwd sample application in the Guest OS with Hugepages enabled. + For the expected benchmark performance, you must pin the cores from the Guest OS to the Host OS (taskset can be used to do this) and + you must also look at the PCI Bus layout on the board to ensure you are not running the traffic over the QPI Interface. + +.. note:: + + * The Virtual Machine Manager (the Fedora package name is virt-manager) is a utility for virtual machine management + that can also be used to create, start, stop and delete virtual machines. + If this option is used, step 2 and 6 in the instructions provided will be different. + + * virsh, a command line utility for virtual machine management, + can also be used to bind and unbind devices to a virtual machine in Ubuntu. + If this option is used, step 6 in the instructions provided will be different. + + * The Virtual Machine Monitor (see :numref:`figure_perf_benchmark`) is equivalent to a Host OS with KVM installed as described in the instructions. + +.. _figure_perf_benchmark: + +.. figure:: img/perf_benchmark.* + + Performance Benchmark Setup + + +DPDK SR-IOV PMD PF/VF Driver Usage Model +---------------------------------------- + +Fast Host-based Packet Processing +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ + +Software Defined Network (SDN) trends are demanding fast host-based packet handling. +In a virtualization environment, +the DPDK VF PMD driver performs the same throughput result as a non-VT native environment. + +With such host instance fast packet processing, lots of services such as filtering, QoS, +DPI can be offloaded on the host fast path. + +:numref:`figure_fast_pkt_proc` shows the scenario where some VMs directly communicate externally via a VFs, +while others connect to a virtual switch and share the same uplink bandwidth. + +.. _figure_fast_pkt_proc: + +.. 
figure:: img/fast_pkt_proc.* + + Fast Host-based Packet Processing + + +SR-IOV (PF/VF) Approach for Inter-VM Communication +-------------------------------------------------- + +Inter-VM data communication is one of the traffic bottle necks in virtualization platforms. +SR-IOV device assignment helps a VM to attach the real device, taking advantage of the bridge in the NIC. +So VF-to-VF traffic within the same physical port (VM0<->VM1) have hardware acceleration. +However, when VF crosses physical ports (VM0<->VM2), there is no such hardware bridge. +In this case, the DPDK PMD PF driver provides host forwarding between such VMs. + +:numref:`figure_inter_vm_comms` shows an example. +In this case an update of the MAC address lookup tables in both the NIC and host DPDK application is required. + +In the NIC, writing the destination of a MAC address belongs to another cross device VM to the PF specific pool. +So when a packet comes in, its destination MAC address will match and forward to the host DPDK PMD application. + +In the host DPDK application, the behavior is similar to L2 forwarding, +that is, the packet is forwarded to the correct PF pool. +The SR-IOV NIC switch forwards the packet to a specific VM according to the MAC destination address +which belongs to the destination VF on the VM. + +.. _figure_inter_vm_comms: + +.. figure:: img/inter_vm_comms.* + + Inter-VM Communication diff --git a/src/spdk/dpdk/doc/guides/nics/ionic.rst b/src/spdk/dpdk/doc/guides/nics/ionic.rst new file mode 100644 index 000000000..787123df5 --- /dev/null +++ b/src/spdk/dpdk/doc/guides/nics/ionic.rst @@ -0,0 +1,41 @@ +.. SPDX-License-Identifier: (BSD-3-Clause OR GPL-2.0) + Copyright(c) 2018-2019 Pensando Systems, Inc. All rights reserved. + +IONIC Driver +============ + +The ionic driver provides support for Pensando server adapters. +It currently supports the below models: + +- `Naples DSC-25 `_ +- `Naples DSC-100 `_ + +Please visit https://pensando.io for more information. + +Identifying the Adapter +----------------------- + +To find if one or more Pensando PCI Ethernet devices are installed +on the host, check for the PCI devices: + + .. code-block:: console + + lspci -d 1dd8: + b5:00.0 Ethernet controller: Device 1dd8:1002 + b6:00.0 Ethernet controller: Device 1dd8:1002 + +Pre-Installation Configuration +------------------------------ + +The following options can be modified in the ``config`` file. + +- ``CONFIG_RTE_LIBRTE_IONIC_PMD`` (default ``y``) + + Toggle compilation of ionic PMD. + +Building DPDK +------------- + +The ionic PMD driver supports UIO and VFIO, please refer to the +:ref:`DPDK documentation that comes with the DPDK suite ` +for instructions on how to build DPDK. diff --git a/src/spdk/dpdk/doc/guides/nics/ipn3ke.rst b/src/spdk/dpdk/doc/guides/nics/ipn3ke.rst new file mode 100644 index 000000000..c6c1552f6 --- /dev/null +++ b/src/spdk/dpdk/doc/guides/nics/ipn3ke.rst @@ -0,0 +1,107 @@ +.. SPDX-License-Identifier: BSD-3-Clause + Copyright(c) 2019 Intel Corporation. + +IPN3KE Poll Mode Driver +======================= + +The ipn3ke PMD (librte_pmd_ipn3ke) provides poll mode driver support +for Intel® FPGA PAC(Programmable Acceleration Card) N3000 based on +the Intel Ethernet Controller X710/XXV710 and Intel Arria 10 FPGA. + +In this card, FPGA is an acceleration bridge between network interface +and the Intel Ethernet Controller. Although both FPGA and Ethernet +Controllers are connected to CPU with PCIe Gen3x16 Switch, all the +packet RX/TX is handled by Intel Ethernet Controller. 
So from application +point of view the data path is still the legacy Intel Ethernet Controller +X710/XXV710 PMD. Besides this, users can enable more acceleration +features by FPGA IP. + +Prerequisites +------------- + +- Identifying your adapter using `Intel Support + `_ and get the latest NVM/FW images. + +- Follow the DPDK :ref:`Getting Started Guide for Linux ` to setup the basic DPDK environment. + +- To get better performance on Intel platforms, please follow the "How to get best performance with NICs on Intel platforms" + section of the :ref:`Getting Started Guide for Linux `. + + +Pre-Installation Configuration +------------------------------ + +Config File Options +~~~~~~~~~~~~~~~~~~~ + +The following options can be modified in the ``config`` file. + +- ``CONFIG_RTE_LIBRTE_IPN3KE_PMD`` (default ``y``) + + Toggle compilation of the ``librte_pmd_ipn3ke`` driver. + +Runtime Config Options +~~~~~~~~~~~~~~~~~~~~~~ + +- ``AFU name`` + + AFU name identifies which AFU is used by IPN3KE. The AFU name format is "Port|BDF", + Each FPGA can be divided into four blocks at most. "Port" identifies which FPGA block + the AFU bitstream belongs to, but currently only 0 IPN3KE support. "BDF" means FPGA PCIe BDF. + For example:: + + --vdev 'ipn3ke_cfg0,afu=0|b3:00.0' + +- ``FPGA Acceleration list`` + + For IPN3KE FPGA can provide different bitstream, different bitstream includes different + Acceleration, so users need to identify which Acceleration is used. Current IPN3KE can + support TM and Flow Acceleration, for example:: + + --vdev 'ipn3ke_cfg0,afu=0|b3:00.0,fpga_acc={tm|flow}' + +- ``I40e PF name list`` + + Users need to bind FPGA LineSidePort to FVL PF. So I40e PF name list should be involved in + startup command. For example:: + + --vdev 'ipn3ke_cfg0,afu=0|b3:00.0,fpga_acc={tm|flow},i40e_pf={0000:b1:00.0|0000:b1:00.1|0000:b1:00.2|0000:b1:00.3|0000:b5:00.0|0000:b5:00.1|0000:b5:00.2|0000:b5:00.3}' + +Driver compilation and testing +------------------------------ + +Refer to the document :ref:`compiling and testing a PMD for a NIC ` +for details. + +Sample Application Notes +------------------------ + +Packet TX/RX with FPGA Pass-through image +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ + +FPGA Pass-through bitstream is original FPGA Image. + +To start ``testpmd``, and add I40e PF to FPGA network port: + +.. code-block:: console + + ./app/testpmd -l 0-15 -n 4 --vdev 'ifpga_rawdev_cfg0,ifpga=b3:00.0,port=0' --vdev 'ipn3ke_cfg0,afu=0|b3:00.0,i40e_pf={0000:b1:00.0|0000:b1:00.1|0000:b1:00.2|0000:b1:00.3|0000:b5:00.0|0000:b5:00.1|0000:b5:00.2|0000:b5:00.3}' -- -i --no-numa --port-topology=loop + +HQoS and flow acceleration +~~~~~~~~~~~~~~~~~~~~~~~~~~ + +HQoS and flow acceleration bitstream is used to offloading HQoS and flow classifier. + +To start ``testpmd``, and add I40e PF to FPGA network port, enable FPGA HQoS and Flow Acceleration: + +.. code-block:: console + + ./app/testpmd -l 0-15 -n 4 --vdev 'ifpga_rawdev_cfg0,ifpga=b3:00.0,port=0' --vdev 'ipn3ke_cfg0,afu=0|b3:00.0,fpga_acc={tm|flow},i40e_pf={0000:b1:00.0|0000:b1:00.1|0000:b1:00.2|0000:b1:00.3|0000:b5:00.0|0000:b5:00.1|0000:b5:00.2|0000:b5:00.3}' -- -i --no-numa --forward-mode=macswap + +Limitations or Known issues +--------------------------- + +19.05 limitation +~~~~~~~~~~~~~~~~ + +Ipn3ke code released in 19.05 is for evaluation only. 
diff --git a/src/spdk/dpdk/doc/guides/nics/ixgbe.rst b/src/spdk/dpdk/doc/guides/nics/ixgbe.rst new file mode 100644 index 000000000..1f424b38a --- /dev/null +++ b/src/spdk/dpdk/doc/guides/nics/ixgbe.rst @@ -0,0 +1,314 @@ +.. SPDX-License-Identifier: BSD-3-Clause + Copyright(c) 2010-2016 Intel Corporation. + +IXGBE Driver +============ + +Vector PMD for IXGBE +-------------------- + +Vector PMD uses Intel® SIMD instructions to optimize packet I/O. +It improves load/store bandwidth efficiency of L1 data cache by using a wider SSE/AVX register 1 (1). +The wider register gives space to hold multiple packet buffers so as to save instruction number when processing bulk of packets. + +There is no change to PMD API. The RX/TX handler are the only two entries for vPMD packet I/O. +They are transparently registered at runtime RX/TX execution if all condition checks pass. + +1. To date, only an SSE version of IX GBE vPMD is available. + +Some constraints apply as pre-conditions for specific optimizations on bulk packet transfers. +The following sections explain RX and TX constraints in the vPMD. + +RX Constraints +~~~~~~~~~~~~~~ + +Prerequisites and Pre-conditions +^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ + +The following prerequisites apply: + +* To enable vPMD to work for RX, bulk allocation for Rx must be allowed. + +Ensure that the following pre-conditions are satisfied: + +* rxq->rx_free_thresh >= RTE_PMD_IXGBE_RX_MAX_BURST + +* rxq->rx_free_thresh < rxq->nb_rx_desc + +* (rxq->nb_rx_desc % rxq->rx_free_thresh) == 0 + +* rxq->nb_rx_desc < (IXGBE_MAX_RING_DESC - RTE_PMD_IXGBE_RX_MAX_BURST) + +These conditions are checked in the code. + +Scattered packets are not supported in this mode. +If an incoming packet is greater than the maximum acceptable length of one "mbuf" data size (by default, the size is 2 KB), +vPMD for RX would be disabled. + +By default, IXGBE_MAX_RING_DESC is set to 4096 and RTE_PMD_IXGBE_RX_MAX_BURST is set to 32. + +Feature not Supported by RX Vector PMD +^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ + +Some features are not supported when trying to increase the throughput in vPMD. +They are: + +* IEEE1588 + +* FDIR + +* Header split + +* RX checksum off load + +Other features are supported using optional MACRO configuration. They include: + +* HW VLAN strip + +* HW extend dual VLAN + +To guarantee the constraint, capabilities in dev_conf.rxmode.offloads will be checked: + +* DEV_RX_OFFLOAD_VLAN_STRIP + +* DEV_RX_OFFLOAD_VLAN_EXTEND + +* DEV_RX_OFFLOAD_CHECKSUM + +* DEV_RX_OFFLOAD_HEADER_SPLIT + +* dev_conf + +fdir_conf->mode will also be checked. + +VF Runtime Options +^^^^^^^^^^^^^^^^^^ + +The following ``devargs`` options can be enabled at runtime. They must +be passed as part of EAL arguments. For example, + +.. code-block:: console + + testpmd -w af:10.0,pflink_fullchk=1 -- -i + +- ``pflink_fullchk`` (default **0**) + + When calling ``rte_eth_link_get_nowait()`` to get VF link status, + this option is used to control how VF synchronizes its status with + PF's. If set, VF will not only check the PF's physical link status + by reading related register, but also check the mailbox status. We + call this behavior as fully checking. And checking mailbox will + trigger PF's mailbox interrupt generation. If unset, the application + can get the VF's link status quickly by just reading the PF's link + status register, this will avoid the whole system's mailbox interrupt + generation. + + ``rte_eth_link_get()`` will still use the mailbox method regardless + of the pflink_fullchk setting. 
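+
+As a hedged illustration of how an application consumes this, the sketch
+below polls the VF link state without blocking; the ``pflink_fullchk``
+devarg only changes how the PMD derives the status underneath this call:
+
+.. code-block:: c
+
+   #include <rte_ethdev.h>
+
+   /* Illustrative only: with pflink_fullchk=0 the PMD answers from the
+    * PF link status register; with pflink_fullchk=1 it also checks the
+    * mailbox, which triggers a PF mailbox interrupt. */
+   static int
+   vf_link_is_up(uint16_t port_id)
+   {
+       struct rte_eth_link link = { 0 };
+
+       rte_eth_link_get_nowait(port_id, &link);
+       return link.link_status == ETH_LINK_UP;
+   }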
+ +RX Burst Size +^^^^^^^^^^^^^ + +As vPMD is focused on high throughput, it assumes that the RX burst size is equal to or greater than 32 per burst. +It returns zero if using nb_pkt < 32 as the expected packet number in the receive handler. + +TX Constraint +~~~~~~~~~~~~~ + +Prerequisite +^^^^^^^^^^^^ + +The only prerequisite is related to tx_rs_thresh. +The tx_rs_thresh value must be greater than or equal to RTE_PMD_IXGBE_TX_MAX_BURST, +but less or equal to RTE_IXGBE_TX_MAX_FREE_BUF_SZ. +Consequently, by default the tx_rs_thresh value is in the range 32 to 64. + +Feature not Supported by TX Vector PMD +^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ + +TX vPMD only works when offloads is set to 0 + +This means that it does not support any TX offload. + +Application Programming Interface +--------------------------------- + +In DPDK release v16.11 an API for ixgbe specific functions has been added to the ixgbe PMD. +The declarations for the API functions are in the header ``rte_pmd_ixgbe.h``. + +Sample Application Notes +------------------------ + +l3fwd +~~~~~ + +When running l3fwd with vPMD, there is one thing to note. +In the configuration, ensure that DEV_RX_OFFLOAD_CHECKSUM in port_conf.rxmode.offloads is NOT set. +Otherwise, by default, RX vPMD is disabled. + +load_balancer +~~~~~~~~~~~~~ + +As in the case of l3fwd, to enable vPMD, do NOT set DEV_RX_OFFLOAD_CHECKSUM in port_conf.rxmode.offloads. +In addition, for improved performance, use -bsz "(32,32),(64,64),(32,32)" in load_balancer to avoid using the default burst size of 144. + + +Limitations or Known issues +--------------------------- + +Malicious Driver Detection not Supported +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ + +The Intel x550 series NICs support a feature called MDD (Malicious +Driver Detection) which checks the behavior of the VF driver. +If this feature is enabled, the VF must use the advanced context descriptor +correctly and set the CC (Check Context) bit. +DPDK PF doesn't support MDD, but kernel PF does. We may hit problem in this +scenario kernel PF + DPDK VF. If user enables MDD in kernel PF, DPDK VF will +not work. Because kernel PF thinks the VF is malicious. But actually it's not. +The only reason is the VF doesn't act as MDD required. +There's significant performance impact to support MDD. DPDK should check if +the advanced context descriptor should be set and set it. And DPDK has to ask +the info about the header length from the upper layer, because parsing the +packet itself is not acceptable. So, it's too expensive to support MDD. +When using kernel PF + DPDK VF on x550, please make sure to use a kernel +PF driver that disables MDD or can disable MDD. + +Some kernel drivers already disable MDD by default while some kernels can use +the command ``insmod ixgbe.ko MDD=0,0`` to disable MDD. Each "0" in the +command refers to a port. For example, if there are 6 ixgbe ports, the command +should be changed to ``insmod ixgbe.ko MDD=0,0,0,0,0,0``. + + +Statistics +~~~~~~~~~~ + +The statistics of ixgbe hardware must be polled regularly in order for it to +remain consistent. Running a DPDK application without polling the statistics will +cause registers on hardware to count to the maximum value, and "stick" at +that value. + +In order to avoid statistic registers every reaching the maximum value, +read the statistics from the hardware using ``rte_eth_stats_get()`` or +``rte_eth_xstats_get()``. + +The maximum time between statistics polls that ensures consistent results can +be calculated as follows: + +.. 
code-block:: c + + max_read_interval = UINT_MAX / max_packets_per_second + max_read_interval = 4294967295 / 14880952 + max_read_interval = 288.6218096127183 (seconds) + max_read_interval = ~4 mins 48 sec. + +In order to ensure valid results, it is recommended to poll every 4 minutes. + +MTU setting +~~~~~~~~~~~ + +Although the user can set the MTU separately on PF and VF ports, the ixgbe NIC +only supports one global MTU per physical port. +So when the user sets different MTUs on PF and VF ports in one physical port, +the real MTU for all these PF and VF ports is the largest value set. +This behavior is based on the kernel driver behavior. + +VF MAC address setting +~~~~~~~~~~~~~~~~~~~~~~ + +On ixgbe, the concept of "pool" can be used for different things depending on +the mode. In VMDq mode, "pool" means a VMDq pool. In IOV mode, "pool" means a +VF. + +There is no RTE API to add a VF's MAC address from the PF. On ixgbe, the +``rte_eth_dev_mac_addr_add()`` function can be used to add a VF's MAC address, +as a workaround. + +X550 does not support legacy interrupt mode +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ + +Description +^^^^^^^^^^^ +X550 cannot get interrupts if using ``uio_pci_generic`` module or using legacy +interrupt mode of ``igb_uio`` or ``vfio``. Because the errata of X550 states +that the Interrupt Status bit is not implemented. The errata is the item #22 +from `X550 spec update `_ + +Implication +^^^^^^^^^^^ +When using ``uio_pci_generic`` module or using legacy interrupt mode of +``igb_uio`` or ``vfio``, the Interrupt Status bit would be checked if the +interrupt is coming. Since the bit is not implemented in X550, the irq cannot +be handled correctly and cannot report the event fd to DPDK apps. Then apps +cannot get interrupts and ``dmesg`` will show messages like ``irq #No.: `` +``nobody cared.`` + +Workaround +^^^^^^^^^^ +Do not bind the ``uio_pci_generic`` module in X550 NICs. +Do not bind ``igb_uio`` with legacy mode in X550 NICs. +Before binding ``vfio`` with legacy mode in X550 NICs, use ``modprobe vfio `` +``nointxmask=1`` to load ``vfio`` module if the intx is not shared with other +devices. + +Inline crypto processing support +-------------------------------- + +Inline IPsec processing is supported for ``RTE_SECURITY_ACTION_TYPE_INLINE_CRYPTO`` +mode for ESP packets only: + +- ESP authentication only: AES-128-GMAC (128-bit key) +- ESP encryption and authentication: AES-128-GCM (128-bit key) + +IPsec Security Gateway Sample Application supports inline IPsec processing for +ixgbe PMD. + +For more details see the IPsec Security Gateway Sample Application and Security +library documentation. + + +Virtual Function Port Representors +---------------------------------- +The IXGBE PF PMD supports the creation of VF port representors for the control +and monitoring of IXGBE virtual function devices. Each port representor +corresponds to a single virtual function of that device. Using the ``devargs`` +option ``representor`` the user can specify which virtual functions to create +port representors for on initialization of the PF PMD by passing the VF IDs of +the VFs which are required.:: + + -w DBDF,representor=[0,1,4] + +Currently hot-plugging of representor ports is not supported so all required +representors must be specified on the creation of the PF. 
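+
+Each representor then appears as an ordinary ethdev port. As a rough,
+hedged sketch (standard ethdev calls, nothing ixgbe specific), an
+application can enumerate the ports, representors included, like this:
+
+.. code-block:: c
+
+   #include <stdio.h>
+   #include <rte_ethdev.h>
+
+   /* Walk all probed ethdev ports and print their names; with the
+    * devargs above this list includes one port per VF representor. */
+   static void
+   list_ports(void)
+   {
+       uint16_t port_id;
+       char name[RTE_ETH_NAME_MAX_LEN];
+
+       RTE_ETH_FOREACH_DEV(port_id) {
+           if (rte_eth_dev_get_name_by_port(port_id, name) == 0)
+               printf("port %u: %s\n", port_id, name);
+       }
+   }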
+ +Supported Chipsets and NICs +--------------------------- + +- Intel 82599EB 10 Gigabit Ethernet Controller +- Intel 82598EB 10 Gigabit Ethernet Controller +- Intel 82599ES 10 Gigabit Ethernet Controller +- Intel 82599EN 10 Gigabit Ethernet Controller +- Intel Ethernet Controller X540-AT2 +- Intel Ethernet Controller X550-BT2 +- Intel Ethernet Controller X550-AT2 +- Intel Ethernet Controller X550-AT +- Intel Ethernet Converged Network Adapter X520-SR1 +- Intel Ethernet Converged Network Adapter X520-SR2 +- Intel Ethernet Converged Network Adapter X520-LR1 +- Intel Ethernet Converged Network Adapter X520-DA1 +- Intel Ethernet Converged Network Adapter X520-DA2 +- Intel Ethernet Converged Network Adapter X520-DA4 +- Intel Ethernet Converged Network Adapter X520-QDA1 +- Intel Ethernet Converged Network Adapter X520-T2 +- Intel 10 Gigabit AF DA Dual Port Server Adapter +- Intel 10 Gigabit AT Server Adapter +- Intel 10 Gigabit AT2 Server Adapter +- Intel 10 Gigabit CX4 Dual Port Server Adapter +- Intel 10 Gigabit XF LR Server Adapter +- Intel 10 Gigabit XF SR Dual Port Server Adapter +- Intel 10 Gigabit XF SR Server Adapter +- Intel Ethernet Converged Network Adapter X540-T1 +- Intel Ethernet Converged Network Adapter X540-T2 +- Intel Ethernet Converged Network Adapter X550-T1 +- Intel Ethernet Converged Network Adapter X550-T2 diff --git a/src/spdk/dpdk/doc/guides/nics/kni.rst b/src/spdk/dpdk/doc/guides/nics/kni.rst new file mode 100644 index 000000000..90d3040cf --- /dev/null +++ b/src/spdk/dpdk/doc/guides/nics/kni.rst @@ -0,0 +1,170 @@ +.. SPDX-License-Identifier: BSD-3-Clause + Copyright(c) 2017 Intel Corporation. + +KNI Poll Mode Driver +====================== + +KNI PMD is wrapper to the :ref:`librte_kni ` library. + +This PMD enables using KNI without having a KNI specific application, +any forwarding application can use PMD interface for KNI. + +Sending packets to any DPDK controlled interface or sending to the +Linux networking stack will be transparent to the DPDK application. + +To create a KNI device ``net_kni#`` device name should be used, and this +will create ``kni#`` Linux virtual network interface. + +There is no physical device backend for the virtual KNI device. + +Packets sent to the KNI Linux interface will be received by the DPDK +application, and DPDK application may forward packets to a physical NIC +or to a virtual device (like another KNI interface or PCAP interface). + +To forward any traffic from physical NIC to the Linux networking stack, +an application should control a physical port and create one virtual KNI port, +and forward between two. + +Using this PMD requires KNI kernel module be inserted. + + +Usage +----- + +EAL ``--vdev`` argument can be used to create KNI device instance, like:: + + testpmd --vdev=net_kni0 --vdev=net_kn1 -- -i + +Above command will create ``kni0`` and ``kni1`` Linux network interfaces, +those interfaces can be controlled by standard Linux tools. + +When testpmd forwarding starts, any packets sent to ``kni0`` interface +forwarded to the ``kni1`` interface and vice versa. + +There is no hard limit on number of interfaces that can be created. + + +Default interface configuration +------------------------------- + +``librte_kni`` can create Linux network interfaces with different features, +feature set controlled by a configuration struct, and KNI PMD uses a fixed +configuration: + + .. 
code-block:: console + + Interface name: kni# + force bind kernel thread to a core : NO + mbuf size: (rte_pktmbuf_data_room_size(pktmbuf_pool) - RTE_PKTMBUF_HEADROOM) + mtu: (conf.mbuf_size - RTE_ETHER_HDR_LEN) + +KNI control path is not supported with the PMD, since there is no physical +backend device by default. + + +PMD arguments +------------- + +``no_request_thread``, by default PMD creates a pthread for each KNI interface +to handle Linux network interface control commands, like ``ifconfig kni0 up`` + +With ``no_request_thread`` option, pthread is not created and control commands +not handled by PMD. + +By default request thread is enabled. And this argument should not be used +most of the time, unless this PMD used with customized DPDK application to handle +requests itself. + +Argument usage:: + + testpmd --vdev "net_kni0,no_request_thread=1" -- -i + + +PMD log messages +---------------- + +If KNI kernel module (rte_kni.ko) not inserted, following error log printed:: + + "KNI: KNI subsystem has not been initialized. Invoke rte_kni_init() first" + + +PMD testing +----------- + +It is possible to test PMD quickly using KNI kernel module loopback feature: + +* Insert KNI kernel module with loopback support: + + .. code-block:: console + + insmod build/kmod/rte_kni.ko lo_mode=lo_mode_fifo_skb + +* Start testpmd with no physical device but two KNI virtual devices: + + .. code-block:: console + + ./testpmd --vdev net_kni0 --vdev net_kni1 -- -i + + .. code-block:: console + + ... + Configuring Port 0 (socket 0) + KNI: pci: 00:00:00 c580:b8 + Port 0: 1A:4A:5B:7C:A2:8C + Configuring Port 1 (socket 0) + KNI: pci: 00:00:00 600:b9 + Port 1: AE:95:21:07:93:DD + Checking link statuses... + Port 0 Link Up - speed 10000 Mbps - full-duplex + Port 1 Link Up - speed 10000 Mbps - full-duplex + Done + testpmd> + +* Observe Linux interfaces + + .. code-block:: console + + $ ifconfig kni0 && ifconfig kni1 + kni0: flags=4098 mtu 1500 + ether ae:8e:79:8e:9b:c8 txqueuelen 1000 (Ethernet) + RX packets 0 bytes 0 (0.0 B) + RX errors 0 dropped 0 overruns 0 frame 0 + TX packets 0 bytes 0 (0.0 B) + TX errors 0 dropped 0 overruns 0 carrier 0 collisions 0 + + kni1: flags=4098 mtu 1500 + ether 9e:76:43:53:3e:9b txqueuelen 1000 (Ethernet) + RX packets 0 bytes 0 (0.0 B) + RX errors 0 dropped 0 overruns 0 frame 0 + TX packets 0 bytes 0 (0.0 B) + TX errors 0 dropped 0 overruns 0 carrier 0 collisions 0 + + +* Start forwarding with tx_first: + + .. code-block:: console + + testpmd> start tx_first + +* Quit and check forwarding stats: + + .. code-block:: console + + testpmd> quit + Telling cores to stop... + Waiting for lcores to finish... 
+ + ---------------------- Forward statistics for port 0 ---------------------- + RX-packets: 35637905 RX-dropped: 0 RX-total: 35637905 + TX-packets: 35637947 TX-dropped: 0 TX-total: 35637947 + ---------------------------------------------------------------------------- + + ---------------------- Forward statistics for port 1 ---------------------- + RX-packets: 35637915 RX-dropped: 0 RX-total: 35637915 + TX-packets: 35637937 TX-dropped: 0 TX-total: 35637937 + ---------------------------------------------------------------------------- + + +++++++++++++++ Accumulated forward statistics for all ports+++++++++++++++ + RX-packets: 71275820 RX-dropped: 0 RX-total: 71275820 + TX-packets: 71275884 TX-dropped: 0 TX-total: 71275884 + ++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++ diff --git a/src/spdk/dpdk/doc/guides/nics/liquidio.rst b/src/spdk/dpdk/doc/guides/nics/liquidio.rst new file mode 100644 index 000000000..053414672 --- /dev/null +++ b/src/spdk/dpdk/doc/guides/nics/liquidio.rst @@ -0,0 +1,196 @@ +.. SPDX-License-Identifier: BSD-3-Clause + Copyright(c) 2017 Cavium, Inc + +LiquidIO VF Poll Mode Driver +============================ + +The LiquidIO VF PMD library (librte_pmd_lio) provides poll mode driver support for +Cavium LiquidIO® II server adapter VFs. PF management and VF creation can be +done using kernel driver. + +More information can be found at `Cavium Official Website +`_. + +Supported LiquidIO Adapters +----------------------------- + +- LiquidIO II CN2350 210SV/225SV +- LiquidIO II CN2350 210SVPT +- LiquidIO II CN2360 210SV/225SV +- LiquidIO II CN2360 210SVPT + + +Pre-Installation Configuration +------------------------------ + +The following options can be modified in the ``config`` file. +Please note that enabling debugging options may affect system performance. + +- ``CONFIG_RTE_LIBRTE_LIO_PMD`` (default ``y``) + + Toggle compilation of LiquidIO PMD. + +- ``CONFIG_RTE_LIBRTE_LIO_DEBUG_RX`` (default ``n``) + + Toggle display of receive fast path run-time messages. + +- ``CONFIG_RTE_LIBRTE_LIO_DEBUG_TX`` (default ``n``) + + Toggle display of transmit fast path run-time messages. + +- ``CONFIG_RTE_LIBRTE_LIO_DEBUG_MBOX`` (default ``n``) + + Toggle display of mailbox messages. + +- ``CONFIG_RTE_LIBRTE_LIO_DEBUG_REGS`` (default ``n``) + + Toggle display of register reads and writes. + + +SR-IOV: Prerequisites and Sample Application Notes +-------------------------------------------------- + +This section provides instructions to configure SR-IOV with Linux OS. + +#. Verify SR-IOV and ARI capabilities are enabled on the adapter using ``lspci``: + + .. code-block:: console + + lspci -s -vvv + + Example output: + + .. code-block:: console + + [...] + Capabilities: [148 v1] Alternative Routing-ID Interpretation (ARI) + [...] + Capabilities: [178 v1] Single Root I/O Virtualization (SR-IOV) + [...] + Kernel driver in use: LiquidIO + +#. Load the kernel module: + + .. code-block:: console + + modprobe liquidio + +#. Bring up the PF ports: + + .. code-block:: console + + ifconfig p4p1 up + ifconfig p4p2 up + +#. Change PF MTU if required: + + .. code-block:: console + + ifconfig p4p1 mtu 9000 + ifconfig p4p2 mtu 9000 + +#. Create VF device(s): + + Echo number of VFs to be created into ``"sriov_numvfs"`` sysfs entry + of the parent PF. + + .. code-block:: console + + echo 1 > /sys/bus/pci/devices/0000:03:00.0/sriov_numvfs + echo 1 > /sys/bus/pci/devices/0000:03:00.1/sriov_numvfs + +#. 
Assign VF MAC address: + + Assign MAC address to the VF using iproute2 utility. The syntax is:: + + ip link set vf mac + + Example output: + + .. code-block:: console + + ip link set p4p1 vf 0 mac F2:A8:1B:5E:B4:66 + +#. Assign VF(s) to VM. + + The VF devices may be passed through to the guest VM using qemu or + virt-manager or virsh etc. + + Example qemu guest launch command: + + .. code-block:: console + + ./qemu-system-x86_64 -name lio-vm -machine accel=kvm \ + -cpu host -m 4096 -smp 4 \ + -drive file=,if=none,id=disk1,format= \ + -device virtio-blk-pci,scsi=off,drive=disk1,id=virtio-disk1,bootindex=1 \ + -device vfio-pci,host=03:00.3 -device vfio-pci,host=03:08.3 + +#. Running testpmd + + Refer to the document + :ref:`compiling and testing a PMD for a NIC ` to run + ``testpmd`` application. + + .. note:: + + Use ``igb_uio`` instead of ``vfio-pci`` in VM. + + Example output: + + .. code-block:: console + + [...] + EAL: PCI device 0000:03:00.3 on NUMA socket 0 + EAL: probe driver: 177d:9712 net_liovf + EAL: using IOMMU type 1 (Type 1) + PMD: net_liovf[03:00.3]INFO: DEVICE : CN23XX VF + EAL: PCI device 0000:03:08.3 on NUMA socket 0 + EAL: probe driver: 177d:9712 net_liovf + PMD: net_liovf[03:08.3]INFO: DEVICE : CN23XX VF + Interactive-mode selected + USER1: create a new mbuf pool : n=171456, size=2176, socket=0 + Configuring Port 0 (socket 0) + PMD: net_liovf[03:00.3]INFO: Starting port 0 + Port 0: F2:A8:1B:5E:B4:66 + Configuring Port 1 (socket 0) + PMD: net_liovf[03:08.3]INFO: Starting port 1 + Port 1: 32:76:CC:EE:56:D7 + Checking link statuses... + Port 0 Link Up - speed 10000 Mbps - full-duplex + Port 1 Link Up - speed 10000 Mbps - full-duplex + Done + testpmd> + +#. Enabling VF promiscuous mode + + One VF per PF can be marked as trusted for promiscuous mode. + + .. code-block:: console + + ip link set dev vf trust on + + +Limitations +----------- + +VF MTU +~~~~~~ + +VF MTU is limited by PF MTU. Raise PF value before configuring VF for larger packet size. + +VLAN offload +~~~~~~~~~~~~ + +Tx VLAN insertion is not supported and consequently VLAN offload feature is +marked partial. + +Ring size +~~~~~~~~~ + +Number of descriptors for Rx/Tx ring should be in the range 128 to 512. + +CRC stripping +~~~~~~~~~~~~~ + +LiquidIO adapters strip ethernet FCS of every packet coming to the host interface. diff --git a/src/spdk/dpdk/doc/guides/nics/memif.rst b/src/spdk/dpdk/doc/guides/nics/memif.rst new file mode 100644 index 000000000..08a9fda86 --- /dev/null +++ b/src/spdk/dpdk/doc/guides/nics/memif.rst @@ -0,0 +1,292 @@ +.. SPDX-License-Identifier: BSD-3-Clause + Copyright(c) 2018-2019 Cisco Systems, Inc. + +====================== +Memif Poll Mode Driver +====================== + +Shared memory packet interface (memif) PMD allows for DPDK and any other client +using memif (DPDK, VPP, libmemif) to communicate using shared memory. Memif is +Linux only. + +The created device transmits packets in a raw format. It can be used with +Ethernet mode, IP mode, or Punt/Inject. At this moment, only Ethernet mode is +supported in DPDK memif implementation. + +Memif works in two roles: master and slave. Slave connects to master over an +existing socket. It is also a producer of shared memory file and initializes +the shared memory. Each interface can be connected to one peer interface +at same time. The peer interface is identified by id parameter. Master +creates the socket and listens for any slave connection requests. The socket +may already exist on the system. 
Be sure to remove any such sockets, if you +are creating a master interface, or you will see an "Address already in use" +error. Function ``rte_pmd_memif_remove()``, which removes memif interface, +will also remove a listener socket, if it is not being used by any other +interface. + +The method to enable one or more interfaces is to use the +``--vdev=net_memif0`` option on the DPDK application command line. Each +``--vdev=net_memif1`` option given will create an interface named net_memif0, +net_memif1, and so on. Memif uses unix domain socket to transmit control +messages. Each memif has a unique id per socket. This id is used to identify +peer interface. If you are connecting multiple +interfaces using same socket, be sure to specify unique ids ``id=0``, ``id=1``, +etc. Note that if you assign a socket to a master interface it becomes a +listener socket. Listener socket can not be used by a slave interface on same +client. + +.. csv-table:: **Memif configuration options** + :header: "Option", "Description", "Default", "Valid value" + + "id=0", "Used to identify peer interface", "0", "uint32_t" + "role=master", "Set memif role", "slave", "master|slave" + "bsize=1024", "Size of single packet buffer", "2048", "uint16_t" + "rsize=11", "Log2 of ring size. If rsize is 10, actual ring size is 1024", "10", "1-14" + "socket=/tmp/memif.sock", "Socket filename", "/tmp/memif.sock", "string len 108" + "mac=01:23:45:ab:cd:ef", "Mac address", "01:ab:23:cd:45:ef", "" + "secret=abc123", "Secret is an optional security option, which if specified, must be matched by peer", "", "string len 24" + "zero-copy=yes", "Enable/disable zero-copy slave mode. Only relevant to slave, requires '--single-file-segments' eal argument", "no", "yes|no" + +**Connection establishment** + +In order to create memif connection, two memif interfaces, each in separate +process, are needed. One interface in ``master`` role and other in +``slave`` role. It is not possible to connect two interfaces in a single +process. Each interface can be connected to one interface at same time, +identified by matching id parameter. + +Memif driver uses unix domain socket to exchange required information between +memif interfaces. Socket file path is specified at interface creation see +*Memif configuration options* table above. If socket is used by ``master`` +interface, it's marked as listener socket (in scope of current process) and +listens to connection requests from other processes. One socket can be used by +multiple interfaces. One process can have ``slave`` and ``master`` interfaces +at the same time, provided each role is assigned unique socket. + +For detailed information on memif control messages, see: net/memif/memif.h. + +Slave interface attempts to make a connection on assigned socket. Process +listening on this socket will extract the connection request and create a new +connected socket (control channel). Then it sends the 'hello' message +(``MEMIF_MSG_TYPE_HELLO``), containing configuration boundaries. Slave interface +adjusts its configuration accordingly, and sends 'init' message +(``MEMIF_MSG_TYPE_INIT``). This message among others contains interface id. Driver +uses this id to find master interface, and assigns the control channel to this +interface. If such interface is found, 'ack' message (``MEMIF_MSG_TYPE_ACK``) is +sent. Slave interface sends 'add region' message (``MEMIF_MSG_TYPE_ADD_REGION``) for +every region allocated. Master responds to each of these messages with 'ack' +message. Same behavior applies to rings. 
Slave sends 'add ring' message +(``MEMIF_MSG_TYPE_ADD_RING``) for every initialized ring. Master again responds to +each message with 'ack' message. To finalize the connection, slave interface +sends 'connect' message (``MEMIF_MSG_TYPE_CONNECT``). Upon receiving this message +master maps regions to its address space, initializes rings and responds with +'connected' message (``MEMIF_MSG_TYPE_CONNECTED``). Disconnect +(``MEMIF_MSG_TYPE_DISCONNECT``) can be sent by both master and slave interfaces at +any time, due to driver error or if the interface is being deleted. + +Files + +- net/memif/memif.h *- control messages definitions* +- net/memif/memif_socket.h +- net/memif/memif_socket.c + +Shared memory +~~~~~~~~~~~~~ + +**Shared memory format** + +Slave is producer and master is consumer. Memory regions, are mapped shared memory files, +created by memif slave and provided to master at connection establishment. +Regions contain rings and buffers. Rings and buffers can also be separated into multiple +regions. For no-zero-copy, rings and buffers are stored inside single memory +region to reduce the number of opened files. + +region n (no-zero-copy): + ++-----------------------+-------------------------------------------------------------------------+ +| Rings | Buffers | ++-----------+-----------+-----------------+---+---------------------------------------------------+ +| S2M rings | M2S rings | packet buffer 0 | . | pb ((1 << pmd->run.log2_ring_size)*(s2m + m2s))-1 | ++-----------+-----------+-----------------+---+---------------------------------------------------+ + +S2M OR M2S Rings: + ++--------+--------+-----------------------+ +| ring 0 | ring 1 | ring num_s2m_rings - 1| ++--------+--------+-----------------------+ + +ring 0: + ++-------------+---------------------------------------+ +| ring header | (1 << pmd->run.log2_ring_size) * desc | ++-------------+---------------------------------------+ + +Descriptors are assigned packet buffers in order of rings creation. If we have one ring +in each direction and ring size is 1024, then first 1024 buffers will belong to S2M ring and +last 1024 will belong to M2S ring. In case of zero-copy, buffers are dequeued and +enqueued as needed. 
+ +**Descriptor format** + ++----+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+ +|Quad|6| | | | | | | | | | | | | | | | | | | | | | | | | | | | | | |3|3| | | | | | | | | | | | | | |1|1| | | | | | | | | | | | | | | | +| +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+ +|Word|3| | | | | | | | | | | | | | | | | | | | | | | | | | | | | | |2|1| | | | | | | | | | | | | | |6|5| | | | | | | | | | | | | | |0| ++----+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+ +|0 |length |region |flags | ++----+---------------------------------------------------------------+-------------------------------+-------------------------------+ +|1 |metadata |offset | ++----+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+ +| |6| | | | | | | | | | | | | | | | | | | | | | | | | | | | | | |3|3| | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | +| +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+ +| |3| | | | | | | | | | | | | | | | | | | | | | | | | | | | | | |2|1| | | | | | | | | | | | | | | | | | | | | | | | | | | | | | |0| ++----+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+ + +**Flags field - flags (Quad Word 0, bits 0:15)** + ++-----+--------------------+------------------------------------------------------------------------------------------------+ +|Bits |Name |Functionality | ++=====+====================+================================================================================================+ +|0 |MEMIF_DESC_FLAG_NEXT|Is chained buffer. When set, the packet is divided into multiple buffers. May not be contiguous.| ++-----+--------------------+------------------------------------------------------------------------------------------------+ + +**Region index - region (Quad Word 0, 16:31)** + +Index of memory region, the buffer is located in. + +**Data length - length (Quad Word 0, 32:63)** + +Length of transmitted/received data. + +**Data Offset - offset (Quad Word 1, 0:31)** + +Data start offset from memory region address. *.regions[desc->region].addr + desc->offset* + +**Metadata - metadata (Quad Word 1, 32:63)** + +Buffer metadata. + +Files + +- net/memif/memif.h *- descriptor and ring definitions* +- net/memif/rte_eth_memif.c *- eth_memif_rx() eth_memif_tx()* + +Zero-copy slave +~~~~~~~~~~~~~~~ + +Zero-copy slave can be enabled with memif configuration option 'zero-copy=yes'. This option +is only relevant to slave and requires eal argument '--single-file-segments'. +This limitation is in place, because it is too expensive to identify memseg +for each packet buffer, resulting in worse performance than with zero-copy disabled. +With single file segments we can calculate offset from the beginning of the file +for each packet buffer. + +**Shared memory format** + +Region 0 is created by memif driver and contains rings. Slave interface exposes DPDK memory (memseg). +Instead of using memfd_create() to create new shared file, existing memsegs are used. +Master interface functions the same as with zero-copy disabled. 
+ +region 0: + ++-----------------------+ +| Rings | ++-----------+-----------+ +| S2M rings | M2S rings | ++-----------+-----------+ + +region n: + ++-----------------+ +| Buffers | ++-----------------+ +|memseg | ++-----------------+ + +Buffers are dequeued and enqueued as needed. Offset descriptor field is calculated at tx. +Only single file segments mode (EAL option --single-file-segments) is supported, as calculating +offset from multiple segments is too expensive. + +Example: testpmd +---------------------------- +In this example we run two instances of testpmd application and transmit packets over memif. + +First create ``master`` interface:: + + #./build/app/testpmd -l 0-1 --proc-type=primary --file-prefix=pmd1 --vdev=net_memif,role=master -- -i + +Now create ``slave`` interface (master must be already running so the slave will connect):: + + #./build/app/testpmd -l 2-3 --proc-type=primary --file-prefix=pmd2 --vdev=net_memif -- -i + +You can also enable ``zero-copy`` on ``slave`` interface:: + + #./build/app/testpmd -l 2-3 --proc-type=primary --file-prefix=pmd2 --vdev=net_memif,zero-copy=yes --single-file-segments -- -i + +Start forwarding packets:: + + Slave: + testpmd> start + + Master: + testpmd> start tx_first + +Show status:: + + testpmd> show port stats 0 + +For more details on testpmd please refer to :doc:`../testpmd_app_ug/index`. + +Example: testpmd and VPP +------------------------ +For information on how to get and run VPP please see ``_. + +Start VPP in interactive mode (should be by default). Create memif master interface in VPP:: + + vpp# create interface memif id 0 master no-zero-copy + vpp# set interface state memif0/0 up + vpp# set interface ip address memif0/0 192.168.1.1/24 + +To see socket filename use show memif command:: + + vpp# show memif + sockets + id listener filename + 0 yes (1) /run/vpp/memif.sock + ... + +Now create memif interface by running testpmd with these command line options:: + + #./testpmd --vdev=net_memif,socket=/run/vpp/memif.sock -- -i + +Testpmd should now create memif slave interface and try to connect to master. +In testpmd set forward option to icmpecho and start forwarding:: + + testpmd> set fwd icmpecho + testpmd> start + +Send ping from VPP:: + + vpp# ping 192.168.1.2 + 64 bytes from 192.168.1.2: icmp_seq=2 ttl=254 time=36.2918 ms + 64 bytes from 192.168.1.2: icmp_seq=3 ttl=254 time=23.3927 ms + 64 bytes from 192.168.1.2: icmp_seq=4 ttl=254 time=24.2975 ms + 64 bytes from 192.168.1.2: icmp_seq=5 ttl=254 time=17.7049 ms + +Example: testpmd memif loopback +------------------------------- +In this example we will create 2 memif ports connected into loopback. +The situation is analogous to cross connecting 2 ports of the NIC by cable. + +To set the loopback, just use the same socket and id with different roles:: + + #./testpmd --vdev=net_memif0,role=master,id=0 --vdev=net_memif1,role=slave,id=0 -- -i + +Then start the communication:: + + testpmd> start tx_first + +Finally we can check port stats to see the traffic:: + + testpmd> show port stats all + testpmd> show port stats all diff --git a/src/spdk/dpdk/doc/guides/nics/mlx4.rst b/src/spdk/dpdk/doc/guides/nics/mlx4.rst new file mode 100644 index 000000000..1f1e2f6c7 --- /dev/null +++ b/src/spdk/dpdk/doc/guides/nics/mlx4.rst @@ -0,0 +1,493 @@ +.. SPDX-License-Identifier: BSD-3-Clause + Copyright 2012 6WIND S.A. 
+ Copyright 2015 Mellanox Technologies, Ltd + +MLX4 poll mode driver library +============================= + +The MLX4 poll mode driver library (**librte_pmd_mlx4**) implements support +for **Mellanox ConnectX-3** and **Mellanox ConnectX-3 Pro** 10/40 Gbps adapters +as well as their virtual functions (VF) in SR-IOV context. + +Information and documentation about this family of adapters can be found on +the `Mellanox website `_. Help is also provided by +the `Mellanox community `_. + +There is also a `section dedicated to this poll mode driver +`_. + +.. note:: + + Due to external dependencies, this driver is disabled by default. It must + be enabled manually by setting ``CONFIG_RTE_LIBRTE_MLX4_PMD=y`` and + recompiling DPDK. + +Implementation details +---------------------- + +Most Mellanox ConnectX-3 devices provide two ports but expose a single PCI +bus address, thus unlike most drivers, librte_pmd_mlx4 registers itself as a +PCI driver that allocates one Ethernet device per detected port. + +For this reason, one cannot white/blacklist a single port without also +white/blacklisting the others on the same device. + +Besides its dependency on libibverbs (that implies libmlx4 and associated +kernel support), librte_pmd_mlx4 relies heavily on system calls for control +operations such as querying/updating the MTU and flow control parameters. + +For security reasons and robustness, this driver only deals with virtual +memory addresses. The way resources allocations are handled by the kernel +combined with hardware specifications that allow it to handle virtual memory +addresses directly ensure that DPDK applications cannot access random +physical memory (or memory that does not belong to the current process). + +This capability allows the PMD to coexist with kernel network interfaces +which remain functional, although they stop receiving unicast packets as +long as they share the same MAC address. + +The :ref:`flow_isolated_mode` is supported. + +Compiling librte_pmd_mlx4 causes DPDK to be linked against libibverbs. + +Configuration +------------- + +Compilation options +~~~~~~~~~~~~~~~~~~~ + +These options can be modified in the ``.config`` file. + +- ``CONFIG_RTE_LIBRTE_MLX4_PMD`` (default **n**) + + Toggle compilation of librte_pmd_mlx4 itself. + +- ``CONFIG_RTE_IBVERBS_LINK_DLOPEN`` (default **n**) + + Build PMD with additional code to make it loadable without hard + dependencies on **libibverbs** nor **libmlx4**, which may not be installed + on the target system. + + In this mode, their presence is still required for it to run properly, + however their absence won't prevent a DPDK application from starting (with + ``CONFIG_RTE_BUILD_SHARED_LIB`` disabled) and they won't show up as + missing with ``ldd(1)``. + + It works by moving these dependencies to a purpose-built rdma-core "glue" + plug-in which must either be installed in a directory whose name is based + on ``CONFIG_RTE_EAL_PMD_PATH`` suffixed with ``-glue`` if set, or in a + standard location for the dynamic linker (e.g. ``/lib``) if left to the + default empty string (``""``). + + This option has no performance impact. + +- ``CONFIG_RTE_IBVERBS_LINK_STATIC`` (default **n**) + + Embed static flavor of the dependencies **libibverbs** and **libmlx4** + in the PMD shared library or the executable static binary. + +- ``CONFIG_RTE_LIBRTE_MLX4_DEBUG`` (default **n**) + + Toggle debugging code and stricter compilation flags. Enabling this option + adds additional run-time checks and debugging messages at the cost of + lower performance. 
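+
+For example, with the legacy make build these settings can be changed in the
+generated ``.config`` before recompiling (an illustrative sketch only; the
+target name and chosen link mode are examples)::
+
+   make config T=x86_64-native-linux-gcc
+   sed -i -e 's/\(CONFIG_RTE_LIBRTE_MLX4_PMD=\)n/\1y/' \
+          -e 's/\(CONFIG_RTE_IBVERBS_LINK_DLOPEN=\)n/\1y/' build/.config
+   make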
+ +This option is available in meson: + +- ``ibverbs_link`` can be ``static``, ``shared``, or ``dlopen``. + +Environment variables +~~~~~~~~~~~~~~~~~~~~~ + +- ``MLX4_GLUE_PATH`` + + A list of directories in which to search for the rdma-core "glue" plug-in, + separated by colons or semi-colons. + + Only matters when compiled with ``CONFIG_RTE_IBVERBS_LINK_DLOPEN`` + enabled and most useful when ``CONFIG_RTE_EAL_PMD_PATH`` is also set, + since ``LD_LIBRARY_PATH`` has no effect in this case. + +Run-time configuration +~~~~~~~~~~~~~~~~~~~~~~ + +- librte_pmd_mlx4 brings kernel network interfaces up during initialization + because it is affected by their state. Forcing them down prevents packets + reception. + +- **ethtool** operations on related kernel interfaces also affect the PMD. + +- ``port`` parameter [int] + + This parameter provides a physical port to probe and can be specified multiple + times for additional ports. All ports are probed by default if left + unspecified. + +- ``mr_ext_memseg_en`` parameter [int] + + A nonzero value enables extending memseg when registering DMA memory. If + enabled, the number of entries in MR (Memory Region) lookup table on datapath + is minimized and it benefits performance. On the other hand, it worsens memory + utilization because registered memory is pinned by kernel driver. Even if a + page in the extended chunk is freed, that doesn't become reusable until the + entire memory is freed. + + Enabled by default. + +Kernel module parameters +~~~~~~~~~~~~~~~~~~~~~~~~ + +The **mlx4_core** kernel module has several parameters that affect the +behavior and/or the performance of librte_pmd_mlx4. Some of them are described +below. + +- **num_vfs** (integer or triplet, optionally prefixed by device address + strings) + + Create the given number of VFs on the specified devices. + +- **log_num_mgm_entry_size** (integer) + + Device-managed flow steering (DMFS) is required by DPDK applications. It is + enabled by using a negative value, the last four bits of which have a + special meaning. + + - **-1**: force device-managed flow steering (DMFS). + - **-7**: configure optimized steering mode to improve performance with the + following limitation: VLAN filtering is not supported with this mode. + This is the recommended mode in case VLAN filter is not needed. + +Limitations +----------- + +- For secondary process: + + - Forked secondary process not supported. + - External memory unregistered in EAL memseg list cannot be used for DMA + unless such memory has been registered by ``mlx4_mr_update_ext_mp()`` in + primary process and remapped to the same virtual address in secondary + process. If the external memory is registered by primary process but has + different virtual address in secondary process, unexpected error may happen. + +- CRC stripping is supported by default and always reported as "true". + The ability to enable/disable CRC stripping requires OFED version + 4.3-1.5.0.0 and above or rdma-core version v18 and above. + +- TSO (Transmit Segmentation Offload) is supported in OFED version + 4.4 and above. + +Prerequisites +------------- + +This driver relies on external libraries and kernel drivers for resources +allocations and initialization. The following dependencies are not part of +DPDK and must be installed separately: + +- **libibverbs** (provided by rdma-core package) + + User space verbs framework used by librte_pmd_mlx4. This library provides + a generic interface between the kernel and low-level user space drivers + such as libmlx4. 
+ + It allows slow and privileged operations (context initialization, hardware + resources allocations) to be managed by the kernel and fast operations to + never leave user space. + +- **libmlx4** (provided by rdma-core package) + + Low-level user space driver library for Mellanox ConnectX-3 devices, + it is automatically loaded by libibverbs. + + This library basically implements send/receive calls to the hardware + queues. + +- **Kernel modules** + + They provide the kernel-side verbs API and low level device drivers that + manage actual hardware initialization and resources sharing with user + space processes. + + Unlike most other PMDs, these modules must remain loaded and bound to + their devices: + + - mlx4_core: hardware driver managing Mellanox ConnectX-3 devices. + - mlx4_en: Ethernet device driver that provides kernel network interfaces. + - mlx4_ib: InifiniBand device driver. + - ib_uverbs: user space driver for verbs (entry point for libibverbs). + +- **Firmware update** + + Mellanox OFED releases include firmware updates for ConnectX-3 adapters. + + Because each release provides new features, these updates must be applied to + match the kernel modules and libraries they come with. + +.. note:: + + Both libraries are BSD and GPL licensed. Linux kernel modules are GPL + licensed. + +Depending on system constraints and user preferences either RDMA core library +with a recent enough Linux kernel release (recommended) or Mellanox OFED, +which provides compatibility with older releases. + +Current RDMA core package and Linux kernel (recommended) +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ + +- Minimal Linux kernel version: 4.14. +- Minimal RDMA core version: v15 (see `RDMA core installation documentation`_). + +- Starting with rdma-core v21, static libraries can be built:: + + cd build + CFLAGS=-fPIC cmake -DIN_PLACE=1 -DENABLE_STATIC=1 -GNinja .. + ninja + +.. _`RDMA core installation documentation`: https://raw.githubusercontent.com/linux-rdma/rdma-core/master/README.md + +If rdma-core libraries are built but not installed, DPDK makefile can link them, +thanks to these environment variables: + + - ``EXTRA_CFLAGS=-I/path/to/rdma-core/build/include`` + - ``EXTRA_LDFLAGS=-L/path/to/rdma-core/build/lib`` + - ``PKG_CONFIG_PATH=/path/to/rdma-core/build/lib/pkgconfig`` + +.. _Mellanox_OFED_as_a_fallback: + +Mellanox OFED as a fallback +~~~~~~~~~~~~~~~~~~~~~~~~~~~ + +- `Mellanox OFED`_ version: **4.4, 4.5, 4.6**. +- firmware version: **2.42.5000** and above. + +.. _`Mellanox OFED`: http://www.mellanox.com/page/products_dyn?product_family=26&mtag=linux_sw_drivers + +.. note:: + + Several versions of Mellanox OFED are available. Installing the version + this DPDK release was developed and tested against is strongly + recommended. Please check the `prerequisites`_. + +Installing Mellanox OFED +^^^^^^^^^^^^^^^^^^^^^^^^ + +1. Download latest Mellanox OFED. + +2. Install the required libraries and kernel modules either by installing + only the required set, or by installing the entire Mellanox OFED: + + For bare metal use:: + + ./mlnxofedinstall --dpdk --upstream-libs + + For SR-IOV hypervisors use:: + + ./mlnxofedinstall --dpdk --upstream-libs --enable-sriov --hypervisor + + For SR-IOV virtual machine use:: + + ./mlnxofedinstall --dpdk --upstream-libs --guest + +3. Verify the firmware is the correct one:: + + ibv_devinfo + +4. Set all ports links to Ethernet, follow instructions on the screen:: + + connectx_port_config + +5. 
Continue with :ref:`section 2 of the Quick Start Guide `. + +.. _qsg: + +Quick Start Guide +----------------- + +1. Set all ports links to Ethernet:: + + PCI= + echo eth > "/sys/bus/pci/devices/$PCI/mlx4_port0" + echo eth > "/sys/bus/pci/devices/$PCI/mlx4_port1" + + .. note:: + + If using Mellanox OFED one can permanently set the port link + to Ethernet using connectx_port_config tool provided by it. + :ref:`Mellanox_OFED_as_a_fallback`: + +.. _QSG_2: + +2. In case of bare metal or hypervisor, configure optimized steering mode + by adding the following line to ``/etc/modprobe.d/mlx4_core.conf``:: + + options mlx4_core log_num_mgm_entry_size=-7 + + .. note:: + + If VLAN filtering is used, set log_num_mgm_entry_size=-1. + Performance degradation can occur on this case. + +3. Restart the driver:: + + /etc/init.d/openibd restart + + or:: + + service openibd restart + +4. Compile DPDK and you are ready to go. See instructions on + :ref:`Development Kit Build System ` + +Performance tuning +------------------ + +1. Verify the optimized steering mode is configured:: + + cat /sys/module/mlx4_core/parameters/log_num_mgm_entry_size + +2. Use the CPU near local NUMA node to which the PCIe adapter is connected, + for better performance. For VMs, verify that the right CPU + and NUMA node are pinned according to the above. Run:: + + lstopo-no-graphics + + to identify the NUMA node to which the PCIe adapter is connected. + +3. If more than one adapter is used, and root complex capabilities allow + to put both adapters on the same NUMA node without PCI bandwidth degradation, + it is recommended to locate both adapters on the same NUMA node. + This in order to forward packets from one to the other without + NUMA performance penalty. + +4. Disable pause frames:: + + ethtool -A rx off tx off + +5. Verify IO non-posted prefetch is disabled by default. This can be checked + via the BIOS configuration. Please contact you server provider for more + information about the settings. + +.. note:: + + On some machines, depends on the machine integrator, it is beneficial + to set the PCI max read request parameter to 1K. This can be + done in the following way: + + To query the read request size use:: + + setpci -s 68.w + + If the output is different than 3XXX, set it by:: + + setpci -s 68.w=3XXX + + The XXX can be different on different systems. Make sure to configure + according to the setpci output. + +6. To minimize overhead of searching Memory Regions: + + - '--socket-mem' is recommended to pin memory by predictable amount. + - Configure per-lcore cache when creating Mempools for packet buffer. + - Refrain from dynamically allocating/freeing memory in run-time. + +Usage example +------------- + +This section demonstrates how to launch **testpmd** with Mellanox ConnectX-3 +devices managed by librte_pmd_mlx4. + +#. Load the kernel modules:: + + modprobe -a ib_uverbs mlx4_en mlx4_core mlx4_ib + + Alternatively if MLNX_OFED is fully installed, the following script can + be run:: + + /etc/init.d/openibd restart + + .. note:: + + User space I/O kernel modules (uio and igb_uio) are not used and do + not have to be loaded. + +#. Make sure Ethernet interfaces are in working order and linked to kernel + verbs. Related sysfs entries should be present:: + + ls -d /sys/class/net/*/device/infiniband_verbs/uverbs* | cut -d / -f 5 + + Example output:: + + eth2 + eth3 + eth4 + eth5 + +#. 
Optionally, retrieve their PCI bus addresses for whitelisting:: + + { + for intf in eth2 eth3 eth4 eth5; + do + (cd "/sys/class/net/${intf}/device/" && pwd -P); + done; + } | + sed -n 's,.*/\(.*\),-w \1,p' + + Example output:: + + -w 0000:83:00.0 + -w 0000:83:00.0 + -w 0000:84:00.0 + -w 0000:84:00.0 + + .. note:: + + There are only two distinct PCI bus addresses because the Mellanox + ConnectX-3 adapters installed on this system are dual port. + +#. Request huge pages:: + + echo 1024 > /sys/kernel/mm/hugepages/hugepages-2048kB/nr_hugepages/nr_hugepages + +#. Start testpmd with basic parameters:: + + testpmd -l 8-15 -n 4 -w 0000:83:00.0 -w 0000:84:00.0 -- --rxq=2 --txq=2 -i + + Example output:: + + [...] + EAL: PCI device 0000:83:00.0 on NUMA socket 1 + EAL: probe driver: 15b3:1007 librte_pmd_mlx4 + PMD: librte_pmd_mlx4: PCI information matches, using device "mlx4_0" (VF: false) + PMD: librte_pmd_mlx4: 2 port(s) detected + PMD: librte_pmd_mlx4: port 1 MAC address is 00:02:c9:b5:b7:50 + PMD: librte_pmd_mlx4: port 2 MAC address is 00:02:c9:b5:b7:51 + EAL: PCI device 0000:84:00.0 on NUMA socket 1 + EAL: probe driver: 15b3:1007 librte_pmd_mlx4 + PMD: librte_pmd_mlx4: PCI information matches, using device "mlx4_1" (VF: false) + PMD: librte_pmd_mlx4: 2 port(s) detected + PMD: librte_pmd_mlx4: port 1 MAC address is 00:02:c9:b5:ba:b0 + PMD: librte_pmd_mlx4: port 2 MAC address is 00:02:c9:b5:ba:b1 + Interactive-mode selected + Configuring Port 0 (socket 0) + PMD: librte_pmd_mlx4: 0x867d60: TX queues number update: 0 -> 2 + PMD: librte_pmd_mlx4: 0x867d60: RX queues number update: 0 -> 2 + Port 0: 00:02:C9:B5:B7:50 + Configuring Port 1 (socket 0) + PMD: librte_pmd_mlx4: 0x867da0: TX queues number update: 0 -> 2 + PMD: librte_pmd_mlx4: 0x867da0: RX queues number update: 0 -> 2 + Port 1: 00:02:C9:B5:B7:51 + Configuring Port 2 (socket 0) + PMD: librte_pmd_mlx4: 0x867de0: TX queues number update: 0 -> 2 + PMD: librte_pmd_mlx4: 0x867de0: RX queues number update: 0 -> 2 + Port 2: 00:02:C9:B5:BA:B0 + Configuring Port 3 (socket 0) + PMD: librte_pmd_mlx4: 0x867e20: TX queues number update: 0 -> 2 + PMD: librte_pmd_mlx4: 0x867e20: RX queues number update: 0 -> 2 + Port 3: 00:02:C9:B5:BA:B1 + Checking link statuses... + Port 0 Link Up - speed 10000 Mbps - full-duplex + Port 1 Link Up - speed 40000 Mbps - full-duplex + Port 2 Link Up - speed 10000 Mbps - full-duplex + Port 3 Link Up - speed 40000 Mbps - full-duplex + Done + testpmd> diff --git a/src/spdk/dpdk/doc/guides/nics/mlx5.rst b/src/spdk/dpdk/doc/guides/nics/mlx5.rst new file mode 100644 index 000000000..bb03df66a --- /dev/null +++ b/src/spdk/dpdk/doc/guides/nics/mlx5.rst @@ -0,0 +1,1526 @@ +.. SPDX-License-Identifier: BSD-3-Clause + Copyright 2015 6WIND S.A. + Copyright 2015 Mellanox Technologies, Ltd + +.. include:: + +MLX5 poll mode driver +===================== + +The MLX5 poll mode driver library (**librte_pmd_mlx5**) provides support +for **Mellanox ConnectX-4**, **Mellanox ConnectX-4 Lx** , **Mellanox +ConnectX-5**, **Mellanox ConnectX-6**, **Mellanox ConnectX-6 Dx** and +**Mellanox BlueField** families of 10/25/40/50/100/200 Gb/s adapters +as well as their virtual functions (VF) in SR-IOV context. + +Information and documentation about these adapters can be found on the +`Mellanox website `__. Help is also provided by the +`Mellanox community `__. + +There is also a `section dedicated to this poll mode driver +`__. + +.. note:: + + Due to external dependencies, this driver is disabled in default configuration + of the "make" build. 
It can be enabled with ``CONFIG_RTE_LIBRTE_MLX5_PMD=y`` + or by using "meson" build system which will detect dependencies. + +Design +------ + +Besides its dependency on libibverbs (that implies libmlx5 and associated +kernel support), librte_pmd_mlx5 relies heavily on system calls for control +operations such as querying/updating the MTU and flow control parameters. + +For security reasons and robustness, this driver only deals with virtual +memory addresses. The way resources allocations are handled by the kernel, +combined with hardware specifications that allow to handle virtual memory +addresses directly, ensure that DPDK applications cannot access random +physical memory (or memory that does not belong to the current process). + +This capability allows the PMD to coexist with kernel network interfaces +which remain functional, although they stop receiving unicast packets as +long as they share the same MAC address. +This means legacy linux control tools (for example: ethtool, ifconfig and +more) can operate on the same network interfaces that owned by the DPDK +application. + +The PMD can use libibverbs and libmlx5 to access the device firmware +or directly the hardware components. +There are different levels of objects and bypassing abilities +to get the best performances: + +- Verbs is a complete high-level generic API +- Direct Verbs is a device-specific API +- DevX allows to access firmware objects +- Direct Rules manages flow steering at low-level hardware layer + +Enabling librte_pmd_mlx5 causes DPDK applications to be linked against +libibverbs. + +Features +-------- + +- Multi arch support: x86_64, POWER8, ARMv8, i686. +- Multiple TX and RX queues. +- Support for scattered TX and RX frames. +- IPv4, IPv6, TCPv4, TCPv6, UDPv4 and UDPv6 RSS on any number of queues. +- RSS using different combinations of fields: L3 only, L4 only or both, + and source only, destination only or both. +- Several RSS hash keys, one for each flow type. +- Default RSS operation with no hash key specification. +- Configurable RETA table. +- Link flow control (pause frame). +- Support for multiple MAC addresses. +- VLAN filtering. +- RX VLAN stripping. +- TX VLAN insertion. +- RX CRC stripping configuration. +- Promiscuous mode on PF and VF. +- Multicast promiscuous mode on PF and VF. +- Hardware checksum offloads. +- Flow director (RTE_FDIR_MODE_PERFECT, RTE_FDIR_MODE_PERFECT_MAC_VLAN and + RTE_ETH_FDIR_REJECT). +- Flow API, including :ref:`flow_isolated_mode`. +- Multiple process. +- KVM and VMware ESX SR-IOV modes are supported. +- RSS hash result is supported. +- Hardware TSO for generic IP or UDP tunnel, including VXLAN and GRE. +- Hardware checksum Tx offload for generic IP or UDP tunnel, including VXLAN and GRE. +- RX interrupts. +- Statistics query including Basic, Extended and per queue. +- Rx HW timestamp. +- Tunnel types: VXLAN, L3 VXLAN, VXLAN-GPE, GRE, MPLSoGRE, MPLSoUDP, IP-in-IP, Geneve, GTP. +- Tunnel HW offloads: packet type, inner/outer RSS, IP and UDP checksum verification. +- NIC HW offloads: encapsulation (vxlan, gre, mplsoudp, mplsogre), NAT, routing, TTL + increment/decrement, count, drop, mark. For details please see :ref:`mlx5_offloads_support`. +- Flow insertion rate of more then million flows per second, when using Direct Rules. +- Support for multiple rte_flow groups. +- Per packet no-inline hint flag to disable packet data copying into Tx descriptors. +- Hardware LRO. +- Hairpin. 
+ +Limitations +----------- + +- For secondary process: + + - Forked secondary process not supported. + - External memory unregistered in EAL memseg list cannot be used for DMA + unless such memory has been registered by ``mlx5_mr_update_ext_mp()`` in + primary process and remapped to the same virtual address in secondary + process. If the external memory is registered by primary process but has + different virtual address in secondary process, unexpected error may happen. + +- When using Verbs flow engine (``dv_flow_en`` = 0), flow pattern without any + specific VLAN will match for VLAN packets as well: + + When VLAN spec is not specified in the pattern, the matching rule will be created with VLAN as a wild card. + Meaning, the flow rule:: + + flow create 0 ingress pattern eth / vlan vid is 3 / ipv4 / end ... + + Will only match vlan packets with vid=3. and the flow rule:: + + flow create 0 ingress pattern eth / ipv4 / end ... + + Will match any ipv4 packet (VLAN included). + +- When using DV flow engine (``dv_flow_en`` = 1), flow pattern without VLAN item + will match untagged packets only. + The flow rule:: + + flow create 0 ingress pattern eth / ipv4 / end ... + + Will match untagged packets only. + The flow rule:: + + flow create 0 ingress pattern eth / vlan / ipv4 / end ... + + Will match tagged packets only, with any VLAN ID value. + The flow rule:: + + flow create 0 ingress pattern eth / vlan vid is 3 / ipv4 / end ... + + Will only match tagged packets with VLAN ID 3. + +- VLAN pop offload command: + + - Flow rules having a VLAN pop offload command as one of their actions and + are lacking a match on VLAN as one of their items are not supported. + - The command is not supported on egress traffic. + +- VLAN push offload is not supported on ingress traffic. + +- VLAN set PCP offload is not supported on existing headers. + +- A multi segment packet must have not more segments than reported by dev_infos_get() + in tx_desc_lim.nb_seg_max field. This value depends on maximal supported Tx descriptor + size and ``txq_inline_min`` settings and may be from 2 (worst case forced by maximal + inline settings) to 58. + +- Flows with a VXLAN Network Identifier equal (or ends to be equal) + to 0 are not supported. + +- L3 VXLAN and VXLAN-GPE tunnels cannot be supported together with MPLSoGRE and MPLSoUDP. + +- Match on Geneve header supports the following fields only: + + - VNI + - OAM + - protocol type + - options length + Currently, the only supported options length value is 0. + +- VF: flow rules created on VF devices can only match traffic targeted at the + configured MAC addresses (see ``rte_eth_dev_mac_addr_add()``). + +- Match on GTP tunnel header item supports the following fields only: + + - v_pt_rsv_flags: E flag, S flag, PN flag + - msg_type + - teid + +- No Tx metadata go to the E-Switch steering domain for the Flow group 0. + The flows within group 0 and set metadata action are rejected by hardware. + +.. note:: + + MAC addresses not already present in the bridge table of the associated + kernel network device will be added and cleaned up by the PMD when closing + the device. In case of ungraceful program termination, some entries may + remain present and should be removed manually by other means. + +- When Multi-Packet Rx queue is configured (``mprq_en``), a Rx packet can be + externally attached to a user-provided mbuf with having EXT_ATTACHED_MBUF in + ol_flags. As the mempool for the external buffer is managed by PMD, all the + Rx mbufs must be freed before the device is closed. 
Otherwise, the mempool of + the external buffers will be freed by PMD and the application which still + holds the external buffers may be corrupted. + +- If Multi-Packet Rx queue is configured (``mprq_en``) and Rx CQE compression is + enabled (``rxq_cqe_comp_en``) at the same time, RSS hash result is not fully + supported. Some Rx packets may not have PKT_RX_RSS_HASH. + +- IPv6 Multicast messages are not supported on VM, while promiscuous mode + and allmulticast mode are both set to off. + To receive IPv6 Multicast messages on VM, explicitly set the relevant + MAC address using rte_eth_dev_mac_addr_add() API. + +- To support a mixed traffic pattern (some buffers from local host memory, some + buffers from other devices) with high bandwidth, a mbuf flag is used. + + An application hints the PMD whether or not it should try to inline the + given mbuf data buffer. PMD should do the best effort to act upon this request. + + The hint flag ``RTE_PMD_MLX5_FINE_GRANULARITY_INLINE`` is dynamic, + registered by application with rte_mbuf_dynflag_register(). This flag is + purely driver-specific and declared in PMD specific header ``rte_pmd_mlx5.h``, + which is intended to be used by the application. + + To query the supported specific flags in runtime, + the function ``rte_pmd_mlx5_get_dyn_flag_names`` returns the array of + currently (over present hardware and configuration) supported specific flags. + The "not inline hint" feature operating flow is the following one: + + - application starts + - probe the devices, ports are created + - query the port capabilities + - if port supporting the feature is found + - register dynamic flag ``RTE_PMD_MLX5_FINE_GRANULARITY_INLINE`` + - application starts the ports + - on ``dev_start()`` PMD checks whether the feature flag is registered and + enables the feature support in datapath + - application might set the registered flag bit in ``ol_flags`` field + of mbuf being sent and PMD will handle ones appropriately. + +- The amount of descriptors in Tx queue may be limited by data inline settings. + Inline data require the more descriptor building blocks and overall block + amount may exceed the hardware supported limits. The application should + reduce the requested Tx size or adjust data inline settings with + ``txq_inline_max`` and ``txq_inline_mpw`` devargs keys. + +- E-Switch decapsulation Flow: + + - can be applied to PF port only. + - must specify VF port action (packet redirection from PF to VF). + - optionally may specify tunnel inner source and destination MAC addresses. + +- E-Switch encapsulation Flow: + + - can be applied to VF ports only. + - must specify PF port action (packet redirection from VF to PF). + +- Raw encapsulation: + + - The input buffer, used as outer header, is not validated. + +- Raw decapsulation: + + - The decapsulation is always done up to the outermost tunnel detected by the HW. + - The input buffer, providing the removal size, is not validated. + - The buffer size must match the length of the headers to be removed. + +- ICMP/ICMP6 code/type matching, IP-in-IP and MPLS flow matching are all + mutually exclusive features which cannot be supported together + (see :ref:`mlx5_firmware_config`). + +- LRO: + + - Requires DevX and DV flow to be enabled. + - KEEP_CRC offload cannot be supported with LRO. + - The first mbuf length, without head-room, must be big enough to include the + TCP header (122B). 
+  - Rx queue with LRO offload enabled, receiving a non-LRO packet, can forward
+    it with size limited to max LRO size, not to max RX packet length.
+  - LRO can be used with outer header of TCP packets of the standard format:
+       eth (with or without vlan) / ipv4 or ipv6 / tcp / payload
+
+    Other TCP packets (e.g. with MPLS label) received on an Rx queue with LRO
+    enabled will be received with a bad checksum.
+
+Statistics
+----------
+
+MLX5 supports various methods to report statistics:
+
+Port statistics can be queried using ``rte_eth_stats_get()``. The received and sent statistics are counted in SW only and reflect the number of packets received or sent successfully by the PMD. The imissed counter is the number of packets that could not be delivered to SW because a queue was full. Packets not received due to congestion in the bus or on the NIC can be queried via the rx_discards_phy xstats counter.
+
+Extended statistics can be queried using ``rte_eth_xstats_get()``. The extended statistics expose a wider set of counters counted by the device. The extended port statistics count the number of packets received or sent successfully by the port. As Mellanox NICs are using the :ref:`Bifurcated Linux Driver ` those counters also count packets received or sent by the Linux kernel. The counters with a ``_phy`` suffix count the total events on the physical port and are therefore not valid for VF.
+
+Finally, per-flow statistics can be queried using ``rte_flow_query`` when attaching a count action to a specific flow. The flow counter counts the number of packets received successfully by the port that match the specific flow.
+
+Configuration
+-------------
+
+Compilation options
+~~~~~~~~~~~~~~~~~~~
+
+These options can be modified in the ``.config`` file.
+
+- ``CONFIG_RTE_LIBRTE_MLX5_PMD`` (default **n**)
+
+  Toggle compilation of librte_pmd_mlx5 itself.
+
+- ``CONFIG_RTE_IBVERBS_LINK_DLOPEN`` (default **n**)
+
+  Build PMD with additional code to make it loadable without hard
+  dependencies on **libibverbs** nor **libmlx5**, which may not be installed
+  on the target system.
+
+  In this mode, their presence is still required for it to run properly,
+  however their absence won't prevent a DPDK application from starting (with
+  ``CONFIG_RTE_BUILD_SHARED_LIB`` disabled) and they won't show up as
+  missing with ``ldd(1)``.
+
+  It works by moving these dependencies to a purpose-built rdma-core "glue"
+  plug-in which must either be installed in a directory whose name is based
+  on ``CONFIG_RTE_EAL_PMD_PATH`` suffixed with ``-glue`` if set, or in a
+  standard location for the dynamic linker (e.g. ``/lib``) if left to the
+  default empty string (``""``).
+
+  This option has no performance impact.
+
+- ``CONFIG_RTE_IBVERBS_LINK_STATIC`` (default **n**)
+
+  Embed static flavor of the dependencies **libibverbs** and **libmlx5**
+  in the PMD shared library or the executable static binary.
+
+- ``CONFIG_RTE_LIBRTE_MLX5_DEBUG`` (default **n**)
+
+  Toggle debugging code and stricter compilation flags. Enabling this option
+  adds additional run-time checks and debugging messages at the cost of
+  lower performance.
+
+.. note::
+
+   For BlueField, the target should be set to ``arm64-bluefield-linux-gcc``.
+   This will enable ``CONFIG_RTE_LIBRTE_MLX5_PMD`` and set
+   ``RTE_CACHE_LINE_SIZE`` to 64. The default armv8a configuration of the make
+   and meson builds sets it to 128, which degrades performance.
+
+This option is available in meson:
+
+- ``ibverbs_link`` can be ``static``, ``shared``, or ``dlopen``.
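+
+For example, a meson build using the dlopen link mode can be configured as
+follows (an illustrative sketch only; adjust the build directory and options
+to your environment)::
+
+   meson build -Dibverbs_link=dlopen
+   ninja -C build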
+ +Environment variables +~~~~~~~~~~~~~~~~~~~~~ + +- ``MLX5_GLUE_PATH`` + + A list of directories in which to search for the rdma-core "glue" plug-in, + separated by colons or semi-colons. + + Only matters when compiled with ``CONFIG_RTE_IBVERBS_LINK_DLOPEN`` + enabled and most useful when ``CONFIG_RTE_EAL_PMD_PATH`` is also set, + since ``LD_LIBRARY_PATH`` has no effect in this case. + +- ``MLX5_SHUT_UP_BF`` + + Configures HW Tx doorbell register as IO-mapped. + + By default, the HW Tx doorbell is configured as a write-combining register. + The register would be flushed to HW usually when the write-combining buffer + becomes full, but it depends on CPU design. + + Except for vectorized Tx burst routines, a write memory barrier is enforced + after updating the register so that the update can be immediately visible to + HW. + + When vectorized Tx burst is called, the barrier is set only if the burst size + is not aligned to MLX5_VPMD_TX_MAX_BURST. However, setting this environmental + variable will bring better latency even though the maximum throughput can + slightly decline. + +Run-time configuration +~~~~~~~~~~~~~~~~~~~~~~ + +- librte_pmd_mlx5 brings kernel network interfaces up during initialization + because it is affected by their state. Forcing them down prevents packets + reception. + +- **ethtool** operations on related kernel interfaces also affect the PMD. + +Run as non-root +^^^^^^^^^^^^^^^ + +In order to run as a non-root user, +some capabilities must be granted to the application:: + + setcap cap_sys_admin,cap_net_admin,cap_net_raw,cap_ipc_lock+ep + +Below are the reasons of the need for each capability: + +``cap_sys_admin`` + When using physical addresses (PA mode), with Linux >= 4.0, + for access to ``/proc/self/pagemap``. + +``cap_net_admin`` + For device configuration. + +``cap_net_raw`` + For raw ethernet queue allocation through kernel driver. + +``cap_ipc_lock`` + For DMA memory pinning. + +Driver options +^^^^^^^^^^^^^^ + +- ``rxq_cqe_comp_en`` parameter [int] + + A nonzero value enables the compression of CQE on RX side. This feature + allows to save PCI bandwidth and improve performance. Enabled by default. + + Supported on: + + - x86_64 with ConnectX-4, ConnectX-4 Lx, ConnectX-5, ConnectX-6, ConnectX-6 Dx + and BlueField. + - POWER9 and ARMv8 with ConnectX-4 Lx, ConnectX-5, ConnectX-6, ConnectX-6 Dx + and BlueField. + +- ``rxq_cqe_pad_en`` parameter [int] + + A nonzero value enables 128B padding of CQE on RX side. The size of CQE + is aligned with the size of a cacheline of the core. If cacheline size is + 128B, the CQE size is configured to be 128B even though the device writes + only 64B data on the cacheline. This is to avoid unnecessary cache + invalidation by device's two consecutive writes on to one cacheline. + However in some architecture, it is more beneficial to update entire + cacheline with padding the rest 64B rather than striding because + read-modify-write could drop performance a lot. On the other hand, + writing extra data will consume more PCIe bandwidth and could also drop + the maximum throughput. It is recommended to empirically set this + parameter. Disabled by default. + + Supported on: + + - CPU having 128B cacheline with ConnectX-5 and BlueField. + +- ``rxq_pkt_pad_en`` parameter [int] + + A nonzero value enables padding Rx packet to the size of cacheline on PCI + transaction. 
This feature would waste PCI bandwidth but could improve
+  performance by avoiding partial cacheline writes, which may cause costly
+  read-modify-copy memory transactions on some architectures. Disabled by
+  default.
+
+  Supported on:
+
+  - x86_64 with ConnectX-4, ConnectX-4 Lx, ConnectX-5, ConnectX-6, ConnectX-6 Dx
+    and BlueField.
+  - POWER8 and ARMv8 with ConnectX-4 Lx, ConnectX-5, ConnectX-6, ConnectX-6 Dx
+    and BlueField.
+
+- ``mprq_en`` parameter [int]
+
+  A nonzero value enables configuring Multi-Packet Rx queues. An Rx queue is
+  configured as a Multi-Packet RQ if the total number of Rx queues is
+  ``rxqs_min_mprq`` or more. Disabled by default.
+
+  Multi-Packet Rx Queue (MPRQ a.k.a Striding RQ) can further save PCIe bandwidth
+  by posting a single large buffer for multiple packets. Instead of posting one
+  buffer per packet, one large buffer is posted in order to receive multiple
+  packets on the buffer. An MPRQ buffer consists of multiple fixed-size strides
+  and each stride receives one packet. MPRQ can improve throughput for
+  small-packet traffic.
+
+  When MPRQ is enabled, max_rx_pkt_len can be larger than the size of the
+  user-provided mbuf even if DEV_RX_OFFLOAD_SCATTER isn't enabled. The PMD will
+  configure a stride size large enough to accommodate max_rx_pkt_len as long as
+  the device allows. Note that this can waste system memory compared to enabling
+  Rx scatter and multi-segment packets.
+
+- ``mprq_log_stride_num`` parameter [int]
+
+  Log 2 of the number of strides for Multi-Packet Rx queue. Configuring more
+  strides can reduce PCIe traffic further. If the configured value is not in the
+  range of device capability, the default value will be set with a warning
+  message. The default value is 4, which is 16 strides per buffer, valid only
+  if ``mprq_en`` is set.
+
+  The size of the Rx queue should be bigger than the number of strides.
+
+- ``mprq_log_stride_size`` parameter [int]
+
+  Log 2 of the size of a stride for Multi-Packet Rx queue. Configuring a smaller
+  stride size can save some memory and reduce the probability of depleting all
+  available strides due to packets unreleased by the application. If the
+  configured value is not in the range of device capability, the default value
+  will be set with a warning message. The default value is 11, which is 2048
+  bytes per stride, valid only if ``mprq_en`` is set. With
+  ``mprq_log_stride_size`` set, it is possible for a packet to span across
+  multiple strides. This mode allows support of jumbo frames (9K) with MPRQ. The
+  memcopy of some packets (or part of a packet if Rx scatter is configured) may
+  be required if there is no space left for a headroom at the end of a stride,
+  which incurs some performance penalty.
+
+- ``mprq_max_memcpy_len`` parameter [int]
+
+  The maximum length of packet to memcpy in case of Multi-Packet Rx queue. An Rx
+  packet is mem-copied to a user-provided mbuf if the size of the Rx packet is
+  less than or equal to this parameter. Otherwise, the PMD will attach the Rx
+  packet to the mbuf by external buffer attachment - ``rte_pktmbuf_attach_extbuf()``.
+  A mempool for external buffers will be allocated and managed by the PMD. If the
+  Rx packet is externally attached, the ol_flags field of the mbuf will have
+  EXT_ATTACHED_MBUF set and this flag must be preserved. ``RTE_MBUF_HAS_EXTBUF()``
+  checks the flag. The default value is 128, valid only if ``mprq_en`` is set.
+
+- ``rxqs_min_mprq`` parameter [int]
+
+  Configure Rx queues as Multi-Packet RQ if the total number of Rx queues is
+  greater or equal to this value.
The default value is 12, valid only if + ``mprq_en`` is set. + +- ``txq_inline`` parameter [int] + + Amount of data to be inlined during TX operations. This parameter is + deprecated and converted to the new parameter ``txq_inline_max`` providing + partial compatibility. + +- ``txqs_min_inline`` parameter [int] + + Enable inline data send only when the number of TX queues is greater or equal + to this value. + + This option should be used in combination with ``txq_inline_max`` and + ``txq_inline_mpw`` below and does not affect ``txq_inline_min`` settings above. + + If this option is not specified the default value 16 is used for BlueField + and 8 for other platforms + + The data inlining consumes the CPU cycles, so this option is intended to + auto enable inline data if we have enough Tx queues, which means we have + enough CPU cores and PCI bandwidth is getting more critical and CPU + is not supposed to be bottleneck anymore. + + The copying data into WQE improves latency and can improve PPS performance + when PCI back pressure is detected and may be useful for scenarios involving + heavy traffic on many queues. + + Because additional software logic is necessary to handle this mode, this + option should be used with care, as it may lower performance when back + pressure is not expected. + + If inline data are enabled it may affect the maximal size of Tx queue in + descriptors because the inline data increase the descriptor size and + queue size limits supported by hardware may be exceeded. + +- ``txq_inline_min`` parameter [int] + + Minimal amount of data to be inlined into WQE during Tx operations. NICs + may require this minimal data amount to operate correctly. The exact value + may depend on NIC operation mode, requested offloads, etc. It is strongly + recommended to omit this parameter and use the default values. Anyway, + applications using this parameter should take into consideration that + specifying an inconsistent value may prevent the NIC from sending packets. + + If ``txq_inline_min`` key is present the specified value (may be aligned + by the driver in order not to exceed the limits and provide better descriptor + space utilization) will be used by the driver and it is guaranteed that + requested amount of data bytes are inlined into the WQE beside other inline + settings. This key also may update ``txq_inline_max`` value (default + or specified explicitly in devargs) to reserve the space for inline data. + + If ``txq_inline_min`` key is not present, the value may be queried by the + driver from the NIC via DevX if this feature is available. If there is no DevX + enabled/supported the value 18 (supposing L2 header including VLAN) is set + for ConnectX-4 and ConnectX-4 Lx, and 0 is set by default for ConnectX-5 + and newer NICs. If packet is shorter the ``txq_inline_min`` value, the entire + packet is inlined. + + For ConnectX-4 NIC, driver does not allow specifying value below 18 + (minimal L2 header, including VLAN), error will be raised. + + For ConnectX-4 Lx NIC, it is allowed to specify values below 18, but + it is not recommended and may prevent NIC from sending packets over + some configurations. + + Please, note, this minimal data inlining disengages eMPW feature (Enhanced + Multi-Packet Write), because last one does not support partial packet inlining. + This is not very critical due to minimal data inlining is mostly required + by ConnectX-4 and ConnectX-4 Lx, these NICs do not support eMPW feature. 
+ +- ``txq_inline_max`` parameter [int] + + Specifies the maximal packet length to be completely inlined into WQE + Ethernet Segment for ordinary SEND method. If packet is larger than specified + value, the packet data won't be copied by the driver at all, data buffer + is addressed with a pointer. If packet length is less or equal all packet + data will be copied into WQE. This may improve PCI bandwidth utilization for + short packets significantly but requires the extra CPU cycles. + + The data inline feature is controlled by number of Tx queues, if number of Tx + queues is larger than ``txqs_min_inline`` key parameter, the inline feature + is engaged, if there are not enough Tx queues (which means not enough CPU cores + and CPU resources are scarce), data inline is not performed by the driver. + Assigning ``txqs_min_inline`` with zero always enables the data inline. + + The default ``txq_inline_max`` value is 290. The specified value may be adjusted + by the driver in order not to exceed the limit (930 bytes) and to provide better + WQE space filling without gaps, the adjustment is reflected in the debug log. + Also, the default value (290) may be decreased in run-time if the large transmit + queue size is requested and hardware does not support enough descriptor + amount, in this case warning is emitted. If ``txq_inline_max`` key is + specified and requested inline settings can not be satisfied then error + will be raised. + +- ``txq_inline_mpw`` parameter [int] + + Specifies the maximal packet length to be completely inlined into WQE for + Enhanced MPW method. If packet is large the specified value, the packet data + won't be copied, and data buffer is addressed with pointer. If packet length + is less or equal, all packet data will be copied into WQE. This may improve PCI + bandwidth utilization for short packets significantly but requires the extra + CPU cycles. + + The data inline feature is controlled by number of TX queues, if number of Tx + queues is larger than ``txqs_min_inline`` key parameter, the inline feature + is engaged, if there are not enough Tx queues (which means not enough CPU cores + and CPU resources are scarce), data inline is not performed by the driver. + Assigning ``txqs_min_inline`` with zero always enables the data inline. + + The default ``txq_inline_mpw`` value is 268. The specified value may be adjusted + by the driver in order not to exceed the limit (930 bytes) and to provide better + WQE space filling without gaps, the adjustment is reflected in the debug log. + Due to multiple packets may be included to the same WQE with Enhanced Multi + Packet Write Method and overall WQE size is limited it is not recommended to + specify large values for the ``txq_inline_mpw``. Also, the default value (268) + may be decreased in run-time if the large transmit queue size is requested + and hardware does not support enough descriptor amount, in this case warning + is emitted. If ``txq_inline_mpw`` key is specified and requested inline + settings can not be satisfied then error will be raised. + +- ``txqs_max_vec`` parameter [int] + + Enable vectorized Tx only when the number of TX queues is less than or + equal to this value. This parameter is deprecated and ignored, kept + for compatibility issue to not prevent driver from probing. + +- ``txq_mpw_hdr_dseg_en`` parameter [int] + + A nonzero value enables including two pointers in the first block of TX + descriptor. The parameter is deprecated and ignored, kept for compatibility + issue. 
+ +- ``txq_max_inline_len`` parameter [int] + + Maximum size of packet to be inlined. This limits the size of packet to + be inlined. If the size of a packet is larger than configured value, the + packet isn't inlined even though there's enough space remained in the + descriptor. Instead, the packet is included with pointer. This parameter + is deprecated and converted directly to ``txq_inline_mpw`` providing full + compatibility. Valid only if eMPW feature is engaged. + +- ``txq_mpw_en`` parameter [int] + + A nonzero value enables Enhanced Multi-Packet Write (eMPW) for ConnectX-5, + ConnectX-6, ConnectX-6 Dx and BlueField. eMPW allows the TX burst function to pack + up multiple packets in a single descriptor session in order to save PCI bandwidth + and improve performance at the cost of a slightly higher CPU usage. When + ``txq_inline_mpw`` is set along with ``txq_mpw_en``, TX burst function copies + entire packet data on to TX descriptor instead of including pointer of packet. + + The Enhanced Multi-Packet Write feature is enabled by default if NIC supports + it, can be disabled by explicit specifying 0 value for ``txq_mpw_en`` option. + Also, if minimal data inlining is requested by non-zero ``txq_inline_min`` + option or reported by the NIC, the eMPW feature is disengaged. + +- ``tx_db_nc`` parameter [int] + + The rdma core library can map doorbell register in two ways, depending on the + environment variable "MLX5_SHUT_UP_BF": + + - As regular cached memory (usually with write combining attribute), if the + variable is either missing or set to zero. + - As non-cached memory, if the variable is present and set to not "0" value. + + The type of mapping may slightly affect the Tx performance, the optimal choice + is strongly relied on the host architecture and should be deduced practically. + + If ``tx_db_nc`` is set to zero, the doorbell is forced to be mapped to regular + memory (with write combining), the PMD will perform the extra write memory barrier + after writing to doorbell, it might increase the needed CPU clocks per packet + to send, but latency might be improved. + + If ``tx_db_nc`` is set to one, the doorbell is forced to be mapped to non + cached memory, the PMD will not perform the extra write memory barrier + after writing to doorbell, on some architectures it might improve the + performance. + + If ``tx_db_nc`` is set to two, the doorbell is forced to be mapped to regular + memory, the PMD will use heuristics to decide whether write memory barrier + should be performed. For bursts with size multiple of recommended one (64 pkts) + it is supposed the next burst is coming and no need to issue the extra memory + barrier (it is supposed to be issued in the next coming burst, at least after + descriptor writing). It might increase latency (on some hosts till next + packets transmit) and should be used with care. + + If ``tx_db_nc`` is omitted or set to zero, the preset (if any) environment + variable "MLX5_SHUT_UP_BF" value is used. If there is no "MLX5_SHUT_UP_BF", + the default ``tx_db_nc`` value is zero for ARM64 hosts and one for others. + +- ``tx_vec_en`` parameter [int] + + A nonzero value enables Tx vector on ConnectX-5, ConnectX-6, ConnectX-6 Dx + and BlueField NICs if the number of global Tx queues on the port is less than + ``txqs_max_vec``. The parameter is deprecated and ignored. + +- ``rx_vec_en`` parameter [int] + + A nonzero value enables Rx vector if the port is not configured in + multi-segment otherwise this parameter is ignored. + + Enabled by default. 
+ +- ``vf_nl_en`` parameter [int] + + A nonzero value enables Netlink requests from the VF to add/remove MAC + addresses or/and enable/disable promiscuous/all multicast on the Netdevice. + Otherwise the relevant configuration must be run with Linux iproute2 tools. + This is a prerequisite to receive this kind of traffic. + + Enabled by default, valid only on VF devices ignored otherwise. + +- ``l3_vxlan_en`` parameter [int] + + A nonzero value allows L3 VXLAN and VXLAN-GPE flow creation. To enable + L3 VXLAN or VXLAN-GPE, users has to configure firmware and enable this + parameter. This is a prerequisite to receive this kind of traffic. + + Disabled by default. + +- ``dv_xmeta_en`` parameter [int] + + A nonzero value enables extensive flow metadata support if device is + capable and driver supports it. This can enable extensive support of + ``MARK`` and ``META`` item of ``rte_flow``. The newly introduced + ``SET_TAG`` and ``SET_META`` actions do not depend on ``dv_xmeta_en``. + + There are some possible configurations, depending on parameter value: + + - 0, this is default value, defines the legacy mode, the ``MARK`` and + ``META`` related actions and items operate only within NIC Tx and + NIC Rx steering domains, no ``MARK`` and ``META`` information crosses + the domain boundaries. The ``MARK`` item is 24 bits wide, the ``META`` + item is 32 bits wide and match supported on egress only. + + - 1, this engages extensive metadata mode, the ``MARK`` and ``META`` + related actions and items operate within all supported steering domains, + including FDB, ``MARK`` and ``META`` information may cross the domain + boundaries. The ``MARK`` item is 24 bits wide, the ``META`` item width + depends on kernel and firmware configurations and might be 0, 16 or + 32 bits. Within NIC Tx domain ``META`` data width is 32 bits for + compatibility, the actual width of data transferred to the FDB domain + depends on kernel configuration and may be vary. The actual supported + width can be retrieved in runtime by series of rte_flow_validate() + trials. + + - 2, this engages extensive metadata mode, the ``MARK`` and ``META`` + related actions and items operate within all supported steering domains, + including FDB, ``MARK`` and ``META`` information may cross the domain + boundaries. The ``META`` item is 32 bits wide, the ``MARK`` item width + depends on kernel and firmware configurations and might be 0, 16 or + 24 bits. The actual supported width can be retrieved in runtime by + series of rte_flow_validate() trials. + + +------+-----------+-----------+-------------+-------------+ + | Mode | ``MARK`` | ``META`` | ``META`` Tx | FDB/Through | + +======+===========+===========+=============+=============+ + | 0 | 24 bits | 32 bits | 32 bits | no | + +------+-----------+-----------+-------------+-------------+ + | 1 | 24 bits | vary 0-32 | 32 bits | yes | + +------+-----------+-----------+-------------+-------------+ + | 2 | vary 0-32 | 32 bits | 32 bits | yes | + +------+-----------+-----------+-------------+-------------+ + + If there is no E-Switch configuration the ``dv_xmeta_en`` parameter is + ignored and the device is configured to operate in legacy mode (0). + + Disabled by default (set to 0). + + The Direct Verbs/Rules (engaged with ``dv_flow_en`` = 1) supports all + of the extensive metadata features. The legacy Verbs supports FLAG and + MARK metadata actions over NIC Rx steering domain only. 
+ +- ``dv_flow_en`` parameter [int] + + A nonzero value enables the DV flow steering assuming it is supported + by the driver (RDMA Core library version is rdma-core-24.0 or higher). + + Enabled by default if supported. + +- ``dv_esw_en`` parameter [int] + + A nonzero value enables E-Switch using Direct Rules. + + Enabled by default if supported. + +- ``mr_ext_memseg_en`` parameter [int] + + A nonzero value enables extending memseg when registering DMA memory. If + enabled, the number of entries in MR (Memory Region) lookup table on datapath + is minimized and it benefits performance. On the other hand, it worsens memory + utilization because registered memory is pinned by kernel driver. Even if a + page in the extended chunk is freed, that doesn't become reusable until the + entire memory is freed. + + Enabled by default. + +- ``representor`` parameter [list] + + This parameter can be used to instantiate DPDK Ethernet devices from + existing port (or VF) representors configured on the device. + + It is a standard parameter whose format is described in + :ref:`ethernet_device_standard_device_arguments`. + + For instance, to probe port representors 0 through 2:: + + representor=[0-2] + +- ``max_dump_files_num`` parameter [int] + + The maximum number of files per PMD entity that may be created for debug information. + The files will be created in /var/log directory or in current directory. + + set to 128 by default. + +- ``lro_timeout_usec`` parameter [int] + + The maximum allowed duration of an LRO session, in micro-seconds. + PMD will set the nearest value supported by HW, which is not bigger than + the input ``lro_timeout_usec`` value. + If this parameter is not specified, by default PMD will set + the smallest value supported by HW. + +- ``hp_buf_log_sz`` parameter [int] + + The total data buffer size of a hairpin queue (logarithmic form), in bytes. + PMD will set the data buffer size to 2 ** ``hp_buf_log_sz``, both for RX & TX. + The capacity of the value is specified by the firmware and the initialization + will get a failure if it is out of scope. + The range of the value is from 11 to 19 right now, and the supported frame + size of a single packet for hairpin is from 512B to 128KB. It might change if + different firmware release is being used. By using a small value, it could + reduce memory consumption but not work with a large frame. If the value is + too large, the memory consumption will be high and some potential performance + degradation will be introduced. + By default, the PMD will set this value to 16, which means that 9KB jumbo + frames will be supported. + +.. _mlx5_firmware_config: + +Firmware configuration +~~~~~~~~~~~~~~~~~~~~~~ + +Firmware features can be configured as key/value pairs. + +The command to set a value is:: + + mlxconfig -d set = + +The command to query a value is:: + + mlxconfig -d query | grep + +The device name for the command ``mlxconfig`` can be either the PCI address, +or the mst device name found with:: + + mst status + +Below are some firmware configurations listed. 
+ +- link type:: + + LINK_TYPE_P1 + LINK_TYPE_P2 + value: 1=Infiniband 2=Ethernet 3=VPI(auto-sense) + +- enable SR-IOV:: + + SRIOV_EN=1 + +- maximum number of SR-IOV virtual functions:: + + NUM_OF_VFS= + +- enable DevX (required by Direct Rules and other features):: + + UCTX_EN=1 + +- aggressive CQE zipping:: + + CQE_COMPRESSION=1 + +- L3 VXLAN and VXLAN-GPE destination UDP port:: + + IP_OVER_VXLAN_EN=1 + IP_OVER_VXLAN_PORT= + +- enable VXLAN-GPE tunnel flow matching:: + + FLEX_PARSER_PROFILE_ENABLE=0 + or + FLEX_PARSER_PROFILE_ENABLE=2 + +- enable IP-in-IP tunnel flow matching:: + + FLEX_PARSER_PROFILE_ENABLE=0 + +- enable MPLS flow matching:: + + FLEX_PARSER_PROFILE_ENABLE=1 + +- enable ICMP/ICMP6 code/type fields matching:: + + FLEX_PARSER_PROFILE_ENABLE=2 + +- enable Geneve flow matching:: + + FLEX_PARSER_PROFILE_ENABLE=0 + or + FLEX_PARSER_PROFILE_ENABLE=1 + +- enable GTP flow matching:: + + FLEX_PARSER_PROFILE_ENABLE=3 + +Prerequisites +------------- + +This driver relies on external libraries and kernel drivers for resources +allocations and initialization. The following dependencies are not part of +DPDK and must be installed separately: + +- **libibverbs** + + User space Verbs framework used by librte_pmd_mlx5. This library provides + a generic interface between the kernel and low-level user space drivers + such as libmlx5. + + It allows slow and privileged operations (context initialization, hardware + resources allocations) to be managed by the kernel and fast operations to + never leave user space. + +- **libmlx5** + + Low-level user space driver library for Mellanox + ConnectX-4/ConnectX-5/ConnectX-6/BlueField devices, it is automatically loaded + by libibverbs. + + This library basically implements send/receive calls to the hardware + queues. + +- **Kernel modules** + + They provide the kernel-side Verbs API and low level device drivers that + manage actual hardware initialization and resources sharing with user + space processes. + + Unlike most other PMDs, these modules must remain loaded and bound to + their devices: + + - mlx5_core: hardware driver managing Mellanox + ConnectX-4/ConnectX-5/ConnectX-6/BlueField devices and related Ethernet kernel + network devices. + - mlx5_ib: InifiniBand device driver. + - ib_uverbs: user space driver for Verbs (entry point for libibverbs). + +- **Firmware update** + + Mellanox OFED/EN releases include firmware updates for + ConnectX-4/ConnectX-5/ConnectX-6/BlueField adapters. + + Because each release provides new features, these updates must be applied to + match the kernel modules and libraries they come with. + +.. note:: + + Both libraries are BSD and GPL licensed. Linux kernel modules are GPL + licensed. + +Installation +~~~~~~~~~~~~ + +Either RDMA Core library with a recent enough Linux kernel release +(recommended) or Mellanox OFED/EN, which provides compatibility with older +releases. + +RDMA Core with Linux Kernel +^^^^^^^^^^^^^^^^^^^^^^^^^^^ + +- Minimal kernel version : v4.14 or the most recent 4.14-rc (see `Linux installation documentation`_) +- Minimal rdma-core version: v15+ commit 0c5f5765213a ("Merge pull request #227 from yishaih/tm") + (see `RDMA Core installation documentation`_) +- When building for i686 use: + + - rdma-core version 18.0 or above built with 32bit support. + - Kernel version 4.14.41 or above. + +- Starting with rdma-core v21, static libraries can be built:: + + cd build + CFLAGS=-fPIC cmake -DIN_PLACE=1 -DENABLE_STATIC=1 -GNinja .. + ninja + +.. 
_`Linux installation documentation`: https://git.kernel.org/pub/scm/linux/kernel/git/stable/linux-stable.git/plain/Documentation/admin-guide/README.rst +.. _`RDMA Core installation documentation`: https://raw.githubusercontent.com/linux-rdma/rdma-core/master/README.md + +If rdma-core libraries are built but not installed, DPDK makefile can link them, +thanks to these environment variables: + + - ``EXTRA_CFLAGS=-I/path/to/rdma-core/build/include`` + - ``EXTRA_LDFLAGS=-L/path/to/rdma-core/build/lib`` + - ``PKG_CONFIG_PATH=/path/to/rdma-core/build/lib/pkgconfig`` + +Mellanox OFED/EN +^^^^^^^^^^^^^^^^ + +- Mellanox OFED version: **4.5** and above / + Mellanox EN version: **4.5** and above +- firmware version: + + - ConnectX-4: **12.21.1000** and above. + - ConnectX-4 Lx: **14.21.1000** and above. + - ConnectX-5: **16.21.1000** and above. + - ConnectX-5 Ex: **16.21.1000** and above. + - ConnectX-6: **20.27.0090** and above. + - ConnectX-6 Dx: **22.27.0090** and above. + - BlueField: **18.25.1010** and above. + +While these libraries and kernel modules are available on OpenFabrics +Alliance's `website `__ and provided by package +managers on most distributions, this PMD requires Ethernet extensions that +may not be supported at the moment (this is a work in progress). + +`Mellanox OFED +`__ and +`Mellanox EN +`__ +include the necessary support and should be used in the meantime. For DPDK, +only libibverbs, libmlx5, mlnx-ofed-kernel packages and firmware updates are +required from that distribution. + +.. note:: + + Several versions of Mellanox OFED/EN are available. Installing the version + this DPDK release was developed and tested against is strongly + recommended. Please check the `prerequisites`_. + +Supported NICs +-------------- + +The following Mellanox device families are supported by the same mlx5 driver: + + - ConnectX-4 + - ConnectX-4 Lx + - ConnectX-5 + - ConnectX-5 Ex + - ConnectX-6 + - ConnectX-6 Dx + - BlueField + +Below are detailed device names: + +* Mellanox\ |reg| ConnectX\ |reg|-4 10G MCX4111A-XCAT (1x10G) +* Mellanox\ |reg| ConnectX\ |reg|-4 10G MCX412A-XCAT (2x10G) +* Mellanox\ |reg| ConnectX\ |reg|-4 25G MCX4111A-ACAT (1x25G) +* Mellanox\ |reg| ConnectX\ |reg|-4 25G MCX412A-ACAT (2x25G) +* Mellanox\ |reg| ConnectX\ |reg|-4 40G MCX413A-BCAT (1x40G) +* Mellanox\ |reg| ConnectX\ |reg|-4 40G MCX4131A-BCAT (1x40G) +* Mellanox\ |reg| ConnectX\ |reg|-4 40G MCX415A-BCAT (1x40G) +* Mellanox\ |reg| ConnectX\ |reg|-4 50G MCX413A-GCAT (1x50G) +* Mellanox\ |reg| ConnectX\ |reg|-4 50G MCX4131A-GCAT (1x50G) +* Mellanox\ |reg| ConnectX\ |reg|-4 50G MCX414A-BCAT (2x50G) +* Mellanox\ |reg| ConnectX\ |reg|-4 50G MCX415A-GCAT (1x50G) +* Mellanox\ |reg| ConnectX\ |reg|-4 50G MCX416A-BCAT (2x50G) +* Mellanox\ |reg| ConnectX\ |reg|-4 50G MCX416A-GCAT (2x50G) +* Mellanox\ |reg| ConnectX\ |reg|-4 50G MCX415A-CCAT (1x100G) +* Mellanox\ |reg| ConnectX\ |reg|-4 100G MCX416A-CCAT (2x100G) +* Mellanox\ |reg| ConnectX\ |reg|-4 Lx 10G MCX4111A-XCAT (1x10G) +* Mellanox\ |reg| ConnectX\ |reg|-4 Lx 10G MCX4121A-XCAT (2x10G) +* Mellanox\ |reg| ConnectX\ |reg|-4 Lx 25G MCX4111A-ACAT (1x25G) +* Mellanox\ |reg| ConnectX\ |reg|-4 Lx 25G MCX4121A-ACAT (2x25G) +* Mellanox\ |reg| ConnectX\ |reg|-4 Lx 40G MCX4131A-BCAT (1x40G) +* Mellanox\ |reg| ConnectX\ |reg|-5 100G MCX556A-ECAT (2x100G) +* Mellanox\ |reg| ConnectX\ |reg|-5 Ex EN 100G MCX516A-CDAT (2x100G) +* Mellanox\ |reg| ConnectX\ |reg|-6 200G MCX654106A-HCAT (2x200G) +* Mellanox\ |reg| ConnectX\ |reg|-6 Dx EN 100G MCX623106AN-CDAT (2x100G) +* Mellanox\ |reg| 
ConnectX\ |reg|-6 Dx EN 200G MCX623105AN-VDAT (1x200G)
+
+Quick Start Guide on OFED/EN
+----------------------------
+
+1. Download the latest Mellanox OFED/EN. For more information check the
+   `prerequisites`_.
+
+2. Install the required libraries and kernel modules either by installing
+   only the required set, or by installing the entire Mellanox OFED/EN::
+
+      ./mlnxofedinstall --upstream-libs --dpdk
+
+3. Verify the firmware is the correct one::
+
+      ibv_devinfo
+
+4. Verify that all port links are set to Ethernet::
+
+      mlxconfig -d query | grep LINK_TYPE
+      LINK_TYPE_P1                        ETH(2)
+      LINK_TYPE_P2                        ETH(2)
+
+   Link types may have to be configured to Ethernet::
+
+      mlxconfig -d set LINK_TYPE_P1/2=1/2/3
+
+      * LINK_TYPE_P1=<1|2|3>, 1=Infiniband 2=Ethernet 3=VPI(auto-sense)
+
+   For hypervisors, verify SR-IOV is enabled on the NIC::
+
+      mlxconfig -d query | grep SRIOV_EN
+      SRIOV_EN                            True(1)
+
+   If needed, configure SR-IOV::
+
+      mlxconfig -d set SRIOV_EN=1 NUM_OF_VFS=16
+      mlxfwreset -d reset
+
+5. Restart the driver::
+
+      /etc/init.d/openibd restart
+
+   or::
+
+      service openibd restart
+
+   If the link type was changed, the firmware must be reset as well::
+
+      mlxfwreset -d reset
+
+   For hypervisors, after the reset write the sysfs number of virtual
+   functions needed for the PF.
+
+   To dynamically instantiate a given number of virtual functions (VFs)::
+
+      echo [num_vfs] > /sys/class/infiniband/mlx5_0/device/sriov_numvfs
+
+6. Compile DPDK and you are ready to go. See instructions on
+   :ref:`Development Kit Build System `
+
+Enable switchdev mode
+---------------------
+
+Switchdev mode is an E-Switch mode that binds a representor to a VF.
+A representor is a DPDK port connected to a VF in such a way that, assuming
+there are no offload flows, each packet sent from the VF is received by the
+corresponding representor, while each packet sent to a representor is
+received by the VF.
+This is very useful in SR-IOV mode, where the first packet sent by the VF is
+received by the DPDK application, which decides whether this flow should be
+offloaded to the E-Switch. After the flow is offloaded, packets from the VF
+that match the flow are no longer received by the DPDK application.
+
+1. Enable SR-IOV mode::
+
+      mlxconfig -d set SRIOV_EN=true
+
+2. Configure the max number of VFs::
+
+      mlxconfig -d set NUM_OF_VFS=
+
+3. Reset the FW::
+
+      mlxfwreset -d reset
+
+4. Configure the actual number of VFs::
+
+      echo [num_vfs] > /sys/class/net/[net_device]/device/sriov_numvfs
+
+5. Unbind the device (it can be rebound after switchdev mode is enabled)::
+
+      echo -n "[pci_address]" > /sys/bus/pci/drivers/mlx5_core/unbind
+
+6. Enable switchdev mode::
+
+      echo switchdev > /sys/class/net/[net_device]/compat/devlink/mode
+
+Performance tuning
+------------------
+
+1. Configure aggressive CQE Zipping for maximum performance::
+
+      mlxconfig -d s CQE_COMPRESSION=1
+
+   To set it back to the default CQE Zipping mode use::
+
+      mlxconfig -d s CQE_COMPRESSION=0
+
+2. In case of virtualization:
+
+   - Make sure that the hypervisor kernel is 3.16 or newer.
+   - Configure boot with ``iommu=pt``.
+   - Use 1G huge pages.
+   - Make sure to allocate a VM on huge pages.
+   - Make sure to set CPU pinning.
+
+3. Use the CPUs on the NUMA node to which the PCIe adapter is connected,
+   for better performance. For VMs, verify that the right CPU
+   and NUMA node are pinned according to the above. Run::
+
+      lstopo-no-graphics
+
+   to identify the NUMA node to which the PCIe adapter is connected.
+
+4. If more than one adapter is used, and root complex capabilities allow
+   both adapters to be put on the same NUMA node without PCI bandwidth
+   degradation, it is recommended to locate both adapters on the same NUMA
+   node in order to forward packets from one to the other without a NUMA
+   performance penalty.
+
+5. Disable pause frames::
+
+      ethtool -A rx off tx off
+
+6. Verify that IO non-posted prefetch is disabled by default. This can be
+   checked via the BIOS configuration. Please contact your server provider
+   for more information about the settings.
+
+.. note::
+
+   On some machines, depending on the machine integrator, it is beneficial
+   to set the PCI max read request parameter to 1K. This can be
+   done in the following way:
+
+   To query the read request size use::
+
+      setpci -s 68.w
+
+   If the output is different than 3XXX, set it by::
+
+      setpci -s 68.w=3XXX
+
+   The XXX can be different on different systems. Make sure to configure
+   according to the setpci output.
+
+7. To minimize the overhead of searching Memory Regions:
+
+   - ``--socket-mem`` is recommended to pin memory by a predictable amount.
+   - Configure a per-lcore cache when creating Mempools for packet buffers.
+   - Refrain from dynamically allocating/freeing memory at run time.
+
+.. _mlx5_offloads_support:
+
+Supported hardware offloads
+---------------------------
+
+.. table:: Minimal SW/HW versions for queue offloads
+
+   ============== ===== ===== ========= ===== ========== ==========
+   Offload        DPDK  Linux rdma-core OFED  firmware   hardware
+   ============== ===== ===== ========= ===== ========== ==========
+   common base    17.11 4.14  16        4.2-1 12.21.1000 ConnectX-4
+   checksums      17.11 4.14  16        4.2-1 12.21.1000 ConnectX-4
+   Rx timestamp   17.11 4.14  16        4.2-1 12.21.1000 ConnectX-4
+   TSO            17.11 4.14  16        4.2-1 12.21.1000 ConnectX-4
+   LRO            19.08 N/A   N/A       4.6-4 16.25.6406 ConnectX-5
+   ============== ===== ===== ========= ===== ========== ==========
+
+..
table:: Minimal SW/HW versions for rte_flow offloads + + +-----------------------+-----------------+-----------------+ + | Offload | with E-Switch | with NIC | + +=======================+=================+=================+ + | Count | | DPDK 19.05 | | DPDK 19.02 | + | | | OFED 4.6 | | OFED 4.6 | + | | | rdma-core 24 | | rdma-core 23 | + | | | ConnectX-5 | | ConnectX-5 | + +-----------------------+-----------------+-----------------+ + | Drop | | DPDK 19.05 | | DPDK 18.11 | + | | | OFED 4.6 | | OFED 4.5 | + | | | rdma-core 24 | | rdma-core 23 | + | | | ConnectX-5 | | ConnectX-4 | + +-----------------------+-----------------+-----------------+ + | Queue / RSS | | | | DPDK 18.11 | + | | | N/A | | OFED 4.5 | + | | | | | rdma-core 23 | + | | | | | ConnectX-4 | + +-----------------------+-----------------+-----------------+ + | Encapsulation | | DPDK 19.05 | | DPDK 19.02 | + | (VXLAN / NVGRE / RAW) | | OFED 4.7-1 | | OFED 4.6 | + | | | rdma-core 24 | | rdma-core 23 | + | | | ConnectX-5 | | ConnectX-5 | + +-----------------------+-----------------+-----------------+ + | Encapsulation | | DPDK 19.11 | | DPDK 19.11 | + | GENEVE | | OFED 4.7-3 | | OFED 4.7-3 | + | | | rdma-core 27 | | rdma-core 27 | + | | | ConnectX-5 | | ConnectX-5 | + +-----------------------+-----------------+-----------------+ + | | Header rewrite | | DPDK 19.05 | | DPDK 19.02 | + | | (set_ipv4_src / | | OFED 4.7-1 | | OFED 4.7-1 | + | | set_ipv4_dst / | | rdma-core 24 | | rdma-core 24 | + | | set_ipv6_src / | | ConnectX-5 | | ConnectX-5 | + | | set_ipv6_dst / | | | | | + | | set_tp_src / | | | | | + | | set_tp_dst / | | | | | + | | dec_ttl / | | | | | + | | set_ttl / | | | | | + | | set_mac_src / | | | | | + | | set_mac_dst) | | | | | + +-----------------------+-----------------+-----------------+ + | | Header rewrite | | DPDK 20.02 | | DPDK 20.02 | + | | (set_dscp) | | OFED 5.0 | | OFED 5.0 | + | | | | rdma-core 24 | | rdma-core 24 | + | | | | ConnectX-5 | | ConnectX-5 | + +-----------------------+-----------------+-----------------+ + | Jump | | DPDK 19.05 | | DPDK 19.02 | + | | | OFED 4.7-1 | | OFED 4.7-1 | + | | | rdma-core 24 | | N/A | + | | | ConnectX-5 | | ConnectX-5 | + +-----------------------+-----------------+-----------------+ + | Mark / Flag | | DPDK 19.05 | | DPDK 18.11 | + | | | OFED 4.6 | | OFED 4.5 | + | | | rdma-core 24 | | rdma-core 23 | + | | | ConnectX-5 | | ConnectX-4 | + +-----------------------+-----------------+-----------------+ + | Port ID | | DPDK 19.05 | | N/A | + | | | OFED 4.7-1 | | N/A | + | | | rdma-core 24 | | N/A | + | | | ConnectX-5 | | N/A | + +-----------------------+-----------------+-----------------+ + | | VLAN | | DPDK 19.11 | | DPDK 19.11 | + | | (of_pop_vlan / | | OFED 4.7-1 | | OFED 4.7-1 | + | | of_push_vlan / | | ConnectX-5 | | ConnectX-5 | + | | of_set_vlan_pcp / | | | | | + | | of_set_vlan_vid) | | | | | + +-----------------------+-----------------+-----------------+ + | Hairpin | | | | DPDK 19.11 | + | | | N/A | | OFED 4.7-3 | + | | | | | rdma-core 26 | + | | | | | ConnectX-5 | + +-----------------------+-----------------+-----------------+ + | Meta data | | DPDK 19.11 | | DPDK 19.11 | + | | | OFED 4.7-3 | | OFED 4.7-3 | + | | | rdma-core 26 | | rdma-core 26 | + | | | ConnectX-5 | | ConnectX-5 | + +-----------------------+-----------------+-----------------+ + | Metering | | DPDK 19.11 | | DPDK 19.11 | + | | | OFED 4.7-3 | | OFED 4.7-3 | + | | | rdma-core 26 | | rdma-core 26 | + | | | ConnectX-5 | | ConnectX-5 | + +-----------------------+-----------------+-----------------+ + 
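+
+As a rough usage sketch tying the table above to the testpmd CLI (the port
+number, source address and mark value below are arbitrary examples, not
+requirements of this guide), a rule combining the ``Mark / Flag`` and
+``Queue / RSS`` offloads can be created as follows::
+
+   testpmd> flow create 0 ingress pattern eth / ipv4 src is 10.0.0.1 / end actions mark id 42 / queue index 0 / end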
+Notes for metadata +------------------ + +MARK and META items are interrelated with datapath - they might move from/to +the applications in mbuf fields. Hence, zero value for these items has the +special meaning - it means "no metadata are provided", not zero values are +treated by applications and PMD as valid ones. + +Moreover in the flow engine domain the value zero is acceptable to match and +set, and we should allow to specify zero values as rte_flow parameters for the +META and MARK items and actions. In the same time zero mask has no meaning and +should be rejected on validation stage. + +Notes for rte_flow +------------------ + +Flows are not cached in the driver. +When stopping a device port, all the flows created on this port from the +application will be flushed automatically in the background. +After stopping the device port, all flows on this port become invalid and +not represented in the system. +All references to these flows held by the application should be discarded +directly but neither destroyed nor flushed. + +The application should re-create the flows as required after the port restart. + +Notes for testpmd +----------------- + +Compared to librte_pmd_mlx4 that implements a single RSS configuration per +port, librte_pmd_mlx5 supports per-protocol RSS configuration. + +Since ``testpmd`` defaults to IP RSS mode and there is currently no +command-line parameter to enable additional protocols (UDP and TCP as well +as IP), the following commands must be entered from its CLI to get the same +behavior as librte_pmd_mlx4:: + + > port stop all + > port config all rss all + > port start all + +Usage example +------------- + +This section demonstrates how to launch **testpmd** with Mellanox +ConnectX-4/ConnectX-5/ConnectX-6/BlueField devices managed by librte_pmd_mlx5. + +#. Load the kernel modules:: + + modprobe -a ib_uverbs mlx5_core mlx5_ib + + Alternatively if MLNX_OFED/MLNX_EN is fully installed, the following script + can be run:: + + /etc/init.d/openibd restart + + .. note:: + + User space I/O kernel modules (uio and igb_uio) are not used and do + not have to be loaded. + +#. Make sure Ethernet interfaces are in working order and linked to kernel + verbs. Related sysfs entries should be present:: + + ls -d /sys/class/net/*/device/infiniband_verbs/uverbs* | cut -d / -f 5 + + Example output:: + + eth30 + eth31 + eth32 + eth33 + +#. Optionally, retrieve their PCI bus addresses for whitelisting:: + + { + for intf in eth2 eth3 eth4 eth5; + do + (cd "/sys/class/net/${intf}/device/" && pwd -P); + done; + } | + sed -n 's,.*/\(.*\),-w \1,p' + + Example output:: + + -w 0000:05:00.1 + -w 0000:06:00.0 + -w 0000:06:00.1 + -w 0000:05:00.0 + +#. Request huge pages:: + + echo 1024 > /sys/kernel/mm/hugepages/hugepages-2048kB/nr_hugepages/nr_hugepages + +#. Start testpmd with basic parameters:: + + testpmd -l 8-15 -n 4 -w 05:00.0 -w 05:00.1 -w 06:00.0 -w 06:00.1 -- --rxq=2 --txq=2 -i + + Example output:: + + [...] 
+ EAL: PCI device 0000:05:00.0 on NUMA socket 0 + EAL: probe driver: 15b3:1013 librte_pmd_mlx5 + PMD: librte_pmd_mlx5: PCI information matches, using device "mlx5_0" (VF: false) + PMD: librte_pmd_mlx5: 1 port(s) detected + PMD: librte_pmd_mlx5: port 1 MAC address is e4:1d:2d:e7:0c:fe + EAL: PCI device 0000:05:00.1 on NUMA socket 0 + EAL: probe driver: 15b3:1013 librte_pmd_mlx5 + PMD: librte_pmd_mlx5: PCI information matches, using device "mlx5_1" (VF: false) + PMD: librte_pmd_mlx5: 1 port(s) detected + PMD: librte_pmd_mlx5: port 1 MAC address is e4:1d:2d:e7:0c:ff + EAL: PCI device 0000:06:00.0 on NUMA socket 0 + EAL: probe driver: 15b3:1013 librte_pmd_mlx5 + PMD: librte_pmd_mlx5: PCI information matches, using device "mlx5_2" (VF: false) + PMD: librte_pmd_mlx5: 1 port(s) detected + PMD: librte_pmd_mlx5: port 1 MAC address is e4:1d:2d:e7:0c:fa + EAL: PCI device 0000:06:00.1 on NUMA socket 0 + EAL: probe driver: 15b3:1013 librte_pmd_mlx5 + PMD: librte_pmd_mlx5: PCI information matches, using device "mlx5_3" (VF: false) + PMD: librte_pmd_mlx5: 1 port(s) detected + PMD: librte_pmd_mlx5: port 1 MAC address is e4:1d:2d:e7:0c:fb + Interactive-mode selected + Configuring Port 0 (socket 0) + PMD: librte_pmd_mlx5: 0x8cba80: TX queues number update: 0 -> 2 + PMD: librte_pmd_mlx5: 0x8cba80: RX queues number update: 0 -> 2 + Port 0: E4:1D:2D:E7:0C:FE + Configuring Port 1 (socket 0) + PMD: librte_pmd_mlx5: 0x8ccac8: TX queues number update: 0 -> 2 + PMD: librte_pmd_mlx5: 0x8ccac8: RX queues number update: 0 -> 2 + Port 1: E4:1D:2D:E7:0C:FF + Configuring Port 2 (socket 0) + PMD: librte_pmd_mlx5: 0x8cdb10: TX queues number update: 0 -> 2 + PMD: librte_pmd_mlx5: 0x8cdb10: RX queues number update: 0 -> 2 + Port 2: E4:1D:2D:E7:0C:FA + Configuring Port 3 (socket 0) + PMD: librte_pmd_mlx5: 0x8ceb58: TX queues number update: 0 -> 2 + PMD: librte_pmd_mlx5: 0x8ceb58: RX queues number update: 0 -> 2 + Port 3: E4:1D:2D:E7:0C:FB + Checking link statuses... + Port 0 Link Up - speed 40000 Mbps - full-duplex + Port 1 Link Up - speed 40000 Mbps - full-duplex + Port 2 Link Up - speed 10000 Mbps - full-duplex + Port 3 Link Up - speed 10000 Mbps - full-duplex + Done + testpmd> + +How to dump flows +----------------- + +This section demonstrates how to dump flows. Currently, it's possible to dump +all flows with assistance of external tools. + +#. 2 ways to get flow raw file: + + - Using testpmd CLI: + + .. code-block:: console + + testpmd> flow dump + + - call rte_flow_dev_dump api: + + .. code-block:: console + + rte_flow_dev_dump(port, file, NULL); + +#. Dump human-readable flows from raw file: + + Get flow parsing tool from: https://github.com/Mellanox/mlx_steering_dump + + .. code-block:: console + + mlx_steering_dump.py -f diff --git a/src/spdk/dpdk/doc/guides/nics/mvneta.rst b/src/spdk/dpdk/doc/guides/nics/mvneta.rst new file mode 100644 index 000000000..c8b00ddf2 --- /dev/null +++ b/src/spdk/dpdk/doc/guides/nics/mvneta.rst @@ -0,0 +1,171 @@ +.. SPDX-License-Identifier: BSD-3-Clause + Copyright(c) 2018 Marvell International Ltd. + Copyright(c) 2018 Semihalf. + All rights reserved. + +MVNETA Poll Mode Driver +======================= + +The MVNETA PMD (librte_pmd_mvneta) provides poll mode driver support +for the Marvell NETA 1/2.5 Gbps adapter. + +Detailed information about SoCs that use PPv2 can be obtained here: + +* https://www.marvell.com/embedded-processors/armada-3700/ + +.. Note:: + + Due to external dependencies, this driver is disabled by default. 
It must + be enabled manually by setting relevant configuration option manually. + Please refer to `Config File Options`_ section for further details. + + +Features +-------- + +Features of the MVNETA PMD are: + +- Start/stop +- tx/rx_queue_setup +- tx/rx_burst +- Speed capabilities +- Jumbo frame +- MTU update +- Promiscuous mode +- Unicast MAC filter +- Link status +- CRC offload +- L3 checksum offload +- L4 checksum offload +- Packet type parsing +- Basic stats + + +Limitations +----------- + +- Flushing vlans added for filtering is not possible due to MUSDK missing + functionality. Current workaround is to reset board so that NETA has a + chance to start in a sane state. + +Prerequisites +------------- + +- Custom Linux Kernel sources + + .. code-block:: console + + git clone https://github.com/MarvellEmbeddedProcessors/linux-marvell.git -b linux-4.4.120-armada-18.09 + + +- MUSDK (Marvell User-Space SDK) sources + + .. code-block:: console + + git clone https://github.com/MarvellEmbeddedProcessors/musdk-marvell.git -b musdk-armada-18.09 + + MUSDK is a light-weight library that provides direct access to Marvell's + NETA. Alternatively prebuilt MUSDK library can be + requested from `Marvell Extranet `_. Once + approval has been granted, library can be found by typing ``musdk`` in + the search box. + + MUSDK must be configured with the following features: + + .. code-block:: console + + --enable-pp2=no --enable-neta + +- DPDK environment + + Follow the DPDK :ref:`Getting Started Guide for Linux ` to setup + DPDK environment. + +Pre-Installation Configuration +------------------------------ + +Config File Options +~~~~~~~~~~~~~~~~~~~ + +The following options can be modified in the ``config`` file. + +- ``CONFIG_RTE_LIBRTE_MVNETA_PMD`` (default ``n``) + + Toggle compilation of the librte_pmd_mvneta driver. + +Runtime options +~~~~~~~~~~~~~~~ + +The following ``devargs`` options can be enabled at runtime. They must +be passed as part of EAL arguments. + +- ``iface`` (mandatory, with no default value) + + The name of port (owned by MUSDK) that should be enabled in DPDK. + This options can be repeated resulting in a list of ports to be + enabled. For instance below will enable ``eth0`` and ``eth1`` ports. + +.. code-block:: console + + ./testpmd --vdev=net_mvneta,iface=eth0,iface=eth1 \ + -c 3 -- -i --p 3 -a + + +Building DPDK +------------- + +Driver needs precompiled MUSDK library during compilation. + +.. code-block:: console + + export CROSS_COMPILE=/bin/aarch64-linux-gnu- + ./bootstrap + ./configure --host=aarch64-linux-gnu --enable-pp2=no --enable-neta + make install + +MUSDK will be installed to `usr/local` under current directory. +For the detailed build instructions please consult ``doc/musdk_get_started.txt``. + +Before the DPDK build process the environmental variable ``LIBMUSDK_PATH`` with +the path to the MUSDK installation directory needs to be exported. + +.. code-block:: console + + export LIBMUSDK_PATH=/usr/local + export CROSS=aarch64-linux-gnu- + make config T=arm64-armv8a-linux-gcc + sed -ri 's,(MVNETA_PMD=)n,\1y,' build/.config + make + +Usage Example +------------- + +MVNETA PMD requires extra out of tree kernel modules to function properly. +`musdk_uio` and `mv_neta_uio` sources are part of the MUSDK. Please consult +``doc/musdk_get_started.txt`` for the detailed build instructions. + +.. code-block:: console + + insmod musdk_uio.ko + insmod mv_neta_uio.ko + +Additionally interfaces used by DPDK application need to be put up: + +.. 
code-block:: console + + ip link set eth0 up + ip link set eth1 up + +In order to run testpmd example application following command can be used: + +.. code-block:: console + + ./testpmd --vdev=net_mvneta,iface=eth0,iface=eth1 -c 3 -- \ + -i --p 3 -a --txd 256 --rxd 128 --rxq=1 --txq=1 --nb-cores=1 + + +In order to run l2fwd example application following command can be used: + +.. code-block:: console + + ./l2fwd --vdev=net_mvneta,iface=eth0,iface=eth1 -c 3 -- -T 1 -p 3 diff --git a/src/spdk/dpdk/doc/guides/nics/mvpp2.rst b/src/spdk/dpdk/doc/guides/nics/mvpp2.rst new file mode 100644 index 000000000..19cab1cbc --- /dev/null +++ b/src/spdk/dpdk/doc/guides/nics/mvpp2.rst @@ -0,0 +1,785 @@ +.. SPDX-License-Identifier: BSD-3-Clause + Copyright(c) 2017 Marvell International Ltd. + Copyright(c) 2017 Semihalf. + +.. _mvpp2_poll_mode_driver: + +MVPP2 Poll Mode Driver +====================== + +The MVPP2 PMD (librte_pmd_mvpp2) provides poll mode driver support +for the Marvell PPv2 (Packet Processor v2) 1/10 Gbps adapter. + +Detailed information about SoCs that use PPv2 can be obtained here: + +* https://www.marvell.com/embedded-processors/armada-70xx/ +* https://www.marvell.com/embedded-processors/armada-80xx/ + +.. Note:: + + Due to external dependencies, this driver is disabled by default. It must + be enabled manually by setting relevant configuration option manually. + Please refer to `Config File Options`_ section for further details. + + +Features +-------- + +Features of the MVPP2 PMD are: + +- Speed capabilities +- Link status +- Tx Queue start/stop +- MTU update +- Jumbo frame +- Promiscuous mode +- Allmulticast mode +- Unicast MAC filter +- Multicast MAC filter +- RSS hash +- VLAN filter +- CRC offload +- L3 checksum offload +- L4 checksum offload +- Packet type parsing +- Basic stats +- :ref:`Extended stats ` +- RX flow control +- Scattered TX frames +- :ref:`QoS ` +- :ref:`Flow API ` +- :ref:`Traffic metering and policing ` +- :ref:`Traffic Management API ` + +Limitations +----------- + +- Number of lcores is limited to 9 by MUSDK internal design. If more lcores + need to be allocated, locking will have to be considered. Number of available + lcores can be changed via ``MRVL_MUSDK_HIFS_RESERVED`` define in + ``mrvl_ethdev.c`` source file. + +- Flushing vlans added for filtering is not possible due to MUSDK missing + functionality. Current workaround is to reset board so that PPv2 has a + chance to start in a sane state. + +- MUSDK architecture does not support changing configuration in run time. + All necessary configurations should be done before first dev_start(). + +- RX queue start/stop is not supported. + +- Current implementation does not support replacement of buffers in the HW buffer pool + at run time, so it is responsibility of the application to ensure that MTU does not exceed the configured buffer size. + +- Configuring TX flow control currently is not supported. + +- In current implementation, mechanism for acknowledging transmitted packets (``tx_done_cleanup``) is not supported. + +- Running more than one DPDK-MUSDK application simultaneously is not supported. + + +Prerequisites +------------- + +- Custom Linux Kernel sources + + .. code-block:: console + + git clone https://github.com/MarvellEmbeddedProcessors/linux-marvell.git -b linux-4.4.120-armada-18.09 + +- Out of tree `mvpp2x_sysfs` kernel module sources + + .. 
code-block:: console + + git clone https://github.com/MarvellEmbeddedProcessors/mvpp2x-marvell.git -b mvpp2x-armada-18.09 + +- MUSDK (Marvell User-Space SDK) sources + + .. code-block:: console + + git clone https://github.com/MarvellEmbeddedProcessors/musdk-marvell.git -b musdk-armada-18.09 + + MUSDK is a light-weight library that provides direct access to Marvell's + PPv2 (Packet Processor v2). Alternatively prebuilt MUSDK library can be + requested from `Marvell Extranet `_. Once + approval has been granted, library can be found by typing ``musdk`` in + the search box. + + To get better understanding of the library one can consult documentation + available in the ``doc`` top level directory of the MUSDK sources. + +- DPDK environment + + Follow the DPDK :ref:`Getting Started Guide for Linux ` to setup + DPDK environment. + + +Config File Options +------------------- + +The following options can be modified in the ``config`` file. + +- ``CONFIG_RTE_LIBRTE_MVPP2_PMD`` (default ``n``) + + Toggle compilation of the librte mvpp2 driver. + + .. Note:: + + When MVPP2 PMD is enabled ``CONFIG_RTE_LIBRTE_MVNETA_PMD`` must be disabled + + +Building DPDK +------------- + +Driver needs precompiled MUSDK library during compilation. + +.. code-block:: console + + export CROSS_COMPILE=/bin/aarch64-linux-gnu- + ./bootstrap + ./configure --host=aarch64-linux-gnu + make install + +MUSDK will be installed to `usr/local` under current directory. +For the detailed build instructions please consult ``doc/musdk_get_started.txt``. + +Before the DPDK build process the environmental variable ``LIBMUSDK_PATH`` with +the path to the MUSDK installation directory needs to be exported. + +For additional instructions regarding DPDK cross compilation please refer to :doc:`Cross compile DPDK for ARM64 <../linux_gsg/cross_build_dpdk_for_arm64>`. + +.. code-block:: console + + export LIBMUSDK_PATH=/usr/local + export CROSS=/bin/aarch64-linux-gnu- + export RTE_KERNELDIR= + export RTE_TARGET=arm64-armv8a-linux-gcc + + make config T=arm64-armv8a-linux-gcc + sed -i "s/MVNETA_PMD=y/MVNETA_PMD=n/" build/.config + sed -i "s/MVPP2_PMD=n/MVPP2_PMD=y/" build/.config + make + +Usage Example +------------- + +MVPP2 PMD requires extra out of tree kernel modules to function properly. +`musdk_cma` sources are part of the MUSDK. Please consult +``doc/musdk_get_started.txt`` for the detailed build instructions. +For `mvpp2x_sysfs` please consult ``Documentation/pp22_sysfs.txt`` for the +detailed build instructions. + +.. code-block:: console + + insmod musdk_cma.ko + insmod mvpp2x_sysfs.ko + +Additionally interfaces used by DPDK application need to be put up: + +.. code-block:: console + + ip link set eth0 up + ip link set eth2 up + +In order to run testpmd example application following command can be used: + +.. code-block:: console + + ./testpmd --vdev=eth_mvpp2,iface=eth0,iface=eth2 -c 7 -- \ + --burst=128 --txd=2048 --rxd=1024 --rxq=2 --txq=2 --nb-cores=2 \ + -i -a --rss-udp + +.. 
_extstats: + +Extended stats +-------------- + +MVPP2 PMD supports the following extended statistics: + + - ``rx_bytes``: number of RX bytes + - ``rx_packets``: number of RX packets + - ``rx_unicast_packets``: number of RX unicast packets + - ``rx_errors``: number of RX MAC errors + - ``rx_fullq_dropped``: number of RX packets dropped due to full RX queue + - ``rx_bm_dropped``: number of RX packets dropped due to no available buffers in the HW pool + - ``rx_early_dropped``: number of RX packets that were early dropped + - ``rx_fifo_dropped``: number of RX packets dropped due to RX fifo overrun + - ``rx_cls_dropped``: number of RX packets dropped by classifier + - ``tx_bytes``: number of TX bytes + - ``tx_packets``: number of TX packets + - ``tx_unicast_packets``: number of TX unicast packets + - ``tx_errors``: number of TX MAC errors + + +.. _qossupport: + +QoS Configuration +----------------- + +QoS configuration is done through external configuration file. Path to the +file must be given as `cfg` in driver's vdev parameter list. + +Configuration syntax +~~~~~~~~~~~~~~~~~~~~ + +.. code-block:: console + + [policer ] + token_unit = + color = + cir = + ebs = + cbs = + + [port default] + default_tc = + mapping_priority = + + rate_limit_enable = + rate_limit = + burst_size = + + default_policer = + + [port tc ] + rxq = + pcp = + dscp = + default_color = + + [port tc ] + rxq = + pcp = + dscp = + + [port txq ] + sched_mode = + wrr_weight = + + rate_limit_enable = + rate_limit = + burst_size = + +Where: + +- ````: DPDK Port number (0..n). + +- ````: Default traffic class (e.g. 0) + +- ````: QoS priority for mapping (`ip`, `vlan`, `ip/vlan` or `vlan/ip`). + +- ````: Traffic Class to be configured. + +- ````: List of DPDK RX queues (e.g. 0 1 3-4) + +- ````: List of PCP values to handle in particular TC (e.g. 0 1 3-4 7). + +- ````: List of DSCP values to handle in particular TC (e.g. 0-12 32-48 63). + +- ````: Id of the policer configuration section to be used as default. + +- ````: Id of the policer configuration section (0..31). + +- ````: Policer token unit (`bytes` or `packets`). + +- ````: Policer color mode (`aware` or `blind`). + +- ````: Committed information rate in unit of kilo bits per second (data rate) or packets per second. + +- ````: Committed burst size in unit of kilo bytes or number of packets. + +- ````: Excess burst size in unit of kilo bytes or number of packets. + +- ````: Default color for specific tc. + +- ````: Enables per port or per txq rate limiting (`0`/`1` to disable/enable). + +- ````: Committed information rate, in kilo bits per second. + +- ````: Committed burst size, in kilo bytes. + +- ````: Egress scheduler mode (`wrr` or `sp`). + +- ````: Txq weight. + +Setting PCP/DSCP values for the default TC is not required. All PCP/DSCP +values not assigned explicitly to particular TC will be handled by the +default TC. + +Configuration file example +^^^^^^^^^^^^^^^^^^^^^^^^^^ + +.. 
code-block:: console + + [policer 0] + token_unit = bytes + color = blind + cir = 100000 + ebs = 64 + cbs = 64 + + [port 0 default] + default_tc = 0 + mapping_priority = ip + + rate_limit_enable = 1 + rate_limit = 1000 + burst_size = 2000 + + [port 0 tc 0] + rxq = 0 1 + + [port 0 txq 0] + sched_mode = wrr + wrr_weight = 10 + + [port 0 txq 1] + sched_mode = wrr + wrr_weight = 100 + + [port 0 txq 2] + sched_mode = sp + + [port 0 tc 1] + rxq = 2 + pcp = 5 6 7 + dscp = 26-38 + + [port 1 default] + default_tc = 0 + mapping_priority = vlan/ip + + default_policer = 0 + + [port 1 tc 0] + rxq = 0 + dscp = 10 + + [port 1 tc 1] + rxq = 1 + dscp = 11-20 + + [port 1 tc 2] + rxq = 2 + dscp = 30 + + [port 1 txq 0] + rate_limit_enable = 1 + rate_limit = 10000 + burst_size = 2000 + +Usage example +^^^^^^^^^^^^^ + +.. code-block:: console + + ./testpmd --vdev=eth_mvpp2,iface=eth0,iface=eth2,cfg=/home/user/mrvl.conf \ + -c 7 -- -i -a --disable-hw-vlan-strip --rxq=3 --txq=3 + +.. _flowapi: + +Flow API +-------- + +PPv2 offers packet classification capabilities via classifier engine which +can be configured via generic flow API offered by DPDK. + +The :ref:`flow_isolated_mode` is supported. + +For an additional description please refer to DPDK :doc:`../prog_guide/rte_flow`. + +Supported flow actions +~~~~~~~~~~~~~~~~~~~~~~ + +Following flow action items are supported by the driver: + +* DROP +* QUEUE + +Supported flow items +~~~~~~~~~~~~~~~~~~~~ + +Following flow items and their respective fields are supported by the driver: + +* ETH + + * source MAC + * destination MAC + * ethertype + +* VLAN + + * PCP + * VID + +* IPV4 + + * DSCP + * protocol + * source address + * destination address + +* IPV6 + + * flow label + * next header + * source address + * destination address + +* UDP + + * source port + * destination port + +* TCP + + * source port + * destination port + +Classifier match engine +~~~~~~~~~~~~~~~~~~~~~~~ + +Classifier has an internal match engine which can be configured to +operate in either exact or maskable mode. + +Mode is selected upon creation of the first unique flow rule as follows: + +* maskable, if key size is up to 8 bytes. +* exact, otherwise, i.e for keys bigger than 8 bytes. + +Where the key size equals the number of bytes of all fields specified +in the flow items. + +.. 
table:: Examples of key size calculation + + +----------------------------------------------------------------------------+-------------------+-------------+ + | Flow pattern | Key size in bytes | Used engine | + +============================================================================+===================+=============+ + | ETH (destination MAC) / VLAN (VID) | 6 + 2 = 8 | Maskable | + +----------------------------------------------------------------------------+-------------------+-------------+ + | VLAN (VID) / IPV4 (source address) | 2 + 4 = 6 | Maskable | + +----------------------------------------------------------------------------+-------------------+-------------+ + | TCP (source port, destination port) | 2 + 2 = 4 | Maskable | + +----------------------------------------------------------------------------+-------------------+-------------+ + | VLAN (priority) / IPV4 (source address) | 1 + 4 = 5 | Maskable | + +----------------------------------------------------------------------------+-------------------+-------------+ + | IPV4 (destination address) / UDP (source port, destination port) | 6 + 2 + 2 = 10 | Exact | + +----------------------------------------------------------------------------+-------------------+-------------+ + | VLAN (VID) / IPV6 (flow label, destination address) | 2 + 3 + 16 = 21 | Exact | + +----------------------------------------------------------------------------+-------------------+-------------+ + | IPV4 (DSCP, source address, destination address) | 1 + 4 + 4 = 9 | Exact | + +----------------------------------------------------------------------------+-------------------+-------------+ + | IPV6 (flow label, source address, destination address) | 3 + 16 + 16 = 35 | Exact | + +----------------------------------------------------------------------------+-------------------+-------------+ + +From the user perspective maskable mode means that masks specified +via flow rules are respected. In case of exact match mode, masks +which do not provide exact matching (all bits masked) are ignored. + +If the flow matches more than one classifier rule the first +(with the lowest index) matched takes precedence. + +Flow rules usage example +~~~~~~~~~~~~~~~~~~~~~~~~ + +Before proceeding run testpmd user application: + +.. code-block:: console + + ./testpmd --vdev=eth_mvpp2,iface=eth0,iface=eth2 -c 3 -- -i --p 3 -a --disable-hw-vlan-strip + +Example #1 +^^^^^^^^^^ + +.. code-block:: console + + testpmd> flow create 0 ingress pattern eth src is 10:11:12:13:14:15 / end actions drop / end + +In this case key size is 6 bytes thus maskable type is selected. Testpmd +will set mask to ff:ff:ff:ff:ff:ff i.e traffic explicitly matching +above rule will be dropped. + +Example #2 +^^^^^^^^^^ + +.. code-block:: console + + testpmd> flow create 0 ingress pattern ipv4 src spec 10.10.10.0 src mask 255.255.255.0 / tcp src spec 0x10 src mask 0x10 / end action drop / end + +In this case key size is 8 bytes thus maskable type is selected. +Flows which have IPv4 source addresses ranging from 10.10.10.0 to 10.10.10.255 +and tcp source port set to 16 will be dropped. + +Example #3 +^^^^^^^^^^ + +.. code-block:: console + + testpmd> flow create 0 ingress pattern vlan vid spec 0x10 vid mask 0x10 / ipv4 src spec 10.10.1.1 src mask 255.255.0.0 dst spec 11.11.11.1 dst mask 255.255.255.0 / end actions drop / end + +In this case key size is 10 bytes thus exact type is selected. +Even though each item has partial mask set, masks will be ignored. 
+As a result only flows with VID set to 16 and IPv4 source and destination +addresses set to 10.10.1.1 and 11.11.11.1 respectively will be dropped. + +Limitations +~~~~~~~~~~~ + +Following limitations need to be taken into account while creating flow rules: + +* For IPv4 exact match type the key size must be up to 12 bytes. +* For IPv6 exact match type the key size must be up to 36 bytes. +* Following fields cannot be partially masked (all masks are treated as + if they were exact): + + * ETH: ethertype + * VLAN: PCP, VID + * IPv4: protocol + * IPv6: next header + * TCP/UDP: source port, destination port + +* Only one classifier table can be created thus all rules in the table + have to match table format. Table format is set during creation of + the first unique flow rule. +* Up to 5 fields can be specified per flow rule. +* Up to 20 flow rules can be added. + +For additional information about classifier please consult +``doc/musdk_cls_user_guide.txt``. + +.. _mtrapi: + +Traffic metering and policing +----------------------------- + +MVPP2 PMD supports DPDK traffic metering and policing that allows the following: + +1. Meter ingress traffic. +2. Do policing. +3. Gather statistics. + +For an additional description please refer to DPDK :doc:`Traffic Metering and Policing API <../prog_guide/traffic_metering_and_policing>`. + +The policer objects defined by this feature can work with the default policer defined via config file as described in :ref:`QoS Support `. + +Limitations +~~~~~~~~~~~ + +The following capabilities are not supported: + +- MTR object meter DSCP table update +- MTR object policer action update +- MTR object enabled statistics + +Usage example +~~~~~~~~~~~~~ + +1. Run testpmd user app: + + .. code-block:: console + + ./testpmd --vdev=eth_mvpp2,iface=eth0,iface=eth2 -c 6 -- -i -p 3 -a --txd 1024 --rxd 1024 + +2. Create meter profile: + + .. code-block:: console + + testpmd> add port meter profile 0 0 srtcm_rfc2697 2000 256 256 + +3. Create meter: + + .. code-block:: console + + testpmd> create port meter 0 0 0 yes d d d 0 1 0 + +4. Create flow rule witch meter attached: + + .. code-block:: console + + testpmd> flow create 0 ingress pattern ipv4 src is 10.10.10.1 / end actions meter mtr_id 0 / end + +For a detailed usage description please refer to "Traffic Metering and Policing" section in DPDK :doc:`Testpmd Runtime Functions <../testpmd_app_ug/testpmd_funcs>`. + + + +.. _tmapi: + +Traffic Management API +---------------------- + +MVPP2 PMD supports generic DPDK Traffic Management API which allows to +configure the following features: + +1. Hierarchical scheduling +2. Traffic shaping +3. Congestion management +4. Packet marking + +Internally TM is represented by a hierarchy (tree) of nodes. +Node which has a parent is called a leaf whereas node without +parent is called a non-leaf (root). +MVPP2 PMD supports two level hierarchy where level 0 represents ports and level 1 represents tx queues of a given port. + +.. figure:: img/mvpp2_tm.* + +Nodes hold following types of settings: + +- for egress scheduler configuration: weight +- for egress rate limiter: private shaper +- bitmask indicating which statistics counters will be read + +Hierarchy is always constructed from the top, i.e first a root node is added +then some number of leaf nodes. Number of leaf nodes cannot exceed number +of configured tx queues. + +After hierarchy is complete it can be committed. 
+ + +For an additional description please refer to DPDK :doc:`Traffic Management API <../prog_guide/traffic_management>`. + +Limitations +~~~~~~~~~~~ + +The following capabilities are not supported: + +- Traffic manager WRED profile and WRED context +- Traffic manager shared shaper update +- Traffic manager packet marking +- Maximum number of levels in hierarchy is 2 +- Currently dynamic change of a hierarchy is not supported + +Usage example +~~~~~~~~~~~~~ + +For a detailed usage description please refer to "Traffic Management" section in DPDK :doc:`Testpmd Runtime Functions <../testpmd_app_ug/testpmd_funcs>`. + +1. Run testpmd as follows: + + .. code-block:: console + + ./testpmd --vdev=net_mrvl,iface=eth0,iface=eth2,cfg=./qos_config -c 7 -- \ + -i -p 3 --disable-hw-vlan-strip --rxq 3 --txq 3 --txd 1024 --rxd 1024 + +2. Stop all ports: + + .. code-block:: console + + testpmd> port stop all + +3. Add shaper profile: + + .. code-block:: console + + testpmd> add port tm node shaper profile 0 0 900000 70000 0 + + Parameters have following meaning:: + + 0 - Id of a port. + 0 - Id of a new shaper profile. + 900000 - Shaper rate in bytes/s. + 70000 - Bucket size in bytes. + 0 - Packet length adjustment - ignored. + +4. Add non-leaf node for port 0: + + .. code-block:: console + + testpmd> add port tm nonleaf node 0 3 -1 0 0 0 0 0 1 3 0 + + Parameters have following meaning:: + + 0 - Id of a port + 3 - Id of a new node. + -1 - Indicate that root does not have a parent. + 0 - Priority of the node. + 0 - Weight of the node. + 0 - Id of a level. Since this is a root 0 is passed. + 0 - Id of the shaper profile. + 0 - Number of SP priorities. + 3 - Enable statistics for both number of transmitted packets and bytes. + 0 - Number of shared shapers. + +5. Add leaf node for tx queue 0: + + .. code-block:: console + + testpmd> add port tm leaf node 0 0 3 0 30 1 -1 0 0 1 0 + + Parameters have following meaning:: + + 0 - Id of a port. + 0 - Id of a new node. + 3 - Id of the parent node. + 0 - Priority of a node. + 30 - WRR weight. + 1 - Id of a level. Since this is a leaf node 1 is passed. + -1 - Id of a shaper. -1 indicates that shaper is not attached. + 0 - Congestion management is not supported. + 0 - Congestion management is not supported. + 1 - Enable statistics counter for number of transmitted packets. + 0 - Number of shared shapers. + +6. Add leaf node for tx queue 1: + + .. code-block:: console + + testpmd> add port tm leaf node 0 1 3 0 60 1 -1 0 0 1 0 + + Parameters have following meaning:: + + 0 - Id of a port. + 1 - Id of a new node. + 3 - Id of the parent node. + 0 - Priority of a node. + 60 - WRR weight. + 1 - Id of a level. Since this is a leaf node 1 is passed. + -1 - Id of a shaper. -1 indicates that shaper is not attached. + 0 - Congestion management is not supported. + 0 - Congestion management is not supported. + 1 - Enable statistics counter for number of transmitted packets. + 0 - Number of shared shapers. + +7. Add leaf node for tx queue 2: + + .. code-block:: console + + testpmd> add port tm leaf node 0 2 3 0 99 1 -1 0 0 1 0 + + Parameters have following meaning:: + + 0 - Id of a port. + 2 - Id of a new node. + 3 - Id of the parent node. + 0 - Priority of a node. + 99 - WRR weight. + 1 - Id of a level. Since this is a leaf node 1 is passed. + -1 - Id of a shaper. -1 indicates that shaper is not attached. + 0 - Congestion management is not supported. + 0 - Congestion management is not supported. + 1 - Enable statistics counter for number of transmitted packets. 
+ 0 - Number of shared shapers. + +8. Commit hierarchy: + + .. code-block:: console + + testpmd> port tm hierarchy commit 0 no + + Parameters have following meaning:: + + 0 - Id of a port. + no - Do not flush TM hierarchy if commit fails. + +9. Start all ports + + .. code-block:: console + + testpmd> port start all + + + +10. Enable forwarding + + .. code-block:: console + + testpmd> start diff --git a/src/spdk/dpdk/doc/guides/nics/netvsc.rst b/src/spdk/dpdk/doc/guides/nics/netvsc.rst new file mode 100644 index 000000000..6dbb9a551 --- /dev/null +++ b/src/spdk/dpdk/doc/guides/nics/netvsc.rst @@ -0,0 +1,118 @@ +.. SPDX-License-Identifier: BSD-3-Clause + Copyright(c) Microsoft Corporation. + +Netvsc poll mode driver +======================= + +The Netvsc Poll Mode driver (PMD) provides support for the paravirtualized +network device for Microsoft Hyper-V. It can be used with +Window Server 2008/2012/2016, Windows 10. +The device offers multi-queue support (if kernel and host support it), +checksum and segmentation offloads. + + +Features and Limitations of Hyper-V PMD +--------------------------------------- + +In this release, the hyper PMD driver provides the basic functionality of packet reception and transmission. + +* It supports merge-able buffers per packet when receiving packets and scattered buffer per packet + when transmitting packets. The packet size supported is from 64 to 65536. + +* The PMD supports multicast packets and promiscuous mode subject to restrictions on the host. + In order to this to work, the guest network configuration on Hyper-V must be configured to allow MAC address + spoofing. + +* The device has only a single MAC address. + Hyper-V driver does not support MAC or VLAN filtering because the Hyper-V host does not support it. + +* VLAN tags are always stripped and presented in mbuf tci field. + +* The Hyper-V driver does not use or support interrupts. Link state change + callback is done via change events in the packet ring. + +* The maximum number of queues is limited by the host (currently 64). + When used with 4.16 kernel only a single queue is available. + +* This driver supports SR-IOV network acceleration. + If SR-IOV is enabled then the driver will transparently manage the interface, + and send and receive packets using the VF path. + The VDEV_NETVSC and FAILSAFE drivers are *not* used when using netvsc PMD. + +Installation +------------ + +The Netvsc PMD is a standalone driver, similar to virtio and vmxnet3. +Using Netvsc PMD requires that the associated VMBUS device be bound to the userspace +I/O device driver for Hyper-V (uio_hv_generic). By default, all netvsc devices +will be bound to the Linux kernel driver; in order to use netvsc PMD the +device must first be overridden. + +The first step is to identify the network device to override. +VMBUS uses Universal Unique Identifiers +(`UUID`_) to identify devices on the bus similar to how PCI uses Domain:Bus:Function. +The UUID associated with a Linux kernel network device can be determined +by looking at the sysfs information. To find the UUID for eth1 and +store it in a shell variable: + + .. code-block:: console + + DEV_UUID=$(basename $(readlink /sys/class/net/eth1/device)) + + +.. _`UUID`: https://en.wikipedia.org/wiki/Universally_unique_identifier + +There are several possible ways to assign the uio device driver for a device. +The easiest way (but only on 4.18 or later) +is to use the `driverctl Device Driver control utility`_ to override +the normal kernel device. + + .. 
code-block:: console + + driverctl -b vmbus set-override $DEV_UUID uio_hv_generic + +.. _`driverctl Device Driver control utility`: https://gitlab.com/driverctl/driverctl + +Any settings done with driverctl are by default persistent and will be reapplied +on reboot. + +On older kernels, the same effect can be had by manual sysfs bind and unbind +operations: + + .. code-block:: console + + NET_UUID="f8615163-df3e-46c5-913f-f2d2f965ed0e" + modprobe uio_hv_generic + echo $NET_UUID > /sys/bus/vmbus/drivers/uio_hv_generic/new_id + echo $DEV_UUID > /sys/bus/vmbus/drivers/hv_netvsc/unbind + echo $DEV_UUID > /sys/bus/vmbus/drivers/uio_hv_generic/bind + +.. Note:: + + The dpdk-devbind.py script can not be used since it only handles PCI devices. + + +Prerequisites +------------- + +The following prerequisites apply: + +* Linux kernel support for UIO on vmbus is done with the uio_hv_generic driver. + Full support of multiple queues requires the 4.17 kernel. It is possible + to use the netvsc PMD with 4.16 kernel but it is limited to a single queue. + + +Netvsc PMD arguments +-------------------- + +The user can specify below argument in devargs. + +#. ``latency``: + + A netvsc device uses a mailbox page to indicate to the host that there + is something in the transmit queue. The host scans this page at a + periodic interval. This parameter allows adjusting the value that + is used by the host. Smaller values improve transmit latency, and larger + values save CPU cycles. This parameter is in microseconds. + If the value is too large or too small it will be + ignored by the host. (Default: 50) diff --git a/src/spdk/dpdk/doc/guides/nics/nfb.rst b/src/spdk/dpdk/doc/guides/nics/nfb.rst new file mode 100644 index 000000000..10f33a025 --- /dev/null +++ b/src/spdk/dpdk/doc/guides/nics/nfb.rst @@ -0,0 +1,164 @@ +.. SPDX-License-Identifier: BSD-3-Clause + Copyright 2019 Cesnet + Copyright 2019 Netcope Technologies + +NFB poll mode driver library +================================= + +The NFB poll mode driver library implements support for the Netcope +FPGA Boards (**NFB-40G2, NFB-100G2, NFB-200G2QL**) and Silicom **FB2CGG3** card, +FPGA-based programmable NICs. The NFB PMD uses interface provided by the libnfb +library to communicate with these cards over the nfb layer. + +More information about the +`NFB cards `_ +and used technology +(`Netcope Development Kit `_) +can be found on the `Netcope Technologies website `_. + +.. note:: + + This driver has external dependencies. + Therefore it is disabled in default configuration files. + It can be enabled by setting ``CONFIG_RTE_LIBRTE_NFB_PMD=y`` + and recompiling. + +.. note:: + + Currently the driver is supported only on x86_64 architectures. + Only x86_64 versions of the external libraries are provided. + +Prerequisites +------------- + +This PMD requires kernel modules which are responsible for initialization and +allocation of resources needed for nfb layer function. +Communication between PMD and kernel modules is mediated by libnfb library. +These kernel modules and library are not part of DPDK and must be installed +separately: + +* **libnfb library** + + The library provides API for initialization of nfb transfers, receiving and + transmitting data segments. + +* **Kernel modules** + + * nfb + + Kernel modules manage initialization of hardware, allocation and + sharing of resources for user space applications. + +Dependencies can be found here: +`Netcope common `_. 
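+
+As a minimal host-preparation sketch (assuming the packages above install a
+kernel module simply named ``nfb``; exact package and module names may differ
+between releases), the module can be loaded and checked before starting a
+DPDK application:
+
+.. code-block:: console
+
+   modprobe nfb
+   lsmod | grep nfb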
+ +Versions of the packages +~~~~~~~~~~~~~~~~~~~~~~~~ + +The minimum version of the provided packages: + +* for DPDK from 19.05 + +Configuration +------------- + +These configuration options can be modified before compilation in the +``.config`` file: + +* ``CONFIG_RTE_LIBRTE_NFB_PMD`` default value: **n** + + Value **y** enables compilation of nfb PMD. + + +Timestamps + +The PMD supports hardware timestamps of frame receipt on physical network interface. In order to use +the timestamps, the hardware timestamping unit must be enabled (follow the documentation of the NFB +products) and the device argument `timestamp=1` must be used. + +.. code-block:: console + + $RTE_TARGET/app/testpmd -w b3:00.0,timestamp=1 -- + +When the timestamps are enabled with the *devarg*, a timestamp validity flag is set in the MBUFs +containing received frames and timestamp is inserted into the `rte_mbuf` struct. + +The timestamp is an `uint64_t` field. Its lower 32 bits represent *seconds* portion of the timestamp +(number of seconds elapsed since 1.1.1970 00:00:00 UTC) and its higher 32 bits represent +*nanosecond* portion of the timestamp (number of nanoseconds elapsed since the beginning of the +second in the *seconds* portion. + + +Using the NFB PMD +---------------------- + +Kernel modules have to be loaded before running the DPDK application. + +NFB card architecture +--------------------- + +The NFB cards are multi-port multi-queue cards, where (generally) data from any +Ethernet port may be sent to any queue. +They are represented in DPDK as a single port. + +NFB-200G2QL card employs an add-on cable which allows to connect it to two +physical PCI-E slots at the same time (see the diagram below). +This is done to allow 200 Gbps of traffic to be transferred through the PCI-E +bus (note that a single PCI-E 3.0 x16 slot provides only 125 Gbps theoretical +throughput). + +Although each slot may be connected to a different CPU and therefore to a different +NUMA node, the card is represented as a single port in DPDK. To work with data +from the individual queues on the right NUMA node, connection of NUMA nodes on +first and last queue (each NUMA node has half of the queues) need to be checked. + +.. figure:: img/szedata2_nfb200g_architecture.* + :align: center + + NFB-200G2QL high-level diagram + +Limitations +----------- + +Driver is usable only on Linux architecture, namely on CentOS. + +Since a card is always represented as a single port, but can be connected to two +NUMA nodes, there is need for manual check where master/slave is connected. + +Example of usage +---------------- + +Read packets from 0. and 1. receive queue and write them to 0. and 1. +transmit queue: + +.. code-block:: console + + $RTE_TARGET/app/testpmd -l 0-3 -n 2 \ + -- --port-topology=chained --rxq=2 --txq=2 --nb-cores=2 -i -a + +Example output: + +.. code-block:: console + + [...] + EAL: PCI device 0000:06:00.0 on NUMA socket -1 + EAL: probe driver: 1b26:c1c1 net_nfb + PMD: Initializing NFB device (0000:06:00.0) + PMD: Available DMA queues RX: 8 TX: 8 + PMD: NFB device (0000:06:00.0) successfully initialized + Interactive-mode selected + Auto-start selected + Configuring Port 0 (socket 0) + Port 0: 00:11:17:00:00:00 + Checking link statuses... 
+ Port 0 Link Up - speed 10000 Mbps - full-duplex + Done + Start automatic packet forwarding + io packet forwarding - CRC stripping disabled - packets/burst=32 + nb forwarding cores=2 - nb forwarding ports=1 + RX queues=2 - RX desc=128 - RX free threshold=0 + RX threshold registers: pthresh=0 hthresh=0 wthresh=0 + TX queues=2 - TX desc=512 - TX free threshold=0 + TX threshold registers: pthresh=0 hthresh=0 wthresh=0 + TX RS bit threshold=0 - TXQ flags=0x0 + testpmd> diff --git a/src/spdk/dpdk/doc/guides/nics/nfp.rst b/src/spdk/dpdk/doc/guides/nics/nfp.rst new file mode 100644 index 000000000..5f2a0698f --- /dev/null +++ b/src/spdk/dpdk/doc/guides/nics/nfp.rst @@ -0,0 +1,168 @@ +.. SPDX-License-Identifier: BSD-3-Clause + Copyright(c) 2015-2017 Netronome Systems, Inc. All rights reserved. + All rights reserved. + +NFP poll mode driver library +============================ + +Netronome's sixth generation of flow processors pack 216 programmable +cores and over 100 hardware accelerators that uniquely combine packet, +flow, security and content processing in a single device that scales +up to 400-Gb/s. + +This document explains how to use DPDK with the Netronome Poll Mode +Driver (PMD) supporting Netronome's Network Flow Processor 6xxx +(NFP-6xxx) and Netronome's Flow Processor 4xxx (NFP-4xxx). + +NFP is a SRIOV capable device and the PMD driver supports the physical +function (PF) and the virtual functions (VFs). + +Dependencies +------------ + +Before using the Netronome's DPDK PMD some NFP configuration, +which is not related to DPDK, is required. The system requires +installation of **Netronome's BSP (Board Support Package)** along +with a specific NFP firmware application. Netronome's NSP ABI +version should be 0.20 or higher. + +If you have a NFP device you should already have the code and +documentation for this configuration. Contact +**support@netronome.com** to obtain the latest available firmware. + +The NFP Linux netdev kernel driver for VFs has been a part of the +vanilla kernel since kernel version 4.5, and support for the PF +since kernel version 4.11. Support for older kernels can be obtained +on Github at +**https://github.com/Netronome/nfp-drv-kmods** along with the build +instructions. + +NFP PMD needs to be used along with UIO ``igb_uio`` or VFIO (``vfio-pci``) +Linux kernel driver. + +Building the software +--------------------- + +Netronome's PMD code is provided in the **drivers/net/nfp** directory. +Although NFP PMD has Netronome´s BSP dependencies, it is possible to +compile it along with other DPDK PMDs even if no BSP was installed previously. +Of course, a DPDK app will require such a BSP installed for using the +NFP PMD, along with a specific NFP firmware application. + +Default PMD configuration is at the **common_linux configuration** file: + +- **CONFIG_RTE_LIBRTE_NFP_PMD=y** + +Once the DPDK is built all the DPDK apps and examples include support for +the NFP PMD. + + +Driver compilation and testing +------------------------------ + +Refer to the document :ref:`compiling and testing a PMD for a NIC ` +for details. + +Using the PF +------------ + +NFP PMD supports using the NFP PF as another DPDK port, but it does not +have any functionality for controlling VFs. In fact, it is not possible to use +the PMD with the VFs if the PF is being used by DPDK, that is, with the NFP PF +bound to ``igb_uio`` or ``vfio-pci`` kernel drivers. 
Future DPDK versions will +have a PMD able to work with the PF and VFs at the same time and with the PF +implementing VF management along with other PF-only functionalities/offloads. + +The PMD PF has extra work to do which will delay the DPDK app initialization +like uploading the firmware and configure the Link state properly when starting or +stopping a PF port. Since DPDK 18.05 the firmware upload happens when +a PF is initialized, which was not always true with older DPDK versions. + +Depending on the Netronome product installed in the system, firmware files +should be available under ``/lib/firmware/netronome``. DPDK PMD supporting the +PF looks for a firmware file in this order: + + 1) First try to find a firmware image specific for this device using the + NFP serial number: + + serial-00-15-4d-12-20-65-10-ff.nffw + + 2) Then try the PCI name: + + pci-0000:04:00.0.nffw + + 3) Finally try the card type and media: + + nic_AMDA0099-0001_2x25.nffw + +Netronome's software packages install firmware files under ``/lib/firmware/netronome`` +to support all the Netronome's SmartNICs and different firmware applications. +This is usually done using file names based on SmartNIC type and media and with a +directory per firmware application. Options 1 and 2 for firmware filenames allow +more than one SmartNIC, same type of SmartNIC or different ones, and to upload a +different firmware to each SmartNIC. + + +PF multiport support +-------------------- + +Some NFP cards support several physical ports with just one single PCI device. +The DPDK core is designed with a 1:1 relationship between PCI devices and DPDK +ports, so NFP PMD PF support requires handling the multiport case specifically. +During NFP PF initialization, the PMD will extract the information about the +number of PF ports from the firmware and will create as many DPDK ports as +needed. + +Because the unusual relationship between a single PCI device and several DPDK +ports, there are some limitations when using more than one PF DPDK port: there +is no support for RX interrupts and it is not possible either to use those PF +ports with the device hotplug functionality. + + +PF multiprocess support +----------------------- + +Due to how the driver needs to access the NFP through a CPP interface, which implies +to use specific registers inside the chip, the number of secondary processes with PF +ports is limited to only one. + +This limitation will be solved in future versions but having basic multiprocess support +is important for allowing development and debugging through the PF using a secondary +process which will create a CPP bridge for user space tools accessing the NFP. + + +System configuration +-------------------- + +#. **Enable SR-IOV on the NFP device:** The current NFP PMD supports the PF and + the VFs on a NFP device. However, it is not possible to work with both at the + same time because the VFs require the PF being bound to the NFP PF Linux + netdev driver. Make sure you are working with a kernel with NFP PF support or + get the drivers from the above Github repository and follow the instructions + for building and installing it. + + VFs need to be enabled before they can be used with the PMD. + Before enabling the VFs it is useful to obtain information about the + current NFP PCI device detected by the system: + + .. code-block:: console + + lspci -d19ee: + + Now, for example, configure two virtual functions on a NFP-6xxx device + whose PCI system identity is "0000:03:00.0": + + .. 
code-block:: console + + echo 2 > /sys/bus/pci/devices/0000:03:00.0/sriov_numvfs + + The result of this command may be shown using lspci again: + + .. code-block:: console + + lspci -d19ee: -k + + Two new PCI devices should appear in the output of the above command. The + -k option shows the device driver, if any, that devices are bound to. + Depending on the modules loaded at this point the new PCI devices may be + bound to nfp_netvf driver. diff --git a/src/spdk/dpdk/doc/guides/nics/null.rst b/src/spdk/dpdk/doc/guides/nics/null.rst new file mode 100644 index 000000000..c68d0d605 --- /dev/null +++ b/src/spdk/dpdk/doc/guides/nics/null.rst @@ -0,0 +1,43 @@ +.. SPDX-License-Identifier: BSD-3-Clause + Copyright(c) 2020 Intel Corporation. + +NULL Poll Mode Driver +===================== + +NULL PMD is a simple virtual driver mainly for testing. It always returns success for all packets for Rx/Tx. + +On Rx it returns requested number of empty packets (all zero). On Tx it just frees all sent packets. + + +Usage +----- + +.. code-block:: console + + $RTE_TARGET/app/testpmd -l 0-3 -n 4 --vdev net_null0 --vdev net_null1 -- -i + + +Runtime Config Options +---------------------- + +- ``copy`` [optional, default disabled] + + It copies data of the packet before Rx/Tx. For Rx it uses another empty dummy mbuf for this. + +.. code-block:: console + + $RTE_TARGET/app/testpmd -l 0-3 -n 4 --vdev "net_null0,copy=1" -- -i + +- ``size`` [optional, default=64 bytes] + + Custom packet length value to use.r + If ``copy`` is enabled, this is the length of copy operation. + +.. code-block:: console + + $RTE_TARGET/app/testpmd -l 0-3 -n 4 --vdev "net_null0,size=256" -- -i + +- ``no-rx`` [optional, default disabled] + + Makes PMD more like ``/dev/null``. On Rx no packets received, on Tx all packets are freed. + This option can't co-exist with ``copy`` option. diff --git a/src/spdk/dpdk/doc/guides/nics/octeontx.rst b/src/spdk/dpdk/doc/guides/nics/octeontx.rst new file mode 100644 index 000000000..c8655bf37 --- /dev/null +++ b/src/spdk/dpdk/doc/guides/nics/octeontx.rst @@ -0,0 +1,186 @@ +.. SPDX-License-Identifier: BSD-3-Clause + Copyright(c) 2017 Cavium, Inc + +OCTEON TX Poll Mode driver +========================== + +The OCTEON TX ETHDEV PMD (**librte_pmd_octeontx**) provides poll mode ethdev +driver support for the inbuilt network device found in the **Cavium OCTEON TX** +SoC family as well as their virtual functions (VF) in SR-IOV context. + +More information can be found at `Cavium, Inc Official Website +`_. + +Features +-------- + +Features of the OCTEON TX Ethdev PMD are: + +- Packet type information +- Promiscuous mode +- Port hardware statistics +- Jumbo frames +- Scatter-Gather IO support +- Link state information +- MAC/VLAN filtering +- MTU update +- SR-IOV VF +- Multiple queues for TX +- Lock-free Tx queue +- HW offloaded `ethdev Rx queue` to `eventdev event queue` packet injection + +Supported OCTEON TX SoCs +------------------------ + +- CN83xx + +Unsupported features +-------------------- + +The features supported by the device and not yet supported by this PMD include: + +- Receive Side Scaling (RSS) +- Scattered and gather for TX and RX +- Ingress classification support +- Egress hierarchical scheduling, traffic shaping, and marking + +Prerequisites +------------- + +See :doc:`../platform/octeontx` for setup information. + +Pre-Installation Configuration +------------------------------ + +Config File Options +~~~~~~~~~~~~~~~~~~~ + +The following options can be modified in the ``config`` file. 
+Please note that enabling debugging options may affect system performance. + +- ``CONFIG_RTE_LIBRTE_OCTEONTX_PMD`` (default ``y``) + + Toggle compilation of the ``librte_pmd_octeontx`` driver. + +Driver compilation and testing +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ + +Refer to the document :ref:`compiling and testing a PMD for a NIC ` +for details. + +To compile the OCTEON TX PMD for Linux arm64 gcc target, run the +following ``make`` command: + +.. code-block:: console + + cd + make config T=arm64-thunderx-linux-gcc install + +#. Running testpmd: + + Follow instructions available in the document + :ref:`compiling and testing a PMD for a NIC ` + to run testpmd. + + Example output: + + .. code-block:: console + + ./arm64-thunderx-linux-gcc/app/testpmd -c 700 \ + --base-virtaddr=0x100000000000 \ + --mbuf-pool-ops-name="octeontx_fpavf" \ + --vdev='event_octeontx' \ + --vdev='eth_octeontx,nr_port=2' \ + -- --rxq=1 --txq=1 --nb-core=2 \ + --total-num-mbufs=16384 -i + ..... + EAL: Detected 24 lcore(s) + EAL: Probing VFIO support... + EAL: VFIO support initialized + ..... + EAL: PCI device 0000:07:00.1 on NUMA socket 0 + EAL: probe driver: 177d:a04b octeontx_ssovf + ..... + EAL: PCI device 0001:02:00.7 on NUMA socket 0 + EAL: probe driver: 177d:a0dd octeontx_pkivf + ..... + EAL: PCI device 0001:03:01.0 on NUMA socket 0 + EAL: probe driver: 177d:a049 octeontx_pkovf + ..... + PMD: octeontx_probe(): created ethdev eth_octeontx for port 0 + PMD: octeontx_probe(): created ethdev eth_octeontx for port 1 + ..... + Configuring Port 0 (socket 0) + Port 0: 00:0F:B7:11:94:46 + Configuring Port 1 (socket 0) + Port 1: 00:0F:B7:11:94:47 + ..... + Checking link statuses... + Port 0 Link Up - speed 40000 Mbps - full-duplex + Port 1 Link Up - speed 40000 Mbps - full-duplex + Done + testpmd> + + +Initialization +-------------- + +The OCTEON TX ethdev pmd is exposed as a vdev device which consists of a set +of PKI and PKO PCIe VF devices. On EAL initialization, +PKI/PKO PCIe VF devices will be probed and then the vdev device can be created +from the application code, or from the EAL command line based on +the number of probed/bound PKI/PKO PCIe VF device to DPDK by + +* Invoking ``rte_vdev_init("eth_octeontx")`` from the application + +* Using ``--vdev="eth_octeontx"`` in the EAL options, which will call + rte_vdev_init() internally + +Device arguments +~~~~~~~~~~~~~~~~ +Each ethdev port is mapped to a physical port(LMAC), Application can specify +the number of interesting ports with ``nr_ports`` argument. + +Dependency +~~~~~~~~~~ +``eth_octeontx`` pmd is depend on ``event_octeontx`` eventdev device and +``octeontx_fpavf`` external mempool handler. + +Example: + +.. code-block:: console + + ./your_dpdk_application --mbuf-pool-ops-name="octeontx_fpavf" \ + --vdev='event_octeontx' \ + --vdev="eth_octeontx,nr_port=2" + +Limitations +----------- + +``octeontx_fpavf`` external mempool handler dependency +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ +The OCTEON TX SoC family NIC has inbuilt HW assisted external mempool manager. +This driver will only work with ``octeontx_fpavf`` external mempool handler +as it is the most performance effective way for packet allocation and Tx buffer +recycling on OCTEON TX SoC platform. + +CRC stripping +~~~~~~~~~~~~~ + +The OCTEON TX SoC family NICs strip the CRC for every packets coming into the +host interface irrespective of the offload configuration. + +Maximum packet length +~~~~~~~~~~~~~~~~~~~~~ + +The OCTEON TX SoC family NICs support a maximum of a 32K jumbo frame. 
The value +is fixed and cannot be changed. So, even when the ``rxmode.max_rx_pkt_len`` +member of ``struct rte_eth_conf`` is set to a value lower than 32k, frames +up to 32k bytes can still reach the host interface. + +Maximum mempool size +~~~~~~~~~~~~~~~~~~~~ + +The maximum mempool size supplied to Rx queue setup should be less than 128K. +When running testpmd on OCTEON TX the application can limit the number of mbufs +by using the option ``--total-num-mbufs=131072``. diff --git a/src/spdk/dpdk/doc/guides/nics/octeontx2.rst b/src/spdk/dpdk/doc/guides/nics/octeontx2.rst new file mode 100644 index 000000000..24089ce67 --- /dev/null +++ b/src/spdk/dpdk/doc/guides/nics/octeontx2.rst @@ -0,0 +1,406 @@ +.. SPDX-License-Identifier: BSD-3-Clause + Copyright(C) 2019 Marvell International Ltd. + +OCTEON TX2 Poll Mode driver +=========================== + +The OCTEON TX2 ETHDEV PMD (**librte_pmd_octeontx2**) provides poll mode ethdev +driver support for the inbuilt network device found in **Marvell OCTEON TX2** +SoC family as well as for their virtual functions (VF) in SR-IOV context. + +More information can be found at `Marvell Official Website +`_. + +Features +-------- + +Features of the OCTEON TX2 Ethdev PMD are: + +- Packet type information +- Promiscuous mode +- Jumbo frames +- SR-IOV VF +- Lock-free Tx queue +- Multiple queues for TX and RX +- Receiver Side Scaling (RSS) +- MAC/VLAN filtering +- Multicast MAC filtering +- Generic flow API +- Inner and Outer Checksum offload +- VLAN/QinQ stripping and insertion +- Port hardware statistics +- Link state information +- Link flow control +- MTU update +- Scatter-Gather IO support +- Vector Poll mode driver +- Debug utilities - Context dump and error interrupt support +- IEEE1588 timestamping +- HW offloaded `ethdev Rx queue` to `eventdev event queue` packet injection +- Support Rx interrupt +- Inline IPsec processing support +- :ref:`Traffic Management API ` + +Prerequisites +------------- + +See :doc:`../platform/octeontx2` for setup information. + +Compile time Config Options +--------------------------- + +The following options may be modified in the ``config`` file. + +- ``CONFIG_RTE_LIBRTE_OCTEONTX2_PMD`` (default ``y``) + + Toggle compilation of the ``librte_pmd_octeontx2`` driver. + +Driver compilation and testing +------------------------------ + +Refer to the document :ref:`compiling and testing a PMD for a NIC ` +for details. + +To compile the OCTEON TX2 PMD for Linux arm64 gcc, +use arm64-octeontx2-linux-gcc as target. + +#. Running testpmd: + + Follow instructions available in the document + :ref:`compiling and testing a PMD for a NIC ` + to run testpmd. + + Example output: + + .. code-block:: console + + ./build/app/testpmd -c 0x300 -w 0002:02:00.0 -- --portmask=0x1 --nb-cores=1 --port-topology=loop --rxq=1 --txq=1 + EAL: Detected 24 lcore(s) + EAL: Detected 1 NUMA nodes + EAL: Multi-process socket /var/run/dpdk/rte/mp_socket + EAL: No available hugepages reported in hugepages-2048kB + EAL: Probing VFIO support... + EAL: VFIO support initialized + EAL: PCI device 0002:02:00.0 on NUMA socket 0 + EAL: probe driver: 177d:a063 net_octeontx2 + EAL: using IOMMU type 1 (Type 1) + testpmd: create a new mbuf pool : n=267456, size=2176, socket=0 + testpmd: preferred mempool ops selected: octeontx2_npa + Configuring Port 0 (socket 0) + PMD: Port 0: Link Up - speed 40000 Mbps - full-duplex + + Port 0: link state change event + Port 0: 36:10:66:88:7A:57 + Checking link statuses... 
+ Done + No commandline core given, start packet forwarding + io packet forwarding - ports=1 - cores=1 - streams=1 - NUMA support enabled, MP allocation mode: native + Logical Core 9 (socket 0) forwards packets on 1 streams: + RX P=0/Q=0 (socket 0) -> TX P=0/Q=0 (socket 0) peer=02:00:00:00:00:00 + + io packet forwarding packets/burst=32 + nb forwarding cores=1 - nb forwarding ports=1 + port 0: RX queue number: 1 Tx queue number: 1 + Rx offloads=0x0 Tx offloads=0x10000 + RX queue: 0 + RX desc=512 - RX free threshold=0 + RX threshold registers: pthresh=0 hthresh=0 wthresh=0 + RX Offloads=0x0 + TX queue: 0 + TX desc=512 - TX free threshold=0 + TX threshold registers: pthresh=0 hthresh=0 wthresh=0 + TX offloads=0x10000 - TX RS bit threshold=0 + Press enter to exit + +Runtime Config Options +---------------------- + +- ``Rx&Tx scalar mode enable`` (default ``0``) + + Ethdev supports both scalar and vector mode, it may be selected at runtime + using ``scalar_enable`` ``devargs`` parameter. + +- ``RSS reta size`` (default ``64``) + + RSS redirection table size may be configured during runtime using ``reta_size`` + ``devargs`` parameter. + + For example:: + + -w 0002:02:00.0,reta_size=256 + + With the above configuration, reta table of size 256 is populated. + +- ``Flow priority levels`` (default ``3``) + + RTE Flow priority levels can be configured during runtime using + ``flow_max_priority`` ``devargs`` parameter. + + For example:: + + -w 0002:02:00.0,flow_max_priority=10 + + With the above configuration, priority level was set to 10 (0-9). Max + priority level supported is 32. + +- ``Reserve Flow entries`` (default ``8``) + + RTE flow entries can be pre allocated and the size of pre allocation can be + selected runtime using ``flow_prealloc_size`` ``devargs`` parameter. + + For example:: + + -w 0002:02:00.0,flow_prealloc_size=4 + + With the above configuration, pre alloc size was set to 4. Max pre alloc + size supported is 32. + +- ``Max SQB buffer count`` (default ``512``) + + Send queue descriptor buffer count may be limited during runtime using + ``max_sqb_count`` ``devargs`` parameter. + + For example:: + + -w 0002:02:00.0,max_sqb_count=64 + + With the above configuration, each send queue's decscriptor buffer count is + limited to a maximum of 64 buffers. + +- ``Switch header enable`` (default ``none``) + + A port can be configured to a specific switch header type by using + ``switch_header`` ``devargs`` parameter. + + For example:: + + -w 0002:02:00.0,switch_header="higig2" + + With the above configuration, higig2 will be enabled on that port and the + traffic on this port should be higig2 traffic only. Supported switch header + types are "higig2", "dsa" and "chlen90b". + +- ``RSS tag as XOR`` (default ``0``) + + C0 HW revision onward, The HW gives an option to configure the RSS adder as + + * ``rss_adder<7:0> = flow_tag<7:0> ^ flow_tag<15:8> ^ flow_tag<23:16> ^ flow_tag<31:24>`` + + * ``rss_adder<7:0> = flow_tag<7:0>`` + + Latter one aligns with standard NIC behavior vs former one is a legacy + RSS adder scheme used in OCTEON TX2 products. + + By default, the driver runs in the latter mode from C0 HW revision onward. + Setting this flag to 1 to select the legacy mode. + + For example to select the legacy mode(RSS tag adder as XOR):: + + -w 0002:02:00.0,tag_as_xor=1 + +- ``Max SPI for inbound inline IPsec`` (default ``1``) + + Max SPI supported for inbound inline IPsec processing can be specified by + ``ipsec_in_max_spi`` ``devargs`` parameter. 
+
+  For example::
+
+    -w 0002:02:00.0,ipsec_in_max_spi=128
+
+  With the above configuration, the application can enable inline IPsec processing
+  on 128 SAs (SPI 0-127).
+
+.. note::
+
+   The above devarg parameters are configurable per device. If the application needs
+   to configure all the ethdev ports, the parameters must be passed to every PCIe device.
+
+- ``Lock NPA contexts in NDC``
+
+   Lock NPA aura and pool contexts in NDC cache.
+   The device args take a hexadecimal bitmask where each bit represents the
+   corresponding aura/pool id.
+
+   For example::
+
+      -w 0002:02:00.0,npa_lock_mask=0xf
+
+.. _otx2_tmapi:
+
+Traffic Management API
+----------------------
+
+The OCTEON TX2 PMD supports the generic DPDK Traffic Management API, which allows
+configuring the following features:
+
+#. Hierarchical scheduling
+#. Single rate - Two color, Two rate - Three color shaping
+
+Both DWRR and Static Priority (SP) hierarchical scheduling are supported.
+
+Every parent can have at most 10 SP children and an unlimited number of DWRR children.
+
+Both PF and VF support the traffic management API, with the PF supporting 6 levels
+and the VF supporting 5 levels of topology.
+
+Limitations
+-----------
+
+``mempool_octeontx2`` external mempool handler dependency
+~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
+
+The OCTEON TX2 SoC family NIC has an inbuilt HW assisted external mempool manager.
+The ``net_octeontx2`` PMD only works with the ``mempool_octeontx2`` mempool handler,
+as it is the most performance-effective way for packet allocation and Tx buffer
+recycling on the OCTEON TX2 SoC platform.
+
+CRC stripping
+~~~~~~~~~~~~~
+
+The OCTEON TX2 SoC family NICs strip the CRC for every packet being received by
+the host interface irrespective of the offload configuration.
+
+Multicast MAC filtering
+~~~~~~~~~~~~~~~~~~~~~~~
+
+The ``net_octeontx2`` PMD supports the multicast MAC filtering feature only on
+physical function devices.
+
+SDP interface support
+~~~~~~~~~~~~~~~~~~~~~
+OCTEON TX2 SDP interface support is limited to the PF device; there is no VF support.
+
+Inline Protocol Processing
+~~~~~~~~~~~~~~~~~~~~~~~~~~
+The ``net_octeontx2`` PMD doesn't support the following features for packets to be
+inline protocol processed:
+
+- TSO offload
+- VLAN/QinQ offload
+- Fragmentation
+
+Debugging Options
+-----------------
+
+.. _table_octeontx2_ethdev_debug_options:
+
+.. table:: OCTEON TX2 ethdev debug options
+
+   +---+------------+-------------------------------------------------------+
+   | # | Component  | EAL log command                                       |
+   +===+============+=======================================================+
+   | 1 | NIX        | --log-level='pmd\.net.octeontx2,8'                    |
+   +---+------------+-------------------------------------------------------+
+   | 2 | NPC        | --log-level='pmd\.net.octeontx2\.flow,8'              |
+   +---+------------+-------------------------------------------------------+
+
+RTE Flow Support
+----------------
+
+The OCTEON TX2 SoC family NIC has support for the following patterns and
+actions.
+
+Patterns:
+
+.. _table_octeontx2_supported_flow_item_types:
+
+..
table:: Item types + + +----+--------------------------------+ + | # | Pattern Type | + +====+================================+ + | 1 | RTE_FLOW_ITEM_TYPE_ETH | + +----+--------------------------------+ + | 2 | RTE_FLOW_ITEM_TYPE_VLAN | + +----+--------------------------------+ + | 3 | RTE_FLOW_ITEM_TYPE_E_TAG | + +----+--------------------------------+ + | 4 | RTE_FLOW_ITEM_TYPE_IPV4 | + +----+--------------------------------+ + | 5 | RTE_FLOW_ITEM_TYPE_IPV6 | + +----+--------------------------------+ + | 6 | RTE_FLOW_ITEM_TYPE_ARP_ETH_IPV4| + +----+--------------------------------+ + | 7 | RTE_FLOW_ITEM_TYPE_MPLS | + +----+--------------------------------+ + | 8 | RTE_FLOW_ITEM_TYPE_ICMP | + +----+--------------------------------+ + | 9 | RTE_FLOW_ITEM_TYPE_UDP | + +----+--------------------------------+ + | 10 | RTE_FLOW_ITEM_TYPE_TCP | + +----+--------------------------------+ + | 11 | RTE_FLOW_ITEM_TYPE_SCTP | + +----+--------------------------------+ + | 12 | RTE_FLOW_ITEM_TYPE_ESP | + +----+--------------------------------+ + | 13 | RTE_FLOW_ITEM_TYPE_GRE | + +----+--------------------------------+ + | 14 | RTE_FLOW_ITEM_TYPE_NVGRE | + +----+--------------------------------+ + | 15 | RTE_FLOW_ITEM_TYPE_VXLAN | + +----+--------------------------------+ + | 16 | RTE_FLOW_ITEM_TYPE_GTPC | + +----+--------------------------------+ + | 17 | RTE_FLOW_ITEM_TYPE_GTPU | + +----+--------------------------------+ + | 18 | RTE_FLOW_ITEM_TYPE_GENEVE | + +----+--------------------------------+ + | 19 | RTE_FLOW_ITEM_TYPE_VXLAN_GPE | + +----+--------------------------------+ + | 20 | RTE_FLOW_ITEM_TYPE_IPV6_EXT | + +----+--------------------------------+ + | 21 | RTE_FLOW_ITEM_TYPE_VOID | + +----+--------------------------------+ + | 22 | RTE_FLOW_ITEM_TYPE_ANY | + +----+--------------------------------+ + | 23 | RTE_FLOW_ITEM_TYPE_GRE_KEY | + +----+--------------------------------+ + | 24 | RTE_FLOW_ITEM_TYPE_HIGIG2 | + +----+--------------------------------+ + +.. note:: + + ``RTE_FLOW_ITEM_TYPE_GRE_KEY`` works only when checksum and routing + bits in the GRE header are equal to 0. + +Actions: + +.. _table_octeontx2_supported_ingress_action_types: + +.. table:: Ingress action types + + +----+--------------------------------+ + | # | Action Type | + +====+================================+ + | 1 | RTE_FLOW_ACTION_TYPE_VOID | + +----+--------------------------------+ + | 2 | RTE_FLOW_ACTION_TYPE_MARK | + +----+--------------------------------+ + | 3 | RTE_FLOW_ACTION_TYPE_FLAG | + +----+--------------------------------+ + | 4 | RTE_FLOW_ACTION_TYPE_COUNT | + +----+--------------------------------+ + | 5 | RTE_FLOW_ACTION_TYPE_DROP | + +----+--------------------------------+ + | 6 | RTE_FLOW_ACTION_TYPE_QUEUE | + +----+--------------------------------+ + | 7 | RTE_FLOW_ACTION_TYPE_RSS | + +----+--------------------------------+ + | 8 | RTE_FLOW_ACTION_TYPE_SECURITY | + +----+--------------------------------+ + | 9 | RTE_FLOW_ACTION_TYPE_PF | + +----+--------------------------------+ + | 10 | RTE_FLOW_ACTION_TYPE_VF | + +----+--------------------------------+ + +.. _table_octeontx2_supported_egress_action_types: + +.. 
table:: Egress action types + + +----+--------------------------------+ + | # | Action Type | + +====+================================+ + | 1 | RTE_FLOW_ACTION_TYPE_COUNT | + +----+--------------------------------+ + | 2 | RTE_FLOW_ACTION_TYPE_DROP | + +----+--------------------------------+ diff --git a/src/spdk/dpdk/doc/guides/nics/overview.rst b/src/spdk/dpdk/doc/guides/nics/overview.rst new file mode 100644 index 000000000..20cd52b09 --- /dev/null +++ b/src/spdk/dpdk/doc/guides/nics/overview.rst @@ -0,0 +1,34 @@ +.. SPDX-License-Identifier: BSD-3-Clause + Copyright 2016 6WIND S.A. + +Overview of Networking Drivers +============================== + +The networking drivers may be classified in two categories: + +- physical for real devices +- virtual for emulated devices + +Some physical devices may be shaped through a virtual layer as for +SR-IOV. +The interface seen in the virtual environment is a VF (Virtual Function). + +The ethdev layer exposes an API to use the networking functions +of these devices. +The bottom half part of ethdev is implemented by the drivers. +Thus some features may not be implemented. + +There are more differences between drivers regarding some internal properties, +portability or even documentation availability. +Most of these differences are summarized below. + +More details about features can be found in :doc:`features`. + +.. _table_net_pmd_features: + +.. include:: overview_table.txt + +.. Note:: + + Features marked with "P" are partially supported. Refer to the appropriate + NIC guide in the following sections for details. diff --git a/src/spdk/dpdk/doc/guides/nics/pcap_ring.rst b/src/spdk/dpdk/doc/guides/nics/pcap_ring.rst new file mode 100644 index 000000000..cf230ae40 --- /dev/null +++ b/src/spdk/dpdk/doc/guides/nics/pcap_ring.rst @@ -0,0 +1,322 @@ +.. SPDX-License-Identifier: BSD-3-Clause + Copyright(c) 2010-2015 Intel Corporation. + +Libpcap and Ring Based Poll Mode Drivers +======================================== + +In addition to Poll Mode Drivers (PMDs) for physical and virtual hardware, +the DPDK also includes pure-software PMDs, two of these drivers are: + +* A libpcap -based PMD (librte_pmd_pcap) that reads and writes packets using libpcap, + - both from files on disk, as well as from physical NIC devices using standard Linux kernel drivers. + +* A ring-based PMD (librte_pmd_ring) that allows a set of software FIFOs (that is, rte_ring) + to be accessed using the PMD APIs, as though they were physical NICs. + +.. note:: + + The libpcap -based PMD is disabled by default in the build configuration files, + owing to an external dependency on the libpcap development files which must be installed on the board. + Once the libpcap development files are installed, + the library can be enabled by setting CONFIG_RTE_LIBRTE_PMD_PCAP=y and recompiling the DPDK. + +Using the Drivers from the EAL Command Line +------------------------------------------- + +For ease of use, the DPDK EAL also has been extended to allow pseudo-Ethernet devices, +using one or more of these drivers, +to be created at application startup time during EAL initialization. + +To do so, the --vdev= parameter must be passed to the EAL. +This takes take options to allow ring and pcap-based Ethernet to be allocated and used transparently by the application. +This can be used, for example, for testing on a virtual machine where there are no Ethernet ports. + +Libpcap-based PMD +~~~~~~~~~~~~~~~~~ + +Pcap-based devices can be created using the virtual device --vdev option. 
+The device name must start with the net_pcap prefix followed by numbers or letters. +The name is unique for each device. Each device can have multiple stream options and multiple devices can be used. +Multiple device definitions can be arranged using multiple --vdev. +Device name and stream options must be separated by commas as shown below: + +.. code-block:: console + + $RTE_TARGET/app/testpmd -l 0-3 -n 4 \ + --vdev 'net_pcap0,stream_opt0=..,stream_opt1=..' \ + --vdev='net_pcap1,stream_opt0=..' + +Device Streams +^^^^^^^^^^^^^^ + +Multiple ways of stream definitions can be assessed and combined as long as the following two rules are respected: + +* A device is provided with two different streams - reception and transmission. + +* A device is provided with one network interface name used for reading and writing packets. + +The different stream types are: + +* rx_pcap: Defines a reception stream based on a pcap file. + The driver reads each packet within the given pcap file as if it was receiving it from the wire. + The value is a path to a valid pcap file. + + rx_pcap=/path/to/file.pcap + +* tx_pcap: Defines a transmission stream based on a pcap file. + The driver writes each received packet to the given pcap file. + The value is a path to a pcap file. + The file is overwritten if it already exists and it is created if it does not. + + tx_pcap=/path/to/file.pcap + +* rx_iface: Defines a reception stream based on a network interface name. + The driver reads packets from the given interface using the Linux kernel driver for that interface. + The driver captures both the incoming and outgoing packets on that interface. + The value is an interface name. + + rx_iface=eth0 + +* rx_iface_in: Defines a reception stream based on a network interface name. + The driver reads packets from the given interface using the Linux kernel driver for that interface. + The driver captures only the incoming packets on that interface. + The value is an interface name. + + rx_iface_in=eth0 + +* tx_iface: Defines a transmission stream based on a network interface name. + The driver sends packets to the given interface using the Linux kernel driver for that interface. + The value is an interface name. + + tx_iface=eth0 + +* iface: Defines a device mapping a network interface. + The driver both reads and writes packets from and to the given interface. + The value is an interface name. + + iface=eth0 + +Runtime Config Options +^^^^^^^^^^^^^^^^^^^^^^ + +- Use PCAP interface physical MAC + + In case ``iface=`` configuration is set, user may want to use the selected interface's physical MAC + address. This can be done with a ``devarg`` ``phy_mac``, for example:: + + --vdev 'net_pcap0,iface=eth0,phy_mac=1' + +- Use the RX PCAP file to infinitely receive packets + + In case ``rx_pcap=`` configuration is set, user may want to use the selected PCAP file for rudimental + performance testing. This can be done with a ``devarg`` ``infinite_rx``, for example:: + + --vdev 'net_pcap0,rx_pcap=file_rx.pcap,infinite_rx=1' + + When this mode is used, it is recommended to drop all packets on transmit by not providing a tx_pcap or tx_iface. + + This option is device wide, so all queues on a device will either have this enabled or disabled. + This option should only be provided once per device. + +- Drop all packets on transmit + + The user may want to drop all packets on tx for a device. 
This can be done by not providing a tx_pcap or tx_iface, for example::
+
+    --vdev 'net_pcap0,rx_pcap=file_rx.pcap'
+
+  In this case, one tx drop queue is created for each rxq on that device.
+
+- Receive no packets on Rx
+
+  The user may want to run without receiving any packets on Rx. This can be done by not providing a rx_pcap or rx_iface, for example::
+
+    --vdev 'net_pcap0,tx_pcap=file_tx.pcap'
+
+  In this case, one dummy rx queue is created for each tx queue argument passed.
+
+Examples of Usage
+^^^^^^^^^^^^^^^^^
+
+Read packets from one pcap file and write them to another:
+
+.. code-block:: console
+
+    $RTE_TARGET/app/testpmd -l 0-3 -n 4 \
+        --vdev 'net_pcap0,rx_pcap=file_rx.pcap,tx_pcap=file_tx.pcap' \
+        -- --port-topology=chained
+
+Read packets from a network interface and write them to a pcap file:
+
+.. code-block:: console
+
+    $RTE_TARGET/app/testpmd -l 0-3 -n 4 \
+        --vdev 'net_pcap0,rx_iface=eth0,tx_pcap=file_tx.pcap' \
+        -- --port-topology=chained
+
+Read packets from a pcap file and write them to a network interface:
+
+.. code-block:: console
+
+    $RTE_TARGET/app/testpmd -l 0-3 -n 4 \
+        --vdev 'net_pcap0,rx_pcap=file_rx.pcap,tx_iface=eth1' \
+        -- --port-topology=chained
+
+Forward packets through two network interfaces:
+
+.. code-block:: console
+
+    $RTE_TARGET/app/testpmd -l 0-3 -n 4 \
+        --vdev 'net_pcap0,iface=eth0' --vdev='net_pcap1,iface=eth1'
+
+Enable 2 tx queues on a network interface:
+
+.. code-block:: console
+
+    $RTE_TARGET/app/testpmd -l 0-3 -n 4 \
+        --vdev 'net_pcap0,rx_iface=eth1,tx_iface=eth1,tx_iface=eth1' \
+        -- --txq 2
+
+Read only incoming packets from a network interface and write them back to the same network interface:
+
+.. code-block:: console
+
+    $RTE_TARGET/app/testpmd -l 0-3 -n 4 \
+        --vdev 'net_pcap0,rx_iface_in=eth1,tx_iface=eth1'
+
+Using libpcap-based PMD with the testpmd Application
+^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
+
+One of the first things that testpmd does before starting to forward packets is to flush the RX streams
+by reading the first 512 packets on every RX stream and discarding them.
+When using a libpcap-based PMD this behavior can be turned off using the following command line option:
+
+.. code-block:: console
+
+    --no-flush-rx
+
+It is also available in the runtime command line:
+
+.. code-block:: console
+
+    set flush_rx on/off
+
+It is useful for the case where the rx_pcap is being used and no packets are meant to be discarded.
+Otherwise, the first 512 packets from the input pcap file will be discarded by the RX flushing operation.
+
+.. code-block:: console
+
+    $RTE_TARGET/app/testpmd -l 0-3 -n 4 \
+        --vdev 'net_pcap0,rx_pcap=file_rx.pcap,tx_pcap=file_tx.pcap' \
+        -- --port-topology=chained --no-flush-rx
+
+.. note::
+
+   The network interface provided to the PMD should be up. The PMD will return
+   an error if the interface is down, and the PMD itself won't change the status
+   of the external network interface.
+
+
+Rings-based PMD
+~~~~~~~~~~~~~~~
+
+To run a DPDK application on a machine without any Ethernet devices, a pair of ring-based rte_ethdevs can be used as below.
+The device names passed to the --vdev option must start with net_ring and take no additional parameters.
+Multiple devices may be specified, separated by commas.
+
+.. code-block:: console
+
+    ./testpmd -l 1-3 -n 4 --vdev=net_ring0 --vdev=net_ring1 -- -i
+    EAL: Detected lcore 1 as core 1 on socket 0
+    ...
+
+    Interactive-mode selected
+    Configuring Port 0 (socket 0)
+    Configuring Port 1 (socket 0)
+    Checking link statuses...
+ Port 0 Link Up - speed 10000 Mbps - full-duplex + Port 1 Link Up - speed 10000 Mbps - full-duplex + Done + + testpmd> start tx_first + io packet forwarding - CRC stripping disabled - packets/burst=16 + nb forwarding cores=1 - nb forwarding ports=2 + RX queues=1 - RX desc=128 - RX free threshold=0 + RX threshold registers: pthresh=8 hthresh=8 wthresh=4 + TX queues=1 - TX desc=512 - TX free threshold=0 + TX threshold registers: pthresh=36 hthresh=0 wthresh=0 + TX RS bit threshold=0 - TXQ flags=0x0 + + testpmd> stop + Telling cores to stop... + Waiting for lcores to finish... + +.. image:: img/forward_stats.* + +.. code-block:: console + + +++++++++++++++ Accumulated forward statistics for allports++++++++++ + RX-packets: 462384736 RX-dropped: 0 RX-total: 462384736 + TX-packets: 462384768 TX-dropped: 0 TX-total: 462384768 + +++++++++++++++++++++++++++++++++++++++++++++++++++++ + + Done. + + +Using the Poll Mode Driver from an Application +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ + +Both drivers can provide similar APIs to allow the user to create a PMD, that is, +rte_ethdev structure, instances at run-time in the end-application, +for example, using rte_eth_from_rings() or rte_eth_from_pcaps() APIs. +For the rings-based PMD, this functionality could be used, for example, +to allow data exchange between cores using rings to be done in exactly the +same way as sending or receiving packets from an Ethernet device. +For the libpcap-based PMD, it allows an application to open one or more pcap files +and use these as a source of packet input to the application. + +Usage Examples +^^^^^^^^^^^^^^ + +To create two pseudo-Ethernet ports where all traffic sent to a port is looped back +for reception on the same port (error handling omitted for clarity): + +.. code-block:: c + + #define RING_SIZE 256 + #define NUM_RINGS 2 + #define SOCKET0 0 + + struct rte_ring *ring[NUM_RINGS]; + int port0, port1; + + ring[0] = rte_ring_create("R0", RING_SIZE, SOCKET0, RING_F_SP_ENQ|RING_F_SC_DEQ); + ring[1] = rte_ring_create("R1", RING_SIZE, SOCKET0, RING_F_SP_ENQ|RING_F_SC_DEQ); + + /* create two ethdev's */ + + port0 = rte_eth_from_rings("net_ring0", ring, NUM_RINGS, ring, NUM_RINGS, SOCKET0); + port1 = rte_eth_from_rings("net_ring1", ring, NUM_RINGS, ring, NUM_RINGS, SOCKET0); + + +To create two pseudo-Ethernet ports where the traffic is switched between them, +that is, traffic sent to port 0 is read back from port 1 and vice-versa, +the final two lines could be changed as below: + +.. code-block:: c + + port0 = rte_eth_from_rings("net_ring0", &ring[0], 1, &ring[1], 1, SOCKET0); + port1 = rte_eth_from_rings("net_ring1", &ring[1], 1, &ring[0], 1, SOCKET0); + +This type of configuration could be useful in a pipeline model, for example, +where one may want to have inter-core communication using pseudo Ethernet devices rather than raw rings, +for reasons of API consistency. + +Enqueuing and dequeuing items from an rte_ring using the rings-based PMD may be slower than using the native rings API. +This is because DPDK Ethernet drivers make use of function pointers to call the appropriate enqueue or dequeue functions, +while the rte_ring specific functions are direct function calls in the code and are often inlined by the compiler. 
+ + Once an ethdev has been created, for either a ring or a pcap-based PMD, + it should be configured and started in the same way as a regular Ethernet device, that is, + by calling rte_eth_dev_configure() to set the number of receive and transmit queues, + then calling rte_eth_rx_queue_setup() / tx_queue_setup() for each of those queues and + finally calling rte_eth_dev_start() to allow transmission and reception of packets to begin. diff --git a/src/spdk/dpdk/doc/guides/nics/pfe.rst b/src/spdk/dpdk/doc/guides/nics/pfe.rst new file mode 100644 index 000000000..c1b4a3e47 --- /dev/null +++ b/src/spdk/dpdk/doc/guides/nics/pfe.rst @@ -0,0 +1,180 @@ +.. SPDX-License-Identifier: BSD-3-Clause + Copyright 2019 NXP + +PFE Poll Mode Driver +====================== + +The PFE NIC PMD (**librte_pmd_pfe**) provides poll mode driver +support for the inbuilt NIC found in the **NXP LS1012** SoC. + +More information can be found at `NXP Official Website +`_. + +PFE +--- + +This section provides an overview of the NXP PFE +and how it is integrated into the DPDK. + +Contents summary + +- PFE overview +- PFE features +- Supported PFE SoCs +- Prerequisites +- Driver compilation and testing +- Limitations + +PFE Overview +~~~~~~~~~~~~ + +PFE is a hardware programmable packet forwarding engine to provide +high performance Ethernet interfaces. The diagram below shows a +system level overview of PFE: + +.. code-block:: console + + ====================================================+=============== + US +-----------------------------------------+ | Kernel Space + | | | + | PFE Ethernet Driver | | + +-----------------------------------------+ | + ^ | ^ | | + PFE RXQ| |TXQ RXQ| |TXQ | + PMD | | | | | + | v | v | +----------+ + +---------+ +----------+ | | pfe.ko | + | net_pfe0| | net_pfe1 | | +----------+ + +---------+ +----------+ | + ^ | ^ | | + TXQ| |RXQ TXQ| |RXQ | + | | | | | + | v | v | + +------------------------+ | + | | | + | PFE HIF driver | | + +------------------------+ | + ^ | | + RX | TX | | + RING| RING| | + | v | + +--------------+ | + | | | + ==================| HIF |==================+=============== + +-----------+ +--------------+ + | | | | HW + | PFE +--------------+ | + | +-----+ +-----+ | + | | MAC | | MAC | | + | | | | | | + +-------+-----+----------------+-----+----+ + | PHY | | PHY | + +-----+ +-----+ + + +The HIF, PFE, MAC and PHY are the hardware blocks, the pfe.ko is a kernel +module, the PFE HIF driver and the PFE ethernet driver combined represent +as DPDK PFE poll mode driver are running in the userspace. + +The PFE hardware supports one HIF (host interface) RX ring and one TX ring +to send and receive packets through packet forwarding engine. Both network +interface traffic is multiplexed and send over HIF queue. + +net_pfe0 and net_pfe1 are logical ethernet interfaces, created by HIF client +driver. HIF driver is responsible for send and receive packets between +host interface and these logical interfaces. PFE ethernet driver is a +hardware independent and register with the HIF client driver to transmit and +receive packets from HIF via logical interfaces. + +pfe.ko is required for PHY initialisation and also responsible for creating +the character device "pfe_us_cdev" which will be used for interacting with +the kernel layer for link status. 
+ +PFE Features +~~~~~~~~~~~~ + +- L3/L4 checksum offload +- Packet type parsing +- Basic stats +- MTU update +- Promiscuous mode +- Allmulticast mode +- Link status +- ARMv8 + +Supported PFE SoCs +~~~~~~~~~~~~~~~~~~ + +- LS1012 + +Prerequisites +~~~~~~~~~~~~~ + +Below are some pre-requisites for executing PFE PMD on a PFE +compatible board: + +1. **ARM 64 Tool Chain** + + For example, the `*aarch64* Linaro Toolchain `_. + +2. **Linux Kernel** + + It can be obtained from `NXP's Github hosting `_. + +3. **Rootfile system** + + Any *aarch64* supporting filesystem can be used. For example, + Ubuntu 16.04 LTS (Xenial) or 18.04 (Bionic) userland which can be obtained + from `here `_. + +4. The ethernet device will be registered as virtual device, so pfe has dependency on + **rte_bus_vdev** library and it is mandatory to use `--vdev` with value `net_pfe` to + run DPDK application. + +The following dependencies are not part of DPDK and must be installed +separately: + +- **NXP Linux LSDK** + + NXP Layerscape software development kit (LSDK) includes support for family + of QorIQ® ARM-Architecture-based system on chip (SoC) processors + and corresponding boards. + + It includes the Linux board support packages (BSPs) for NXP SoCs, + a fully operational tool chain, kernel and board specific modules. + + LSDK and related information can be obtained from: `LSDK `_ + +- **pfe kernel module** + + pfe kernel module can be obtained from NXP Layerscape software development kit at + location `/lib/modules//kernel/drivers/staging/fsl_ppfe` in rootfs. + Module should be loaded using below command: + + .. code-block:: console + + insmod pfe.ko us=1 + + +Driver compilation and testing +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ + +Follow instructions available in the document +:ref:`compiling and testing a PMD for a NIC ` +to launch **testpmd** + +Additionally, PFE driver needs `--vdev` as an input with value `net_pfe` +to execute DPDK application. There is an optional parameter `intf` available +to specify port ID. PFE driver supports only two interfaces, so valid values +for `intf` are 0 and 1. +see the command below: + + .. code-block:: console + + --vdev="net_pfe0,intf=0" --vdev="net_pfe1,intf=1" -- ... + + +Limitations +~~~~~~~~~~~ + +- Multi buffer pool cannot be supported. diff --git a/src/spdk/dpdk/doc/guides/nics/qede.rst b/src/spdk/dpdk/doc/guides/nics/qede.rst new file mode 100644 index 000000000..5b2f86895 --- /dev/null +++ b/src/spdk/dpdk/doc/guides/nics/qede.rst @@ -0,0 +1,333 @@ +.. SPDX-License-Identifier: BSD-3-Clause + Copyright(c) 2016 QLogic Corporation + Copyright(c) 2017 Cavium, Inc + +QEDE Poll Mode Driver +====================== + +The QEDE poll mode driver library (**librte_pmd_qede**) implements support +for **QLogic FastLinQ QL4xxxx 10G/25G/40G/50G/100G Intelligent Ethernet Adapters (IEA) and Converged Network Adapters (CNA)** family of adapters as well as SR-IOV virtual functions (VF). It is supported on +several standard Linux distros like RHEL, SLES, Ubuntu etc. +It is compile-tested under FreeBSD OS. + +More information can be found at `QLogic Corporation's Website +`_. 
+ +Supported Features +------------------ + +- Unicast/Multicast filtering +- Promiscuous mode +- Allmulti mode +- Port hardware statistics +- Jumbo frames +- Multiple MAC address +- MTU change +- Default pause flow control +- Multiprocess aware +- Scatter-Gather +- Multiple Rx/Tx queues +- RSS (with RETA/hash table/key) +- TSS +- Stateless checksum offloads (IPv4/IPv6/TCP/UDP) +- LRO/TSO +- VLAN offload - Filtering and stripping +- N-tuple filter and flow director (limited support) +- NPAR (NIC Partitioning) +- SR-IOV VF +- GRE Tunneling offload +- GENEVE Tunneling offload +- VXLAN Tunneling offload +- MPLSoUDP Tx Tunneling offload +- Generic flow API + +Non-supported Features +---------------------- + +- SR-IOV PF + +Co-existence considerations +--------------------------- + +- QLogic FastLinQ QL4xxxx CNAs support Ethernet, RDMA, iSCSI and FCoE + functionalities. These functionalities are supported using + QLogic Linux kernel drivers qed, qede, qedr, qedi and qedf. DPDK is + supported on these adapters using qede PMD. + +- When SR-IOV is not enabled on the adapter, + QLogic Linux kernel drivers (qed, qede, qedr, qedi and qedf) and qede + PMD can’t be attached to different PFs on a given QLogic FastLinQ + QL4xxx adapter. + A given adapter needs to be completely used by DPDK or Linux drivers + Before binding DPDK driver to one or more PFs on the adapter, + please make sure to unbind Linux drivers from all PFs of the adapter. + If there are multiple adapters on the system, one or more adapters + can be used by DPDK driver completely and other adapters can be used + by Linux drivers completely. + +- When SR-IOV is enabled on the adapter, + Linux kernel drivers (qed, qede, qedr, qedi and qedf) can be bound + to the PFs of a given adapter and either qede PMD or Linux drivers + (qed and qede) can be bound to the VFs of the adapter. + +- For sharing an adapter between DPDK and Linux drivers, SRIOV needs + to be enabled. Bind all the PFs to Linux Drivers(qed/qede). Create + a VF on PFs where DPDK is desired and bind these VFs to qede_pmd. + Binding of PFs simultaneously to DPDK and Linux drivers on a given + adapter is not supported. + +Supported QLogic Adapters +------------------------- + +- QLogic FastLinQ QL4xxxx 10G/25G/40G/50G/100G Intelligent Ethernet Adapters (IEA) and Converged Network Adapters (CNA) + +Prerequisites +------------- + +- Requires storm firmware version **8.40.33.0**. Firmware may be available + inbox in certain newer Linux distros under the standard directory + ``E.g. /lib/firmware/qed/qed_init_values-8.40.33.0.bin``. + If the required firmware files are not available then download it from + `linux-firmware git repository `_. + +- Requires the NIC be updated minimally with **8.30.x.x** Management firmware(MFW) version supported for that NIC. + It is highly recommended that the NIC be updated with the latest available management firmware version to get latest feature set. + Management Firmware and Firmware Upgrade Utility for Cavium FastLinQ(r) branded adapters can be downloaded from + `Driver Download Center `_. + For downloading Firmware Upgrade Utility, select NIC category, model and Linux distro. + To update the management firmware, refer to the instructions in the Firmware Upgrade Utility Readme document. + For OEM branded adapters please follow the instruction provided by the OEM to update the Management Firmware on the NIC. + +- SR-IOV requires Linux PF driver version **8.20.x.x** or higher. 
+ If the required PF driver is not available then download it from + `QLogic Driver Download Center `_. + For downloading PF driver, select adapter category, model and Linux distro. + +Performance note +~~~~~~~~~~~~~~~~ + +- For better performance, it is recommended to use 4K or higher RX/TX rings. + +Config File Options +~~~~~~~~~~~~~~~~~~~ + +The following options can be modified in the ``.config`` file. Please note that +enabling debugging options may affect system performance. + +- ``CONFIG_RTE_LIBRTE_QEDE_PMD`` (default **y**) + + Toggle compilation of QEDE PMD driver. + +- ``CONFIG_RTE_LIBRTE_QEDE_DEBUG_TX`` (default **n**) + + Toggle display of transmit fast path run-time messages. + +- ``CONFIG_RTE_LIBRTE_QEDE_DEBUG_RX`` (default **n**) + + Toggle display of receive fast path run-time messages. + +- ``CONFIG_RTE_LIBRTE_QEDE_FW`` (default **""**) + + Gives absolute path of firmware file. + ``Eg: "/lib/firmware/qed/qed_init_values-8.40.33.0.bin"`` + Empty string indicates driver will pick up the firmware file + from the default location /lib/firmware/qed. + CAUTION this option is more for custom firmware, it is not + recommended for use under normal condition. + +Config notes +~~~~~~~~~~~~ + +When there are multiple adapters and/or large number of Rx/Tx queues +configured on the adapters, the default (2560) number of memzone +descriptors may not be enough. Please increase the number of memzone +descriptors to a higher number as needed. When sufficient number of +memzone descriptors are not configured, user can potentially run into +following error. + + .. code-block:: console + + EAL: memzone_reserve_aligned_thread_unsafe(): No more room in config + +Driver compilation and testing +------------------------------ + +Refer to the document :ref:`compiling and testing a PMD for a NIC ` +for details. + +RTE Flow Support +---------------- + +QLogic FastLinQ QL4xxxx NICs has support for the following patterns and +actions. + +Patterns: + +.. _table_qede_supported_flow_item_types: + +.. table:: Item types + + +----+--------------------------------+ + | # | Pattern Type | + +====+================================+ + | 1 | RTE_FLOW_ITEM_TYPE_IPV4 | + +----+--------------------------------+ + | 2 | RTE_FLOW_ITEM_TYPE_IPV6 | + +----+--------------------------------+ + | 3 | RTE_FLOW_ITEM_TYPE_UDP | + +----+--------------------------------+ + | 4 | RTE_FLOW_ITEM_TYPE_TCP | + +----+--------------------------------+ + +Actions: + +.. _table_qede_supported_ingress_action_types: + +.. table:: Ingress action types + + +----+--------------------------------+ + | # | Action Type | + +====+================================+ + | 1 | RTE_FLOW_ACTION_TYPE_QUEUE | + +----+--------------------------------+ + | 2 | RTE_FLOW_ACTION_TYPE_DROP | + +----+--------------------------------+ + +SR-IOV: Prerequisites and Sample Application Notes +-------------------------------------------------- + +This section provides instructions to configure SR-IOV with Linux OS. + +**Note**: librte_pmd_qede will be used to bind to SR-IOV VF device and Linux native kernel driver (qede) will function as SR-IOV PF driver. Requires PF driver to be 8.20.x.x or higher. + +#. Verify SR-IOV and ARI capability is enabled on the adapter using ``lspci``: + + .. code-block:: console + + lspci -s -vvv + + Example output: + + .. code-block:: console + + [...] + Capabilities: [1b8 v1] Alternative Routing-ID Interpretation (ARI) + [...] + Capabilities: [1c0 v1] Single Root I/O Virtualization (SR-IOV) + [...] + Kernel driver in use: igb_uio + +#. 
Load the kernel module: + + .. code-block:: console + + modprobe qede + + Example output: + + .. code-block:: console + + systemd-udevd[4848]: renamed network interface eth0 to ens5f0 + systemd-udevd[4848]: renamed network interface eth1 to ens5f1 + +#. Bring up the PF ports: + + .. code-block:: console + + ifconfig ens5f0 up + ifconfig ens5f1 up + +#. Create VF device(s): + + Echo the number of VFs to be created into ``"sriov_numvfs"`` sysfs entry + of the parent PF. + + Example output: + + .. code-block:: console + + echo 2 > /sys/devices/pci0000:00/0000:00:03.0/0000:81:00.0/sriov_numvfs + + +#. Assign VF MAC address: + + Assign MAC address to the VF using iproute2 utility. The syntax is:: + + ip link set vf mac + + Example output: + + .. code-block:: console + + ip link set ens5f0 vf 0 mac 52:54:00:2f:9d:e8 + + +#. PCI Passthrough: + + The VF devices may be passed through to the guest VM using ``virt-manager`` or + ``virsh``. QEDE PMD should be used to bind the VF devices in the guest VM + using the instructions from Driver compilation and testing section above. + + +#. Running testpmd + (Supply ``--log-level="pmd.net.qede.driver:info`` to view informational messages): + + Refer to the document + :ref:`compiling and testing a PMD for a NIC ` to run + ``testpmd`` application. + + Example output: + + .. code-block:: console + + testpmd -l 0,4-11 -n 4 -- -i --nb-cores=8 --portmask=0xf --rxd=4096 \ + --txd=4096 --txfreet=4068 --enable-rx-cksum --rxq=4 --txq=4 \ + --rss-ip --rss-udp + + [...] + + EAL: PCI device 0000:84:00.0 on NUMA socket 1 + EAL: probe driver: 1077:1634 rte_qede_pmd + EAL: Not managed by a supported kernel driver, skipped + EAL: PCI device 0000:84:00.1 on NUMA socket 1 + EAL: probe driver: 1077:1634 rte_qede_pmd + EAL: Not managed by a supported kernel driver, skipped + EAL: PCI device 0000:88:00.0 on NUMA socket 1 + EAL: probe driver: 1077:1656 rte_qede_pmd + EAL: PCI memory mapped at 0x7f738b200000 + EAL: PCI memory mapped at 0x7f738b280000 + EAL: PCI memory mapped at 0x7f738b300000 + PMD: Chip details : BB1 + PMD: Driver version : QEDE PMD 8.7.9.0_1.0.0 + PMD: Firmware version : 8.7.7.0 + PMD: Management firmware version : 8.7.8.0 + PMD: Firmware file : /lib/firmware/qed/qed_init_values_zipped-8.7.7.0.bin + [QEDE PMD: (84:00.0:dpdk-port-0)]qede_common_dev_init:macaddr \ + 00:0e:1e:d2:09:9c + [...] + [QEDE PMD: (84:00.0:dpdk-port-0)]qede_tx_queue_setup:txq 0 num_desc 4096 \ + tx_free_thresh 4068 socket 0 + [QEDE PMD: (84:00.0:dpdk-port-0)]qede_tx_queue_setup:txq 1 num_desc 4096 \ + tx_free_thresh 4068 socket 0 + [QEDE PMD: (84:00.0:dpdk-port-0)]qede_tx_queue_setup:txq 2 num_desc 4096 \ + tx_free_thresh 4068 socket 0 + [QEDE PMD: (84:00.0:dpdk-port-0)]qede_tx_queue_setup:txq 3 num_desc 4096 \ + tx_free_thresh 4068 socket 0 + [QEDE PMD: (84:00.0:dpdk-port-0)]qede_rx_queue_setup:rxq 0 num_desc 4096 \ + rx_buf_size=2148 socket 0 + [QEDE PMD: (84:00.0:dpdk-port-0)]qede_rx_queue_setup:rxq 1 num_desc 4096 \ + rx_buf_size=2148 socket 0 + [QEDE PMD: (84:00.0:dpdk-port-0)]qede_rx_queue_setup:rxq 2 num_desc 4096 \ + rx_buf_size=2148 socket 0 + [QEDE PMD: (84:00.0:dpdk-port-0)]qede_rx_queue_setup:rxq 3 num_desc 4096 \ + rx_buf_size=2148 socket 0 + [QEDE PMD: (84:00.0:dpdk-port-0)]qede_dev_start:port 0 + [QEDE PMD: (84:00.0:dpdk-port-0)]qede_dev_start:link status: down + [...] + Checking link statuses... 
+      Port 0 Link Up - speed 25000 Mbps - full-duplex
+      Port 1 Link Up - speed 25000 Mbps - full-duplex
+      Port 2 Link Up - speed 25000 Mbps - full-duplex
+      Port 3 Link Up - speed 25000 Mbps - full-duplex
+      Done
+      testpmd>
diff --git a/src/spdk/dpdk/doc/guides/nics/sfc_efx.rst b/src/spdk/dpdk/doc/guides/nics/sfc_efx.rst
new file mode 100644
index 000000000..be1c2fe1d
--- /dev/null
+++ b/src/spdk/dpdk/doc/guides/nics/sfc_efx.rst
@@ -0,0 +1,404 @@
+.. SPDX-License-Identifier: BSD-3-Clause
+   Copyright(c) 2019-2020 Xilinx, Inc.
+   Copyright(c) 2016-2019 Solarflare Communications Inc.
+
+   This software was jointly developed between OKTET Labs (under contract
+   for Solarflare) and Solarflare Communications, Inc.
+
+Solarflare libefx-based Poll Mode Driver
+========================================
+
+The SFC EFX PMD (**librte_pmd_sfc_efx**) provides poll mode driver support
+for the **Solarflare SFN7xxx and SFN8xxx** family of 10/40 Gbps adapters and
+the **Solarflare XtremeScale X2xxx** family of 10/25/40/50/100 Gbps adapters.
+SFC EFX PMD has support for the latest Linux and FreeBSD operating systems.
+
+More information can be found at the `Solarflare Communications website
+`_.
+
+
+Features
+--------
+
+SFC EFX PMD has support for:
+
+- Multiple transmit and receive queues
+
+- Link state information including link status change interrupt
+
+- IPv4/IPv6 TCP/UDP transmit checksum offload
+
+- Inner IPv4/IPv6 TCP/UDP transmit checksum offload
+
+- Port hardware statistics
+
+- Extended statistics (see Solarflare Server Adapter User's Guide for
+  the statistics description)
+
+- Basic flow control
+
+- MTU update
+
+- Jumbo frames up to 9K
+
+- Promiscuous mode
+
+- Allmulticast mode
+
+- TCP segmentation offload (TSO) including VXLAN and GENEVE encapsulated
+
+- Multicast MAC filter
+
+- IPv4/IPv6 TCP/UDP receive checksum offload
+
+- Inner IPv4/IPv6 TCP/UDP receive checksum offload
+
+- Received packet type information
+
+- Receive side scaling (RSS)
+
+- RSS hash
+
+- Scattered Rx DMA for packets that are larger than a single Rx descriptor
+
+- Receive queue interrupts
+
+- Deferred receive and transmit queue start
+
+- Transmit VLAN insertion (if the running firmware variant supports it)
+
+- Flow API
+
+- Loopback
+
+
+Non-supported Features
+----------------------
+
+The features not yet supported include:
+
+- Priority-based flow control
+
+- Configurable RX CRC stripping (always stripped)
+
+- Header split on receive
+
+- VLAN filtering
+
+- VLAN stripping
+
+- LRO
+
+
+Limitations
+-----------
+
+Due to requirements on receive buffer alignment and usage of the receive
+buffer for the auxiliary packet information provided by the NIC, up to
+269 extra bytes (a 14-byte prefix plus up to 255 bytes of end padding) may be
+required in the receive buffer.
+This should be taken into account when the mbuf pool for receive is created.
+
+
+Equal stride super-buffer mode
+~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
+
+When the receive queue uses equal stride super-buffer DMA mode, one HW Rx
+descriptor carries many Rx buffers which contiguously follow each other
+with some stride (equal to the total size of rte_mbuf as a mempool object).
+Each Rx buffer is an independent rte_mbuf.
+However, a dedicated mempool manager must be used when the mempool for the Rx
+queue is created. The manager must support dequeue of a contiguous
+block of objects and provide the mempool info API to get the block size.
+
+Another limitation of the equal stride super-buffer mode, imposed by the
+firmware, is that it allows only a single RSS context.
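+
+As an illustration (not part of the original guide), the following minimal
+sketch shows one way such a pool might be created, assuming the DPDK
+``bucket`` mempool driver (``CONFIG_RTE_DRIVER_MEMPOOL_BUCKET``) is enabled;
+the pool name and sizes are arbitrary examples, not recommended values:
+
+.. code-block:: c
+
+   #include <rte_lcore.h>
+   #include <rte_mbuf.h>
+   #include <rte_mempool.h>
+
+   /* Create an mbuf pool whose mempool ops support contiguous block
+    * dequeue, as needed by the equal stride super-buffer Rx mode.
+    * Assumption: the "bucket" mempool driver is built into DPDK.
+    */
+   static struct rte_mempool *
+   create_esps_rx_pool(void)
+   {
+           return rte_pktmbuf_pool_create_by_ops("sfc_rx_pool",
+                                                 8192, /* mbufs in pool */
+                                                 256,  /* per-lcore cache */
+                                                 0,    /* private area size */
+                                                 RTE_MBUF_DEFAULT_BUF_SIZE,
+                                                 rte_socket_id(),
+                                                 "bucket");
+   }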
+
+
+Tunnels support
+---------------
+
+NVGRE, VXLAN and GENEVE tunnels are supported on SFN8xxx and X2xxx family
+adapters with the full-feature firmware variant running.
+**sfboot** should be used to configure the NIC to run the full-feature firmware variant.
+See the Solarflare Server Adapter User's Guide for details.
+
+SFN8xxx and X2xxx family adapters provide either inner or outer packet classes.
+If the adapter firmware advertises support for tunnels then the PMD
+configures the hardware to report inner classes, and outer classes are
+not reported in received packets.
+However, for VXLAN and GENEVE tunnels the PMD does report UDP as the
+outer layer 4 packet type.
+
+SFN8xxx and X2xxx family adapters report GENEVE packets as VXLAN.
+If UDP ports are configured for only one tunnel type then it is safe to
+treat the VXLAN packet type indication as the corresponding UDP tunnel type.
+
+
+Flow API support
+----------------
+
+Supported attributes:
+
+- Ingress
+
+Supported pattern items:
+
+- VOID
+
+- ETH (exact match of source/destination addresses, individual/group match
+  of destination address, EtherType in the outer frame and exact match of
+  destination addresses, individual/group match of destination address in
+  the inner frame)
+
+- VLAN (exact match of VID, double-tagging is supported)
+
+- IPV4 (exact match of source/destination addresses,
+  IP transport protocol)
+
+- IPV6 (exact match of source/destination addresses,
+  IP transport protocol)
+
+- TCP (exact match of source/destination ports)
+
+- UDP (exact match of source/destination ports)
+
+- VXLAN (exact match of VXLAN network identifier)
+
+- GENEVE (exact match of virtual network identifier, only Ethernet (0x6558)
+  protocol type is supported)
+
+- NVGRE (exact match of virtual subnet ID)
+
+Supported actions:
+
+- VOID
+
+- QUEUE
+
+- RSS
+
+- DROP
+
+- FLAG (supported only with ef10_essb Rx datapath)
+
+- MARK (supported only with ef10_essb Rx datapath)
+
+Validating flow rules depends on the firmware variant.
+
+The :ref:`flow_isolated_mode` is supported.
+
+Ethernet destination individual/group match
+~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
+
+The Ethernet item supports I/G matching if only the corresponding bit is set
+in the destination address mask. If the destination address in the spec is
+multicast, it matches all multicast (and broadcast) packets, otherwise it
+matches unicast packets that are not filtered by other flow rules.
+
+Exceptions to flow rules
+~~~~~~~~~~~~~~~~~~~~~~~~
+
+There is a list of exceptional flow rule patterns which will not be
+accepted by the PMD. A pattern will be rejected if at least one of the
+following conditions is met:
+
+- Filtering by IPv4 or IPv6 EtherType without pattern items of the internet
+  layer and above.
+
+- The last item is IPV4 or IPV6, and it's empty.
+
+- Filtering by TCP or UDP IP transport protocol without pattern items of the
+  transport layer and above.
+
+- The last item is TCP or UDP, and it's empty.
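+
+As an illustration (not part of the original guide), the sketch below builds a
+rule from supported items and actions while avoiding the exceptions above: the
+UDP item carries a destination port match so it is not empty. The queue index
+and UDP port are arbitrary examples:
+
+.. code-block:: c
+
+   #include <rte_byteorder.h>
+   #include <rte_ethdev.h>
+   #include <rte_flow.h>
+
+   /* Direct IPv4 UDP packets with destination port 4789 to Rx queue 1. */
+   static struct rte_flow *
+   create_udp_to_queue_rule(uint16_t port_id)
+   {
+           struct rte_flow_attr attr = { .ingress = 1 };
+           struct rte_flow_item_udp udp_spec = {
+                   .hdr = { .dst_port = RTE_BE16(4789) },
+           };
+           struct rte_flow_item_udp udp_mask = {
+                   .hdr = { .dst_port = RTE_BE16(0xffff) },
+           };
+           struct rte_flow_item pattern[] = {
+                   { .type = RTE_FLOW_ITEM_TYPE_ETH },
+                   { .type = RTE_FLOW_ITEM_TYPE_IPV4 },
+                   { .type = RTE_FLOW_ITEM_TYPE_UDP,
+                     .spec = &udp_spec, .mask = &udp_mask },
+                   { .type = RTE_FLOW_ITEM_TYPE_END },
+           };
+           struct rte_flow_action_queue queue = { .index = 1 };
+           struct rte_flow_action actions[] = {
+                   { .type = RTE_FLOW_ACTION_TYPE_QUEUE, .conf = &queue },
+                   { .type = RTE_FLOW_ACTION_TYPE_END },
+           };
+           struct rte_flow_error error;
+
+           /* Whether the rule is accepted depends on the firmware variant. */
+           if (rte_flow_validate(port_id, &attr, pattern, actions, &error) != 0)
+                   return NULL;
+
+           return rte_flow_create(port_id, &attr, pattern, actions, &error);
+   }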
+
+
+Supported NICs
+--------------
+
+- Solarflare XtremeScale Adapters:
+
+   - Solarflare X2522 Dual Port SFP28 10/25GbE Adapter
+
+   - Solarflare X2541 Single Port QSFP28 10/25G/100G Adapter
+
+   - Solarflare X2542 Dual Port QSFP28 10/25G/100G Adapter
+
+- Solarflare Flareon [Ultra] Server Adapters:
+
+   - Solarflare SFN8522 Dual Port SFP+ Server Adapter
+
+   - Solarflare SFN8522M Dual Port SFP+ Server Adapter
+
+   - Solarflare SFN8042 Dual Port QSFP+ Server Adapter
+
+   - Solarflare SFN8542 Dual Port QSFP+ Server Adapter
+
+   - Solarflare SFN8722 Dual Port SFP+ OCP Server Adapter
+
+   - Solarflare SFN7002F Dual Port SFP+ Server Adapter
+
+   - Solarflare SFN7004F Quad Port SFP+ Server Adapter
+
+   - Solarflare SFN7042Q Dual Port QSFP+ Server Adapter
+
+   - Solarflare SFN7122F Dual Port SFP+ Server Adapter
+
+   - Solarflare SFN7124F Quad Port SFP+ Server Adapter
+
+   - Solarflare SFN7142Q Dual Port QSFP+ Server Adapter
+
+   - Solarflare SFN7322F Precision Time Synchronization Server Adapter
+
+
+Prerequisites
+-------------
+
+- Requires firmware version:
+
+  - SFN7xxx: **4.7.1.1001** or higher
+
+  - SFN8xxx: **6.0.2.1004** or higher
+
+Visit `Solarflare Support Downloads `_ to get
+Solarflare Utilities (either Linux or FreeBSD) with the latest firmware.
+Follow the instructions from the Solarflare Server Adapter User's Guide to
+update the firmware and configure the adapter.
+
+
+Pre-Installation Configuration
+------------------------------
+
+
+Config File Options
+~~~~~~~~~~~~~~~~~~~
+
+The following options can be modified in the ``.config`` file.
+Please note that enabling debugging options may affect system performance.
+
+- ``CONFIG_RTE_LIBRTE_SFC_EFX_PMD`` (default **y**)
+
+  Enable compilation of the Solarflare libefx-based poll-mode driver.
+
+- ``CONFIG_RTE_LIBRTE_SFC_EFX_DEBUG`` (default **n**)
+
+  Enable compilation of the extra run-time consistency checks.
+
+
+Per-Device Parameters
+~~~~~~~~~~~~~~~~~~~~~
+
+The following per-device parameters can be passed via the EAL PCI device
+whitelist option like "-w 02:00.0,arg1=value1,...".
+
+Case-insensitive 1/y/yes/on or 0/n/no/off may be used to specify
+boolean parameter values.
+
+- ``rx_datapath`` [auto|efx|ef10|ef10_esps] (default **auto**)
+
+  Choose the receive datapath implementation.
+  **auto** allows the driver itself to make a choice based on firmware
+  features available and required by the datapath implementation.
+  **efx** chooses the libefx-based datapath which supports Rx scatter.
+  **ef10** chooses the EF10 (SFN7xxx, SFN8xxx, X2xxx) native datapath which is
+  more efficient than the libefx-based one and provides richer packet type
+  classification.
+  **ef10_esps** chooses the SFNX2xxx equal stride packed stream datapath
+  which may be used on the DPDK firmware variant only
+  (see notes about its limitations above).
+
+- ``tx_datapath`` [auto|efx|ef10|ef10_simple] (default **auto**)
+
+  Choose the transmit datapath implementation.
+  **auto** allows the driver itself to make a choice based on firmware
+  features available and required by the datapath implementation.
+  **efx** chooses the libefx-based datapath which supports VLAN insertion
+  (full-feature firmware variant only), TSO and multi-segment mbufs.
+  Mbuf segments may come from different mempools, and mbuf reference
+  counters are treated responsibly.
+  **ef10** chooses the EF10 (SFN7xxx, SFN8xxx, X2xxx) native datapath which is
+  more efficient than the libefx-based one but has no VLAN insertion support yet.
+  Mbuf segments may come from different mempools, and mbuf reference
+  counters are treated responsibly.
+  **ef10_simple** chooses the EF10 (SFN7xxx, SFN8xxx, X2xxx) native datapath which
+  is even faster than **ef10** but does not support multi-segment
+  mbufs, disallows multiple mempools and neglects mbuf reference counters.
+
+- ``perf_profile`` [auto|throughput|low-latency] (default **throughput**)
+
+  Choose hardware tuning to be optimized for either throughput or
+  low-latency.
+  **auto** allows the NIC firmware to make a choice based on
+  installed licenses and the firmware variant configured using **sfboot**.
+
+- ``stats_update_period_ms`` [long] (default **1000**)
+
+  Adjust the period in milliseconds for updating port hardware statistics.
+  The accepted range is 0 to 65535. The value of **0** may be used
+  to disable periodic statistics updates. One should note that it's
+  only possible to set an arbitrary value on SFN8xxx and X2xxx provided that
+  the firmware version is 6.2.1.1033 or higher; otherwise any positive
+  value will select a fixed update period of **1000** milliseconds.
+
+- ``fw_variant`` [dont-care|full-feature|ultra-low-latency|
+  capture-packed-stream|dpdk] (default **dont-care**)
+
+  Choose the preferred firmware variant to use. In order for the selected
+  option to have an effect, the **sfboot** utility must be configured with the
+  **auto** firmware-variant option. The preferred firmware variant applies to
+  all ports on the NIC.
+  **dont-care** ensures that the driver can attach to an unprivileged function.
+  The datapath firmware type to use is controlled by the **sfboot**
+  utility.
+  **full-feature** chooses full featured firmware.
+  **ultra-low-latency** chooses firmware with fewer features but lower latency.
+  **capture-packed-stream** chooses firmware for SolarCapture packed stream
+  mode.
+  **dpdk** chooses DPDK firmware with equal stride super-buffer Rx mode
+  for higher Rx packet rate and packet mark support, and a firmware subvariant
+  without checksumming on transmit for higher Tx packet rate if
+  checksumming is not required.
+
+- ``rxd_wait_timeout_ns`` [long] (default **200 us**)
+
+  Adjust the timeout in nanoseconds for head-of-line blocking to wait for
+  Rx descriptors.
+  The accepted range is 0 to 400 ms.
+  Flow control should be enabled to make it work.
+  The value of **0** disables it and packets are dropped immediately.
+  When a packet is dropped because of no Rx descriptors, the
+  ``rx_nodesc_drop_cnt`` counter grows.
+  The feature is supported only by the DPDK firmware variant when the equal
+  stride super-buffer Rx mode is used.
+
+
+Dynamic Logging Parameters
+~~~~~~~~~~~~~~~~~~~~~~~~~~
+
+One may leverage the EAL option "--log-level" to change the default levels
+for the log types supported by the driver. The option is used with
+an argument typically consisting of two parts separated by a colon.
+
+The level value is the last part, which takes a symbolic name (or integer).
+The log type is the former part, which may use shell match syntax.
+Depending on the choice of the expression, the given log level may
+be used either for some specific log type or for a subset of types.
+
+SFC EFX PMD provides the following log types available for control:
+
+- ``pmd.net.sfc.driver`` (default level is **notice**)
+
+  Affects driver-wide messages unrelated to any particular devices.
+
+- ``pmd.net.sfc.main`` (default level is **notice**)
+
+  Matches a subset of per-port log types registered during runtime.
+  A full name for a particular type may be obtained by appending a
+  dot and a PCI device identifier (``XXXX:XX:XX.X``) to the prefix.
+
+- ``pmd.net.sfc.mcdi`` (default level is **notice**)
+
+  Extra logging of the communication with the NIC's management CPU.
+  The format of the log is consumed by the Solarflare netlogdecode
+  cross-platform tool. May be managed per-port, as explained above.
diff --git a/src/spdk/dpdk/doc/guides/nics/softnic.rst b/src/spdk/dpdk/doc/guides/nics/softnic.rst
new file mode 100644
index 000000000..c8962d90b
--- /dev/null
+++ b/src/spdk/dpdk/doc/guides/nics/softnic.rst
@@ -0,0 +1,370 @@
+.. SPDX-License-Identifier: BSD-3-Clause
+   Copyright(c) 2018 Intel Corporation.
+
+Soft NIC Poll Mode Driver
+=========================
+
+The Soft NIC allows building custom NIC pipelines in software. The Soft NIC pipeline
+is DIY and reconfigurable through ``firmware`` (a DPDK Packet Framework script).
+
+The Soft NIC leverages the DPDK Packet Framework libraries (librte_port,
+librte_table and librte_pipeline) to make it modular, flexible and extensible
+with new functionality. Please refer to the DPDK Programmer's Guide, Chapter
+``Packet Framework`` and the DPDK Sample Application User Guide,
+Chapter ``IP Pipeline Application`` for more details.
+
+The Soft NIC is configured through the standard DPDK ethdev API (ethdev, flow,
+QoS, security). The internal framework is not externally visible.
+
+Key benefits:
+ - Can be used to augment HW NICs with missing features.
+ - Allows consumption of advanced DPDK features without application redesign.
+ - Allows an out-of-the-box performance boost for DPDK consumer applications simply by
+   instantiating this type of Ethernet device.
+
+Flow
+----
+* ``Device creation``: Each Soft NIC instance is a virtual device.
+
+* ``Device start``: The Soft NIC firmware script is executed every time the device
+  is started. The firmware script typically creates several internal objects,
+  such as: memory pools, SW queues, traffic manager, action profiles, pipelines,
+  etc.
+
+* ``Device stop``: All the internal objects that were previously created by the
+  firmware script during device start are now destroyed.
+
+* ``Device run``: Each Soft NIC device needs one or several CPU cores to run.
+  The firmware script maps each internal pipeline to a CPU core. Multiple
+  pipelines can be mapped to the same CPU core. In order for a given pipeline
+  assigned to CPU core X to run, the application needs to periodically call on
+  CPU core X the `rte_pmd_softnic_run()` function for the current Soft NIC
+  device.
+
+* ``Application run``: The application reads packets from the Soft NIC device RX
+  queues and writes packets to the Soft NIC device TX queues.
+
+Supported Operating Systems
+---------------------------
+
+Any Linux distribution fulfilling the conditions described in the ``System Requirements``
+section of :ref:`the DPDK documentation ` or the *DPDK
+Release Notes*.
+
+Build options
+-------------
+
+The default PMD configuration is available in the common_linux configuration file:
+
+CONFIG_RTE_LIBRTE_PMD_SOFTNIC=y
+
+Once the DPDK is built, all the DPDK applications include support for the
+Soft NIC PMD.
+
+Soft NIC PMD arguments
+----------------------
+
+The user can specify the below arguments in the EAL ``--vdev`` options to create the
+Soft NIC device instance:
+
+    --vdev "net_softnic0,firmware=firmware.cli,conn_port=8086"
+
+#. ``firmware``: path to the firmware script used for Soft NIC configuration.
+   The example "firmware" script is provided at `drivers/net/softnic/`.
+   (Optional: No, Default = NA)
+
+#. ``conn_port``: TCP connection port (non-zero value) used by a remote client
+   (for example telnet, netcat, etc.) to connect to and configure the Soft NIC device at run-time.
+   (Optional: yes, Default value: 0, no connection with external client)
+
+#. ``cpu_id``: NUMA node ID. (Optional: yes, Default value: 0)
+
+#. ``tm_n_queues``: number of traffic manager scheduler queues. The traffic manager
+   is based on the DPDK *librte_sched* library. (Optional: yes, Default value: 65,536 queues)
+
+#. ``tm_qsize0``: size of scheduler queue 0 per traffic class of the pipes/subscribers.
+   (Optional: yes, Default: 64)
+
+#. ``tm_qsize1``: size of scheduler queue 1 per traffic class of the pipes/subscribers.
+   (Optional: yes, Default: 64)
+
+#. ``tm_qsize2``: size of scheduler queue 2 per traffic class of the pipes/subscribers.
+   (Optional: yes, Default: 64)
+
+#. ``tm_qsize3``: size of scheduler queue 3 per traffic class of the pipes/subscribers.
+   (Optional: yes, Default: 64)
+
+
+Soft NIC testing
+----------------
+
+* Run the testpmd application in Soft NIC forwarding mode with the loopback feature
+  enabled on the Soft NIC port:
+
+  .. code-block:: console
+
+     ./testpmd -c 0x3 --vdev 'net_softnic0,firmware=