From 39a0d9019076a29a57a38a817b1830aef0708c11 Mon Sep 17 00:00:00 2001 From: Daniel Baumann Date: Sun, 28 Apr 2024 14:26:09 +0200 Subject: Adding upstream version 20230210. Signed-off-by: Daniel Baumann --- cxgb4/configs/t4-config-default.txt | 562 ++++++++++++++++++++++++++++++ cxgb4/configs/t5-config-default.txt | 613 +++++++++++++++++++++++++++++++++ cxgb4/configs/t5-config-hashfilter.txt | 467 +++++++++++++++++++++++++ cxgb4/configs/t6-config-default.txt | 603 ++++++++++++++++++++++++++++++++ cxgb4/configs/t6-config-hashfilter.txt | 430 +++++++++++++++++++++++ 5 files changed, 2675 insertions(+) create mode 100644 cxgb4/configs/t4-config-default.txt create mode 100644 cxgb4/configs/t5-config-default.txt create mode 100644 cxgb4/configs/t5-config-hashfilter.txt create mode 100644 cxgb4/configs/t6-config-default.txt create mode 100644 cxgb4/configs/t6-config-hashfilter.txt (limited to 'cxgb4/configs') diff --git a/cxgb4/configs/t4-config-default.txt b/cxgb4/configs/t4-config-default.txt new file mode 100644 index 0000000..1774b4d --- /dev/null +++ b/cxgb4/configs/t4-config-default.txt @@ -0,0 +1,562 @@ +# Chelsio T4 Factory Default configuration file. +# +# Copyright (C) 2010-2014 Chelsio Communications. All rights reserved. +# +# DO NOT MODIFY THIS FILE UNDER ANY CIRCUMSTANCES. MODIFICATION OF +# THIS FILE WILL RESULT IN A NON-FUNCTIONAL T4 ADAPTER AND MAY RESULT +# IN PHYSICAL DAMAGE TO T4 ADAPTERS. + +# This file provides the default, power-on configuration for 4-port T4-based +# adapters shipped from the factory. These defaults are designed to address +# the needs of the vast majority of T4 customers. The basic idea is to have +# a default configuration which allows a customer to plug a T4 adapter in and +# have it work regardless of OS, driver or application except in the most +# unusual and/or demanding customer applications. +# +# Many of the T4 resources which are described by this configuration are +# finite. 
This requires balancing the configuration/operation needs of +# device drivers across OSes and a large number of customer applications. +# +# Some of the more important resources to allocate and their constraints are: +# 1. Virtual Interfaces: 128. +# 2. Ingress Queues with Free Lists: 1024. PCI-E SR-IOV Virtual Functions +# must use a power of 2 Ingress Queues. +# 3. Egress Queues: 128K. PCI-E SR-IOV Virtual Functions must use a +# power of 2 Egress Queues. +# 4. MSI-X Vectors: 1088. A complication here is that the PCI-E SR-IOV +# Virtual Functions based off of a Physical Function all get the +# same number of MSI-X Vectors as the base Physical Function. +# Additionally, regardless of whether Virtual Functions are enabled or +# not, their MSI-X "needs" are counted by the PCI-E implementation. +# And finally, all Physical Functions capable of supporting Virtual +# Functions (PF0-3) must have the same number of configured TotalVFs in +# their SR-IOV Capabilities. +# 5. Multi-Port Support (MPS) TCAM: 336 entries to support MAC destination +# address matching on Ingress Packets. +# +# Some of the important OS/Driver resource needs are: +# 6. Some OS Drivers will manage all resources through a single Physical +# Function (currently PF0 but it could be any Physical Function). Thus, +# this "Unified PF" will need to have enough resources allocated to it +# to allow for this. And because of the MSI-X resource allocation +# constraints mentioned above, this probably means we'll either have to +# severely limit the TotalVFs if we continue to use PF0 as the Unified PF +# or we'll need to move the Unified PF into the PF4-7 range since those +# Physical Functions don't have any Virtual Functions associated with +# them. +# 7. Some OS Drivers will manage different ports and functions (NIC, +# storage, etc.) on different Physical Functions. For example, NIC +# functions for ports 0-3 on PF0-3, FCoE on PF4, iSCSI on PF5, etc.
+# +# Some of the customer application needs which need to be accommodated: +# 8. Some customers will want to support large CPU count systems with +# good scaling. Thus, we'll need to accommodate a number of +# Ingress Queues and MSI-X Vectors to allow up to some number of CPUs +# to be involved per port and per application function. For example, +# in the case where all ports and application functions will be +# managed via a single Unified PF and we want to accommodate scaling up +# to 8 CPUs, we would want: +# +# 4 ports * +# 3 application functions (NIC, FCoE, iSCSI) per port * +# 8 Ingress Queue/MSI-X Vectors per application function +# +# for a total of 96 Ingress Queues and MSI-X Vectors on the Unified PF. +# (Plus a few for Firmware Event Queues, etc.) +# +# 9. Some customers will want to use T4's PCI-E SR-IOV Capability to allow +# Virtual Machines to directly access T4 functionality via SR-IOV +# Virtual Functions and "PCI Device Passthrough" -- this is especially +# true for the NIC application functionality. (Note that there is +# currently no ability to use the TOE, FCoE, iSCSI, etc. via Virtual +# Functions so this is in fact solely limited to NIC.) +# + + +# Global configuration settings. +# +[global] + rss_glb_config_mode = basicvirtual + rss_glb_config_options = tnlmapen,hashtoeplitz,tnlalllkp + + # The following Scatter Gather Engine (SGE) settings assume a 4KB Host + # Page Size and a 64B L1 Cache Line Size. It programs the + # EgrStatusPageSize and IngPadBoundary to 64B and the PktShift to 2. + # If a Master PF Driver finds itself on a machine with different + # parameters, then the Master PF Driver is responsible for initializing + # these parameters to appropriate values. + # + # Notes: + # 1. The Free List Buffer Sizes below are raw and the firmware will + # round them up to the Ingress Padding Boundary. + # 2. The SGE Timer Values below are expressed below in microseconds. 
+ # The firmware will convert these values to Core Clock Ticks when + # it processes the configuration parameters. + # + reg[0x1008] = 0x40810/0x21c70 # SGE_CONTROL + reg[0x100c] = 0x22222222 # SGE_HOST_PAGE_SIZE + reg[0x10a0] = 0x01040810 # SGE_INGRESS_RX_THRESHOLD + reg[0x1044] = 4096 # SGE_FL_BUFFER_SIZE0 + reg[0x1048] = 65536 # SGE_FL_BUFFER_SIZE1 + reg[0x104c] = 1536 # SGE_FL_BUFFER_SIZE2 + reg[0x1050] = 9024 # SGE_FL_BUFFER_SIZE3 + reg[0x1054] = 9216 # SGE_FL_BUFFER_SIZE4 + reg[0x1058] = 2048 # SGE_FL_BUFFER_SIZE5 + reg[0x105c] = 128 # SGE_FL_BUFFER_SIZE6 + reg[0x1060] = 8192 # SGE_FL_BUFFER_SIZE7 + reg[0x1064] = 16384 # SGE_FL_BUFFER_SIZE8 + reg[0x10a4] = 0xa000a000/0xf000f000 # SGE_DBFIFO_STATUS + reg[0x10a8] = 0x2000/0x2000 # SGE_DOORBELL_CONTROL + sge_timer_value = 5, 10, 20, 50, 100, 200 # SGE_TIMER_VALUE* in usecs + + # enable TP_OUT_CONFIG.IPIDSPLITMODE + reg[0x7d04] = 0x00010000/0x00010000 + + # disable TP_PARA_REG3.RxFragEn + reg[0x7d6c] = 0x00000000/0x00007000 + + reg[0x7dc0] = 0x0e2f8849 # TP_SHIFT_CNT + + # TP_VLAN_PRI_MAP to select filter tuples + # filter tuples : fragmentation, mpshittype, macmatch, ethertype, + # protocol, tos, vlan, vnic_id, port, fcoe + # valid filterModes are described the Terminator 4 Data Book + filterMode = fragmentation, mpshittype, protocol, vlan, port, fcoe + + # filter tuples enforced in LE active region (equal to or subset of filterMode) + filterMask = protocol, fcoe + + # Percentage of dynamic memory (in either the EDRAM or external MEM) + # to use for TP RX payload + tp_pmrx = 34 + + # TP RX payload page size + tp_pmrx_pagesize = 64K + + # TP number of RX channels + tp_nrxch = 0 # 0 (auto) = 1 + + # Percentage of dynamic memory (in either the EDRAM or external MEM) + # to use for TP TX payload + tp_pmtx = 32 + + # TP TX payload page size + tp_pmtx_pagesize = 64K + + # TP number of TX channels + tp_ntxch = 0 # 0 (auto) = equal number of ports + + # TP OFLD MTUs + tp_mtus = 88, 256, 512, 576, 808, 1024, 1280, 1488, 
1500, 2002, 2048, 4096, 4352, 8192, 9000, 9600 + + # ULPRX iSCSI Page Sizes + reg[0x19168] = 0x04020100 # 64K, 16K, 8K and 4K + +# Some "definitions" to make the rest of this a bit more readable. We support +# 4 ports, 3 functions (NIC, FCoE and iSCSI), scaling up to 8 "CPU Queue Sets" +# per function per port ... +# +# NMSIX = 1088 # available MSI-X Vectors +# NVI = 128 # available Virtual Interfaces +# NMPSTCAM = 336 # MPS TCAM entries +# +# NPORTS = 4 # ports +# NCPUS = 8 # CPUs we want to support scalably +# NFUNCS = 3 # functions per port (NIC, FCoE, iSCSI) + +# Breakdown of Virtual Interface/Queue/Interrupt resources for the "Unified +# PF" which many OS Drivers will use to manage most or all functions. +# +# Each Ingress Queue can use one MSI-X interrupt but some Ingress Queues can +# use Forwarded Interrupt Ingress Queues. For these latter, an Ingress Queue +# would be created and the Queue ID of a Forwarded Interrupt Ingress Queue +# will be specified as the "Ingress Queue Asynchronous Destination Index." +# Thus, the number of MSI-X Vectors assigned to the Unified PF will be less +# than or equal to the number of Ingress Queues ... 
+# +# NVI_NIC = 4 # NIC access to NPORTS +# NFLIQ_NIC = 32 # NIC Ingress Queues with Free Lists +# NETHCTRL_NIC = 32 # NIC Ethernet Control/TX Queues +# NEQ_NIC = 64 # NIC Egress Queues (FL, ETHCTRL/TX) +# NMPSTCAM_NIC = 16 # NIC MPS TCAM Entries (NPORTS*4) +# NMSIX_NIC = 32 # NIC MSI-X Interrupt Vectors (FLIQ) +# +# NVI_OFLD = 0 # Offload uses NIC function to access ports +# NFLIQ_OFLD = 16 # Offload Ingress Queues with Free Lists +# NETHCTRL_OFLD = 0 # Offload Ethernet Control/TX Queues +# NEQ_OFLD = 16 # Offload Egress Queues (FL) +# NMPSTCAM_OFLD = 0 # Offload MPS TCAM Entries (uses NIC's) +# NMSIX_OFLD = 16 # Offload MSI-X Interrupt Vectors (FLIQ) +# +# NVI_RDMA = 0 # RDMA uses NIC function to access ports +# NFLIQ_RDMA = 4 # RDMA Ingress Queues with Free Lists +# NETHCTRL_RDMA = 0 # RDMA Ethernet Control/TX Queues +# NEQ_RDMA = 4 # RDMA Egress Queues (FL) +# NMPSTCAM_RDMA = 0 # RDMA MPS TCAM Entries (uses NIC's) +# NMSIX_RDMA = 4 # RDMA MSI-X Interrupt Vectors (FLIQ) +# +# NEQ_WD = 128 # Wire Direct TX Queues and FLs +# NETHCTRL_WD = 64 # Wire Direct TX Queues +# NFLIQ_WD = 64 # Wire Direct Ingress Queues with Free Lists +# +# NVI_ISCSI = 4 # ISCSI access to NPORTS +# NFLIQ_ISCSI = 4 # ISCSI Ingress Queues with Free Lists +# NETHCTRL_ISCSI = 0 # ISCSI Ethernet Control/TX Queues +# NEQ_ISCSI = 4 # ISCSI Egress Queues (FL) +# NMPSTCAM_ISCSI = 4 # ISCSI MPS TCAM Entries (NPORTS) +# NMSIX_ISCSI = 4 # ISCSI MSI-X Interrupt Vectors (FLIQ) +# +# NVI_FCOE = 4 # FCOE access to NPORTS +# NFLIQ_FCOE = 34 # FCOE Ingress Queues with Free Lists +# NETHCTRL_FCOE = 32 # FCOE Ethernet Control/TX Queues +# NEQ_FCOE = 66 # FCOE Egress Queues (FL) +# NMPSTCAM_FCOE = 32 # FCOE MPS TCAM Entries (NPORTS) +# NMSIX_FCOE = 34 # FCOE MSI-X Interrupt Vectors (FLIQ) + +# Two extra Ingress Queues per function for Firmware Events and Forwarded +# Interrupts, and two extra interrupts per function for Firmware Events (or a +# Forwarded Interrupt Queue) and General Interrupts per function.
+# +# NFLIQ_EXTRA = 6 # "extra" Ingress Queues 2*NFUNCS (Firmware and +# # Forwarded Interrupts +# NMSIX_EXTRA = 6 # extra interrupts 2*NFUNCS (Firmware and +# # General Interrupts + +# Microsoft HyperV resources. The HyperV Virtual Ingress Queues will have +# their interrupts forwarded to another set of Forwarded Interrupt Queues. +# +# NVI_HYPERV = 16 # VMs we want to support +# NVIIQ_HYPERV = 2 # Virtual Ingress Queues with Free Lists per VM +# NFLIQ_HYPERV = 40 # VIQs + NCPUS Forwarded Interrupt Queues +# NEQ_HYPERV = 32 # VIQs Free Lists +# NMPSTCAM_HYPERV = 16 # MPS TCAM Entries (NVI_HYPERV) +# NMSIX_HYPERV = 8 # NCPUS Forwarded Interrupt Queues + +# Adding all of the above Unified PF resource needs together: (NIC + OFLD + +# RDMA + ISCSI + FCOE + EXTRA + HYPERV) +# +# NVI_UNIFIED = 28 +# NFLIQ_UNIFIED = 106 +# NETHCTRL_UNIFIED = 32 +# NEQ_UNIFIED = 124 +# NMPSTCAM_UNIFIED = 40 +# +# The sum of all the MSI-X resources above is 74 MSI-X Vectors but we'll round +# that up to 128 to make sure the Unified PF doesn't run out of resources. +# +# NMSIX_UNIFIED = 128 +# +# The Storage PFs could need up to NPORTS*NCPUS + NMSIX_EXTRA MSI-X Vectors +# which is 34 but they're probably safe with 32. +# +# NMSIX_STORAGE = 32 + +# Note: The UnifiedPF is PF4 which doesn't have any Virtual Functions +# associated with it. Thus, the MSI-X Vector allocations we give to the +# UnifiedPF aren't inherited by any Virtual Functions. As a result we can +# provision many more Virtual Functions than we can if the UnifiedPF were +# one of PF0-3. +# + +# All of the below PCI-E parameters are actually stored in various *_init.txt +# files. We include them below essentially as comments. +# +# For PF0-3 we assign 8 vectors each for NIC Ingress Queues of the associated +# ports 0-3. +# +# For PF4, the Unified PF, we give it an MSI-X Table Size as outlined above. +# +# For PF5-6 we assign enough MSI-X Vectors to support FCoE and iSCSI +# storage applications across all four possible ports. 
+# +# Additionally, since the UnifiedPF isn't one of the per-port Physical +# Functions, we give the UnifiedPF and the PF0-3 Physical Functions +# different PCI Device IDs which will allow Unified and Per-Port Drivers +# to directly select the type of Physical Function to which they wish to be +# attached. +# +# Note that the actual values used for the PCI-E Intellectual Property will be +# 1 less than those below since that's the way it "counts" things. For +# readability, we use the number we actually mean ... +# +# PF0_INT = 8 # NCPUS +# PF1_INT = 8 # NCPUS +# PF2_INT = 8 # NCPUS +# PF3_INT = 8 # NCPUS +# PF0_3_INT = 32 # PF0_INT + PF1_INT + PF2_INT + PF3_INT +# +# PF4_INT = 128 # NMSIX_UNIFIED +# PF5_INT = 32 # NMSIX_STORAGE +# PF6_INT = 32 # NMSIX_STORAGE +# PF7_INT = 0 # Nothing Assigned +# PF4_7_INT = 192 # PF4_INT + PF5_INT + PF6_INT + PF7_INT +# +# PF0_7_INT = 224 # PF0_3_INT + PF4_7_INT +# +# With the above we can get 17 VFs/PF0-3 (limited by 336 MPS TCAM entries) +# but we'll lower that to 16 to make our total 64 and a nice power of 2 ... +# +# NVF = 16 + +# For those OSes which manage different ports on different PFs, we need +# only enough resources to support a single port's NIC application functions +# on PF0-3. The below assumes that we're only doing NIC with NCPUS "Queue +# Sets" for ports 0-3. The FCoE and iSCSI functions for such OSes will be +# managed on the "storage PFs" (see below).
+# +[function "0"] + nvf = 16 # NVF on this function + wx_caps = all # write/execute permissions for all commands + r_caps = all # read permissions for all commands + nvi = 1 # 1 port + niqflint = 8 # NCPUS "Queue Sets" + nethctrl = 8 # NCPUS "Queue Sets" + neq = 16 # niqflint + nethctrl Egress Queues + nexactf = 8 # number of exact MPSTCAM MAC filters + cmask = all # access to all channels + pmask = 0x1 # access to only one port + +[function "1"] + nvf = 16 # NVF on this function + wx_caps = all # write/execute permissions for all commands + r_caps = all # read permissions for all commands + nvi = 1 # 1 port + niqflint = 8 # NCPUS "Queue Sets" + nethctrl = 8 # NCPUS "Queue Sets" + neq = 16 # niqflint + nethctrl Egress Queues + nexactf = 8 # number of exact MPSTCAM MAC filters + cmask = all # access to all channels + pmask = 0x2 # access to only one port + +[function "2"] + nvf = 16 # NVF on this function + wx_caps = all # write/execute permissions for all commands + r_caps = all # read permissions for all commands + nvi = 1 # 1 port + niqflint = 8 # NCPUS "Queue Sets" + nethctrl = 8 # NCPUS "Queue Sets" + neq = 16 # niqflint + nethctrl Egress Queues + nexactf = 8 # number of exact MPSTCAM MAC filters + cmask = all # access to all channels + pmask = 0x4 # access to only one port + +[function "3"] + nvf = 16 # NVF on this function + wx_caps = all # write/execute permissions for all commands + r_caps = all # read permissions for all commands + nvi = 1 # 1 port + niqflint = 8 # NCPUS "Queue Sets" + nethctrl = 8 # NCPUS "Queue Sets" + neq = 16 # niqflint + nethctrl Egress Queues + nexactf = 8 # number of exact MPSTCAM MAC filters + cmask = all # access to all channels + pmask = 0x8 # access to only one port + +# Some OS Drivers manage all application functions for all ports via PF4. +# Thus we need to provide a large number of resources here. 
For Egress +# Queues we need to account for both TX Queues as well as Free List Queues +# (because the host is responsible for producing Free List Buffers for the +# hardware to consume). +# +[function "4"] + wx_caps = all # write/execute permissions for all commands + r_caps = all # read permissions for all commands + nvi = 28 # NVI_UNIFIED + niqflint = 170 # NFLIQ_UNIFIED + NLFIQ_WD + nethctrl = 100 # NETHCTRL_UNIFIED + NETHCTRL_WD + neq = 256 # NEQ_UNIFIED + NEQ_WD + nexactf = 40 # NMPSTCAM_UNIFIED + cmask = all # access to all channels + pmask = all # access to all four ports ... + nethofld = 1024 # number of user mode ethernet flow contexts + nroute = 32 # number of routing region entries + nclip = 32 # number of clip region entries + nfilter = 496 # number of filter region entries + nserver = 496 # number of server region entries + nhash = 12288 # number of hash region entries + protocol = nic_vm, ofld, rddp, rdmac, iscsi_initiator_pdu, iscsi_target_pdu + tp_l2t = 3072 + tp_ddp = 3 + tp_ddp_iscsi = 2 + tp_stag = 3 + tp_pbl = 10 + tp_rq = 13 + +# We have FCoE and iSCSI storage functions on PF5 and PF6 each of which may +# need to have Virtual Interfaces on each of the four ports with up to NCPUS +# "Queue Sets" each. +# +[function "5"] + wx_caps = all # write/execute permissions for all commands + r_caps = all # read permissions for all commands + nvi = 4 # NPORTS + niqflint = 34 # NPORTS*NCPUS + NMSIX_EXTRA + nethctrl = 32 # NPORTS*NCPUS + neq = 64 # NPORTS*NCPUS * 2 (FL, ETHCTRL/TX) + nexactf = 4 # NPORTS + cmask = all # access to all channels + pmask = all # access to all four ports ... 
+ nserver = 16 + nhash = 2048 + tp_l2t = 1020 + protocol = iscsi_initiator_fofld + tp_ddp_iscsi = 2 + iscsi_ntask = 2048 + iscsi_nsess = 2048 + iscsi_nconn_per_session = 1 + iscsi_ninitiator_instance = 64 + +[function "6"] + wx_caps = all # write/execute permissions for all commands + r_caps = all # read permissions for all commands + nvi = 4 # NPORTS + niqflint = 34 # NPORTS*NCPUS + NMSIX_EXTRA + nethctrl = 32 # NPORTS*NCPUS + neq = 66 # NPORTS*NCPUS * 2 (FL, ETHCTRL/TX) + 2 (EXTRA) + nexactf = 32 # NPORTS + adding 28 exact entries for FCoE + # which is OK since < MIN(SUM PF0..3, PF4) + # and we never load PF0..3 and PF4 concurrently + cmask = all # access to all channels + pmask = all # access to all four ports ... + nhash = 2048 + tp_l2t = 4 + protocol = fcoe_initiator + tp_ddp = 1 + fcoe_nfcf = 16 + fcoe_nvnp = 32 + fcoe_nssn = 1024 + +# The following function, 1023, is not an actual PCIE function but is used to +# configure and reserve firmware internal resources that come from the global +# resource pool. +# +[function "1023"] + wx_caps = all # write/execute permissions for all commands + r_caps = all # read permissions for all commands + nvi = 4 # NVI_UNIFIED + cmask = all # access to all channels + pmask = all # access to all four ports ... + nexactf = 8 # NPORTS + DCBX + + nfilter = 16 # number of filter region entries + +# For Virtual functions, we only allow NIC functionality and we only allow +# access to one port (1 << PF). Note that because of limitations in the +# Scatter Gather Engine (SGE) hardware which checks writes to VF KDOORBELL +# and GTS registers, the number of Ingress and Egress Queues must be a power +# of 2. +# +[function "0/*"] # NVF + wx_caps = 0x82 # DMAQ | VF + r_caps = 0x86 # DMAQ | VF | PORT + nvi = 1 # 1 port + niqflint = 4 # 2 "Queue Sets" + NXIQ + nethctrl = 2 # 2 "Queue Sets" + neq = 4 # 2 "Queue Sets" * 2 + nexactf = 4 + cmask = all # access to all channels + pmask = 0x1 # access to only one port ... 
+ +[function "1/*"] # NVF + wx_caps = 0x82 # DMAQ | VF + r_caps = 0x86 # DMAQ | VF | PORT + nvi = 1 # 1 port + niqflint = 4 # 2 "Queue Sets" + NXIQ + nethctrl = 2 # 2 "Queue Sets" + neq = 4 # 2 "Queue Sets" * 2 + nexactf = 4 + cmask = all # access to all channels + pmask = 0x2 # access to only one port ... + +[function "2/*"] # NVF + wx_caps = 0x82 # DMAQ | VF + r_caps = 0x86 # DMAQ | VF | PORT + nvi = 1 # 1 port + niqflint = 4 # 2 "Queue Sets" + NXIQ + nethctrl = 2 # 2 "Queue Sets" + neq = 4 # 2 "Queue Sets" * 2 + nexactf = 4 + cmask = all # access to all channels + pmask = 0x4 # access to only one port ... + +[function "3/*"] # NVF + wx_caps = 0x82 # DMAQ | VF + r_caps = 0x86 # DMAQ | VF | PORT + nvi = 1 # 1 port + niqflint = 4 # 2 "Queue Sets" + NXIQ + nethctrl = 2 # 2 "Queue Sets" + neq = 4 # 2 "Queue Sets" * 2 + nexactf = 4 + cmask = all # access to all channels + pmask = 0x8 # access to only one port ... + +# MPS features a 196608 bytes ingress buffer that is used for ingress buffering +# for packets from the wire as well as the loopback path of the L2 switch. 
The +# folling params control how the buffer memory is distributed and the L2 flow +# control settings: +# +# bg_mem: %-age of mem to use for port/buffer group +# lpbk_mem: %-age of port/bg mem to use for loopback +# hwm: high watermark; bytes available when starting to send pause +# frames (in units of 0.1 MTU) +# lwm: low watermark; bytes remaining when sending 'unpause' frame +# (in inuits of 0.1 MTU) +# dwm: minimum delta between high and low watermark (in units of 100 +# Bytes) +# +# + +[port "0"] + dcb = ppp, dcbx # configure for DCB PPP and enable DCBX offload + bg_mem = 25 + lpbk_mem = 25 + hwm = 30 + lwm = 15 + dwm = 30 + dcb_app_tlv[0] = 0x8906, ethertype, 3 + dcb_app_tlv[1] = 0x8914, ethertype, 3 + dcb_app_tlv[2] = 3260, socketnum, 5 + +[port "1"] + dcb = ppp, dcbx + bg_mem = 25 + lpbk_mem = 25 + hwm = 30 + lwm = 15 + dwm = 30 + dcb_app_tlv[0] = 0x8906, ethertype, 3 + dcb_app_tlv[1] = 0x8914, ethertype, 3 + dcb_app_tlv[2] = 3260, socketnum, 5 + +[port "2"] + dcb = ppp, dcbx + bg_mem = 25 + lpbk_mem = 25 + hwm = 30 + lwm = 15 + dwm = 30 + dcb_app_tlv[0] = 0x8906, ethertype, 3 + dcb_app_tlv[1] = 0x8914, ethertype, 3 + dcb_app_tlv[2] = 3260, socketnum, 5 + +[port "3"] + dcb = ppp, dcbx + bg_mem = 25 + lpbk_mem = 25 + hwm = 30 + lwm = 15 + dwm = 30 + dcb_app_tlv[0] = 0x8906, ethertype, 3 + dcb_app_tlv[1] = 0x8914, ethertype, 3 + dcb_app_tlv[2] = 3260, socketnum, 5 + +[fini] + version = 0x1425001c + checksum = 0x5ceab41e + +# Total resources used by above allocations: +# Virtual Interfaces: 104 +# Ingress Queues/w Free Lists and Interrupts: 526 +# Egress Queues: 702 +# MPS TCAM Entries: 336 +# MSI-X Vectors: 736 +# Virtual Functions: 64 diff --git a/cxgb4/configs/t5-config-default.txt b/cxgb4/configs/t5-config-default.txt new file mode 100644 index 0000000..44fdfad --- /dev/null +++ b/cxgb4/configs/t5-config-default.txt @@ -0,0 +1,613 @@ +# Chelsio T5 Factory Default configuration file. +# +# Copyright (C) 2010-2015 Chelsio Communications. 
All rights reserved. +# +# DO NOT MODIFY THIS FILE UNDER ANY CIRCUMSTANCES. MODIFICATION OF THIS FILE +# WILL RESULT IN A NON-FUNCTIONAL ADAPTER AND MAY RESULT IN PHYSICAL DAMAGE +# TO ADAPTERS. + + +# This file provides the default, power-on configuration for 4-port T5-based +# adapters shipped from the factory. These defaults are designed to address +# the needs of the vast majority of Terminator customers. The basic idea is to +# have a default configuration which allows a customer to plug a Terminator +# adapter in and have it work regardless of OS, driver or application except in +# the most unusual and/or demanding customer applications. +# +# Many of the Terminator resources which are described by this configuration +# are finite. This requires balancing the configuration/operation needs of +# device drivers across OSes and a large number of customer applications. +# +# Some of the more important resources to allocate and their constraints are: +# 1. Virtual Interfaces: 256. +# 2. Ingress Queues with Free Lists: 1024. +# 3. Egress Queues: 128K. +# 4. MSI-X Vectors: 1088. +# 5. Multi-Port Support (MPS) TCAM: 336 entries to support MAC destination +# address matching on Ingress Packets. +# +# Some of the important OS/Driver resource needs are: +# 6. Some OS Drivers will manage all resources through a single Physical +# Function (currently PF4 but it could be any Physical Function). +# 7. Some OS Drivers will manage different ports and functions (NIC, +# storage, etc.) on different Physical Functions. For example, NIC +# functions for ports 0-3 on PF0-3, FCoE on PF4, iSCSI on PF5, etc. +# +# Some of the customer application needs which need to be accommodated: +# 8. Some customers will want to support large CPU count systems with +# good scaling. Thus, we'll need to accommodate a number of +# Ingress Queues and MSI-X Vectors to allow up to some number of CPUs +# to be involved per port and per application function.
For example, +# in the case where all ports and application functions will be +# managed via a single Unified PF and we want to accommodate scaling up +# to 8 CPUs, we would want: +# +# 4 ports * +# 3 application functions (NIC, FCoE, iSCSI) per port * +# 8 Ingress Queue/MSI-X Vectors per application function +# +# for a total of 96 Ingress Queues and MSI-X Vectors on the Unified PF. +# (Plus a few for Firmware Event Queues, etc.) +# +# 9. Some customers will want to use PCI-E SR-IOV Capability to allow Virtual +# Machines to directly access T6 functionality via SR-IOV Virtual Functions +# and "PCI Device Passthrough" -- this is especially true for the NIC +# application functionality. +# + + +# Global configuration settings. +# +[global] + rss_glb_config_mode = basicvirtual + rss_glb_config_options = tnlmapen,hashtoeplitz,tnlalllkp + + # PL_TIMEOUT register + pl_timeout_value = 10000 # the timeout value in units of us + + # The following Scatter Gather Engine (SGE) settings assume a 4KB Host + # Page Size and a 64B L1 Cache Line Size. It programs the + # EgrStatusPageSize and IngPadBoundary to 64B and the PktShift to 2. + # If a Master PF Driver finds itself on a machine with different + # parameters, then the Master PF Driver is responsible for initializing + # these parameters to appropriate values. + # + # Notes: + # 1. The Free List Buffer Sizes below are raw and the firmware will + # round them up to the Ingress Padding Boundary. + # 2. The SGE Timer Values below are expressed below in microseconds. + # The firmware will convert these values to Core Clock Ticks when + # it processes the configuration parameters. 
+ # + reg[0x1008] = 0x40810/0x21c70 # SGE_CONTROL + reg[0x100c] = 0x22222222 # SGE_HOST_PAGE_SIZE + reg[0x10a0] = 0x01040810 # SGE_INGRESS_RX_THRESHOLD + reg[0x1044] = 4096 # SGE_FL_BUFFER_SIZE0 + reg[0x1048] = 65536 # SGE_FL_BUFFER_SIZE1 + reg[0x104c] = 1536 # SGE_FL_BUFFER_SIZE2 + reg[0x1050] = 9024 # SGE_FL_BUFFER_SIZE3 + reg[0x1054] = 9216 # SGE_FL_BUFFER_SIZE4 + reg[0x1058] = 2048 # SGE_FL_BUFFER_SIZE5 + reg[0x105c] = 128 # SGE_FL_BUFFER_SIZE6 + reg[0x1060] = 8192 # SGE_FL_BUFFER_SIZE7 + reg[0x1064] = 16384 # SGE_FL_BUFFER_SIZE8 + reg[0x10a4] = 0x00280000/0x3ffc0000 # SGE_DBFIFO_STATUS + reg[0x1118] = 0x00002800/0x00003c00 # SGE_DBFIFO_STATUS2 + reg[0x10a8] = 0x402000/0x402000 # SGE_DOORBELL_CONTROL + + # SGE_THROTTLE_CONTROL + bar2throttlecount = 500 # bar2throttlecount in us + + sge_timer_value = 5, 10, 20, 50, 100, 200 # SGE_TIMER_VALUE* in usecs + + + reg[0x1124] = 0x00000400/0x00000400 # SGE_CONTROL2, enable VFIFO; if + # SGE_VFIFO_SIZE is not set, then + # firmware will set it up in function + # of number of egress queues used + + reg[0x1130] = 0x00d5ffeb # SGE_DBP_FETCH_THRESHOLD, fetch + # threshold set to queue depth + # minus 128-entries for FL and HP + # queues, and 0xfff for LP which + # prompts the firmware to set it up + # in function of egress queues + # used + + reg[0x113c] = 0x0002ffc0 # SGE_VFIFO_SIZE, set to 0x2ffc0 which + # prompts the firmware to set it up in + # function of number of egress queues + # used + + # enable TP_OUT_CONFIG.IPIDSPLITMODE + reg[0x7d04] = 0x00010000/0x00010000 + + # disable TP_PARA_REG3.RxFragEn + reg[0x7d6c] = 0x00000000/0x00007000 + + # enable TP_PARA_REG6.EnableCSnd + reg[0x7d78] = 0x00000400/0x00000000 + + reg[0x7dc0] = 0x0e2f8849 # TP_SHIFT_CNT + + # TP_VLAN_PRI_MAP to select filter tuples and enable ServerSram + # filter control: compact, fcoemask + # server sram : srvrsram + # filter tuples : fragmentation, mpshittype, macmatch, ethertype, + # protocol, tos, vlan, vnic_id, port, fcoe + # valid filterModes 
are described the Terminator 5 Data Book + filterMode = fcoemask, srvrsram, fragmentation, mpshittype, protocol, vlan, port, fcoe + + # filter tuples enforced in LE active region (equal to or subset of filterMode) + filterMask = protocol, fcoe + + # Percentage of dynamic memory (in either the EDRAM or external MEM) + # to use for TP RX payload + tp_pmrx = 30 + + # TP RX payload page size + tp_pmrx_pagesize = 64K + + # TP number of RX channels + tp_nrxch = 0 # 0 (auto) = 1 + + # Percentage of dynamic memory (in either the EDRAM or external MEM) + # to use for TP TX payload + tp_pmtx = 50 + + # TP TX payload page size + tp_pmtx_pagesize = 64K + + # TP number of TX channels + tp_ntxch = 0 # 0 (auto) = equal number of ports + + # TP OFLD MTUs + tp_mtus = 88, 256, 512, 576, 808, 1024, 1280, 1488, 1500, 2002, 2048, 4096, 4352, 8192, 9000, 9600 + + # TP_GLOBAL_CONFIG + reg[0x7d08] = 0x00000800/0x00000800 # set IssFromCplEnable + + # TP_PC_CONFIG + reg[0x7d48] = 0x00000000/0x00000400 # clear EnableFLMError + + # TP_PARA_REG0 + reg[0x7d60] = 0x06000000/0x07000000 # set InitCWND to 6 + + # ULPRX iSCSI Page Sizes + reg[0x19168] = 0x04020100 # 64K, 16K, 8K and 4K + + # LE_DB_CONFIG + reg[0x19c04] = 0x00400000/0x00400000 # LE Server SRAM Enable + + # MC configuration + mc_mode_brc[0] = 1 # mc0 - 1: enable BRC, 0: enable RBC + mc_mode_brc[1] = 1 # mc1 - 1: enable BRC, 0: enable RBC + + # ULP_TX_CONFIG + reg[0x8dc0] = 0x00000004/0x00000004 # Enable more error msg for ... + # TPT error. + +# Some "definitions" to make the rest of this a bit more readable. We support +# 4 ports, 3 functions (NIC, FCoE and iSCSI), scaling up to 8 "CPU Queue Sets" +# per function per port ... 
+# +# NMSIX = 1088 # available MSI-X Vectors +# NVI = 128 # available Virtual Interfaces +# NMPSTCAM = 336 # MPS TCAM entries +# +# NPORTS = 4 # ports +# NCPUS = 8 # CPUs we want to support scalably +# NFUNCS = 3 # functions per port (NIC, FCoE, iSCSI) + +# Breakdown of Virtual Interface/Queue/Interrupt resources for the "Unified +# PF" which many OS Drivers will use to manage most or all functions. +# +# Each Ingress Queue can use one MSI-X interrupt but some Ingress Queues can +# use Forwarded Interrupt Ingress Queues. For these latter, an Ingress Queue +# would be created and the Queue ID of a Forwarded Interrupt Ingress Queue +# will be specified as the "Ingress Queue Asynchronous Destination Index." +# Thus, the number of MSI-X Vectors assigned to the Unified PF will be less +# than or equal to the number of Ingress Queues ... +# +# NVI_NIC = 4 # NIC access to NPORTS +# NFLIQ_NIC = 32 # NIC Ingress Queues with Free Lists +# NETHCTRL_NIC = 32 # NIC Ethernet Control/TX Queues +# NEQ_NIC = 64 # NIC Egress Queues (FL, ETHCTRL/TX) +# NMPSTCAM_NIC = 16 # NIC MPS TCAM Entries (NPORTS*4) +# NMSIX_NIC = 32 # NIC MSI-X Interrupt Vectors (FLIQ) +# +# NVI_OFLD = 0 # Offload uses NIC function to access ports +# NFLIQ_OFLD = 16 # Offload Ingress Queues with Free Lists +# NETHCTRL_OFLD = 0 # Offload Ethernet Control/TX Queues +# NEQ_OFLD = 16 # Offload Egress Queues (FL) +# NMPSTCAM_OFLD = 0 # Offload MPS TCAM Entries (uses NIC's) +# NMSIX_OFLD = 16 # Offload MSI-X Interrupt Vectors (FLIQ) +# +# NVI_RDMA = 0 # RDMA uses NIC function to access ports +# NFLIQ_RDMA = 4 # RDMA Ingress Queues with Free Lists +# NETHCTRL_RDMA = 0 # RDMA Ethernet Control/TX Queues +# NEQ_RDMA = 4 # RDMA Egress Queues (FL) +# NMPSTCAM_RDMA = 0 # RDMA MPS TCAM Entries (uses NIC's) +# NMSIX_RDMA = 4 # RDMA MSI-X Interrupt Vectors (FLIQ) +# +# NEQ_WD = 128 # Wire Direct TX Queues and FLs +# NETHCTRL_WD = 64 # Wire Direct TX Queues +# NFLIQ_WD = 64 # Wire Direct Ingress Queues with Free Lists +# +#
NVI_ISCSI = 4 # ISCSI access to NPORTS +# NFLIQ_ISCSI = 4 # ISCSI Ingress Queues with Free Lists +# NETHCTRL_ISCSI = 0 # ISCSI Ethernet Control/TX Queues +# NEQ_ISCSI = 4 # ISCSI Egress Queues (FL) +# NMPSTCAM_ISCSI = 4 # ISCSI MPS TCAM Entries (NPORTS) +# NMSIX_ISCSI = 4 # ISCSI MSI-X Interrupt Vectors (FLIQ) +# +# NVI_FCOE = 4 # FCOE access to NPORTS +# NFLIQ_FCOE = 34 # FCOE Ingress Queues with Free Lists +# NETHCTRL_FCOE = 32 # FCOE Ethernet Control/TX Queues +# NEQ_FCOE = 66 # FCOE Egress Queues (FL) +# NMPSTCAM_FCOE = 32 # FCOE MPS TCAM Entries (NPORTS) +# NMSIX_FCOE = 34 # FCOE MSI-X Interrupt Vectors (FLIQ) + +# Two extra Ingress Queues per function for Firmware Events and Forwarded +# Interrupts, and two extra interrupts per function for Firmware Events (or a +# Forwarded Interrupt Queue) and General Interrupts per function. +# +# NFLIQ_EXTRA = 6 # "extra" Ingress Queues 2*NFUNCS (Firmware and +# # Forwarded Interrupts) +# NMSIX_EXTRA = 6 # extra interrupts 2*NFUNCS (Firmware and +# # General Interrupts) + +# Microsoft HyperV resources. The HyperV Virtual Ingress Queues will have +# their interrupts forwarded to another set of Forwarded Interrupt Queues. +# +# NVI_HYPERV = 16 # VMs we want to support +# NVIIQ_HYPERV = 2 # Virtual Ingress Queues with Free Lists per VM +# NFLIQ_HYPERV = 40 # VIQs + NCPUS Forwarded Interrupt Queues +# NEQ_HYPERV = 32 # VIQs Free Lists +# NMPSTCAM_HYPERV = 16 # MPS TCAM Entries (NVI_HYPERV) +# NMSIX_HYPERV = 8 # NCPUS Forwarded Interrupt Queues + +# Adding all of the above Unified PF resource needs together: (NIC + OFLD + +# RDMA + ISCSI + FCOE + EXTRA + HYPERV) +# +# NVI_UNIFIED = 28 +# NFLIQ_UNIFIED = 106 +# NETHCTRL_UNIFIED = 32 +# NEQ_UNIFIED = 124 +# NMPSTCAM_UNIFIED = 40 +# +# The sum of all the MSI-X resources above is 74 MSI-X Vectors but we'll round +# that up to 128 to make sure the Unified PF doesn't run out of resources. 
+# +# NMSIX_UNIFIED = 128 +# +# The Storage PFs could need up to NPORTS*NCPUS + NMSIX_EXTRA MSI-X Vectors +# which is 34 but they're probably safe with 32. +# +# NMSIX_STORAGE = 32 + +# Note: The UnifiedPF is PF4 which doesn't have any Virtual Functions +# associated with it. Thus, the MSI-X Vector allocations we give to the +# UnifiedPF aren't inherited by any Virtual Functions. As a result we can +# provision many more Virtual Functions than we can if the UnifiedPF were +# one of PF0-3. +# + +# All of the below PCI-E parameters are actually stored in various *_init.txt +# files. We include them below essentially as comments. +# +# For PF0-3 we assign 8 vectors each for NIC Ingress Queues of the associated +# ports 0-3. +# +# For PF4, the Unified PF, we give it an MSI-X Table Size as outlined above. +# +# For PF5-6 we assign enough MSI-X Vectors to support FCoE and iSCSI +# storage applications across all four possible ports. +# +# Additionally, since the UnifiedPF isn't one of the per-port Physical +# Functions, we give the UnifiedPF and the PF0-3 Physical Functions +# different PCI Device IDs which will allow Unified and Per-Port Drivers +# to directly select the type of Physical Function to which they wish to be +# attached. +# +# Note that the actual values used for the PCI-E Intellectual Property will be +# 1 less than those below since that's the way it "counts" things. For +# readability, we use the number we actually mean ... 
+# +# PF0_INT = 8 # NCPUS +# PF1_INT = 8 # NCPUS +# PF2_INT = 8 # NCPUS +# PF3_INT = 8 # NCPUS +# PF0_3_INT = 32 # PF0_INT + PF1_INT + PF2_INT + PF3_INT +# +# PF4_INT = 128 # NMSIX_UNIFIED +# PF5_INT = 32 # NMSIX_STORAGE +# PF6_INT = 32 # NMSIX_STORAGE +# PF7_INT = 0 # Nothing Assigned +# PF4_7_INT = 192 # PF4_INT + PF5_INT + PF6_INT + PF7_INT +# +# PF0_7_INT = 224 # PF0_3_INT + PF4_7_INT +# +# With the above we can get 17 VFs/PF0-3 (limited by 336 MPS TCAM entries) +# but we'll lower that to 16 to make our total 64 and a nice power of 2 ... +# +# NVF = 16 + + +# For those OSes which manage different ports on different PFs, we need +# only enough resources to support a single port's NIC application functions +# on PF0-3. The below assumes that we're only doing NIC with NCPUS "Queue +# Sets" for ports 0-3. The FCoE and iSCSI functions for such OSes will be +# managed on the "storage PFs" (see below). +# +[function "0"] + nvf = 16 # NVF on this function + wx_caps = all # write/execute permissions for all commands + r_caps = all # read permissions for all commands + nvi = 1 # 1 port + niqflint = 8 # NCPUS "Queue Sets" + nethctrl = 8 # NCPUS "Queue Sets" + neq = 16 # niqflint + nethctrl Egress Queues + nexactf = 8 # number of exact MPSTCAM MAC filters + cmask = all # access to all channels + pmask = 0x1 # access to only one port + + +[function "1"] + nvf = 16 # NVF on this function + wx_caps = all # write/execute permissions for all commands + r_caps = all # read permissions for all commands + nvi = 1 # 1 port + niqflint = 8 # NCPUS "Queue Sets" + nethctrl = 8 # NCPUS "Queue Sets" + neq = 16 # niqflint + nethctrl Egress Queues + nexactf = 8 # number of exact MPSTCAM MAC filters + cmask = all # access to all channels + pmask = 0x2 # access to only one port + + +[function "2"] + nvf = 16 # NVF on this function + wx_caps = all # write/execute permissions for all commands + r_caps = all # read permissions for all commands + nvi = 1 # 1 port + niqflint = 8 # NCPUS "Queue 
Sets" + nethctrl = 8 # NCPUS "Queue Sets" + neq = 16 # niqflint + nethctrl Egress Queues + nexactf = 8 # number of exact MPSTCAM MAC filters + cmask = all # access to all channels + pmask = 0x4 # access to only one port + + +[function "3"] + nvf = 16 # NVF on this function + wx_caps = all # write/execute permissions for all commands + r_caps = all # read permissions for all commands + nvi = 1 # 1 port + niqflint = 8 # NCPUS "Queue Sets" + nethctrl = 8 # NCPUS "Queue Sets" + neq = 16 # niqflint + nethctrl Egress Queues + nexactf = 8 # number of exact MPSTCAM MAC filters + cmask = all # access to all channels + pmask = 0x8 # access to only one port + + +# Some OS Drivers manage all application functions for all ports via PF4. +# Thus we need to provide a large number of resources here. For Egress +# Queues we need to account for both TX Queues as well as Free List Queues +# (because the host is responsible for producing Free List Buffers for the +# hardware to consume). +# +[function "4"] + wx_caps = all # write/execute permissions for all commands + r_caps = all # read permissions for all commands + nvi = 28 # NVI_UNIFIED + niqflint = 170 # NFLIQ_UNIFIED + NLFIQ_WD + nethctrl = 100 # NETHCTRL_UNIFIED + NETHCTRL_WD + neq = 256 # NEQ_UNIFIED + NEQ_WD + nqpcq = 12288 + nexactf = 40 # NMPSTCAM_UNIFIED + cmask = all # access to all channels + pmask = all # access to all four ports ... 
+ nethofld = 1024 # number of user mode ethernet flow contexts + nroute = 32 # number of routing region entries + nclip = 32 # number of clip region entries + nfilter = 496 # number of filter region entries + nserver = 496 # number of server region entries + nhash = 12288 # number of hash region entries + protocol = nic_vm, ofld, rddp, rdmac, iscsi_initiator_pdu, iscsi_target_pdu, iscsi_t10dif, nic_hashfilter + tp_l2t = 3072 + tp_ddp = 2 + tp_ddp_iscsi = 2 + tp_stag = 2 + tp_pbl = 5 + tp_rq = 7 + + +# We have FCoE and iSCSI storage functions on PF5 and PF6 each of which may +# need to have Virtual Interfaces on each of the four ports with up to NCPUS +# "Queue Sets" each. +# +[function "5"] + wx_caps = all # write/execute permissions for all commands + r_caps = all # read permissions for all commands + nvi = 4 # NPORTS + niqflint = 34 # NPORTS*NCPUS + NMSIX_EXTRA + nethctrl = 32 # NPORTS*NCPUS + neq = 64 # NPORTS*NCPUS * 2 (FL, ETHCTRL/TX) + nexactf = 16 # (NPORTS *(no of snmc grp + 1 hw mac) + 1 anmc grp)) rounded to 16. + cmask = all # access to all channels + pmask = all # access to all four ports ... + nserver = 16 + nhash = 2048 + tp_l2t = 1020 + protocol = iscsi_initiator_fofld + tp_ddp_iscsi = 2 + iscsi_ntask = 2048 + iscsi_nsess = 2048 + iscsi_nconn_per_session = 1 + iscsi_ninitiator_instance = 64 + + +[function "6"] + wx_caps = all # write/execute permissions for all commands + r_caps = all # read permissions for all commands + nvi = 4 # NPORTS + niqflint = 34 # NPORTS*NCPUS + NMSIX_EXTRA + nethctrl = 32 # NPORTS*NCPUS + neq = 66 # NPORTS*NCPUS * 2 (FL, ETHCTRL/TX) + 2 (EXTRA) + nexactf = 32 # NPORTS + adding 28 exact entries for FCoE + # which is OK since < MIN(SUM PF0..3, PF4) + # and we never load PF0..3 and PF4 concurrently + cmask = all # access to all channels + pmask = all # access to all four ports ... 
+ nhash = 2048 + tp_l2t = 4 + protocol = fcoe_initiator + tp_ddp = 2 + fcoe_nfcf = 16 + fcoe_nvnp = 32 + fcoe_nssn = 1024 + + +# The following function, 1023, is not an actual PCIE function but is used to +# configure and reserve firmware internal resources that come from the global +# resource pool. +# +[function "1023"] + wx_caps = all # write/execute permissions for all commands + r_caps = all # read permissions for all commands + nvi = 4 # NVI_UNIFIED + cmask = all # access to all channels + pmask = all # access to all four ports ... + nexactf = 8 # NPORTS + DCBX + + nfilter = 16 # number of filter region entries + + +# For Virtual functions, we only allow NIC functionality and we only allow +# access to one port (1 << PF). Note that because of limitations in the +# Scatter Gather Engine (SGE) hardware which checks writes to VF KDOORBELL +# and GTS registers, the number of Ingress and Egress Queues must be a power +# of 2. +# +[function "0/*"] # NVF + wx_caps = 0x82 # DMAQ | VF + r_caps = 0x86 # DMAQ | VF | PORT + nvi = 1 # 1 port + niqflint = 6 # 2 "Queue Sets" + NXIQ + nethctrl = 4 # 2 "Queue Sets" + neq = 8 # 2 "Queue Sets" * 2 + nexactf = 4 + cmask = all # access to all channels + pmask = 0x1 # access to only one port ... + + +[function "1/*"] # NVF + wx_caps = 0x82 # DMAQ | VF + r_caps = 0x86 # DMAQ | VF | PORT + nvi = 1 # 1 port + niqflint = 6 # 2 "Queue Sets" + NXIQ + nethctrl = 4 # 2 "Queue Sets" + neq = 8 # 2 "Queue Sets" * 2 + nexactf = 4 + cmask = all # access to all channels + pmask = 0x2 # access to only one port ... + + +[function "2/*"] # NVF + wx_caps = 0x82 # DMAQ | VF + r_caps = 0x86 # DMAQ | VF | PORT + nvi = 1 # 1 port + niqflint = 6 # 2 "Queue Sets" + NXIQ + nethctrl = 4 # 2 "Queue Sets" + neq = 8 # 2 "Queue Sets" * 2 + nexactf = 4 + cmask = all # access to all channels + pmask = 0x4 # access to only one port ... 
+ + +[function "3/*"] # NVF + wx_caps = 0x82 # DMAQ | VF + r_caps = 0x86 # DMAQ | VF | PORT + nvi = 1 # 1 port + niqflint = 6 # 2 "Queue Sets" + NXIQ + nethctrl = 4 # 2 "Queue Sets" + neq = 8 # 2 "Queue Sets" * 2 + nexactf = 4 + cmask = all # access to all channels + pmask = 0x8 # access to only one port ... + + +# MPS features a 196608 bytes ingress buffer that is used for ingress buffering +# for packets from the wire as well as the loopback path of the L2 switch. The +# folling params control how the buffer memory is distributed and the L2 flow +# control settings: +# +# bg_mem: %-age of mem to use for port/buffer group +# lpbk_mem: %-age of port/bg mem to use for loopback +# hwm: high watermark; bytes available when starting to send pause +# frames (in units of 0.1 MTU) +# lwm: low watermark; bytes remaining when sending 'unpause' frame +# (in inuits of 0.1 MTU) +# dwm: minimum delta between high and low watermark (in units of 100 +# Bytes) +# +[port "0"] + dcb = ppp, dcbx # configure for DCB PPP and enable DCBX offload + bg_mem = 25 + lpbk_mem = 25 + hwm = 30 + lwm = 15 + dwm = 30 + dcb_app_tlv[0] = 0x8906, ethertype, 3 + dcb_app_tlv[1] = 0x8914, ethertype, 3 + dcb_app_tlv[2] = 3260, socketnum, 5 + + +[port "1"] + dcb = ppp, dcbx + bg_mem = 25 + lpbk_mem = 25 + hwm = 30 + lwm = 15 + dwm = 30 + dcb_app_tlv[0] = 0x8906, ethertype, 3 + dcb_app_tlv[1] = 0x8914, ethertype, 3 + dcb_app_tlv[2] = 3260, socketnum, 5 + + +[port "2"] + dcb = ppp, dcbx + bg_mem = 25 + lpbk_mem = 25 + hwm = 30 + lwm = 15 + dwm = 30 + dcb_app_tlv[0] = 0x8906, ethertype, 3 + dcb_app_tlv[1] = 0x8914, ethertype, 3 + dcb_app_tlv[2] = 3260, socketnum, 5 + + +[port "3"] + dcb = ppp, dcbx + bg_mem = 25 + lpbk_mem = 25 + hwm = 30 + lwm = 15 + dwm = 30 + dcb_app_tlv[0] = 0x8906, ethertype, 3 + dcb_app_tlv[1] = 0x8914, ethertype, 3 + dcb_app_tlv[2] = 3260, socketnum, 5 + + +[fini] + version = 0x1425001c + checksum = 0xd8c8fbd6 + +# Total resources used by above allocations: +# Virtual Interfaces: 104 
+# Ingress Queues/w Free Lists and Interrupts: 526 +# Egress Queues: 702 +# MPS TCAM Entries: 336 +# MSI-X Vectors: 736 +# Virtual Functions: 64 diff --git a/cxgb4/configs/t5-config-hashfilter.txt b/cxgb4/configs/t5-config-hashfilter.txt new file mode 100644 index 0000000..e106f24 --- /dev/null +++ b/cxgb4/configs/t5-config-hashfilter.txt @@ -0,0 +1,467 @@ +# Chelsio T5 HASHFILTER configuration file. +# +# Copyright (C) 2010-2017 Chelsio Communications. All rights reserved. +# +# DO NOT MODIFY THIS FILE UNDER ANY CIRCUMSTANCES. MODIFICATION OF THIS FILE +# WILL RESULT IN A NON-FUNCTIONAL ADAPTER AND MAY RESULT IN PHYSICAL DAMAGE +# TO ADAPTERS. + + +# This file provides the default, power-on configuration for 4-port T5-based +# adapters shipped from the factory. These defaults are designed to address +# the needs of the vast majority of Terminator customers. The basic idea is to +# have a default configuration which allows a customer to plug a Terminator +# adapter in and have it work regardless of OS, driver or application except in +# the most unusual and/or demanding customer applications. +# +# Many of the Terminator resources which are described by this configuration +# are finite. This requires balancing the configuration/operation needs of +# device drivers across OSes and a large number of customer application. +# +# Some of the more important resources to allocate and their constaints are: +# 1. Virtual Interfaces: 256. +# 2. Ingress Queues with Free Lists: 1024. +# 3. Egress Queues: 128K. +# 4. MSI-X Vectors: 1088. +# 5. Multi-Port Support (MPS) TCAM: 336 entries to support MAC destination +# address matching on Ingress Packets. +# +# Some of the important OS/Driver resource needs are: +# 6. Some OS Drivers will manage all resources through a single Physical +# Function (currently PF4 but it could be any Physical Function). +# 7. Some OS Drivers will manage different ports and functions (NIC, +# storage, etc.) on different Physical Functions. 
For example, NIC +# functions for ports 0-3 on PF0-3, FCoE on PF4, iSCSI on PF5, etc. +# +# Some of the customer application needs which need to be accommodated: +# 8. Some customers will want to support large CPU count systems with +# good scaling. Thus, we'll need to accommodate a number of +# Ingress Queues and MSI-X Vectors to allow up to some number of CPUs +# to be involved per port and per application function. For example, +# in the case where all ports and application functions will be +# managed via a single Unified PF and we want to accommodate scaling up +# to 8 CPUs, we would want: +# +# 4 ports * +# 3 application functions (NIC, FCoE, iSCSI) per port * +# 8 Ingress Queue/MSI-X Vectors per application function +# +# for a total of 96 Ingress Queues and MSI-X Vectors on the Unified PF. +# (Plus a few for Firmware Event Queues, etc.) +# +# 9. Some customers will want to use PCI-E SR-IOV Capability to allow Virtual +# Machines to directly access T6 functionality via SR-IOV Virtual Functions +# and "PCI Device Passthrough" -- this is especially true for the NIC +# application functionality. +# + + +# Global configuration settings. +# +[global] + rss_glb_config_mode = basicvirtual + rss_glb_config_options = tnlmapen,hashtoeplitz,tnlalllkp + + # PL_TIMEOUT register + pl_timeout_value = 10000 # the timeout value in units of us + + # The following Scatter Gather Engine (SGE) settings assume a 4KB Host + # Page Size and a 64B L1 Cache Line Size. It programs the + # EgrStatusPageSize and IngPadBoundary to 64B and the PktShift to 2. + # If a Master PF Driver finds itself on a machine with different + # parameters, then the Master PF Driver is responsible for initializing + # these parameters to appropriate values. + # + # Notes: + # 1. The Free List Buffer Sizes below are raw and the firmware will + # round them up to the Ingress Padding Boundary. + # 2. The SGE Timer Values below are expressed below in microseconds. 
+ # The firmware will convert these values to Core Clock Ticks when + # it processes the configuration parameters. + # + reg[0x1008] = 0x40810/0x21c70 # SGE_CONTROL + reg[0x100c] = 0x22222222 # SGE_HOST_PAGE_SIZE + reg[0x10a0] = 0x01040810 # SGE_INGRESS_RX_THRESHOLD + reg[0x1044] = 4096 # SGE_FL_BUFFER_SIZE0 + reg[0x1048] = 65536 # SGE_FL_BUFFER_SIZE1 + reg[0x104c] = 1536 # SGE_FL_BUFFER_SIZE2 + reg[0x1050] = 9024 # SGE_FL_BUFFER_SIZE3 + reg[0x1054] = 9216 # SGE_FL_BUFFER_SIZE4 + reg[0x1058] = 2048 # SGE_FL_BUFFER_SIZE5 + reg[0x105c] = 128 # SGE_FL_BUFFER_SIZE6 + reg[0x1060] = 8192 # SGE_FL_BUFFER_SIZE7 + reg[0x1064] = 16384 # SGE_FL_BUFFER_SIZE8 + reg[0x10a4] = 0x00280000/0x3ffc0000 # SGE_DBFIFO_STATUS + reg[0x1118] = 0x00002800/0x00003c00 # SGE_DBFIFO_STATUS2 + reg[0x10a8] = 0x402000/0x402000 # SGE_DOORBELL_CONTROL + + # SGE_THROTTLE_CONTROL + bar2throttlecount = 500 # bar2throttlecount in us + + sge_timer_value = 5, 10, 20, 50, 100, 200 # SGE_TIMER_VALUE* in usecs + + + reg[0x1124] = 0x00000400/0x00000400 # SGE_CONTROL2, enable VFIFO; if + # SGE_VFIFO_SIZE is not set, then + # firmware will set it up in function + # of number of egress queues used + + reg[0x1130] = 0x00d5ffeb # SGE_DBP_FETCH_THRESHOLD, fetch + # threshold set to queue depth + # minus 128-entries for FL and HP + # queues, and 0xfff for LP which + # prompts the firmware to set it up + # in function of egress queues + # used + + reg[0x113c] = 0x0002ffc0 # SGE_VFIFO_SIZE, set to 0x2ffc0 which + # prompts the firmware to set it up in + # function of number of egress queues + # used + + # enable TP_OUT_CONFIG.IPIDSPLITMODE + reg[0x7d04] = 0x00010000/0x00010000 + + # disable TP_PARA_REG3.RxFragEn + reg[0x7d6c] = 0x00000000/0x00007000 + + # enable TP_PARA_REG6.EnableCSnd + reg[0x7d78] = 0x00000400/0x00000000 + + reg[0x7dc0] = 0x0e2f8849 # TP_SHIFT_CNT + + # TP_VLAN_PRI_MAP to select filter tuples and enable ServerSram + # filter control: compact, fcoemask + # server sram : srvrsram + # filter tuples : 
fragmentation, mpshittype, macmatch, ethertype, + # protocol, tos, vlan, vnic_id, port, fcoe + # valid filterModes are described the Terminator 5 Data Book + filterMode = fragmentation, mpshittype, protocol, vlan, port, fcoe + + # filter tuples enforced in LE active region (equal to or subset of filterMode) + filterMask = port, protocol + + # Percentage of dynamic memory (in either the EDRAM or external MEM) + # to use for TP RX payload + tp_pmrx = 20 + + # TP RX payload page size + tp_pmrx_pagesize = 16K + + # TP number of RX channels + tp_nrxch = 0 # 0 (auto) = 1 + + # Percentage of dynamic memory (in either the EDRAM or external MEM) + # to use for TP TX payload + tp_pmtx = 40 + + # TP TX payload page size + tp_pmtx_pagesize = 64K + + # TP number of TX channels + tp_ntxch = 0 # 0 (auto) = equal number of ports + + # TP OFLD MTUs + tp_mtus = 88, 256, 512, 576, 808, 1024, 1280, 1488, 1500, 2002, 2048, 4096, 4352, 8192, 9000, 9600 + + # TP_GLOBAL_CONFIG + reg[0x7d08] = 0x00000800/0x00000800 # set IssFromCplEnable + + # TP_PC_CONFIG + reg[0x7d48] = 0x00000000/0x00000400 # clear EnableFLMError + + # TP_PC_CONFIG2 + reg[0x7d4c] = 0x00010000/0x00010000 # set DisableNewPshFlag + + # TP_PARA_REG0 + reg[0x7d60] = 0x06000000/0x07000000 # set InitCWND to 6 + + # TP_PARA_REG3 + reg[0x7d6c] = 0x28000000/0x28000000 # set EnableTnlCngHdr + # set RxMacCheck (Note: + # Only for hash filter, + # no tcp offload) + + # TP_PIO_ADDR:TP_RX_LPBK + reg[tp_pio:0x28] = 0x00208208/0x00ffffff # set commit limits to 8 + + # MC configuration + mc_mode_brc[0] = 0 # mc0 - 1: enable BRC, 0: enable RBC + mc_mode_brc[1] = 0 # mc1 - 1: enable BRC, 0: enable RBC + + # ULP_TX_CONFIG + reg[0x8dc0] = 0x00000004/0x00000004 # Enable more error msg for ... + # TPT error. + +# Some "definitions" to make the rest of this a bit more readable. We support +# 4 ports, 3 functions (NIC, FCoE and iSCSI), scaling up to 8 "CPU Queue Sets" +# per function per port ... 
+# +# NMSIX = 1088 # available MSI-X Vectors +# NVI = 128 # available Virtual Interfaces +# NMPSTCAM = 336 # MPS TCAM entries +# +# NPORTS = 4 # ports +# NCPUS = 8 # CPUs we want to support scalably +# NFUNCS = 3 # functions per port (NIC, FCoE, iSCSI) + +# Breakdown of Virtual Interface/Queue/Interrupt resources for the "Unified +# PF" which many OS Drivers will use to manage most or all functions. +# +# Each Ingress Queue can use one MSI-X interrupt but some Ingress Queues can +# use Forwarded Interrupt Ingress Queues. For these latter, an Ingress Queue +# would be created and the Queue ID of a Forwarded Interrupt Ingress Queue +# will be specified as the "Ingress Queue Asynchronous Destination Index." +# Thus, the number of MSI-X Vectors assigned to the Unified PF will be less +# than or equal to the number of Ingress Queues ... +# +# NVI_NIC = 4 # NIC access to NPORTS +# NFLIQ_NIC = 32 # NIC Ingress Queues with Free Lists +# NETHCTRL_NIC = 32 # NIC Ethernet Control/TX Queues +# NEQ_NIC = 64 # NIC Egress Queues (FL, ETHCTRL/TX) +# NMPSTCAM_NIC = 16 # NIC MPS TCAM Entries (NPORTS*4) +# NMSIX_NIC = 32 # NIC MSI-X Interrupt Vectors (FLIQ) +# +# NVI_OFLD = 0 # Offload uses NIC function to access ports +# NFLIQ_OFLD = 16 # Offload Ingress Queues with Free Lists +# NETHCTRL_OFLD = 0 # Offload Ethernet Control/TX Queues +# NEQ_OFLD = 16 # Offload Egress Queues (FL) +# NMPSTCAM_OFLD = 0 # Offload MPS TCAM Entries (uses NIC's) +# NMSIX_OFLD = 16 # Offload MSI-X Interrupt Vectors (FLIQ) +# +# NVI_RDMA = 0 # RDMA uses NIC function to access ports +# NFLIQ_RDMA = 4 # RDMA Ingress Queues with Free Lists +# NETHCTRL_RDMA = 0 # RDMA Ethernet Control/TX Queues +# NEQ_RDMA = 4 # RDMA Egress Queues (FL) +# NMPSTCAM_RDMA = 0 # RDMA MPS TCAM Entries (uses NIC's) +# NMSIX_RDMA = 4 # RDMA MSI-X Interrupt Vectors (FLIQ) +# +# NEQ_WD = 128 # Wire Direct TX Queues and FLs +# NETHCTRL_WD = 64 # Wire Direct TX Queues +# NFLIQ_WD = 64 # Wire Direct Ingress Queues with Free Lists +# +# 
NVI_ISCSI = 4 # ISCSI access to NPORTS +# NFLIQ_ISCSI = 4 # ISCSI Ingress Queues with Free Lists +# NETHCTRL_ISCSI = 0 # ISCSI Ethernet Control/TX Queues +# NEQ_ISCSI = 4 # ISCSI Egress Queues (FL) +# NMPSTCAM_ISCSI = 4 # ISCSI MPS TCAM Entries (NPORTS) +# NMSIX_ISCSI = 4 # ISCSI MSI-X Interrupt Vectors (FLIQ) +# +# NVI_FCOE = 4 # FCOE access to NPORTS +# NFLIQ_FCOE = 34 # FCOE Ingress Queues with Free Lists +# NETHCTRL_FCOE = 32 # FCOE Ethernet Control/TX Queues +# NEQ_FCOE = 66 # FCOE Egress Queues (FL) +# NMPSTCAM_FCOE = 32 # FCOE MPS TCAM Entries (NPORTS) +# NMSIX_FCOE = 34 # FCOE MSI-X Interrupt Vectors (FLIQ) + +# Two extra Ingress Queues per function for Firmware Events and Forwarded +# Interrupts, and two extra interrupts per function for Firmware Events (or a +# Forwarded Interrupt Queue) and General Interrupts per function. +# +# NFLIQ_EXTRA = 6 # "extra" Ingress Queues 2*NFUNCS (Firmware and +# # Forwarded Interrupts) +# NMSIX_EXTRA = 6 # extra interrupts 2*NFUNCS (Firmware and +# # General Interrupts) + +# Microsoft HyperV resources. The HyperV Virtual Ingress Queues will have +# their interrupts forwarded to another set of Forwarded Interrupt Queues. +# +# NVI_HYPERV = 16 # VMs we want to support +# NVIIQ_HYPERV = 2 # Virtual Ingress Queues with Free Lists per VM +# NFLIQ_HYPERV = 40 # VIQs + NCPUS Forwarded Interrupt Queues +# NEQ_HYPERV = 32 # VIQs Free Lists +# NMPSTCAM_HYPERV = 16 # MPS TCAM Entries (NVI_HYPERV) +# NMSIX_HYPERV = 8 # NCPUS Forwarded Interrupt Queues + +# Adding all of the above Unified PF resource needs together: (NIC + OFLD + +# RDMA + ISCSI + FCOE + EXTRA + HYPERV) +# +# NVI_UNIFIED = 28 +# NFLIQ_UNIFIED = 106 +# NETHCTRL_UNIFIED = 32 +# NEQ_UNIFIED = 124 +# NMPSTCAM_UNIFIED = 40 +# +# The sum of all the MSI-X resources above is 74 MSI-X Vectors but we'll round +# that up to 128 to make sure the Unified PF doesn't run out of resources. 
+# +# NMSIX_UNIFIED = 128 +# +# The Storage PFs could need up to NPORTS*NCPUS + NMSIX_EXTRA MSI-X Vectors +# which is 34 but they're probably safe with 32. +# +# NMSIX_STORAGE = 32 + +# Note: The UnifiedPF is PF4 which doesn't have any Virtual Functions +# associated with it. Thus, the MSI-X Vector allocations we give to the +# UnifiedPF aren't inherited by any Virtual Functions. As a result we can +# provision many more Virtual Functions than we can if the UnifiedPF were +# one of PF0-3. +# + +# All of the below PCI-E parameters are actually stored in various *_init.txt +# files. We include them below essentially as comments. +# +# For PF0-3 we assign 8 vectors each for NIC Ingress Queues of the associated +# ports 0-3. +# +# For PF4, the Unified PF, we give it an MSI-X Table Size as outlined above. +# +# For PF5-6 we assign enough MSI-X Vectors to support FCoE and iSCSI +# storage applications across all four possible ports. +# +# Additionally, since the UnifiedPF isn't one of the per-port Physical +# Functions, we give the UnifiedPF and the PF0-3 Physical Functions +# different PCI Device IDs which will allow Unified and Per-Port Drivers +# to directly select the type of Physical Function to which they wish to be +# attached. +# +# Note that the actual values used for the PCI-E Intellectual Property will be +# 1 less than those below since that's the way it "counts" things. For +# readability, we use the number we actually mean ... 
+# +# PF0_INT = 8 # NCPUS +# PF1_INT = 8 # NCPUS +# PF2_INT = 8 # NCPUS +# PF3_INT = 8 # NCPUS +# PF0_3_INT = 32 # PF0_INT + PF1_INT + PF2_INT + PF3_INT +# +# PF4_INT = 128 # NMSIX_UNIFIED +# PF5_INT = 32 # NMSIX_STORAGE +# PF6_INT = 32 # NMSIX_STORAGE +# PF7_INT = 0 # Nothing Assigned +# PF4_7_INT = 192 # PF4_INT + PF5_INT + PF6_INT + PF7_INT +# +# PF0_7_INT = 224 # PF0_3_INT + PF4_7_INT +# +# With the above we can get 17 VFs/PF0-3 (limited by 336 MPS TCAM entries) +# but we'll lower that to 16 to make our total 64 and a nice power of 2 ... +# +# NVF = 16 + + +# Some OS Drivers manage all application functions for all ports via PF4. +# Thus we need to provide a large number of resources here. For Egress +# Queues we need to account for both TX Queues as well as Free List Queues +# (because the host is responsible for producing Free List Buffers for the +# hardware to consume). +# +[function "4"] + wx_caps = all # write/execute permissions for all commands + r_caps = all # read permissions for all commands + nvi = 8 # NVI_UNIFIED + rssnvi = 8 + niqflint = 320 # NFLIQ_UNIFIED + NLFIQ_WD + nethctrl = 320 # NETHCTRL_UNIFIED + NETHCTRL_WD + neq = 640 # NEQ_UNIFIED + NEQ_WD + nexactf = 40 # NMPSTCAM_UNIFIED + cmask = all # access to all channels + pmask = all # access to all four ports ... + nroute = 32 # number of routing region entries + nclip = 32 # number of clip region entries + nfilter = 496 # number of filter region entries + nhash = 524288 # number of hash region entries + protocol = nic_hashfilter + tp_l2t = 4096 + + + +# The following function, 1023, is not an actual PCIE function but is used to +# configure and reserve firmware internal resources that come from the global +# resource pool. +# +[function "1023"] + wx_caps = all # write/execute permissions for all commands + r_caps = all # read permissions for all commands + nvi = 4 # NVI_UNIFIED + cmask = all # access to all channels + pmask = all # access to all four ports ... 
+ nexactf = 8 # NPORTS + DCBX + + nfilter = 16 # number of filter region entries + + +# For Virtual functions, we only allow NIC functionality and we only allow +# access to one port (1 << PF). Note that because of limitations in the +# Scatter Gather Engine (SGE) hardware which checks writes to VF KDOORBELL +# and GTS registers, the number of Ingress and Egress Queues must be a power +# of 2. +# +[function "0/*"] # NVF + nvi = 1 # 1 port + rssnvi = 0 + + +[function "1/*"] # NVF + nvi = 1 # 1 port + rssnvi = 0 + + +[function "2/*"] # NVF + nvi = 1 # 1 port + rssnvi = 0 + + +[function "3/*"] # NVF + nvi = 1 # 1 port + rssnvi = 0 + + +# MPS features a 196608 bytes ingress buffer that is used for ingress buffering +# for packets from the wire as well as the loopback path of the L2 switch. The +# folling params control how the buffer memory is distributed and the L2 flow +# control settings: +# +# bg_mem: %-age of mem to use for port/buffer group +# lpbk_mem: %-age of port/bg mem to use for loopback +# hwm: high watermark; bytes available when starting to send pause +# frames (in units of 0.1 MTU) +# lwm: low watermark; bytes remaining when sending 'unpause' frame +# (in inuits of 0.1 MTU) +# dwm: minimum delta between high and low watermark (in units of 100 +# Bytes) +# +[port "0"] + dcb = 0 # configure for DCB PPP and enable DCBX offload + bg_mem = 25 + lpbk_mem = 0 + hwm = 30 + lwm = 15 + dwm = 30 + + +[port "1"] + dcb = 0 + bg_mem = 25 + lpbk_mem = 0 + hwm = 30 + lwm = 15 + dwm = 30 + + +[port "2"] + dcb = 0 + bg_mem = 25 + lpbk_mem = 0 + hwm = 30 + lwm = 15 + dwm = 30 + + +[port "3"] + dcb = 0 + bg_mem = 25 + hwm = 30 + lwm = 15 + dwm = 30 + +[fini] + version = 0x0a000025 + checksum = 0x94be1820 + +# Total resources used by above allocations: +# Virtual Interfaces: 104 +# Ingress Queues/w Free Lists and Interrupts: 526 +# Egress Queues: 702 +# MPS TCAM Entries: 336 +# MSI-X Vectors: 736 +# Virtual Functions: 64 diff --git a/cxgb4/configs/t6-config-default.txt 
b/cxgb4/configs/t6-config-default.txt new file mode 100644 index 0000000..b0145ab --- /dev/null +++ b/cxgb4/configs/t6-config-default.txt @@ -0,0 +1,603 @@ +# Chelsio T6 Factory Default configuration file. +# +# Copyright (C) 2014-2015 Chelsio Communications. All rights reserved. +# +# DO NOT MODIFY THIS FILE UNDER ANY CIRCUMSTANCES. MODIFICATION OF THIS FILE +# WILL RESULT IN A NON-FUNCTIONAL ADAPTER AND MAY RESULT IN PHYSICAL DAMAGE +# TO ADAPTERS. + + +# This file provides the default, power-on configuration for 2-port T6-based +# adapters shipped from the factory. These defaults are designed to address +# the needs of the vast majority of Terminator customers. The basic idea is to +# have a default configuration which allows a customer to plug a Terminator +# adapter in and have it work regardless of OS, driver or application except in +# the most unusual and/or demanding customer applications. +# +# Many of the Terminator resources which are described by this configuration +# are finite. This requires balancing the configuration/operation needs of +# device drivers across OSes and a large number of customer application. +# +# Some of the more important resources to allocate and their constaints are: +# 1. Virtual Interfaces: 256. +# 2. Ingress Queues with Free Lists: 1024. +# 3. Egress Queues: 128K. +# 4. MSI-X Vectors: 1088. +# 5. Multi-Port Support (MPS) TCAM: 336 entries to support MAC destination +# address matching on Ingress Packets. +# +# Some of the important OS/Driver resource needs are: +# 6. Some OS Drivers will manage all resources through a single Physical +# Function (currently PF4 but it could be any Physical Function). +# 7. Some OS Drivers will manage different ports and functions (NIC, +# storage, etc.) on different Physical Functions. For example, NIC +# functions for ports 0-1 on PF0-1, FCoE on PF4, iSCSI on PF5, etc. +# +# Some of the customer application needs which need to be accommodated: +# 8. 
Some customers will want to support large CPU count systems with +# good scaling. Thus, we'll need to accommodate a number of +# Ingress Queues and MSI-X Vectors to allow up to some number of CPUs +# to be involved per port and per application function. For example, +# in the case where all ports and application functions will be +# managed via a single Unified PF and we want to accommodate scaling up +# to 8 CPUs, we would want: +# +# 2 ports * +# 3 application functions (NIC, FCoE, iSCSI) per port * +# 16 Ingress Queue/MSI-X Vectors per application function +# +# for a total of 96 Ingress Queues and MSI-X Vectors on the Unified PF. +# (Plus a few for Firmware Event Queues, etc.) +# +# 9. Some customers will want to use PCI-E SR-IOV Capability to allow Virtual +# Machines to directly access T6 functionality via SR-IOV Virtual Functions +# and "PCI Device Passthrough" -- this is especially true for the NIC +# application functionality. +# + + +# Global configuration settings. +# +[global] + rss_glb_config_mode = basicvirtual + rss_glb_config_options = tnlmapen,hashtoeplitz,tnlalllkp + + # PL_TIMEOUT register + pl_timeout_value = 200 # the timeout value in units of us + + # The following Scatter Gather Engine (SGE) settings assume a 4KB Host + # Page Size and a 64B L1 Cache Line Size. It programs the + # EgrStatusPageSize and IngPadBoundary to 64B and the PktShift to 2. + # If a Master PF Driver finds itself on a machine with different + # parameters, then the Master PF Driver is responsible for initializing + # these parameters to appropriate values. + # + # Notes: + # 1. The Free List Buffer Sizes below are raw and the firmware will + # round them up to the Ingress Padding Boundary. + # 2. The SGE Timer Values below are expressed below in microseconds. + # The firmware will convert these values to Core Clock Ticks when + # it processes the configuration parameters. 
+ # + reg[0x1008] = 0x40800/0x21c70 # SGE_CONTROL + reg[0x100c] = 0x22222222 # SGE_HOST_PAGE_SIZE + reg[0x10a0] = 0x01040810 # SGE_INGRESS_RX_THRESHOLD + reg[0x1044] = 4096 # SGE_FL_BUFFER_SIZE0 + reg[0x1048] = 65536 # SGE_FL_BUFFER_SIZE1 + reg[0x104c] = 1536 # SGE_FL_BUFFER_SIZE2 + reg[0x1050] = 9024 # SGE_FL_BUFFER_SIZE3 + reg[0x1054] = 9216 # SGE_FL_BUFFER_SIZE4 + reg[0x1058] = 2048 # SGE_FL_BUFFER_SIZE5 + reg[0x105c] = 128 # SGE_FL_BUFFER_SIZE6 + reg[0x1060] = 8192 # SGE_FL_BUFFER_SIZE7 + reg[0x1064] = 16384 # SGE_FL_BUFFER_SIZE8 + + sge_timer_value = 5, 10, 20, 50, 100, 200 # SGE_TIMER_VALUE* in usecs + reg[0x10c4] = 0x20000000/0x20000000 # GK_CONTROL, enable 5th thread + + # Set the SGE Doorbell Queue Timer "tick" to 50us and initialize + # the Timer Table to a default set of values (which are multiples + # of the Timer Tick). Note that the set of Tick Multipliers is + # NOT sorted. The Host Drivers are expected to pick amongst them + # for (Tick * Multiplier[i]) values which most closely match the Host + # Drivers' needs. Also, most Host Drivers will by default + # start with (Tick * Multiplier[0]), so this gives us some flexibility + # in terms of picking a Tick and a default Multiplier somewhere in + # the middle of the achievable set of (Tick * Multiplier[i]) values. + # Thus, the below selects 150us as the default.
+ # + sge_dbq_timertick = 50 + sge_dbq_timer = 3, 2, 1, 5, 7, 9, 12, 16 + + # enable TP_OUT_CONFIG.IPIDSPLITMODE + # Set TP_OUT_CONFIG.CCplAckMode to get srtt/rttvar + reg[0x7d04] = 0x00012000/0x00012000 + + reg[0x7dc0] = 0x0e2f8849 # TP_SHIFT_CNT + + #Tick granularities in kbps + tsch_ticks = 100000, 10000, 1000, 10 + + # TP_VLAN_PRI_MAP to select filter tuples and enable ServerSram + # filter control: compact, fcoemask + # server sram : srvrsram + # filter tuples : fragmentation, mpshittype, macmatch, ethertype, + # protocol, tos, vlan, vnic_id, port, fcoe + # valid filterModes are described the Terminator 5 Data Book + # vnicMode = pf_vf #default. Other values are outer_vlan, encapsulation + filterMode = fcoemask, srvrsram, fragmentation, mpshittype, protocol, vlan, port, fcoe + + # filter tuples enforced in LE active region (equal to or subset of filterMode) + filterMask = protocol, fcoe + + # Percentage of dynamic memory (in either the EDRAM or external MEM) + # to use for TP RX payload + tp_pmrx = 30 + + # TP RX payload page size + tp_pmrx_pagesize = 64K + + # TP number of RX channels + tp_nrxch = 0 # 0 (auto) = 1 + + # Percentage of dynamic memory (in either the EDRAM or external MEM) + # to use for TP TX payload + tp_pmtx = 50 + + # TP TX payload page size + tp_pmtx_pagesize = 64K + + # TP number of TX channels + tp_ntxch = 0 # 0 (auto) = equal number of ports + + # TP OFLD MTUs + tp_mtus = 88, 256, 512, 576, 808, 1024, 1280, 1488, 1500, 2002, 2048, 4096, 4352, 8192, 9000, 9600 + + # enable TP_OUT_CONFIG.IPIDSPLITMODE and CRXPKTENC + reg[0x7d04] = 0x00010008/0x00010008 + + # TP_GLOBAL_CONFIG + reg[0x7d08] = 0x00000800/0x00000800 # set IssFromCplEnable + + # TP_PC_CONFIG + reg[0x7d48] = 0x00000000/0x00000400 # clear EnableFLMError + + # TP_PARA_REG0 + reg[0x7d60] = 0x06000000/0x07000000 # set InitCWND to 6 + + # ULPRX iSCSI Page Sizes + reg[0x19168] = 0x04020100 # 64K, 16K, 8K and 4K + + # LE_DB_CONFIG + reg[0x19c04] = 0x00000000/0x00440000 # LE Server SRAM 
disabled + # LE IPv4 compression disabled + # LE_DB_HASH_CONFIG + reg[0x19c28] = 0x00800000/0x01f00000 # LE Hash bucket size 8, + + # ULP_TX_CONFIG + reg[0x8dc0] = 0x00000104/0x00000104 # Enable ITT on PI err + # Enable more error msg for ... + # TPT error. + + # ULP_RX_MISC_FEATURE_ENABLE + #reg[0x1925c] = 0x01003400/0x01003400 # iscsi tag pi bit + # Enable offset decrement after ... + # PI extraction and before DDP + # ulp insert pi source info in DIF + # iscsi_eff_offset_en + + #Enable iscsi completion moderation feature + reg[0x1925c] = 0x000041c0/0x000031c0 # Enable offset decrement after + # PI extraction and before DDP. + # ulp insert pi source info in + # DIF. + # Enable iscsi hdr cmd mode. + # iscsi force cmd mode. + # Enable iscsi cmp mode. + # MC configuration + #mc_mode_brc[0] = 1 # mc0 - 1: enable BRC, 0: enable RBC, 2: enable BRBC + + # HMA configuration + hma_size = 92 # Size (in MBs) of host memory expected + hma_regions = stag,pbl,rq # What all regions to place in host memory + + #enable bottleneck-bw congestion control mode + #ofld_flags = 4 + +# Some "definitions" to make the rest of this a bit more readable. We support +# 4 ports, 3 functions (NIC, FCoE and iSCSI), scaling up to 8 "CPU Queue Sets" +# per function per port ... +# +# NMSIX = 1088 # available MSI-X Vectors +# NVI = 256 # available Virtual Interfaces +# NMPSTCAM = 336 # MPS TCAM entries +# +# NPORTS = 2 # ports +# NCPUS = 16 # CPUs we want to support scalably +# NFUNCS = 3 # functions per port (NIC, FCoE, iSCSI) + +# Breakdown of Virtual Interface/Queue/Interrupt resources for the "Unified +# PF" which many OS Drivers will use to manage most or all functions. +# +# Each Ingress Queue can use one MSI-X interrupt but some Ingress Queues can +# use Forwarded Interrupt Ingress Queues. For these latter, an Ingress Queue +# would be created and the Queue ID of a Forwarded Interrupt Ingress Queue +# will be specified as the "Ingress Queue Asynchronous Destination Index." 
+# Thus, the number of MSI-X Vectors assigned to the Unified PF will be less +# than or equal to the number of Ingress Queues ... +# +# NVI_NIC = 4 # NIC access to NPORTS +# NFLIQ_NIC = 32 # NIC Ingress Queues with Free Lists +# NETHCTRL_NIC = 32 # NIC Ethernet Control/TX Queues +# NEQ_NIC = 64 # NIC Egress Queues (FL, ETHCTRL/TX) +# NMPSTCAM_NIC = 16 # NIC MPS TCAM Entries (NPORTS*4) +# NMSIX_NIC = 32 # NIC MSI-X Interrupt Vectors (FLIQ) +# +# NVI_OFLD = 0 # Offload uses NIC function to access ports +# NFLIQ_OFLD = 16 # Offload Ingress Queues with Free Lists +# NETHCTRL_OFLD = 0 # Offload Ethernet Control/TX Queues +# NEQ_OFLD = 16 # Offload Egress Queues (FL) +# NMPSTCAM_OFLD = 0 # Offload MPS TCAM Entries (uses NIC's) +# NMSIX_OFLD = 16 # Offload MSI-X Interrupt Vectors (FLIQ) +# +# NVI_RDMA = 0 # RDMA uses NIC function to access ports +# NFLIQ_RDMA = 4 # RDMA Ingress Queues with Free Lists +# NETHCTRL_RDMA = 0 # RDMA Ethernet Control/TX Queues +# NEQ_RDMA = 4 # RDMA Egress Queues (FL) +# NMPSTCAM_RDMA = 0 # RDMA MPS TCAM Entries (uses NIC's) +# NMSIX_RDMA = 4 # RDMA MSI-X Interrupt Vectors (FLIQ) +# +# NEQ_WD = 128 # Wire Direct TX Queues and FLs +# NETHCTRL_WD = 64 # Wire Direct TX Queues +# NFLIQ_WD = 64 # Wire Direct Ingress Queues with Free Lists +# +# NVI_ISCSI = 4 # ISCSI access to NPORTS +# NFLIQ_ISCSI = 4 # ISCSI Ingress Queues with Free Lists +# NETHCTRL_ISCSI = 0 # ISCSI Ethernet Control/TX Queues +# NEQ_ISCSI = 4 # ISCSI Egress Queues (FL) +# NMPSTCAM_ISCSI = 4 # ISCSI MPS TCAM Entries (NPORTS) +# NMSIX_ISCSI = 4 # ISCSI MSI-X Interrupt Vectors (FLIQ) +# +# NVI_FCOE = 4 # FCOE access to NPORTS +# NFLIQ_FCOE = 34 # FCOE Ingress Queues with Free Lists +# NETHCTRL_FCOE = 32 # FCOE Ethernet Control/TX Queues +# NEQ_FCOE = 66 # FCOE Egress Queues (FL) +# NMPSTCAM_FCOE = 32 # FCOE MPS TCAM Entries (NPORTS) +# NMSIX_FCOE = 34 # FCOE MSI-X Interrupt Vectors (FLIQ) + +# Two extra Ingress Queues per function for Firmware Events and Forwarded +# Interrupts,
and two extra interrupts per function for Firmware Events (or a +# Forwarded Interrupt Queue) and General Interrupts per function. +# +# NFLIQ_EXTRA = 6 # "extra" Ingress Queues 2*NFUNCS (Firmware and +# # Forwarded Interrupts +# NMSIX_EXTRA = 6 # extra interrupts 2*NFUNCS (Firmware and +# # General Interrupts + +# Microsoft HyperV resources. The HyperV Virtual Ingress Queues will have +# their interrupts forwarded to another set of Forwarded Interrupt Queues. +# +# NVI_HYPERV = 16 # VMs we want to support +# NVIIQ_HYPERV = 2 # Virtual Ingress Queues with Free Lists per VM +# NFLIQ_HYPERV = 40 # VIQs + NCPUS Forwarded Interrupt Queues +# NEQ_HYPERV = 32 # VIQs Free Lists +# NMPSTCAM_HYPERV = 16 # MPS TCAM Entries (NVI_HYPERV) +# NMSIX_HYPERV = 8 # NCPUS Forwarded Interrupt Queues + +# Adding all of the above Unified PF resource needs together: (NIC + OFLD + +# RDMA + ISCSI + FCOE + EXTRA + HYPERV) +# +# NVI_UNIFIED = 28 +# NFLIQ_UNIFIED = 106 +# NETHCTRL_UNIFIED = 32 +# NEQ_UNIFIED = 124 +# NMPSTCAM_UNIFIED = 40 +# +# The sum of all the MSI-X resources above is 74 MSI-X Vectors but we'll round +# that up to 128 to make sure the Unified PF doesn't run out of resources. +# +# NMSIX_UNIFIED = 128 +# +# The Storage PFs could need up to NPORTS*NCPUS + NMSIX_EXTRA MSI-X Vectors +# which is 34 but they're probably safe with 32. +# +# NMSIX_STORAGE = 32 + +# Note: The UnifiedPF is PF4 which doesn't have any Virtual Functions +# associated with it. Thus, the MSI-X Vector allocations we give to the +# UnifiedPF aren't inherited by any Virtual Functions. As a result we can +# provision many more Virtual Functions than we can if the UnifiedPF were +# one of PF0-3. +# + +# All of the below PCI-E parameters are actually stored in various *_init.txt +# files. We include them below essentially as comments. +# +# For PF0-3 we assign 8 vectors each for NIC Ingress Queues of the associated +# ports 0-3. +# +# For PF4, the Unified PF, we give it an MSI-X Table Size as outlined above. 
+# +# For PF5-6 we assign enough MSI-X Vectors to support FCoE and iSCSI +# storage applications across all four possible ports. +# +# Additionally, since the UnifiedPF isn't one of the per-port Physical +# Functions, we give the UnifiedPF and the PF0-3 Physical Functions +# different PCI Device IDs which will allow Unified and Per-Port Drivers +# to directly select the type of Physical Function to which they wish to be +# attached. +# +# Note that the actual values used for the PCI-E Intellectual Property will be +# 1 less than those below since that's the way it "counts" things. For +# readability, we use the number we actually mean ... +# +# PF0_INT = 8 # NCPUS +# PF1_INT = 8 # NCPUS +# PF0_3_INT = 32 # PF0_INT + PF1_INT + PF2_INT + PF3_INT +# +# PF4_INT = 128 # NMSIX_UNIFIED +# PF5_INT = 32 # NMSIX_STORAGE +# PF6_INT = 32 # NMSIX_STORAGE +# PF7_INT = 0 # Nothing Assigned +# PF4_7_INT = 192 # PF4_INT + PF5_INT + PF6_INT + PF7_INT +# +# PF0_7_INT = 224 # PF0_3_INT + PF4_7_INT +# +# With the above we can get 17 VFs/PF0-3 (limited by 336 MPS TCAM entries) +# but we'll lower that to 16 to make our total 64 and a nice power of 2 ... +# +# NVF = 16 + + +# For those OSes which manage different ports on different PFs, we need +# only enough resources to support a single port's NIC application functions +# on PF0-3. The below assumes that we're only doing NIC with NCPUS "Queue +# Sets" for ports 0-3. The FCoE and iSCSI functions for such OSes will be +# managed on the "storage PFs" (see below).
+# +[function "0"] + nvf = 16 # NVF on this function + wx_caps = all # write/execute permissions for all commands + r_caps = all # read permissions for all commands + nvi = 1 # 1 port + niqflint = 8 # NCPUS "Queue Sets" + nethctrl = 8 # NCPUS "Queue Sets" + neq = 16 # niqflint + nethctrl Egress Queues + nexactf = 8 # number of exact MPSTCAM MAC filters + cmask = all # access to all channels + pmask = 0x1 # access to only one port + + +[function "1"] + nvf = 16 # NVF on this function + wx_caps = all # write/execute permissions for all commands + r_caps = all # read permissions for all commands + nvi = 1 # 1 port + niqflint = 8 # NCPUS "Queue Sets" + nethctrl = 8 # NCPUS "Queue Sets" + neq = 16 # niqflint + nethctrl Egress Queues + nexactf = 8 # number of exact MPSTCAM MAC filters + cmask = all # access to all channels + pmask = 0x2 # access to only one port + +[function "2"] + nvf = 16 # NVF on this function + wx_caps = all # write/execute permissions for all commands + r_caps = all # read permissions for all commands + nvi = 1 # 1 port + niqflint = 8 # NCPUS "Queue Sets" + nethctrl = 8 # NCPUS "Queue Sets" + neq = 16 # niqflint + nethctrl Egress Queues + nexactf = 8 # number of exact MPSTCAM MAC filters + cmask = all # access to all channels + pmask = 0x4 # access to only one port + +[function "3"] + nvf = 16 # NVF on this function + wx_caps = all # write/execute permissions for all commands + r_caps = all # read permissions for all commands + nvi = 1 # 1 port + niqflint = 8 # NCPUS "Queue Sets" + nethctrl = 8 # NCPUS "Queue Sets" + neq = 16 # niqflint + nethctrl Egress Queues + nexactf = 8 # number of exact MPSTCAM MAC filters + cmask = all # access to all channels + pmask = 0x8 # access to only one port + + +# Some OS Drivers manage all application functions for all ports via PF4. +# Thus we need to provide a large number of resources here. 
For Egress +# Queues we need to account for both TX Queues as well as Free List Queues +# (because the host is responsible for producing Free List Buffers for the +# hardware to consume). +# +[function "4"] + wx_caps = all # write/execute permissions for all commands + r_caps = all # read permissions for all commands + nvi = 28 # NVI_UNIFIED + niqflint = 218 # NFLIQ_UNIFIED + NFLIQ_WD + NFLIQ_CRYPTO (32) + nethctrl = 116 # NETHCTRL_UNIFIED + NETHCTRL_WD + ncrypto_lookaside + neq = 256 # NEQ_UNIFIED + NEQ_WD + nqpcq = 12288 + nexactf = 40 # NMPSTCAM_UNIFIED + nrawf = 2 + cmask = all # access to all channels + pmask = all # access to all four ports ... + nethofld = 1024 # number of user mode ethernet flow contexts + ncrypto_lookaside = 16 # Number of lookaside flow contexts + nclip = 320 # number of clip region entries + nfilter = 496 # number of filter region entries + nserver = 496 # number of server region entries + nhash = 12288 # number of hash region entries + nhpfilter = 64 # number of high priority filter region entries + protocol = nic_vm, ofld, rddp, rdmac, iscsi_initiator_pdu, iscsi_target_pdu, iscsi_t10dif, tlskeys, crypto_lookaside, ipsec_inline, nic_hashfilter, nic_ktls_ofld + tp_l2t = 3072 + tp_ddp = 2 + tp_ddp_iscsi = 2 + tp_tls_key = 2 + tp_tls_mxrxsize = 17408 # 16384 + 1024, governs max rx data, pm max xfer len, rx coalesce sizes + tp_stag = 2 + tp_pbl = 7 + tp_rq = 7 + tp_srq = 128 + +# We have FCoE and iSCSI storage functions on PF5 and PF6 each of which may +# need to have Virtual Interfaces on each of the four ports with up to NCPUS +# "Queue Sets" each. +# +[function "5"] + wx_caps = all # write/execute permissions for all commands + r_caps = all # read permissions for all commands + nvi = 4 # NPORTS + niqflint = 34 # NPORTS*NCPUS + NMSIX_EXTRA + nethctrl = 32 # NPORTS*NCPUS + neq = 64 # NPORTS*NCPUS * 2 (FL, ETHCTRL/TX) + nexactf = 16 # (NPORTS *(no of snmc grp + 1 hw mac) + 1 anmc grp)) rounded to 16.
+ cmask = all # access to all channels + pmask = all # access to all four ports ... + nserver = 16 + nhash = 2048 + tp_l2t = 1020 + nclip = 64 + protocol = iscsi_initiator_fofld + tp_ddp_iscsi = 2 + iscsi_ntask = 2048 + iscsi_nsess = 2048 + iscsi_nconn_per_session = 1 + iscsi_ninitiator_instance = 64 + + +[function "6"] + wx_caps = all # write/execute permissions for all commands + r_caps = all # read permissions for all commands + nvi = 4 # NPORTS + niqflint = 34 # NPORTS*NCPUS + NMSIX_EXTRA + nethctrl = 32 # NPORTS*NCPUS + neq = 66 # NPORTS*NCPUS * 2 (FL, ETHCTRL/TX) + 2 (EXTRA) + nexactf = 32 # NPORTS + adding 28 exact entries for FCoE + # which is OK since < MIN(SUM PF0..3, PF4) + # and we never load PF0..3 and PF4 concurrently + cmask = all # access to all channels + pmask = all # access to all four ports ... + nhash = 2048 + tp_l2t = 4 + protocol = fcoe_initiator + tp_ddp = 1 + fcoe_nfcf = 16 + fcoe_nvnp = 32 + fcoe_nssn = 1024 + + +# The following function, 1023, is not an actual PCIE function but is used to +# configure and reserve firmware internal resources that come from the global +# resource pool. +# +[function "1023"] + wx_caps = all # write/execute permissions for all commands + r_caps = all # read permissions for all commands + nvi = 4 # NVI_UNIFIED + cmask = all # access to all channels + pmask = all # access to all four ports ... + nexactf = 8 # NPORTS + DCBX + + nfilter = 16 # number of filter region entries + + +# For Virtual functions, we only allow NIC functionality and we only allow +# access to one port (1 << PF). Note that because of limitations in the +# Scatter Gather Engine (SGE) hardware which checks writes to VF KDOORBELL +# and GTS registers, the number of Ingress and Egress Queues must be a power +# of 2. 
+# +[function "0/*"] # NVF + wx_caps = 0x82 # DMAQ | VF + r_caps = 0x86 # DMAQ | VF | PORT + nvi = 1 # 1 port + niqflint = 6 # 2 "Queue Sets" + NXIQ + nethctrl = 4 # 2 "Queue Sets" + neq = 8 # 2 "Queue Sets" * 2 + nexactf = 4 + cmask = all # access to all channels + pmask = 0x1 # access to only one port ... + + +[function "1/*"] # NVF + wx_caps = 0x82 # DMAQ | VF + r_caps = 0x86 # DMAQ | VF | PORT + nvi = 1 # 1 port + niqflint = 6 # 2 "Queue Sets" + NXIQ + nethctrl = 4 # 2 "Queue Sets" + neq = 8 # 2 "Queue Sets" * 2 + nexactf = 4 + cmask = all # access to all channels + pmask = 0x2 # access to only one port ... + +[function "2/*"] # NVF + wx_caps = 0x82 # DMAQ | VF + r_caps = 0x86 # DMAQ | VF | PORT + nvi = 1 # 1 port + niqflint = 6 # 2 "Queue Sets" + NXIQ + nethctrl = 4 # 2 "Queue Sets" + neq = 8 # 2 "Queue Sets" * 2 + nexactf = 4 + cmask = all # access to all channels + pmask = 0x1 # access to only one port ... + + +[function "3/*"] # NVF + wx_caps = 0x82 # DMAQ | VF + r_caps = 0x86 # DMAQ | VF | PORT + nvi = 1 # 1 port + niqflint = 6 # 2 "Queue Sets" + NXIQ + nethctrl = 4 # 2 "Queue Sets" + neq = 8 # 2 "Queue Sets" * 2 + nexactf = 4 + cmask = all # access to all channels + pmask = 0x2 # access to only one port ... + +# MPS features a 196608 bytes ingress buffer that is used for ingress buffering +# for packets from the wire as well as the loopback path of the L2 switch. 
The +# following params control how the buffer memory is distributed and the L2 flow +# control settings: +# +# bg_mem: %-age of mem to use for port/buffer group +# lpbk_mem: %-age of port/bg mem to use for loopback +# hwm: high watermark; bytes available when starting to send pause +# frames (in units of 0.1 MTU) +# lwm: low watermark; bytes remaining when sending 'unpause' frame +# (in units of 0.1 MTU) +# dwm: minimum delta between high and low watermark (in units of 100 +# Bytes) +# +[port "0"] + dcb = ppp, dcbx # configure for DCB PPP and enable DCBX offload + #bg_mem = 25 + #lpbk_mem = 25 + hwm = 60 + lwm = 15 + dwm = 30 + dcb_app_tlv[0] = 0x8906, ethertype, 3 + dcb_app_tlv[1] = 0x8914, ethertype, 3 + dcb_app_tlv[2] = 3260, socketnum, 5 + +[port "1"] + dcb = ppp, dcbx + #bg_mem = 25 + #lpbk_mem = 25 + hwm = 60 + lwm = 15 + dwm = 30 + dcb_app_tlv[0] = 0x8906, ethertype, 3 + dcb_app_tlv[1] = 0x8914, ethertype, 3 + dcb_app_tlv[2] = 3260, socketnum, 5 + +[fini] + version = 0x1425001d + checksum = 0xa1403d73 + +# Total resources used by above allocations: +# Virtual Interfaces: 104 +# Ingress Queues/w Free Lists and Interrupts: 526 +# Egress Queues: 702 +# MPS TCAM Entries: 336 +# MSI-X Vectors: 736 +# Virtual Functions: 64 diff --git a/cxgb4/configs/t6-config-hashfilter.txt b/cxgb4/configs/t6-config-hashfilter.txt new file mode 100644 index 0000000..f8c7821 --- /dev/null +++ b/cxgb4/configs/t6-config-hashfilter.txt @@ -0,0 +1,430 @@ +# Chelsio T6 HASHFILTER configuration file. +# +# Copyright (C) 2014-2017 Chelsio Communications. All rights reserved. +# +# DO NOT MODIFY THIS FILE UNDER ANY CIRCUMSTANCES. MODIFICATION OF THIS FILE +# WILL RESULT IN A NON-FUNCTIONAL ADAPTER AND MAY RESULT IN PHYSICAL DAMAGE +# TO ADAPTERS. + + +# This file provides the default, power-on configuration for 2-port T6-based +# adapters shipped from the factory. These defaults are designed to address +# the needs of the vast majority of Terminator customers.
The basic idea is to +# have a default configuration which allows a customer to plug a Terminator +# adapter in and have it work regardless of OS, driver or application except in +# the most unusual and/or demanding customer applications. +# +# Many of the Terminator resources which are described by this configuration +# are finite. This requires balancing the configuration/operation needs of +# device drivers across OSes and a large number of customer applications. +# +# Some of the more important resources to allocate and their constraints are: +# 1. Virtual Interfaces: 256. +# 2. Ingress Queues with Free Lists: 1024. +# 3. Egress Queues: 128K. +# 4. MSI-X Vectors: 1088. +# 5. Multi-Port Support (MPS) TCAM: 336 entries to support MAC destination +# address matching on Ingress Packets. +# +# Some of the important OS/Driver resource needs are: +# 6. Some OS Drivers will manage all resources through a single Physical +# Function (currently PF4 but it could be any Physical Function). +# 7. Some OS Drivers will manage different ports and functions (NIC, +# storage, etc.) on different Physical Functions. For example, NIC +# functions for ports 0-1 on PF0-1, FCoE on PF4, iSCSI on PF5, etc. +# +# Some of the customer application needs which need to be accommodated: +# 8. Some customers will want to support large CPU count systems with +# good scaling. Thus, we'll need to accommodate a number of +# Ingress Queues and MSI-X Vectors to allow up to some number of CPUs +# to be involved per port and per application function. For example, +# in the case where all ports and application functions will be +# managed via a single Unified PF and we want to accommodate scaling up +# to 8 CPUs, we would want: +# +# 2 ports * +# 3 application functions (NIC, FCoE, iSCSI) per port * +# 16 Ingress Queue/MSI-X Vectors per application function +# +# for a total of 96 Ingress Queues and MSI-X Vectors on the Unified PF. +# (Plus a few for Firmware Event Queues, etc.) +# +# 9.
Some customers will want to use PCI-E SR-IOV Capability to allow Virtual +# Machines to directly access T6 functionality via SR-IOV Virtual Functions +# and "PCI Device Passthrough" -- this is especially true for the NIC +# application functionality. +# + + +# Global configuration settings. +# +[global] + rss_glb_config_mode = basicvirtual + rss_glb_config_options = tnlmapen,hashtoeplitz,tnlalllkp + + # PL_TIMEOUT register + pl_timeout_value = 200 # the timeout value in units of us + + # The following Scatter Gather Engine (SGE) settings assume a 4KB Host + # Page Size and a 64B L1 Cache Line Size. It programs the + # EgrStatusPageSize and IngPadBoundary to 64B and the PktShift to 2. + # If a Master PF Driver finds itself on a machine with different + # parameters, then the Master PF Driver is responsible for initializing + # these parameters to appropriate values. + # + # Notes: + # 1. The Free List Buffer Sizes below are raw and the firmware will + # round them up to the Ingress Padding Boundary. + # 2. The SGE Timer Values below are expressed below in microseconds. + # The firmware will convert these values to Core Clock Ticks when + # it processes the configuration parameters. 
+ # + reg[0x1008] = 0x40800/0x21c70 # SGE_CONTROL + reg[0x100c] = 0x22222222 # SGE_HOST_PAGE_SIZE + reg[0x10a0] = 0x01040810 # SGE_INGRESS_RX_THRESHOLD + reg[0x1044] = 4096 # SGE_FL_BUFFER_SIZE0 + reg[0x1048] = 65536 # SGE_FL_BUFFER_SIZE1 + reg[0x104c] = 1536 # SGE_FL_BUFFER_SIZE2 + reg[0x1050] = 9024 # SGE_FL_BUFFER_SIZE3 + reg[0x1054] = 9216 # SGE_FL_BUFFER_SIZE4 + reg[0x1058] = 2048 # SGE_FL_BUFFER_SIZE5 + reg[0x105c] = 128 # SGE_FL_BUFFER_SIZE6 + reg[0x1060] = 8192 # SGE_FL_BUFFER_SIZE7 + reg[0x1064] = 16384 # SGE_FL_BUFFER_SIZE8 + + sge_timer_value = 5, 10, 20, 50, 100, 200 # SGE_TIMER_VALUE* in usecs + reg[0x10c4] = 0x20000000/0x20000000 # GK_CONTROL, enable 5th thread + + # Set the SGE Doorbell Queue Timer "tick" to 5us and initialize + # the Timer Table to a default set of values (which are multiples + # + sge_dbq_timertick = 5 + sge_dbq_timer = 1, 2, 3, 5, 7, 9, 12, 16 + + # enable TP_OUT_CONFIG.IPIDSPLITMODE + reg[0x7d04] = 0x00010000/0x00010000 + + reg[0x7dc0] = 0x0e2f8849 # TP_SHIFT_CNT + + #Tick granularities in kbps + tsch_ticks = 100000, 10000, 1000, 10 + + # TP_VLAN_PRI_MAP to select filter tuples and enable ServerSram + # filter control: compact, fcoemask + # server sram : srvrsram + # filter tuples : fragmentation, mpshittype, macmatch, ethertype, + # protocol, tos, vlan, vnic_id, port, fcoe + # valid filterModes are described the Terminator 5 Data Book + # vnicMode = pf_vf #default. 
Other values are outer_vlan, encapsulation + filterMode = fragmentation, mpshittype, protocol, vlan, port, fcoe + + # filter tuples enforced in LE active region (equal to or subset of filterMode) + filterMask = port, protocol + + # Percentage of dynamic memory (in either the EDRAM or external MEM) + # to use for TP RX payload + tp_pmrx = 20 + + # TP RX payload page size + tp_pmrx_pagesize = 16K + + # TP number of RX channels + tp_nrxch = 0 # 0 (auto) = 1 + + # Percentage of dynamic memory (in either the EDRAM or external MEM) + # to use for TP TX payload + tp_pmtx = 40 + + # TP TX payload page size + tp_pmtx_pagesize = 64K + + # TP number of TX channels + tp_ntxch = 0 # 0 (auto) = equal number of ports + + # TP OFLD MTUs + tp_mtus = 88, 256, 512, 576, 808, 1024, 1280, 1488, 1500, 2002, 2048, 4096, 4352, 8192, 9000, 9600 + + # enable TP_OUT_CONFIG.IPIDSPLITMODE and CRXPKTENC + reg[0x7d04] = 0x00010008/0x00010008 + + # TP_GLOBAL_CONFIG + reg[0x7d08] = 0x00000800/0x00000800 # set IssFromCplEnable + + # TP_PC_CONFIG + reg[0x7d48] = 0x00000000/0x00000400 # clear EnableFLMError + + # TP_PC_CONFIG2 + reg[0x7d4c] = 0x00010000/0x00010000 # set DisableNewPshFlag + + # TP_PARA_REG0 + reg[0x7d60] = 0x06000000/0x07000000 # set InitCWND to 6 + + # TP_PARA_REG3 + reg[0x7d6c] = 0x28000000/0x28000000 # set EnableTnlCngHdr + # set RxMacCheck (Note: + # Only for hash filter, + # no tcp offload) + + # LE_DB_CONFIG + reg[0x19c04] = 0x00000000/0x02040000 # LE IPv4 compression disabled + # EXTN_HASH_IPV4 Disable + + #LE_DB_RSP_CODE_0 + reg[0x19c74] = 0x00000004/0x0000000f # TCAM_ACTV_HIT = 4 + + #LE_DB_RSP_CODE_1 + reg[0x19c78] = 0x08000000/0x0e000000 # HASH_ACTV_HIT = 4 + + # LE_DB_HASH_CONFIG + reg[0x19c28] = 0x00800000/0x01f00000 # LE Hash bucket size 8, + + # MC configuration + mc_mode_brc[0] = 0 # mc0 - 1: enable BRC, 0: enable RBC, 2: enable BRBC + +# Some "definitions" to make the rest of this a bit more readable.
We support +# 4 ports, 3 functions (NIC, FCoE and iSCSI), scaling up to 8 "CPU Queue Sets" +# per function per port ... +# +# NMSIX = 1088 # available MSI-X Vectors +# NVI = 256 # available Virtual Interfaces +# NMPSTCAM = 336 # MPS TCAM entries +# +# NPORTS = 2 # ports +# NCPUS = 16 # CPUs we want to support scalably +# NFUNCS = 3 # functions per port (NIC, FCoE, iSCSI) + +# Breakdown of Virtual Interface/Queue/Interrupt resources for the "Unified +# PF" which many OS Drivers will use to manage most or all functions. +# +# Each Ingress Queue can use one MSI-X interrupt but some Ingress Queues can +# use Forwarded Interrupt Ingress Queues. For these latter, an Ingress Queue +# would be created and the Queue ID of a Forwarded Interrupt Ingress Queue +# will be specified as the "Ingress Queue Asynchronous Destination Index." +# Thus, the number of MSI-X Vectors assigned to the Unified PF will be less +# than or equal to the number of Ingress Queues ... +# +# NVI_NIC = 4 # NIC access to NPORTS +# NFLIQ_NIC = 32 # NIC Ingress Queues with Free Lists +# NETHCTRL_NIC = 32 # NIC Ethernet Control/TX Queues +# NEQ_NIC = 64 # NIC Egress Queues (FL, ETHCTRL/TX) +# NMPSTCAM_NIC = 16 # NIC MPS TCAM Entries (NPORTS*4) +# NMSIX_NIC = 32 # NIC MSI-X Interrupt Vectors (FLIQ) +# +# NVI_OFLD = 0 # Offload uses NIC function to access ports +# NFLIQ_OFLD = 16 # Offload Ingress Queues with Free Lists +# NETHCTRL_OFLD = 0 # Offload Ethernet Control/TX Queues +# NEQ_OFLD = 16 # Offload Egress Queues (FL) +# NMPSTCAM_OFLD = 0 # Offload MPS TCAM Entries (uses NIC's) +# NMSIX_OFLD = 16 # Offload MSI-X Interrupt Vectors (FLIQ) +# +# NVI_RDMA = 0 # RDMA uses NIC function to access ports +# NFLIQ_RDMA = 4 # RDMA Ingress Queues with Free Lists +# NETHCTRL_RDMA = 0 # RDMA Ethernet Control/TX Queues +# NEQ_RDMA = 4 # RDMA Egress Queues (FL) +# NMPSTCAM_RDMA = 0 # RDMA MPS TCAM Entries (uses NIC's) +# NMSIX_RDMA = 4 # RDMA MSI-X Interrupt Vectors (FLIQ) +# +# NEQ_WD = 128 # Wire Direct TX Queues and 
FLs +# NETHCTRL_WD = 64 # Wire Direct TX Queues +# NFLIQ_WD = 64 # Wire Direct Ingress Queues with Free Lists +# +# NVI_ISCSI = 4 # ISCSI access to NPORTS +# NFLIQ_ISCSI = 4 # ISCSI Ingress Queues with Free Lists +# NETHCTRL_ISCSI = 0 # ISCSI Ethernet Control/TX Queues +# NEQ_ISCSI = 4 # ISCSI Egress Queues (FL) +# NMPSTCAM_ISCSI = 4 # ISCSI MPS TCAM Entries (NPORTS) +# NMSIX_ISCSI = 4 # ISCSI MSI-X Interrupt Vectors (FLIQ) +# +# NVI_FCOE = 4 # FCOE access to NPORTS +# NFLIQ_FCOE = 34 # FCOE Ingress Queues with Free Lists +# NETHCTRL_FCOE = 32 # FCOE Ethernet Control/TX Queues +# NEQ_FCOE = 66 # FCOE Egress Queues (FL) +# NMPSTCAM_FCOE = 32 # FCOE MPS TCAM Entries (NPORTS) +# NMSIX_FCOE = 34 # FCOE MSI-X Interrupt Vectors (FLIQ) + +# Two extra Ingress Queues per function for Firmware Events and Forwarded +# Interrupts, and two extra interrupts per function for Firmware Events (or a +# Forwarded Interrupt Queue) and General Interrupts per function. +# +# NFLIQ_EXTRA = 6 # "extra" Ingress Queues 2*NFUNCS (Firmware and +# # Forwarded Interrupts +# NMSIX_EXTRA = 6 # extra interrupts 2*NFUNCS (Firmware and +# # General Interrupts + +# Microsoft HyperV resources. The HyperV Virtual Ingress Queues will have +# their interrupts forwarded to another set of Forwarded Interrupt Queues.
+# +# NVI_HYPERV = 16 # VMs we want to support +# NVIIQ_HYPERV = 2 # Virtual Ingress Queues with Free Lists per VM +# NFLIQ_HYPERV = 40 # VIQs + NCPUS Forwarded Interrupt Queues +# NEQ_HYPERV = 32 # VIQs Free Lists +# NMPSTCAM_HYPERV = 16 # MPS TCAM Entries (NVI_HYPERV) +# NMSIX_HYPERV = 8 # NCPUS Forwarded Interrupt Queues + +# Adding all of the above Unified PF resource needs together: (NIC + OFLD + +# RDMA + ISCSI + FCOE + EXTRA + HYPERV) +# +# NVI_UNIFIED = 28 +# NFLIQ_UNIFIED = 106 +# NETHCTRL_UNIFIED = 32 +# NEQ_UNIFIED = 124 +# NMPSTCAM_UNIFIED = 40 +# +# The sum of all the MSI-X resources above is 74 MSI-X Vectors but we'll round +# that up to 128 to make sure the Unified PF doesn't run out of resources. +# +# NMSIX_UNIFIED = 128 +# +# The Storage PFs could need up to NPORTS*NCPUS + NMSIX_EXTRA MSI-X Vectors +# which is 34 but they're probably safe with 32. +# +# NMSIX_STORAGE = 32 + +# Note: The UnifiedPF is PF4 which doesn't have any Virtual Functions +# associated with it. Thus, the MSI-X Vector allocations we give to the +# UnifiedPF aren't inherited by any Virtual Functions. As a result we can +# provision many more Virtual Functions than we can if the UnifiedPF were +# one of PF0-3. +# + +# All of the below PCI-E parameters are actually stored in various *_init.txt +# files. We include them below essentially as comments. +# +# For PF0-3 we assign 8 vectors each for NIC Ingress Queues of the associated +# ports 0-3. +# +# For PF4, the Unified PF, we give it an MSI-X Table Size as outlined above. +# +# For PF5-6 we assign enough MSI-X Vectors to support FCoE and iSCSI +# storage applications across all four possible ports. +# +# Additionally, since the UnifiedPF isn't one of the per-port Physical +# Functions, we give the UnifiedPF and the PF0-3 Physical Functions +# different PCI Device IDs which will allow Unified and Per-Port Drivers +# to directly select the type of Physical Function to which they wish to be +# attached. 
+# +# Note that the actual values used for the PCI-E Intellectual Property will be +# 1 less than those below since that's the way it "counts" things. For +# readability, we use the number we actually mean ... +# +# PF0_INT = 8 # NCPUS +# PF1_INT = 8 # NCPUS +# PF0_3_INT = 32 # PF0_INT + PF1_INT + PF2_INT + PF3_INT +# +# PF4_INT = 128 # NMSIX_UNIFIED +# PF5_INT = 32 # NMSIX_STORAGE +# PF6_INT = 32 # NMSIX_STORAGE +# PF7_INT = 0 # Nothing Assigned +# PF4_7_INT = 192 # PF4_INT + PF5_INT + PF6_INT + PF7_INT +# +# PF0_7_INT = 224 # PF0_3_INT + PF4_7_INT +# +# With the above we can get 17 VFs/PF0-3 (limited by 336 MPS TCAM entries) +# but we'll lower that to 16 to make our total 64 and a nice power of 2 ... +# +# NVF = 16 + + +# Some OS Drivers manage all application functions for all ports via PF4. +# Thus we need to provide a large number of resources here. For Egress +# Queues we need to account for both TX Queues as well as Free List Queues +# (because the host is responsible for producing Free List Buffers for the +# hardware to consume). +# +[function "4"] + wx_caps = all # write/execute permissions for all commands + r_caps = all # read permissions for all commands + nvi = 8 # NVI_UNIFIED + rssnvi = 8 + niqflint = 320 # NFLIQ_UNIFIED + NFLIQ_WD + nethctrl = 320 # NETHCTRL_UNIFIED + NETHCTRL_WD + neq = 640 # NEQ_UNIFIED + NEQ_WD + nexactf = 40 # NMPSTCAM_UNIFIED + nrawf = 2 + cmask = all # access to all channels + pmask = all # access to all four ports ... + nclip = 384 # number of clip region entries + nfilter = 496 # number of filter region entries + nhash = 524288 # number of hash region entries + nhpfilter = 64 # number of high priority filter region entries + protocol = nic_hashfilter + tp_l2t = 4096 + +# The following function, 1023, is not an actual PCIE function but is used to +# configure and reserve firmware internal resources that come from the global +# resource pool.
+# +[function "1023"] + wx_caps = all # write/execute permissions for all commands + r_caps = all # read permissions for all commands + nvi = 4 # NVI_UNIFIED + cmask = all # access to all channels + pmask = all # access to all four ports ... + nexactf = 8 # NPORTS + DCBX + + nfilter = 16 # number of filter region entries + + +# For Virtual functions, we only allow NIC functionality and we only allow +# access to one port (1 << PF). Note that because of limitations in the +# Scatter Gather Engine (SGE) hardware which checks writes to VF KDOORBELL +# and GTS registers, the number of Ingress and Egress Queues must be a power +# of 2. +# +[function "0/*"] # NVF + nvi = 1 # 1 port + rssnvi = 0 + + +[function "1/*"] # NVF + nvi = 1 # 1 port + rssnvi = 0 + + +[function "2/*"] # NVF + nvi = 1 # 1 port + rssnvi = 0 + + +[function "3/*"] # NVF + nvi = 1 # 1 port + rssnvi = 0 + + +# MPS features a 196608 bytes ingress buffer that is used for ingress buffering +# for packets from the wire as well as the loopback path of the L2 switch. The +# following params control how the buffer memory is distributed and the L2 flow +# control settings: +# +# bg_mem: %-age of mem to use for port/buffer group +# lpbk_mem: %-age of port/bg mem to use for loopback +# hwm: high watermark; bytes available when starting to send pause +# frames (in units of 0.1 MTU) +# lwm: low watermark; bytes remaining when sending 'unpause' frame +# (in units of 0.1 MTU) +# dwm: minimum delta between high and low watermark (in units of 100 +# Bytes) +# +[port "0"] + dcb = 0 # configure for DCB PPP and enable DCBX offload + hwm = 60 + lwm = 15 + dwm = 30 + +[port "1"] + dcb = 0 + hwm = 60 + lwm = 15 + dwm = 30 + +[fini] + version = 0x0a000025 + checksum = 0x1c3a42cf + +# Total resources used by above allocations: +# Virtual Interfaces: 104 +# Ingress Queues/w Free Lists and Interrupts: 526 +# Egress Queues: 702 +# MPS TCAM Entries: 336 +# MSI-X Vectors: 736 +# Virtual Functions: 64 -- cgit v1.2.3