diff options
Diffstat (limited to 'debian/patches/features/all')
39 files changed, 4819 insertions, 0 deletions
diff --git a/debian/patches/features/all/db-mok-keyring/0003-MODSIGN-checking-the-blacklisted-hash-before-loading-a-kernel-module.patch b/debian/patches/features/all/db-mok-keyring/0003-MODSIGN-checking-the-blacklisted-hash-before-loading-a-kernel-module.patch new file mode 100644 index 000000000..f2278ce63 --- /dev/null +++ b/debian/patches/features/all/db-mok-keyring/0003-MODSIGN-checking-the-blacklisted-hash-before-loading-a-kernel-module.patch @@ -0,0 +1,130 @@ +From: "Lee, Chun-Yi" <joeyli.kernel@gmail.com> +Date: Tue, 13 Mar 2018 18:38:02 +0800 +Subject: [PATCH 3/4] MODSIGN: checking the blacklisted hash before loading a + kernel module +Origin: https://lore.kernel.org/patchwork/patch/933175/ + +This patch adds the logic for checking the kernel module's hash +base on blacklist. The hash must be generated by sha256 and enrolled +to dbx/mokx. + +For example: + sha256sum sample.ko + mokutil --mokx --import-hash $HASH_RESULT + +Whether the signature on ko file is stripped or not, the hash can be +compared by kernel. 
+ +Cc: David Howells <dhowells@redhat.com> +Cc: Josh Boyer <jwboyer@fedoraproject.org> +Cc: James Bottomley <James.Bottomley@HansenPartnership.com> +Signed-off-by: "Lee, Chun-Yi" <jlee@suse.com> +[Rebased by Luca Boccassi] +[bwh: Forward-ported to 5.19: + - The type parameter to is_hash_blacklisted() is now an enumeration + rather than a string + - Adjust filename, context] +--- + kernel/module/signing.c | 59 +++++++++++++++++++++++++++++++++++++++-- + 1 file changed, 57 insertions(+), 2 deletions(-) + +--- a/kernel/module/signing.c ++++ b/kernel/module/signing.c +@@ -13,6 +13,8 @@ + #include <linux/verification.h> + #include <linux/security.h> + #include <crypto/public_key.h> ++#include <crypto/hash.h> ++#include <keys/system_keyring.h> + #include <uapi/linux/module.h> + #include "internal.h" + +@@ -37,13 +39,60 @@ + sig_enforce = true; + } + ++static int mod_is_hash_blacklisted(const void *mod, size_t verifylen) ++{ ++ struct crypto_shash *tfm; ++ struct shash_desc *desc; ++ size_t digest_size, desc_size; ++ u8 *digest; ++ int ret; ++ ++ tfm = crypto_alloc_shash("sha256", 0, 0); ++ if (IS_ERR(tfm)) { ++ ret = PTR_ERR(tfm); ++ goto error_return; ++ } ++ ++ desc_size = crypto_shash_descsize(tfm) + sizeof(*desc); ++ digest_size = crypto_shash_digestsize(tfm); ++ digest = kzalloc(digest_size + desc_size, GFP_KERNEL); ++ if (!digest) { ++ pr_err("digest memory buffer allocate fail\n"); ++ ret = -ENOMEM; ++ goto error_digest; ++ } ++ desc = (void *)digest + digest_size; ++ desc->tfm = tfm; ++ ret = crypto_shash_init(desc); ++ if (ret < 0) ++ goto error_shash; ++ ++ ret = crypto_shash_finup(desc, mod, verifylen, digest); ++ if (ret < 0) ++ goto error_shash; ++ ++ pr_debug("%ld digest: %*phN\n", verifylen, (int) digest_size, digest); ++ ++ ret = is_hash_blacklisted(digest, digest_size, BLACKLIST_HASH_BINARY); ++ if (ret == -EKEYREJECTED) ++ pr_err("Module hash %*phN is blacklisted\n", ++ (int) digest_size, digest); ++ ++error_shash: ++ kfree(digest); ++error_digest: ++ 
crypto_free_shash(tfm); ++error_return: ++ return ret; ++} ++ + /* + * Verify the signature on a module. + */ + int mod_verify_sig(const void *mod, struct load_info *info) + { + struct module_signature ms; +- size_t sig_len, modlen = info->len; ++ size_t sig_len, modlen = info->len, wholelen; + int ret; + + pr_devel("==>%s(,%zu)\n", __func__, modlen); +@@ -51,6 +100,7 @@ + if (modlen <= sizeof(ms)) + return -EBADMSG; + ++ wholelen = modlen + sizeof(MODULE_SIG_STRING) - 1; + memcpy(&ms, mod + (modlen - sizeof(ms)), sizeof(ms)); + + ret = mod_check_sig(&ms, modlen, "module"); +@@ -61,10 +111,17 @@ + modlen -= sig_len + sizeof(ms); + info->len = modlen; + +- return verify_pkcs7_signature(mod, modlen, mod + modlen, sig_len, ++ ret = verify_pkcs7_signature(mod, modlen, mod + modlen, sig_len, + VERIFY_USE_SECONDARY_KEYRING, + VERIFYING_MODULE_SIGNATURE, + NULL, NULL); ++ pr_devel("verify_pkcs7_signature() = %d\n", ret); ++ ++ /* checking hash of module is in blacklist */ ++ if (!ret) ++ ret = mod_is_hash_blacklisted(mod, wholelen); ++ ++ return ret; + } + + int module_sig_check(struct load_info *info, int flags) diff --git a/debian/patches/features/all/db-mok-keyring/KEYS-Make-use-of-platform-keyring-for-module-signature.patch b/debian/patches/features/all/db-mok-keyring/KEYS-Make-use-of-platform-keyring-for-module-signature.patch new file mode 100644 index 000000000..e46aefe4d --- /dev/null +++ b/debian/patches/features/all/db-mok-keyring/KEYS-Make-use-of-platform-keyring-for-module-signature.patch @@ -0,0 +1,33 @@ +From: Robert Holmes <robeholmes@gmail.com> +Date: Tue, 23 Apr 2019 07:39:29 +0000 +Subject: [PATCH] KEYS: Make use of platform keyring for module signature + verify +Bug-Debian: https://bugs.debian.org/935945 +Bug-Debian: https://bugs.debian.org/1030200 +Origin: https://src.fedoraproject.org/rpms/kernel/raw/master/f/KEYS-Make-use-of-platform-keyring-for-module-signature.patch +Forwarded: 
https://lore.kernel.org/linux-modules/qvgp2il2co4iyxkzxvcs4p2bpyilqsbfgcprtpfrsajwae2etc@3z2s2o52i3xg/t/#u + +This allows a cert in DB to be used to sign modules, +in addition to certs in the MoK and built-in keyrings. + +Signed-off-by: Robert Holmes <robeholmes@gmail.com> +Signed-off-by: Jeremy Cline <jcline@redhat.com> +[bwh: Forward-ported to 5.19: adjust filename] +[наб: reinstate for 6.1, re-write description] +--- +--- a/kernel/module/signing.c ++++ b/kernel/module/signing.c +@@ -116,6 +116,13 @@ int mod_verify_sig(const void *mod, stru + VERIFYING_MODULE_SIGNATURE, + NULL, NULL); + pr_devel("verify_pkcs7_signature() = %d\n", ret); ++ if (ret == -ENOKEY && IS_ENABLED(CONFIG_INTEGRITY_PLATFORM_KEYRING)) { ++ ret = verify_pkcs7_signature(mod, modlen, mod + modlen, sig_len, ++ VERIFY_USE_PLATFORM_KEYRING, ++ VERIFYING_MODULE_SIGNATURE, ++ NULL, NULL); ++ pr_devel("verify_pkcs7_signature() = %d\n", ret); ++ } + + /* checking hash of module is in blacklist */ + if (!ret) diff --git a/debian/patches/features/all/db-mok-keyring/trust-machine-keyring-by-default.patch b/debian/patches/features/all/db-mok-keyring/trust-machine-keyring-by-default.patch new file mode 100644 index 000000000..7c32a018f --- /dev/null +++ b/debian/patches/features/all/db-mok-keyring/trust-machine-keyring-by-default.patch @@ -0,0 +1,16 @@ +Author: Luca Boccassi <bluca@debian.org> +Description: trust machine keyring (MoK) by default + Debian always trusted keys in MoK by default. Upstream made it conditional on + a new EFI variable being set. To keep backward compatibility skip this check. 
+--- a/security/integrity/platform_certs/machine_keyring.c ++++ b/security/integrity/platform_certs/machine_keyring.c +@@ -69,8 +69,7 @@ + if (!initialized) { + initialized = true; + +- if (uefi_check_trust_mok_keys()) +- trust_mok = true; ++ trust_mok = true; + } + + return trust_mok; diff --git a/debian/patches/features/all/drivers-media-dvb-usb-af9005-request_firmware.patch b/debian/patches/features/all/drivers-media-dvb-usb-af9005-request_firmware.patch new file mode 100644 index 000000000..a24ba17ef --- /dev/null +++ b/debian/patches/features/all/drivers-media-dvb-usb-af9005-request_firmware.patch @@ -0,0 +1,149 @@ +From: Ben Hutchings <ben@decadent.org.uk> +Date: Mon, 24 Aug 2009 23:19:58 +0100 +Subject: af9005: Use request_firmware() to load register init script +Forwarded: no + +Read the register init script from the Windows driver. This is sick +but should avoid the potential copyright infringement in distributing +a version of the script which is directly derived from the driver. 
+--- + drivers/media/dvb/dvb-usb/Kconfig | 2 +- + drivers/media/dvb/dvb-usb/af9005-fe.c | 66 ++++++++++++++++++++++++++------ + 2 files changed, 54 insertions(+), 14 deletions(-) + +Index: debian-kernel/drivers/media/usb/dvb-usb/Kconfig +=================================================================== +--- debian-kernel.orig/drivers/media/usb/dvb-usb/Kconfig ++++ debian-kernel/drivers/media/usb/dvb-usb/Kconfig +@@ -260,10 +260,10 @@ config DVB_USB_OPERA1 + + config DVB_USB_AF9005 + tristate "Afatech AF9005 DVB-T USB1.1 support" +- depends on BROKEN + depends on DVB_USB + select MEDIA_TUNER_MT2060 if MEDIA_SUBDRV_AUTOSELECT + select MEDIA_TUNER_QT1010 if MEDIA_SUBDRV_AUTOSELECT ++ select FW_LOADER + help + Say Y here to support the Afatech AF9005 based DVB-T USB1.1 receiver + and the TerraTec Cinergy T USB XE (Rev.1) +Index: debian-kernel/drivers/media/usb/dvb-usb/af9005-fe.c +=================================================================== +--- debian-kernel.orig/drivers/media/usb/dvb-usb/af9005-fe.c ++++ debian-kernel/drivers/media/usb/dvb-usb/af9005-fe.c +@@ -9,10 +9,26 @@ + * see Documentation/driver-api/media/drivers/dvb-usb.rst for more information + */ + #include "af9005.h" +-#include "af9005-script.h" + #include "mt2060.h" + #include "qt1010.h" + #include <asm/div64.h> ++#include <linux/firmware.h> ++ ++/* Register initialisation script to be extracted from the Windows driver */ ++ ++typedef struct { ++ __le16 reg; ++ u8 pos; ++ u8 len; ++ u8 val; ++ u8 pad; ++} __packed RegDesc; ++ ++#define WIN_DRV_NAME "AF05BDA.sys" ++#define WIN_DRV_VERSION "6.3.2.1" ++#define WIN_DRV_SIZE 133504 ++#define WIN_DRV_SCRIPT_OFFSET 88316 ++#define WIN_DRV_SCRIPT_SIZE 1110 + + struct af9005_fe_state { + struct dvb_usb_device *d; +@@ -804,6 +820,8 @@ static int af9005_fe_init(struct dvb_fro + { + struct af9005_fe_state *state = fe->demodulator_priv; + struct dvb_usb_adapter *adap = fe->dvb->priv; ++ const struct firmware *fw; ++ const RegDesc *script; + int ret, i, 
scriptlen; + u8 temp, temp0 = 0, temp1 = 0, temp2 = 0; + u8 buf[2]; +@@ -956,37 +974,55 @@ static int af9005_fe_init(struct dvb_fro + if ((ret = af9005_write_ofdm_register(state->d, 0xaefb, 0x01))) + return ret; + +- /* load init script */ +- deb_info("load init script\n"); +- scriptlen = sizeof(script) / sizeof(RegDesc); ++ /* load and validate init script */ ++ deb_info("load init script from Windows driver\n"); ++ ret = request_firmware(&fw, WIN_DRV_NAME, &state->d->udev->dev); ++ if (ret) ++ return ret; ++ BUILD_BUG_ON(sizeof(RegDesc) != 6); ++ if (fw->size != WIN_DRV_SIZE || ++ memcmp(fw->data + WIN_DRV_SCRIPT_OFFSET, ++ "\x80\xa1\x00\x08\x0a\x00", 6) || ++ memcmp(fw->data + WIN_DRV_SCRIPT_OFFSET + WIN_DRV_SCRIPT_SIZE - 6, ++ "\x49\xa3\x00\x06\x02\x00", 6)) { ++ err("%s is invalid - should be version %s, size %u bytes\n", ++ WIN_DRV_NAME, WIN_DRV_VERSION, WIN_DRV_SIZE); ++ ret = -EINVAL; ++ goto fail_release; ++ } ++ ++ script = (const RegDesc *)(fw->data + WIN_DRV_SCRIPT_OFFSET); ++ scriptlen = WIN_DRV_SCRIPT_SIZE / sizeof(RegDesc); + for (i = 0; i < scriptlen; i++) { ++ u16 reg = le16_to_cpu(script[i].reg); + if ((ret = +- af9005_write_register_bits(state->d, script[i].reg, ++ af9005_write_register_bits(state->d, reg, + script[i].pos, + script[i].len, script[i].val))) +- return ret; ++ goto fail_release; + /* save 3 bytes of original fcw */ +- if (script[i].reg == 0xae18) ++ if (reg == 0xae18) + temp2 = script[i].val; +- if (script[i].reg == 0xae19) ++ if (reg == 0xae19) + temp1 = script[i].val; +- if (script[i].reg == 0xae1a) ++ if (reg == 0xae1a) + temp0 = script[i].val; + + /* save original unplug threshold */ +- if (script[i].reg == xd_p_reg_unplug_th) ++ if (reg == xd_p_reg_unplug_th) + state->original_if_unplug_th = script[i].val; +- if (script[i].reg == xd_p_reg_unplug_rf_gain_th) ++ if (reg == xd_p_reg_unplug_rf_gain_th) + state->original_rf_unplug_th = script[i].val; +- if (script[i].reg == xd_p_reg_unplug_dtop_if_gain_th) ++ if (reg == 
xd_p_reg_unplug_dtop_if_gain_th) + state->original_dtop_if_unplug_th = script[i].val; +- if (script[i].reg == xd_p_reg_unplug_dtop_rf_gain_th) ++ if (reg == xd_p_reg_unplug_dtop_rf_gain_th) + state->original_dtop_rf_unplug_th = script[i].val; + + } + state->original_fcw = + ((u32) temp2 << 16) + ((u32) temp1 << 8) + (u32) temp0; + ++ release_firmware(fw); + + /* save original TOPs */ + deb_info("save original TOPs\n"); +@@ -1066,6 +1102,10 @@ static int af9005_fe_init(struct dvb_fro + + deb_info("profit!\n"); + return 0; ++ ++fail_release: ++ release_firmware(fw); ++ return ret; + } + + static int af9005_fe_sleep(struct dvb_frontend *fe) diff --git a/debian/patches/features/all/ethernet-microsoft/0001-net-Remove-the-obsolte-u64_stats_fetch_-_irq-users-d.patch b/debian/patches/features/all/ethernet-microsoft/0001-net-Remove-the-obsolte-u64_stats_fetch_-_irq-users-d.patch new file mode 100644 index 000000000..f17692d6c --- /dev/null +++ b/debian/patches/features/all/ethernet-microsoft/0001-net-Remove-the-obsolte-u64_stats_fetch_-_irq-users-d.patch @@ -0,0 +1,89 @@ +From 7c765fdf86fc9089b75d15ee33cdf30d9d9115d6 Mon Sep 17 00:00:00 2001 +From: Bastian Blank <waldi@debian.org> +Date: Wed, 12 Jul 2023 14:13:14 +0000 +Subject: [PATCH 01/23] net: Remove the obsolte u64_stats_fetch_*_irq() users + (drivers). + +Now that the 32bit UP oddity is gone and 32bit uses always a sequence +count, there is no need for the fetch_irq() variants anymore. + +Convert to the regular interface. 
+ +Signed-off-by: Thomas Gleixner <tglx@linutronix.de> +Signed-off-by: Sebastian Andrzej Siewior <bigeasy@linutronix.de> +Acked-by: Peter Zijlstra (Intel) <peterz@infradead.org> +Signed-off-by: Jakub Kicinski <kuba@kernel.org> +(cherry picked from commit 068c38ad88ccb09e5e966d4db5cedab0e02b3b95) +Signed-off-by: Bastian Blank <waldi@debian.org> +--- + drivers/net/ethernet/microsoft/mana/mana_en.c | 8 ++++---- + drivers/net/ethernet/microsoft/mana/mana_ethtool.c | 8 ++++---- + 2 files changed, 8 insertions(+), 8 deletions(-) + +diff --git a/drivers/net/ethernet/microsoft/mana/mana_en.c b/drivers/net/ethernet/microsoft/mana/mana_en.c +index 27a0f3af8aab..aec4bab6be56 100644 +--- a/drivers/net/ethernet/microsoft/mana/mana_en.c ++++ b/drivers/net/ethernet/microsoft/mana/mana_en.c +@@ -315,10 +315,10 @@ static void mana_get_stats64(struct net_device *ndev, + rx_stats = &apc->rxqs[q]->stats; + + do { +- start = u64_stats_fetch_begin_irq(&rx_stats->syncp); ++ start = u64_stats_fetch_begin(&rx_stats->syncp); + packets = rx_stats->packets; + bytes = rx_stats->bytes; +- } while (u64_stats_fetch_retry_irq(&rx_stats->syncp, start)); ++ } while (u64_stats_fetch_retry(&rx_stats->syncp, start)); + + st->rx_packets += packets; + st->rx_bytes += bytes; +@@ -328,10 +328,10 @@ static void mana_get_stats64(struct net_device *ndev, + tx_stats = &apc->tx_qp[q].txq.stats; + + do { +- start = u64_stats_fetch_begin_irq(&tx_stats->syncp); ++ start = u64_stats_fetch_begin(&tx_stats->syncp); + packets = tx_stats->packets; + bytes = tx_stats->bytes; +- } while (u64_stats_fetch_retry_irq(&tx_stats->syncp, start)); ++ } while (u64_stats_fetch_retry(&tx_stats->syncp, start)); + + st->tx_packets += packets; + st->tx_bytes += bytes; +diff --git a/drivers/net/ethernet/microsoft/mana/mana_ethtool.c b/drivers/net/ethernet/microsoft/mana/mana_ethtool.c +index c530db76880f..96d55c91c969 100644 +--- a/drivers/net/ethernet/microsoft/mana/mana_ethtool.c ++++ 
b/drivers/net/ethernet/microsoft/mana/mana_ethtool.c +@@ -90,13 +90,13 @@ static void mana_get_ethtool_stats(struct net_device *ndev, + rx_stats = &apc->rxqs[q]->stats; + + do { +- start = u64_stats_fetch_begin_irq(&rx_stats->syncp); ++ start = u64_stats_fetch_begin(&rx_stats->syncp); + packets = rx_stats->packets; + bytes = rx_stats->bytes; + xdp_drop = rx_stats->xdp_drop; + xdp_tx = rx_stats->xdp_tx; + xdp_redirect = rx_stats->xdp_redirect; +- } while (u64_stats_fetch_retry_irq(&rx_stats->syncp, start)); ++ } while (u64_stats_fetch_retry(&rx_stats->syncp, start)); + + data[i++] = packets; + data[i++] = bytes; +@@ -109,11 +109,11 @@ static void mana_get_ethtool_stats(struct net_device *ndev, + tx_stats = &apc->tx_qp[q].txq.stats; + + do { +- start = u64_stats_fetch_begin_irq(&tx_stats->syncp); ++ start = u64_stats_fetch_begin(&tx_stats->syncp); + packets = tx_stats->packets; + bytes = tx_stats->bytes; + xdp_xmit = tx_stats->xdp_xmit; +- } while (u64_stats_fetch_retry_irq(&tx_stats->syncp, start)); ++ } while (u64_stats_fetch_retry(&tx_stats->syncp, start)); + + data[i++] = packets; + data[i++] = bytes; +-- +2.40.1 + diff --git a/debian/patches/features/all/ethernet-microsoft/0002-net-mana-Assign-interrupts-to-CPUs-based-on-NUMA-nod.patch b/debian/patches/features/all/ethernet-microsoft/0002-net-mana-Assign-interrupts-to-CPUs-based-on-NUMA-nod.patch new file mode 100644 index 000000000..6fc9d9cc7 --- /dev/null +++ b/debian/patches/features/all/ethernet-microsoft/0002-net-mana-Assign-interrupts-to-CPUs-based-on-NUMA-nod.patch @@ -0,0 +1,114 @@ +From 8bdc2e0b4963e7cfd41b43ed117e8f0dec13fe29 Mon Sep 17 00:00:00 2001 +From: Saurabh Sengar <ssengar@linux.microsoft.com> +Date: Mon, 31 Oct 2022 23:06:01 -0700 +Subject: [PATCH 02/23] net: mana: Assign interrupts to CPUs based on NUMA + nodes + +In large VMs with multiple NUMA nodes, network performance is usually +best if network interrupts are all assigned to the same virtual NUMA +node. 
This patch assigns online CPU according to a numa aware policy, +local cpus are returned first, followed by non-local ones, then it wraps +around. + +Signed-off-by: Saurabh Sengar <ssengar@linux.microsoft.com> +Reviewed-by: Haiyang Zhang <haiyangz@microsoft.com> +Link: https://lore.kernel.org/r/1667282761-11547-1-git-send-email-ssengar@linux.microsoft.com +Signed-off-by: Paolo Abeni <pabeni@redhat.com> +(cherry picked from commit 71fa6887eeca7b631528f9c7a39815498de8028c) +Signed-off-by: Bastian Blank <waldi@debian.org> +--- + drivers/net/ethernet/microsoft/mana/gdma.h | 1 + + .../net/ethernet/microsoft/mana/gdma_main.c | 30 +++++++++++++++++-- + 2 files changed, 28 insertions(+), 3 deletions(-) + +diff --git a/drivers/net/ethernet/microsoft/mana/gdma.h b/drivers/net/ethernet/microsoft/mana/gdma.h +index 48b0ab56bdb0..68684a8bcead 100644 +--- a/drivers/net/ethernet/microsoft/mana/gdma.h ++++ b/drivers/net/ethernet/microsoft/mana/gdma.h +@@ -356,6 +356,7 @@ struct gdma_context { + void __iomem *shm_base; + void __iomem *db_page_base; + u32 db_page_size; ++ int numa_node; + + /* Shared memory chanenl (used to bootstrap HWC) */ + struct shm_channel shm_channel; +diff --git a/drivers/net/ethernet/microsoft/mana/gdma_main.c b/drivers/net/ethernet/microsoft/mana/gdma_main.c +index d674ebda2053..abe9888a40aa 100644 +--- a/drivers/net/ethernet/microsoft/mana/gdma_main.c ++++ b/drivers/net/ethernet/microsoft/mana/gdma_main.c +@@ -1208,8 +1208,10 @@ static int mana_gd_setup_irqs(struct pci_dev *pdev) + struct gdma_context *gc = pci_get_drvdata(pdev); + struct gdma_irq_context *gic; + unsigned int max_irqs; ++ u16 *cpus; ++ cpumask_var_t req_mask; + int nvec, irq; +- int err, i, j; ++ int err, i = 0, j; + + if (max_queues_per_port > MANA_MAX_NUM_QUEUES) + max_queues_per_port = MANA_MAX_NUM_QUEUES; +@@ -1228,7 +1230,21 @@ static int mana_gd_setup_irqs(struct pci_dev *pdev) + goto free_irq_vector; + } + ++ if (!zalloc_cpumask_var(&req_mask, GFP_KERNEL)) { ++ err = -ENOMEM; ++ 
goto free_irq; ++ } ++ ++ cpus = kcalloc(nvec, sizeof(*cpus), GFP_KERNEL); ++ if (!cpus) { ++ err = -ENOMEM; ++ goto free_mask; ++ } ++ for (i = 0; i < nvec; i++) ++ cpus[i] = cpumask_local_spread(i, gc->numa_node); ++ + for (i = 0; i < nvec; i++) { ++ cpumask_set_cpu(cpus[i], req_mask); + gic = &gc->irq_contexts[i]; + gic->handler = NULL; + gic->arg = NULL; +@@ -1243,13 +1259,17 @@ static int mana_gd_setup_irqs(struct pci_dev *pdev) + irq = pci_irq_vector(pdev, i); + if (irq < 0) { + err = irq; +- goto free_irq; ++ goto free_mask; + } + + err = request_irq(irq, mana_gd_intr, 0, gic->name, gic); + if (err) +- goto free_irq; ++ goto free_mask; ++ irq_set_affinity_and_hint(irq, req_mask); ++ cpumask_clear(req_mask); + } ++ free_cpumask_var(req_mask); ++ kfree(cpus); + + err = mana_gd_alloc_res_map(nvec, &gc->msix_resource); + if (err) +@@ -1260,6 +1280,9 @@ static int mana_gd_setup_irqs(struct pci_dev *pdev) + + return 0; + ++free_mask: ++ free_cpumask_var(req_mask); ++ kfree(cpus); + free_irq: + for (j = i - 1; j >= 0; j--) { + irq = pci_irq_vector(pdev, j); +@@ -1389,6 +1412,7 @@ static int mana_gd_probe(struct pci_dev *pdev, const struct pci_device_id *ent) + if (!bar0_va) + goto free_gc; + ++ gc->numa_node = dev_to_node(&pdev->dev); + gc->is_pf = mana_is_pf(pdev->device); + gc->bar0_va = bar0_va; + gc->dev = &pdev->dev; +-- +2.40.1 + diff --git a/debian/patches/features/all/ethernet-microsoft/0003-net-mana-Add-support-for-auxiliary-device.patch b/debian/patches/features/all/ethernet-microsoft/0003-net-mana-Add-support-for-auxiliary-device.patch new file mode 100644 index 000000000..a639b69c9 --- /dev/null +++ b/debian/patches/features/all/ethernet-microsoft/0003-net-mana-Add-support-for-auxiliary-device.patch @@ -0,0 +1,178 @@ +From 63ea159035f1c00da99016a1c71380d90f156ad4 Mon Sep 17 00:00:00 2001 +From: Long Li <longli@microsoft.com> +Date: Thu, 3 Nov 2022 12:16:19 -0700 +Subject: [PATCH 03/23] net: mana: Add support for auxiliary device + +In preparation for 
supporting MANA RDMA driver, add support for auxiliary +device in the Ethernet driver. The RDMA device is modeled as an auxiliary +device to the Ethernet device. + +Reviewed-by: Dexuan Cui <decui@microsoft.com> +Signed-off-by: Long Li <longli@microsoft.com> +Link: https://lore.kernel.org/r/1667502990-2559-2-git-send-email-longli@linuxonhyperv.com +Acked-by: Haiyang Zhang <haiyangz@microsoft.com> +Signed-off-by: Leon Romanovsky <leonro@nvidia.com> +(cherry picked from commit a69839d4327d053b18d8e1b0e7ddeee78db78f4f) +Signed-off-by: Bastian Blank <waldi@debian.org> +--- + drivers/net/ethernet/microsoft/Kconfig | 1 + + drivers/net/ethernet/microsoft/mana/gdma.h | 2 + + .../ethernet/microsoft/mana/mana_auxiliary.h | 10 +++ + drivers/net/ethernet/microsoft/mana/mana_en.c | 83 ++++++++++++++++++- + 4 files changed, 95 insertions(+), 1 deletion(-) + create mode 100644 drivers/net/ethernet/microsoft/mana/mana_auxiliary.h + +--- a/drivers/net/ethernet/microsoft/Kconfig ++++ b/drivers/net/ethernet/microsoft/Kconfig +@@ -19,6 +19,7 @@ config MICROSOFT_MANA + tristate "Microsoft Azure Network Adapter (MANA) support" + depends on PCI_MSI && X86_64 + depends on PCI_HYPERV ++ select AUXILIARY_BUS + select PAGE_POOL + help + This driver supports Microsoft Azure Network Adapter (MANA). +--- a/drivers/net/ethernet/microsoft/mana/gdma.h ++++ b/drivers/net/ethernet/microsoft/mana/gdma.h +@@ -204,6 +204,8 @@ struct gdma_dev { + + /* GDMA driver specific pointer */ + void *driver_data; ++ ++ struct auxiliary_device *adev; + }; + + #define MINIMUM_SUPPORTED_PAGE_SIZE PAGE_SIZE +--- /dev/null ++++ b/drivers/net/ethernet/microsoft/mana/mana_auxiliary.h +@@ -0,0 +1,10 @@ ++/* SPDX-License-Identifier: GPL-2.0-only */ ++/* Copyright (c) 2022, Microsoft Corporation. 
*/ ++ ++#include "mana.h" ++#include <linux/auxiliary_bus.h> ++ ++struct mana_adev { ++ struct auxiliary_device adev; ++ struct gdma_dev *mdev; ++}; +--- a/drivers/net/ethernet/microsoft/mana/mana_en.c ++++ b/drivers/net/ethernet/microsoft/mana/mana_en.c +@@ -14,6 +14,19 @@ + #include <net/ip6_checksum.h> + + #include "mana.h" ++#include "mana_auxiliary.h" ++ ++static DEFINE_IDA(mana_adev_ida); ++ ++static int mana_adev_idx_alloc(void) ++{ ++ return ida_alloc(&mana_adev_ida, GFP_KERNEL); ++} ++ ++static void mana_adev_idx_free(int idx) ++{ ++ ida_free(&mana_adev_ida, idx); ++} + + /* Microsoft Azure Network Adapter (MANA) functions */ + +@@ -2145,6 +2158,69 @@ free_net: + return err; + } + ++static void adev_release(struct device *dev) ++{ ++ struct mana_adev *madev = container_of(dev, struct mana_adev, adev.dev); ++ ++ kfree(madev); ++} ++ ++static void remove_adev(struct gdma_dev *gd) ++{ ++ struct auxiliary_device *adev = gd->adev; ++ int id = adev->id; ++ ++ auxiliary_device_delete(adev); ++ auxiliary_device_uninit(adev); ++ ++ mana_adev_idx_free(id); ++ gd->adev = NULL; ++} ++ ++static int add_adev(struct gdma_dev *gd) ++{ ++ struct auxiliary_device *adev; ++ struct mana_adev *madev; ++ int ret; ++ ++ madev = kzalloc(sizeof(*madev), GFP_KERNEL); ++ if (!madev) ++ return -ENOMEM; ++ ++ adev = &madev->adev; ++ ret = mana_adev_idx_alloc(); ++ if (ret < 0) ++ goto idx_fail; ++ adev->id = ret; ++ ++ adev->name = "rdma"; ++ adev->dev.parent = gd->gdma_context->dev; ++ adev->dev.release = adev_release; ++ madev->mdev = gd; ++ ++ ret = auxiliary_device_init(adev); ++ if (ret) ++ goto init_fail; ++ ++ ret = auxiliary_device_add(adev); ++ if (ret) ++ goto add_fail; ++ ++ gd->adev = adev; ++ return 0; ++ ++add_fail: ++ auxiliary_device_uninit(adev); ++ ++init_fail: ++ mana_adev_idx_free(adev->id); ++ ++idx_fail: ++ kfree(madev); ++ ++ return ret; ++} ++ + int mana_probe(struct gdma_dev *gd, bool resuming) + { + struct gdma_context *gc = gd->gdma_context; +@@ -2212,6 
+2288,8 @@ int mana_probe(struct gdma_dev *gd, bool + break; + } + } ++ ++ err = add_adev(gd); + out: + if (err) + mana_remove(gd, false); +@@ -2228,6 +2306,10 @@ void mana_remove(struct gdma_dev *gd, bo + int err; + int i; + ++ /* adev currently doesn't support suspending, always remove it */ ++ if (gd->adev) ++ remove_adev(gd); ++ + for (i = 0; i < ac->num_ports; i++) { + ndev = ac->ports[i]; + if (!ndev) { +@@ -2260,7 +2342,6 @@ void mana_remove(struct gdma_dev *gd, bo + } + + mana_destroy_eq(ac); +- + out: + mana_gd_deregister_device(gd); + diff --git a/debian/patches/features/all/ethernet-microsoft/0004-net-mana-Record-the-physical-address-for-doorbell-pa.patch b/debian/patches/features/all/ethernet-microsoft/0004-net-mana-Record-the-physical-address-for-doorbell-pa.patch new file mode 100644 index 000000000..21568f6aa --- /dev/null +++ b/debian/patches/features/all/ethernet-microsoft/0004-net-mana-Record-the-physical-address-for-doorbell-pa.patch @@ -0,0 +1,63 @@ +From 108ed3960c8f1e65ad64b22a4a073c4e42204132 Mon Sep 17 00:00:00 2001 +From: Long Li <longli@microsoft.com> +Date: Thu, 3 Nov 2022 12:16:20 -0700 +Subject: [PATCH 04/23] net: mana: Record the physical address for doorbell + page region + +For supporting RDMA device with multiple user contexts with their +individual doorbell pages, record the start address of doorbell page +region for use by the RDMA driver to allocate user context doorbell IDs. 
+ +Reviewed-by: Dexuan Cui <decui@microsoft.com> +Signed-off-by: Long Li <longli@microsoft.com> +Link: https://lore.kernel.org/r/1667502990-2559-3-git-send-email-longli@linuxonhyperv.com +Acked-by: Haiyang Zhang <haiyangz@microsoft.com> +Signed-off-by: Leon Romanovsky <leonro@nvidia.com> +(cherry picked from commit f3dc096246091048677c45cfc0e24ad512927b52) +Signed-off-by: Bastian Blank <waldi@debian.org> +--- + drivers/net/ethernet/microsoft/mana/gdma.h | 2 ++ + drivers/net/ethernet/microsoft/mana/gdma_main.c | 4 ++++ + 2 files changed, 6 insertions(+) + +diff --git a/drivers/net/ethernet/microsoft/mana/gdma.h b/drivers/net/ethernet/microsoft/mana/gdma.h +index df0ffe35db92..86d8a9e36005 100644 +--- a/drivers/net/ethernet/microsoft/mana/gdma.h ++++ b/drivers/net/ethernet/microsoft/mana/gdma.h +@@ -354,9 +354,11 @@ struct gdma_context { + u32 test_event_eq_id; + + bool is_pf; ++ phys_addr_t bar0_pa; + void __iomem *bar0_va; + void __iomem *shm_base; + void __iomem *db_page_base; ++ phys_addr_t phys_db_page_base; + u32 db_page_size; + int numa_node; + +diff --git a/drivers/net/ethernet/microsoft/mana/gdma_main.c b/drivers/net/ethernet/microsoft/mana/gdma_main.c +index abe9888a40aa..a00bd88443d3 100644 +--- a/drivers/net/ethernet/microsoft/mana/gdma_main.c ++++ b/drivers/net/ethernet/microsoft/mana/gdma_main.c +@@ -44,6 +44,9 @@ static void mana_gd_init_vf_regs(struct pci_dev *pdev) + gc->db_page_base = gc->bar0_va + + mana_gd_r64(gc, GDMA_REG_DB_PAGE_OFFSET); + ++ gc->phys_db_page_base = gc->bar0_pa + ++ mana_gd_r64(gc, GDMA_REG_DB_PAGE_OFFSET); ++ + gc->shm_base = gc->bar0_va + mana_gd_r64(gc, GDMA_REG_SHM_OFFSET); + } + +@@ -1407,6 +1410,7 @@ static int mana_gd_probe(struct pci_dev *pdev, const struct pci_device_id *ent) + + mutex_init(&gc->eq_test_event_mutex); + pci_set_drvdata(pdev, gc); ++ gc->bar0_pa = pci_resource_start(pdev, 0); + + bar0_va = pci_iomap(pdev, bar, 0); + if (!bar0_va) +-- +2.40.1 + diff --git 
a/debian/patches/features/all/ethernet-microsoft/0005-net-mana-Handle-vport-sharing-between-devices.patch b/debian/patches/features/all/ethernet-microsoft/0005-net-mana-Handle-vport-sharing-between-devices.patch new file mode 100644 index 000000000..f02172221 --- /dev/null +++ b/debian/patches/features/all/ethernet-microsoft/0005-net-mana-Handle-vport-sharing-between-devices.patch @@ -0,0 +1,150 @@ +From 35238c1a74198a73e35ed386a7a3992522de804d Mon Sep 17 00:00:00 2001 +From: Long Li <longli@microsoft.com> +Date: Thu, 3 Nov 2022 12:16:21 -0700 +Subject: [PATCH 05/23] net: mana: Handle vport sharing between devices + +For outgoing packets, the PF requires the VF to configure the vport with +corresponding protection domain and doorbell ID for the kernel or user +context. The vport can't be shared between different contexts. + +Implement the logic to exclusively take over the vport by either the +Ethernet device or RDMA device. + +Reviewed-by: Dexuan Cui <decui@microsoft.com> +Signed-off-by: Long Li <longli@microsoft.com> +Link: https://lore.kernel.org/r/1667502990-2559-4-git-send-email-longli@linuxonhyperv.com +Acked-by: Haiyang Zhang <haiyangz@microsoft.com> +Signed-off-by: Leon Romanovsky <leonro@nvidia.com> +(cherry picked from commit b5c1c9855be3b5b978fde975a63df3cabc273faa) +Signed-off-by: Bastian Blank <waldi@debian.org> +--- + drivers/net/ethernet/microsoft/mana/mana.h | 7 +++ + drivers/net/ethernet/microsoft/mana/mana_en.c | 53 ++++++++++++++++++- + 2 files changed, 58 insertions(+), 2 deletions(-) + +diff --git a/drivers/net/ethernet/microsoft/mana/mana.h b/drivers/net/ethernet/microsoft/mana/mana.h +index d58be64374c8..2883a08dbfb5 100644 +--- a/drivers/net/ethernet/microsoft/mana/mana.h ++++ b/drivers/net/ethernet/microsoft/mana/mana.h +@@ -380,6 +380,10 @@ struct mana_port_context { + mana_handle_t port_handle; + mana_handle_t pf_filter_handle; + ++ /* Mutex for sharing access to vport_use_count */ ++ struct mutex vport_mutex; ++ int vport_use_count; ++ + 
u16 port_idx; + + bool port_is_up; +@@ -631,4 +635,7 @@ struct mana_tx_package { + struct gdma_posted_wqe_info wqe_info; + }; + ++int mana_cfg_vport(struct mana_port_context *apc, u32 protection_dom_id, ++ u32 doorbell_pg_id); ++void mana_uncfg_vport(struct mana_port_context *apc); + #endif /* _MANA_H */ +diff --git a/drivers/net/ethernet/microsoft/mana/mana_en.c b/drivers/net/ethernet/microsoft/mana/mana_en.c +index b9e2723ce167..c5ab5cb63cb7 100644 +--- a/drivers/net/ethernet/microsoft/mana/mana_en.c ++++ b/drivers/net/ethernet/microsoft/mana/mana_en.c +@@ -646,13 +646,48 @@ static int mana_query_vport_cfg(struct mana_port_context *apc, u32 vport_index, + return 0; + } + +-static int mana_cfg_vport(struct mana_port_context *apc, u32 protection_dom_id, +- u32 doorbell_pg_id) ++void mana_uncfg_vport(struct mana_port_context *apc) ++{ ++ mutex_lock(&apc->vport_mutex); ++ apc->vport_use_count--; ++ WARN_ON(apc->vport_use_count < 0); ++ mutex_unlock(&apc->vport_mutex); ++} ++EXPORT_SYMBOL_NS(mana_uncfg_vport, NET_MANA); ++ ++int mana_cfg_vport(struct mana_port_context *apc, u32 protection_dom_id, ++ u32 doorbell_pg_id) + { + struct mana_config_vport_resp resp = {}; + struct mana_config_vport_req req = {}; + int err; + ++ /* This function is used to program the Ethernet port in the hardware ++ * table. It can be called from the Ethernet driver or the RDMA driver. ++ * ++ * For Ethernet usage, the hardware supports only one active user on a ++ * physical port. The driver checks on the port usage before programming ++ * the hardware when creating the RAW QP (RDMA driver) or exposing the ++ * device to kernel NET layer (Ethernet driver). ++ * ++ * Because the RDMA driver doesn't know in advance which QP type the ++ * user will create, it exposes the device with all its ports. The user ++ * may not be able to create RAW QP on a port if this port is already ++ * in used by the Ethernet driver from the kernel. 
++ * ++ * This physical port limitation only applies to the RAW QP. For RC QP, ++ * the hardware doesn't have this limitation. The user can create RC ++ * QPs on a physical port up to the hardware limits independent of the ++ * Ethernet usage on the same port. ++ */ ++ mutex_lock(&apc->vport_mutex); ++ if (apc->vport_use_count > 0) { ++ mutex_unlock(&apc->vport_mutex); ++ return -EBUSY; ++ } ++ apc->vport_use_count++; ++ mutex_unlock(&apc->vport_mutex); ++ + mana_gd_init_req_hdr(&req.hdr, MANA_CONFIG_VPORT_TX, + sizeof(req), sizeof(resp)); + req.vport = apc->port_handle; +@@ -679,9 +714,16 @@ static int mana_cfg_vport(struct mana_port_context *apc, u32 protection_dom_id, + + apc->tx_shortform_allowed = resp.short_form_allowed; + apc->tx_vp_offset = resp.tx_vport_offset; ++ ++ netdev_info(apc->ndev, "Configured vPort %llu PD %u DB %u\n", ++ apc->port_handle, protection_dom_id, doorbell_pg_id); + out: ++ if (err) ++ mana_uncfg_vport(apc); ++ + return err; + } ++EXPORT_SYMBOL_NS(mana_cfg_vport, NET_MANA); + + static int mana_cfg_vport_steering(struct mana_port_context *apc, + enum TRI_STATE rx, +@@ -742,6 +784,9 @@ static int mana_cfg_vport_steering(struct mana_port_context *apc, + resp.hdr.status); + err = -EPROTO; + } ++ ++ netdev_info(ndev, "Configured steering vPort %llu entries %u\n", ++ apc->port_handle, num_entries); + out: + kfree(req); + return err; +@@ -1810,6 +1855,7 @@ static void mana_destroy_vport(struct mana_port_context *apc) + } + + mana_destroy_txq(apc); ++ mana_uncfg_vport(apc); + + if (gd->gdma_context->is_pf) + mana_pf_deregister_hw_vport(apc); +@@ -2082,6 +2128,9 @@ static int mana_probe_port(struct mana_context *ac, int port_idx, + apc->pf_filter_handle = INVALID_MANA_HANDLE; + apc->port_idx = port_idx; + ++ mutex_init(&apc->vport_mutex); ++ apc->vport_use_count = 0; ++ + ndev->netdev_ops = &mana_devops; + ndev->ethtool_ops = &mana_ethtool_ops; + ndev->mtu = ETH_DATA_LEN; +-- +2.40.1 + diff --git 
a/debian/patches/features/all/ethernet-microsoft/0006-net-mana-Set-the-DMA-device-max-segment-size.patch b/debian/patches/features/all/ethernet-microsoft/0006-net-mana-Set-the-DMA-device-max-segment-size.patch new file mode 100644 index 000000000..6ac8249d0 --- /dev/null +++ b/debian/patches/features/all/ethernet-microsoft/0006-net-mana-Set-the-DMA-device-max-segment-size.patch @@ -0,0 +1,40 @@ +From 09b5391c16655893159fe8a58c8b7e31499ef107 Mon Sep 17 00:00:00 2001 +From: Ajay Sharma <sharmaajay@microsoft.com> +Date: Thu, 3 Nov 2022 12:16:22 -0700 +Subject: [PATCH 06/23] net: mana: Set the DMA device max segment size + +MANA hardware doesn't have any restrictions on the DMA segment size, set it +to the max allowed value. + +Signed-off-by: Ajay Sharma <sharmaajay@microsoft.com> +Reviewed-by: Dexuan Cui <decui@microsoft.com> +Signed-off-by: Long Li <longli@microsoft.com> +Link: https://lore.kernel.org/r/1667502990-2559-5-git-send-email-longli@linuxonhyperv.com +Acked-by: Haiyang Zhang <haiyangz@microsoft.com> +Signed-off-by: Leon Romanovsky <leonro@nvidia.com> +(cherry picked from commit 6fe254160bd033a1e62dbad9b734183b31144678) +Signed-off-by: Bastian Blank <waldi@debian.org> +--- + drivers/net/ethernet/microsoft/mana/gdma_main.c | 6 ++++++ + 1 file changed, 6 insertions(+) + +diff --git a/drivers/net/ethernet/microsoft/mana/gdma_main.c b/drivers/net/ethernet/microsoft/mana/gdma_main.c +index a00bd88443d3..bc00a7d07232 100644 +--- a/drivers/net/ethernet/microsoft/mana/gdma_main.c ++++ b/drivers/net/ethernet/microsoft/mana/gdma_main.c +@@ -1403,6 +1403,12 @@ static int mana_gd_probe(struct pci_dev *pdev, const struct pci_device_id *ent) + if (err) + goto release_region; + ++ err = dma_set_max_seg_size(&pdev->dev, UINT_MAX); ++ if (err) { ++ dev_err(&pdev->dev, "Failed to set dma device segment size\n"); ++ goto release_region; ++ } ++ + err = -ENOMEM; + gc = vzalloc(sizeof(*gc)); + if (!gc) +-- +2.40.1 + diff --git 
a/debian/patches/features/all/ethernet-microsoft/0007-net-mana-Export-Work-Queue-functions-for-use-by-RDMA.patch b/debian/patches/features/all/ethernet-microsoft/0007-net-mana-Export-Work-Queue-functions-for-use-by-RDMA.patch new file mode 100644 index 000000000..c846badc8 --- /dev/null +++ b/debian/patches/features/all/ethernet-microsoft/0007-net-mana-Export-Work-Queue-functions-for-use-by-RDMA.patch @@ -0,0 +1,100 @@ +From 8334d5a28eb711b8f464e0bd3ec0f76b0594bf95 Mon Sep 17 00:00:00 2001 +From: Long Li <longli@microsoft.com> +Date: Thu, 3 Nov 2022 12:16:23 -0700 +Subject: [PATCH 07/23] net: mana: Export Work Queue functions for use by RDMA + driver + +RDMA device may need to create Ethernet device queues for use by Queue +Pair type RAW. This allows a user-mode context to access Ethernet hardware +queues. Export the supporting functions for use by the RDMA driver. + +Reviewed-by: Dexuan Cui <decui@microsoft.com> +Signed-off-by: Long Li <longli@microsoft.com> +Link: https://lore.kernel.org/r/1667502990-2559-6-git-send-email-longli@linuxonhyperv.com +Acked-by: Haiyang Zhang <haiyangz@microsoft.com> +Signed-off-by: Leon Romanovsky <leonro@nvidia.com> +(cherry picked from commit 4c0ff7a106e16ab63e0b597557255c012f179578) +Signed-off-by: Bastian Blank <waldi@debian.org> +--- + drivers/net/ethernet/microsoft/mana/gdma_main.c | 1 + + drivers/net/ethernet/microsoft/mana/mana.h | 9 +++++++++ + drivers/net/ethernet/microsoft/mana/mana_en.c | 16 +++++++++------- + 3 files changed, 19 insertions(+), 7 deletions(-) + +diff --git a/drivers/net/ethernet/microsoft/mana/gdma_main.c b/drivers/net/ethernet/microsoft/mana/gdma_main.c +index bc00a7d07232..d9be0f3044ea 100644 +--- a/drivers/net/ethernet/microsoft/mana/gdma_main.c ++++ b/drivers/net/ethernet/microsoft/mana/gdma_main.c +@@ -152,6 +152,7 @@ int mana_gd_send_request(struct gdma_context *gc, u32 req_len, const void *req, + + return mana_hwc_send_request(hwc, req_len, req, resp_len, resp); + } 
++EXPORT_SYMBOL_NS(mana_gd_send_request, NET_MANA); + + int mana_gd_alloc_memory(struct gdma_context *gc, unsigned int length, + struct gdma_mem_info *gmi) +diff --git a/drivers/net/ethernet/microsoft/mana/mana.h b/drivers/net/ethernet/microsoft/mana/mana.h +index 2883a08dbfb5..6e9e86fb4c02 100644 +--- a/drivers/net/ethernet/microsoft/mana/mana.h ++++ b/drivers/net/ethernet/microsoft/mana/mana.h +@@ -635,6 +635,15 @@ struct mana_tx_package { + struct gdma_posted_wqe_info wqe_info; + }; + ++int mana_create_wq_obj(struct mana_port_context *apc, ++ mana_handle_t vport, ++ u32 wq_type, struct mana_obj_spec *wq_spec, ++ struct mana_obj_spec *cq_spec, ++ mana_handle_t *wq_obj); ++ ++void mana_destroy_wq_obj(struct mana_port_context *apc, u32 wq_type, ++ mana_handle_t wq_obj); ++ + int mana_cfg_vport(struct mana_port_context *apc, u32 protection_dom_id, + u32 doorbell_pg_id); + void mana_uncfg_vport(struct mana_port_context *apc); +diff --git a/drivers/net/ethernet/microsoft/mana/mana_en.c b/drivers/net/ethernet/microsoft/mana/mana_en.c +index c5ab5cb63cb7..a526657f4edb 100644 +--- a/drivers/net/ethernet/microsoft/mana/mana_en.c ++++ b/drivers/net/ethernet/microsoft/mana/mana_en.c +@@ -792,11 +792,11 @@ static int mana_cfg_vport_steering(struct mana_port_context *apc, + return err; + } + +-static int mana_create_wq_obj(struct mana_port_context *apc, +- mana_handle_t vport, +- u32 wq_type, struct mana_obj_spec *wq_spec, +- struct mana_obj_spec *cq_spec, +- mana_handle_t *wq_obj) ++int mana_create_wq_obj(struct mana_port_context *apc, ++ mana_handle_t vport, ++ u32 wq_type, struct mana_obj_spec *wq_spec, ++ struct mana_obj_spec *cq_spec, ++ mana_handle_t *wq_obj) + { + struct mana_create_wqobj_resp resp = {}; + struct mana_create_wqobj_req req = {}; +@@ -845,9 +845,10 @@ static int mana_create_wq_obj(struct mana_port_context *apc, + out: + return err; + } ++EXPORT_SYMBOL_NS(mana_create_wq_obj, NET_MANA); + +-static void mana_destroy_wq_obj(struct mana_port_context *apc, u32 
wq_type, +- mana_handle_t wq_obj) ++void mana_destroy_wq_obj(struct mana_port_context *apc, u32 wq_type, ++ mana_handle_t wq_obj) + { + struct mana_destroy_wqobj_resp resp = {}; + struct mana_destroy_wqobj_req req = {}; +@@ -872,6 +873,7 @@ static void mana_destroy_wq_obj(struct mana_port_context *apc, u32 wq_type, + netdev_err(ndev, "Failed to destroy WQ object: %d, 0x%x\n", err, + resp.hdr.status); + } ++EXPORT_SYMBOL_NS(mana_destroy_wq_obj, NET_MANA); + + static void mana_destroy_eq(struct mana_context *ac) + { +-- +2.40.1 + diff --git a/debian/patches/features/all/ethernet-microsoft/0008-net-mana-Record-port-number-in-netdev.patch b/debian/patches/features/all/ethernet-microsoft/0008-net-mana-Record-port-number-in-netdev.patch new file mode 100644 index 000000000..11a680a25 --- /dev/null +++ b/debian/patches/features/all/ethernet-microsoft/0008-net-mana-Record-port-number-in-netdev.patch @@ -0,0 +1,34 @@ +From 026293abc2d3962ebd7d9a4bc9375fa1bddc2995 Mon Sep 17 00:00:00 2001 +From: Long Li <longli@microsoft.com> +Date: Thu, 3 Nov 2022 12:16:24 -0700 +Subject: [PATCH 08/23] net: mana: Record port number in netdev + +The port number is useful for user-mode application to identify this +net device based on port index. Set to the correct value in ndev. 
+ +Reviewed-by: Dexuan Cui <decui@microsoft.com> +Signed-off-by: Long Li <longli@microsoft.com> +Link: https://lore.kernel.org/r/1667502990-2559-7-git-send-email-longli@linuxonhyperv.com +Acked-by: Haiyang Zhang <haiyangz@microsoft.com> +Signed-off-by: Leon Romanovsky <leonro@nvidia.com> +(cherry picked from commit d44089e555ffe63a49cc6e94d0c03d933e413059) +Signed-off-by: Bastian Blank <waldi@debian.org> +--- + drivers/net/ethernet/microsoft/mana/mana_en.c | 1 + + 1 file changed, 1 insertion(+) + +diff --git a/drivers/net/ethernet/microsoft/mana/mana_en.c b/drivers/net/ethernet/microsoft/mana/mana_en.c +index a526657f4edb..5b7aae9bf983 100644 +--- a/drivers/net/ethernet/microsoft/mana/mana_en.c ++++ b/drivers/net/ethernet/microsoft/mana/mana_en.c +@@ -2139,6 +2139,7 @@ static int mana_probe_port(struct mana_context *ac, int port_idx, + ndev->max_mtu = ndev->mtu; + ndev->min_mtu = ndev->mtu; + ndev->needed_headroom = MANA_HEADROOM; ++ ndev->dev_port = port_idx; + SET_NETDEV_DEV(ndev, gc->dev); + + netif_carrier_off(ndev); +-- +2.40.1 + diff --git a/debian/patches/features/all/ethernet-microsoft/0009-net-mana-Move-header-files-to-a-common-location.patch b/debian/patches/features/all/ethernet-microsoft/0009-net-mana-Move-header-files-to-a-common-location.patch new file mode 100644 index 000000000..e83e38049 --- /dev/null +++ b/debian/patches/features/all/ethernet-microsoft/0009-net-mana-Move-header-files-to-a-common-location.patch @@ -0,0 +1,152 @@ +From 36187c02914640d8c2e2ef9f11213842b5082671 Mon Sep 17 00:00:00 2001 +From: Long Li <longli@microsoft.com> +Date: Thu, 3 Nov 2022 12:16:25 -0700 +Subject: [PATCH 09/23] net: mana: Move header files to a common location + +In preparation to add MANA RDMA driver, move all the required header files +to a common location for use by both Ethernet and RDMA drivers. 
+ +Reviewed-by: Dexuan Cui <decui@microsoft.com> +Signed-off-by: Long Li <longli@microsoft.com> +Link: https://lore.kernel.org/r/1667502990-2559-8-git-send-email-longli@linuxonhyperv.com +Acked-by: Haiyang Zhang <haiyangz@microsoft.com> +Signed-off-by: Leon Romanovsky <leonro@nvidia.com> +(cherry picked from commit fd325cd648f15eb9a8b32a68de3bafc72bcfe753) +Signed-off-by: Bastian Blank <waldi@debian.org> +--- + MAINTAINERS | 1 + + drivers/net/ethernet/microsoft/mana/gdma_main.c | 2 +- + drivers/net/ethernet/microsoft/mana/hw_channel.c | 4 ++-- + drivers/net/ethernet/microsoft/mana/mana_bpf.c | 2 +- + drivers/net/ethernet/microsoft/mana/mana_en.c | 4 ++-- + drivers/net/ethernet/microsoft/mana/mana_ethtool.c | 2 +- + drivers/net/ethernet/microsoft/mana/shm_channel.c | 2 +- + {drivers/net/ethernet/microsoft => include/net}/mana/gdma.h | 0 + .../net/ethernet/microsoft => include/net}/mana/hw_channel.h | 0 + {drivers/net/ethernet/microsoft => include/net}/mana/mana.h | 0 + .../ethernet/microsoft => include/net}/mana/mana_auxiliary.h | 0 + .../net/ethernet/microsoft => include/net}/mana/shm_channel.h | 0 + 12 files changed, 9 insertions(+), 8 deletions(-) + rename {drivers/net/ethernet/microsoft => include/net}/mana/gdma.h (100%) + rename {drivers/net/ethernet/microsoft => include/net}/mana/hw_channel.h (100%) + rename {drivers/net/ethernet/microsoft => include/net}/mana/mana.h (100%) + rename {drivers/net/ethernet/microsoft => include/net}/mana/mana_auxiliary.h (100%) + rename {drivers/net/ethernet/microsoft => include/net}/mana/shm_channel.h (100%) + +diff --git a/MAINTAINERS b/MAINTAINERS +index 379387e20a96..a923933bbe82 100644 +--- a/MAINTAINERS ++++ b/MAINTAINERS +@@ -9536,6 +9536,7 @@ F: include/asm-generic/hyperv-tlfs.h + F: include/asm-generic/mshyperv.h + F: include/clocksource/hyperv_timer.h + F: include/linux/hyperv.h ++F: include/net/mana + F: include/uapi/linux/hyperv.h + F: net/vmw_vsock/hyperv_transport.c + F: tools/hv/ +diff --git 
a/drivers/net/ethernet/microsoft/mana/gdma_main.c b/drivers/net/ethernet/microsoft/mana/gdma_main.c +index d9be0f3044ea..b114c31d70ba 100644 +--- a/drivers/net/ethernet/microsoft/mana/gdma_main.c ++++ b/drivers/net/ethernet/microsoft/mana/gdma_main.c +@@ -6,7 +6,7 @@ + #include <linux/utsname.h> + #include <linux/version.h> + +-#include "mana.h" ++#include <net/mana/mana.h> + + static u32 mana_gd_r32(struct gdma_context *g, u64 offset) + { +diff --git a/drivers/net/ethernet/microsoft/mana/hw_channel.c b/drivers/net/ethernet/microsoft/mana/hw_channel.c +index 543a5d5c304f..76829ab43d40 100644 +--- a/drivers/net/ethernet/microsoft/mana/hw_channel.c ++++ b/drivers/net/ethernet/microsoft/mana/hw_channel.c +@@ -1,8 +1,8 @@ + // SPDX-License-Identifier: GPL-2.0 OR BSD-3-Clause + /* Copyright (c) 2021, Microsoft Corporation. */ + +-#include "gdma.h" +-#include "hw_channel.h" ++#include <net/mana/gdma.h> ++#include <net/mana/hw_channel.h> + + static int mana_hwc_get_msg_index(struct hw_channel_context *hwc, u16 *msg_id) + { +diff --git a/drivers/net/ethernet/microsoft/mana/mana_bpf.c b/drivers/net/ethernet/microsoft/mana/mana_bpf.c +index 421fd39ff3a8..3caea631229c 100644 +--- a/drivers/net/ethernet/microsoft/mana/mana_bpf.c ++++ b/drivers/net/ethernet/microsoft/mana/mana_bpf.c +@@ -8,7 +8,7 @@ + #include <linux/bpf_trace.h> + #include <net/xdp.h> + +-#include "mana.h" ++#include <net/mana/mana.h> + + void mana_xdp_tx(struct sk_buff *skb, struct net_device *ndev) + { +diff --git a/drivers/net/ethernet/microsoft/mana/mana_en.c b/drivers/net/ethernet/microsoft/mana/mana_en.c +index 5b7aae9bf983..c0421c4a80d4 100644 +--- a/drivers/net/ethernet/microsoft/mana/mana_en.c ++++ b/drivers/net/ethernet/microsoft/mana/mana_en.c +@@ -12,8 +12,8 @@ + #include <net/checksum.h> + #include <net/ip6_checksum.h> + +-#include "mana.h" +-#include "mana_auxiliary.h" ++#include <net/mana/mana.h> ++#include <net/mana/mana_auxiliary.h> + + static DEFINE_IDA(mana_adev_ida); + +diff --git 
a/drivers/net/ethernet/microsoft/mana/mana_ethtool.c b/drivers/net/ethernet/microsoft/mana/mana_ethtool.c +index 96d55c91c969..5b776a33a817 100644 +--- a/drivers/net/ethernet/microsoft/mana/mana_ethtool.c ++++ b/drivers/net/ethernet/microsoft/mana/mana_ethtool.c +@@ -5,7 +5,7 @@ + #include <linux/etherdevice.h> + #include <linux/ethtool.h> + +-#include "mana.h" ++#include <net/mana/mana.h> + + static const struct { + char name[ETH_GSTRING_LEN]; +diff --git a/drivers/net/ethernet/microsoft/mana/shm_channel.c b/drivers/net/ethernet/microsoft/mana/shm_channel.c +index da255da62176..5553af9c8085 100644 +--- a/drivers/net/ethernet/microsoft/mana/shm_channel.c ++++ b/drivers/net/ethernet/microsoft/mana/shm_channel.c +@@ -6,7 +6,7 @@ + #include <linux/io.h> + #include <linux/mm.h> + +-#include "shm_channel.h" ++#include <net/mana/shm_channel.h> + + #define PAGE_FRAME_L48_WIDTH_BYTES 6 + #define PAGE_FRAME_L48_WIDTH_BITS (PAGE_FRAME_L48_WIDTH_BYTES * 8) +diff --git a/drivers/net/ethernet/microsoft/mana/gdma.h b/include/net/mana/gdma.h +similarity index 100% +rename from drivers/net/ethernet/microsoft/mana/gdma.h +rename to include/net/mana/gdma.h +diff --git a/drivers/net/ethernet/microsoft/mana/hw_channel.h b/include/net/mana/hw_channel.h +similarity index 100% +rename from drivers/net/ethernet/microsoft/mana/hw_channel.h +rename to include/net/mana/hw_channel.h +diff --git a/drivers/net/ethernet/microsoft/mana/mana.h b/include/net/mana/mana.h +similarity index 100% +rename from drivers/net/ethernet/microsoft/mana/mana.h +rename to include/net/mana/mana.h +diff --git a/drivers/net/ethernet/microsoft/mana/mana_auxiliary.h b/include/net/mana/mana_auxiliary.h +similarity index 100% +rename from drivers/net/ethernet/microsoft/mana/mana_auxiliary.h +rename to include/net/mana/mana_auxiliary.h +diff --git a/drivers/net/ethernet/microsoft/mana/shm_channel.h b/include/net/mana/shm_channel.h +similarity index 100% +rename from drivers/net/ethernet/microsoft/mana/shm_channel.h 
+rename to include/net/mana/shm_channel.h +-- +2.40.1 + diff --git a/debian/patches/features/all/ethernet-microsoft/0010-net-mana-Define-max-values-for-SGL-entries.patch b/debian/patches/features/all/ethernet-microsoft/0010-net-mana-Define-max-values-for-SGL-entries.patch new file mode 100644 index 000000000..25af5ac09 --- /dev/null +++ b/debian/patches/features/all/ethernet-microsoft/0010-net-mana-Define-max-values-for-SGL-entries.patch @@ -0,0 +1,79 @@ +From 5e6b27eabf53e70c084cf61e3d3860cd6ed1c321 Mon Sep 17 00:00:00 2001 +From: Long Li <longli@microsoft.com> +Date: Thu, 3 Nov 2022 12:16:26 -0700 +Subject: [PATCH 10/23] net: mana: Define max values for SGL entries + +The number of maximum SGL entries should be computed from the maximum +WQE size for the intended queue type and the corresponding OOB data +size. This guarantees the hardware queue can successfully queue requests +up to the queue depth exposed to the upper layer. + +Reviewed-by: Dexuan Cui <decui@microsoft.com> +Signed-off-by: Long Li <longli@microsoft.com> +Link: https://lore.kernel.org/r/1667502990-2559-9-git-send-email-longli@linuxonhyperv.com +Acked-by: Haiyang Zhang <haiyangz@microsoft.com> +Signed-off-by: Leon Romanovsky <leonro@nvidia.com> +(cherry picked from commit aa56549792fb348892fbbae67f6f0c71bb750b65) +Signed-off-by: Bastian Blank <waldi@debian.org> +--- + drivers/net/ethernet/microsoft/mana/mana_en.c | 2 +- + include/net/mana/gdma.h | 7 +++++++ + include/net/mana/mana.h | 4 +--- + 3 files changed, 9 insertions(+), 4 deletions(-) + +diff --git a/drivers/net/ethernet/microsoft/mana/mana_en.c b/drivers/net/ethernet/microsoft/mana/mana_en.c +index c0421c4a80d4..958e55c936b5 100644 +--- a/drivers/net/ethernet/microsoft/mana/mana_en.c ++++ b/drivers/net/ethernet/microsoft/mana/mana_en.c +@@ -189,7 +189,7 @@ int mana_start_xmit(struct sk_buff *skb, struct net_device *ndev) + pkg.wqe_req.client_data_unit = 0; + + pkg.wqe_req.num_sge = 1 + skb_shinfo(skb)->nr_frags; +- 
WARN_ON_ONCE(pkg.wqe_req.num_sge > 30); ++ WARN_ON_ONCE(pkg.wqe_req.num_sge > MAX_TX_WQE_SGL_ENTRIES); + + if (pkg.wqe_req.num_sge <= ARRAY_SIZE(pkg.sgl_array)) { + pkg.wqe_req.sgl = pkg.sgl_array; +diff --git a/include/net/mana/gdma.h b/include/net/mana/gdma.h +index 86d8a9e36005..11fc1cc67c01 100644 +--- a/include/net/mana/gdma.h ++++ b/include/net/mana/gdma.h +@@ -431,6 +431,13 @@ struct gdma_wqe { + #define MAX_TX_WQE_SIZE 512 + #define MAX_RX_WQE_SIZE 256 + ++#define MAX_TX_WQE_SGL_ENTRIES ((GDMA_MAX_SQE_SIZE - \ ++ sizeof(struct gdma_sge) - INLINE_OOB_SMALL_SIZE) / \ ++ sizeof(struct gdma_sge)) ++ ++#define MAX_RX_WQE_SGL_ENTRIES ((GDMA_MAX_RQE_SIZE - \ ++ sizeof(struct gdma_sge)) / sizeof(struct gdma_sge)) ++ + struct gdma_cqe { + u32 cqe_data[GDMA_COMP_DATA_SIZE / 4]; + +diff --git a/include/net/mana/mana.h b/include/net/mana/mana.h +index 6e9e86fb4c02..713a8f8cca9a 100644 +--- a/include/net/mana/mana.h ++++ b/include/net/mana/mana.h +@@ -265,8 +265,6 @@ struct mana_cq { + int budget; + }; + +-#define GDMA_MAX_RQE_SGES 15 +- + struct mana_recv_buf_oob { + /* A valid GDMA work request representing the data buffer. */ + struct gdma_wqe_request wqe_req; +@@ -276,7 +274,7 @@ struct mana_recv_buf_oob { + + /* SGL of the buffer going to be sent has part of the work request. */ + u32 num_sge; +- struct gdma_sge sgl[GDMA_MAX_RQE_SGES]; ++ struct gdma_sge sgl[MAX_RX_WQE_SGL_ENTRIES]; + + /* Required to store the result of mana_gd_post_work_request. 
+ * gdma_posted_wqe_info.wqe_size_in_bu is required for progressing the +-- +2.40.1 + diff --git a/debian/patches/features/all/ethernet-microsoft/0011-net-mana-Define-and-process-GDMA-response-code-GDMA_.patch b/debian/patches/features/all/ethernet-microsoft/0011-net-mana-Define-and-process-GDMA-response-code-GDMA_.patch new file mode 100644 index 000000000..539f450c0 --- /dev/null +++ b/debian/patches/features/all/ethernet-microsoft/0011-net-mana-Define-and-process-GDMA-response-code-GDMA_.patch @@ -0,0 +1,52 @@ +From 71b611192d6e5c0aa276eba89c4ac0b15ca1851b Mon Sep 17 00:00:00 2001 +From: Ajay Sharma <sharmaajay@microsoft.com> +Date: Thu, 3 Nov 2022 12:16:27 -0700 +Subject: [PATCH 11/23] net: mana: Define and process GDMA response code + GDMA_STATUS_MORE_ENTRIES + +When doing memory registration, the PF may respond with +GDMA_STATUS_MORE_ENTRIES to indicate a follow-up request is needed. This is +not an error and should be processed as expected. + +Signed-off-by: Ajay Sharma <sharmaajay@microsoft.com> +Reviewed-by: Dexuan Cui <decui@microsoft.com> +Signed-off-by: Long Li <longli@microsoft.com> +Link: https://lore.kernel.org/r/1667502990-2559-10-git-send-email-longli@linuxonhyperv.com +Acked-by: Haiyang Zhang <haiyangz@microsoft.com> +Signed-off-by: Leon Romanovsky <leonro@nvidia.com> +(cherry picked from commit de372f2a9ca7ada2698ecac7df8f02407cd98fa0) +Signed-off-by: Bastian Blank <waldi@debian.org> +--- + drivers/net/ethernet/microsoft/mana/hw_channel.c | 2 +- + include/net/mana/gdma.h | 2 ++ + 2 files changed, 3 insertions(+), 1 deletion(-) + +diff --git a/drivers/net/ethernet/microsoft/mana/hw_channel.c b/drivers/net/ethernet/microsoft/mana/hw_channel.c +index 76829ab43d40..9d1507eba5b9 100644 +--- a/drivers/net/ethernet/microsoft/mana/hw_channel.c ++++ b/drivers/net/ethernet/microsoft/mana/hw_channel.c +@@ -836,7 +836,7 @@ int mana_hwc_send_request(struct hw_channel_context *hwc, u32 req_len, + goto out; + } + +- if (ctx->status_code) { ++ if (ctx->status_code 
&& ctx->status_code != GDMA_STATUS_MORE_ENTRIES) { + dev_err(hwc->dev, "HWC: Failed hw_channel req: 0x%x\n", + ctx->status_code); + err = -EPROTO; +diff --git a/include/net/mana/gdma.h b/include/net/mana/gdma.h +index 11fc1cc67c01..202ac405ab59 100644 +--- a/include/net/mana/gdma.h ++++ b/include/net/mana/gdma.h +@@ -9,6 +9,8 @@ + + #include "shm_channel.h" + ++#define GDMA_STATUS_MORE_ENTRIES 0x00000105 ++ + /* Structures labeled with "HW DATA" are exchanged with the hardware. All of + * them are naturally aligned and hence don't need __packed. + */ +-- +2.40.1 + diff --git a/debian/patches/features/all/ethernet-microsoft/0012-net-mana-Define-data-structures-for-protection-domai.patch b/debian/patches/features/all/ethernet-microsoft/0012-net-mana-Define-data-structures-for-protection-domai.patch new file mode 100644 index 000000000..fb9a2a84d --- /dev/null +++ b/debian/patches/features/all/ethernet-microsoft/0012-net-mana-Define-data-structures-for-protection-domai.patch @@ -0,0 +1,353 @@ +From b6589e00fed6f5a53bd4ffd172b7e4d2d300447c Mon Sep 17 00:00:00 2001 +From: Ajay Sharma <sharmaajay@microsoft.com> +Date: Thu, 3 Nov 2022 12:16:29 -0700 +Subject: [PATCH 12/23] net: mana: Define data structures for protection domain + and memory registration + +The MANA hardware supports protection domains and memory registration for use +in RDMA environment. Add those definitions and expose them for use by the +RDMA driver. 
+ +Signed-off-by: Ajay Sharma <sharmaajay@microsoft.com> +Signed-off-by: Long Li <longli@microsoft.com> +Link: https://lore.kernel.org/r/1667502990-2559-12-git-send-email-longli@linuxonhyperv.com +Reviewed-by: Dexuan Cui <decui@microsoft.com> +Acked-by: Haiyang Zhang <haiyangz@microsoft.com> +Signed-off-by: Leon Romanovsky <leonro@nvidia.com> +(cherry picked from commit 28c66cfa45388af1126985d1114e0ed762eb2abd) +Signed-off-by: Bastian Blank <waldi@debian.org> +--- + .../net/ethernet/microsoft/mana/gdma_main.c | 27 ++-- + drivers/net/ethernet/microsoft/mana/mana_en.c | 18 +-- + include/net/mana/gdma.h | 121 +++++++++++++++++- + 3 files changed, 143 insertions(+), 23 deletions(-) + +diff --git a/drivers/net/ethernet/microsoft/mana/gdma_main.c b/drivers/net/ethernet/microsoft/mana/gdma_main.c +index b114c31d70ba..690691e3e86c 100644 +--- a/drivers/net/ethernet/microsoft/mana/gdma_main.c ++++ b/drivers/net/ethernet/microsoft/mana/gdma_main.c +@@ -198,7 +198,7 @@ static int mana_gd_create_hw_eq(struct gdma_context *gc, + req.type = queue->type; + req.pdid = queue->gdma_dev->pdid; + req.doolbell_id = queue->gdma_dev->doorbell; +- req.gdma_region = queue->mem_info.gdma_region; ++ req.gdma_region = queue->mem_info.dma_region_handle; + req.queue_size = queue->queue_size; + req.log2_throttle_limit = queue->eq.log2_throttle_limit; + req.eq_pci_msix_index = queue->eq.msix_index; +@@ -212,7 +212,7 @@ static int mana_gd_create_hw_eq(struct gdma_context *gc, + + queue->id = resp.queue_index; + queue->eq.disable_needed = true; +- queue->mem_info.gdma_region = GDMA_INVALID_DMA_REGION; ++ queue->mem_info.dma_region_handle = GDMA_INVALID_DMA_REGION; + return 0; + } + +@@ -671,24 +671,30 @@ int mana_gd_create_hwc_queue(struct gdma_dev *gd, + return err; + } + +-static void mana_gd_destroy_dma_region(struct gdma_context *gc, u64 gdma_region) ++int mana_gd_destroy_dma_region(struct gdma_context *gc, ++ gdma_obj_handle_t dma_region_handle) + { + struct gdma_destroy_dma_region_req req = 
{}; + struct gdma_general_resp resp = {}; + int err; + +- if (gdma_region == GDMA_INVALID_DMA_REGION) +- return; ++ if (dma_region_handle == GDMA_INVALID_DMA_REGION) ++ return 0; + + mana_gd_init_req_hdr(&req.hdr, GDMA_DESTROY_DMA_REGION, sizeof(req), + sizeof(resp)); +- req.gdma_region = gdma_region; ++ req.dma_region_handle = dma_region_handle; + + err = mana_gd_send_request(gc, sizeof(req), &req, sizeof(resp), &resp); +- if (err || resp.hdr.status) ++ if (err || resp.hdr.status) { + dev_err(gc->dev, "Failed to destroy DMA region: %d, 0x%x\n", + err, resp.hdr.status); ++ return -EPROTO; ++ } ++ ++ return 0; + } ++EXPORT_SYMBOL_NS(mana_gd_destroy_dma_region, NET_MANA); + + static int mana_gd_create_dma_region(struct gdma_dev *gd, + struct gdma_mem_info *gmi) +@@ -733,14 +739,15 @@ static int mana_gd_create_dma_region(struct gdma_dev *gd, + if (err) + goto out; + +- if (resp.hdr.status || resp.gdma_region == GDMA_INVALID_DMA_REGION) { ++ if (resp.hdr.status || ++ resp.dma_region_handle == GDMA_INVALID_DMA_REGION) { + dev_err(gc->dev, "Failed to create DMA region: 0x%x\n", + resp.hdr.status); + err = -EPROTO; + goto out; + } + +- gmi->gdma_region = resp.gdma_region; ++ gmi->dma_region_handle = resp.dma_region_handle; + out: + kfree(req); + return err; +@@ -863,7 +870,7 @@ void mana_gd_destroy_queue(struct gdma_context *gc, struct gdma_queue *queue) + return; + } + +- mana_gd_destroy_dma_region(gc, gmi->gdma_region); ++ mana_gd_destroy_dma_region(gc, gmi->dma_region_handle); + mana_gd_free_memory(gmi); + kfree(queue); + } +diff --git a/drivers/net/ethernet/microsoft/mana/mana_en.c b/drivers/net/ethernet/microsoft/mana/mana_en.c +index 958e55c936b5..9bce13714b25 100644 +--- a/drivers/net/ethernet/microsoft/mana/mana_en.c ++++ b/drivers/net/ethernet/microsoft/mana/mana_en.c +@@ -1529,10 +1529,10 @@ static int mana_create_txq(struct mana_port_context *apc, + memset(&wq_spec, 0, sizeof(wq_spec)); + memset(&cq_spec, 0, sizeof(cq_spec)); + +- wq_spec.gdma_region = 
txq->gdma_sq->mem_info.gdma_region; ++ wq_spec.gdma_region = txq->gdma_sq->mem_info.dma_region_handle; + wq_spec.queue_size = txq->gdma_sq->queue_size; + +- cq_spec.gdma_region = cq->gdma_cq->mem_info.gdma_region; ++ cq_spec.gdma_region = cq->gdma_cq->mem_info.dma_region_handle; + cq_spec.queue_size = cq->gdma_cq->queue_size; + cq_spec.modr_ctx_id = 0; + cq_spec.attached_eq = cq->gdma_cq->cq.parent->id; +@@ -1547,8 +1547,10 @@ static int mana_create_txq(struct mana_port_context *apc, + txq->gdma_sq->id = wq_spec.queue_index; + cq->gdma_cq->id = cq_spec.queue_index; + +- txq->gdma_sq->mem_info.gdma_region = GDMA_INVALID_DMA_REGION; +- cq->gdma_cq->mem_info.gdma_region = GDMA_INVALID_DMA_REGION; ++ txq->gdma_sq->mem_info.dma_region_handle = ++ GDMA_INVALID_DMA_REGION; ++ cq->gdma_cq->mem_info.dma_region_handle = ++ GDMA_INVALID_DMA_REGION; + + txq->gdma_txq_id = txq->gdma_sq->id; + +@@ -1759,10 +1761,10 @@ static struct mana_rxq *mana_create_rxq(struct mana_port_context *apc, + + memset(&wq_spec, 0, sizeof(wq_spec)); + memset(&cq_spec, 0, sizeof(cq_spec)); +- wq_spec.gdma_region = rxq->gdma_rq->mem_info.gdma_region; ++ wq_spec.gdma_region = rxq->gdma_rq->mem_info.dma_region_handle; + wq_spec.queue_size = rxq->gdma_rq->queue_size; + +- cq_spec.gdma_region = cq->gdma_cq->mem_info.gdma_region; ++ cq_spec.gdma_region = cq->gdma_cq->mem_info.dma_region_handle; + cq_spec.queue_size = cq->gdma_cq->queue_size; + cq_spec.modr_ctx_id = 0; + cq_spec.attached_eq = cq->gdma_cq->cq.parent->id; +@@ -1775,8 +1777,8 @@ static struct mana_rxq *mana_create_rxq(struct mana_port_context *apc, + rxq->gdma_rq->id = wq_spec.queue_index; + cq->gdma_cq->id = cq_spec.queue_index; + +- rxq->gdma_rq->mem_info.gdma_region = GDMA_INVALID_DMA_REGION; +- cq->gdma_cq->mem_info.gdma_region = GDMA_INVALID_DMA_REGION; ++ rxq->gdma_rq->mem_info.dma_region_handle = GDMA_INVALID_DMA_REGION; ++ cq->gdma_cq->mem_info.dma_region_handle = GDMA_INVALID_DMA_REGION; + + rxq->gdma_id = rxq->gdma_rq->id; + 
cq->gdma_id = cq->gdma_cq->id; +diff --git a/include/net/mana/gdma.h b/include/net/mana/gdma.h +index 202ac405ab59..aabc7cea8a49 100644 +--- a/include/net/mana/gdma.h ++++ b/include/net/mana/gdma.h +@@ -27,6 +27,10 @@ enum gdma_request_type { + GDMA_CREATE_DMA_REGION = 25, + GDMA_DMA_REGION_ADD_PAGES = 26, + GDMA_DESTROY_DMA_REGION = 27, ++ GDMA_CREATE_PD = 29, ++ GDMA_DESTROY_PD = 30, ++ GDMA_CREATE_MR = 31, ++ GDMA_DESTROY_MR = 32, + }; + + enum gdma_queue_type { +@@ -57,6 +61,8 @@ enum { + GDMA_DEVICE_MANA = 2, + }; + ++typedef u64 gdma_obj_handle_t; ++ + struct gdma_resource { + /* Protect the bitmap */ + spinlock_t lock; +@@ -190,7 +196,7 @@ struct gdma_mem_info { + u64 length; + + /* Allocated by the PF driver */ +- u64 gdma_region; ++ gdma_obj_handle_t dma_region_handle; + }; + + #define REGISTER_ATB_MST_MKEY_LOWER_SIZE 8 +@@ -605,7 +611,7 @@ struct gdma_create_queue_req { + u32 reserved1; + u32 pdid; + u32 doolbell_id; +- u64 gdma_region; ++ gdma_obj_handle_t gdma_region; + u32 reserved2; + u32 queue_size; + u32 log2_throttle_limit; +@@ -632,6 +638,28 @@ struct gdma_disable_queue_req { + u32 alloc_res_id_on_creation; + }; /* HW DATA */ + ++enum atb_page_size { ++ ATB_PAGE_SIZE_4K, ++ ATB_PAGE_SIZE_8K, ++ ATB_PAGE_SIZE_16K, ++ ATB_PAGE_SIZE_32K, ++ ATB_PAGE_SIZE_64K, ++ ATB_PAGE_SIZE_128K, ++ ATB_PAGE_SIZE_256K, ++ ATB_PAGE_SIZE_512K, ++ ATB_PAGE_SIZE_1M, ++ ATB_PAGE_SIZE_2M, ++ ATB_PAGE_SIZE_MAX, ++}; ++ ++enum gdma_mr_access_flags { ++ GDMA_ACCESS_FLAG_LOCAL_READ = BIT_ULL(0), ++ GDMA_ACCESS_FLAG_LOCAL_WRITE = BIT_ULL(1), ++ GDMA_ACCESS_FLAG_REMOTE_READ = BIT_ULL(2), ++ GDMA_ACCESS_FLAG_REMOTE_WRITE = BIT_ULL(3), ++ GDMA_ACCESS_FLAG_REMOTE_ATOMIC = BIT_ULL(4), ++}; ++ + /* GDMA_CREATE_DMA_REGION */ + struct gdma_create_dma_region_req { + struct gdma_req_hdr hdr; +@@ -658,14 +686,14 @@ struct gdma_create_dma_region_req { + + struct gdma_create_dma_region_resp { + struct gdma_resp_hdr hdr; +- u64 gdma_region; ++ gdma_obj_handle_t dma_region_handle; + }; /* 
HW DATA */ + + /* GDMA_DMA_REGION_ADD_PAGES */ + struct gdma_dma_region_add_pages_req { + struct gdma_req_hdr hdr; + +- u64 gdma_region; ++ gdma_obj_handle_t dma_region_handle; + + u32 page_addr_list_len; + u32 reserved3; +@@ -677,9 +705,88 @@ struct gdma_dma_region_add_pages_req { + struct gdma_destroy_dma_region_req { + struct gdma_req_hdr hdr; + +- u64 gdma_region; ++ gdma_obj_handle_t dma_region_handle; + }; /* HW DATA */ + ++enum gdma_pd_flags { ++ GDMA_PD_FLAG_INVALID = 0, ++}; ++ ++struct gdma_create_pd_req { ++ struct gdma_req_hdr hdr; ++ enum gdma_pd_flags flags; ++ u32 reserved; ++};/* HW DATA */ ++ ++struct gdma_create_pd_resp { ++ struct gdma_resp_hdr hdr; ++ gdma_obj_handle_t pd_handle; ++ u32 pd_id; ++ u32 reserved; ++};/* HW DATA */ ++ ++struct gdma_destroy_pd_req { ++ struct gdma_req_hdr hdr; ++ gdma_obj_handle_t pd_handle; ++};/* HW DATA */ ++ ++struct gdma_destory_pd_resp { ++ struct gdma_resp_hdr hdr; ++};/* HW DATA */ ++ ++enum gdma_mr_type { ++ /* Guest Virtual Address - MRs of this type allow access ++ * to memory mapped by PTEs associated with this MR using a virtual ++ * address that is set up in the MST ++ */ ++ GDMA_MR_TYPE_GVA = 2, ++}; ++ ++struct gdma_create_mr_params { ++ gdma_obj_handle_t pd_handle; ++ enum gdma_mr_type mr_type; ++ union { ++ struct { ++ gdma_obj_handle_t dma_region_handle; ++ u64 virtual_address; ++ enum gdma_mr_access_flags access_flags; ++ } gva; ++ }; ++}; ++ ++struct gdma_create_mr_request { ++ struct gdma_req_hdr hdr; ++ gdma_obj_handle_t pd_handle; ++ enum gdma_mr_type mr_type; ++ u32 reserved_1; ++ ++ union { ++ struct { ++ gdma_obj_handle_t dma_region_handle; ++ u64 virtual_address; ++ enum gdma_mr_access_flags access_flags; ++ } gva; ++ ++ }; ++ u32 reserved_2; ++};/* HW DATA */ ++ ++struct gdma_create_mr_response { ++ struct gdma_resp_hdr hdr; ++ gdma_obj_handle_t mr_handle; ++ u32 lkey; ++ u32 rkey; ++};/* HW DATA */ ++ ++struct gdma_destroy_mr_request { ++ struct gdma_req_hdr hdr; ++ gdma_obj_handle_t 
mr_handle; ++};/* HW DATA */ ++ ++struct gdma_destroy_mr_response { ++ struct gdma_resp_hdr hdr; ++};/* HW DATA */ ++ + int mana_gd_verify_vf_version(struct pci_dev *pdev); + + int mana_gd_register_device(struct gdma_dev *gd); +@@ -706,4 +813,8 @@ void mana_gd_free_memory(struct gdma_mem_info *gmi); + + int mana_gd_send_request(struct gdma_context *gc, u32 req_len, const void *req, + u32 resp_len, void *resp); ++ ++int mana_gd_destroy_dma_region(struct gdma_context *gc, ++ gdma_obj_handle_t dma_region_handle); ++ + #endif /* _GDMA_H */ +-- +2.40.1 + diff --git a/debian/patches/features/all/ethernet-microsoft/0013-net-mana-Fix-return-type-of-mana_start_xmit.patch b/debian/patches/features/all/ethernet-microsoft/0013-net-mana-Fix-return-type-of-mana_start_xmit.patch new file mode 100644 index 000000000..8f966405a --- /dev/null +++ b/debian/patches/features/all/ethernet-microsoft/0013-net-mana-Fix-return-type-of-mana_start_xmit.patch @@ -0,0 +1,66 @@ +From 37ec026c96afe1091f91d40515a91463ea50c150 Mon Sep 17 00:00:00 2001 +From: Nathan Huckleberry <nhuck@google.com> +Date: Tue, 8 Nov 2022 17:26:30 -0700 +Subject: [PATCH 13/23] net: mana: Fix return type of mana_start_xmit() + +The ndo_start_xmit field in net_device_ops is expected to be of type +netdev_tx_t (*ndo_start_xmit)(struct sk_buff *skb, struct net_device *dev). + +The mismatched return type breaks forward edge kCFI since the underlying +function definition does not match the function hook definition. A new +warning in clang will catch this at compile time: + + drivers/net/ethernet/microsoft/mana/mana_en.c:382:21: error: incompatible function pointer types initializing 'netdev_tx_t (*)(struct sk_buff *, struct net_device *)' (aka 'enum netdev_tx (*)(struct sk_buff *, struct net_device *)') with an expression of type 'int (struct sk_buff *, struct net_device *)' [-Werror,-Wincompatible-function-pointer-types-strict] + .ndo_start_xmit = mana_start_xmit, + ^~~~~~~~~~~~~~~ + 1 error generated. 
+ +The return type of mana_start_xmit should be changed from int to +netdev_tx_t. + +Reported-by: Dan Carpenter <error27@gmail.com> +Link: https://github.com/ClangBuiltLinux/linux/issues/1703 +Link: https://github.com/ClangBuiltLinux/linux/issues/1750 +Signed-off-by: Nathan Huckleberry <nhuck@google.com> +Reviewed-by: Dexuan Cui <decui@microsoft.com> +[nathan: Rebase on net-next and resolve conflicts + Add note about new clang warning] +Signed-off-by: Nathan Chancellor <nathan@kernel.org> +Link: https://lore.kernel.org/r/20221109002629.1446680-1-nathan@kernel.org +Signed-off-by: Paolo Abeni <pabeni@redhat.com> +(cherry picked from commit 0c9ef08a4d0fd6c5e6000597b506235d71a85a61) +Signed-off-by: Bastian Blank <waldi@debian.org> +--- + drivers/net/ethernet/microsoft/mana/mana_en.c | 2 +- + include/net/mana/mana.h | 2 +- + 2 files changed, 2 insertions(+), 2 deletions(-) + +diff --git a/drivers/net/ethernet/microsoft/mana/mana_en.c b/drivers/net/ethernet/microsoft/mana/mana_en.c +index 9bce13714b25..2f6a048dee90 100644 +--- a/drivers/net/ethernet/microsoft/mana/mana_en.c ++++ b/drivers/net/ethernet/microsoft/mana/mana_en.c +@@ -141,7 +141,7 @@ static int mana_map_skb(struct sk_buff *skb, struct mana_port_context *apc, + return -ENOMEM; + } + +-int mana_start_xmit(struct sk_buff *skb, struct net_device *ndev) ++netdev_tx_t mana_start_xmit(struct sk_buff *skb, struct net_device *ndev) + { + enum mana_tx_pkt_format pkt_fmt = MANA_SHORT_PKT_FMT; + struct mana_port_context *apc = netdev_priv(ndev); +diff --git a/include/net/mana/mana.h b/include/net/mana/mana.h +index 713a8f8cca9a..575ea36ce606 100644 +--- a/include/net/mana/mana.h ++++ b/include/net/mana/mana.h +@@ -390,7 +390,7 @@ struct mana_port_context { + struct mana_ethtool_stats eth_stats; + }; + +-int mana_start_xmit(struct sk_buff *skb, struct net_device *ndev); ++netdev_tx_t mana_start_xmit(struct sk_buff *skb, struct net_device *ndev); + int mana_config_rss(struct mana_port_context *ac, enum TRI_STATE rx, + 
bool update_hash, bool update_tab); + +-- +2.40.1 + diff --git a/debian/patches/features/all/ethernet-microsoft/0014-net-mana-Fix-accessing-freed-irq-affinity_hint.patch b/debian/patches/features/all/ethernet-microsoft/0014-net-mana-Fix-accessing-freed-irq-affinity_hint.patch new file mode 100644 index 000000000..2aa627612 --- /dev/null +++ b/debian/patches/features/all/ethernet-microsoft/0014-net-mana-Fix-accessing-freed-irq-affinity_hint.patch @@ -0,0 +1,135 @@ +From 1b71f96ff9b962c904cefc9a57500869fa0d1d44 Mon Sep 17 00:00:00 2001 +From: Haiyang Zhang <haiyangz@microsoft.com> +Date: Mon, 6 Feb 2023 13:28:49 -0800 +Subject: [PATCH 14/23] net: mana: Fix accessing freed irq affinity_hint + +After calling irq_set_affinity_and_hint(), the cpumask pointer is +saved in desc->affinity_hint, and will be used later when reading +/proc/irq/<num>/affinity_hint. So the cpumask variable needs to be +persistent. Otherwise, we are accessing freed memory when reading +the affinity_hint file. + +Also, need to clear affinity_hint before free_irq(), otherwise there +is a one-time warning and stack trace during module unloading: + + [ 243.948687] WARNING: CPU: 10 PID: 1589 at kernel/irq/manage.c:1913 free_irq+0x318/0x360 + ... + [ 243.948753] Call Trace: + [ 243.948754] <TASK> + [ 243.948760] mana_gd_remove_irqs+0x78/0xc0 [mana] + [ 243.948767] mana_gd_remove+0x3e/0x80 [mana] + [ 243.948773] pci_device_remove+0x3d/0xb0 + [ 243.948778] device_remove+0x46/0x70 + [ 243.948782] device_release_driver_internal+0x1fe/0x280 + [ 243.948785] driver_detach+0x4e/0xa0 + [ 243.948787] bus_remove_driver+0x70/0xf0 + [ 243.948789] driver_unregister+0x35/0x60 + [ 243.948792] pci_unregister_driver+0x44/0x90 + [ 243.948794] mana_driver_exit+0x14/0x3fe [mana] + [ 243.948800] __do_sys_delete_module.constprop.0+0x185/0x2f0 + +To fix the bug, use the persistent mask, cpumask_of(cpu#), and set +affinity_hint to NULL before freeing the IRQ, as required by free_irq(). 
+ +Cc: stable@vger.kernel.org +Fixes: 71fa6887eeca ("net: mana: Assign interrupts to CPUs based on NUMA nodes") +Signed-off-by: Haiyang Zhang <haiyangz@microsoft.com> +Reviewed-by: Michael Kelley <mikelley@microsoft.com> +Reviewed-by: Leon Romanovsky <leonro@nvidia.com> +Link: https://lore.kernel.org/r/1675718929-19565-1-git-send-email-haiyangz@microsoft.com +Signed-off-by: Jakub Kicinski <kuba@kernel.org> +(cherry picked from commit 18a048370b06a3a521219e9e5b10bdc2178ef19c) +Signed-off-by: Bastian Blank <waldi@debian.org> +--- + .../net/ethernet/microsoft/mana/gdma_main.c | 37 ++++++------------- + 1 file changed, 11 insertions(+), 26 deletions(-) + +diff --git a/drivers/net/ethernet/microsoft/mana/gdma_main.c b/drivers/net/ethernet/microsoft/mana/gdma_main.c +index 690691e3e86c..5b7fddfc9ff1 100644 +--- a/drivers/net/ethernet/microsoft/mana/gdma_main.c ++++ b/drivers/net/ethernet/microsoft/mana/gdma_main.c +@@ -1218,9 +1218,7 @@ static int mana_gd_setup_irqs(struct pci_dev *pdev) + unsigned int max_queues_per_port = num_online_cpus(); + struct gdma_context *gc = pci_get_drvdata(pdev); + struct gdma_irq_context *gic; +- unsigned int max_irqs; +- u16 *cpus; +- cpumask_var_t req_mask; ++ unsigned int max_irqs, cpu; + int nvec, irq; + int err, i = 0, j; + +@@ -1241,21 +1239,7 @@ static int mana_gd_setup_irqs(struct pci_dev *pdev) + goto free_irq_vector; + } + +- if (!zalloc_cpumask_var(&req_mask, GFP_KERNEL)) { +- err = -ENOMEM; +- goto free_irq; +- } +- +- cpus = kcalloc(nvec, sizeof(*cpus), GFP_KERNEL); +- if (!cpus) { +- err = -ENOMEM; +- goto free_mask; +- } +- for (i = 0; i < nvec; i++) +- cpus[i] = cpumask_local_spread(i, gc->numa_node); +- + for (i = 0; i < nvec; i++) { +- cpumask_set_cpu(cpus[i], req_mask); + gic = &gc->irq_contexts[i]; + gic->handler = NULL; + gic->arg = NULL; +@@ -1270,17 +1254,16 @@ static int mana_gd_setup_irqs(struct pci_dev *pdev) + irq = pci_irq_vector(pdev, i); + if (irq < 0) { + err = irq; +- goto free_mask; ++ goto free_irq; + } + + 
err = request_irq(irq, mana_gd_intr, 0, gic->name, gic); + if (err) +- goto free_mask; +- irq_set_affinity_and_hint(irq, req_mask); +- cpumask_clear(req_mask); ++ goto free_irq; ++ ++ cpu = cpumask_local_spread(i, gc->numa_node); ++ irq_set_affinity_and_hint(irq, cpumask_of(cpu)); + } +- free_cpumask_var(req_mask); +- kfree(cpus); + + err = mana_gd_alloc_res_map(nvec, &gc->msix_resource); + if (err) +@@ -1291,13 +1274,12 @@ static int mana_gd_setup_irqs(struct pci_dev *pdev) + + return 0; + +-free_mask: +- free_cpumask_var(req_mask); +- kfree(cpus); + free_irq: + for (j = i - 1; j >= 0; j--) { + irq = pci_irq_vector(pdev, j); + gic = &gc->irq_contexts[j]; ++ ++ irq_update_affinity_hint(irq, NULL); + free_irq(irq, gic); + } + +@@ -1325,6 +1307,9 @@ static void mana_gd_remove_irqs(struct pci_dev *pdev) + continue; + + gic = &gc->irq_contexts[i]; ++ ++ /* Need to clear the hint before free_irq */ ++ irq_update_affinity_hint(irq, NULL); + free_irq(irq, gic); + } + +-- +2.40.1 + diff --git a/debian/patches/features/all/ethernet-microsoft/0015-net-mana-Add-new-MANA-VF-performance-counters-for-ea.patch b/debian/patches/features/all/ethernet-microsoft/0015-net-mana-Add-new-MANA-VF-performance-counters-for-ea.patch new file mode 100644 index 000000000..0ada4dca6 --- /dev/null +++ b/debian/patches/features/all/ethernet-microsoft/0015-net-mana-Add-new-MANA-VF-performance-counters-for-ea.patch @@ -0,0 +1,334 @@ +From fbefc731a4cc001fe7ce5786e08053d707dde85f Mon Sep 17 00:00:00 2001 +From: Shradha Gupta <shradhagupta@linux.microsoft.com> +Date: Wed, 15 Mar 2023 04:55:13 -0700 +Subject: [PATCH 15/23] net: mana: Add new MANA VF performance counters for + easier troubleshooting + +Extended performance counter stats in 'ethtool -S <interface>' output +for MANA VF to facilitate troubleshooting. + +Tested-on: Ubuntu22 +Signed-off-by: Shradha Gupta <shradhagupta@linux.microsoft.com> +Signed-off-by: David S. 
Miller <davem@davemloft.net> +(cherry picked from commit bd7fc6e1957c2102866f9e464c1f2302e891b7e9) +Signed-off-by: Bastian Blank <waldi@debian.org> +--- + drivers/net/ethernet/microsoft/mana/mana_en.c | 62 ++++++++++++++++++- + .../ethernet/microsoft/mana/mana_ethtool.c | 52 +++++++++++++++- + include/net/mana/mana.h | 18 ++++++ + 3 files changed, 128 insertions(+), 4 deletions(-) + +--- a/drivers/net/ethernet/microsoft/mana/mana_en.c ++++ b/drivers/net/ethernet/microsoft/mana/mana_en.c +@@ -157,6 +157,7 @@ netdev_tx_t mana_start_xmit(struct sk_bu + struct mana_txq *txq; + struct mana_cq *cq; + int err, len; ++ u16 ihs; + + if (unlikely(!apc->port_is_up)) + goto tx_drop; +@@ -167,6 +168,7 @@ netdev_tx_t mana_start_xmit(struct sk_bu + txq = &apc->tx_qp[txq_idx].txq; + gdma_sq = txq->gdma_sq; + cq = &apc->tx_qp[txq_idx].tx_cq; ++ tx_stats = &txq->stats; + + pkg.tx_oob.s_oob.vcq_num = cq->gdma_id; + pkg.tx_oob.s_oob.vsq_frame = txq->vsq_frame; +@@ -180,10 +182,17 @@ netdev_tx_t mana_start_xmit(struct sk_bu + + pkg.tx_oob.s_oob.pkt_fmt = pkt_fmt; + +- if (pkt_fmt == MANA_SHORT_PKT_FMT) ++ if (pkt_fmt == MANA_SHORT_PKT_FMT) { + pkg.wqe_req.inline_oob_size = sizeof(struct mana_tx_short_oob); +- else ++ u64_stats_update_begin(&tx_stats->syncp); ++ tx_stats->short_pkt_fmt++; ++ u64_stats_update_end(&tx_stats->syncp); ++ } else { + pkg.wqe_req.inline_oob_size = sizeof(struct mana_tx_oob); ++ u64_stats_update_begin(&tx_stats->syncp); ++ tx_stats->long_pkt_fmt++; ++ u64_stats_update_end(&tx_stats->syncp); ++ } + + pkg.wqe_req.inline_oob_data = &pkg.tx_oob; + pkg.wqe_req.flags = 0; +@@ -233,9 +242,35 @@ netdev_tx_t mana_start_xmit(struct sk_bu + &ipv6_hdr(skb)->daddr, 0, + IPPROTO_TCP, 0); + } ++ ++ if (skb->encapsulation) { ++ ihs = skb_inner_tcp_all_headers(skb); ++ u64_stats_update_begin(&tx_stats->syncp); ++ tx_stats->tso_inner_packets++; ++ tx_stats->tso_inner_bytes += skb->len - ihs; ++ u64_stats_update_end(&tx_stats->syncp); ++ } else { ++ if (skb_shinfo(skb)->gso_type 
& SKB_GSO_UDP_L4) { ++ ihs = skb_transport_offset(skb) + sizeof(struct udphdr); ++ } else { ++ ihs = skb_tcp_all_headers(skb); ++ if (ipv6_has_hopopt_jumbo(skb)) ++ ihs -= sizeof(struct hop_jumbo_hdr); ++ } ++ ++ u64_stats_update_begin(&tx_stats->syncp); ++ tx_stats->tso_packets++; ++ tx_stats->tso_bytes += skb->len - ihs; ++ u64_stats_update_end(&tx_stats->syncp); ++ } ++ + } else if (skb->ip_summed == CHECKSUM_PARTIAL) { + csum_type = mana_checksum_info(skb); + ++ u64_stats_update_begin(&tx_stats->syncp); ++ tx_stats->csum_partial++; ++ u64_stats_update_end(&tx_stats->syncp); ++ + if (csum_type == IPPROTO_TCP) { + pkg.tx_oob.s_oob.is_outer_ipv4 = ipv4; + pkg.tx_oob.s_oob.is_outer_ipv6 = ipv6; +@@ -255,8 +290,12 @@ netdev_tx_t mana_start_xmit(struct sk_bu + } + } + +- if (mana_map_skb(skb, apc, &pkg)) ++ if (mana_map_skb(skb, apc, &pkg)) { ++ u64_stats_update_begin(&tx_stats->syncp); ++ tx_stats->mana_map_err++; ++ u64_stats_update_end(&tx_stats->syncp); + goto free_sgl_ptr; ++ } + + skb_queue_tail(&txq->pending_skbs, skb); + +@@ -1039,6 +1078,8 @@ static void mana_poll_tx_cq(struct mana_ + if (comp_read < 1) + return; + ++ apc->eth_stats.tx_cqes = comp_read; ++ + for (i = 0; i < comp_read; i++) { + struct mana_tx_comp_oob *cqe_oob; + +@@ -1067,6 +1108,7 @@ static void mana_poll_tx_cq(struct mana_ + netdev_err(ndev, "TX: CQE error %d\n", + cqe_oob->cqe_hdr.cqe_type); + ++ apc->eth_stats.tx_cqe_err++; + break; + + default: +@@ -1077,6 +1119,7 @@ static void mana_poll_tx_cq(struct mana_ + netdev_err(ndev, "TX: unknown CQE type %d\n", + cqe_oob->cqe_hdr.cqe_type); + ++ apc->eth_stats.tx_cqe_unknown_type++; + break; + } + +@@ -1123,6 +1166,8 @@ static void mana_poll_tx_cq(struct mana_ + WARN_ON_ONCE(1); + + cq->work_done = pkt_transmitted; ++ ++ apc->eth_stats.tx_cqes -= pkt_transmitted; + } + + static void mana_post_pkt_rxq(struct mana_rxq *rxq) +@@ -1257,12 +1302,15 @@ static void mana_process_rx_cqe(struct m + struct gdma_context *gc = 
rxq->gdma_rq->gdma_dev->gdma_context; + struct net_device *ndev = rxq->ndev; + struct mana_recv_buf_oob *rxbuf_oob; ++ struct mana_port_context *apc; + struct device *dev = gc->dev; + void *new_buf, *old_buf; + struct page *new_page; + u32 curr, pktlen; + dma_addr_t da; + ++ apc = netdev_priv(ndev); ++ + switch (oob->cqe_hdr.cqe_type) { + case CQE_RX_OKAY: + break; +@@ -1275,6 +1323,7 @@ static void mana_process_rx_cqe(struct m + + case CQE_RX_COALESCED_4: + netdev_err(ndev, "RX coalescing is unsupported\n"); ++ apc->eth_stats.rx_coalesced_err++; + return; + + case CQE_RX_OBJECT_FENCE: +@@ -1284,6 +1333,7 @@ static void mana_process_rx_cqe(struct m + default: + netdev_err(ndev, "Unknown RX CQE type = %d\n", + oob->cqe_hdr.cqe_type); ++ apc->eth_stats.rx_cqe_unknown_type++; + return; + } + +@@ -1346,11 +1396,15 @@ static void mana_poll_rx_cq(struct mana_ + { + struct gdma_comp *comp = cq->gdma_comp_buf; + struct mana_rxq *rxq = cq->rxq; ++ struct mana_port_context *apc; + int comp_read, i; + ++ apc = netdev_priv(rxq->ndev); ++ + comp_read = mana_gd_poll_cq(cq->gdma_cq, comp, CQE_POLLING_BUFFER); + WARN_ON_ONCE(comp_read > CQE_POLLING_BUFFER); + ++ apc->eth_stats.rx_cqes = comp_read; + rxq->xdp_flush = false; + + for (i = 0; i < comp_read; i++) { +@@ -1362,6 +1416,8 @@ static void mana_poll_rx_cq(struct mana_ + return; + + mana_process_rx_cqe(rxq, cq, &comp[i]); ++ ++ apc->eth_stats.rx_cqes--; + } + + if (rxq->xdp_flush) +--- a/drivers/net/ethernet/microsoft/mana/mana_ethtool.c ++++ b/drivers/net/ethernet/microsoft/mana/mana_ethtool.c +@@ -13,6 +13,15 @@ static const struct { + } mana_eth_stats[] = { + {"stop_queue", offsetof(struct mana_ethtool_stats, stop_queue)}, + {"wake_queue", offsetof(struct mana_ethtool_stats, wake_queue)}, ++ {"tx_cqes", offsetof(struct mana_ethtool_stats, tx_cqes)}, ++ {"tx_cq_err", offsetof(struct mana_ethtool_stats, tx_cqe_err)}, ++ {"tx_cqe_unknown_type", offsetof(struct mana_ethtool_stats, ++ tx_cqe_unknown_type)}, ++ {"rx_cqes", 
offsetof(struct mana_ethtool_stats, rx_cqes)}, ++ {"rx_coalesced_err", offsetof(struct mana_ethtool_stats, ++ rx_coalesced_err)}, ++ {"rx_cqe_unknown_type", offsetof(struct mana_ethtool_stats, ++ rx_cqe_unknown_type)}, + }; + + static int mana_get_sset_count(struct net_device *ndev, int stringset) +@@ -23,7 +32,8 @@ static int mana_get_sset_count(struct ne + if (stringset != ETH_SS_STATS) + return -EINVAL; + +- return ARRAY_SIZE(mana_eth_stats) + num_queues * 8; ++ return ARRAY_SIZE(mana_eth_stats) + num_queues * ++ (MANA_STATS_RX_COUNT + MANA_STATS_TX_COUNT); + } + + static void mana_get_strings(struct net_device *ndev, u32 stringset, u8 *data) +@@ -61,6 +71,22 @@ static void mana_get_strings(struct net_ + p += ETH_GSTRING_LEN; + sprintf(p, "tx_%d_xdp_xmit", i); + p += ETH_GSTRING_LEN; ++ sprintf(p, "tx_%d_tso_packets", i); ++ p += ETH_GSTRING_LEN; ++ sprintf(p, "tx_%d_tso_bytes", i); ++ p += ETH_GSTRING_LEN; ++ sprintf(p, "tx_%d_tso_inner_packets", i); ++ p += ETH_GSTRING_LEN; ++ sprintf(p, "tx_%d_tso_inner_bytes", i); ++ p += ETH_GSTRING_LEN; ++ sprintf(p, "tx_%d_long_pkt_fmt", i); ++ p += ETH_GSTRING_LEN; ++ sprintf(p, "tx_%d_short_pkt_fmt", i); ++ p += ETH_GSTRING_LEN; ++ sprintf(p, "tx_%d_csum_partial", i); ++ p += ETH_GSTRING_LEN; ++ sprintf(p, "tx_%d_mana_map_err", i); ++ p += ETH_GSTRING_LEN; + } + } + +@@ -78,6 +104,14 @@ static void mana_get_ethtool_stats(struc + u64 xdp_xmit; + u64 xdp_drop; + u64 xdp_tx; ++ u64 tso_packets; ++ u64 tso_bytes; ++ u64 tso_inner_packets; ++ u64 tso_inner_bytes; ++ u64 long_pkt_fmt; ++ u64 short_pkt_fmt; ++ u64 csum_partial; ++ u64 mana_map_err; + int q, i = 0; + + if (!apc->port_is_up) +@@ -113,11 +147,27 @@ static void mana_get_ethtool_stats(struc + packets = tx_stats->packets; + bytes = tx_stats->bytes; + xdp_xmit = tx_stats->xdp_xmit; ++ tso_packets = tx_stats->tso_packets; ++ tso_bytes = tx_stats->tso_bytes; ++ tso_inner_packets = tx_stats->tso_inner_packets; ++ tso_inner_bytes = tx_stats->tso_inner_bytes; ++ 
long_pkt_fmt = tx_stats->long_pkt_fmt; ++ short_pkt_fmt = tx_stats->short_pkt_fmt; ++ csum_partial = tx_stats->csum_partial; ++ mana_map_err = tx_stats->mana_map_err; + } while (u64_stats_fetch_retry(&tx_stats->syncp, start)); + + data[i++] = packets; + data[i++] = bytes; + data[i++] = xdp_xmit; ++ data[i++] = tso_packets; ++ data[i++] = tso_bytes; ++ data[i++] = tso_inner_packets; ++ data[i++] = tso_inner_bytes; ++ data[i++] = long_pkt_fmt; ++ data[i++] = short_pkt_fmt; ++ data[i++] = csum_partial; ++ data[i++] = mana_map_err; + } + } + +--- a/include/net/mana/mana.h ++++ b/include/net/mana/mana.h +@@ -48,6 +48,10 @@ enum TRI_STATE { + + #define MAX_PORTS_IN_MANA_DEV 256 + ++/* Update this count whenever the respective structures are changed */ ++#define MANA_STATS_RX_COUNT 5 ++#define MANA_STATS_TX_COUNT 11 ++ + struct mana_stats_rx { + u64 packets; + u64 bytes; +@@ -61,6 +65,14 @@ struct mana_stats_tx { + u64 packets; + u64 bytes; + u64 xdp_xmit; ++ u64 tso_packets; ++ u64 tso_bytes; ++ u64 tso_inner_packets; ++ u64 tso_inner_bytes; ++ u64 short_pkt_fmt; ++ u64 long_pkt_fmt; ++ u64 csum_partial; ++ u64 mana_map_err; + struct u64_stats_sync syncp; + }; + +@@ -331,6 +343,12 @@ struct mana_tx_qp { + struct mana_ethtool_stats { + u64 stop_queue; + u64 wake_queue; ++ u64 tx_cqes; ++ u64 tx_cqe_err; ++ u64 tx_cqe_unknown_type; ++ u64 rx_cqes; ++ u64 rx_coalesced_err; ++ u64 rx_cqe_unknown_type; + }; + + struct mana_context { diff --git a/debian/patches/features/all/ethernet-microsoft/0016-net-mana-Remove-redundant-pci_clear_master.patch b/debian/patches/features/all/ethernet-microsoft/0016-net-mana-Remove-redundant-pci_clear_master.patch new file mode 100644 index 000000000..c0bd34da8 --- /dev/null +++ b/debian/patches/features/all/ethernet-microsoft/0016-net-mana-Remove-redundant-pci_clear_master.patch @@ -0,0 +1,54 @@ +From 0022db71934d56cc601cacee4c1e414d7f3cbb37 Mon Sep 17 00:00:00 2001 +From: Cai Huoqing <cai.huoqing@linux.dev> +Date: Thu, 23 Mar 2023 17:03:05 
+0800 +Subject: [PATCH 16/23] net: mana: Remove redundant pci_clear_master + +Remove pci_clear_master to simplify the code, +the bus-mastering is also cleared in do_pci_disable_device, +like this: +./drivers/pci/pci.c:2197 +static void do_pci_disable_device(struct pci_dev *dev) +{ + u16 pci_command; + + pci_read_config_word(dev, PCI_COMMAND, &pci_command); + if (pci_command & PCI_COMMAND_MASTER) { + pci_command &= ~PCI_COMMAND_MASTER; + pci_write_config_word(dev, PCI_COMMAND, pci_command); + } + + pcibios_disable_device(dev); +}. +And dev->is_busmaster is set to 0 in pci_disable_device. + +Signed-off-by: Cai Huoqing <cai.huoqing@linux.dev> +Signed-off-by: David S. Miller <davem@davemloft.net> +(cherry picked from commit 2d59af8307526f2829fdec9c5c5898a857d55180) +Signed-off-by: Bastian Blank <waldi@debian.org> +--- + drivers/net/ethernet/microsoft/mana/gdma_main.c | 2 -- + 1 file changed, 2 deletions(-) + +diff --git a/drivers/net/ethernet/microsoft/mana/gdma_main.c b/drivers/net/ethernet/microsoft/mana/gdma_main.c +index 5b7fddfc9ff1..97a1845c676a 100644 +--- a/drivers/net/ethernet/microsoft/mana/gdma_main.c ++++ b/drivers/net/ethernet/microsoft/mana/gdma_main.c +@@ -1440,7 +1440,6 @@ static int mana_gd_probe(struct pci_dev *pdev, const struct pci_device_id *ent) + release_region: + pci_release_regions(pdev); + disable_dev: +- pci_clear_master(pdev); + pci_disable_device(pdev); + dev_err(&pdev->dev, "gdma probe failed: err = %d\n", err); + return err; +@@ -1459,7 +1458,6 @@ static void mana_gd_remove(struct pci_dev *pdev) + vfree(gc); + + pci_release_regions(pdev); +- pci_clear_master(pdev); + pci_disable_device(pdev); + } + +-- +2.40.1 + diff --git a/debian/patches/features/all/ethernet-microsoft/0017-net-mana-Use-napi_build_skb-in-RX-path.patch b/debian/patches/features/all/ethernet-microsoft/0017-net-mana-Use-napi_build_skb-in-RX-path.patch new file mode 100644 index 000000000..c8f0f4588 --- /dev/null +++ 
b/debian/patches/features/all/ethernet-microsoft/0017-net-mana-Use-napi_build_skb-in-RX-path.patch @@ -0,0 +1,33 @@ +From a8796c5278b15a55111d43ccd622b745945ca3d4 Mon Sep 17 00:00:00 2001 +From: Haiyang Zhang <haiyangz@microsoft.com> +Date: Wed, 12 Apr 2023 14:16:00 -0700 +Subject: [PATCH 17/23] net: mana: Use napi_build_skb in RX path + +Use napi_build_skb() instead of build_skb() to take advantage of the +NAPI percpu caches to obtain skbuff_head. + +Signed-off-by: Haiyang Zhang <haiyangz@microsoft.com> +Reviewed-by: Jesse Brandeburg <jesse.brandeburg@intel.com> +Signed-off-by: David S. Miller <davem@davemloft.net> +(cherry picked from commit ce518bc3e9ca342309995c9270c3ec4892963695) +Signed-off-by: Bastian Blank <waldi@debian.org> +--- + drivers/net/ethernet/microsoft/mana/mana_en.c | 2 +- + 1 file changed, 1 insertion(+), 1 deletion(-) + +diff --git a/drivers/net/ethernet/microsoft/mana/mana_en.c b/drivers/net/ethernet/microsoft/mana/mana_en.c +index 55bf40e5ee71..a1b7905ed2f7 100644 +--- a/drivers/net/ethernet/microsoft/mana/mana_en.c ++++ b/drivers/net/ethernet/microsoft/mana/mana_en.c +@@ -1188,7 +1188,7 @@ static void mana_post_pkt_rxq(struct mana_rxq *rxq) + static struct sk_buff *mana_build_skb(void *buf_va, uint pkt_len, + struct xdp_buff *xdp) + { +- struct sk_buff *skb = build_skb(buf_va, PAGE_SIZE); ++ struct sk_buff *skb = napi_build_skb(buf_va, PAGE_SIZE); + + if (!skb) + return NULL; +-- +2.40.1 + diff --git a/debian/patches/features/all/ethernet-microsoft/0018-net-mana-Refactor-RX-buffer-allocation-code-to-prepa.patch b/debian/patches/features/all/ethernet-microsoft/0018-net-mana-Refactor-RX-buffer-allocation-code-to-prepa.patch new file mode 100644 index 000000000..7c072d77c --- /dev/null +++ b/debian/patches/features/all/ethernet-microsoft/0018-net-mana-Refactor-RX-buffer-allocation-code-to-prepa.patch @@ -0,0 +1,289 @@ +From cf9f102448044cea851cb97666bf6e853c8963e3 Mon Sep 17 00:00:00 2001 +From: Haiyang Zhang <haiyangz@microsoft.com> +Date: 
Wed, 12 Apr 2023 14:16:01 -0700 +Subject: [PATCH 18/23] net: mana: Refactor RX buffer allocation code to + prepare for various MTU + +Move out common buffer allocation code from mana_process_rx_cqe() and +mana_alloc_rx_wqe() to helper functions. +Refactor related variables so they can be changed in one place, and buffer +sizes are in sync. + +Signed-off-by: Haiyang Zhang <haiyangz@microsoft.com> +Reviewed-by: Jesse Brandeburg <jesse.brandeburg@intel.com> +Signed-off-by: David S. Miller <davem@davemloft.net> +(cherry picked from commit a2917b23497e4205db32271e4e06e142a9f8a6aa) +Signed-off-by: Bastian Blank <waldi@debian.org> +--- + drivers/net/ethernet/microsoft/mana/mana_en.c | 154 ++++++++++-------- + include/net/mana/mana.h | 6 +- + 2 files changed, 91 insertions(+), 69 deletions(-) + +diff --git a/drivers/net/ethernet/microsoft/mana/mana_en.c b/drivers/net/ethernet/microsoft/mana/mana_en.c +index a1b7905ed2f7..af0c0ee95d87 100644 +--- a/drivers/net/ethernet/microsoft/mana/mana_en.c ++++ b/drivers/net/ethernet/microsoft/mana/mana_en.c +@@ -1282,14 +1282,64 @@ static void mana_rx_skb(void *buf_va, struct mana_rxcomp_oob *cqe, + u64_stats_update_end(&rx_stats->syncp); + + drop: +- WARN_ON_ONCE(rxq->xdp_save_page); +- rxq->xdp_save_page = virt_to_page(buf_va); ++ WARN_ON_ONCE(rxq->xdp_save_va); ++ /* Save for reuse */ ++ rxq->xdp_save_va = buf_va; + + ++ndev->stats.rx_dropped; + + return; + } + ++static void *mana_get_rxfrag(struct mana_rxq *rxq, struct device *dev, ++ dma_addr_t *da, bool is_napi) ++{ ++ struct page *page; ++ void *va; ++ ++ /* Reuse XDP dropped page if available */ ++ if (rxq->xdp_save_va) { ++ va = rxq->xdp_save_va; ++ rxq->xdp_save_va = NULL; ++ } else { ++ page = dev_alloc_page(); ++ if (!page) ++ return NULL; ++ ++ va = page_to_virt(page); ++ } ++ ++ *da = dma_map_single(dev, va + XDP_PACKET_HEADROOM, rxq->datasize, ++ DMA_FROM_DEVICE); ++ ++ if (dma_mapping_error(dev, *da)) { ++ put_page(virt_to_head_page(va)); ++ return NULL; ++ } ++ ++ 
return va; ++} ++ ++/* Allocate frag for rx buffer, and save the old buf */ ++static void mana_refill_rxoob(struct device *dev, struct mana_rxq *rxq, ++ struct mana_recv_buf_oob *rxoob, void **old_buf) ++{ ++ dma_addr_t da; ++ void *va; ++ ++ va = mana_get_rxfrag(rxq, dev, &da, true); ++ ++ if (!va) ++ return; ++ ++ dma_unmap_single(dev, rxoob->sgl[0].address, rxq->datasize, ++ DMA_FROM_DEVICE); ++ *old_buf = rxoob->buf_va; ++ ++ rxoob->buf_va = va; ++ rxoob->sgl[0].address = da; ++} ++ + static void mana_process_rx_cqe(struct mana_rxq *rxq, struct mana_cq *cq, + struct gdma_comp *cqe) + { +@@ -1299,10 +1349,8 @@ static void mana_process_rx_cqe(struct mana_rxq *rxq, struct mana_cq *cq, + struct mana_recv_buf_oob *rxbuf_oob; + struct mana_port_context *apc; + struct device *dev = gc->dev; +- void *new_buf, *old_buf; +- struct page *new_page; ++ void *old_buf = NULL; + u32 curr, pktlen; +- dma_addr_t da; + + apc = netdev_priv(ndev); + +@@ -1345,40 +1393,11 @@ static void mana_process_rx_cqe(struct mana_rxq *rxq, struct mana_cq *cq, + rxbuf_oob = &rxq->rx_oobs[curr]; + WARN_ON_ONCE(rxbuf_oob->wqe_inf.wqe_size_in_bu != 1); + +- /* Reuse XDP dropped page if available */ +- if (rxq->xdp_save_page) { +- new_page = rxq->xdp_save_page; +- rxq->xdp_save_page = NULL; +- } else { +- new_page = alloc_page(GFP_ATOMIC); +- } +- +- if (new_page) { +- da = dma_map_page(dev, new_page, XDP_PACKET_HEADROOM, rxq->datasize, +- DMA_FROM_DEVICE); +- +- if (dma_mapping_error(dev, da)) { +- __free_page(new_page); +- new_page = NULL; +- } +- } +- +- new_buf = new_page ? 
page_to_virt(new_page) : NULL; +- +- if (new_buf) { +- dma_unmap_page(dev, rxbuf_oob->buf_dma_addr, rxq->datasize, +- DMA_FROM_DEVICE); +- +- old_buf = rxbuf_oob->buf_va; +- +- /* refresh the rxbuf_oob with the new page */ +- rxbuf_oob->buf_va = new_buf; +- rxbuf_oob->buf_dma_addr = da; +- rxbuf_oob->sgl[0].address = rxbuf_oob->buf_dma_addr; +- } else { +- old_buf = NULL; /* drop the packet if no memory */ +- } ++ mana_refill_rxoob(dev, rxq, rxbuf_oob, &old_buf); + ++ /* Unsuccessful refill will have old_buf == NULL. ++ * In this case, mana_rx_skb() will drop the packet. ++ */ + mana_rx_skb(old_buf, oob, rxq); + + drop: +@@ -1659,8 +1678,8 @@ static void mana_destroy_rxq(struct mana_port_context *apc, + + mana_deinit_cq(apc, &rxq->rx_cq); + +- if (rxq->xdp_save_page) +- __free_page(rxq->xdp_save_page); ++ if (rxq->xdp_save_va) ++ put_page(virt_to_head_page(rxq->xdp_save_va)); + + for (i = 0; i < rxq->num_rx_buf; i++) { + rx_oob = &rxq->rx_oobs[i]; +@@ -1668,10 +1687,10 @@ static void mana_destroy_rxq(struct mana_port_context *apc, + if (!rx_oob->buf_va) + continue; + +- dma_unmap_page(dev, rx_oob->buf_dma_addr, rxq->datasize, +- DMA_FROM_DEVICE); ++ dma_unmap_single(dev, rx_oob->sgl[0].address, ++ rx_oob->sgl[0].size, DMA_FROM_DEVICE); + +- free_page((unsigned long)rx_oob->buf_va); ++ put_page(virt_to_head_page(rx_oob->buf_va)); + rx_oob->buf_va = NULL; + } + +@@ -1681,6 +1700,26 @@ static void mana_destroy_rxq(struct mana_port_context *apc, + kfree(rxq); + } + ++static int mana_fill_rx_oob(struct mana_recv_buf_oob *rx_oob, u32 mem_key, ++ struct mana_rxq *rxq, struct device *dev) ++{ ++ dma_addr_t da; ++ void *va; ++ ++ va = mana_get_rxfrag(rxq, dev, &da, false); ++ ++ if (!va) ++ return -ENOMEM; ++ ++ rx_oob->buf_va = va; ++ ++ rx_oob->sgl[0].address = da; ++ rx_oob->sgl[0].size = rxq->datasize; ++ rx_oob->sgl[0].mem_key = mem_key; ++ ++ return 0; ++} ++ + #define MANA_WQE_HEADER_SIZE 16 + #define MANA_WQE_SGE_SIZE 16 + +@@ -1690,9 +1729,8 @@ static int 
mana_alloc_rx_wqe(struct mana_port_context *apc, + struct gdma_context *gc = apc->ac->gdma_dev->gdma_context; + struct mana_recv_buf_oob *rx_oob; + struct device *dev = gc->dev; +- struct page *page; +- dma_addr_t da; + u32 buf_idx; ++ int ret; + + WARN_ON(rxq->datasize == 0 || rxq->datasize > PAGE_SIZE); + +@@ -1703,25 +1741,12 @@ static int mana_alloc_rx_wqe(struct mana_port_context *apc, + rx_oob = &rxq->rx_oobs[buf_idx]; + memset(rx_oob, 0, sizeof(*rx_oob)); + +- page = alloc_page(GFP_KERNEL); +- if (!page) +- return -ENOMEM; +- +- da = dma_map_page(dev, page, XDP_PACKET_HEADROOM, rxq->datasize, +- DMA_FROM_DEVICE); +- +- if (dma_mapping_error(dev, da)) { +- __free_page(page); +- return -ENOMEM; +- } +- +- rx_oob->buf_va = page_to_virt(page); +- rx_oob->buf_dma_addr = da; +- + rx_oob->num_sge = 1; +- rx_oob->sgl[0].address = rx_oob->buf_dma_addr; +- rx_oob->sgl[0].size = rxq->datasize; +- rx_oob->sgl[0].mem_key = apc->ac->gdma_dev->gpa_mkey; ++ ++ ret = mana_fill_rx_oob(rx_oob, apc->ac->gdma_dev->gpa_mkey, rxq, ++ dev); ++ if (ret) ++ return ret; + + rx_oob->wqe_req.sgl = rx_oob->sgl; + rx_oob->wqe_req.num_sge = rx_oob->num_sge; +@@ -1780,9 +1805,10 @@ static struct mana_rxq *mana_create_rxq(struct mana_port_context *apc, + rxq->ndev = ndev; + rxq->num_rx_buf = RX_BUFFERS_PER_QUEUE; + rxq->rxq_idx = rxq_idx; +- rxq->datasize = ALIGN(MAX_FRAME_SIZE, 64); + rxq->rxobj = INVALID_MANA_HANDLE; + ++ rxq->datasize = ALIGN(ETH_FRAME_LEN, 64); ++ + err = mana_alloc_rx_wqe(apc, rxq, &rq_size, &cq_size); + if (err) + goto out; +diff --git a/include/net/mana/mana.h b/include/net/mana/mana.h +index 343ace6de20e..da556246233e 100644 +--- a/include/net/mana/mana.h ++++ b/include/net/mana/mana.h +@@ -36,9 +36,6 @@ enum TRI_STATE { + + #define COMP_ENTRY_SIZE 64 + +-#define ADAPTER_MTU_SIZE 1500 +-#define MAX_FRAME_SIZE (ADAPTER_MTU_SIZE + 14) +- + #define RX_BUFFERS_PER_QUEUE 512 + + #define MAX_SEND_BUFFERS_PER_QUEUE 256 +@@ -282,7 +279,6 @@ struct mana_recv_buf_oob { + 
struct gdma_wqe_request wqe_req; + + void *buf_va; +- dma_addr_t buf_dma_addr; + + /* SGL of the buffer going to be sent has part of the work request. */ + u32 num_sge; +@@ -322,7 +318,7 @@ struct mana_rxq { + + struct bpf_prog __rcu *bpf_prog; + struct xdp_rxq_info xdp_rxq; +- struct page *xdp_save_page; ++ void *xdp_save_va; /* for reusing */ + bool xdp_flush; + int xdp_rc; /* XDP redirect return code */ + +-- +2.40.1 + diff --git a/debian/patches/features/all/ethernet-microsoft/0019-net-mana-Enable-RX-path-to-handle-various-MTU-sizes.patch b/debian/patches/features/all/ethernet-microsoft/0019-net-mana-Enable-RX-path-to-handle-various-MTU-sizes.patch new file mode 100644 index 000000000..7fb93c495 --- /dev/null +++ b/debian/patches/features/all/ethernet-microsoft/0019-net-mana-Enable-RX-path-to-handle-various-MTU-sizes.patch @@ -0,0 +1,147 @@ +From 8640f747acafd65c43be07bd7db9a431b5926db3 Mon Sep 17 00:00:00 2001 +From: Haiyang Zhang <haiyangz@microsoft.com> +Date: Wed, 12 Apr 2023 14:16:02 -0700 +Subject: [PATCH 19/23] net: mana: Enable RX path to handle various MTU sizes + +Update RX data path to allocate and use RX queue DMA buffers with +proper size based on potentially various MTU sizes. + +Signed-off-by: Haiyang Zhang <haiyangz@microsoft.com> +Reviewed-by: Jesse Brandeburg <jesse.brandeburg@intel.com> +Signed-off-by: David S. 
Miller <davem@davemloft.net> +(cherry picked from commit 2fbbd712baf1c60996554326728bbdbef5616e12) +Signed-off-by: Bastian Blank <waldi@debian.org> +--- + drivers/net/ethernet/microsoft/mana/mana_en.c | 38 ++++++++++++++----- + include/net/mana/mana.h | 7 ++++ + 2 files changed, 35 insertions(+), 10 deletions(-) + +diff --git a/drivers/net/ethernet/microsoft/mana/mana_en.c b/drivers/net/ethernet/microsoft/mana/mana_en.c +index af0c0ee95d87..afbbe447de1d 100644 +--- a/drivers/net/ethernet/microsoft/mana/mana_en.c ++++ b/drivers/net/ethernet/microsoft/mana/mana_en.c +@@ -1185,10 +1185,10 @@ static void mana_post_pkt_rxq(struct mana_rxq *rxq) + WARN_ON_ONCE(recv_buf_oob->wqe_inf.wqe_size_in_bu != 1); + } + +-static struct sk_buff *mana_build_skb(void *buf_va, uint pkt_len, +- struct xdp_buff *xdp) ++static struct sk_buff *mana_build_skb(struct mana_rxq *rxq, void *buf_va, ++ uint pkt_len, struct xdp_buff *xdp) + { +- struct sk_buff *skb = napi_build_skb(buf_va, PAGE_SIZE); ++ struct sk_buff *skb = napi_build_skb(buf_va, rxq->alloc_size); + + if (!skb) + return NULL; +@@ -1196,11 +1196,12 @@ static struct sk_buff *mana_build_skb(void *buf_va, uint pkt_len, + if (xdp->data_hard_start) { + skb_reserve(skb, xdp->data - xdp->data_hard_start); + skb_put(skb, xdp->data_end - xdp->data); +- } else { +- skb_reserve(skb, XDP_PACKET_HEADROOM); +- skb_put(skb, pkt_len); ++ return skb; + } + ++ skb_reserve(skb, rxq->headroom); ++ skb_put(skb, pkt_len); ++ + return skb; + } + +@@ -1233,7 +1234,7 @@ static void mana_rx_skb(void *buf_va, struct mana_rxcomp_oob *cqe, + if (act != XDP_PASS && act != XDP_TX) + goto drop_xdp; + +- skb = mana_build_skb(buf_va, pkt_len, &xdp); ++ skb = mana_build_skb(rxq, buf_va, pkt_len, &xdp); + + if (!skb) + goto drop; +@@ -1301,6 +1302,14 @@ static void *mana_get_rxfrag(struct mana_rxq *rxq, struct device *dev, + if (rxq->xdp_save_va) { + va = rxq->xdp_save_va; + rxq->xdp_save_va = NULL; ++ } else if (rxq->alloc_size > PAGE_SIZE) { ++ if (is_napi) ++ 
va = napi_alloc_frag(rxq->alloc_size); ++ else ++ va = netdev_alloc_frag(rxq->alloc_size); ++ ++ if (!va) ++ return NULL; + } else { + page = dev_alloc_page(); + if (!page) +@@ -1309,7 +1318,7 @@ static void *mana_get_rxfrag(struct mana_rxq *rxq, struct device *dev, + va = page_to_virt(page); + } + +- *da = dma_map_single(dev, va + XDP_PACKET_HEADROOM, rxq->datasize, ++ *da = dma_map_single(dev, va + rxq->headroom, rxq->datasize, + DMA_FROM_DEVICE); + + if (dma_mapping_error(dev, *da)) { +@@ -1732,7 +1741,7 @@ static int mana_alloc_rx_wqe(struct mana_port_context *apc, + u32 buf_idx; + int ret; + +- WARN_ON(rxq->datasize == 0 || rxq->datasize > PAGE_SIZE); ++ WARN_ON(rxq->datasize == 0); + + *rxq_size = 0; + *cq_size = 0; +@@ -1788,6 +1797,7 @@ static struct mana_rxq *mana_create_rxq(struct mana_port_context *apc, + struct gdma_dev *gd = apc->ac->gdma_dev; + struct mana_obj_spec wq_spec; + struct mana_obj_spec cq_spec; ++ unsigned int mtu = ndev->mtu; + struct gdma_queue_spec spec; + struct mana_cq *cq = NULL; + struct gdma_context *gc; +@@ -1807,7 +1817,15 @@ static struct mana_rxq *mana_create_rxq(struct mana_port_context *apc, + rxq->rxq_idx = rxq_idx; + rxq->rxobj = INVALID_MANA_HANDLE; + +- rxq->datasize = ALIGN(ETH_FRAME_LEN, 64); ++ rxq->datasize = ALIGN(mtu + ETH_HLEN, 64); ++ ++ if (mtu > MANA_XDP_MTU_MAX) { ++ rxq->alloc_size = mtu + MANA_RXBUF_PAD; ++ rxq->headroom = 0; ++ } else { ++ rxq->alloc_size = mtu + MANA_RXBUF_PAD + XDP_PACKET_HEADROOM; ++ rxq->headroom = XDP_PACKET_HEADROOM; ++ } + + err = mana_alloc_rx_wqe(apc, rxq, &rq_size, &cq_size); + if (err) +diff --git a/include/net/mana/mana.h b/include/net/mana/mana.h +index da556246233e..42795849d68c 100644 +--- a/include/net/mana/mana.h ++++ b/include/net/mana/mana.h +@@ -291,6 +291,11 @@ struct mana_recv_buf_oob { + struct gdma_posted_wqe_info wqe_inf; + }; + ++#define MANA_RXBUF_PAD (SKB_DATA_ALIGN(sizeof(struct skb_shared_info)) \ ++ + ETH_HLEN) ++ ++#define MANA_XDP_MTU_MAX (PAGE_SIZE - 
MANA_RXBUF_PAD - XDP_PACKET_HEADROOM) ++ + struct mana_rxq { + struct gdma_queue *gdma_rq; + /* Cache the gdma receive queue id */ +@@ -300,6 +305,8 @@ struct mana_rxq { + u32 rxq_idx; + + u32 datasize; ++ u32 alloc_size; ++ u32 headroom; + + mana_handle_t rxobj; + +-- +2.40.1 + diff --git a/debian/patches/features/all/ethernet-microsoft/0020-net-mana-Add-support-for-jumbo-frame.patch b/debian/patches/features/all/ethernet-microsoft/0020-net-mana-Add-support-for-jumbo-frame.patch new file mode 100644 index 000000000..5a5a44d88 --- /dev/null +++ b/debian/patches/features/all/ethernet-microsoft/0020-net-mana-Add-support-for-jumbo-frame.patch @@ -0,0 +1,423 @@ +From 2552a80c48dd33368d0e7a2b94e1eb72e3053b99 Mon Sep 17 00:00:00 2001 +From: Haiyang Zhang <haiyangz@microsoft.com> +Date: Wed, 12 Apr 2023 14:16:03 -0700 +Subject: [PATCH 20/23] net: mana: Add support for jumbo frame + +During probe, get the hardware-allowed max MTU by querying the device +configuration. Users can select MTU up to the device limit. +When XDP is in use, limit MTU settings so the buffer size is within +one page. And, when MTU is set to a too large value, XDP is not allowed +to run. +Also, to prevent changing MTU fails, and leaves the NIC in a bad state, +pre-allocate all buffers before starting the change. So in low memory +condition, it will return error, without affecting the NIC. + +Signed-off-by: Haiyang Zhang <haiyangz@microsoft.com> +Reviewed-by: Jesse Brandeburg <jesse.brandeburg@intel.com> +Signed-off-by: David S. 
Miller <davem@davemloft.net> +(cherry picked from commit 80f6215b450eb8e92d8b1f117abf5ecf867f963e) +Signed-off-by: Bastian Blank <waldi@debian.org> +--- + .../net/ethernet/microsoft/mana/mana_bpf.c | 22 +- + drivers/net/ethernet/microsoft/mana/mana_en.c | 217 ++++++++++++++++-- + include/net/mana/gdma.h | 4 + + include/net/mana/mana.h | 14 ++ + 4 files changed, 233 insertions(+), 24 deletions(-) + +diff --git a/drivers/net/ethernet/microsoft/mana/mana_bpf.c b/drivers/net/ethernet/microsoft/mana/mana_bpf.c +index 3caea631229c..23b1521c0df9 100644 +--- a/drivers/net/ethernet/microsoft/mana/mana_bpf.c ++++ b/drivers/net/ethernet/microsoft/mana/mana_bpf.c +@@ -133,12 +133,6 @@ u32 mana_run_xdp(struct net_device *ndev, struct mana_rxq *rxq, + return act; + } + +-static unsigned int mana_xdp_fraglen(unsigned int len) +-{ +- return SKB_DATA_ALIGN(len) + +- SKB_DATA_ALIGN(sizeof(struct skb_shared_info)); +-} +- + struct bpf_prog *mana_xdp_get(struct mana_port_context *apc) + { + ASSERT_RTNL(); +@@ -179,17 +173,18 @@ static int mana_xdp_set(struct net_device *ndev, struct bpf_prog *prog, + { + struct mana_port_context *apc = netdev_priv(ndev); + struct bpf_prog *old_prog; +- int buf_max; ++ struct gdma_context *gc; ++ ++ gc = apc->ac->gdma_dev->gdma_context; + + old_prog = mana_xdp_get(apc); + + if (!old_prog && !prog) + return 0; + +- buf_max = XDP_PACKET_HEADROOM + mana_xdp_fraglen(ndev->mtu + ETH_HLEN); +- if (prog && buf_max > PAGE_SIZE) { +- netdev_err(ndev, "XDP: mtu:%u too large, buf_max:%u\n", +- ndev->mtu, buf_max); ++ if (prog && ndev->mtu > MANA_XDP_MTU_MAX) { ++ netdev_err(ndev, "XDP: mtu:%u too large, mtu_max:%lu\n", ++ ndev->mtu, MANA_XDP_MTU_MAX); + NL_SET_ERR_MSG_MOD(extack, "XDP: mtu too large"); + + return -EOPNOTSUPP; +@@ -206,6 +201,11 @@ static int mana_xdp_set(struct net_device *ndev, struct bpf_prog *prog, + if (apc->port_is_up) + mana_chn_setxdp(apc, prog); + ++ if (prog) ++ ndev->max_mtu = MANA_XDP_MTU_MAX; ++ else ++ ndev->max_mtu = gc->adapter_mtu 
- ETH_HLEN; ++ + return 0; + } + +diff --git a/drivers/net/ethernet/microsoft/mana/mana_en.c b/drivers/net/ethernet/microsoft/mana/mana_en.c +index afbbe447de1d..34fa5c758b28 100644 +--- a/drivers/net/ethernet/microsoft/mana/mana_en.c ++++ b/drivers/net/ethernet/microsoft/mana/mana_en.c +@@ -427,6 +427,192 @@ static u16 mana_select_queue(struct net_device *ndev, struct sk_buff *skb, + return txq; + } + ++/* Release pre-allocated RX buffers */ ++static void mana_pre_dealloc_rxbufs(struct mana_port_context *mpc) ++{ ++ struct device *dev; ++ int i; ++ ++ dev = mpc->ac->gdma_dev->gdma_context->dev; ++ ++ if (!mpc->rxbufs_pre) ++ goto out1; ++ ++ if (!mpc->das_pre) ++ goto out2; ++ ++ while (mpc->rxbpre_total) { ++ i = --mpc->rxbpre_total; ++ dma_unmap_single(dev, mpc->das_pre[i], mpc->rxbpre_datasize, ++ DMA_FROM_DEVICE); ++ put_page(virt_to_head_page(mpc->rxbufs_pre[i])); ++ } ++ ++ kfree(mpc->das_pre); ++ mpc->das_pre = NULL; ++ ++out2: ++ kfree(mpc->rxbufs_pre); ++ mpc->rxbufs_pre = NULL; ++ ++out1: ++ mpc->rxbpre_datasize = 0; ++ mpc->rxbpre_alloc_size = 0; ++ mpc->rxbpre_headroom = 0; ++} ++ ++/* Get a buffer from the pre-allocated RX buffers */ ++static void *mana_get_rxbuf_pre(struct mana_rxq *rxq, dma_addr_t *da) ++{ ++ struct net_device *ndev = rxq->ndev; ++ struct mana_port_context *mpc; ++ void *va; ++ ++ mpc = netdev_priv(ndev); ++ ++ if (!mpc->rxbufs_pre || !mpc->das_pre || !mpc->rxbpre_total) { ++ netdev_err(ndev, "No RX pre-allocated bufs\n"); ++ return NULL; ++ } ++ ++ /* Check sizes to catch unexpected coding error */ ++ if (mpc->rxbpre_datasize != rxq->datasize) { ++ netdev_err(ndev, "rxbpre_datasize mismatch: %u: %u\n", ++ mpc->rxbpre_datasize, rxq->datasize); ++ return NULL; ++ } ++ ++ if (mpc->rxbpre_alloc_size != rxq->alloc_size) { ++ netdev_err(ndev, "rxbpre_alloc_size mismatch: %u: %u\n", ++ mpc->rxbpre_alloc_size, rxq->alloc_size); ++ return NULL; ++ } ++ ++ if (mpc->rxbpre_headroom != rxq->headroom) { ++ netdev_err(ndev, "rxbpre_headroom 
mismatch: %u: %u\n", ++ mpc->rxbpre_headroom, rxq->headroom); ++ return NULL; ++ } ++ ++ mpc->rxbpre_total--; ++ ++ *da = mpc->das_pre[mpc->rxbpre_total]; ++ va = mpc->rxbufs_pre[mpc->rxbpre_total]; ++ mpc->rxbufs_pre[mpc->rxbpre_total] = NULL; ++ ++ /* Deallocate the array after all buffers are gone */ ++ if (!mpc->rxbpre_total) ++ mana_pre_dealloc_rxbufs(mpc); ++ ++ return va; ++} ++ ++/* Get RX buffer's data size, alloc size, XDP headroom based on MTU */ ++static void mana_get_rxbuf_cfg(int mtu, u32 *datasize, u32 *alloc_size, ++ u32 *headroom) ++{ ++ if (mtu > MANA_XDP_MTU_MAX) ++ *headroom = 0; /* no support for XDP */ ++ else ++ *headroom = XDP_PACKET_HEADROOM; ++ ++ *alloc_size = mtu + MANA_RXBUF_PAD + *headroom; ++ ++ *datasize = ALIGN(mtu + ETH_HLEN, MANA_RX_DATA_ALIGN); ++} ++ ++static int mana_pre_alloc_rxbufs(struct mana_port_context *mpc, int new_mtu) ++{ ++ struct device *dev; ++ struct page *page; ++ dma_addr_t da; ++ int num_rxb; ++ void *va; ++ int i; ++ ++ mana_get_rxbuf_cfg(new_mtu, &mpc->rxbpre_datasize, ++ &mpc->rxbpre_alloc_size, &mpc->rxbpre_headroom); ++ ++ dev = mpc->ac->gdma_dev->gdma_context->dev; ++ ++ num_rxb = mpc->num_queues * RX_BUFFERS_PER_QUEUE; ++ ++ WARN(mpc->rxbufs_pre, "mana rxbufs_pre exists\n"); ++ mpc->rxbufs_pre = kmalloc_array(num_rxb, sizeof(void *), GFP_KERNEL); ++ if (!mpc->rxbufs_pre) ++ goto error; ++ ++ mpc->das_pre = kmalloc_array(num_rxb, sizeof(dma_addr_t), GFP_KERNEL); ++ if (!mpc->das_pre) ++ goto error; ++ ++ mpc->rxbpre_total = 0; ++ ++ for (i = 0; i < num_rxb; i++) { ++ if (mpc->rxbpre_alloc_size > PAGE_SIZE) { ++ va = netdev_alloc_frag(mpc->rxbpre_alloc_size); ++ if (!va) ++ goto error; ++ } else { ++ page = dev_alloc_page(); ++ if (!page) ++ goto error; ++ ++ va = page_to_virt(page); ++ } ++ ++ da = dma_map_single(dev, va + mpc->rxbpre_headroom, ++ mpc->rxbpre_datasize, DMA_FROM_DEVICE); ++ ++ if (dma_mapping_error(dev, da)) { ++ put_page(virt_to_head_page(va)); ++ goto error; ++ } ++ ++ mpc->rxbufs_pre[i] 
= va; ++ mpc->das_pre[i] = da; ++ mpc->rxbpre_total = i + 1; ++ } ++ ++ return 0; ++ ++error: ++ mana_pre_dealloc_rxbufs(mpc); ++ return -ENOMEM; ++} ++ ++static int mana_change_mtu(struct net_device *ndev, int new_mtu) ++{ ++ struct mana_port_context *mpc = netdev_priv(ndev); ++ unsigned int old_mtu = ndev->mtu; ++ int err; ++ ++ /* Pre-allocate buffers to prevent failure in mana_attach later */ ++ err = mana_pre_alloc_rxbufs(mpc, new_mtu); ++ if (err) { ++ netdev_err(ndev, "Insufficient memory for new MTU\n"); ++ return err; ++ } ++ ++ err = mana_detach(ndev, false); ++ if (err) { ++ netdev_err(ndev, "mana_detach failed: %d\n", err); ++ goto out; ++ } ++ ++ ndev->mtu = new_mtu; ++ ++ err = mana_attach(ndev); ++ if (err) { ++ netdev_err(ndev, "mana_attach failed: %d\n", err); ++ ndev->mtu = old_mtu; ++ } ++ ++out: ++ mana_pre_dealloc_rxbufs(mpc); ++ return err; ++} ++ + static const struct net_device_ops mana_devops = { + .ndo_open = mana_open, + .ndo_stop = mana_close, +@@ -436,6 +622,7 @@ static const struct net_device_ops mana_devops = { + .ndo_get_stats64 = mana_get_stats64, + .ndo_bpf = mana_bpf, + .ndo_xdp_xmit = mana_xdp_xmit, ++ .ndo_change_mtu = mana_change_mtu, + }; + + static void mana_cleanup_port_context(struct mana_port_context *apc) +@@ -625,6 +812,9 @@ static int mana_query_device_cfg(struct mana_context *ac, u32 proto_major_ver, + + mana_gd_init_req_hdr(&req.hdr, MANA_QUERY_DEV_CONFIG, + sizeof(req), sizeof(resp)); ++ ++ req.hdr.resp.msg_version = GDMA_MESSAGE_V2; ++ + req.proto_major_ver = proto_major_ver; + req.proto_minor_ver = proto_minor_ver; + req.proto_micro_ver = proto_micro_ver; +@@ -647,6 +837,11 @@ static int mana_query_device_cfg(struct mana_context *ac, u32 proto_major_ver, + + *max_num_vports = resp.max_num_vports; + ++ if (resp.hdr.response.msg_version == GDMA_MESSAGE_V2) ++ gc->adapter_mtu = resp.adapter_mtu; ++ else ++ gc->adapter_mtu = ETH_FRAME_LEN; ++ + return 0; + } + +@@ -1712,10 +1907,14 @@ static void 
mana_destroy_rxq(struct mana_port_context *apc, + static int mana_fill_rx_oob(struct mana_recv_buf_oob *rx_oob, u32 mem_key, + struct mana_rxq *rxq, struct device *dev) + { ++ struct mana_port_context *mpc = netdev_priv(rxq->ndev); + dma_addr_t da; + void *va; + +- va = mana_get_rxfrag(rxq, dev, &da, false); ++ if (mpc->rxbufs_pre) ++ va = mana_get_rxbuf_pre(rxq, &da); ++ else ++ va = mana_get_rxfrag(rxq, dev, &da, false); + + if (!va) + return -ENOMEM; +@@ -1797,7 +1996,6 @@ static struct mana_rxq *mana_create_rxq(struct mana_port_context *apc, + struct gdma_dev *gd = apc->ac->gdma_dev; + struct mana_obj_spec wq_spec; + struct mana_obj_spec cq_spec; +- unsigned int mtu = ndev->mtu; + struct gdma_queue_spec spec; + struct mana_cq *cq = NULL; + struct gdma_context *gc; +@@ -1817,15 +2015,8 @@ static struct mana_rxq *mana_create_rxq(struct mana_port_context *apc, + rxq->rxq_idx = rxq_idx; + rxq->rxobj = INVALID_MANA_HANDLE; + +- rxq->datasize = ALIGN(mtu + ETH_HLEN, 64); +- +- if (mtu > MANA_XDP_MTU_MAX) { +- rxq->alloc_size = mtu + MANA_RXBUF_PAD; +- rxq->headroom = 0; +- } else { +- rxq->alloc_size = mtu + MANA_RXBUF_PAD + XDP_PACKET_HEADROOM; +- rxq->headroom = XDP_PACKET_HEADROOM; +- } ++ mana_get_rxbuf_cfg(ndev->mtu, &rxq->datasize, &rxq->alloc_size, ++ &rxq->headroom); + + err = mana_alloc_rx_wqe(apc, rxq, &rq_size, &cq_size); + if (err) +@@ -2238,8 +2429,8 @@ static int mana_probe_port(struct mana_context *ac, int port_idx, + ndev->netdev_ops = &mana_devops; + ndev->ethtool_ops = &mana_ethtool_ops; + ndev->mtu = ETH_DATA_LEN; +- ndev->max_mtu = ndev->mtu; +- ndev->min_mtu = ndev->mtu; ++ ndev->max_mtu = gc->adapter_mtu - ETH_HLEN; ++ ndev->min_mtu = ETH_MIN_MTU; + ndev->needed_headroom = MANA_HEADROOM; + ndev->dev_port = port_idx; + SET_NETDEV_DEV(ndev, gc->dev); +diff --git a/include/net/mana/gdma.h b/include/net/mana/gdma.h +index aabc7cea8a49..b27bf852471b 100644 +--- a/include/net/mana/gdma.h ++++ b/include/net/mana/gdma.h +@@ -143,6 +143,7 @@ struct 
gdma_general_req { + }; /* HW DATA */ + + #define GDMA_MESSAGE_V1 1 ++#define GDMA_MESSAGE_V2 2 + + struct gdma_general_resp { + struct gdma_resp_hdr hdr; +@@ -352,6 +353,9 @@ struct gdma_context { + struct gdma_resource msix_resource; + struct gdma_irq_context *irq_contexts; + ++ /* L2 MTU */ ++ u16 adapter_mtu; ++ + /* This maps a CQ index to the queue structure. */ + unsigned int max_num_cqs; + struct gdma_queue **cq_table; +diff --git a/include/net/mana/mana.h b/include/net/mana/mana.h +index 42795849d68c..4f19d73b66ae 100644 +--- a/include/net/mana/mana.h ++++ b/include/net/mana/mana.h +@@ -37,6 +37,7 @@ enum TRI_STATE { + #define COMP_ENTRY_SIZE 64 + + #define RX_BUFFERS_PER_QUEUE 512 ++#define MANA_RX_DATA_ALIGN 64 + + #define MAX_SEND_BUFFERS_PER_QUEUE 256 + +@@ -390,6 +391,14 @@ struct mana_port_context { + /* This points to an array of num_queues of RQ pointers. */ + struct mana_rxq **rxqs; + ++ /* pre-allocated rx buffer array */ ++ void **rxbufs_pre; ++ dma_addr_t *das_pre; ++ int rxbpre_total; ++ u32 rxbpre_datasize; ++ u32 rxbpre_alloc_size; ++ u32 rxbpre_headroom; ++ + struct bpf_prog *bpf_prog; + + /* Create num_queues EQs, SQs, SQ-CQs, RQs and RQ-CQs, respectively. 
*/ +@@ -486,6 +495,11 @@ struct mana_query_device_cfg_resp { + u16 max_num_vports; + u16 reserved; + u32 max_num_eqs; ++ ++ /* response v2: */ ++ u16 adapter_mtu; ++ u16 reserved2; ++ u32 reserved3; + }; /* HW DATA */ + + /* Query vPort Configuration */ +-- +2.40.1 + diff --git a/debian/patches/features/all/ethernet-microsoft/0021-net-mana-Check-if-netdev-napi_alloc_frag-returns-sin.patch b/debian/patches/features/all/ethernet-microsoft/0021-net-mana-Check-if-netdev-napi_alloc_frag-returns-sin.patch new file mode 100644 index 000000000..33ca4257b --- /dev/null +++ b/debian/patches/features/all/ethernet-microsoft/0021-net-mana-Check-if-netdev-napi_alloc_frag-returns-sin.patch @@ -0,0 +1,54 @@ +From e1541b0cdf147beef46101f2b9fa8aeb9793bd8d Mon Sep 17 00:00:00 2001 +From: Haiyang Zhang <haiyangz@microsoft.com> +Date: Fri, 21 Apr 2023 10:06:58 -0700 +Subject: [PATCH 21/23] net: mana: Check if netdev/napi_alloc_frag returns + single page + +netdev/napi_alloc_frag() may fall back to single page which is smaller +than the requested size. +Add error checking to avoid memory overwritten. 
+ +Signed-off-by: Haiyang Zhang <haiyangz@microsoft.com> +Signed-off-by: Jakub Kicinski <kuba@kernel.org> +(cherry picked from commit df18f2da302f169e1a29098c6ca3b474f1b0269e) +Signed-off-by: Bastian Blank <waldi@debian.org> +--- + drivers/net/ethernet/microsoft/mana/mana_en.c | 15 +++++++++++++++ + 1 file changed, 15 insertions(+) + +diff --git a/drivers/net/ethernet/microsoft/mana/mana_en.c b/drivers/net/ethernet/microsoft/mana/mana_en.c +index 34fa5c758b28..fc19e62c9c84 100644 +--- a/drivers/net/ethernet/microsoft/mana/mana_en.c ++++ b/drivers/net/ethernet/microsoft/mana/mana_en.c +@@ -553,6 +553,14 @@ static int mana_pre_alloc_rxbufs(struct mana_port_context *mpc, int new_mtu) + va = netdev_alloc_frag(mpc->rxbpre_alloc_size); + if (!va) + goto error; ++ ++ page = virt_to_head_page(va); ++ /* Check if the frag falls back to single page */ ++ if (compound_order(page) < ++ get_order(mpc->rxbpre_alloc_size)) { ++ put_page(page); ++ goto error; ++ } + } else { + page = dev_alloc_page(); + if (!page) +@@ -1505,6 +1513,13 @@ static void *mana_get_rxfrag(struct mana_rxq *rxq, struct device *dev, + + if (!va) + return NULL; ++ ++ page = virt_to_head_page(va); ++ /* Check if the frag falls back to single page */ ++ if (compound_order(page) < get_order(rxq->alloc_size)) { ++ put_page(page); ++ return NULL; ++ } + } else { + page = dev_alloc_page(); + if (!page) +-- +2.40.1 + diff --git a/debian/patches/features/all/ethernet-microsoft/0022-net-mana-Fix-perf-regression-remove-rx_cqes-tx_cqes-.patch b/debian/patches/features/all/ethernet-microsoft/0022-net-mana-Fix-perf-regression-remove-rx_cqes-tx_cqes-.patch new file mode 100644 index 000000000..21a354cc8 --- /dev/null +++ b/debian/patches/features/all/ethernet-microsoft/0022-net-mana-Fix-perf-regression-remove-rx_cqes-tx_cqes-.patch @@ -0,0 +1,121 @@ +From a6967df15a72ffd53963492b8cf52438cf0c27f7 Mon Sep 17 00:00:00 2001 +From: Haiyang Zhang <haiyangz@microsoft.com> +Date: Fri, 26 May 2023 08:38:57 -0700 +Subject: [PATCH 
22/23] net: mana: Fix perf regression: remove rx_cqes, tx_cqes + counters + +The apc->eth_stats.rx_cqes is one per NIC (vport), and it's on the +frequent and parallel code path of all queues. So, r/w into this +single shared variable by many threads on different CPUs creates a +lot of caching and memory overhead, hence perf regression. And, it's +not accurate due to the high-volume concurrent r/w. + +For example, a workload is iperf with 128 threads, and with RPS +enabled. We saw perf regression of 25% with the previous patch +adding the counters. And this patch eliminates the regression. + +Since the error path of mana_poll_rx_cq() already has warnings, +keeping the counter and converting it to a per-queue variable is not +necessary. So, just remove this counter from this high frequency +code path. + +Also, remove the tx_cqes counter for the same reason. We have +warnings & other counters for errors on that path, and don't need +to count every normal cqe processing. + +Cc: stable@vger.kernel.org +Fixes: bd7fc6e1957c ("net: mana: Add new MANA VF performance counters for easier troubleshooting") +Signed-off-by: Haiyang Zhang <haiyangz@microsoft.com> +Reviewed-by: Horatiu Vultur <horatiu.vultur@microchip.com> +Reviewed-by: Jiri Pirko <jiri@nvidia.com> +Link: https://lore.kernel.org/r/1685115537-31675-1-git-send-email-haiyangz@microsoft.com +Signed-off-by: Paolo Abeni <pabeni@redhat.com> +(cherry picked from commit 1919b39fc6eabb9a6f9a51706ff6d03865f5df29) +Signed-off-by: Bastian Blank <waldi@debian.org> +--- + drivers/net/ethernet/microsoft/mana/mana_en.c | 10 ---------- + drivers/net/ethernet/microsoft/mana/mana_ethtool.c | 2 -- + include/net/mana/mana.h | 2 -- + 3 files changed, 14 deletions(-) + +diff --git a/drivers/net/ethernet/microsoft/mana/mana_en.c b/drivers/net/ethernet/microsoft/mana/mana_en.c +index fc19e62c9c84..25b17213e54d 100644 +--- a/drivers/net/ethernet/microsoft/mana/mana_en.c ++++ b/drivers/net/ethernet/microsoft/mana/mana_en.c +@@ -1280,8 +1280,6
@@ static void mana_poll_tx_cq(struct mana_cq *cq) + if (comp_read < 1) + return; + +- apc->eth_stats.tx_cqes = comp_read; +- + for (i = 0; i < comp_read; i++) { + struct mana_tx_comp_oob *cqe_oob; + +@@ -1364,8 +1362,6 @@ static void mana_poll_tx_cq(struct mana_cq *cq) + WARN_ON_ONCE(1); + + cq->work_done = pkt_transmitted; +- +- apc->eth_stats.tx_cqes -= pkt_transmitted; + } + + static void mana_post_pkt_rxq(struct mana_rxq *rxq) +@@ -1629,15 +1625,11 @@ static void mana_poll_rx_cq(struct mana_cq *cq) + { + struct gdma_comp *comp = cq->gdma_comp_buf; + struct mana_rxq *rxq = cq->rxq; +- struct mana_port_context *apc; + int comp_read, i; + +- apc = netdev_priv(rxq->ndev); +- + comp_read = mana_gd_poll_cq(cq->gdma_cq, comp, CQE_POLLING_BUFFER); + WARN_ON_ONCE(comp_read > CQE_POLLING_BUFFER); + +- apc->eth_stats.rx_cqes = comp_read; + rxq->xdp_flush = false; + + for (i = 0; i < comp_read; i++) { +@@ -1649,8 +1641,6 @@ static void mana_poll_rx_cq(struct mana_cq *cq) + return; + + mana_process_rx_cqe(rxq, cq, &comp[i]); +- +- apc->eth_stats.rx_cqes--; + } + + if (rxq->xdp_flush) +diff --git a/drivers/net/ethernet/microsoft/mana/mana_ethtool.c b/drivers/net/ethernet/microsoft/mana/mana_ethtool.c +index a64c81410dc1..0dc78679f620 100644 +--- a/drivers/net/ethernet/microsoft/mana/mana_ethtool.c ++++ b/drivers/net/ethernet/microsoft/mana/mana_ethtool.c +@@ -13,11 +13,9 @@ static const struct { + } mana_eth_stats[] = { + {"stop_queue", offsetof(struct mana_ethtool_stats, stop_queue)}, + {"wake_queue", offsetof(struct mana_ethtool_stats, wake_queue)}, +- {"tx_cqes", offsetof(struct mana_ethtool_stats, tx_cqes)}, + {"tx_cq_err", offsetof(struct mana_ethtool_stats, tx_cqe_err)}, + {"tx_cqe_unknown_type", offsetof(struct mana_ethtool_stats, + tx_cqe_unknown_type)}, +- {"rx_cqes", offsetof(struct mana_ethtool_stats, rx_cqes)}, + {"rx_coalesced_err", offsetof(struct mana_ethtool_stats, + rx_coalesced_err)}, + {"rx_cqe_unknown_type", offsetof(struct mana_ethtool_stats, +diff 
--git a/include/net/mana/mana.h b/include/net/mana/mana.h +index 4f19d73b66ae..3abfce420594 100644 +--- a/include/net/mana/mana.h ++++ b/include/net/mana/mana.h +@@ -347,10 +347,8 @@ struct mana_tx_qp { + struct mana_ethtool_stats { + u64 stop_queue; + u64 wake_queue; +- u64 tx_cqes; + u64 tx_cqe_err; + u64 tx_cqe_unknown_type; +- u64 rx_cqes; + u64 rx_coalesced_err; + u64 rx_cqe_unknown_type; + }; +-- +2.40.1 + diff --git a/debian/patches/features/all/ethernet-microsoft/0023-net-mana-Add-support-for-vlan-tagging.patch b/debian/patches/features/all/ethernet-microsoft/0023-net-mana-Add-support-for-vlan-tagging.patch new file mode 100644 index 000000000..74866c088 --- /dev/null +++ b/debian/patches/features/all/ethernet-microsoft/0023-net-mana-Add-support-for-vlan-tagging.patch @@ -0,0 +1,67 @@ +From dc801500fea4fd5ddecc4007acffb5716a761f51 Mon Sep 17 00:00:00 2001 +From: Haiyang Zhang <haiyangz@microsoft.com> +Date: Fri, 9 Jun 2023 05:47:17 -0700 +Subject: [PATCH 23/23] net: mana: Add support for vlan tagging + +To support vlan, use MANA_LONG_PKT_FMT if vlan tag is present in TX +skb. Then extract the vlan tag from the skb struct, and save it to +tx_oob for the NIC to transmit. For vlan tags on the payload, they +are accepted by the NIC too. + +For RX, extract the vlan tag from CQE and put it into skb. + +Signed-off-by: Haiyang Zhang <haiyangz@microsoft.com> +Signed-off-by: David S. 
Miller <davem@davemloft.net> +(cherry picked from commit b803d1fded4085d268507a432dac8077ead68971) +Signed-off-by: Bastian Blank <waldi@debian.org> +--- + drivers/net/ethernet/microsoft/mana/mana_en.c | 19 +++++++++++++++++-- + 1 file changed, 17 insertions(+), 2 deletions(-) + +diff --git a/drivers/net/ethernet/microsoft/mana/mana_en.c b/drivers/net/ethernet/microsoft/mana/mana_en.c +index 25b17213e54d..808dbf55beef 100644 +--- a/drivers/net/ethernet/microsoft/mana/mana_en.c ++++ b/drivers/net/ethernet/microsoft/mana/mana_en.c +@@ -179,6 +179,14 @@ netdev_tx_t mana_start_xmit(struct sk_buff *skb, struct net_device *ndev) + pkg.tx_oob.s_oob.short_vp_offset = txq->vp_offset; + } + ++ if (skb_vlan_tag_present(skb)) { ++ pkt_fmt = MANA_LONG_PKT_FMT; ++ pkg.tx_oob.l_oob.inject_vlan_pri_tag = 1; ++ pkg.tx_oob.l_oob.pcp = skb_vlan_tag_get_prio(skb); ++ pkg.tx_oob.l_oob.dei = skb_vlan_tag_get_cfi(skb); ++ pkg.tx_oob.l_oob.vlan_id = skb_vlan_tag_get_id(skb); ++ } ++ + pkg.tx_oob.s_oob.pkt_fmt = pkt_fmt; + + if (pkt_fmt == MANA_SHORT_PKT_FMT) { +@@ -1458,6 +1466,12 @@ static void mana_rx_skb(void *buf_va, struct mana_rxcomp_oob *cqe, + skb_set_hash(skb, hash_value, PKT_HASH_TYPE_L3); + } + ++ if (cqe->rx_vlantag_present) { ++ u16 vlan_tci = cqe->rx_vlan_id; ++ ++ __vlan_hwaccel_put_tag(skb, htons(ETH_P_8021Q), vlan_tci); ++ } ++ + u64_stats_update_begin(&rx_stats->syncp); + rx_stats->packets++; + rx_stats->bytes += pkt_len; +@@ -2454,8 +2468,9 @@ static int mana_probe_port(struct mana_context *ac, int port_idx, + ndev->hw_features |= NETIF_F_RXCSUM; + ndev->hw_features |= NETIF_F_TSO | NETIF_F_TSO6; + ndev->hw_features |= NETIF_F_RXHASH; +- ndev->features = ndev->hw_features; +- ndev->vlan_features = 0; ++ ndev->features = ndev->hw_features | NETIF_F_HW_VLAN_CTAG_TX | ++ NETIF_F_HW_VLAN_CTAG_RX; ++ ndev->vlan_features = ndev->features; + + err = register_netdev(ndev); + if (err) { +-- +2.40.1 + diff --git 
a/debian/patches/features/all/ethernet-microsoft/0024-RDMA-mana_ib-Use-v2-version-of-cfg_rx_steer_req-to-e.patch b/debian/patches/features/all/ethernet-microsoft/0024-RDMA-mana_ib-Use-v2-version-of-cfg_rx_steer_req-to-e.patch new file mode 100644 index 000000000..fb8020cdb --- /dev/null +++ b/debian/patches/features/all/ethernet-microsoft/0024-RDMA-mana_ib-Use-v2-version-of-cfg_rx_steer_req-to-e.patch @@ -0,0 +1,78 @@ +From 672aac0622254a8f1f5f9d2aa8cee2319654d083 Mon Sep 17 00:00:00 2001 +From: Long Li <longli@microsoft.com> +Date: Sat, 13 May 2023 23:18:15 -0700 +Subject: [PATCH 24/30] RDMA/mana_ib: Use v2 version of cfg_rx_steer_req to + enable RX coalescing + +With RX coalescing, one CQE entry can be used to indicate multiple packets +on the receive queue. This saves processing time and PCI bandwidth over +the CQ. + +The MANA Ethernet driver also uses the v2 version of the protocol. It +doesn't use RX coalescing and its behavior is not changed. + +Link: https://lore.kernel.org/r/1684045095-31228-1-git-send-email-longli@linuxonhyperv.com +Signed-off-by: Long Li <longli@microsoft.com> +Signed-off-by: Jason Gunthorpe <jgg@nvidia.com> +(cherry picked from commit 2145328515c8fa9b8a9f7889250bc6c032f2a0e6) +Signed-off-by: Bastian Blank <waldi@debian.org> +--- + drivers/net/ethernet/microsoft/mana/mana_en.c | 5 ++++- + include/net/mana/mana.h | 4 +++- + 2 files changed, 7 insertions(+), 2 deletions(-) + +diff --git a/drivers/net/ethernet/microsoft/mana/mana_en.c b/drivers/net/ethernet/microsoft/mana/mana_en.c +index 808dbf55beef..7d82b8949a1e 100644 +--- a/drivers/net/ethernet/microsoft/mana/mana_en.c ++++ b/drivers/net/ethernet/microsoft/mana/mana_en.c +@@ -981,7 +981,7 @@ static int mana_cfg_vport_steering(struct mana_port_context *apc, + bool update_tab) + { + u16 num_entries = MANA_INDIRECT_TABLE_SIZE; +- struct mana_cfg_rx_steer_req *req = NULL; ++ struct mana_cfg_rx_steer_req_v2 *req; + struct mana_cfg_rx_steer_resp resp = {}; + struct net_device *ndev = 
apc->ndev; + mana_handle_t *req_indir_tab; +@@ -996,6 +996,8 @@ static int mana_cfg_vport_steering(struct mana_port_context *apc, + mana_gd_init_req_hdr(&req->hdr, MANA_CONFIG_VPORT_RX, req_buf_size, + sizeof(resp)); + ++ req->hdr.req.msg_version = GDMA_MESSAGE_V2; ++ + req->vport = apc->port_handle; + req->num_indir_entries = num_entries; + req->indir_tab_offset = sizeof(*req); +@@ -1005,6 +1007,7 @@ static int mana_cfg_vport_steering(struct mana_port_context *apc, + req->update_hashkey = update_key; + req->update_indir_tab = update_tab; + req->default_rxobj = apc->default_rxobj; ++ req->cqe_coalescing_enable = 0; + + if (update_key) + memcpy(&req->hashkey, apc->hashkey, MANA_HASH_KEY_SIZE); +diff --git a/include/net/mana/mana.h b/include/net/mana/mana.h +index 3abfce420594..46b5769a5b6a 100644 +--- a/include/net/mana/mana.h ++++ b/include/net/mana/mana.h +@@ -576,7 +576,7 @@ struct mana_fence_rq_resp { + }; /* HW DATA */ + + /* Configure vPort Rx Steering */ +-struct mana_cfg_rx_steer_req { ++struct mana_cfg_rx_steer_req_v2 { + struct gdma_req_hdr hdr; + mana_handle_t vport; + u16 num_indir_entries; +@@ -589,6 +589,8 @@ struct mana_cfg_rx_steer_req { + u8 reserved; + mana_handle_t default_rxobj; + u8 hashkey[MANA_HASH_KEY_SIZE]; ++ u8 cqe_coalescing_enable; ++ u8 reserved2[7]; + }; /* HW DATA */ + + struct mana_cfg_rx_steer_resp { +-- +2.40.1 + diff --git a/debian/patches/features/all/ethernet-microsoft/0025-net-mana-use-vmalloc_array-and-vcalloc.patch b/debian/patches/features/all/ethernet-microsoft/0025-net-mana-use-vmalloc_array-and-vcalloc.patch new file mode 100644 index 000000000..348583771 --- /dev/null +++ b/debian/patches/features/all/ethernet-microsoft/0025-net-mana-use-vmalloc_array-and-vcalloc.patch @@ -0,0 +1,75 @@ +From 4a202b0bfd1b2f3509361e5d3f6040f7af90c2ec Mon Sep 17 00:00:00 2001 +From: Julia Lawall <Julia.Lawall@inria.fr> +Date: Tue, 27 Jun 2023 16:43:37 +0200 +Subject: [PATCH 25/30] net: mana: use vmalloc_array and vcalloc + +Use 
vmalloc_array and vcalloc to protect against +multiplication overflows. + +The changes were done using the following Coccinelle +semantic patch: + +// <smpl> +@initialize:ocaml@ +@@ + +let rename alloc = + match alloc with + "vmalloc" -> "vmalloc_array" + | "vzalloc" -> "vcalloc" + | _ -> failwith "unknown" + +@@ + size_t e1,e2; + constant C1, C2; + expression E1, E2, COUNT, x1, x2, x3; + typedef u8; + typedef __u8; + type t = {u8,__u8,char,unsigned char}; + identifier alloc = {vmalloc,vzalloc}; + fresh identifier realloc = script:ocaml(alloc) { rename alloc }; +@@ + +( + alloc(x1*x2*x3) +| + alloc(C1 * C2) +| + alloc((sizeof(t)) * (COUNT), ...) +| +- alloc((e1) * (e2)) ++ realloc(e1, e2) +| +- alloc((e1) * (COUNT)) ++ realloc(COUNT, e1) +| +- alloc((E1) * (E2)) ++ realloc(E1, E2) +) +// </smpl> + +Signed-off-by: Julia Lawall <Julia.Lawall@inria.fr> +Link: https://lore.kernel.org/r/20230627144339.144478-23-Julia.Lawall@inria.fr +Signed-off-by: Jakub Kicinski <kuba@kernel.org> +(cherry picked from commit e9c74f8b8a31f77f8e9d7bbed5fc9f2eacbf32a5) +Signed-off-by: Bastian Blank <waldi@debian.org> +--- + drivers/net/ethernet/microsoft/mana/hw_channel.c | 2 +- + 1 file changed, 1 insertion(+), 1 deletion(-) + +diff --git a/drivers/net/ethernet/microsoft/mana/hw_channel.c b/drivers/net/ethernet/microsoft/mana/hw_channel.c +index 9d1507eba5b9..2bd1d74021f7 100644 +--- a/drivers/net/ethernet/microsoft/mana/hw_channel.c ++++ b/drivers/net/ethernet/microsoft/mana/hw_channel.c +@@ -627,7 +627,7 @@ static int mana_hwc_establish_channel(struct gdma_context *gc, u16 *q_depth, + if (WARN_ON(cq->id >= gc->max_num_cqs)) + return -EPROTO; + +- gc->cq_table = vzalloc(gc->max_num_cqs * sizeof(struct gdma_queue *)); ++ gc->cq_table = vcalloc(gc->max_num_cqs, sizeof(struct gdma_queue *)); + if (!gc->cq_table) + return -ENOMEM; + +-- +2.40.1 + diff --git a/debian/patches/features/all/ethernet-microsoft/0026-net-mana-Batch-ringing-RX-queue-doorbell-on-receivin.patch 
b/debian/patches/features/all/ethernet-microsoft/0026-net-mana-Batch-ringing-RX-queue-doorbell-on-receivin.patch new file mode 100644 index 000000000..6f03091cd --- /dev/null +++ b/debian/patches/features/all/ethernet-microsoft/0026-net-mana-Batch-ringing-RX-queue-doorbell-on-receivin.patch @@ -0,0 +1,62 @@ +From ad96bbb0b221f0cc22fba13a28234636bf7fb9f5 Mon Sep 17 00:00:00 2001 +From: Long Li <longli@microsoft.com> +Date: Mon, 17 Jul 2023 12:35:38 -0700 +Subject: [PATCH 26/30] net: mana: Batch ringing RX queue doorbell on receiving + packets + +It's inefficient to ring the doorbell page every time a WQE is posted to +the received queue. Excessive MMIO writes result in CPU spending more +time waiting on LOCK instructions (atomic operations), resulting in +poor scaling performance. + +Move the code for ringing doorbell page to where after we have posted all +WQEs to the receive queue during a callback from napi_poll(). + +With this change, tests showed an improvement from 120G/s to 160G/s on a +200G physical link, with 16 or 32 hardware queues. + +Tests showed no regression in network latency benchmarks on single +connection. 
+ +Reviewed-by: Haiyang Zhang <haiyangz@microsoft.com> +Reviewed-by: Dexuan Cui <decui@microsoft.com> +Signed-off-by: Long Li <longli@microsoft.com> +Link: https://lore.kernel.org/r/1689622539-5334-2-git-send-email-longli@linuxonhyperv.com +Signed-off-by: Jakub Kicinski <kuba@kernel.org> +(cherry picked from commit da4e8648079eb6f26f3a88d8c34270a057e2bfe6) +Signed-off-by: Bastian Blank <waldi@debian.org> +--- + drivers/net/ethernet/microsoft/mana/mana_en.c | 10 ++++++++-- + 1 file changed, 8 insertions(+), 2 deletions(-) + +diff --git a/drivers/net/ethernet/microsoft/mana/mana_en.c b/drivers/net/ethernet/microsoft/mana/mana_en.c +index 7d82b8949a1e..75c1f81ce64f 100644 +--- a/drivers/net/ethernet/microsoft/mana/mana_en.c ++++ b/drivers/net/ethernet/microsoft/mana/mana_en.c +@@ -1387,8 +1387,8 @@ static void mana_post_pkt_rxq(struct mana_rxq *rxq) + + recv_buf_oob = &rxq->rx_oobs[curr_index]; + +- err = mana_gd_post_and_ring(rxq->gdma_rq, &recv_buf_oob->wqe_req, +- &recv_buf_oob->wqe_inf); ++ err = mana_gd_post_work_request(rxq->gdma_rq, &recv_buf_oob->wqe_req, ++ &recv_buf_oob->wqe_inf); + if (WARN_ON_ONCE(err)) + return; + +@@ -1660,6 +1660,12 @@ static void mana_poll_rx_cq(struct mana_cq *cq) + mana_process_rx_cqe(rxq, cq, &comp[i]); + } + ++ if (comp_read > 0) { ++ struct gdma_context *gc = rxq->gdma_rq->gdma_dev->gdma_context; ++ ++ mana_gd_wq_ring_doorbell(gc, rxq->gdma_rq); ++ } ++ + if (rxq->xdp_flush) + xdp_do_flush(); + } +-- +2.40.1 + diff --git a/debian/patches/features/all/ethernet-microsoft/0027-net-mana-Use-the-correct-WQE-count-for-ringing-RQ-do.patch b/debian/patches/features/all/ethernet-microsoft/0027-net-mana-Use-the-correct-WQE-count-for-ringing-RQ-do.patch new file mode 100644 index 000000000..d0bb113fc --- /dev/null +++ b/debian/patches/features/all/ethernet-microsoft/0027-net-mana-Use-the-correct-WQE-count-for-ringing-RQ-do.patch @@ -0,0 +1,41 @@ +From ac7f97738c05e80f7de5e4ccbbf1ea82fdad956c Mon Sep 17 00:00:00 2001 +From: Long Li 
<longli@microsoft.com> +Date: Mon, 17 Jul 2023 12:35:39 -0700 +Subject: [PATCH 27/30] net: mana: Use the correct WQE count for ringing RQ + doorbell + +The hardware specification specifies that WQE_COUNT should set to 0 for +the Receive Queue. Although currently the hardware doesn't enforce the +check, in the future releases it may check on this value. + +Reviewed-by: Haiyang Zhang <haiyangz@microsoft.com> +Reviewed-by: Dexuan Cui <decui@microsoft.com> +Signed-off-by: Long Li <longli@microsoft.com> +Link: https://lore.kernel.org/r/1689622539-5334-3-git-send-email-longli@linuxonhyperv.com +Signed-off-by: Jakub Kicinski <kuba@kernel.org> +(cherry picked from commit f5e39b57124fd4715d7f0e2f841b8609b38f3e40) +Signed-off-by: Bastian Blank <waldi@debian.org> +--- + drivers/net/ethernet/microsoft/mana/gdma_main.c | 5 ++++- + 1 file changed, 4 insertions(+), 1 deletion(-) + +diff --git a/drivers/net/ethernet/microsoft/mana/gdma_main.c b/drivers/net/ethernet/microsoft/mana/gdma_main.c +index 97a1845c676a..6108a481edcb 100644 +--- a/drivers/net/ethernet/microsoft/mana/gdma_main.c ++++ b/drivers/net/ethernet/microsoft/mana/gdma_main.c +@@ -300,8 +300,11 @@ static void mana_gd_ring_doorbell(struct gdma_context *gc, u32 db_index, + + void mana_gd_wq_ring_doorbell(struct gdma_context *gc, struct gdma_queue *queue) + { ++ /* Hardware Spec specifies that software client should set 0 for ++ * wqe_cnt for Receive Queues. This value is not used in Send Queues. 
++ */ + mana_gd_ring_doorbell(gc, queue->gdma_dev->doorbell, queue->type, +- queue->id, queue->head * GDMA_WQE_BU_SIZE, 1); ++ queue->id, queue->head * GDMA_WQE_BU_SIZE, 0); + } + + void mana_gd_ring_cq(struct gdma_queue *cq, u8 arm_bit) +-- +2.40.1 + diff --git a/debian/patches/features/all/ethernet-microsoft/0028-net-mana-Configure-hwc-timeout-from-hardware.patch b/debian/patches/features/all/ethernet-microsoft/0028-net-mana-Configure-hwc-timeout-from-hardware.patch new file mode 100644 index 000000000..b5c24df16 --- /dev/null +++ b/debian/patches/features/all/ethernet-microsoft/0028-net-mana-Configure-hwc-timeout-from-hardware.patch @@ -0,0 +1,217 @@ +From 4c91517cbcdd16c2a9aee0e21066f2f1aa193ece Mon Sep 17 00:00:00 2001 +From: Souradeep Chakrabarti <schakrabarti@linux.microsoft.com> +Date: Wed, 2 Aug 2023 04:07:40 -0700 +Subject: [PATCH 28/30] net: mana: Configure hwc timeout from hardware + +At present hwc timeout value is a fixed value. This patch sets the hwc +timeout from the hardware. It now uses a new hardware capability +GDMA_DRV_CAP_FLAG_1_HWC_TIMEOUT_RECONFIG to query and set the value +in hwc_timeout. + +Signed-off-by: Souradeep Chakrabarti <schakrabarti@linux.microsoft.com> +Reviewed-by: Jesse Brandeburg <jesse.brandeburg@intel.com> +Signed-off-by: David S. 
Miller <davem@davemloft.net> +(cherry picked from commit 62c1bff593b7e30041d0273b835af9fd6f5ee737) +Signed-off-by: Bastian Blank <waldi@debian.org> +--- + .../net/ethernet/microsoft/mana/gdma_main.c | 30 ++++++++++++++++++- + .../net/ethernet/microsoft/mana/hw_channel.c | 24 ++++++++++++++- + include/net/mana/gdma.h | 20 ++++++++++++- + include/net/mana/hw_channel.h | 5 ++++ + 4 files changed, 76 insertions(+), 3 deletions(-) + +diff --git a/drivers/net/ethernet/microsoft/mana/gdma_main.c b/drivers/net/ethernet/microsoft/mana/gdma_main.c +index 6108a481edcb..4fa1901a2789 100644 +--- a/drivers/net/ethernet/microsoft/mana/gdma_main.c ++++ b/drivers/net/ethernet/microsoft/mana/gdma_main.c +@@ -106,6 +106,25 @@ static int mana_gd_query_max_resources(struct pci_dev *pdev) + return 0; + } + ++static int mana_gd_query_hwc_timeout(struct pci_dev *pdev, u32 *timeout_val) ++{ ++ struct gdma_context *gc = pci_get_drvdata(pdev); ++ struct gdma_query_hwc_timeout_resp resp = {}; ++ struct gdma_query_hwc_timeout_req req = {}; ++ int err; ++ ++ mana_gd_init_req_hdr(&req.hdr, GDMA_QUERY_HWC_TIMEOUT, ++ sizeof(req), sizeof(resp)); ++ req.timeout_ms = *timeout_val; ++ err = mana_gd_send_request(gc, sizeof(req), &req, sizeof(resp), &resp); ++ if (err || resp.hdr.status) ++ return err ? err : -EPROTO; ++ ++ *timeout_val = resp.timeout_ms; ++ ++ return 0; ++} ++ + static int mana_gd_detect_devices(struct pci_dev *pdev) + { + struct gdma_context *gc = pci_get_drvdata(pdev); +@@ -883,8 +902,10 @@ int mana_gd_verify_vf_version(struct pci_dev *pdev) + struct gdma_context *gc = pci_get_drvdata(pdev); + struct gdma_verify_ver_resp resp = {}; + struct gdma_verify_ver_req req = {}; ++ struct hw_channel_context *hwc; + int err; + ++ hwc = gc->hwc.driver_data; + mana_gd_init_req_hdr(&req.hdr, GDMA_VERIFY_VF_DRIVER_VERSION, + sizeof(req), sizeof(resp)); + +@@ -911,7 +932,14 @@ int mana_gd_verify_vf_version(struct pci_dev *pdev) + err, resp.hdr.status); + return err ? 
err : -EPROTO; + } +- ++ if (resp.pf_cap_flags1 & GDMA_DRV_CAP_FLAG_1_HWC_TIMEOUT_RECONFIG) { ++ err = mana_gd_query_hwc_timeout(pdev, &hwc->hwc_timeout); ++ if (err) { ++ dev_err(gc->dev, "Failed to set the hwc timeout %d\n", err); ++ return err; ++ } ++ dev_dbg(gc->dev, "set the hwc timeout to %u\n", hwc->hwc_timeout); ++ } + return 0; + } + +diff --git a/drivers/net/ethernet/microsoft/mana/hw_channel.c b/drivers/net/ethernet/microsoft/mana/hw_channel.c +index 2bd1d74021f7..9d1cd3bfcf66 100644 +--- a/drivers/net/ethernet/microsoft/mana/hw_channel.c ++++ b/drivers/net/ethernet/microsoft/mana/hw_channel.c +@@ -174,7 +174,25 @@ static void mana_hwc_init_event_handler(void *ctx, struct gdma_queue *q_self, + complete(&hwc->hwc_init_eqe_comp); + break; + ++ case GDMA_EQE_HWC_SOC_RECONFIG_DATA: ++ type_data.as_uint32 = event->details[0]; ++ type = type_data.type; ++ val = type_data.value; ++ ++ switch (type) { ++ case HWC_DATA_CFG_HWC_TIMEOUT: ++ hwc->hwc_timeout = val; ++ break; ++ ++ default: ++ dev_warn(hwc->dev, "Received unknown reconfig type %u\n", type); ++ break; ++ } ++ ++ break; ++ + default: ++ dev_warn(hwc->dev, "Received unknown gdma event %u\n", event->type); + /* Ignore unknown events, which should never happen. */ + break; + } +@@ -696,6 +714,7 @@ int mana_hwc_create_channel(struct gdma_context *gc) + gd->driver_data = hwc; + hwc->gdma_dev = gd; + hwc->dev = gc->dev; ++ hwc->hwc_timeout = HW_CHANNEL_WAIT_RESOURCE_TIMEOUT_MS; + + /* HWC's instance number is always 0. 
*/ + gd->dev_id.as_uint32 = 0; +@@ -770,6 +789,8 @@ void mana_hwc_destroy_channel(struct gdma_context *gc) + hwc->gdma_dev->doorbell = INVALID_DOORBELL; + hwc->gdma_dev->pdid = INVALID_PDID; + ++ hwc->hwc_timeout = 0; ++ + kfree(hwc); + gc->hwc.driver_data = NULL; + gc->hwc.gdma_context = NULL; +@@ -825,7 +846,8 @@ int mana_hwc_send_request(struct hw_channel_context *hwc, u32 req_len, + goto out; + } + +- if (!wait_for_completion_timeout(&ctx->comp_event, 30 * HZ)) { ++ if (!wait_for_completion_timeout(&ctx->comp_event, ++ (msecs_to_jiffies(hwc->hwc_timeout) * HZ))) { + dev_err(hwc->dev, "HWC: Request timed out!\n"); + err = -ETIMEDOUT; + goto out; +diff --git a/include/net/mana/gdma.h b/include/net/mana/gdma.h +index b27bf852471b..102bb73c7a4f 100644 +--- a/include/net/mana/gdma.h ++++ b/include/net/mana/gdma.h +@@ -31,6 +31,7 @@ enum gdma_request_type { + GDMA_DESTROY_PD = 30, + GDMA_CREATE_MR = 31, + GDMA_DESTROY_MR = 32, ++ GDMA_QUERY_HWC_TIMEOUT = 84, /* 0x54 */ + }; + + enum gdma_queue_type { +@@ -53,6 +54,8 @@ enum gdma_eqe_type { + GDMA_EQE_HWC_INIT_EQ_ID_DB = 129, + GDMA_EQE_HWC_INIT_DATA = 130, + GDMA_EQE_HWC_INIT_DONE = 131, ++ GDMA_EQE_HWC_SOC_RECONFIG = 132, ++ GDMA_EQE_HWC_SOC_RECONFIG_DATA = 133, + }; + + enum { +@@ -529,10 +532,12 @@ enum { + * so the driver is able to reliably support features like busy_poll. 
+ */ + #define GDMA_DRV_CAP_FLAG_1_NAPI_WKDONE_FIX BIT(2) ++#define GDMA_DRV_CAP_FLAG_1_HWC_TIMEOUT_RECONFIG BIT(3) + + #define GDMA_DRV_CAP_FLAGS1 \ + (GDMA_DRV_CAP_FLAG_1_EQ_SHARING_MULTI_VPORT | \ +- GDMA_DRV_CAP_FLAG_1_NAPI_WKDONE_FIX) ++ GDMA_DRV_CAP_FLAG_1_NAPI_WKDONE_FIX | \ ++ GDMA_DRV_CAP_FLAG_1_HWC_TIMEOUT_RECONFIG) + + #define GDMA_DRV_CAP_FLAGS2 0 + +@@ -642,6 +647,19 @@ struct gdma_disable_queue_req { + u32 alloc_res_id_on_creation; + }; /* HW DATA */ + ++/* GDMA_QUERY_HWC_TIMEOUT */ ++struct gdma_query_hwc_timeout_req { ++ struct gdma_req_hdr hdr; ++ u32 timeout_ms; ++ u32 reserved; ++}; ++ ++struct gdma_query_hwc_timeout_resp { ++ struct gdma_resp_hdr hdr; ++ u32 timeout_ms; ++ u32 reserved; ++}; ++ + enum atb_page_size { + ATB_PAGE_SIZE_4K, + ATB_PAGE_SIZE_8K, +diff --git a/include/net/mana/hw_channel.h b/include/net/mana/hw_channel.h +index 6a757a6e2732..3d3b5c881bc1 100644 +--- a/include/net/mana/hw_channel.h ++++ b/include/net/mana/hw_channel.h +@@ -23,6 +23,10 @@ + #define HWC_INIT_DATA_PF_DEST_RQ_ID 10 + #define HWC_INIT_DATA_PF_DEST_CQ_ID 11 + ++#define HWC_DATA_CFG_HWC_TIMEOUT 1 ++ ++#define HW_CHANNEL_WAIT_RESOURCE_TIMEOUT_MS 30000 ++ + /* Structures labeled with "HW DATA" are exchanged with the hardware. All of + * them are naturally aligned and hence don't need __packed. 
+ */ +@@ -182,6 +186,7 @@ struct hw_channel_context { + + u32 pf_dest_vrq_id; + u32 pf_dest_vrcq_id; ++ u32 hwc_timeout; + + struct hwc_caller_ctx *caller_ctx; + }; +-- +2.40.1 + diff --git a/debian/patches/features/all/ethernet-microsoft/0029-net-mana-Rename-mana_refill_rxoob-and-remove-some-em.patch b/debian/patches/features/all/ethernet-microsoft/0029-net-mana-Rename-mana_refill_rxoob-and-remove-some-em.patch new file mode 100644 index 000000000..a9f8249bd --- /dev/null +++ b/debian/patches/features/all/ethernet-microsoft/0029-net-mana-Rename-mana_refill_rxoob-and-remove-some-em.patch @@ -0,0 +1,67 @@ +From 609fba0ab4ebdde830c8c52ef0dcc2ea8f5e82fe Mon Sep 17 00:00:00 2001 +From: Haiyang Zhang <haiyangz@microsoft.com> +Date: Fri, 21 Apr 2023 10:06:57 -0700 +Subject: [PATCH 29/30] net: mana: Rename mana_refill_rxoob and remove some + empty lines + +Rename mana_refill_rxoob for naming consistency. +And remove some empty lines between function call and error +checking. + +Signed-off-by: Haiyang Zhang <haiyangz@microsoft.com> +Signed-off-by: Jakub Kicinski <kuba@kernel.org> +(cherry picked from commit 5c74064f43c291d9add2b436a2d70205b71a7cc7) +Signed-off-by: Bastian Blank <waldi@debian.org> +--- + drivers/net/ethernet/microsoft/mana/mana_en.c | 9 +++------ + 1 file changed, 3 insertions(+), 6 deletions(-) + +diff --git a/drivers/net/ethernet/microsoft/mana/mana_en.c b/drivers/net/ethernet/microsoft/mana/mana_en.c +index 75c1f81ce64f..6aa273b68327 100644 +--- a/drivers/net/ethernet/microsoft/mana/mana_en.c ++++ b/drivers/net/ethernet/microsoft/mana/mana_en.c +@@ -579,7 +579,6 @@ static int mana_pre_alloc_rxbufs(struct mana_port_context *mpc, int new_mtu) + + da = dma_map_single(dev, va + mpc->rxbpre_headroom, + mpc->rxbpre_datasize, DMA_FROM_DEVICE); +- + if (dma_mapping_error(dev, da)) { + put_page(virt_to_head_page(va)); + goto error; +@@ -1543,7 +1542,6 @@ static void *mana_get_rxfrag(struct mana_rxq *rxq, struct device *dev, + + *da = dma_map_single(dev, va + 
rxq->headroom, rxq->datasize, + DMA_FROM_DEVICE); +- + if (dma_mapping_error(dev, *da)) { + put_page(virt_to_head_page(va)); + return NULL; +@@ -1553,14 +1551,13 @@ static void *mana_get_rxfrag(struct mana_rxq *rxq, struct device *dev, + } + + /* Allocate frag for rx buffer, and save the old buf */ +-static void mana_refill_rxoob(struct device *dev, struct mana_rxq *rxq, +- struct mana_recv_buf_oob *rxoob, void **old_buf) ++static void mana_refill_rx_oob(struct device *dev, struct mana_rxq *rxq, ++ struct mana_recv_buf_oob *rxoob, void **old_buf) + { + dma_addr_t da; + void *va; + + va = mana_get_rxfrag(rxq, dev, &da, true); +- + if (!va) + return; + +@@ -1625,7 +1622,7 @@ static void mana_process_rx_cqe(struct mana_rxq *rxq, struct mana_cq *cq, + rxbuf_oob = &rxq->rx_oobs[curr]; + WARN_ON_ONCE(rxbuf_oob->wqe_inf.wqe_size_in_bu != 1); + +- mana_refill_rxoob(dev, rxq, rxbuf_oob, &old_buf); ++ mana_refill_rx_oob(dev, rxq, rxbuf_oob, &old_buf); + + /* Unsuccessful refill will have old_buf == NULL. + * In this case, mana_rx_skb() will drop the packet. +-- +2.40.1 + diff --git a/debian/patches/features/all/ethernet-microsoft/0030-net-mana-Add-gdma-stats-to-ethtool-output-for-mana.patch b/debian/patches/features/all/ethernet-microsoft/0030-net-mana-Add-gdma-stats-to-ethtool-output-for-mana.patch new file mode 100644 index 000000000..07caff6a9 --- /dev/null +++ b/debian/patches/features/all/ethernet-microsoft/0030-net-mana-Add-gdma-stats-to-ethtool-output-for-mana.patch @@ -0,0 +1,232 @@ +From b7a5d522d26c84055bcc410b51b8cefc64fcdbca Mon Sep 17 00:00:00 2001 +From: Shradha Gupta <shradhagupta@linux.microsoft.com> +Date: Wed, 9 Aug 2023 21:15:22 -0700 +Subject: [PATCH 30/30] net: mana: Add gdma stats to ethtool output for mana + +Extended performance counter stats in 'ethtool -S <interface>' +for MANA VF to include GDMA tx LSO packets and bytes count. + +Tested-on: Ubuntu22 +Testcases: +1. LISA testcase: +PERF-NETWORK-TCP-THROUGHPUT-MULTICONNECTION-NTTTCP-Synthetic +2. 
LISA testcase: +PERF-NETWORK-TCP-THROUGHPUT-MULTICONNECTION-NTTTCP-SRIOV +3. Validated the GDMA stat packets and byte counters +Signed-off-by: Shradha Gupta <shradhagupta@linux.microsoft.com> +Reviewed-by: Pavan Chebbi <pavan.chebbi@broadcom.com> +Signed-off-by: David S. Miller <davem@davemloft.net> +(cherry picked from commit ac3899c6229649737b9d5cb86e417c98243883dc) +Signed-off-by: Bastian Blank <waldi@debian.org> +--- + drivers/net/ethernet/microsoft/mana/mana_en.c | 40 +++++++++ + .../ethernet/microsoft/mana/mana_ethtool.c | 15 ++++ + include/net/mana/mana.h | 87 +++++++++++++++++++ + 3 files changed, 142 insertions(+) + +diff --git a/drivers/net/ethernet/microsoft/mana/mana_en.c b/drivers/net/ethernet/microsoft/mana/mana_en.c +index 6aa273b68327..a006bd69b26e 100644 +--- a/drivers/net/ethernet/microsoft/mana/mana_en.c ++++ b/drivers/net/ethernet/microsoft/mana/mana_en.c +@@ -2234,6 +2234,46 @@ int mana_config_rss(struct mana_port_context *apc, enum TRI_STATE rx, + return 0; + } + ++void mana_query_gf_stats(struct mana_port_context *apc) ++{ ++ struct mana_query_gf_stat_resp resp = {}; ++ struct mana_query_gf_stat_req req = {}; ++ struct net_device *ndev = apc->ndev; ++ int err; ++ ++ mana_gd_init_req_hdr(&req.hdr, MANA_QUERY_GF_STAT, ++ sizeof(req), sizeof(resp)); ++ req.req_stats = STATISTICS_FLAGS_HC_TX_BYTES | ++ STATISTICS_FLAGS_HC_TX_UCAST_PACKETS | ++ STATISTICS_FLAGS_HC_TX_UCAST_BYTES | ++ STATISTICS_FLAGS_HC_TX_MCAST_PACKETS | ++ STATISTICS_FLAGS_HC_TX_MCAST_BYTES | ++ STATISTICS_FLAGS_HC_TX_BCAST_PACKETS | ++ STATISTICS_FLAGS_HC_TX_BCAST_BYTES; ++ ++ err = mana_send_request(apc->ac, &req, sizeof(req), &resp, ++ sizeof(resp)); ++ if (err) { ++ netdev_err(ndev, "Failed to query GF stats: %d\n", err); ++ return; ++ } ++ err = mana_verify_resp_hdr(&resp.hdr, MANA_QUERY_GF_STAT, ++ sizeof(resp)); ++ if (err || resp.hdr.status) { ++ netdev_err(ndev, "Failed to query GF stats: %d, 0x%x\n", err, ++ resp.hdr.status); ++ return; ++ } ++ ++ 
apc->eth_stats.hc_tx_bytes = resp.hc_tx_bytes; ++ apc->eth_stats.hc_tx_ucast_pkts = resp.hc_tx_ucast_pkts; ++ apc->eth_stats.hc_tx_ucast_bytes = resp.hc_tx_ucast_bytes; ++ apc->eth_stats.hc_tx_bcast_pkts = resp.hc_tx_bcast_pkts; ++ apc->eth_stats.hc_tx_bcast_bytes = resp.hc_tx_bcast_bytes; ++ apc->eth_stats.hc_tx_mcast_pkts = resp.hc_tx_mcast_pkts; ++ apc->eth_stats.hc_tx_mcast_bytes = resp.hc_tx_mcast_bytes; ++} ++ + static int mana_init_port(struct net_device *ndev) + { + struct mana_port_context *apc = netdev_priv(ndev); +diff --git a/drivers/net/ethernet/microsoft/mana/mana_ethtool.c b/drivers/net/ethernet/microsoft/mana/mana_ethtool.c +index 0dc78679f620..607150165ab4 100644 +--- a/drivers/net/ethernet/microsoft/mana/mana_ethtool.c ++++ b/drivers/net/ethernet/microsoft/mana/mana_ethtool.c +@@ -13,6 +13,19 @@ static const struct { + } mana_eth_stats[] = { + {"stop_queue", offsetof(struct mana_ethtool_stats, stop_queue)}, + {"wake_queue", offsetof(struct mana_ethtool_stats, wake_queue)}, ++ {"hc_tx_bytes", offsetof(struct mana_ethtool_stats, hc_tx_bytes)}, ++ {"hc_tx_ucast_pkts", offsetof(struct mana_ethtool_stats, ++ hc_tx_ucast_pkts)}, ++ {"hc_tx_ucast_bytes", offsetof(struct mana_ethtool_stats, ++ hc_tx_ucast_bytes)}, ++ {"hc_tx_bcast_pkts", offsetof(struct mana_ethtool_stats, ++ hc_tx_bcast_pkts)}, ++ {"hc_tx_bcast_bytes", offsetof(struct mana_ethtool_stats, ++ hc_tx_bcast_bytes)}, ++ {"hc_tx_mcast_pkts", offsetof(struct mana_ethtool_stats, ++ hc_tx_mcast_pkts)}, ++ {"hc_tx_mcast_bytes", offsetof(struct mana_ethtool_stats, ++ hc_tx_mcast_bytes)}, + {"tx_cq_err", offsetof(struct mana_ethtool_stats, tx_cqe_err)}, + {"tx_cqe_unknown_type", offsetof(struct mana_ethtool_stats, + tx_cqe_unknown_type)}, +@@ -114,6 +127,8 @@ static void mana_get_ethtool_stats(struct net_device *ndev, + + if (!apc->port_is_up) + return; ++ /* we call mana function to update stats from GDMA */ ++ mana_query_gf_stats(apc); + + for (q = 0; q < ARRAY_SIZE(mana_eth_stats); q++) + 
data[i++] = *(u64 *)(eth_stats + mana_eth_stats[q].offset); +diff --git a/include/net/mana/mana.h b/include/net/mana/mana.h +index 46b5769a5b6a..cc4502db2ced 100644 +--- a/include/net/mana/mana.h ++++ b/include/net/mana/mana.h +@@ -347,6 +347,13 @@ struct mana_tx_qp { + struct mana_ethtool_stats { + u64 stop_queue; + u64 wake_queue; ++ u64 hc_tx_bytes; ++ u64 hc_tx_ucast_pkts; ++ u64 hc_tx_ucast_bytes; ++ u64 hc_tx_bcast_pkts; ++ u64 hc_tx_bcast_bytes; ++ u64 hc_tx_mcast_pkts; ++ u64 hc_tx_mcast_bytes; + u64 tx_cqe_err; + u64 tx_cqe_unknown_type; + u64 rx_coalesced_err; +@@ -437,6 +444,7 @@ u32 mana_run_xdp(struct net_device *ndev, struct mana_rxq *rxq, + struct bpf_prog *mana_xdp_get(struct mana_port_context *apc); + void mana_chn_setxdp(struct mana_port_context *apc, struct bpf_prog *prog); + int mana_bpf(struct net_device *ndev, struct netdev_bpf *bpf); ++void mana_query_gf_stats(struct mana_port_context *apc); + + extern const struct ethtool_ops mana_ethtool_ops; + +@@ -575,6 +583,49 @@ struct mana_fence_rq_resp { + struct gdma_resp_hdr hdr; + }; /* HW DATA */ + ++/* Query stats RQ */ ++struct mana_query_gf_stat_req { ++ struct gdma_req_hdr hdr; ++ u64 req_stats; ++}; /* HW DATA */ ++ ++struct mana_query_gf_stat_resp { ++ struct gdma_resp_hdr hdr; ++ u64 reported_stats; ++ /* rx errors/discards */ ++ u64 discard_rx_nowqe; ++ u64 err_rx_vport_disabled; ++ /* rx bytes/packets */ ++ u64 hc_rx_bytes; ++ u64 hc_rx_ucast_pkts; ++ u64 hc_rx_ucast_bytes; ++ u64 hc_rx_bcast_pkts; ++ u64 hc_rx_bcast_bytes; ++ u64 hc_rx_mcast_pkts; ++ u64 hc_rx_mcast_bytes; ++ /* tx errors */ ++ u64 err_tx_gf_disabled; ++ u64 err_tx_vport_disabled; ++ u64 err_tx_inval_vport_offset_pkt; ++ u64 err_tx_vlan_enforcement; ++ u64 err_tx_ethtype_enforcement; ++ u64 err_tx_SA_enforecement; ++ u64 err_tx_SQPDID_enforcement; ++ u64 err_tx_CQPDID_enforcement; ++ u64 err_tx_mtu_violation; ++ u64 err_tx_inval_oob; ++ /* tx bytes/packets */ ++ u64 hc_tx_bytes; ++ u64 hc_tx_ucast_pkts; ++ u64 
hc_tx_ucast_bytes; ++ u64 hc_tx_bcast_pkts; ++ u64 hc_tx_bcast_bytes; ++ u64 hc_tx_mcast_pkts; ++ u64 hc_tx_mcast_bytes; ++ /* tx error */ ++ u64 err_tx_gdma; ++}; /* HW DATA */ ++ + /* Configure vPort Rx Steering */ + struct mana_cfg_rx_steer_req_v2 { + struct gdma_req_hdr hdr; +@@ -654,6 +705,42 @@ struct mana_deregister_filter_resp { + struct gdma_resp_hdr hdr; + }; /* HW DATA */ + ++/* Requested GF stats Flags */ ++/* Rx discards/Errors */ ++#define STATISTICS_FLAGS_RX_DISCARDS_NO_WQE 0x0000000000000001 ++#define STATISTICS_FLAGS_RX_ERRORS_VPORT_DISABLED 0x0000000000000002 ++/* Rx bytes/pkts */ ++#define STATISTICS_FLAGS_HC_RX_BYTES 0x0000000000000004 ++#define STATISTICS_FLAGS_HC_RX_UCAST_PACKETS 0x0000000000000008 ++#define STATISTICS_FLAGS_HC_RX_UCAST_BYTES 0x0000000000000010 ++#define STATISTICS_FLAGS_HC_RX_MCAST_PACKETS 0x0000000000000020 ++#define STATISTICS_FLAGS_HC_RX_MCAST_BYTES 0x0000000000000040 ++#define STATISTICS_FLAGS_HC_RX_BCAST_PACKETS 0x0000000000000080 ++#define STATISTICS_FLAGS_HC_RX_BCAST_BYTES 0x0000000000000100 ++/* Tx errors */ ++#define STATISTICS_FLAGS_TX_ERRORS_GF_DISABLED 0x0000000000000200 ++#define STATISTICS_FLAGS_TX_ERRORS_VPORT_DISABLED 0x0000000000000400 ++#define STATISTICS_FLAGS_TX_ERRORS_INVAL_VPORT_OFFSET_PACKETS \ ++ 0x0000000000000800 ++#define STATISTICS_FLAGS_TX_ERRORS_VLAN_ENFORCEMENT 0x0000000000001000 ++#define STATISTICS_FLAGS_TX_ERRORS_ETH_TYPE_ENFORCEMENT \ ++ 0x0000000000002000 ++#define STATISTICS_FLAGS_TX_ERRORS_SA_ENFORCEMENT 0x0000000000004000 ++#define STATISTICS_FLAGS_TX_ERRORS_SQPDID_ENFORCEMENT 0x0000000000008000 ++#define STATISTICS_FLAGS_TX_ERRORS_CQPDID_ENFORCEMENT 0x0000000000010000 ++#define STATISTICS_FLAGS_TX_ERRORS_MTU_VIOLATION 0x0000000000020000 ++#define STATISTICS_FLAGS_TX_ERRORS_INVALID_OOB 0x0000000000040000 ++/* Tx bytes/pkts */ ++#define STATISTICS_FLAGS_HC_TX_BYTES 0x0000000000080000 ++#define STATISTICS_FLAGS_HC_TX_UCAST_PACKETS 0x0000000000100000 ++#define 
STATISTICS_FLAGS_HC_TX_UCAST_BYTES 0x0000000000200000 ++#define STATISTICS_FLAGS_HC_TX_MCAST_PACKETS 0x0000000000400000 ++#define STATISTICS_FLAGS_HC_TX_MCAST_BYTES 0x0000000000800000 ++#define STATISTICS_FLAGS_HC_TX_BCAST_PACKETS 0x0000000001000000 ++#define STATISTICS_FLAGS_HC_TX_BCAST_BYTES 0x0000000002000000 ++/* Tx error */ ++#define STATISTICS_FLAGS_TX_ERRORS_GDMA_ERROR 0x0000000004000000 ++ + #define MANA_MAX_NUM_QUEUES 64 + + #define MANA_SHORT_VPORT_OFFSET_MAX ((1U << 8) - 1) +-- +2.40.1 + diff --git a/debian/patches/features/all/lockdown/arm64-add-kernel-config-option-to-lock-down-when.patch b/debian/patches/features/all/lockdown/arm64-add-kernel-config-option-to-lock-down-when.patch new file mode 100644 index 000000000..6f1ba8e5a --- /dev/null +++ b/debian/patches/features/all/lockdown/arm64-add-kernel-config-option-to-lock-down-when.patch @@ -0,0 +1,153 @@ +From: Linn Crosetto <linn@hpe.com> +Date: Tue, 30 Aug 2016 11:54:38 -0600 +Subject: arm64: add kernel config option to lock down when in Secure Boot mode +Bug-Debian: https://bugs.debian.org/831827 +Forwarded: no + +Add a kernel configuration option to lock down the kernel, to restrict +userspace's ability to modify the running kernel when UEFI Secure Boot is +enabled. Based on the x86 patch by Matthew Garrett. + +Determine the state of Secure Boot in the EFI stub and pass this to the +kernel using the FDT. 
+ +Signed-off-by: Linn Crosetto <linn@hpe.com> +[bwh: Forward-ported to 4.10: adjust context] +[Lukas Wunner: Forward-ported to 4.11: drop parts applied upstream] +[bwh: Forward-ported to 4.15 and lockdown patch set: + - Pass result of efi_get_secureboot() in stub through to + efi_set_secure_boot() in main kernel + - Use lockdown API and naming] +[bwh: Forward-ported to 4.19.3: adjust context in update_fdt()] +[dannf: Moved init_lockdown() call after uefi_init(), fixing SB detection] +[bwh: Drop call to init_lockdown(), as efi_set_secure_boot() now calls this] +[bwh: Forward-ported to 5.6: efi_get_secureboot() no longer takes a + sys_table parameter] +[bwh: Forward-ported to 5.7: EFI initialisation from FDT was rewritten, so: + - Add Secure Boot mode to the parameter enumeration in fdtparams.c + - Add a parameter to efi_get_fdt_params() to return the Secure Boot mode + - Since Xen does not have a property name defined for Secure Boot mode, + change efi_get_fdt_prop() to handle a missing property name by clearing + the output variable] +[Salvatore Bonaccorso: Forward-ported to 5.10: f30f242fb131 ("efi: Rename +arm-init to efi-init common for all arch") renamed arm-init.c to efi-init.c] +--- + drivers/firmware/efi/efi-init.c | 5 ++++- + drivers/firmware/efi/fdtparams.c | 12 +++++++++++- + drivers/firmware/efi/libstub/fdt.c | 6 ++++++ + include/linux/efi.h | 3 ++- + 4 files changed, 23 insertions(+), 3 deletions(-) + +--- a/drivers/firmware/efi/efi-init.c ++++ b/drivers/firmware/efi/efi-init.c +@@ -210,9 +210,10 @@ void __init efi_init(void) + { + struct efi_memory_map_data data; + u64 efi_system_table; ++ u32 secure_boot; + + /* Grab UEFI information placed in FDT by stub */ +- efi_system_table = efi_get_fdt_params(&data); ++ efi_system_table = efi_get_fdt_params(&data, &secure_boot); + if (!efi_system_table) + return; + +@@ -234,6 +235,8 @@ void __init efi_init(void) + return; + } + ++ efi_set_secure_boot(secure_boot); ++ + reserve_regions(); + /* + * For memblock 
manipulation, the cap should come after the memblock_add(). +--- a/drivers/firmware/efi/fdtparams.c ++++ b/drivers/firmware/efi/fdtparams.c +@@ -16,6 +16,7 @@ enum { + MMSIZE, + DCSIZE, + DCVERS, ++ SBMODE, + + PARAMCOUNT + }; +@@ -26,6 +27,7 @@ static __initconst const char name[][22] + [MMSIZE] = "MemMap Size ", + [DCSIZE] = "MemMap Desc. Size ", + [DCVERS] = "MemMap Desc. Version ", ++ [SBMODE] = "Secure Boot Enabled ", + }; + + static __initconst const struct { +@@ -41,6 +43,7 @@ static __initconst const struct { + [MMSIZE] = "xen,uefi-mmap-size", + [DCSIZE] = "xen,uefi-mmap-desc-size", + [DCVERS] = "xen,uefi-mmap-desc-ver", ++ [SBMODE] = "", + } + }, { + #endif +@@ -51,6 +54,7 @@ static __initconst const struct { + [MMSIZE] = "linux,uefi-mmap-size", + [DCSIZE] = "linux,uefi-mmap-desc-size", + [DCVERS] = "linux,uefi-mmap-desc-ver", ++ [SBMODE] = "linux,uefi-secure-boot", + } + } + }; +@@ -62,6 +66,11 @@ static int __init efi_get_fdt_prop(const + int len; + u64 val; + ++ if (!pname[0]) { ++ memset(var, 0, size); ++ return 0; ++ } ++ + prop = fdt_getprop(fdt, node, pname, &len); + if (!prop) + return 1; +@@ -79,7 +88,7 @@ static int __init efi_get_fdt_prop(const + return 0; + } + +-u64 __init efi_get_fdt_params(struct efi_memory_map_data *mm) ++u64 __init efi_get_fdt_params(struct efi_memory_map_data *mm, u32 *secure_boot) + { + const void *fdt = initial_boot_params; + unsigned long systab; +@@ -93,6 +102,7 @@ u64 __init efi_get_fdt_params(struct efi + [MMSIZE] = { &mm->size, sizeof(mm->size) }, + [DCSIZE] = { &mm->desc_size, sizeof(mm->desc_size) }, + [DCVERS] = { &mm->desc_version, sizeof(mm->desc_version) }, ++ [SBMODE] = { secure_boot, sizeof(*secure_boot) }, + }; + + BUILD_BUG_ON(ARRAY_SIZE(target) != ARRAY_SIZE(name)); +--- a/drivers/firmware/efi/libstub/fdt.c ++++ b/drivers/firmware/efi/libstub/fdt.c +@@ -148,6 +148,12 @@ static efi_status_t update_fdt(void *ori + } + } + ++ fdt_val32 = cpu_to_fdt32(efi_get_secureboot()); ++ status = fdt_setprop(fdt, node, 
"linux,uefi-secure-boot", ++ &fdt_val32, sizeof(fdt_val32)); ++ if (status) ++ goto fdt_set_fail; ++ + /* Shrink the FDT back to its minimum size: */ + fdt_pack(fdt); + +--- a/include/linux/efi.h ++++ b/include/linux/efi.h +@@ -662,7 +662,8 @@ extern void efi_mem_reserve(phys_addr_t + extern int efi_mem_reserve_persistent(phys_addr_t addr, u64 size); + extern void efi_initialize_iomem_resources(struct resource *code_resource, + struct resource *data_resource, struct resource *bss_resource); +-extern u64 efi_get_fdt_params(struct efi_memory_map_data *data); ++extern u64 efi_get_fdt_params(struct efi_memory_map_data *data, ++ u32 *secure_boot); + extern struct kobject *efi_kobj; + + extern int efi_reboot_quirk_mode; diff --git a/debian/patches/features/all/lockdown/efi-add-an-efi_secure_boot-flag-to-indicate-secure-b.patch b/debian/patches/features/all/lockdown/efi-add-an-efi_secure_boot-flag-to-indicate-secure-b.patch new file mode 100644 index 000000000..b46698757 --- /dev/null +++ b/debian/patches/features/all/lockdown/efi-add-an-efi_secure_boot-flag-to-indicate-secure-b.patch @@ -0,0 +1,153 @@ +From: David Howells <dhowells@redhat.com> +Date: Mon, 18 Feb 2019 12:45:03 +0000 +Subject: [28/30] efi: Add an EFI_SECURE_BOOT flag to indicate secure boot mode +Origin: https://git.kernel.org/pub/scm/linux/kernel/git/dhowells/linux-fs.git/commit?id=a5d70c55c603233c192b375f72116a395909da28 + +UEFI machines can be booted in Secure Boot mode. Add an EFI_SECURE_BOOT +flag that can be passed to efi_enabled() to find out whether secure boot is +enabled. + +Move the switch-statement in x86's setup_arch() that inteprets the +secure_boot boot parameter to generic code and set the bit there. 
+ +Suggested-by: Ard Biesheuvel <ard.biesheuvel@linaro.org> +Signed-off-by: David Howells <dhowells@redhat.com> +Reviewed-by: Ard Biesheuvel <ard.biesheuvel@linaro.org> +cc: linux-efi@vger.kernel.org +[rperier: Forward-ported to 5.5: + - Use pr_warn() + - Adjust context] +[bwh: Forward-ported to 5.6: adjust context] +[bwh: Forward-ported to 5.7: + - Use the next available bit in efi.flags + - Adjust context] +--- + arch/x86/kernel/setup.c | 14 +---------- + drivers/firmware/efi/Makefile | 1 + + drivers/firmware/efi/secureboot.c | 39 +++++++++++++++++++++++++++++++ + include/linux/efi.h | 16 ++++++++----- + 4 files changed, 51 insertions(+), 19 deletions(-) + create mode 100644 drivers/firmware/efi/secureboot.c + +--- a/arch/x86/kernel/setup.c ++++ b/arch/x86/kernel/setup.c +@@ -1205,19 +1205,7 @@ void __init setup_arch(char **cmdline_p) + /* Allocate bigger log buffer */ + setup_log_buf(1); + +- if (efi_enabled(EFI_BOOT)) { +- switch (boot_params.secure_boot) { +- case efi_secureboot_mode_disabled: +- pr_info("Secure boot disabled\n"); +- break; +- case efi_secureboot_mode_enabled: +- pr_info("Secure boot enabled\n"); +- break; +- default: +- pr_info("Secure boot could not be determined\n"); +- break; +- } +- } ++ efi_set_secure_boot(boot_params.secure_boot); + + reserve_initrd(); + +--- a/drivers/firmware/efi/Makefile ++++ b/drivers/firmware/efi/Makefile +@@ -27,6 +27,7 @@ obj-$(CONFIG_EFI_FAKE_MEMMAP) += fake_m + obj-$(CONFIG_EFI_BOOTLOADER_CONTROL) += efibc.o + obj-$(CONFIG_EFI_TEST) += test/ + obj-$(CONFIG_EFI_DEV_PATH_PARSER) += dev-path-parser.o ++obj-$(CONFIG_EFI) += secureboot.o + obj-$(CONFIG_APPLE_PROPERTIES) += apple-properties.o + obj-$(CONFIG_EFI_RCI2_TABLE) += rci2-table.o + obj-$(CONFIG_EFI_EMBEDDED_FIRMWARE) += embedded-firmware.o +--- /dev/null ++++ b/drivers/firmware/efi/secureboot.c +@@ -0,0 +1,39 @@ ++ ++/* Core kernel secure boot support. ++ * ++ * Copyright (C) 2017 Red Hat, Inc. All Rights Reserved. 
++ * Written by David Howells (dhowells@redhat.com) ++ * ++ * This program is free software; you can redistribute it and/or ++ * modify it under the terms of the GNU General Public Licence ++ * as published by the Free Software Foundation; either version ++ * 2 of the Licence, or (at your option) any later version. ++ */ ++ ++#define pr_fmt(fmt) KBUILD_MODNAME ": " fmt ++ ++#include <linux/efi.h> ++#include <linux/kernel.h> ++#include <linux/printk.h> ++ ++/* ++ * Decide what to do when UEFI secure boot mode is enabled. ++ */ ++void __init efi_set_secure_boot(enum efi_secureboot_mode mode) ++{ ++ if (efi_enabled(EFI_BOOT)) { ++ switch (mode) { ++ case efi_secureboot_mode_disabled: ++ pr_info("Secure boot disabled\n"); ++ break; ++ case efi_secureboot_mode_enabled: ++ set_bit(EFI_SECURE_BOOT, &efi.flags); ++ pr_info("Secure boot enabled\n"); ++ break; ++ default: ++ pr_warn("Secure boot could not be determined (mode %u)\n", ++ mode); ++ break; ++ } ++ } ++} +--- a/include/linux/efi.h ++++ b/include/linux/efi.h +@@ -849,6 +849,14 @@ extern int __init efi_setup_pcdp_console + #define EFI_MEM_ATTR 10 /* Did firmware publish an EFI_MEMORY_ATTRIBUTES table? */ + #define EFI_MEM_NO_SOFT_RESERVE 11 /* Is the kernel configured to ignore soft reservations? */ + #define EFI_PRESERVE_BS_REGIONS 12 /* Are EFI boot-services memory segments available? */ ++#define EFI_SECURE_BOOT 13 /* Are we in Secure Boot mode? 
*/ ++ ++enum efi_secureboot_mode { ++ efi_secureboot_mode_unset, ++ efi_secureboot_mode_unknown, ++ efi_secureboot_mode_disabled, ++ efi_secureboot_mode_enabled, ++}; + + #ifdef CONFIG_EFI + /* +@@ -873,6 +881,7 @@ static inline bool efi_rt_services_suppo + return (efi.runtime_supported_mask & mask) == mask; + } + extern void efi_find_mirror(void); ++extern void __init efi_set_secure_boot(enum efi_secureboot_mode mode); + #else + static inline bool efi_enabled(int feature) + { +@@ -892,6 +901,7 @@ static inline bool efi_rt_services_suppo + } + + static inline void efi_find_mirror(void) {} ++static inline void efi_set_secure_boot(enum efi_secureboot_mode mode) {} + #endif + + extern int efi_status_to_err(efi_status_t status); +@@ -1107,13 +1117,6 @@ static inline bool efi_runtime_disabled( + extern void efi_call_virt_check_flags(unsigned long flags, const char *call); + extern unsigned long efi_call_virt_save_flags(void); + +-enum efi_secureboot_mode { +- efi_secureboot_mode_unset, +- efi_secureboot_mode_unknown, +- efi_secureboot_mode_disabled, +- efi_secureboot_mode_enabled, +-}; +- + static inline + enum efi_secureboot_mode efi_get_secureboot_mode(efi_get_variable_t *get_var) + { diff --git a/debian/patches/features/all/lockdown/efi-lock-down-the-kernel-if-booted-in-secure-boot-mo.patch b/debian/patches/features/all/lockdown/efi-lock-down-the-kernel-if-booted-in-secure-boot-mo.patch new file mode 100644 index 000000000..9ec425871 --- /dev/null +++ b/debian/patches/features/all/lockdown/efi-lock-down-the-kernel-if-booted-in-secure-boot-mo.patch @@ -0,0 +1,131 @@ +From: Ben Hutchings <ben@decadent.org.uk> +Date: Tue, 10 Sep 2019 11:54:28 +0100 +Subject: efi: Lock down the kernel if booted in secure boot mode + +Based on an earlier patch by David Howells, who wrote the following +description: + +> UEFI Secure Boot provides a mechanism for ensuring that the firmware will +> only load signed bootloaders and kernels. 
Certain use cases may also +> require that all kernel modules also be signed. Add a configuration option +> to lock down the kernel - which includes requiring validly signed +> modules - if the kernel is secure-booted. + +Signed-off-by: Ben Hutchings <ben@decadent.org.uk> +[Salvatore Bonaccorso: After fixing https://bugs.debian.org/956197 the +help text for LOCK_DOWN_IN_EFI_SECURE_BOOT needs to be adjusted to +mention that lockdown is triggered in integrity mode] +Signed-off-by: Salvatore Bonaccorso <carnil@debian.org> +--- + arch/x86/kernel/setup.c | 4 ++-- + drivers/firmware/efi/secureboot.c | 3 +++ + include/linux/security.h | 6 ++++++ + security/lockdown/Kconfig | 15 +++++++++++++++ + security/lockdown/lockdown.c | 2 +- + 5 files changed, 27 insertions(+), 3 deletions(-) + +Index: debian-kernel/arch/x86/kernel/setup.c +=================================================================== +--- debian-kernel.orig/arch/x86/kernel/setup.c ++++ debian-kernel/arch/x86/kernel/setup.c +@@ -979,6 +979,8 @@ void __init setup_arch(char **cmdline_p) + if (efi_enabled(EFI_BOOT)) + efi_init(); + ++ efi_set_secure_boot(boot_params.secure_boot); ++ + dmi_setup(); + + /* +@@ -1130,8 +1132,6 @@ void __init setup_arch(char **cmdline_p) + /* Allocate bigger log buffer */ + setup_log_buf(1); + +- efi_set_secure_boot(boot_params.secure_boot); +- + reserve_initrd(); + + acpi_table_upgrade(); +Index: debian-kernel/drivers/firmware/efi/secureboot.c +=================================================================== +--- debian-kernel.orig/drivers/firmware/efi/secureboot.c ++++ debian-kernel/drivers/firmware/efi/secureboot.c +@@ -15,6 +15,7 @@ + #include <linux/efi.h> + #include <linux/kernel.h> + #include <linux/printk.h> ++#include <linux/security.h> + + /* + * Decide what to do when UEFI secure boot mode is enabled.
+@@ -28,6 +29,10 @@ void __init efi_set_secure_boot(enum efi + break; + case efi_secureboot_mode_enabled: + set_bit(EFI_SECURE_BOOT, &efi.flags); ++#ifdef CONFIG_LOCK_DOWN_IN_EFI_SECURE_BOOT ++ lock_kernel_down("EFI Secure Boot", ++ LOCKDOWN_INTEGRITY_MAX); ++#endif + pr_info("Secure boot enabled\n"); + break; + default: +Index: debian-kernel/include/linux/security.h +=================================================================== +--- debian-kernel.orig/include/linux/security.h ++++ debian-kernel/include/linux/security.h +@@ -451,6 +451,7 @@ int security_inode_notifysecctx(struct i + int security_inode_setsecctx(struct dentry *dentry, void *ctx, u32 ctxlen); + int security_inode_getsecctx(struct inode *inode, void **ctx, u32 *ctxlen); + int security_locked_down(enum lockdown_reason what); ++int lock_kernel_down(const char *where, enum lockdown_reason level); + #else /* CONFIG_SECURITY */ + + static inline int call_blocking_lsm_notifier(enum lsm_event event, void *data) +@@ -1291,6 +1292,11 @@ static inline int security_locked_down(e + { + return 0; + } ++static inline int ++lock_kernel_down(const char *where, enum lockdown_reason level) ++{ ++ return -EOPNOTSUPP; ++} + #endif /* CONFIG_SECURITY */ + + #if defined(CONFIG_SECURITY) && defined(CONFIG_WATCH_QUEUE) +Index: debian-kernel/security/lockdown/Kconfig +=================================================================== +--- debian-kernel.orig/security/lockdown/Kconfig ++++ debian-kernel/security/lockdown/Kconfig +@@ -45,3 +45,18 @@ config LOCK_DOWN_KERNEL_FORCE_CONFIDENTI + disabled. + + endchoice ++ ++config LOCK_DOWN_IN_EFI_SECURE_BOOT ++ bool "Lock down the kernel in EFI Secure Boot mode" ++ default n ++ depends on SECURITY_LOCKDOWN_LSM ++ depends on EFI ++ select SECURITY_LOCKDOWN_LSM_EARLY ++ help ++ UEFI Secure Boot provides a mechanism for ensuring that the firmware ++ will only load signed bootloaders and kernels. 
Secure boot mode may ++ be determined from EFI variables provided by the system firmware if ++ not indicated by the boot parameters. ++ ++ Enabling this option results in kernel lockdown being ++ triggered in integrity mode if EFI Secure Boot is set. +Index: debian-kernel/security/lockdown/lockdown.c +=================================================================== +--- debian-kernel.orig/security/lockdown/lockdown.c ++++ debian-kernel/security/lockdown/lockdown.c +@@ -23,7 +23,7 @@ static const enum lockdown_reason lockdo + /* + * Put the kernel into lock-down mode. + */ +-static int lock_kernel_down(const char *where, enum lockdown_reason level) ++int lock_kernel_down(const char *where, enum lockdown_reason level) + { + if (kernel_locked_down >= level) + return -EPERM; diff --git a/debian/patches/features/all/lockdown/mtd-disable-slram-and-phram-when-locked-down.patch b/debian/patches/features/all/lockdown/mtd-disable-slram-and-phram-when-locked-down.patch new file mode 100644 index 000000000..c718e7e2f --- /dev/null +++ b/debian/patches/features/all/lockdown/mtd-disable-slram-and-phram-when-locked-down.patch @@ -0,0 +1,75 @@ +From: Ben Hutchings <ben@decadent.org.uk> +Date: Fri, 30 Aug 2019 15:54:24 +0100 +Subject: mtd: phram,slram: Disable when the kernel is locked down +Forwarded: https://lore.kernel.org/linux-security-module/20190830154720.eekfjt6c4jzvlbfz@decadent.org.uk/ + +These drivers allow mapping arbitrary memory ranges as MTD devices. +This should be disabled to preserve the kernel's integrity when it is +locked down. 
+ +* Add the HWPARAM flag to the module parameters +* When slram is built-in, it uses __setup() to read kernel parameters, + so add an explicit security_locked_down() check + +Signed-off-by: Ben Hutchings <ben@decadent.org.uk> +Cc: Matthew Garrett <mjg59@google.com> +Cc: David Howells <dhowells@redhat.com> +Cc: Joern Engel <joern@lazybastard.org> +Cc: linux-mtd@lists.infradead.org +--- + drivers/mtd/devices/phram.c | 6 +++++- + drivers/mtd/devices/slram.c | 9 ++++++++- + 2 files changed, 13 insertions(+), 2 deletions(-) + +--- a/drivers/mtd/devices/phram.c ++++ b/drivers/mtd/devices/phram.c +@@ -364,7 +364,11 @@ static int phram_param_call(const char * + #endif + } + +-module_param_call(phram, phram_param_call, NULL, NULL, 0200); ++static const struct kernel_param_ops phram_param_ops = { ++ .set = phram_param_call ++}; ++__module_param_call(MODULE_PARAM_PREFIX, phram, &phram_param_ops, NULL, ++ 0200, -1, KERNEL_PARAM_FL_HWPARAM | hwparam_iomem); + MODULE_PARM_DESC(phram, "Memory region to map. \"phram=<name>,<start>,<length>[,<erasesize>]\""); + + #ifdef CONFIG_OF +--- a/drivers/mtd/devices/slram.c ++++ b/drivers/mtd/devices/slram.c +@@ -43,6 +43,7 @@ + #include <linux/ioctl.h> + #include <linux/init.h> + #include <linux/io.h> ++#include <linux/security.h> + + #include <linux/mtd/mtd.h> + +@@ -65,7 +66,7 @@ typedef struct slram_mtd_list { + #ifdef MODULE + static char *map[SLRAM_MAX_DEVICES_PARAMS]; + +-module_param_array(map, charp, NULL, 0); ++module_param_hw_array(map, charp, iomem, NULL, 0); + MODULE_PARM_DESC(map, "List of memory regions to map.
\"map=<name>, <start>, <length / end>\""); + #else + static char *map; +@@ -281,11 +282,17 @@ static int __init init_slram(void) + #ifndef MODULE + char *devstart; + char *devlength; ++ int ret; + + if (!map) { + E("slram: not enough parameters.\n"); + return(-EINVAL); + } ++ ++ ret = security_locked_down(LOCKDOWN_MODULE_PARAMETERS); ++ if (ret) ++ return ret; ++ + while (map) { + devname = devstart = devlength = NULL; + diff --git a/debian/patches/features/all/security-perf-allow-further-restriction-of-perf_event_open.patch b/debian/patches/features/all/security-perf-allow-further-restriction-of-perf_event_open.patch new file mode 100644 index 000000000..e58668ebe --- /dev/null +++ b/debian/patches/features/all/security-perf-allow-further-restriction-of-perf_event_open.patch @@ -0,0 +1,80 @@ +From: Ben Hutchings <ben@decadent.org.uk> +Date: Mon, 11 Jan 2016 15:23:55 +0000 +Subject: security,perf: Allow further restriction of perf_event_open +Forwarded: https://lkml.org/lkml/2016/1/11/587 + +When kernel.perf_event_paranoid is set to 3 (or greater), disallow all +access to performance events by users without CAP_SYS_ADMIN. +Add a Kconfig symbol CONFIG_SECURITY_PERF_EVENTS_RESTRICT that +makes this value the default. + +This is based on a similar feature in grsecurity +(CONFIG_GRKERNSEC_PERF_HARDEN). This version doesn't include making +the variable read-only. It also allows enabling further restriction +at run-time regardless of whether the default is changed.
+ +Signed-off-by: Ben Hutchings <ben@decadent.org.uk> +--- + include/linux/perf_event.h | 5 +++++ + kernel/events/core.c | 8 ++++++++ + security/Kconfig | 9 +++++++++ + 3 files changed, 22 insertions(+) + +--- a/include/linux/perf_event.h ++++ b/include/linux/perf_event.h +@@ -1387,6 +1387,11 @@ int perf_cpu_time_max_percent_handler(st + int perf_event_max_stack_handler(struct ctl_table *table, int write, + void *buffer, size_t *lenp, loff_t *ppos); + ++static inline bool perf_paranoid_any(void) ++{ ++ return sysctl_perf_event_paranoid > 2; ++} ++ + /* Access to perf_event_open(2) syscall. */ + #define PERF_SECURITY_OPEN 0 + +--- a/kernel/events/core.c ++++ b/kernel/events/core.c +@@ -415,8 +415,13 @@ static struct kmem_cache *perf_event_cac + * 0 - disallow raw tracepoint access for unpriv + * 1 - disallow cpu events for unpriv + * 2 - disallow kernel profiling for unpriv ++ * 3 - disallow all unpriv perf event use + */ ++#ifdef CONFIG_SECURITY_PERF_EVENTS_RESTRICT ++int sysctl_perf_event_paranoid __read_mostly = 3; ++#else + int sysctl_perf_event_paranoid __read_mostly = 2; ++#endif + + /* Minimum for 512 kiB + 1 user control page */ + int sysctl_perf_event_mlock __read_mostly = 512 + (PAGE_SIZE / 1024); /* 'free' kiB per user */ +@@ -12235,6 +12240,9 @@ SYSCALL_DEFINE5(perf_event_open, + if (err) + return err; + ++ if (perf_paranoid_any() && !capable(CAP_SYS_ADMIN)) ++ return -EACCES; ++ + /* Do we allow access to perf_event_open(2) ? */ + err = security_perf_event_open(&attr, PERF_SECURITY_OPEN); + if (err) +--- a/security/Kconfig ++++ b/security/Kconfig +@@ -19,6 +19,15 @@ config SECURITY_DMESG_RESTRICT + + If you are unsure how to answer this question, answer N. 
+ ++config SECURITY_PERF_EVENTS_RESTRICT ++ bool "Restrict unprivileged use of performance events" ++ depends on PERF_EVENTS ++ help ++ If you say Y here, the kernel.perf_event_paranoid sysctl ++ will be set to 3 by default, and no unprivileged use of the ++ perf_event_open syscall will be permitted unless it is ++ changed. ++ + config SECURITY + bool "Enable different security models" + depends on SYSFS |