Diffstat (limited to 'drivers/net/ethernet/mellanox/mlx5/core/en/tc_ct.c')
-rw-r--r--  drivers/net/ethernet/mellanox/mlx5/core/en/tc_ct.c  2272
1 file changed, 2272 insertions(+), 0 deletions(-)
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en/tc_ct.c b/drivers/net/ethernet/mellanox/mlx5/core/en/tc_ct.c
new file mode 100644
index 000000000..f01f7dfdb
--- /dev/null
+++ b/drivers/net/ethernet/mellanox/mlx5/core/en/tc_ct.c
@@ -0,0 +1,2272 @@
+// SPDX-License-Identifier: GPL-2.0 OR Linux-OpenIB
+/* Copyright (c) 2019 Mellanox Technologies. */
+
+#include <net/netfilter/nf_conntrack.h>
+#include <net/netfilter/nf_conntrack_core.h>
+#include <net/netfilter/nf_conntrack_zones.h>
+#include <net/netfilter/nf_conntrack_labels.h>
+#include <net/netfilter/nf_conntrack_helper.h>
+#include <net/netfilter/nf_conntrack_acct.h>
+#include <uapi/linux/tc_act/tc_pedit.h>
+#include <net/tc_act/tc_ct.h>
+#include <net/flow_offload.h>
+#include <net/netfilter/nf_flow_table.h>
+#include <linux/workqueue.h>
+#include <linux/refcount.h>
+#include <linux/xarray.h>
+#include <linux/if_macvlan.h>
+#include <linux/debugfs.h>
+
+#include "lib/fs_chains.h"
+#include "en/tc_ct.h"
+#include "en/tc/ct_fs.h"
+#include "en/tc_priv.h"
+#include "en/mod_hdr.h"
+#include "en/mapping.h"
+#include "en/tc/post_act.h"
+#include "en.h"
+#include "en_tc.h"
+#include "en_rep.h"
+#include "fs_core.h"
+
+#define MLX5_CT_STATE_ESTABLISHED_BIT BIT(1)
+#define MLX5_CT_STATE_TRK_BIT BIT(2)
+#define MLX5_CT_STATE_NAT_BIT BIT(3)
+#define MLX5_CT_STATE_REPLY_BIT BIT(4)
+#define MLX5_CT_STATE_RELATED_BIT BIT(5)
+#define MLX5_CT_STATE_INVALID_BIT BIT(6)
+
+#define MLX5_CT_LABELS_BITS MLX5_REG_MAPPING_MBITS(LABELS_TO_REG)
+#define MLX5_CT_LABELS_MASK MLX5_REG_MAPPING_MASK(LABELS_TO_REG)
+
+/* Statically allocate modify actions for
+ * ipv6 + port nat (5) + tuple fields (4) + nic mode zone restore (1) = 10.
+ * The array grows dynamically if more are needed (e.g. ipv6 snat + dnat).
+ */
+#define MLX5_CT_MIN_MOD_ACTS 10
+
+#define ct_dbg(fmt, args...)\
+ netdev_dbg(ct_priv->netdev, "ct_debug: " fmt "\n", ##args)
+
+struct mlx5_tc_ct_debugfs {
+ struct {
+ atomic_t offloaded;
+ atomic_t rx_dropped;
+ } stats;
+
+ struct dentry *root;
+};
+
+struct mlx5_tc_ct_priv {
+ struct mlx5_core_dev *dev;
+ const struct net_device *netdev;
+ struct mod_hdr_tbl *mod_hdr_tbl;
+ struct xarray tuple_ids;
+ struct rhashtable zone_ht;
+ struct rhashtable ct_tuples_ht;
+ struct rhashtable ct_tuples_nat_ht;
+ struct mlx5_flow_table *ct;
+ struct mlx5_flow_table *ct_nat;
+ struct mlx5e_post_act *post_act;
+ struct mutex control_lock; /* guards parallel adds/dels */
+ struct mapping_ctx *zone_mapping;
+ struct mapping_ctx *labels_mapping;
+ enum mlx5_flow_namespace_type ns_type;
+ struct mlx5_fs_chains *chains;
+ struct mlx5_ct_fs *fs;
+ struct mlx5_ct_fs_ops *fs_ops;
+ spinlock_t ht_lock; /* protects ft entries */
+ struct workqueue_struct *wq;
+
+ struct mlx5_tc_ct_debugfs debugfs;
+};
+
+struct mlx5_ct_flow {
+ struct mlx5_flow_attr *pre_ct_attr;
+ struct mlx5_flow_handle *pre_ct_rule;
+ struct mlx5_ct_ft *ft;
+ u32 chain_mapping;
+};
+
+struct mlx5_ct_zone_rule {
+ struct mlx5_ct_fs_rule *rule;
+ struct mlx5e_mod_hdr_handle *mh;
+ struct mlx5_flow_attr *attr;
+ bool nat;
+};
+
+struct mlx5_tc_ct_pre {
+ struct mlx5_flow_table *ft;
+ struct mlx5_flow_group *flow_grp;
+ struct mlx5_flow_group *miss_grp;
+ struct mlx5_flow_handle *flow_rule;
+ struct mlx5_flow_handle *miss_rule;
+ struct mlx5_modify_hdr *modify_hdr;
+};
+
+struct mlx5_ct_ft {
+ struct rhash_head node;
+ u16 zone;
+ u32 zone_restore_id;
+ refcount_t refcount;
+ struct nf_flowtable *nf_ft;
+ struct mlx5_tc_ct_priv *ct_priv;
+ struct rhashtable ct_entries_ht;
+ struct mlx5_tc_ct_pre pre_ct;
+ struct mlx5_tc_ct_pre pre_ct_nat;
+};
+
+struct mlx5_ct_tuple {
+ u16 addr_type;
+ __be16 n_proto;
+ u8 ip_proto;
+ struct {
+ union {
+ __be32 src_v4;
+ struct in6_addr src_v6;
+ };
+ union {
+ __be32 dst_v4;
+ struct in6_addr dst_v6;
+ };
+ } ip;
+ struct {
+ __be16 src;
+ __be16 dst;
+ } port;
+
+ u16 zone;
+};
+
+struct mlx5_ct_counter {
+ struct mlx5_fc *counter;
+ refcount_t refcount;
+ bool is_shared;
+};
+
+enum {
+ MLX5_CT_ENTRY_FLAG_VALID,
+};
+
+struct mlx5_ct_entry {
+ struct rhash_head node;
+ struct rhash_head tuple_node;
+ struct rhash_head tuple_nat_node;
+ struct mlx5_ct_counter *counter;
+ unsigned long cookie;
+ unsigned long restore_cookie;
+ struct mlx5_ct_tuple tuple;
+ struct mlx5_ct_tuple tuple_nat;
+ struct mlx5_ct_zone_rule zone_rules[2];
+
+ struct mlx5_tc_ct_priv *ct_priv;
+ struct work_struct work;
+
+ refcount_t refcnt;
+ unsigned long flags;
+};
+
+static void
+mlx5_tc_ct_entry_destroy_mod_hdr(struct mlx5_tc_ct_priv *ct_priv,
+ struct mlx5_flow_attr *attr,
+ struct mlx5e_mod_hdr_handle *mh);
+
+static const struct rhashtable_params cts_ht_params = {
+ .head_offset = offsetof(struct mlx5_ct_entry, node),
+ .key_offset = offsetof(struct mlx5_ct_entry, cookie),
+ .key_len = sizeof(((struct mlx5_ct_entry *)0)->cookie),
+ .automatic_shrinking = true,
+ .min_size = 16 * 1024,
+};
+
+static const struct rhashtable_params zone_params = {
+ .head_offset = offsetof(struct mlx5_ct_ft, node),
+ .key_offset = offsetof(struct mlx5_ct_ft, zone),
+ .key_len = sizeof(((struct mlx5_ct_ft *)0)->zone),
+ .automatic_shrinking = true,
+};
+
+static const struct rhashtable_params tuples_ht_params = {
+ .head_offset = offsetof(struct mlx5_ct_entry, tuple_node),
+ .key_offset = offsetof(struct mlx5_ct_entry, tuple),
+ .key_len = sizeof(((struct mlx5_ct_entry *)0)->tuple),
+ .automatic_shrinking = true,
+ .min_size = 16 * 1024,
+};
+
+static const struct rhashtable_params tuples_nat_ht_params = {
+ .head_offset = offsetof(struct mlx5_ct_entry, tuple_nat_node),
+ .key_offset = offsetof(struct mlx5_ct_entry, tuple_nat),
+ .key_len = sizeof(((struct mlx5_ct_entry *)0)->tuple_nat),
+ .automatic_shrinking = true,
+ .min_size = 16 * 1024,
+};
+
+static bool
+mlx5_tc_ct_entry_has_nat(struct mlx5_ct_entry *entry)
+{
+ return !!(entry->tuple_nat_node.next);
+}
+
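+/* Map the 128-bit conntrack labels to a compact mapping id; all-zero
+ * labels use id 0 without allocating a mapping entry.
+ */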
+static int
+mlx5_get_label_mapping(struct mlx5_tc_ct_priv *ct_priv,
+ u32 *labels, u32 *id)
+{
+ if (!memchr_inv(labels, 0, sizeof(u32) * 4)) {
+ *id = 0;
+ return 0;
+ }
+
+ if (mapping_add(ct_priv->labels_mapping, labels, id))
+ return -EOPNOTSUPP;
+
+ return 0;
+}
+
+static void
+mlx5_put_label_mapping(struct mlx5_tc_ct_priv *ct_priv, u32 id)
+{
+ if (id)
+ mapping_remove(ct_priv->labels_mapping, id);
+}
+
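+/* Build the connection tuple from the flow_rule match; only IPv4/IPv6
+ * with TCP, UDP or GRE is supported.
+ */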
+static int
+mlx5_tc_ct_rule_to_tuple(struct mlx5_ct_tuple *tuple, struct flow_rule *rule)
+{
+ struct flow_match_control control;
+ struct flow_match_basic basic;
+
+ flow_rule_match_basic(rule, &basic);
+ flow_rule_match_control(rule, &control);
+
+ tuple->n_proto = basic.key->n_proto;
+ tuple->ip_proto = basic.key->ip_proto;
+ tuple->addr_type = control.key->addr_type;
+
+ if (tuple->addr_type == FLOW_DISSECTOR_KEY_IPV4_ADDRS) {
+ struct flow_match_ipv4_addrs match;
+
+ flow_rule_match_ipv4_addrs(rule, &match);
+ tuple->ip.src_v4 = match.key->src;
+ tuple->ip.dst_v4 = match.key->dst;
+ } else if (tuple->addr_type == FLOW_DISSECTOR_KEY_IPV6_ADDRS) {
+ struct flow_match_ipv6_addrs match;
+
+ flow_rule_match_ipv6_addrs(rule, &match);
+ tuple->ip.src_v6 = match.key->src;
+ tuple->ip.dst_v6 = match.key->dst;
+ } else {
+ return -EOPNOTSUPP;
+ }
+
+ if (flow_rule_match_key(rule, FLOW_DISSECTOR_KEY_PORTS)) {
+ struct flow_match_ports match;
+
+ flow_rule_match_ports(rule, &match);
+ switch (tuple->ip_proto) {
+ case IPPROTO_TCP:
+ case IPPROTO_UDP:
+ tuple->port.src = match.key->src;
+ tuple->port.dst = match.key->dst;
+ break;
+ default:
+ return -EOPNOTSUPP;
+ }
+ } else {
+ if (tuple->ip_proto != IPPROTO_GRE)
+ return -EOPNOTSUPP;
+ }
+
+ return 0;
+}
+
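+/* Apply the rule's mangle (NAT) actions to the tuple to obtain the
+ * post-NAT tuple.
+ */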
+static int
+mlx5_tc_ct_rule_to_tuple_nat(struct mlx5_ct_tuple *tuple,
+ struct flow_rule *rule)
+{
+ struct flow_action *flow_action = &rule->action;
+ struct flow_action_entry *act;
+ u32 offset, val, ip6_offset;
+ int i;
+
+ flow_action_for_each(i, act, flow_action) {
+ if (act->id != FLOW_ACTION_MANGLE)
+ continue;
+
+ offset = act->mangle.offset;
+ val = act->mangle.val;
+ switch (act->mangle.htype) {
+ case FLOW_ACT_MANGLE_HDR_TYPE_IP4:
+ if (offset == offsetof(struct iphdr, saddr))
+ tuple->ip.src_v4 = cpu_to_be32(val);
+ else if (offset == offsetof(struct iphdr, daddr))
+ tuple->ip.dst_v4 = cpu_to_be32(val);
+ else
+ return -EOPNOTSUPP;
+ break;
+
+ case FLOW_ACT_MANGLE_HDR_TYPE_IP6:
+ ip6_offset = (offset - offsetof(struct ipv6hdr, saddr));
+ ip6_offset /= 4;
+ if (ip6_offset < 4)
+ tuple->ip.src_v6.s6_addr32[ip6_offset] = cpu_to_be32(val);
+ else if (ip6_offset < 8)
+ tuple->ip.dst_v6.s6_addr32[ip6_offset - 4] = cpu_to_be32(val);
+ else
+ return -EOPNOTSUPP;
+ break;
+
+ case FLOW_ACT_MANGLE_HDR_TYPE_TCP:
+ if (offset == offsetof(struct tcphdr, source))
+ tuple->port.src = cpu_to_be16(val);
+ else if (offset == offsetof(struct tcphdr, dest))
+ tuple->port.dst = cpu_to_be16(val);
+ else
+ return -EOPNOTSUPP;
+ break;
+
+ case FLOW_ACT_MANGLE_HDR_TYPE_UDP:
+ if (offset == offsetof(struct udphdr, source))
+ tuple->port.src = cpu_to_be16(val);
+ else if (offset == offsetof(struct udphdr, dest))
+ tuple->port.dst = cpu_to_be16(val);
+ else
+ return -EOPNOTSUPP;
+ break;
+
+ default:
+ return -EOPNOTSUPP;
+ }
+ }
+
+ return 0;
+}
+
+static int
+mlx5_tc_ct_get_flow_source_match(struct mlx5_tc_ct_priv *ct_priv,
+ struct net_device *ndev)
+{
+ struct mlx5e_priv *other_priv = netdev_priv(ndev);
+ struct mlx5_core_dev *mdev = ct_priv->dev;
+ bool vf_rep, uplink_rep;
+
+ vf_rep = mlx5e_eswitch_vf_rep(ndev) && mlx5_same_hw_devs(mdev, other_priv->mdev);
+ uplink_rep = mlx5e_eswitch_uplink_rep(ndev) && mlx5_same_hw_devs(mdev, other_priv->mdev);
+
+ if (vf_rep)
+ return MLX5_FLOW_CONTEXT_FLOW_SOURCE_LOCAL_VPORT;
+ if (uplink_rep)
+ return MLX5_FLOW_CONTEXT_FLOW_SOURCE_UPLINK;
+ if (is_vlan_dev(ndev))
+ return mlx5_tc_ct_get_flow_source_match(ct_priv, vlan_dev_real_dev(ndev));
+ if (netif_is_macvlan(ndev))
+ return mlx5_tc_ct_get_flow_source_match(ct_priv, macvlan_dev_real_dev(ndev));
+ if (mlx5e_get_tc_tun(ndev) || netif_is_lag_master(ndev))
+ return MLX5_FLOW_CONTEXT_FLOW_SOURCE_UPLINK;
+
+ return MLX5_FLOW_CONTEXT_FLOW_SOURCE_ANY_VPORT;
+}
+
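+/* Translate the flow_rule tuple match into the flow_spec outer header
+ * match; if a meta ingress ifindex is present, also derive the flow
+ * source hint from that device.
+ */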
+static int
+mlx5_tc_ct_set_tuple_match(struct mlx5_tc_ct_priv *ct_priv,
+ struct mlx5_flow_spec *spec,
+ struct flow_rule *rule)
+{
+ void *headers_c = MLX5_ADDR_OF(fte_match_param, spec->match_criteria,
+ outer_headers);
+ void *headers_v = MLX5_ADDR_OF(fte_match_param, spec->match_value,
+ outer_headers);
+ u16 addr_type = 0;
+ u8 ip_proto = 0;
+
+ if (flow_rule_match_key(rule, FLOW_DISSECTOR_KEY_BASIC)) {
+ struct flow_match_basic match;
+
+ flow_rule_match_basic(rule, &match);
+
+ mlx5e_tc_set_ethertype(ct_priv->dev, &match, true, headers_c, headers_v);
+ MLX5_SET(fte_match_set_lyr_2_4, headers_c, ip_protocol,
+ match.mask->ip_proto);
+ MLX5_SET(fte_match_set_lyr_2_4, headers_v, ip_protocol,
+ match.key->ip_proto);
+
+ ip_proto = match.key->ip_proto;
+ }
+
+ if (flow_rule_match_key(rule, FLOW_DISSECTOR_KEY_CONTROL)) {
+ struct flow_match_control match;
+
+ flow_rule_match_control(rule, &match);
+ addr_type = match.key->addr_type;
+ }
+
+ if (addr_type == FLOW_DISSECTOR_KEY_IPV4_ADDRS) {
+ struct flow_match_ipv4_addrs match;
+
+ flow_rule_match_ipv4_addrs(rule, &match);
+ memcpy(MLX5_ADDR_OF(fte_match_set_lyr_2_4, headers_c,
+ src_ipv4_src_ipv6.ipv4_layout.ipv4),
+ &match.mask->src, sizeof(match.mask->src));
+ memcpy(MLX5_ADDR_OF(fte_match_set_lyr_2_4, headers_v,
+ src_ipv4_src_ipv6.ipv4_layout.ipv4),
+ &match.key->src, sizeof(match.key->src));
+ memcpy(MLX5_ADDR_OF(fte_match_set_lyr_2_4, headers_c,
+ dst_ipv4_dst_ipv6.ipv4_layout.ipv4),
+ &match.mask->dst, sizeof(match.mask->dst));
+ memcpy(MLX5_ADDR_OF(fte_match_set_lyr_2_4, headers_v,
+ dst_ipv4_dst_ipv6.ipv4_layout.ipv4),
+ &match.key->dst, sizeof(match.key->dst));
+ }
+
+ if (addr_type == FLOW_DISSECTOR_KEY_IPV6_ADDRS) {
+ struct flow_match_ipv6_addrs match;
+
+ flow_rule_match_ipv6_addrs(rule, &match);
+ memcpy(MLX5_ADDR_OF(fte_match_set_lyr_2_4, headers_c,
+ src_ipv4_src_ipv6.ipv6_layout.ipv6),
+ &match.mask->src, sizeof(match.mask->src));
+ memcpy(MLX5_ADDR_OF(fte_match_set_lyr_2_4, headers_v,
+ src_ipv4_src_ipv6.ipv6_layout.ipv6),
+ &match.key->src, sizeof(match.key->src));
+
+ memcpy(MLX5_ADDR_OF(fte_match_set_lyr_2_4, headers_c,
+ dst_ipv4_dst_ipv6.ipv6_layout.ipv6),
+ &match.mask->dst, sizeof(match.mask->dst));
+ memcpy(MLX5_ADDR_OF(fte_match_set_lyr_2_4, headers_v,
+ dst_ipv4_dst_ipv6.ipv6_layout.ipv6),
+ &match.key->dst, sizeof(match.key->dst));
+ }
+
+ if (flow_rule_match_key(rule, FLOW_DISSECTOR_KEY_PORTS)) {
+ struct flow_match_ports match;
+
+ flow_rule_match_ports(rule, &match);
+ switch (ip_proto) {
+ case IPPROTO_TCP:
+ MLX5_SET(fte_match_set_lyr_2_4, headers_c,
+ tcp_sport, ntohs(match.mask->src));
+ MLX5_SET(fte_match_set_lyr_2_4, headers_v,
+ tcp_sport, ntohs(match.key->src));
+
+ MLX5_SET(fte_match_set_lyr_2_4, headers_c,
+ tcp_dport, ntohs(match.mask->dst));
+ MLX5_SET(fte_match_set_lyr_2_4, headers_v,
+ tcp_dport, ntohs(match.key->dst));
+ break;
+
+ case IPPROTO_UDP:
+ MLX5_SET(fte_match_set_lyr_2_4, headers_c,
+ udp_sport, ntohs(match.mask->src));
+ MLX5_SET(fte_match_set_lyr_2_4, headers_v,
+ udp_sport, ntohs(match.key->src));
+
+ MLX5_SET(fte_match_set_lyr_2_4, headers_c,
+ udp_dport, ntohs(match.mask->dst));
+ MLX5_SET(fte_match_set_lyr_2_4, headers_v,
+ udp_dport, ntohs(match.key->dst));
+ break;
+ default:
+ break;
+ }
+ }
+
+ if (flow_rule_match_key(rule, FLOW_DISSECTOR_KEY_TCP)) {
+ struct flow_match_tcp match;
+
+ flow_rule_match_tcp(rule, &match);
+ MLX5_SET(fte_match_set_lyr_2_4, headers_c, tcp_flags,
+ ntohs(match.mask->flags));
+ MLX5_SET(fte_match_set_lyr_2_4, headers_v, tcp_flags,
+ ntohs(match.key->flags));
+ }
+
+ if (flow_rule_match_key(rule, FLOW_DISSECTOR_KEY_META)) {
+ struct flow_match_meta match;
+
+ flow_rule_match_meta(rule, &match);
+
+ if (match.key->ingress_ifindex & match.mask->ingress_ifindex) {
+ struct net_device *dev;
+
+ dev = dev_get_by_index(&init_net, match.key->ingress_ifindex);
+ if (dev && MLX5_CAP_ESW_FLOWTABLE(ct_priv->dev, flow_source))
+ spec->flow_context.flow_source =
+ mlx5_tc_ct_get_flow_source_match(ct_priv, dev);
+
+ dev_put(dev);
+ }
+ }
+
+ return 0;
+}
+
+static void
+mlx5_tc_ct_counter_put(struct mlx5_tc_ct_priv *ct_priv, struct mlx5_ct_entry *entry)
+{
+ if (entry->counter->is_shared &&
+ !refcount_dec_and_test(&entry->counter->refcount))
+ return;
+
+ mlx5_fc_destroy(ct_priv->dev, entry->counter->counter);
+ kfree(entry->counter);
+}
+
+static void
+mlx5_tc_ct_entry_del_rule(struct mlx5_tc_ct_priv *ct_priv,
+ struct mlx5_ct_entry *entry,
+ bool nat)
+{
+ struct mlx5_ct_zone_rule *zone_rule = &entry->zone_rules[nat];
+ struct mlx5_flow_attr *attr = zone_rule->attr;
+
+ ct_dbg("Deleting ct entry rule in zone %d", entry->tuple.zone);
+
+ ct_priv->fs_ops->ct_rule_del(ct_priv->fs, zone_rule->rule);
+ mlx5_tc_ct_entry_destroy_mod_hdr(ct_priv, zone_rule->attr, zone_rule->mh);
+ mlx5_put_label_mapping(ct_priv, attr->ct_attr.ct_labels_id);
+ kfree(attr);
+}
+
+static void
+mlx5_tc_ct_entry_del_rules(struct mlx5_tc_ct_priv *ct_priv,
+ struct mlx5_ct_entry *entry)
+{
+ mlx5_tc_ct_entry_del_rule(ct_priv, entry, true);
+ mlx5_tc_ct_entry_del_rule(ct_priv, entry, false);
+
+ atomic_dec(&ct_priv->debugfs.stats.offloaded);
+}
+
+static struct flow_action_entry *
+mlx5_tc_ct_get_ct_metadata_action(struct flow_rule *flow_rule)
+{
+ struct flow_action *flow_action = &flow_rule->action;
+ struct flow_action_entry *act;
+ int i;
+
+ flow_action_for_each(i, act, flow_action) {
+ if (act->id == FLOW_ACTION_CT_METADATA)
+ return act;
+ }
+
+ return NULL;
+}
+
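+/* Add the modify header actions that write ct state, mark, labels id
+ * and zone restore id into their mapped registers.
+ */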
+static int
+mlx5_tc_ct_entry_set_registers(struct mlx5_tc_ct_priv *ct_priv,
+ struct mlx5e_tc_mod_hdr_acts *mod_acts,
+ u8 ct_state,
+ u32 mark,
+ u32 labels_id,
+ u8 zone_restore_id)
+{
+ enum mlx5_flow_namespace_type ns = ct_priv->ns_type;
+ struct mlx5_core_dev *dev = ct_priv->dev;
+ int err;
+
+ err = mlx5e_tc_match_to_reg_set(dev, mod_acts, ns,
+ CTSTATE_TO_REG, ct_state);
+ if (err)
+ return err;
+
+ err = mlx5e_tc_match_to_reg_set(dev, mod_acts, ns,
+ MARK_TO_REG, mark);
+ if (err)
+ return err;
+
+ err = mlx5e_tc_match_to_reg_set(dev, mod_acts, ns,
+ LABELS_TO_REG, labels_id);
+ if (err)
+ return err;
+
+ err = mlx5e_tc_match_to_reg_set(dev, mod_acts, ns,
+ ZONE_RESTORE_TO_REG, zone_restore_id);
+ if (err)
+ return err;
+
+ /* Make another copy of zone id in reg_b for
+ * NIC rx flows since we don't copy reg_c1 to
+ * reg_b upon miss.
+ */
+ if (ns != MLX5_FLOW_NAMESPACE_FDB) {
+ err = mlx5e_tc_match_to_reg_set(dev, mod_acts, ns,
+ NIC_ZONE_RESTORE_TO_REG, zone_restore_id);
+ if (err)
+ return err;
+ }
+ return 0;
+}
+
+int mlx5_tc_ct_set_ct_clear_regs(struct mlx5_tc_ct_priv *priv,
+ struct mlx5e_tc_mod_hdr_acts *mod_acts)
+{
+ return mlx5_tc_ct_entry_set_registers(priv, mod_acts, 0, 0, 0, 0);
+}
+
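+/* Translate one pedit mangle action into a hardware set_action_in
+ * modify header action on the corresponding packet field.
+ */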
+static int
+mlx5_tc_ct_parse_mangle_to_mod_act(struct flow_action_entry *act,
+ char *modact)
+{
+ u32 offset = act->mangle.offset, field;
+
+ switch (act->mangle.htype) {
+ case FLOW_ACT_MANGLE_HDR_TYPE_IP4:
+ MLX5_SET(set_action_in, modact, length, 0);
+ if (offset == offsetof(struct iphdr, saddr))
+ field = MLX5_ACTION_IN_FIELD_OUT_SIPV4;
+ else if (offset == offsetof(struct iphdr, daddr))
+ field = MLX5_ACTION_IN_FIELD_OUT_DIPV4;
+ else
+ return -EOPNOTSUPP;
+ break;
+
+ case FLOW_ACT_MANGLE_HDR_TYPE_IP6:
+ MLX5_SET(set_action_in, modact, length, 0);
+ if (offset == offsetof(struct ipv6hdr, saddr) + 12)
+ field = MLX5_ACTION_IN_FIELD_OUT_SIPV6_31_0;
+ else if (offset == offsetof(struct ipv6hdr, saddr) + 8)
+ field = MLX5_ACTION_IN_FIELD_OUT_SIPV6_63_32;
+ else if (offset == offsetof(struct ipv6hdr, saddr) + 4)
+ field = MLX5_ACTION_IN_FIELD_OUT_SIPV6_95_64;
+ else if (offset == offsetof(struct ipv6hdr, saddr))
+ field = MLX5_ACTION_IN_FIELD_OUT_SIPV6_127_96;
+ else if (offset == offsetof(struct ipv6hdr, daddr) + 12)
+ field = MLX5_ACTION_IN_FIELD_OUT_DIPV6_31_0;
+ else if (offset == offsetof(struct ipv6hdr, daddr) + 8)
+ field = MLX5_ACTION_IN_FIELD_OUT_DIPV6_63_32;
+ else if (offset == offsetof(struct ipv6hdr, daddr) + 4)
+ field = MLX5_ACTION_IN_FIELD_OUT_DIPV6_95_64;
+ else if (offset == offsetof(struct ipv6hdr, daddr))
+ field = MLX5_ACTION_IN_FIELD_OUT_DIPV6_127_96;
+ else
+ return -EOPNOTSUPP;
+ break;
+
+ case FLOW_ACT_MANGLE_HDR_TYPE_TCP:
+ MLX5_SET(set_action_in, modact, length, 16);
+ if (offset == offsetof(struct tcphdr, source))
+ field = MLX5_ACTION_IN_FIELD_OUT_TCP_SPORT;
+ else if (offset == offsetof(struct tcphdr, dest))
+ field = MLX5_ACTION_IN_FIELD_OUT_TCP_DPORT;
+ else
+ return -EOPNOTSUPP;
+ break;
+
+ case FLOW_ACT_MANGLE_HDR_TYPE_UDP:
+ MLX5_SET(set_action_in, modact, length, 16);
+ if (offset == offsetof(struct udphdr, source))
+ field = MLX5_ACTION_IN_FIELD_OUT_UDP_SPORT;
+ else if (offset == offsetof(struct udphdr, dest))
+ field = MLX5_ACTION_IN_FIELD_OUT_UDP_DPORT;
+ else
+ return -EOPNOTSUPP;
+ break;
+
+ default:
+ return -EOPNOTSUPP;
+ }
+
+ MLX5_SET(set_action_in, modact, action_type, MLX5_ACTION_TYPE_SET);
+ MLX5_SET(set_action_in, modact, offset, 0);
+ MLX5_SET(set_action_in, modact, field, field);
+ MLX5_SET(set_action_in, modact, data, act->mangle.val);
+
+ return 0;
+}
+
+static int
+mlx5_tc_ct_entry_create_nat(struct mlx5_tc_ct_priv *ct_priv,
+ struct flow_rule *flow_rule,
+ struct mlx5e_tc_mod_hdr_acts *mod_acts)
+{
+ struct flow_action *flow_action = &flow_rule->action;
+ struct mlx5_core_dev *mdev = ct_priv->dev;
+ struct flow_action_entry *act;
+ char *modact;
+ int err, i;
+
+ flow_action_for_each(i, act, flow_action) {
+ switch (act->id) {
+ case FLOW_ACTION_MANGLE: {
+ modact = mlx5e_mod_hdr_alloc(mdev, ct_priv->ns_type, mod_acts);
+ if (IS_ERR(modact))
+ return PTR_ERR(modact);
+
+ err = mlx5_tc_ct_parse_mangle_to_mod_act(act, modact);
+ if (err)
+ return err;
+
+ mod_acts->num_actions++;
+ }
+ break;
+
+ case FLOW_ACTION_CT_METADATA:
+ /* Handled earlier */
+ continue;
+ default:
+ return -EOPNOTSUPP;
+ }
+ }
+
+ return 0;
+}
+
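+/* Build the modify header for a ct entry rule: restore the ct metadata
+ * registers and, on the NAT table, also rewrite the packet headers.
+ * Entries with NAT rewrites get a dedicated modify header; all others
+ * attach to the shared mod_hdr table.
+ */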
+static int
+mlx5_tc_ct_entry_create_mod_hdr(struct mlx5_tc_ct_priv *ct_priv,
+ struct mlx5_flow_attr *attr,
+ struct flow_rule *flow_rule,
+ struct mlx5e_mod_hdr_handle **mh,
+ u8 zone_restore_id, bool nat_table, bool has_nat)
+{
+ DECLARE_MOD_HDR_ACTS_ACTIONS(actions_arr, MLX5_CT_MIN_MOD_ACTS);
+ DECLARE_MOD_HDR_ACTS(mod_acts, actions_arr);
+ struct flow_action_entry *meta;
+ u16 ct_state = 0;
+ int err;
+
+ meta = mlx5_tc_ct_get_ct_metadata_action(flow_rule);
+ if (!meta)
+ return -EOPNOTSUPP;
+
+ err = mlx5_get_label_mapping(ct_priv, meta->ct_metadata.labels,
+ &attr->ct_attr.ct_labels_id);
+ if (err)
+ return -EOPNOTSUPP;
+ if (nat_table) {
+ if (has_nat) {
+ err = mlx5_tc_ct_entry_create_nat(ct_priv, flow_rule, &mod_acts);
+ if (err)
+ goto err_mapping;
+ }
+
+ ct_state |= MLX5_CT_STATE_NAT_BIT;
+ }
+
+ ct_state |= MLX5_CT_STATE_ESTABLISHED_BIT | MLX5_CT_STATE_TRK_BIT;
+ ct_state |= meta->ct_metadata.orig_dir ? 0 : MLX5_CT_STATE_REPLY_BIT;
+ err = mlx5_tc_ct_entry_set_registers(ct_priv, &mod_acts,
+ ct_state,
+ meta->ct_metadata.mark,
+ attr->ct_attr.ct_labels_id,
+ zone_restore_id);
+ if (err)
+ goto err_mapping;
+
+ if (nat_table && has_nat) {
+ attr->modify_hdr = mlx5_modify_header_alloc(ct_priv->dev, ct_priv->ns_type,
+ mod_acts.num_actions,
+ mod_acts.actions);
+ if (IS_ERR(attr->modify_hdr)) {
+ err = PTR_ERR(attr->modify_hdr);
+ goto err_mapping;
+ }
+
+ *mh = NULL;
+ } else {
+ *mh = mlx5e_mod_hdr_attach(ct_priv->dev,
+ ct_priv->mod_hdr_tbl,
+ ct_priv->ns_type,
+ &mod_acts);
+ if (IS_ERR(*mh)) {
+ err = PTR_ERR(*mh);
+ goto err_mapping;
+ }
+ attr->modify_hdr = mlx5e_mod_hdr_get(*mh);
+ }
+
+ mlx5e_mod_hdr_dealloc(&mod_acts);
+ return 0;
+
+err_mapping:
+ mlx5e_mod_hdr_dealloc(&mod_acts);
+ mlx5_put_label_mapping(ct_priv, attr->ct_attr.ct_labels_id);
+ return err;
+}
+
+static void
+mlx5_tc_ct_entry_destroy_mod_hdr(struct mlx5_tc_ct_priv *ct_priv,
+ struct mlx5_flow_attr *attr,
+ struct mlx5e_mod_hdr_handle *mh)
+{
+ if (mh)
+ mlx5e_mod_hdr_detach(ct_priv->dev, ct_priv->mod_hdr_tbl, mh);
+ else
+ mlx5_modify_header_dealloc(ct_priv->dev, attr->modify_hdr);
+}
+
+static int
+mlx5_tc_ct_entry_add_rule(struct mlx5_tc_ct_priv *ct_priv,
+ struct flow_rule *flow_rule,
+ struct mlx5_ct_entry *entry,
+ bool nat, u8 zone_restore_id)
+{
+ struct mlx5_ct_zone_rule *zone_rule = &entry->zone_rules[nat];
+ struct mlx5e_priv *priv = netdev_priv(ct_priv->netdev);
+ struct mlx5_flow_spec *spec = NULL;
+ struct mlx5_flow_attr *attr;
+ int err;
+
+ zone_rule->nat = nat;
+
+ spec = kvzalloc(sizeof(*spec), GFP_KERNEL);
+ if (!spec)
+ return -ENOMEM;
+
+ attr = mlx5_alloc_flow_attr(ct_priv->ns_type);
+ if (!attr) {
+ err = -ENOMEM;
+ goto err_attr;
+ }
+
+ err = mlx5_tc_ct_entry_create_mod_hdr(ct_priv, attr, flow_rule,
+ &zone_rule->mh,
+ zone_restore_id,
+ nat,
+ mlx5_tc_ct_entry_has_nat(entry));
+ if (err) {
+ ct_dbg("Failed to create ct entry mod hdr");
+ goto err_mod_hdr;
+ }
+
+ attr->action = MLX5_FLOW_CONTEXT_ACTION_MOD_HDR |
+ MLX5_FLOW_CONTEXT_ACTION_FWD_DEST |
+ MLX5_FLOW_CONTEXT_ACTION_COUNT;
+ attr->dest_chain = 0;
+ attr->dest_ft = mlx5e_tc_post_act_get_ft(ct_priv->post_act);
+ attr->ft = nat ? ct_priv->ct_nat : ct_priv->ct;
+ if (entry->tuple.ip_proto == IPPROTO_TCP ||
+ entry->tuple.ip_proto == IPPROTO_UDP)
+ attr->outer_match_level = MLX5_MATCH_L4;
+ else
+ attr->outer_match_level = MLX5_MATCH_L3;
+ attr->counter = entry->counter->counter;
+ attr->flags |= MLX5_ATTR_FLAG_NO_IN_PORT;
+ if (ct_priv->ns_type == MLX5_FLOW_NAMESPACE_FDB)
+ attr->esw_attr->in_mdev = priv->mdev;
+
+ mlx5_tc_ct_set_tuple_match(ct_priv, spec, flow_rule);
+ mlx5e_tc_match_to_reg_match(spec, ZONE_TO_REG, entry->tuple.zone, MLX5_CT_ZONE_MASK);
+
+ zone_rule->rule = ct_priv->fs_ops->ct_rule_add(ct_priv->fs, spec, attr, flow_rule);
+ if (IS_ERR(zone_rule->rule)) {
+ err = PTR_ERR(zone_rule->rule);
+ ct_dbg("Failed to add ct entry rule, nat: %d", nat);
+ goto err_rule;
+ }
+
+ zone_rule->attr = attr;
+
+ kvfree(spec);
+ ct_dbg("Offloaded ct entry rule in zone %d", entry->tuple.zone);
+
+ return 0;
+
+err_rule:
+ mlx5_tc_ct_entry_destroy_mod_hdr(ct_priv, zone_rule->attr, zone_rule->mh);
+ mlx5_put_label_mapping(ct_priv, attr->ct_attr.ct_labels_id);
+err_mod_hdr:
+ kfree(attr);
+err_attr:
+ kvfree(spec);
+ return err;
+}
+
+static bool
+mlx5_tc_ct_entry_valid(struct mlx5_ct_entry *entry)
+{
+ return test_bit(MLX5_CT_ENTRY_FLAG_VALID, &entry->flags);
+}
+
+static struct mlx5_ct_entry *
+mlx5_tc_ct_entry_get(struct mlx5_tc_ct_priv *ct_priv, struct mlx5_ct_tuple *tuple)
+{
+ struct mlx5_ct_entry *entry;
+
+ entry = rhashtable_lookup_fast(&ct_priv->ct_tuples_ht, tuple,
+ tuples_ht_params);
+ if (entry && mlx5_tc_ct_entry_valid(entry) &&
+ refcount_inc_not_zero(&entry->refcnt)) {
+ return entry;
+ } else if (!entry) {
+ entry = rhashtable_lookup_fast(&ct_priv->ct_tuples_nat_ht,
+ tuple, tuples_nat_ht_params);
+ if (entry && mlx5_tc_ct_entry_valid(entry) &&
+ refcount_inc_not_zero(&entry->refcnt))
+ return entry;
+ }
+
+ return entry ? ERR_PTR(-EINVAL) : NULL;
+}
+
+static void mlx5_tc_ct_entry_remove_from_tuples(struct mlx5_ct_entry *entry)
+{
+ struct mlx5_tc_ct_priv *ct_priv = entry->ct_priv;
+
+ rhashtable_remove_fast(&ct_priv->ct_tuples_nat_ht,
+ &entry->tuple_nat_node,
+ tuples_nat_ht_params);
+ rhashtable_remove_fast(&ct_priv->ct_tuples_ht, &entry->tuple_node,
+ tuples_ht_params);
+}
+
+static void mlx5_tc_ct_entry_del(struct mlx5_ct_entry *entry)
+{
+ struct mlx5_tc_ct_priv *ct_priv = entry->ct_priv;
+
+ mlx5_tc_ct_entry_del_rules(ct_priv, entry);
+
+ spin_lock_bh(&ct_priv->ht_lock);
+ mlx5_tc_ct_entry_remove_from_tuples(entry);
+ spin_unlock_bh(&ct_priv->ht_lock);
+
+ mlx5_tc_ct_counter_put(ct_priv, entry);
+ kfree(entry);
+}
+
+static void
+mlx5_tc_ct_entry_put(struct mlx5_ct_entry *entry)
+{
+ if (!refcount_dec_and_test(&entry->refcnt))
+ return;
+
+ mlx5_tc_ct_entry_del(entry);
+}
+
+static void mlx5_tc_ct_entry_del_work(struct work_struct *work)
+{
+ struct mlx5_ct_entry *entry = container_of(work, struct mlx5_ct_entry, work);
+
+ mlx5_tc_ct_entry_del(entry);
+}
+
+static void
+__mlx5_tc_ct_entry_put(struct mlx5_ct_entry *entry)
+{
+ if (!refcount_dec_and_test(&entry->refcnt))
+ return;
+
+ INIT_WORK(&entry->work, mlx5_tc_ct_entry_del_work);
+ queue_work(entry->ct_priv->wq, &entry->work);
+}
+
+static struct mlx5_ct_counter *
+mlx5_tc_ct_counter_create(struct mlx5_tc_ct_priv *ct_priv)
+{
+ struct mlx5_ct_counter *counter;
+ int ret;
+
+ counter = kzalloc(sizeof(*counter), GFP_KERNEL);
+ if (!counter)
+ return ERR_PTR(-ENOMEM);
+
+ counter->is_shared = false;
+ counter->counter = mlx5_fc_create_ex(ct_priv->dev, true);
+ if (IS_ERR(counter->counter)) {
+ ct_dbg("Failed to create counter for ct entry");
+ ret = PTR_ERR(counter->counter);
+ kfree(counter);
+ return ERR_PTR(ret);
+ }
+
+ return counter;
+}
+
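+/* Try to reuse the counter of the reverse-direction entry so both
+ * directions of a connection share a single hardware counter; create a
+ * new shared counter if no reverse entry is found.
+ */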
+static struct mlx5_ct_counter *
+mlx5_tc_ct_shared_counter_get(struct mlx5_tc_ct_priv *ct_priv,
+ struct mlx5_ct_entry *entry)
+{
+ struct mlx5_ct_tuple rev_tuple = entry->tuple;
+ struct mlx5_ct_counter *shared_counter;
+ struct mlx5_ct_entry *rev_entry;
+
+ /* get the reversed tuple */
+ swap(rev_tuple.port.src, rev_tuple.port.dst);
+
+ if (rev_tuple.addr_type == FLOW_DISSECTOR_KEY_IPV4_ADDRS) {
+ __be32 tmp_addr = rev_tuple.ip.src_v4;
+
+ rev_tuple.ip.src_v4 = rev_tuple.ip.dst_v4;
+ rev_tuple.ip.dst_v4 = tmp_addr;
+ } else if (rev_tuple.addr_type == FLOW_DISSECTOR_KEY_IPV6_ADDRS) {
+ struct in6_addr tmp_addr = rev_tuple.ip.src_v6;
+
+ rev_tuple.ip.src_v6 = rev_tuple.ip.dst_v6;
+ rev_tuple.ip.dst_v6 = tmp_addr;
+ } else {
+ return ERR_PTR(-EOPNOTSUPP);
+ }
+
+ /* Use the same counter as the reverse direction */
+ spin_lock_bh(&ct_priv->ht_lock);
+ rev_entry = mlx5_tc_ct_entry_get(ct_priv, &rev_tuple);
+
+ if (IS_ERR(rev_entry)) {
+ spin_unlock_bh(&ct_priv->ht_lock);
+ goto create_counter;
+ }
+
+ if (rev_entry && refcount_inc_not_zero(&rev_entry->counter->refcount)) {
+ ct_dbg("Using shared counter entry=0x%p rev=0x%p", entry, rev_entry);
+ shared_counter = rev_entry->counter;
+ spin_unlock_bh(&ct_priv->ht_lock);
+
+ mlx5_tc_ct_entry_put(rev_entry);
+ return shared_counter;
+ }
+
+ spin_unlock_bh(&ct_priv->ht_lock);
+
+create_counter:
+
+ shared_counter = mlx5_tc_ct_counter_create(ct_priv);
+ if (IS_ERR(shared_counter))
+ return shared_counter;
+
+ shared_counter->is_shared = true;
+ refcount_set(&shared_counter->refcount, 1);
+ return shared_counter;
+}
+
+static int
+mlx5_tc_ct_entry_add_rules(struct mlx5_tc_ct_priv *ct_priv,
+ struct flow_rule *flow_rule,
+ struct mlx5_ct_entry *entry,
+ u8 zone_restore_id)
+{
+ int err;
+
+ if (nf_ct_acct_enabled(dev_net(ct_priv->netdev)))
+ entry->counter = mlx5_tc_ct_counter_create(ct_priv);
+ else
+ entry->counter = mlx5_tc_ct_shared_counter_get(ct_priv, entry);
+
+ if (IS_ERR(entry->counter)) {
+ err = PTR_ERR(entry->counter);
+ return err;
+ }
+
+ err = mlx5_tc_ct_entry_add_rule(ct_priv, flow_rule, entry, false,
+ zone_restore_id);
+ if (err)
+ goto err_orig;
+
+ err = mlx5_tc_ct_entry_add_rule(ct_priv, flow_rule, entry, true,
+ zone_restore_id);
+ if (err)
+ goto err_nat;
+
+ atomic_inc(&ct_priv->debugfs.stats.offloaded);
+ return 0;
+
+err_nat:
+ mlx5_tc_ct_entry_del_rule(ct_priv, entry, false);
+err_orig:
+ mlx5_tc_ct_counter_put(ct_priv, entry);
+ return err;
+}
+
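+/* FLOW_CLS_REPLACE handler: allocate a ct entry for the offloaded
+ * connection, insert it into the cookie and tuple hashtables and
+ * offload its plain and NAT rules.
+ */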
+static int
+mlx5_tc_ct_block_flow_offload_add(struct mlx5_ct_ft *ft,
+ struct flow_cls_offload *flow)
+{
+ struct flow_rule *flow_rule = flow_cls_offload_flow_rule(flow);
+ struct mlx5_tc_ct_priv *ct_priv = ft->ct_priv;
+ struct flow_action_entry *meta_action;
+ unsigned long cookie = flow->cookie;
+ struct mlx5_ct_entry *entry;
+ int err;
+
+ meta_action = mlx5_tc_ct_get_ct_metadata_action(flow_rule);
+ if (!meta_action)
+ return -EOPNOTSUPP;
+
+ spin_lock_bh(&ct_priv->ht_lock);
+ entry = rhashtable_lookup_fast(&ft->ct_entries_ht, &cookie, cts_ht_params);
+ if (entry && refcount_inc_not_zero(&entry->refcnt)) {
+ spin_unlock_bh(&ct_priv->ht_lock);
+ mlx5_tc_ct_entry_put(entry);
+ return -EEXIST;
+ }
+ spin_unlock_bh(&ct_priv->ht_lock);
+
+ entry = kzalloc(sizeof(*entry), GFP_KERNEL);
+ if (!entry)
+ return -ENOMEM;
+
+ entry->tuple.zone = ft->zone;
+ entry->cookie = flow->cookie;
+ entry->restore_cookie = meta_action->ct_metadata.cookie;
+ refcount_set(&entry->refcnt, 2);
+ entry->ct_priv = ct_priv;
+
+ err = mlx5_tc_ct_rule_to_tuple(&entry->tuple, flow_rule);
+ if (err)
+ goto err_set;
+
+ memcpy(&entry->tuple_nat, &entry->tuple, sizeof(entry->tuple));
+ err = mlx5_tc_ct_rule_to_tuple_nat(&entry->tuple_nat, flow_rule);
+ if (err)
+ goto err_set;
+
+ spin_lock_bh(&ct_priv->ht_lock);
+
+ err = rhashtable_lookup_insert_fast(&ft->ct_entries_ht, &entry->node,
+ cts_ht_params);
+ if (err)
+ goto err_entries;
+
+ err = rhashtable_lookup_insert_fast(&ct_priv->ct_tuples_ht,
+ &entry->tuple_node,
+ tuples_ht_params);
+ if (err)
+ goto err_tuple;
+
+ if (memcmp(&entry->tuple, &entry->tuple_nat, sizeof(entry->tuple))) {
+ err = rhashtable_lookup_insert_fast(&ct_priv->ct_tuples_nat_ht,
+ &entry->tuple_nat_node,
+ tuples_nat_ht_params);
+ if (err)
+ goto err_tuple_nat;
+ }
+ spin_unlock_bh(&ct_priv->ht_lock);
+
+ err = mlx5_tc_ct_entry_add_rules(ct_priv, flow_rule, entry,
+ ft->zone_restore_id);
+ if (err)
+ goto err_rules;
+
+ set_bit(MLX5_CT_ENTRY_FLAG_VALID, &entry->flags);
+ mlx5_tc_ct_entry_put(entry); /* this function reference */
+
+ return 0;
+
+err_rules:
+ spin_lock_bh(&ct_priv->ht_lock);
+ if (mlx5_tc_ct_entry_has_nat(entry))
+ rhashtable_remove_fast(&ct_priv->ct_tuples_nat_ht,
+ &entry->tuple_nat_node, tuples_nat_ht_params);
+err_tuple_nat:
+ rhashtable_remove_fast(&ct_priv->ct_tuples_ht,
+ &entry->tuple_node,
+ tuples_ht_params);
+err_tuple:
+ rhashtable_remove_fast(&ft->ct_entries_ht,
+ &entry->node,
+ cts_ht_params);
+err_entries:
+ spin_unlock_bh(&ct_priv->ht_lock);
+err_set:
+ kfree(entry);
+ if (err != -EEXIST)
+ netdev_warn(ct_priv->netdev, "Failed to offload ct entry, err: %d\n", err);
+ return err;
+}
+
+static int
+mlx5_tc_ct_block_flow_offload_del(struct mlx5_ct_ft *ft,
+ struct flow_cls_offload *flow)
+{
+ struct mlx5_tc_ct_priv *ct_priv = ft->ct_priv;
+ unsigned long cookie = flow->cookie;
+ struct mlx5_ct_entry *entry;
+
+ spin_lock_bh(&ct_priv->ht_lock);
+ entry = rhashtable_lookup_fast(&ft->ct_entries_ht, &cookie, cts_ht_params);
+ if (!entry) {
+ spin_unlock_bh(&ct_priv->ht_lock);
+ return -ENOENT;
+ }
+
+ if (!mlx5_tc_ct_entry_valid(entry)) {
+ spin_unlock_bh(&ct_priv->ht_lock);
+ return -EINVAL;
+ }
+
+ rhashtable_remove_fast(&ft->ct_entries_ht, &entry->node, cts_ht_params);
+ spin_unlock_bh(&ct_priv->ht_lock);
+
+ mlx5_tc_ct_entry_put(entry);
+
+ return 0;
+}
+
+static int
+mlx5_tc_ct_block_flow_offload_stats(struct mlx5_ct_ft *ft,
+ struct flow_cls_offload *f)
+{
+ struct mlx5_tc_ct_priv *ct_priv = ft->ct_priv;
+ unsigned long cookie = f->cookie;
+ struct mlx5_ct_entry *entry;
+ u64 lastuse, packets, bytes;
+
+ spin_lock_bh(&ct_priv->ht_lock);
+ entry = rhashtable_lookup_fast(&ft->ct_entries_ht, &cookie, cts_ht_params);
+ if (!entry) {
+ spin_unlock_bh(&ct_priv->ht_lock);
+ return -ENOENT;
+ }
+
+ if (!mlx5_tc_ct_entry_valid(entry) || !refcount_inc_not_zero(&entry->refcnt)) {
+ spin_unlock_bh(&ct_priv->ht_lock);
+ return -EINVAL;
+ }
+
+ spin_unlock_bh(&ct_priv->ht_lock);
+
+ mlx5_fc_query_cached(entry->counter->counter, &bytes, &packets, &lastuse);
+ flow_stats_update(&f->stats, bytes, packets, 0, lastuse,
+ FLOW_ACTION_HW_STATS_DELAYED);
+
+ mlx5_tc_ct_entry_put(entry);
+ return 0;
+}
+
+static int
+mlx5_tc_ct_block_flow_offload(enum tc_setup_type type, void *type_data,
+ void *cb_priv)
+{
+ struct flow_cls_offload *f = type_data;
+ struct mlx5_ct_ft *ft = cb_priv;
+
+ if (type != TC_SETUP_CLSFLOWER)
+ return -EOPNOTSUPP;
+
+ switch (f->command) {
+ case FLOW_CLS_REPLACE:
+ return mlx5_tc_ct_block_flow_offload_add(ft, f);
+ case FLOW_CLS_DESTROY:
+ return mlx5_tc_ct_block_flow_offload_del(ft, f);
+ case FLOW_CLS_STATS:
+ return mlx5_tc_ct_block_flow_offload_stats(ft, f);
+ default:
+ break;
+ }
+
+ return -EOPNOTSUPP;
+}
+
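+/* Dissect the skb into a ct tuple in the given zone; only IPv4/IPv6
+ * TCP, UDP and GRE connections can be restored.
+ */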
+static bool
+mlx5_tc_ct_skb_to_tuple(struct sk_buff *skb, struct mlx5_ct_tuple *tuple,
+ u16 zone)
+{
+ struct flow_keys flow_keys;
+
+ skb_reset_network_header(skb);
+ skb_flow_dissect_flow_keys(skb, &flow_keys, FLOW_DISSECTOR_F_STOP_BEFORE_ENCAP);
+
+ tuple->zone = zone;
+
+ if (flow_keys.basic.ip_proto != IPPROTO_TCP &&
+ flow_keys.basic.ip_proto != IPPROTO_UDP &&
+ flow_keys.basic.ip_proto != IPPROTO_GRE)
+ return false;
+
+ if (flow_keys.basic.ip_proto == IPPROTO_TCP ||
+ flow_keys.basic.ip_proto == IPPROTO_UDP) {
+ tuple->port.src = flow_keys.ports.src;
+ tuple->port.dst = flow_keys.ports.dst;
+ }
+ tuple->n_proto = flow_keys.basic.n_proto;
+ tuple->ip_proto = flow_keys.basic.ip_proto;
+
+ switch (flow_keys.basic.n_proto) {
+ case htons(ETH_P_IP):
+ tuple->addr_type = FLOW_DISSECTOR_KEY_IPV4_ADDRS;
+ tuple->ip.src_v4 = flow_keys.addrs.v4addrs.src;
+ tuple->ip.dst_v4 = flow_keys.addrs.v4addrs.dst;
+ break;
+
+ case htons(ETH_P_IPV6):
+ tuple->addr_type = FLOW_DISSECTOR_KEY_IPV6_ADDRS;
+ tuple->ip.src_v6 = flow_keys.addrs.v6addrs.src;
+ tuple->ip.dst_v6 = flow_keys.addrs.v6addrs.dst;
+ break;
+ default:
+ goto out;
+ }
+
+ return true;
+
+out:
+ return false;
+}
+
+int mlx5_tc_ct_add_no_trk_match(struct mlx5_flow_spec *spec)
+{
+ u32 ctstate = 0, ctstate_mask = 0;
+
+ mlx5e_tc_match_to_reg_get_match(spec, CTSTATE_TO_REG,
+ &ctstate, &ctstate_mask);
+
+ if ((ctstate & ctstate_mask) == MLX5_CT_STATE_TRK_BIT)
+ return -EOPNOTSUPP;
+
+ ctstate_mask |= MLX5_CT_STATE_TRK_BIT;
+ mlx5e_tc_match_to_reg_match(spec, CTSTATE_TO_REG,
+ ctstate, ctstate_mask);
+
+ return 0;
+}
+
+void mlx5_tc_ct_match_del(struct mlx5_tc_ct_priv *priv, struct mlx5_ct_attr *ct_attr)
+{
+ if (!priv || !ct_attr->ct_labels_id)
+ return;
+
+ mlx5_put_label_mapping(priv, ct_attr->ct_labels_id);
+}
+
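+/* Translate a flower ct_state/ct_zone/ct_mark/ct_labels match into
+ * matches on the ct metadata registers; matching on +new, +rel or +inv
+ * ct_state is not supported.
+ */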
+int
+mlx5_tc_ct_match_add(struct mlx5_tc_ct_priv *priv,
+ struct mlx5_flow_spec *spec,
+ struct flow_cls_offload *f,
+ struct mlx5_ct_attr *ct_attr,
+ struct netlink_ext_ack *extack)
+{
+ bool trk, est, untrk, unest, new, rpl, unrpl, rel, unrel, inv, uninv;
+ struct flow_rule *rule = flow_cls_offload_flow_rule(f);
+ struct flow_dissector_key_ct *mask, *key;
+ u32 ctstate = 0, ctstate_mask = 0;
+ u16 ct_state_on, ct_state_off;
+ u16 ct_state, ct_state_mask;
+ struct flow_match_ct match;
+ u32 ct_labels[4];
+
+ if (!flow_rule_match_key(rule, FLOW_DISSECTOR_KEY_CT))
+ return 0;
+
+ if (!priv) {
+ NL_SET_ERR_MSG_MOD(extack,
+ "offload of ct matching isn't available");
+ return -EOPNOTSUPP;
+ }
+
+ flow_rule_match_ct(rule, &match);
+
+ key = match.key;
+ mask = match.mask;
+
+ ct_state = key->ct_state;
+ ct_state_mask = mask->ct_state;
+
+ if (ct_state_mask & ~(TCA_FLOWER_KEY_CT_FLAGS_TRACKED |
+ TCA_FLOWER_KEY_CT_FLAGS_ESTABLISHED |
+ TCA_FLOWER_KEY_CT_FLAGS_NEW |
+ TCA_FLOWER_KEY_CT_FLAGS_REPLY |
+ TCA_FLOWER_KEY_CT_FLAGS_RELATED |
+ TCA_FLOWER_KEY_CT_FLAGS_INVALID)) {
+ NL_SET_ERR_MSG_MOD(extack,
+ "only ct_state trk, est, new and rpl are supported for offload");
+ return -EOPNOTSUPP;
+ }
+
+ ct_state_on = ct_state & ct_state_mask;
+ ct_state_off = (ct_state & ct_state_mask) ^ ct_state_mask;
+ trk = ct_state_on & TCA_FLOWER_KEY_CT_FLAGS_TRACKED;
+ new = ct_state_on & TCA_FLOWER_KEY_CT_FLAGS_NEW;
+ est = ct_state_on & TCA_FLOWER_KEY_CT_FLAGS_ESTABLISHED;
+ rpl = ct_state_on & TCA_FLOWER_KEY_CT_FLAGS_REPLY;
+ rel = ct_state_on & TCA_FLOWER_KEY_CT_FLAGS_RELATED;
+ inv = ct_state_on & TCA_FLOWER_KEY_CT_FLAGS_INVALID;
+ untrk = ct_state_off & TCA_FLOWER_KEY_CT_FLAGS_TRACKED;
+ unest = ct_state_off & TCA_FLOWER_KEY_CT_FLAGS_ESTABLISHED;
+ unrpl = ct_state_off & TCA_FLOWER_KEY_CT_FLAGS_REPLY;
+ unrel = ct_state_off & TCA_FLOWER_KEY_CT_FLAGS_RELATED;
+ uninv = ct_state_off & TCA_FLOWER_KEY_CT_FLAGS_INVALID;
+
+ ctstate |= trk ? MLX5_CT_STATE_TRK_BIT : 0;
+ ctstate |= est ? MLX5_CT_STATE_ESTABLISHED_BIT : 0;
+ ctstate |= rpl ? MLX5_CT_STATE_REPLY_BIT : 0;
+ ctstate_mask |= (untrk || trk) ? MLX5_CT_STATE_TRK_BIT : 0;
+ ctstate_mask |= (unest || est) ? MLX5_CT_STATE_ESTABLISHED_BIT : 0;
+ ctstate_mask |= (unrpl || rpl) ? MLX5_CT_STATE_REPLY_BIT : 0;
+ ctstate_mask |= unrel ? MLX5_CT_STATE_RELATED_BIT : 0;
+ ctstate_mask |= uninv ? MLX5_CT_STATE_INVALID_BIT : 0;
+
+ if (rel) {
+ NL_SET_ERR_MSG_MOD(extack,
+ "matching on ct_state +rel isn't supported");
+ return -EOPNOTSUPP;
+ }
+
+ if (inv) {
+ NL_SET_ERR_MSG_MOD(extack,
+ "matching on ct_state +inv isn't supported");
+ return -EOPNOTSUPP;
+ }
+
+ if (new) {
+ NL_SET_ERR_MSG_MOD(extack,
+ "matching on ct_state +new isn't supported");
+ return -EOPNOTSUPP;
+ }
+
+ if (mask->ct_zone)
+ mlx5e_tc_match_to_reg_match(spec, ZONE_TO_REG,
+ key->ct_zone, MLX5_CT_ZONE_MASK);
+ if (ctstate_mask)
+ mlx5e_tc_match_to_reg_match(spec, CTSTATE_TO_REG,
+ ctstate, ctstate_mask);
+ if (mask->ct_mark)
+ mlx5e_tc_match_to_reg_match(spec, MARK_TO_REG,
+ key->ct_mark, mask->ct_mark);
+ if (mask->ct_labels[0] || mask->ct_labels[1] || mask->ct_labels[2] ||
+ mask->ct_labels[3]) {
+ ct_labels[0] = key->ct_labels[0] & mask->ct_labels[0];
+ ct_labels[1] = key->ct_labels[1] & mask->ct_labels[1];
+ ct_labels[2] = key->ct_labels[2] & mask->ct_labels[2];
+ ct_labels[3] = key->ct_labels[3] & mask->ct_labels[3];
+ if (mlx5_get_label_mapping(priv, ct_labels, &ct_attr->ct_labels_id))
+ return -EOPNOTSUPP;
+ mlx5e_tc_match_to_reg_match(spec, LABELS_TO_REG, ct_attr->ct_labels_id,
+ MLX5_CT_LABELS_MASK);
+ }
+
+ return 0;
+}
+
+int
+mlx5_tc_ct_parse_action(struct mlx5_tc_ct_priv *priv,
+ struct mlx5_flow_attr *attr,
+ struct mlx5e_tc_mod_hdr_acts *mod_acts,
+ const struct flow_action_entry *act,
+ struct netlink_ext_ack *extack)
+{
+ if (!priv) {
+ NL_SET_ERR_MSG_MOD(extack,
+ "offload of ct action isn't available");
+ return -EOPNOTSUPP;
+ }
+
+ attr->ct_attr.zone = act->ct.zone;
+ attr->ct_attr.ct_action = act->ct.action;
+ attr->ct_attr.nf_ft = act->ct.flow_table;
+
+ return 0;
+}
+
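+/* Populate a pre_ct table: one rule forwards already-tracked (and, in
+ * the NAT table, NATed) packets of this zone to post_act, and a miss
+ * rule sends everything else to the ct (or ct_nat) table.
+ */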
+static int tc_ct_pre_ct_add_rules(struct mlx5_ct_ft *ct_ft,
+ struct mlx5_tc_ct_pre *pre_ct,
+ bool nat)
+{
+ struct mlx5_tc_ct_priv *ct_priv = ct_ft->ct_priv;
+ struct mlx5e_tc_mod_hdr_acts pre_mod_acts = {};
+ struct mlx5_core_dev *dev = ct_priv->dev;
+ struct mlx5_flow_table *ft = pre_ct->ft;
+ struct mlx5_flow_destination dest = {};
+ struct mlx5_flow_act flow_act = {};
+ struct mlx5_modify_hdr *mod_hdr;
+ struct mlx5_flow_handle *rule;
+ struct mlx5_flow_spec *spec;
+ u32 ctstate;
+ u16 zone;
+ int err;
+
+ spec = kvzalloc(sizeof(*spec), GFP_KERNEL);
+ if (!spec)
+ return -ENOMEM;
+
+ zone = ct_ft->zone & MLX5_CT_ZONE_MASK;
+ err = mlx5e_tc_match_to_reg_set(dev, &pre_mod_acts, ct_priv->ns_type,
+ ZONE_TO_REG, zone);
+ if (err) {
+ ct_dbg("Failed to set zone register mapping");
+ goto err_mapping;
+ }
+
+ mod_hdr = mlx5_modify_header_alloc(dev, ct_priv->ns_type,
+ pre_mod_acts.num_actions,
+ pre_mod_acts.actions);
+
+ if (IS_ERR(mod_hdr)) {
+ err = PTR_ERR(mod_hdr);
+ ct_dbg("Failed to create pre ct mod hdr");
+ goto err_mapping;
+ }
+ pre_ct->modify_hdr = mod_hdr;
+
+ flow_act.action = MLX5_FLOW_CONTEXT_ACTION_FWD_DEST |
+ MLX5_FLOW_CONTEXT_ACTION_MOD_HDR;
+ flow_act.flags |= FLOW_ACT_IGNORE_FLOW_LEVEL;
+ flow_act.modify_hdr = mod_hdr;
+ dest.type = MLX5_FLOW_DESTINATION_TYPE_FLOW_TABLE;
+
+ /* add flow rule */
+ mlx5e_tc_match_to_reg_match(spec, ZONE_TO_REG,
+ zone, MLX5_CT_ZONE_MASK);
+ ctstate = MLX5_CT_STATE_TRK_BIT;
+ if (nat)
+ ctstate |= MLX5_CT_STATE_NAT_BIT;
+ mlx5e_tc_match_to_reg_match(spec, CTSTATE_TO_REG, ctstate, ctstate);
+
+ dest.ft = mlx5e_tc_post_act_get_ft(ct_priv->post_act);
+ rule = mlx5_add_flow_rules(ft, spec, &flow_act, &dest, 1);
+ if (IS_ERR(rule)) {
+ err = PTR_ERR(rule);
+ ct_dbg("Failed to add pre ct flow rule zone %d", zone);
+ goto err_flow_rule;
+ }
+ pre_ct->flow_rule = rule;
+
+ /* add miss rule */
+ dest.ft = nat ? ct_priv->ct_nat : ct_priv->ct;
+ rule = mlx5_add_flow_rules(ft, NULL, &flow_act, &dest, 1);
+ if (IS_ERR(rule)) {
+ err = PTR_ERR(rule);
+ ct_dbg("Failed to add pre ct miss rule zone %d", zone);
+ goto err_miss_rule;
+ }
+ pre_ct->miss_rule = rule;
+
+ mlx5e_mod_hdr_dealloc(&pre_mod_acts);
+ kvfree(spec);
+ return 0;
+
+err_miss_rule:
+ mlx5_del_flow_rules(pre_ct->flow_rule);
+err_flow_rule:
+ mlx5_modify_header_dealloc(dev, pre_ct->modify_hdr);
+err_mapping:
+ mlx5e_mod_hdr_dealloc(&pre_mod_acts);
+ kvfree(spec);
+ return err;
+}
+
+static void
+tc_ct_pre_ct_del_rules(struct mlx5_ct_ft *ct_ft,
+ struct mlx5_tc_ct_pre *pre_ct)
+{
+ struct mlx5_tc_ct_priv *ct_priv = ct_ft->ct_priv;
+ struct mlx5_core_dev *dev = ct_priv->dev;
+
+ mlx5_del_flow_rules(pre_ct->flow_rule);
+ mlx5_del_flow_rules(pre_ct->miss_rule);
+ mlx5_modify_header_dealloc(dev, pre_ct->modify_hdr);
+}
+
+static int
+mlx5_tc_ct_alloc_pre_ct(struct mlx5_ct_ft *ct_ft,
+ struct mlx5_tc_ct_pre *pre_ct,
+ bool nat)
+{
+ int inlen = MLX5_ST_SZ_BYTES(create_flow_group_in);
+ struct mlx5_tc_ct_priv *ct_priv = ct_ft->ct_priv;
+ struct mlx5_core_dev *dev = ct_priv->dev;
+ struct mlx5_flow_table_attr ft_attr = {};
+ struct mlx5_flow_namespace *ns;
+ struct mlx5_flow_table *ft;
+ struct mlx5_flow_group *g;
+ u32 metadata_reg_c_2_mask;
+ u32 *flow_group_in;
+ void *misc;
+ int err;
+
+ ns = mlx5_get_flow_namespace(dev, ct_priv->ns_type);
+ if (!ns) {
+ err = -EOPNOTSUPP;
+ ct_dbg("Failed to get flow namespace");
+ return err;
+ }
+
+ flow_group_in = kvzalloc(inlen, GFP_KERNEL);
+ if (!flow_group_in)
+ return -ENOMEM;
+
+ ft_attr.flags = MLX5_FLOW_TABLE_UNMANAGED;
+ ft_attr.prio = ct_priv->ns_type == MLX5_FLOW_NAMESPACE_FDB ?
+ FDB_TC_OFFLOAD : MLX5E_TC_PRIO;
+ ft_attr.max_fte = 2;
+ ft_attr.level = 1;
+ ft = mlx5_create_flow_table(ns, &ft_attr);
+ if (IS_ERR(ft)) {
+ err = PTR_ERR(ft);
+ ct_dbg("Failed to create pre ct table");
+ goto out_free;
+ }
+ pre_ct->ft = ft;
+
+ /* create flow group */
+ MLX5_SET(create_flow_group_in, flow_group_in, start_flow_index, 0);
+ MLX5_SET(create_flow_group_in, flow_group_in, end_flow_index, 0);
+ MLX5_SET(create_flow_group_in, flow_group_in, match_criteria_enable,
+ MLX5_MATCH_MISC_PARAMETERS_2);
+
+ misc = MLX5_ADDR_OF(create_flow_group_in, flow_group_in,
+ match_criteria.misc_parameters_2);
+
+ metadata_reg_c_2_mask = MLX5_CT_ZONE_MASK;
+ metadata_reg_c_2_mask |= (MLX5_CT_STATE_TRK_BIT << 16);
+ if (nat)
+ metadata_reg_c_2_mask |= (MLX5_CT_STATE_NAT_BIT << 16);
+
+ MLX5_SET(fte_match_set_misc2, misc, metadata_reg_c_2,
+ metadata_reg_c_2_mask);
+
+ g = mlx5_create_flow_group(ft, flow_group_in);
+ if (IS_ERR(g)) {
+ err = PTR_ERR(g);
+ ct_dbg("Failed to create pre ct group");
+ goto err_flow_grp;
+ }
+ pre_ct->flow_grp = g;
+
+ /* create miss group */
+ memset(flow_group_in, 0, inlen);
+ MLX5_SET(create_flow_group_in, flow_group_in, start_flow_index, 1);
+ MLX5_SET(create_flow_group_in, flow_group_in, end_flow_index, 1);
+ g = mlx5_create_flow_group(ft, flow_group_in);
+ if (IS_ERR(g)) {
+ err = PTR_ERR(g);
+ ct_dbg("Failed to create pre ct miss group");
+ goto err_miss_grp;
+ }
+ pre_ct->miss_grp = g;
+
+ err = tc_ct_pre_ct_add_rules(ct_ft, pre_ct, nat);
+ if (err)
+ goto err_add_rules;
+
+ kvfree(flow_group_in);
+ return 0;
+
+err_add_rules:
+ mlx5_destroy_flow_group(pre_ct->miss_grp);
+err_miss_grp:
+ mlx5_destroy_flow_group(pre_ct->flow_grp);
+err_flow_grp:
+ mlx5_destroy_flow_table(ft);
+out_free:
+ kvfree(flow_group_in);
+ return err;
+}
+
+static void
+mlx5_tc_ct_free_pre_ct(struct mlx5_ct_ft *ct_ft,
+ struct mlx5_tc_ct_pre *pre_ct)
+{
+ tc_ct_pre_ct_del_rules(ct_ft, pre_ct);
+ mlx5_destroy_flow_group(pre_ct->miss_grp);
+ mlx5_destroy_flow_group(pre_ct->flow_grp);
+ mlx5_destroy_flow_table(pre_ct->ft);
+}
+
+static int
+mlx5_tc_ct_alloc_pre_ct_tables(struct mlx5_ct_ft *ft)
+{
+ int err;
+
+ err = mlx5_tc_ct_alloc_pre_ct(ft, &ft->pre_ct, false);
+ if (err)
+ return err;
+
+ err = mlx5_tc_ct_alloc_pre_ct(ft, &ft->pre_ct_nat, true);
+ if (err)
+ goto err_pre_ct_nat;
+
+ return 0;
+
+err_pre_ct_nat:
+ mlx5_tc_ct_free_pre_ct(ft, &ft->pre_ct);
+ return err;
+}
+
+static void
+mlx5_tc_ct_free_pre_ct_tables(struct mlx5_ct_ft *ft)
+{
+ mlx5_tc_ct_free_pre_ct(ft, &ft->pre_ct_nat);
+ mlx5_tc_ct_free_pre_ct(ft, &ft->pre_ct);
+}
+
+/* Give the ct_entries_ht mutex its own lock class to avoid a false
+ * lockdep dependency warning: deleting the last flow from a group and
+ * then the group itself ends up in del_sw_flow_group(), which calls
+ * rhashtable_destroy() on fg->ftes_hash and takes that table's
+ * ht->mutex, a different mutex than the ht->mutex used here.
+ */
+static struct lock_class_key ct_entries_ht_lock_key;
+
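+/* Get or create the per-zone ct flow table context and register
+ * mlx5_tc_ct_block_flow_offload() with the nf flowtable to receive
+ * connection offload events for this zone.
+ */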
+static struct mlx5_ct_ft *
+mlx5_tc_ct_add_ft_cb(struct mlx5_tc_ct_priv *ct_priv, u16 zone,
+ struct nf_flowtable *nf_ft)
+{
+ struct mlx5_ct_ft *ft;
+ int err;
+
+ ft = rhashtable_lookup_fast(&ct_priv->zone_ht, &zone, zone_params);
+ if (ft) {
+ refcount_inc(&ft->refcount);
+ return ft;
+ }
+
+ ft = kzalloc(sizeof(*ft), GFP_KERNEL);
+ if (!ft)
+ return ERR_PTR(-ENOMEM);
+
+ err = mapping_add(ct_priv->zone_mapping, &zone, &ft->zone_restore_id);
+ if (err)
+ goto err_mapping;
+
+ ft->zone = zone;
+ ft->nf_ft = nf_ft;
+ ft->ct_priv = ct_priv;
+ refcount_set(&ft->refcount, 1);
+
+ err = mlx5_tc_ct_alloc_pre_ct_tables(ft);
+ if (err)
+ goto err_alloc_pre_ct;
+
+ err = rhashtable_init(&ft->ct_entries_ht, &cts_ht_params);
+ if (err)
+ goto err_init;
+
+ lockdep_set_class(&ft->ct_entries_ht.mutex, &ct_entries_ht_lock_key);
+
+ err = rhashtable_insert_fast(&ct_priv->zone_ht, &ft->node,
+ zone_params);
+ if (err)
+ goto err_insert;
+
+ err = nf_flow_table_offload_add_cb(ft->nf_ft,
+ mlx5_tc_ct_block_flow_offload, ft);
+ if (err)
+ goto err_add_cb;
+
+ return ft;
+
+err_add_cb:
+ rhashtable_remove_fast(&ct_priv->zone_ht, &ft->node, zone_params);
+err_insert:
+ rhashtable_destroy(&ft->ct_entries_ht);
+err_init:
+ mlx5_tc_ct_free_pre_ct_tables(ft);
+err_alloc_pre_ct:
+ mapping_remove(ct_priv->zone_mapping, ft->zone_restore_id);
+err_mapping:
+ kfree(ft);
+ return ERR_PTR(err);
+}
+
+static void
+mlx5_tc_ct_flush_ft_entry(void *ptr, void *arg)
+{
+ struct mlx5_ct_entry *entry = ptr;
+
+ mlx5_tc_ct_entry_put(entry);
+}
+
+static void
+mlx5_tc_ct_del_ft_cb(struct mlx5_tc_ct_priv *ct_priv, struct mlx5_ct_ft *ft)
+{
+ if (!refcount_dec_and_test(&ft->refcount))
+ return;
+
+ flush_workqueue(ct_priv->wq);
+ nf_flow_table_offload_del_cb(ft->nf_ft,
+ mlx5_tc_ct_block_flow_offload, ft);
+ rhashtable_remove_fast(&ct_priv->zone_ht, &ft->node, zone_params);
+ rhashtable_free_and_destroy(&ft->ct_entries_ht,
+ mlx5_tc_ct_flush_ft_entry,
+ ct_priv);
+ mlx5_tc_ct_free_pre_ct_tables(ft);
+ mapping_remove(ct_priv->zone_mapping, ft->zone_restore_id);
+ kfree(ft);
+}
+
+/* We translate the tc filter with CT action to the following HW model:
+ *
+ * +---------------------+
+ * + ft prio (tc chain) +
+ * + original match +
+ * +---------------------+
+ * | set chain miss mapping
+ * | set fte_id
+ * | set tunnel_id
+ * | do decap
+ * v
+ * +---------------------+
+ * + pre_ct/pre_ct_nat + if matches +-------------------------+
+ * + zone+nat match +---------------->+ post_act (see below) +
+ * +---------------------+ set zone +-------------------------+
+ * | set zone
+ * v
+ * +--------------------+
+ * + CT (nat or no nat) +
+ * + tuple + zone match +
+ * +--------------------+
+ * | set mark
+ * | set labels_id
+ * | set established
+ * | set zone_restore
+ * | do nat (if needed)
+ * v
+ * +--------------+
+ * + post_act + original filter actions
+ * + fte_id match +------------------------>
+ * +--------------+
+ */
+static struct mlx5_flow_handle *
+__mlx5_tc_ct_flow_offload(struct mlx5_tc_ct_priv *ct_priv,
+ struct mlx5_flow_spec *orig_spec,
+ struct mlx5_flow_attr *attr)
+{
+ bool nat = attr->ct_attr.ct_action & TCA_CT_ACT_NAT;
+ struct mlx5e_priv *priv = netdev_priv(ct_priv->netdev);
+ struct mlx5e_tc_mod_hdr_acts *pre_mod_acts;
+ u32 attr_sz = ns_to_attr_sz(ct_priv->ns_type);
+ struct mlx5_flow_attr *pre_ct_attr;
+ struct mlx5_modify_hdr *mod_hdr;
+ struct mlx5_ct_flow *ct_flow;
+ int chain_mapping = 0, err;
+ struct mlx5_ct_ft *ft;
+
+ ct_flow = kzalloc(sizeof(*ct_flow), GFP_KERNEL);
+ if (!ct_flow)
+ return ERR_PTR(-ENOMEM);
+
+ /* Register for CT established events */
+ ft = mlx5_tc_ct_add_ft_cb(ct_priv, attr->ct_attr.zone,
+ attr->ct_attr.nf_ft);
+ if (IS_ERR(ft)) {
+ err = PTR_ERR(ft);
+ ct_dbg("Failed to register to ft callback");
+ goto err_ft;
+ }
+ ct_flow->ft = ft;
+
+ /* Base flow attributes of both rules on original rule attribute */
+ ct_flow->pre_ct_attr = mlx5_alloc_flow_attr(ct_priv->ns_type);
+ if (!ct_flow->pre_ct_attr) {
+ err = -ENOMEM;
+ goto err_alloc_pre;
+ }
+
+ pre_ct_attr = ct_flow->pre_ct_attr;
+ memcpy(pre_ct_attr, attr, attr_sz);
+ pre_mod_acts = &pre_ct_attr->parse_attr->mod_hdr_acts;
+
+ /* Modify the original rule's action to fwd and modify, leave decap */
+ pre_ct_attr->action = attr->action & MLX5_FLOW_CONTEXT_ACTION_DECAP;
+ pre_ct_attr->action |= MLX5_FLOW_CONTEXT_ACTION_FWD_DEST |
+ MLX5_FLOW_CONTEXT_ACTION_MOD_HDR;
+
+ /* Write the chain miss tag so that a miss in the ct table can be
+ * restored to the original chain, since we don't go through all the
+ * priorities of this chain the way a normal tc rule miss does.
+ */
+ err = mlx5_chains_get_chain_mapping(ct_priv->chains, attr->chain,
+ &chain_mapping);
+ if (err) {
+ ct_dbg("Failed to get chain register mapping for chain");
+ goto err_get_chain;
+ }
+ ct_flow->chain_mapping = chain_mapping;
+
+ err = mlx5e_tc_match_to_reg_set(priv->mdev, pre_mod_acts, ct_priv->ns_type,
+ CHAIN_TO_REG, chain_mapping);
+ if (err) {
+ ct_dbg("Failed to set chain register mapping");
+ goto err_mapping;
+ }
+
+ /* If the original flow decapsulates, the decap happens before entering
+ * the ct table, so add a rewrite of the tunnel mapping id so it can be
+ * restored afterwards.
+ */
+ if ((pre_ct_attr->action & MLX5_FLOW_CONTEXT_ACTION_DECAP) &&
+ attr->chain == 0) {
+ err = mlx5e_tc_match_to_reg_set(priv->mdev, pre_mod_acts,
+ ct_priv->ns_type,
+ TUNNEL_TO_REG,
+ attr->tunnel_id);
+ if (err) {
+ ct_dbg("Failed to set tunnel register mapping");
+ goto err_mapping;
+ }
+ }
+
+ mod_hdr = mlx5_modify_header_alloc(priv->mdev, ct_priv->ns_type,
+ pre_mod_acts->num_actions,
+ pre_mod_acts->actions);
+ if (IS_ERR(mod_hdr)) {
+ err = PTR_ERR(mod_hdr);
+ ct_dbg("Failed to create pre ct mod hdr");
+ goto err_mapping;
+ }
+ pre_ct_attr->modify_hdr = mod_hdr;
+
+ /* Change the original rule to point to the ct table */
+ pre_ct_attr->dest_chain = 0;
+ pre_ct_attr->dest_ft = nat ? ft->pre_ct_nat.ft : ft->pre_ct.ft;
+ ct_flow->pre_ct_rule = mlx5_tc_rule_insert(priv, orig_spec,
+ pre_ct_attr);
+ if (IS_ERR(ct_flow->pre_ct_rule)) {
+ err = PTR_ERR(ct_flow->pre_ct_rule);
+ ct_dbg("Failed to add pre ct rule");
+ goto err_insert_orig;
+ }
+
+ attr->ct_attr.ct_flow = ct_flow;
+ mlx5e_mod_hdr_dealloc(pre_mod_acts);
+
+ return ct_flow->pre_ct_rule;
+
+err_insert_orig:
+ mlx5_modify_header_dealloc(priv->mdev, pre_ct_attr->modify_hdr);
+err_mapping:
+ mlx5e_mod_hdr_dealloc(pre_mod_acts);
+ mlx5_chains_put_chain_mapping(ct_priv->chains, ct_flow->chain_mapping);
+err_get_chain:
+ kfree(ct_flow->pre_ct_attr);
+err_alloc_pre:
+ mlx5_tc_ct_del_ft_cb(ct_priv, ft);
+err_ft:
+ kfree(ct_flow);
+ netdev_warn(priv->netdev, "Failed to offload ct flow, err %d\n", err);
+ return ERR_PTR(err);
+}
+
+struct mlx5_flow_handle *
+mlx5_tc_ct_flow_offload(struct mlx5_tc_ct_priv *priv,
+ struct mlx5_flow_spec *spec,
+ struct mlx5_flow_attr *attr,
+ struct mlx5e_tc_mod_hdr_acts *mod_hdr_acts)
+{
+ struct mlx5_flow_handle *rule;
+
+ if (!priv)
+ return ERR_PTR(-EOPNOTSUPP);
+
+ mutex_lock(&priv->control_lock);
+ rule = __mlx5_tc_ct_flow_offload(priv, spec, attr);
+ mutex_unlock(&priv->control_lock);
+
+ return rule;
+}
+
+static void
+__mlx5_tc_ct_delete_flow(struct mlx5_tc_ct_priv *ct_priv,
+ struct mlx5_ct_flow *ct_flow,
+ struct mlx5_flow_attr *attr)
+{
+ struct mlx5_flow_attr *pre_ct_attr = ct_flow->pre_ct_attr;
+ struct mlx5e_priv *priv = netdev_priv(ct_priv->netdev);
+
+ mlx5_tc_rule_delete(priv, ct_flow->pre_ct_rule, pre_ct_attr);
+ mlx5_modify_header_dealloc(priv->mdev, pre_ct_attr->modify_hdr);
+
+ mlx5_chains_put_chain_mapping(ct_priv->chains, ct_flow->chain_mapping);
+ mlx5_tc_ct_del_ft_cb(ct_priv, ct_flow->ft);
+
+ kfree(ct_flow->pre_ct_attr);
+ kfree(ct_flow);
+}
+
+void
+mlx5_tc_ct_delete_flow(struct mlx5_tc_ct_priv *priv,
+ struct mlx5_flow_attr *attr)
+{
+ struct mlx5_ct_flow *ct_flow = attr->ct_attr.ct_flow;
+
+ /* We may be called on the error path to clean up parsing state,
+ * in which case there is nothing to do yet.
+ */
+ if (!ct_flow)
+ return;
+
+ mutex_lock(&priv->control_lock);
+ __mlx5_tc_ct_delete_flow(priv, ct_flow, attr);
+ mutex_unlock(&priv->control_lock);
+}
+
+static int
+mlx5_tc_ct_fs_init(struct mlx5_tc_ct_priv *ct_priv)
+{
+ struct mlx5_flow_table *post_ct = mlx5e_tc_post_act_get_ft(ct_priv->post_act);
+ struct mlx5_ct_fs_ops *fs_ops = mlx5_ct_fs_dmfs_ops_get();
+ int err;
+
+ if (ct_priv->ns_type == MLX5_FLOW_NAMESPACE_FDB &&
+ ct_priv->dev->priv.steering->mode == MLX5_FLOW_STEERING_MODE_SMFS) {
+ ct_dbg("Using SMFS ct flow steering provider");
+ fs_ops = mlx5_ct_fs_smfs_ops_get();
+ }
+
+ ct_priv->fs = kzalloc(sizeof(*ct_priv->fs) + fs_ops->priv_size, GFP_KERNEL);
+ if (!ct_priv->fs)
+ return -ENOMEM;
+
+ ct_priv->fs->netdev = ct_priv->netdev;
+ ct_priv->fs->dev = ct_priv->dev;
+ ct_priv->fs_ops = fs_ops;
+
+ err = ct_priv->fs_ops->init(ct_priv->fs, ct_priv->ct, ct_priv->ct_nat, post_ct);
+ if (err)
+ goto err_init;
+
+ return 0;
+
+err_init:
+ kfree(ct_priv->fs);
+ return err;
+}
+
+static int
+mlx5_tc_ct_init_check_esw_support(struct mlx5_eswitch *esw,
+ const char **err_msg)
+{
+ if (!mlx5_eswitch_vlan_actions_supported(esw->dev, 1)) {
+ /* The vlan workaround cannot be used with multi-chain rules.
+ * This is only a sanity check, as the pop vlan action should be
+ * supported by any FW that supports ignore_flow_level.
+ */
+
+ *err_msg = "firmware vlan actions support is missing";
+ return -EOPNOTSUPP;
+ }
+
+ if (!MLX5_CAP_ESW_FLOWTABLE(esw->dev,
+ fdb_modify_header_fwd_to_table)) {
+ /* CT always writes to registers, which requires modify header
+ * actions. Therefore, both modify header and forward to table
+ * must be supported.
+ */
+
+ *err_msg = "firmware fwd and modify support is missing";
+ return -EOPNOTSUPP;
+ }
+
+ if (!mlx5_eswitch_reg_c1_loopback_enabled(esw)) {
+ *err_msg = "register loopback isn't supported";
+ return -EOPNOTSUPP;
+ }
+
+ return 0;
+}
+
+static int
+mlx5_tc_ct_init_check_support(struct mlx5e_priv *priv,
+ enum mlx5_flow_namespace_type ns_type,
+ struct mlx5e_post_act *post_act)
+{
+ struct mlx5_eswitch *esw = priv->mdev->priv.eswitch;
+ const char *err_msg = NULL;
+ int err = 0;
+
+#if !IS_ENABLED(CONFIG_NET_TC_SKB_EXT)
+ /* cannot restore chain ID on HW miss */
+
+ err_msg = "tc skb extension missing";
+ err = -EOPNOTSUPP;
+ goto out_err;
+#endif
+ if (IS_ERR_OR_NULL(post_act)) {
+ /* Ignore_flow_level support isn't supported by default for VFs and so post_act
+ * won't be supported. Skip showing error msg.
+ */
+ if (priv->mdev->coredev_type == MLX5_COREDEV_PF)
+ err_msg = "post action is missing";
+ err = -EOPNOTSUPP;
+ goto out_err;
+ }
+
+ if (ns_type == MLX5_FLOW_NAMESPACE_FDB)
+ err = mlx5_tc_ct_init_check_esw_support(esw, &err_msg);
+
+out_err:
+ if (err && err_msg)
+ netdev_dbg(priv->netdev, "tc ct offload not supported, %s\n", err_msg);
+ return err;
+}
+
+static void
+mlx5_ct_tc_create_dbgfs(struct mlx5_tc_ct_priv *ct_priv)
+{
+ struct mlx5_tc_ct_debugfs *ct_dbgfs = &ct_priv->debugfs;
+
+ ct_dbgfs->root = debugfs_create_dir("ct", mlx5_debugfs_get_dev_root(ct_priv->dev));
+ debugfs_create_atomic_t("offloaded", 0400, ct_dbgfs->root,
+ &ct_dbgfs->stats.offloaded);
+ debugfs_create_atomic_t("rx_dropped", 0400, ct_dbgfs->root,
+ &ct_dbgfs->stats.rx_dropped);
+}
+
+static void
+mlx5_ct_tc_remove_dbgfs(struct mlx5_tc_ct_priv *ct_priv)
+{
+ debugfs_remove_recursive(ct_priv->debugfs.root);
+}
+
+#define INIT_ERR_PREFIX "tc ct offload init failed"
+
+struct mlx5_tc_ct_priv *
+mlx5_tc_ct_init(struct mlx5e_priv *priv, struct mlx5_fs_chains *chains,
+ struct mod_hdr_tbl *mod_hdr,
+ enum mlx5_flow_namespace_type ns_type,
+ struct mlx5e_post_act *post_act)
+{
+ struct mlx5_tc_ct_priv *ct_priv;
+ struct mlx5_core_dev *dev;
+ u64 mapping_id;
+ int err;
+
+ dev = priv->mdev;
+ err = mlx5_tc_ct_init_check_support(priv, ns_type, post_act);
+ if (err)
+ goto err_support;
+
+ ct_priv = kzalloc(sizeof(*ct_priv), GFP_KERNEL);
+ if (!ct_priv)
+ goto err_alloc;
+
+ mapping_id = mlx5_query_nic_system_image_guid(dev);
+
+ ct_priv->zone_mapping = mapping_create_for_id(mapping_id, MAPPING_TYPE_ZONE,
+ sizeof(u16), 0, true);
+ if (IS_ERR(ct_priv->zone_mapping)) {
+ err = PTR_ERR(ct_priv->zone_mapping);
+ goto err_mapping_zone;
+ }
+
+ ct_priv->labels_mapping = mapping_create_for_id(mapping_id, MAPPING_TYPE_LABELS,
+ sizeof(u32) * 4, 0, true);
+ if (IS_ERR(ct_priv->labels_mapping)) {
+ err = PTR_ERR(ct_priv->labels_mapping);
+ goto err_mapping_labels;
+ }
+
+ spin_lock_init(&ct_priv->ht_lock);
+ ct_priv->ns_type = ns_type;
+ ct_priv->chains = chains;
+ ct_priv->netdev = priv->netdev;
+ ct_priv->dev = priv->mdev;
+ ct_priv->mod_hdr_tbl = mod_hdr;
+ ct_priv->ct = mlx5_chains_create_global_table(chains);
+ if (IS_ERR(ct_priv->ct)) {
+ err = PTR_ERR(ct_priv->ct);
+ mlx5_core_warn(dev,
+ "%s, failed to create ct table err: %d\n",
+ INIT_ERR_PREFIX, err);
+ goto err_ct_tbl;
+ }
+
+ ct_priv->ct_nat = mlx5_chains_create_global_table(chains);
+ if (IS_ERR(ct_priv->ct_nat)) {
+ err = PTR_ERR(ct_priv->ct_nat);
+ mlx5_core_warn(dev,
+ "%s, failed to create ct nat table err: %d\n",
+ INIT_ERR_PREFIX, err);
+ goto err_ct_nat_tbl;
+ }
+
+ ct_priv->post_act = post_act;
+ mutex_init(&ct_priv->control_lock);
+ if (rhashtable_init(&ct_priv->zone_ht, &zone_params))
+ goto err_ct_zone_ht;
+ if (rhashtable_init(&ct_priv->ct_tuples_ht, &tuples_ht_params))
+ goto err_ct_tuples_ht;
+ if (rhashtable_init(&ct_priv->ct_tuples_nat_ht, &tuples_nat_ht_params))
+ goto err_ct_tuples_nat_ht;
+
+ ct_priv->wq = alloc_ordered_workqueue("mlx5e_ct_priv_wq", 0);
+ if (!ct_priv->wq) {
+ err = -ENOMEM;
+ goto err_wq;
+ }
+
+ err = mlx5_tc_ct_fs_init(ct_priv);
+ if (err)
+ goto err_init_fs;
+
+ mlx5_ct_tc_create_dbgfs(ct_priv);
+ return ct_priv;
+
+err_init_fs:
+ destroy_workqueue(ct_priv->wq);
+err_wq:
+ rhashtable_destroy(&ct_priv->ct_tuples_nat_ht);
+err_ct_tuples_nat_ht:
+ rhashtable_destroy(&ct_priv->ct_tuples_ht);
+err_ct_tuples_ht:
+ rhashtable_destroy(&ct_priv->zone_ht);
+err_ct_zone_ht:
+ mlx5_chains_destroy_global_table(chains, ct_priv->ct_nat);
+err_ct_nat_tbl:
+ mlx5_chains_destroy_global_table(chains, ct_priv->ct);
+err_ct_tbl:
+ mapping_destroy(ct_priv->labels_mapping);
+err_mapping_labels:
+ mapping_destroy(ct_priv->zone_mapping);
+err_mapping_zone:
+ kfree(ct_priv);
+err_alloc:
+err_support:
+
+ return NULL;
+}
+
+void
+mlx5_tc_ct_clean(struct mlx5_tc_ct_priv *ct_priv)
+{
+ struct mlx5_fs_chains *chains;
+
+ if (!ct_priv)
+ return;
+
+ destroy_workqueue(ct_priv->wq);
+ mlx5_ct_tc_remove_dbgfs(ct_priv);
+ chains = ct_priv->chains;
+
+ ct_priv->fs_ops->destroy(ct_priv->fs);
+ kfree(ct_priv->fs);
+
+ mlx5_chains_destroy_global_table(chains, ct_priv->ct_nat);
+ mlx5_chains_destroy_global_table(chains, ct_priv->ct);
+ mapping_destroy(ct_priv->zone_mapping);
+ mapping_destroy(ct_priv->labels_mapping);
+
+ rhashtable_destroy(&ct_priv->ct_tuples_ht);
+ rhashtable_destroy(&ct_priv->ct_tuples_nat_ht);
+ rhashtable_destroy(&ct_priv->zone_ht);
+ mutex_destroy(&ct_priv->control_lock);
+ kfree(ct_priv);
+}
+
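+/* Restore conntrack metadata on the skb after a hardware miss: map the
+ * zone restore id back to its zone, look up the ct entry by the skb
+ * tuple and restore the ct info; returns false (counted in rx_dropped)
+ * when the connection cannot be restored.
+ */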
+bool
+mlx5e_tc_ct_restore_flow(struct mlx5_tc_ct_priv *ct_priv,
+ struct sk_buff *skb, u8 zone_restore_id)
+{
+ struct mlx5_ct_tuple tuple = {};
+ struct mlx5_ct_entry *entry;
+ u16 zone;
+
+ if (!ct_priv || !zone_restore_id)
+ return true;
+
+ if (mapping_find(ct_priv->zone_mapping, zone_restore_id, &zone))
+ goto out_inc_drop;
+
+ if (!mlx5_tc_ct_skb_to_tuple(skb, &tuple, zone))
+ goto out_inc_drop;
+
+ spin_lock(&ct_priv->ht_lock);
+
+ entry = mlx5_tc_ct_entry_get(ct_priv, &tuple);
+ if (!entry) {
+ spin_unlock(&ct_priv->ht_lock);
+ goto out_inc_drop;
+ }
+
+ if (IS_ERR(entry)) {
+ spin_unlock(&ct_priv->ht_lock);
+ goto out_inc_drop;
+ }
+ spin_unlock(&ct_priv->ht_lock);
+
+ tcf_ct_flow_table_restore_skb(skb, entry->restore_cookie);
+ __mlx5_tc_ct_entry_put(entry);
+
+ return true;
+
+out_inc_drop:
+ atomic_inc(&ct_priv->debugfs.stats.rx_dropped);
+ return false;
+}