diff options
Diffstat (limited to 'drivers/net/ethernet/netronome/nfp/bpf')
-rw-r--r-- | drivers/net/ethernet/netronome/nfp/bpf/Makefile | 2 | ||||
-rw-r--r-- | drivers/net/ethernet/netronome/nfp/bpf/cmsg.c | 486 | ||||
-rw-r--r-- | drivers/net/ethernet/netronome/nfp/bpf/fw.h | 178 | ||||
-rw-r--r-- | drivers/net/ethernet/netronome/nfp/bpf/jit.c | 4123 | ||||
-rw-r--r-- | drivers/net/ethernet/netronome/nfp/bpf/main.c | 511 | ||||
-rw-r--r-- | drivers/net/ethernet/netronome/nfp/bpf/main.h | 521 | ||||
-rw-r--r-- | drivers/net/ethernet/netronome/nfp/bpf/offload.c | 627 | ||||
-rw-r--r-- | drivers/net/ethernet/netronome/nfp/bpf/verifier.c | 693 |
8 files changed, 7141 insertions, 0 deletions
diff --git a/drivers/net/ethernet/netronome/nfp/bpf/Makefile b/drivers/net/ethernet/netronome/nfp/bpf/Makefile new file mode 100644 index 000000000..805fa28f3 --- /dev/null +++ b/drivers/net/ethernet/netronome/nfp/bpf/Makefile @@ -0,0 +1,2 @@ +# SPDX-License-Identifier: GPL-2.0 +# kbuild requires Makefile in a directory to build individual objects diff --git a/drivers/net/ethernet/netronome/nfp/bpf/cmsg.c b/drivers/net/ethernet/netronome/nfp/bpf/cmsg.c new file mode 100644 index 000000000..2572a4b91 --- /dev/null +++ b/drivers/net/ethernet/netronome/nfp/bpf/cmsg.c @@ -0,0 +1,486 @@ +/* + * Copyright (C) 2017-2018 Netronome Systems, Inc. + * + * This software is dual licensed under the GNU General License Version 2, + * June 1991 as shown in the file COPYING in the top-level directory of this + * source tree or the BSD 2-Clause License provided below. You have the + * option to license this software under the complete terms of either license. + * + * The BSD 2-Clause License: + * + * Redistribution and use in source and binary forms, with or + * without modification, are permitted provided that the following + * conditions are met: + * + * 1. Redistributions of source code must retain the above + * copyright notice, this list of conditions and the following + * disclaimer. + * + * 2. Redistributions in binary form must reproduce the above + * copyright notice, this list of conditions and the following + * disclaimer in the documentation and/or other materials + * provided with the distribution. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, + * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND + * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS + * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN + * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN + * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. + */ + +#include <linux/bpf.h> +#include <linux/bitops.h> +#include <linux/bug.h> +#include <linux/jiffies.h> +#include <linux/skbuff.h> +#include <linux/wait.h> + +#include "../nfp_app.h" +#include "../nfp_net.h" +#include "fw.h" +#include "main.h" + +#define NFP_BPF_TAG_ALLOC_SPAN (U16_MAX / 4) + +static bool nfp_bpf_all_tags_busy(struct nfp_app_bpf *bpf) +{ + u16 used_tags; + + used_tags = bpf->tag_alloc_next - bpf->tag_alloc_last; + + return used_tags > NFP_BPF_TAG_ALLOC_SPAN; +} + +static int nfp_bpf_alloc_tag(struct nfp_app_bpf *bpf) +{ + /* All FW communication for BPF is request-reply. To make sure we + * don't reuse the message ID too early after timeout - limit the + * number of requests in flight. + */ + if (nfp_bpf_all_tags_busy(bpf)) { + cmsg_warn(bpf, "all FW request contexts busy!\n"); + return -EAGAIN; + } + + WARN_ON(__test_and_set_bit(bpf->tag_alloc_next, bpf->tag_allocator)); + return bpf->tag_alloc_next++; +} + +static void nfp_bpf_free_tag(struct nfp_app_bpf *bpf, u16 tag) +{ + WARN_ON(!__test_and_clear_bit(tag, bpf->tag_allocator)); + + while (!test_bit(bpf->tag_alloc_last, bpf->tag_allocator) && + bpf->tag_alloc_last != bpf->tag_alloc_next) + bpf->tag_alloc_last++; +} + +static struct sk_buff * +nfp_bpf_cmsg_alloc(struct nfp_app_bpf *bpf, unsigned int size) +{ + struct sk_buff *skb; + + skb = nfp_app_ctrl_msg_alloc(bpf->app, size, GFP_KERNEL); + skb_put(skb, size); + + return skb; +} + +static struct sk_buff * +nfp_bpf_cmsg_map_req_alloc(struct nfp_app_bpf *bpf, unsigned int n) +{ + unsigned int size; + + size = sizeof(struct cmsg_req_map_op); + size += sizeof(struct cmsg_key_value_pair) * n; + + return nfp_bpf_cmsg_alloc(bpf, size); +} + +static u8 nfp_bpf_cmsg_get_type(struct sk_buff *skb) +{ + struct cmsg_hdr *hdr; + + hdr = (struct cmsg_hdr *)skb->data; + + return hdr->type; +} + +static unsigned int nfp_bpf_cmsg_get_tag(struct sk_buff *skb) +{ + struct cmsg_hdr *hdr; + + hdr = (struct cmsg_hdr *)skb->data; + + return be16_to_cpu(hdr->tag); +} + +static struct sk_buff *__nfp_bpf_reply(struct nfp_app_bpf *bpf, u16 tag) +{ + unsigned int msg_tag; + struct sk_buff *skb; + + skb_queue_walk(&bpf->cmsg_replies, skb) { + msg_tag = nfp_bpf_cmsg_get_tag(skb); + if (msg_tag == tag) { + nfp_bpf_free_tag(bpf, tag); + __skb_unlink(skb, &bpf->cmsg_replies); + return skb; + } + } + + return NULL; +} + +static struct sk_buff *nfp_bpf_reply(struct nfp_app_bpf *bpf, u16 tag) +{ + struct sk_buff *skb; + + nfp_ctrl_lock(bpf->app->ctrl); + skb = __nfp_bpf_reply(bpf, tag); + nfp_ctrl_unlock(bpf->app->ctrl); + + return skb; +} + +static struct sk_buff *nfp_bpf_reply_drop_tag(struct nfp_app_bpf *bpf, u16 tag) +{ + struct sk_buff *skb; + + nfp_ctrl_lock(bpf->app->ctrl); + skb = __nfp_bpf_reply(bpf, tag); + if (!skb) + nfp_bpf_free_tag(bpf, tag); + nfp_ctrl_unlock(bpf->app->ctrl); + + return skb; +} + +static struct sk_buff * +nfp_bpf_cmsg_wait_reply(struct nfp_app_bpf *bpf, enum nfp_bpf_cmsg_type type, + int tag) +{ + struct sk_buff *skb; + int i, err; + + for (i = 0; i < 50; i++) { + udelay(4); + skb = nfp_bpf_reply(bpf, tag); + if (skb) + return skb; + } + + err = wait_event_interruptible_timeout(bpf->cmsg_wq, + skb = nfp_bpf_reply(bpf, tag), + msecs_to_jiffies(5000)); + /* We didn't get a response - try last time and atomically drop + * the tag even if no response is matched. + */ + if (!skb) + skb = nfp_bpf_reply_drop_tag(bpf, tag); + if (err < 0) { + cmsg_warn(bpf, "%s waiting for response to 0x%02x: %d\n", + err == ERESTARTSYS ? "interrupted" : "error", + type, err); + return ERR_PTR(err); + } + if (!skb) { + cmsg_warn(bpf, "timeout waiting for response to 0x%02x\n", + type); + return ERR_PTR(-ETIMEDOUT); + } + + return skb; +} + +static struct sk_buff * +nfp_bpf_cmsg_communicate(struct nfp_app_bpf *bpf, struct sk_buff *skb, + enum nfp_bpf_cmsg_type type, unsigned int reply_size) +{ + struct cmsg_hdr *hdr; + int tag; + + nfp_ctrl_lock(bpf->app->ctrl); + tag = nfp_bpf_alloc_tag(bpf); + if (tag < 0) { + nfp_ctrl_unlock(bpf->app->ctrl); + dev_kfree_skb_any(skb); + return ERR_PTR(tag); + } + + hdr = (void *)skb->data; + hdr->ver = CMSG_MAP_ABI_VERSION; + hdr->type = type; + hdr->tag = cpu_to_be16(tag); + + __nfp_app_ctrl_tx(bpf->app, skb); + + nfp_ctrl_unlock(bpf->app->ctrl); + + skb = nfp_bpf_cmsg_wait_reply(bpf, type, tag); + if (IS_ERR(skb)) + return skb; + + hdr = (struct cmsg_hdr *)skb->data; + if (hdr->type != __CMSG_REPLY(type)) { + cmsg_warn(bpf, "cmsg drop - wrong type 0x%02x != 0x%02lx!\n", + hdr->type, __CMSG_REPLY(type)); + goto err_free; + } + /* 0 reply_size means caller will do the validation */ + if (reply_size && skb->len != reply_size) { + cmsg_warn(bpf, "cmsg drop - type 0x%02x wrong size %d != %d!\n", + type, skb->len, reply_size); + goto err_free; + } + + return skb; +err_free: + dev_kfree_skb_any(skb); + return ERR_PTR(-EIO); +} + +static int +nfp_bpf_ctrl_rc_to_errno(struct nfp_app_bpf *bpf, + struct cmsg_reply_map_simple *reply) +{ + static const int res_table[] = { + [CMSG_RC_SUCCESS] = 0, + [CMSG_RC_ERR_MAP_FD] = -EBADFD, + [CMSG_RC_ERR_MAP_NOENT] = -ENOENT, + [CMSG_RC_ERR_MAP_ERR] = -EINVAL, + [CMSG_RC_ERR_MAP_PARSE] = -EIO, + [CMSG_RC_ERR_MAP_EXIST] = -EEXIST, + [CMSG_RC_ERR_MAP_NOMEM] = -ENOMEM, + [CMSG_RC_ERR_MAP_E2BIG] = -E2BIG, + }; + u32 rc; + + rc = be32_to_cpu(reply->rc); + if (rc >= ARRAY_SIZE(res_table)) { + cmsg_warn(bpf, "FW responded with invalid status: %u\n", rc); + return -EIO; + } + + return res_table[rc]; +} + +long long int +nfp_bpf_ctrl_alloc_map(struct nfp_app_bpf *bpf, struct bpf_map *map) +{ + struct cmsg_reply_map_alloc_tbl *reply; + struct cmsg_req_map_alloc_tbl *req; + struct sk_buff *skb; + u32 tid; + int err; + + skb = nfp_bpf_cmsg_alloc(bpf, sizeof(*req)); + if (!skb) + return -ENOMEM; + + req = (void *)skb->data; + req->key_size = cpu_to_be32(map->key_size); + req->value_size = cpu_to_be32(map->value_size); + req->max_entries = cpu_to_be32(map->max_entries); + req->map_type = cpu_to_be32(map->map_type); + req->map_flags = 0; + + skb = nfp_bpf_cmsg_communicate(bpf, skb, CMSG_TYPE_MAP_ALLOC, + sizeof(*reply)); + if (IS_ERR(skb)) + return PTR_ERR(skb); + + reply = (void *)skb->data; + err = nfp_bpf_ctrl_rc_to_errno(bpf, &reply->reply_hdr); + if (err) + goto err_free; + + tid = be32_to_cpu(reply->tid); + dev_consume_skb_any(skb); + + return tid; +err_free: + dev_kfree_skb_any(skb); + return err; +} + +void nfp_bpf_ctrl_free_map(struct nfp_app_bpf *bpf, struct nfp_bpf_map *nfp_map) +{ + struct cmsg_reply_map_free_tbl *reply; + struct cmsg_req_map_free_tbl *req; + struct sk_buff *skb; + int err; + + skb = nfp_bpf_cmsg_alloc(bpf, sizeof(*req)); + if (!skb) { + cmsg_warn(bpf, "leaking map - failed to allocate msg\n"); + return; + } + + req = (void *)skb->data; + req->tid = cpu_to_be32(nfp_map->tid); + + skb = nfp_bpf_cmsg_communicate(bpf, skb, CMSG_TYPE_MAP_FREE, + sizeof(*reply)); + if (IS_ERR(skb)) { + cmsg_warn(bpf, "leaking map - I/O error\n"); + return; + } + + reply = (void *)skb->data; + err = nfp_bpf_ctrl_rc_to_errno(bpf, &reply->reply_hdr); + if (err) + cmsg_warn(bpf, "leaking map - FW responded with: %d\n", err); + + dev_consume_skb_any(skb); +} + +static int +nfp_bpf_ctrl_entry_op(struct bpf_offloaded_map *offmap, + enum nfp_bpf_cmsg_type op, + u8 *key, u8 *value, u64 flags, u8 *out_key, u8 *out_value) +{ + struct nfp_bpf_map *nfp_map = offmap->dev_priv; + struct nfp_app_bpf *bpf = nfp_map->bpf; + struct bpf_map *map = &offmap->map; + struct cmsg_reply_map_op *reply; + struct cmsg_req_map_op *req; + struct sk_buff *skb; + int err; + + /* FW messages have no space for more than 32 bits of flags */ + if (flags >> 32) + return -EOPNOTSUPP; + + skb = nfp_bpf_cmsg_map_req_alloc(bpf, 1); + if (!skb) + return -ENOMEM; + + req = (void *)skb->data; + req->tid = cpu_to_be32(nfp_map->tid); + req->count = cpu_to_be32(1); + req->flags = cpu_to_be32(flags); + + /* Copy inputs */ + if (key) + memcpy(&req->elem[0].key, key, map->key_size); + if (value) + memcpy(&req->elem[0].value, value, map->value_size); + + skb = nfp_bpf_cmsg_communicate(bpf, skb, op, + sizeof(*reply) + sizeof(*reply->elem)); + if (IS_ERR(skb)) + return PTR_ERR(skb); + + reply = (void *)skb->data; + err = nfp_bpf_ctrl_rc_to_errno(bpf, &reply->reply_hdr); + if (err) + goto err_free; + + /* Copy outputs */ + if (out_key) + memcpy(out_key, &reply->elem[0].key, map->key_size); + if (out_value) + memcpy(out_value, &reply->elem[0].value, map->value_size); + + dev_consume_skb_any(skb); + + return 0; +err_free: + dev_kfree_skb_any(skb); + return err; +} + +int nfp_bpf_ctrl_update_entry(struct bpf_offloaded_map *offmap, + void *key, void *value, u64 flags) +{ + return nfp_bpf_ctrl_entry_op(offmap, CMSG_TYPE_MAP_UPDATE, + key, value, flags, NULL, NULL); +} + +int nfp_bpf_ctrl_del_entry(struct bpf_offloaded_map *offmap, void *key) +{ + return nfp_bpf_ctrl_entry_op(offmap, CMSG_TYPE_MAP_DELETE, + key, NULL, 0, NULL, NULL); +} + +int nfp_bpf_ctrl_lookup_entry(struct bpf_offloaded_map *offmap, + void *key, void *value) +{ + return nfp_bpf_ctrl_entry_op(offmap, CMSG_TYPE_MAP_LOOKUP, + key, NULL, 0, NULL, value); +} + +int nfp_bpf_ctrl_getfirst_entry(struct bpf_offloaded_map *offmap, + void *next_key) +{ + return nfp_bpf_ctrl_entry_op(offmap, CMSG_TYPE_MAP_GETFIRST, + NULL, NULL, 0, next_key, NULL); +} + +int nfp_bpf_ctrl_getnext_entry(struct bpf_offloaded_map *offmap, + void *key, void *next_key) +{ + return nfp_bpf_ctrl_entry_op(offmap, CMSG_TYPE_MAP_GETNEXT, + key, NULL, 0, next_key, NULL); +} + +void nfp_bpf_ctrl_msg_rx(struct nfp_app *app, struct sk_buff *skb) +{ + struct nfp_app_bpf *bpf = app->priv; + unsigned int tag; + + if (unlikely(skb->len < sizeof(struct cmsg_reply_map_simple))) { + cmsg_warn(bpf, "cmsg drop - too short %d!\n", skb->len); + goto err_free; + } + + if (nfp_bpf_cmsg_get_type(skb) == CMSG_TYPE_BPF_EVENT) { + if (!nfp_bpf_event_output(bpf, skb->data, skb->len)) + dev_consume_skb_any(skb); + else + dev_kfree_skb_any(skb); + return; + } + + nfp_ctrl_lock(bpf->app->ctrl); + + tag = nfp_bpf_cmsg_get_tag(skb); + if (unlikely(!test_bit(tag, bpf->tag_allocator))) { + cmsg_warn(bpf, "cmsg drop - no one is waiting for tag %u!\n", + tag); + goto err_unlock; + } + + __skb_queue_tail(&bpf->cmsg_replies, skb); + wake_up_interruptible_all(&bpf->cmsg_wq); + + nfp_ctrl_unlock(bpf->app->ctrl); + + return; +err_unlock: + nfp_ctrl_unlock(bpf->app->ctrl); +err_free: + dev_kfree_skb_any(skb); +} + +void +nfp_bpf_ctrl_msg_rx_raw(struct nfp_app *app, const void *data, unsigned int len) +{ + struct nfp_app_bpf *bpf = app->priv; + const struct cmsg_hdr *hdr = data; + + if (unlikely(len < sizeof(struct cmsg_reply_map_simple))) { + cmsg_warn(bpf, "cmsg drop - too short %d!\n", len); + return; + } + + if (hdr->type == CMSG_TYPE_BPF_EVENT) + nfp_bpf_event_output(bpf, data, len); + else + cmsg_warn(bpf, "cmsg drop - msg type %d with raw buffer!\n", + hdr->type); +} diff --git a/drivers/net/ethernet/netronome/nfp/bpf/fw.h b/drivers/net/ethernet/netronome/nfp/bpf/fw.h new file mode 100644 index 000000000..e4f9b7ec8 --- /dev/null +++ b/drivers/net/ethernet/netronome/nfp/bpf/fw.h @@ -0,0 +1,178 @@ +/* + * Copyright (C) 2017-2018 Netronome Systems, Inc. + * + * This software is dual licensed under the GNU General License Version 2, + * June 1991 as shown in the file COPYING in the top-level directory of this + * source tree or the BSD 2-Clause License provided below. You have the + * option to license this software under the complete terms of either license. + * + * The BSD 2-Clause License: + * + * Redistribution and use in source and binary forms, with or + * without modification, are permitted provided that the following + * conditions are met: + * + * 1. Redistributions of source code must retain the above + * copyright notice, this list of conditions and the following + * disclaimer. + * + * 2. Redistributions in binary form must reproduce the above + * copyright notice, this list of conditions and the following + * disclaimer in the documentation and/or other materials + * provided with the distribution. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, + * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND + * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS + * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN + * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN + * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. + */ + +#ifndef NFP_BPF_FW_H +#define NFP_BPF_FW_H 1 + +#include <linux/bitops.h> +#include <linux/types.h> + +/* Kernel's enum bpf_reg_type is not uABI so people may change it breaking + * our FW ABI. In that case we will do translation in the driver. + */ +#define NFP_BPF_SCALAR_VALUE 1 +#define NFP_BPF_MAP_VALUE 4 +#define NFP_BPF_STACK 6 +#define NFP_BPF_PACKET_DATA 8 + +enum bpf_cap_tlv_type { + NFP_BPF_CAP_TYPE_FUNC = 1, + NFP_BPF_CAP_TYPE_ADJUST_HEAD = 2, + NFP_BPF_CAP_TYPE_MAPS = 3, + NFP_BPF_CAP_TYPE_RANDOM = 4, + NFP_BPF_CAP_TYPE_QUEUE_SELECT = 5, + NFP_BPF_CAP_TYPE_ADJUST_TAIL = 6, +}; + +struct nfp_bpf_cap_tlv_func { + __le32 func_id; + __le32 func_addr; +}; + +struct nfp_bpf_cap_tlv_adjust_head { + __le32 flags; + __le32 off_min; + __le32 off_max; + __le32 guaranteed_sub; + __le32 guaranteed_add; +}; + +#define NFP_BPF_ADJUST_HEAD_NO_META BIT(0) + +struct nfp_bpf_cap_tlv_maps { + __le32 types; + __le32 max_maps; + __le32 max_elems; + __le32 max_key_sz; + __le32 max_val_sz; + __le32 max_elem_sz; +}; + +/* + * Types defined for map related control messages + */ +#define CMSG_MAP_ABI_VERSION 1 + +enum nfp_bpf_cmsg_type { + CMSG_TYPE_MAP_ALLOC = 1, + CMSG_TYPE_MAP_FREE = 2, + CMSG_TYPE_MAP_LOOKUP = 3, + CMSG_TYPE_MAP_UPDATE = 4, + CMSG_TYPE_MAP_DELETE = 5, + CMSG_TYPE_MAP_GETNEXT = 6, + CMSG_TYPE_MAP_GETFIRST = 7, + CMSG_TYPE_BPF_EVENT = 8, + __CMSG_TYPE_MAP_MAX, +}; + +#define CMSG_TYPE_MAP_REPLY_BIT 7 +#define __CMSG_REPLY(req) (BIT(CMSG_TYPE_MAP_REPLY_BIT) | (req)) + +#define CMSG_MAP_KEY_LW 16 +#define CMSG_MAP_VALUE_LW 16 + +enum nfp_bpf_cmsg_status { + CMSG_RC_SUCCESS = 0, + CMSG_RC_ERR_MAP_FD = 1, + CMSG_RC_ERR_MAP_NOENT = 2, + CMSG_RC_ERR_MAP_ERR = 3, + CMSG_RC_ERR_MAP_PARSE = 4, + CMSG_RC_ERR_MAP_EXIST = 5, + CMSG_RC_ERR_MAP_NOMEM = 6, + CMSG_RC_ERR_MAP_E2BIG = 7, +}; + +struct cmsg_hdr { + u8 type; + u8 ver; + __be16 tag; +}; + +struct cmsg_reply_map_simple { + struct cmsg_hdr hdr; + __be32 rc; +}; + +struct cmsg_req_map_alloc_tbl { + struct cmsg_hdr hdr; + __be32 key_size; /* in bytes */ + __be32 value_size; /* in bytes */ + __be32 max_entries; + __be32 map_type; + __be32 map_flags; /* reserved */ +}; + +struct cmsg_reply_map_alloc_tbl { + struct cmsg_reply_map_simple reply_hdr; + __be32 tid; +}; + +struct cmsg_req_map_free_tbl { + struct cmsg_hdr hdr; + __be32 tid; +}; + +struct cmsg_reply_map_free_tbl { + struct cmsg_reply_map_simple reply_hdr; + __be32 count; +}; + +struct cmsg_key_value_pair { + __be32 key[CMSG_MAP_KEY_LW]; + __be32 value[CMSG_MAP_VALUE_LW]; +}; + +struct cmsg_req_map_op { + struct cmsg_hdr hdr; + __be32 tid; + __be32 count; + __be32 flags; + struct cmsg_key_value_pair elem[0]; +}; + +struct cmsg_reply_map_op { + struct cmsg_reply_map_simple reply_hdr; + __be32 count; + __be32 resv; + struct cmsg_key_value_pair elem[0]; +}; + +struct cmsg_bpf_event { + struct cmsg_hdr hdr; + __be32 cpu_id; + __be64 map_ptr; + __be32 data_size; + __be32 pkt_size; + u8 data[0]; +}; +#endif diff --git a/drivers/net/ethernet/netronome/nfp/bpf/jit.c b/drivers/net/ethernet/netronome/nfp/bpf/jit.c new file mode 100644 index 000000000..c3ce0fb47 --- /dev/null +++ b/drivers/net/ethernet/netronome/nfp/bpf/jit.c @@ -0,0 +1,4123 @@ +/* + * Copyright (C) 2016-2018 Netronome Systems, Inc. + * + * This software is dual licensed under the GNU General License Version 2, + * June 1991 as shown in the file COPYING in the top-level directory of this + * source tree or the BSD 2-Clause License provided below. You have the + * option to license this software under the complete terms of either license. + * + * The BSD 2-Clause License: + * + * Redistribution and use in source and binary forms, with or + * without modification, are permitted provided that the following + * conditions are met: + * + * 1. Redistributions of source code must retain the above + * copyright notice, this list of conditions and the following + * disclaimer. + * + * 2. Redistributions in binary form must reproduce the above + * copyright notice, this list of conditions and the following + * disclaimer in the documentation and/or other materials + * provided with the distribution. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, + * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND + * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS + * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN + * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN + * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. + */ + +#define pr_fmt(fmt) "NFP net bpf: " fmt + +#include <linux/bug.h> +#include <linux/bpf.h> +#include <linux/filter.h> +#include <linux/kernel.h> +#include <linux/pkt_cls.h> +#include <linux/reciprocal_div.h> +#include <linux/unistd.h> + +#include "main.h" +#include "../nfp_asm.h" +#include "../nfp_net_ctrl.h" + +/* --- NFP prog --- */ +/* Foreach "multiple" entries macros provide pos and next<n> pointers. + * It's safe to modify the next pointers (but not pos). + */ +#define nfp_for_each_insn_walk2(nfp_prog, pos, next) \ + for (pos = list_first_entry(&(nfp_prog)->insns, typeof(*pos), l), \ + next = list_next_entry(pos, l); \ + &(nfp_prog)->insns != &pos->l && \ + &(nfp_prog)->insns != &next->l; \ + pos = nfp_meta_next(pos), \ + next = nfp_meta_next(pos)) + +#define nfp_for_each_insn_walk3(nfp_prog, pos, next, next2) \ + for (pos = list_first_entry(&(nfp_prog)->insns, typeof(*pos), l), \ + next = list_next_entry(pos, l), \ + next2 = list_next_entry(next, l); \ + &(nfp_prog)->insns != &pos->l && \ + &(nfp_prog)->insns != &next->l && \ + &(nfp_prog)->insns != &next2->l; \ + pos = nfp_meta_next(pos), \ + next = nfp_meta_next(pos), \ + next2 = nfp_meta_next(next)) + +static bool +nfp_meta_has_prev(struct nfp_prog *nfp_prog, struct nfp_insn_meta *meta) +{ + return meta->l.prev != &nfp_prog->insns; +} + +static void nfp_prog_push(struct nfp_prog *nfp_prog, u64 insn) +{ + if (nfp_prog->__prog_alloc_len / sizeof(u64) == nfp_prog->prog_len) { + pr_warn("instruction limit reached (%u NFP instructions)\n", + nfp_prog->prog_len); + nfp_prog->error = -ENOSPC; + return; + } + + nfp_prog->prog[nfp_prog->prog_len] = insn; + nfp_prog->prog_len++; +} + +static unsigned int nfp_prog_current_offset(struct nfp_prog *nfp_prog) +{ + return nfp_prog->prog_len; +} + +static bool +nfp_prog_confirm_current_offset(struct nfp_prog *nfp_prog, unsigned int off) +{ + /* If there is a recorded error we may have dropped instructions; + * that doesn't have to be due to translator bug, and the translation + * will fail anyway, so just return OK. + */ + if (nfp_prog->error) + return true; + return !WARN_ON_ONCE(nfp_prog_current_offset(nfp_prog) != off); +} + +/* --- Emitters --- */ +static void +__emit_cmd(struct nfp_prog *nfp_prog, enum cmd_tgt_map op, + u8 mode, u8 xfer, u8 areg, u8 breg, u8 size, enum cmd_ctx_swap ctx, + bool indir) +{ + u64 insn; + + insn = FIELD_PREP(OP_CMD_A_SRC, areg) | + FIELD_PREP(OP_CMD_CTX, ctx) | + FIELD_PREP(OP_CMD_B_SRC, breg) | + FIELD_PREP(OP_CMD_TOKEN, cmd_tgt_act[op].token) | + FIELD_PREP(OP_CMD_XFER, xfer) | + FIELD_PREP(OP_CMD_CNT, size) | + FIELD_PREP(OP_CMD_SIG, ctx != CMD_CTX_NO_SWAP) | + FIELD_PREP(OP_CMD_TGT_CMD, cmd_tgt_act[op].tgt_cmd) | + FIELD_PREP(OP_CMD_INDIR, indir) | + FIELD_PREP(OP_CMD_MODE, mode); + + nfp_prog_push(nfp_prog, insn); +} + +static void +emit_cmd_any(struct nfp_prog *nfp_prog, enum cmd_tgt_map op, u8 mode, u8 xfer, + swreg lreg, swreg rreg, u8 size, enum cmd_ctx_swap ctx, bool indir) +{ + struct nfp_insn_re_regs reg; + int err; + + err = swreg_to_restricted(reg_none(), lreg, rreg, ®, false); + if (err) { + nfp_prog->error = err; + return; + } + if (reg.swap) { + pr_err("cmd can't swap arguments\n"); + nfp_prog->error = -EFAULT; + return; + } + if (reg.dst_lmextn || reg.src_lmextn) { + pr_err("cmd can't use LMextn\n"); + nfp_prog->error = -EFAULT; + return; + } + + __emit_cmd(nfp_prog, op, mode, xfer, reg.areg, reg.breg, size, ctx, + indir); +} + +static void +emit_cmd(struct nfp_prog *nfp_prog, enum cmd_tgt_map op, u8 mode, u8 xfer, + swreg lreg, swreg rreg, u8 size, enum cmd_ctx_swap ctx) +{ + emit_cmd_any(nfp_prog, op, mode, xfer, lreg, rreg, size, ctx, false); +} + +static void +emit_cmd_indir(struct nfp_prog *nfp_prog, enum cmd_tgt_map op, u8 mode, u8 xfer, + swreg lreg, swreg rreg, u8 size, enum cmd_ctx_swap ctx) +{ + emit_cmd_any(nfp_prog, op, mode, xfer, lreg, rreg, size, ctx, true); +} + +static void +__emit_br(struct nfp_prog *nfp_prog, enum br_mask mask, enum br_ev_pip ev_pip, + enum br_ctx_signal_state css, u16 addr, u8 defer) +{ + u16 addr_lo, addr_hi; + u64 insn; + + addr_lo = addr & (OP_BR_ADDR_LO >> __bf_shf(OP_BR_ADDR_LO)); + addr_hi = addr != addr_lo; + + insn = OP_BR_BASE | + FIELD_PREP(OP_BR_MASK, mask) | + FIELD_PREP(OP_BR_EV_PIP, ev_pip) | + FIELD_PREP(OP_BR_CSS, css) | + FIELD_PREP(OP_BR_DEFBR, defer) | + FIELD_PREP(OP_BR_ADDR_LO, addr_lo) | + FIELD_PREP(OP_BR_ADDR_HI, addr_hi); + + nfp_prog_push(nfp_prog, insn); +} + +static void +emit_br_relo(struct nfp_prog *nfp_prog, enum br_mask mask, u16 addr, u8 defer, + enum nfp_relo_type relo) +{ + if (mask == BR_UNC && defer > 2) { + pr_err("BUG: branch defer out of bounds %d\n", defer); + nfp_prog->error = -EFAULT; + return; + } + + __emit_br(nfp_prog, mask, + mask != BR_UNC ? BR_EV_PIP_COND : BR_EV_PIP_UNCOND, + BR_CSS_NONE, addr, defer); + + nfp_prog->prog[nfp_prog->prog_len - 1] |= + FIELD_PREP(OP_RELO_TYPE, relo); +} + +static void +emit_br(struct nfp_prog *nfp_prog, enum br_mask mask, u16 addr, u8 defer) +{ + emit_br_relo(nfp_prog, mask, addr, defer, RELO_BR_REL); +} + +static void +__emit_br_bit(struct nfp_prog *nfp_prog, u16 areg, u16 breg, u16 addr, u8 defer, + bool set, bool src_lmextn) +{ + u16 addr_lo, addr_hi; + u64 insn; + + addr_lo = addr & (OP_BR_BIT_ADDR_LO >> __bf_shf(OP_BR_BIT_ADDR_LO)); + addr_hi = addr != addr_lo; + + insn = OP_BR_BIT_BASE | + FIELD_PREP(OP_BR_BIT_A_SRC, areg) | + FIELD_PREP(OP_BR_BIT_B_SRC, breg) | + FIELD_PREP(OP_BR_BIT_BV, set) | + FIELD_PREP(OP_BR_BIT_DEFBR, defer) | + FIELD_PREP(OP_BR_BIT_ADDR_LO, addr_lo) | + FIELD_PREP(OP_BR_BIT_ADDR_HI, addr_hi) | + FIELD_PREP(OP_BR_BIT_SRC_LMEXTN, src_lmextn); + + nfp_prog_push(nfp_prog, insn); +} + +static void +emit_br_bit_relo(struct nfp_prog *nfp_prog, swreg src, u8 bit, u16 addr, + u8 defer, bool set, enum nfp_relo_type relo) +{ + struct nfp_insn_re_regs reg; + int err; + + /* NOTE: The bit to test is specified as an rotation amount, such that + * the bit to test will be placed on the MSB of the result when + * doing a rotate right. For bit X, we need right rotate X + 1. + */ + bit += 1; + + err = swreg_to_restricted(reg_none(), src, reg_imm(bit), ®, false); + if (err) { + nfp_prog->error = err; + return; + } + + __emit_br_bit(nfp_prog, reg.areg, reg.breg, addr, defer, set, + reg.src_lmextn); + + nfp_prog->prog[nfp_prog->prog_len - 1] |= + FIELD_PREP(OP_RELO_TYPE, relo); +} + +static void +emit_br_bset(struct nfp_prog *nfp_prog, swreg src, u8 bit, u16 addr, u8 defer) +{ + emit_br_bit_relo(nfp_prog, src, bit, addr, defer, true, RELO_BR_REL); +} + +static void +__emit_immed(struct nfp_prog *nfp_prog, u16 areg, u16 breg, u16 imm_hi, + enum immed_width width, bool invert, + enum immed_shift shift, bool wr_both, + bool dst_lmextn, bool src_lmextn) +{ + u64 insn; + + insn = OP_IMMED_BASE | + FIELD_PREP(OP_IMMED_A_SRC, areg) | + FIELD_PREP(OP_IMMED_B_SRC, breg) | + FIELD_PREP(OP_IMMED_IMM, imm_hi) | + FIELD_PREP(OP_IMMED_WIDTH, width) | + FIELD_PREP(OP_IMMED_INV, invert) | + FIELD_PREP(OP_IMMED_SHIFT, shift) | + FIELD_PREP(OP_IMMED_WR_AB, wr_both) | + FIELD_PREP(OP_IMMED_SRC_LMEXTN, src_lmextn) | + FIELD_PREP(OP_IMMED_DST_LMEXTN, dst_lmextn); + + nfp_prog_push(nfp_prog, insn); +} + +static void +emit_immed(struct nfp_prog *nfp_prog, swreg dst, u16 imm, + enum immed_width width, bool invert, enum immed_shift shift) +{ + struct nfp_insn_ur_regs reg; + int err; + + if (swreg_type(dst) == NN_REG_IMM) { + nfp_prog->error = -EFAULT; + return; + } + + err = swreg_to_unrestricted(dst, dst, reg_imm(imm & 0xff), ®); + if (err) { + nfp_prog->error = err; + return; + } + + /* Use reg.dst when destination is No-Dest. */ + __emit_immed(nfp_prog, + swreg_type(dst) == NN_REG_NONE ? reg.dst : reg.areg, + reg.breg, imm >> 8, width, invert, shift, + reg.wr_both, reg.dst_lmextn, reg.src_lmextn); +} + +static void +__emit_shf(struct nfp_prog *nfp_prog, u16 dst, enum alu_dst_ab dst_ab, + enum shf_sc sc, u8 shift, + u16 areg, enum shf_op op, u16 breg, bool i8, bool sw, bool wr_both, + bool dst_lmextn, bool src_lmextn) +{ + u64 insn; + + if (!FIELD_FIT(OP_SHF_SHIFT, shift)) { + nfp_prog->error = -EFAULT; + return; + } + + /* NFP shift instruction has something special. If shift direction is + * left then shift amount of 1 to 31 is specified as 32 minus the amount + * to shift. + * + * But no need to do this for indirect shift which has shift amount be + * 0. Even after we do this subtraction, shift amount 0 will be turned + * into 32 which will eventually be encoded the same as 0 because only + * low 5 bits are encoded, but shift amount be 32 will fail the + * FIELD_PREP check done later on shift mask (0x1f), due to 32 is out of + * mask range. + */ + if (sc == SHF_SC_L_SHF && shift) + shift = 32 - shift; + + insn = OP_SHF_BASE | + FIELD_PREP(OP_SHF_A_SRC, areg) | + FIELD_PREP(OP_SHF_SC, sc) | + FIELD_PREP(OP_SHF_B_SRC, breg) | + FIELD_PREP(OP_SHF_I8, i8) | + FIELD_PREP(OP_SHF_SW, sw) | + FIELD_PREP(OP_SHF_DST, dst) | + FIELD_PREP(OP_SHF_SHIFT, shift) | + FIELD_PREP(OP_SHF_OP, op) | + FIELD_PREP(OP_SHF_DST_AB, dst_ab) | + FIELD_PREP(OP_SHF_WR_AB, wr_both) | + FIELD_PREP(OP_SHF_SRC_LMEXTN, src_lmextn) | + FIELD_PREP(OP_SHF_DST_LMEXTN, dst_lmextn); + + nfp_prog_push(nfp_prog, insn); +} + +static void +emit_shf(struct nfp_prog *nfp_prog, swreg dst, + swreg lreg, enum shf_op op, swreg rreg, enum shf_sc sc, u8 shift) +{ + struct nfp_insn_re_regs reg; + int err; + + err = swreg_to_restricted(dst, lreg, rreg, ®, true); + if (err) { + nfp_prog->error = err; + return; + } + + __emit_shf(nfp_prog, reg.dst, reg.dst_ab, sc, shift, + reg.areg, op, reg.breg, reg.i8, reg.swap, reg.wr_both, + reg.dst_lmextn, reg.src_lmextn); +} + +static void +emit_shf_indir(struct nfp_prog *nfp_prog, swreg dst, + swreg lreg, enum shf_op op, swreg rreg, enum shf_sc sc) +{ + if (sc == SHF_SC_R_ROT) { + pr_err("indirect shift is not allowed on rotation\n"); + nfp_prog->error = -EFAULT; + return; + } + + emit_shf(nfp_prog, dst, lreg, op, rreg, sc, 0); +} + +static void +__emit_alu(struct nfp_prog *nfp_prog, u16 dst, enum alu_dst_ab dst_ab, + u16 areg, enum alu_op op, u16 breg, bool swap, bool wr_both, + bool dst_lmextn, bool src_lmextn) +{ + u64 insn; + + insn = OP_ALU_BASE | + FIELD_PREP(OP_ALU_A_SRC, areg) | + FIELD_PREP(OP_ALU_B_SRC, breg) | + FIELD_PREP(OP_ALU_DST, dst) | + FIELD_PREP(OP_ALU_SW, swap) | + FIELD_PREP(OP_ALU_OP, op) | + FIELD_PREP(OP_ALU_DST_AB, dst_ab) | + FIELD_PREP(OP_ALU_WR_AB, wr_both) | + FIELD_PREP(OP_ALU_SRC_LMEXTN, src_lmextn) | + FIELD_PREP(OP_ALU_DST_LMEXTN, dst_lmextn); + + nfp_prog_push(nfp_prog, insn); +} + +static void +emit_alu(struct nfp_prog *nfp_prog, swreg dst, + swreg lreg, enum alu_op op, swreg rreg) +{ + struct nfp_insn_ur_regs reg; + int err; + + err = swreg_to_unrestricted(dst, lreg, rreg, ®); + if (err) { + nfp_prog->error = err; + return; + } + + __emit_alu(nfp_prog, reg.dst, reg.dst_ab, + reg.areg, op, reg.breg, reg.swap, reg.wr_both, + reg.dst_lmextn, reg.src_lmextn); +} + +static void +__emit_mul(struct nfp_prog *nfp_prog, enum alu_dst_ab dst_ab, u16 areg, + enum mul_type type, enum mul_step step, u16 breg, bool swap, + bool wr_both, bool dst_lmextn, bool src_lmextn) +{ + u64 insn; + + insn = OP_MUL_BASE | + FIELD_PREP(OP_MUL_A_SRC, areg) | + FIELD_PREP(OP_MUL_B_SRC, breg) | + FIELD_PREP(OP_MUL_STEP, step) | + FIELD_PREP(OP_MUL_DST_AB, dst_ab) | + FIELD_PREP(OP_MUL_SW, swap) | + FIELD_PREP(OP_MUL_TYPE, type) | + FIELD_PREP(OP_MUL_WR_AB, wr_both) | + FIELD_PREP(OP_MUL_SRC_LMEXTN, src_lmextn) | + FIELD_PREP(OP_MUL_DST_LMEXTN, dst_lmextn); + + nfp_prog_push(nfp_prog, insn); +} + +static void +emit_mul(struct nfp_prog *nfp_prog, swreg lreg, enum mul_type type, + enum mul_step step, swreg rreg) +{ + struct nfp_insn_ur_regs reg; + u16 areg; + int err; + + if (type == MUL_TYPE_START && step != MUL_STEP_NONE) { + nfp_prog->error = -EINVAL; + return; + } + + if (step == MUL_LAST || step == MUL_LAST_2) { + /* When type is step and step Number is LAST or LAST2, left + * source is used as destination. + */ + err = swreg_to_unrestricted(lreg, reg_none(), rreg, ®); + areg = reg.dst; + } else { + err = swreg_to_unrestricted(reg_none(), lreg, rreg, ®); + areg = reg.areg; + } + + if (err) { + nfp_prog->error = err; + return; + } + + __emit_mul(nfp_prog, reg.dst_ab, areg, type, step, reg.breg, reg.swap, + reg.wr_both, reg.dst_lmextn, reg.src_lmextn); +} + +static void +__emit_ld_field(struct nfp_prog *nfp_prog, enum shf_sc sc, + u8 areg, u8 bmask, u8 breg, u8 shift, bool imm8, + bool zero, bool swap, bool wr_both, + bool dst_lmextn, bool src_lmextn) +{ + u64 insn; + + insn = OP_LDF_BASE | + FIELD_PREP(OP_LDF_A_SRC, areg) | + FIELD_PREP(OP_LDF_SC, sc) | + FIELD_PREP(OP_LDF_B_SRC, breg) | + FIELD_PREP(OP_LDF_I8, imm8) | + FIELD_PREP(OP_LDF_SW, swap) | + FIELD_PREP(OP_LDF_ZF, zero) | + FIELD_PREP(OP_LDF_BMASK, bmask) | + FIELD_PREP(OP_LDF_SHF, shift) | + FIELD_PREP(OP_LDF_WR_AB, wr_both) | + FIELD_PREP(OP_LDF_SRC_LMEXTN, src_lmextn) | + FIELD_PREP(OP_LDF_DST_LMEXTN, dst_lmextn); + + nfp_prog_push(nfp_prog, insn); +} + +static void +emit_ld_field_any(struct nfp_prog *nfp_prog, swreg dst, u8 bmask, swreg src, + enum shf_sc sc, u8 shift, bool zero) +{ + struct nfp_insn_re_regs reg; + int err; + + /* Note: ld_field is special as it uses one of the src regs as dst */ + err = swreg_to_restricted(dst, dst, src, ®, true); + if (err) { + nfp_prog->error = err; + return; + } + + __emit_ld_field(nfp_prog, sc, reg.areg, bmask, reg.breg, shift, + reg.i8, zero, reg.swap, reg.wr_both, + reg.dst_lmextn, reg.src_lmextn); +} + +static void +emit_ld_field(struct nfp_prog *nfp_prog, swreg dst, u8 bmask, swreg src, + enum shf_sc sc, u8 shift) +{ + emit_ld_field_any(nfp_prog, dst, bmask, src, sc, shift, false); +} + +static void +__emit_lcsr(struct nfp_prog *nfp_prog, u16 areg, u16 breg, bool wr, u16 addr, + bool dst_lmextn, bool src_lmextn) +{ + u64 insn; + + insn = OP_LCSR_BASE | + FIELD_PREP(OP_LCSR_A_SRC, areg) | + FIELD_PREP(OP_LCSR_B_SRC, breg) | + FIELD_PREP(OP_LCSR_WRITE, wr) | + FIELD_PREP(OP_LCSR_ADDR, addr / 4) | + FIELD_PREP(OP_LCSR_SRC_LMEXTN, src_lmextn) | + FIELD_PREP(OP_LCSR_DST_LMEXTN, dst_lmextn); + + nfp_prog_push(nfp_prog, insn); +} + +static void emit_csr_wr(struct nfp_prog *nfp_prog, swreg src, u16 addr) +{ + struct nfp_insn_ur_regs reg; + int err; + + /* This instruction takes immeds instead of reg_none() for the ignored + * operand, but we can't encode 2 immeds in one instr with our normal + * swreg infra so if param is an immed, we encode as reg_none() and + * copy the immed to both operands. + */ + if (swreg_type(src) == NN_REG_IMM) { + err = swreg_to_unrestricted(reg_none(), src, reg_none(), ®); + reg.breg = reg.areg; + } else { + err = swreg_to_unrestricted(reg_none(), src, reg_imm(0), ®); + } + if (err) { + nfp_prog->error = err; + return; + } + + __emit_lcsr(nfp_prog, reg.areg, reg.breg, true, addr, + false, reg.src_lmextn); +} + +/* CSR value is read in following immed[gpr, 0] */ +static void __emit_csr_rd(struct nfp_prog *nfp_prog, u16 addr) +{ + __emit_lcsr(nfp_prog, 0, 0, false, addr, false, false); +} + +static void emit_nop(struct nfp_prog *nfp_prog) +{ + __emit_immed(nfp_prog, UR_REG_IMM, UR_REG_IMM, 0, 0, 0, 0, 0, 0, 0); +} + +/* --- Wrappers --- */ +static bool pack_immed(u32 imm, u16 *val, enum immed_shift *shift) +{ + if (!(imm & 0xffff0000)) { + *val = imm; + *shift = IMMED_SHIFT_0B; + } else if (!(imm & 0xff0000ff)) { + *val = imm >> 8; + *shift = IMMED_SHIFT_1B; + } else if (!(imm & 0x0000ffff)) { + *val = imm >> 16; + *shift = IMMED_SHIFT_2B; + } else { + return false; + } + + return true; +} + +static void wrp_immed(struct nfp_prog *nfp_prog, swreg dst, u32 imm) +{ + enum immed_shift shift; + u16 val; + + if (pack_immed(imm, &val, &shift)) { + emit_immed(nfp_prog, dst, val, IMMED_WIDTH_ALL, false, shift); + } else if (pack_immed(~imm, &val, &shift)) { + emit_immed(nfp_prog, dst, val, IMMED_WIDTH_ALL, true, shift); + } else { + emit_immed(nfp_prog, dst, imm & 0xffff, IMMED_WIDTH_ALL, + false, IMMED_SHIFT_0B); + emit_immed(nfp_prog, dst, imm >> 16, IMMED_WIDTH_WORD, + false, IMMED_SHIFT_2B); + } +} + +static void +wrp_immed_relo(struct nfp_prog *nfp_prog, swreg dst, u32 imm, + enum nfp_relo_type relo) +{ + if (imm > 0xffff) { + pr_err("relocation of a large immediate!\n"); + nfp_prog->error = -EFAULT; + return; + } + emit_immed(nfp_prog, dst, imm, IMMED_WIDTH_ALL, false, IMMED_SHIFT_0B); + + nfp_prog->prog[nfp_prog->prog_len - 1] |= + FIELD_PREP(OP_RELO_TYPE, relo); +} + +/* ur_load_imm_any() - encode immediate or use tmp register (unrestricted) + * If the @imm is small enough encode it directly in operand and return + * otherwise load @imm to a spare register and return its encoding. + */ +static swreg ur_load_imm_any(struct nfp_prog *nfp_prog, u32 imm, swreg tmp_reg) +{ + if (FIELD_FIT(UR_REG_IMM_MAX, imm)) + return reg_imm(imm); + + wrp_immed(nfp_prog, tmp_reg, imm); + return tmp_reg; +} + +/* re_load_imm_any() - encode immediate or use tmp register (restricted) + * If the @imm is small enough encode it directly in operand and return + * otherwise load @imm to a spare register and return its encoding. + */ +static swreg re_load_imm_any(struct nfp_prog *nfp_prog, u32 imm, swreg tmp_reg) +{ + if (FIELD_FIT(RE_REG_IMM_MAX, imm)) + return reg_imm(imm); + + wrp_immed(nfp_prog, tmp_reg, imm); + return tmp_reg; +} + +static void wrp_nops(struct nfp_prog *nfp_prog, unsigned int count) +{ + while (count--) + emit_nop(nfp_prog); +} + +static void wrp_mov(struct nfp_prog *nfp_prog, swreg dst, swreg src) +{ + emit_alu(nfp_prog, dst, reg_none(), ALU_OP_NONE, src); +} + +static void wrp_reg_mov(struct nfp_prog *nfp_prog, u16 dst, u16 src) +{ + wrp_mov(nfp_prog, reg_both(dst), reg_b(src)); +} + +/* wrp_reg_subpart() - load @field_len bytes from @offset of @src, write the + * result to @dst from low end. + */ +static void +wrp_reg_subpart(struct nfp_prog *nfp_prog, swreg dst, swreg src, u8 field_len, + u8 offset) +{ + enum shf_sc sc = offset ? SHF_SC_R_SHF : SHF_SC_NONE; + u8 mask = (1 << field_len) - 1; + + emit_ld_field_any(nfp_prog, dst, mask, src, sc, offset * 8, true); +} + +/* wrp_reg_or_subpart() - load @field_len bytes from low end of @src, or the + * result to @dst from offset, there is no change on the other bits of @dst. + */ +static void +wrp_reg_or_subpart(struct nfp_prog *nfp_prog, swreg dst, swreg src, + u8 field_len, u8 offset) +{ + enum shf_sc sc = offset ? SHF_SC_L_SHF : SHF_SC_NONE; + u8 mask = ((1 << field_len) - 1) << offset; + + emit_ld_field(nfp_prog, dst, mask, src, sc, 32 - offset * 8); +} + +static void +addr40_offset(struct nfp_prog *nfp_prog, u8 src_gpr, swreg offset, + swreg *rega, swreg *regb) +{ + if (offset == reg_imm(0)) { + *rega = reg_a(src_gpr); + *regb = reg_b(src_gpr + 1); + return; + } + + emit_alu(nfp_prog, imm_a(nfp_prog), reg_a(src_gpr), ALU_OP_ADD, offset); + emit_alu(nfp_prog, imm_b(nfp_prog), reg_b(src_gpr + 1), ALU_OP_ADD_C, + reg_imm(0)); + *rega = imm_a(nfp_prog); + *regb = imm_b(nfp_prog); +} + +/* NFP has Command Push Pull bus which supports bluk memory operations. */ +static int nfp_cpp_memcpy(struct nfp_prog *nfp_prog, struct nfp_insn_meta *meta) +{ + bool descending_seq = meta->ldst_gather_len < 0; + s16 len = abs(meta->ldst_gather_len); + swreg src_base, off; + bool src_40bit_addr; + unsigned int i; + u8 xfer_num; + + off = re_load_imm_any(nfp_prog, meta->insn.off, imm_b(nfp_prog)); + src_40bit_addr = meta->ptr.type == PTR_TO_MAP_VALUE; + src_base = reg_a(meta->insn.src_reg * 2); + xfer_num = round_up(len, 4) / 4; + + if (src_40bit_addr) + addr40_offset(nfp_prog, meta->insn.src_reg * 2, off, &src_base, + &off); + + /* Setup PREV_ALU fields to override memory read length. */ + if (len > 32) + wrp_immed(nfp_prog, reg_none(), + CMD_OVE_LEN | FIELD_PREP(CMD_OV_LEN, xfer_num - 1)); + + /* Memory read from source addr into transfer-in registers. */ + emit_cmd_any(nfp_prog, CMD_TGT_READ32_SWAP, + src_40bit_addr ? CMD_MODE_40b_BA : CMD_MODE_32b, 0, + src_base, off, xfer_num - 1, CMD_CTX_SWAP, len > 32); + + /* Move from transfer-in to transfer-out. */ + for (i = 0; i < xfer_num; i++) + wrp_mov(nfp_prog, reg_xfer(i), reg_xfer(i)); + + off = re_load_imm_any(nfp_prog, meta->paired_st->off, imm_b(nfp_prog)); + + if (len <= 8) { + /* Use single direct_ref write8. */ + emit_cmd(nfp_prog, CMD_TGT_WRITE8_SWAP, CMD_MODE_32b, 0, + reg_a(meta->paired_st->dst_reg * 2), off, len - 1, + CMD_CTX_SWAP); + } else if (len <= 32 && IS_ALIGNED(len, 4)) { + /* Use single direct_ref write32. */ + emit_cmd(nfp_prog, CMD_TGT_WRITE32_SWAP, CMD_MODE_32b, 0, + reg_a(meta->paired_st->dst_reg * 2), off, xfer_num - 1, + CMD_CTX_SWAP); + } else if (len <= 32) { + /* Use single indirect_ref write8. */ + wrp_immed(nfp_prog, reg_none(), + CMD_OVE_LEN | FIELD_PREP(CMD_OV_LEN, len - 1)); + emit_cmd_indir(nfp_prog, CMD_TGT_WRITE8_SWAP, CMD_MODE_32b, 0, + reg_a(meta->paired_st->dst_reg * 2), off, + len - 1, CMD_CTX_SWAP); + } else if (IS_ALIGNED(len, 4)) { + /* Use single indirect_ref write32. */ + wrp_immed(nfp_prog, reg_none(), + CMD_OVE_LEN | FIELD_PREP(CMD_OV_LEN, xfer_num - 1)); + emit_cmd_indir(nfp_prog, CMD_TGT_WRITE32_SWAP, CMD_MODE_32b, 0, + reg_a(meta->paired_st->dst_reg * 2), off, + xfer_num - 1, CMD_CTX_SWAP); + } else if (len <= 40) { + /* Use one direct_ref write32 to write the first 32-bytes, then + * another direct_ref write8 to write the remaining bytes. + */ + emit_cmd(nfp_prog, CMD_TGT_WRITE32_SWAP, CMD_MODE_32b, 0, + reg_a(meta->paired_st->dst_reg * 2), off, 7, + CMD_CTX_SWAP); + + off = re_load_imm_any(nfp_prog, meta->paired_st->off + 32, + imm_b(nfp_prog)); + emit_cmd(nfp_prog, CMD_TGT_WRITE8_SWAP, CMD_MODE_32b, 8, + reg_a(meta->paired_st->dst_reg * 2), off, len - 33, + CMD_CTX_SWAP); + } else { + /* Use one indirect_ref write32 to write 4-bytes aligned length, + * then another direct_ref write8 to write the remaining bytes. + */ + u8 new_off; + + wrp_immed(nfp_prog, reg_none(), + CMD_OVE_LEN | FIELD_PREP(CMD_OV_LEN, xfer_num - 2)); + emit_cmd_indir(nfp_prog, CMD_TGT_WRITE32_SWAP, CMD_MODE_32b, 0, + reg_a(meta->paired_st->dst_reg * 2), off, + xfer_num - 2, CMD_CTX_SWAP); + new_off = meta->paired_st->off + (xfer_num - 1) * 4; + off = re_load_imm_any(nfp_prog, new_off, imm_b(nfp_prog)); + emit_cmd(nfp_prog, CMD_TGT_WRITE8_SWAP, CMD_MODE_32b, + xfer_num - 1, reg_a(meta->paired_st->dst_reg * 2), off, + (len & 0x3) - 1, CMD_CTX_SWAP); + } + + /* TODO: The following extra load is to make sure data flow be identical + * before and after we do memory copy optimization. + * + * The load destination register is not guaranteed to be dead, so we + * need to make sure it is loaded with the value the same as before + * this transformation. + * + * These extra loads could be removed once we have accurate register + * usage information. + */ + if (descending_seq) + xfer_num = 0; + else if (BPF_SIZE(meta->insn.code) != BPF_DW) + xfer_num = xfer_num - 1; + else + xfer_num = xfer_num - 2; + + switch (BPF_SIZE(meta->insn.code)) { + case BPF_B: + wrp_reg_subpart(nfp_prog, reg_both(meta->insn.dst_reg * 2), + reg_xfer(xfer_num), 1, + IS_ALIGNED(len, 4) ? 3 : (len & 3) - 1); + break; + case BPF_H: + wrp_reg_subpart(nfp_prog, reg_both(meta->insn.dst_reg * 2), + reg_xfer(xfer_num), 2, (len & 3) ^ 2); + break; + case BPF_W: + wrp_mov(nfp_prog, reg_both(meta->insn.dst_reg * 2), + reg_xfer(0)); + break; + case BPF_DW: + wrp_mov(nfp_prog, reg_both(meta->insn.dst_reg * 2), + reg_xfer(xfer_num)); + wrp_mov(nfp_prog, reg_both(meta->insn.dst_reg * 2 + 1), + reg_xfer(xfer_num + 1)); + break; + } + + if (BPF_SIZE(meta->insn.code) != BPF_DW) + wrp_immed(nfp_prog, reg_both(meta->insn.dst_reg * 2 + 1), 0); + + return 0; +} + +static int +data_ld(struct nfp_prog *nfp_prog, swreg offset, u8 dst_gpr, int size) +{ + unsigned int i; + u16 shift, sz; + + /* We load the value from the address indicated in @offset and then + * shift out the data we don't need. Note: this is big endian! + */ + sz = max(size, 4); + shift = size < 4 ? 4 - size : 0; + + emit_cmd(nfp_prog, CMD_TGT_READ8, CMD_MODE_32b, 0, + pptr_reg(nfp_prog), offset, sz - 1, CMD_CTX_SWAP); + + i = 0; + if (shift) + emit_shf(nfp_prog, reg_both(dst_gpr), reg_none(), SHF_OP_NONE, + reg_xfer(0), SHF_SC_R_SHF, shift * 8); + else + for (; i * 4 < size; i++) + wrp_mov(nfp_prog, reg_both(dst_gpr + i), reg_xfer(i)); + + if (i < 2) + wrp_immed(nfp_prog, reg_both(dst_gpr + 1), 0); + + return 0; +} + +static int +data_ld_host_order(struct nfp_prog *nfp_prog, u8 dst_gpr, + swreg lreg, swreg rreg, int size, enum cmd_mode mode) +{ + unsigned int i; + u8 mask, sz; + + /* We load the value from the address indicated in rreg + lreg and then + * mask out the data we don't need. Note: this is little endian! + */ + sz = max(size, 4); + mask = size < 4 ? GENMASK(size - 1, 0) : 0; + + emit_cmd(nfp_prog, CMD_TGT_READ32_SWAP, mode, 0, + lreg, rreg, sz / 4 - 1, CMD_CTX_SWAP); + + i = 0; + if (mask) + emit_ld_field_any(nfp_prog, reg_both(dst_gpr), mask, + reg_xfer(0), SHF_SC_NONE, 0, true); + else + for (; i * 4 < size; i++) + wrp_mov(nfp_prog, reg_both(dst_gpr + i), reg_xfer(i)); + + if (i < 2) + wrp_immed(nfp_prog, reg_both(dst_gpr + 1), 0); + + return 0; +} + +static int +data_ld_host_order_addr32(struct nfp_prog *nfp_prog, u8 src_gpr, swreg offset, + u8 dst_gpr, u8 size) +{ + return data_ld_host_order(nfp_prog, dst_gpr, reg_a(src_gpr), offset, + size, CMD_MODE_32b); +} + +static int +data_ld_host_order_addr40(struct nfp_prog *nfp_prog, u8 src_gpr, swreg offset, + u8 dst_gpr, u8 size) +{ + swreg rega, regb; + + addr40_offset(nfp_prog, src_gpr, offset, ®a, ®b); + + return data_ld_host_order(nfp_prog, dst_gpr, rega, regb, + size, CMD_MODE_40b_BA); +} + +static int +construct_data_ind_ld(struct nfp_prog *nfp_prog, u16 offset, u16 src, u8 size) +{ + swreg tmp_reg; + + /* Calculate the true offset (src_reg + imm) */ + tmp_reg = ur_load_imm_any(nfp_prog, offset, imm_b(nfp_prog)); + emit_alu(nfp_prog, imm_both(nfp_prog), reg_a(src), ALU_OP_ADD, tmp_reg); + + /* Check packet length (size guaranteed to fit b/c it's u8) */ + emit_alu(nfp_prog, imm_a(nfp_prog), + imm_a(nfp_prog), ALU_OP_ADD, reg_imm(size)); + emit_alu(nfp_prog, reg_none(), + plen_reg(nfp_prog), ALU_OP_SUB, imm_a(nfp_prog)); + emit_br_relo(nfp_prog, BR_BLO, BR_OFF_RELO, 0, RELO_BR_GO_ABORT); + + /* Load data */ + return data_ld(nfp_prog, imm_b(nfp_prog), 0, size); +} + +static int construct_data_ld(struct nfp_prog *nfp_prog, u16 offset, u8 size) +{ + swreg tmp_reg; + + /* Check packet length */ + tmp_reg = ur_load_imm_any(nfp_prog, offset + size, imm_a(nfp_prog)); + emit_alu(nfp_prog, reg_none(), plen_reg(nfp_prog), ALU_OP_SUB, tmp_reg); + emit_br_relo(nfp_prog, BR_BLO, BR_OFF_RELO, 0, RELO_BR_GO_ABORT); + + /* Load data */ + tmp_reg = re_load_imm_any(nfp_prog, offset, imm_b(nfp_prog)); + return data_ld(nfp_prog, tmp_reg, 0, size); +} + +static int +data_stx_host_order(struct nfp_prog *nfp_prog, u8 dst_gpr, swreg offset, + u8 src_gpr, u8 size) +{ + unsigned int i; + + for (i = 0; i * 4 < size; i++) + wrp_mov(nfp_prog, reg_xfer(i), reg_a(src_gpr + i)); + + emit_cmd(nfp_prog, CMD_TGT_WRITE8_SWAP, CMD_MODE_32b, 0, + reg_a(dst_gpr), offset, size - 1, CMD_CTX_SWAP); + + return 0; +} + +static int +data_st_host_order(struct nfp_prog *nfp_prog, u8 dst_gpr, swreg offset, + u64 imm, u8 size) +{ + wrp_immed(nfp_prog, reg_xfer(0), imm); + if (size == 8) + wrp_immed(nfp_prog, reg_xfer(1), imm >> 32); + + emit_cmd(nfp_prog, CMD_TGT_WRITE8_SWAP, CMD_MODE_32b, 0, + reg_a(dst_gpr), offset, size - 1, CMD_CTX_SWAP); + + return 0; +} + +typedef int +(*lmem_step)(struct nfp_prog *nfp_prog, u8 gpr, u8 gpr_byte, s32 off, + unsigned int size, bool first, bool new_gpr, bool last, bool lm3, + bool needs_inc); + +static int +wrp_lmem_load(struct nfp_prog *nfp_prog, u8 dst, u8 dst_byte, s32 off, + unsigned int size, bool first, bool new_gpr, bool last, bool lm3, + bool needs_inc) +{ + bool should_inc = needs_inc && new_gpr && !last; + u32 idx, src_byte; + enum shf_sc sc; + swreg reg; + int shf; + u8 mask; + + if (WARN_ON_ONCE(dst_byte + size > 4 || off % 4 + size > 4)) + return -EOPNOTSUPP; + + idx = off / 4; + + /* Move the entire word */ + if (size == 4) { + wrp_mov(nfp_prog, reg_both(dst), + should_inc ? reg_lm_inc(3) : reg_lm(lm3 ? 3 : 0, idx)); + return 0; + } + + if (WARN_ON_ONCE(lm3 && idx > RE_REG_LM_IDX_MAX)) + return -EOPNOTSUPP; + + src_byte = off % 4; + + mask = (1 << size) - 1; + mask <<= dst_byte; + + if (WARN_ON_ONCE(mask > 0xf)) + return -EOPNOTSUPP; + + shf = abs(src_byte - dst_byte) * 8; + if (src_byte == dst_byte) { + sc = SHF_SC_NONE; + } else if (src_byte < dst_byte) { + shf = 32 - shf; + sc = SHF_SC_L_SHF; + } else { + sc = SHF_SC_R_SHF; + } + + /* ld_field can address fewer indexes, if offset too large do RMW. + * Because we RMV twice we waste 2 cycles on unaligned 8 byte writes. + */ + if (idx <= RE_REG_LM_IDX_MAX) { + reg = reg_lm(lm3 ? 3 : 0, idx); + } else { + reg = imm_a(nfp_prog); + /* If it's not the first part of the load and we start a new GPR + * that means we are loading a second part of the LMEM word into + * a new GPR. IOW we've already looked that LMEM word and + * therefore it has been loaded into imm_a(). + */ + if (first || !new_gpr) + wrp_mov(nfp_prog, reg, reg_lm(0, idx)); + } + + emit_ld_field_any(nfp_prog, reg_both(dst), mask, reg, sc, shf, new_gpr); + + if (should_inc) + wrp_mov(nfp_prog, reg_none(), reg_lm_inc(3)); + + return 0; +} + +static int +wrp_lmem_store(struct nfp_prog *nfp_prog, u8 src, u8 src_byte, s32 off, + unsigned int size, bool first, bool new_gpr, bool last, bool lm3, + bool needs_inc) +{ + bool should_inc = needs_inc && new_gpr && !last; + u32 idx, dst_byte; + enum shf_sc sc; + swreg reg; + int shf; + u8 mask; + + if (WARN_ON_ONCE(src_byte + size > 4 || off % 4 + size > 4)) + return -EOPNOTSUPP; + + idx = off / 4; + + /* Move the entire word */ + if (size == 4) { + wrp_mov(nfp_prog, + should_inc ? reg_lm_inc(3) : reg_lm(lm3 ? 3 : 0, idx), + reg_b(src)); + return 0; + } + + if (WARN_ON_ONCE(lm3 && idx > RE_REG_LM_IDX_MAX)) + return -EOPNOTSUPP; + + dst_byte = off % 4; + + mask = (1 << size) - 1; + mask <<= dst_byte; + + if (WARN_ON_ONCE(mask > 0xf)) + return -EOPNOTSUPP; + + shf = abs(src_byte - dst_byte) * 8; + if (src_byte == dst_byte) { + sc = SHF_SC_NONE; + } else if (src_byte < dst_byte) { + shf = 32 - shf; + sc = SHF_SC_L_SHF; + } else { + sc = SHF_SC_R_SHF; + } + + /* ld_field can address fewer indexes, if offset too large do RMW. + * Because we RMV twice we waste 2 cycles on unaligned 8 byte writes. + */ + if (idx <= RE_REG_LM_IDX_MAX) { + reg = reg_lm(lm3 ? 3 : 0, idx); + } else { + reg = imm_a(nfp_prog); + /* Only first and last LMEM locations are going to need RMW, + * the middle location will be overwritten fully. + */ + if (first || last) + wrp_mov(nfp_prog, reg, reg_lm(0, idx)); + } + + emit_ld_field(nfp_prog, reg, mask, reg_b(src), sc, shf); + + if (new_gpr || last) { + if (idx > RE_REG_LM_IDX_MAX) + wrp_mov(nfp_prog, reg_lm(0, idx), reg); + if (should_inc) + wrp_mov(nfp_prog, reg_none(), reg_lm_inc(3)); + } + + return 0; +} + +static int +mem_op_stack(struct nfp_prog *nfp_prog, struct nfp_insn_meta *meta, + unsigned int size, unsigned int ptr_off, u8 gpr, u8 ptr_gpr, + bool clr_gpr, lmem_step step) +{ + s32 off = nfp_prog->stack_depth + meta->insn.off + ptr_off; + bool first = true, last; + bool needs_inc = false; + swreg stack_off_reg; + u8 prev_gpr = 255; + u32 gpr_byte = 0; + bool lm3 = true; + int ret; + + if (meta->ptr_not_const) { + /* Use of the last encountered ptr_off is OK, they all have + * the same alignment. Depend on low bits of value being + * discarded when written to LMaddr register. + */ + stack_off_reg = ur_load_imm_any(nfp_prog, meta->insn.off, + stack_imm(nfp_prog)); + + emit_alu(nfp_prog, imm_b(nfp_prog), + reg_a(ptr_gpr), ALU_OP_ADD, stack_off_reg); + + needs_inc = true; + } else if (off + size <= 64) { + /* We can reach bottom 64B with LMaddr0 */ + lm3 = false; + } else if (round_down(off, 32) == round_down(off + size - 1, 32)) { + /* We have to set up a new pointer. If we know the offset + * and the entire access falls into a single 32 byte aligned + * window we won't have to increment the LM pointer. + * The 32 byte alignment is imporant because offset is ORed in + * not added when doing *l$indexN[off]. + */ + stack_off_reg = ur_load_imm_any(nfp_prog, round_down(off, 32), + stack_imm(nfp_prog)); + emit_alu(nfp_prog, imm_b(nfp_prog), + stack_reg(nfp_prog), ALU_OP_ADD, stack_off_reg); + + off %= 32; + } else { + stack_off_reg = ur_load_imm_any(nfp_prog, round_down(off, 4), + stack_imm(nfp_prog)); + + emit_alu(nfp_prog, imm_b(nfp_prog), + stack_reg(nfp_prog), ALU_OP_ADD, stack_off_reg); + + needs_inc = true; + } + if (lm3) { + emit_csr_wr(nfp_prog, imm_b(nfp_prog), NFP_CSR_ACT_LM_ADDR3); + /* For size < 4 one slot will be filled by zeroing of upper. */ + wrp_nops(nfp_prog, clr_gpr && size < 8 ? 2 : 3); + } + + if (clr_gpr && size < 8) + wrp_immed(nfp_prog, reg_both(gpr + 1), 0); + + while (size) { + u32 slice_end; + u8 slice_size; + + slice_size = min(size, 4 - gpr_byte); + slice_end = min(off + slice_size, round_up(off + 1, 4)); + slice_size = slice_end - off; + + last = slice_size == size; + + if (needs_inc) + off %= 4; + + ret = step(nfp_prog, gpr, gpr_byte, off, slice_size, + first, gpr != prev_gpr, last, lm3, needs_inc); + if (ret) + return ret; + + prev_gpr = gpr; + first = false; + + gpr_byte += slice_size; + if (gpr_byte >= 4) { + gpr_byte -= 4; + gpr++; + } + + size -= slice_size; + off += slice_size; + } + + return 0; +} + +static void +wrp_alu_imm(struct nfp_prog *nfp_prog, u8 dst, enum alu_op alu_op, u32 imm) +{ + swreg tmp_reg; + + if (alu_op == ALU_OP_AND) { + if (!imm) + wrp_immed(nfp_prog, reg_both(dst), 0); + if (!imm || !~imm) + return; + } + if (alu_op == ALU_OP_OR) { + if (!~imm) + wrp_immed(nfp_prog, reg_both(dst), ~0U); + if (!imm || !~imm) + return; + } + if (alu_op == ALU_OP_XOR) { + if (!~imm) + emit_alu(nfp_prog, reg_both(dst), reg_none(), + ALU_OP_NOT, reg_b(dst)); + if (!imm || !~imm) + return; + } + + tmp_reg = ur_load_imm_any(nfp_prog, imm, imm_b(nfp_prog)); + emit_alu(nfp_prog, reg_both(dst), reg_a(dst), alu_op, tmp_reg); +} + +static int +wrp_alu64_imm(struct nfp_prog *nfp_prog, struct nfp_insn_meta *meta, + enum alu_op alu_op, bool skip) +{ + const struct bpf_insn *insn = &meta->insn; + u64 imm = insn->imm; /* sign extend */ + + if (skip) { + meta->skip = true; + return 0; + } + + wrp_alu_imm(nfp_prog, insn->dst_reg * 2, alu_op, imm & ~0U); + wrp_alu_imm(nfp_prog, insn->dst_reg * 2 + 1, alu_op, imm >> 32); + + return 0; +} + +static int +wrp_alu64_reg(struct nfp_prog *nfp_prog, struct nfp_insn_meta *meta, + enum alu_op alu_op) +{ + u8 dst = meta->insn.dst_reg * 2, src = meta->insn.src_reg * 2; + + emit_alu(nfp_prog, reg_both(dst), reg_a(dst), alu_op, reg_b(src)); + emit_alu(nfp_prog, reg_both(dst + 1), + reg_a(dst + 1), alu_op, reg_b(src + 1)); + + return 0; +} + +static int +wrp_alu32_imm(struct nfp_prog *nfp_prog, struct nfp_insn_meta *meta, + enum alu_op alu_op) +{ + const struct bpf_insn *insn = &meta->insn; + + wrp_alu_imm(nfp_prog, insn->dst_reg * 2, alu_op, insn->imm); + wrp_immed(nfp_prog, reg_both(insn->dst_reg * 2 + 1), 0); + + return 0; +} + +static int +wrp_alu32_reg(struct nfp_prog *nfp_prog, struct nfp_insn_meta *meta, + enum alu_op alu_op) +{ + u8 dst = meta->insn.dst_reg * 2, src = meta->insn.src_reg * 2; + + emit_alu(nfp_prog, reg_both(dst), reg_a(dst), alu_op, reg_b(src)); + wrp_immed(nfp_prog, reg_both(meta->insn.dst_reg * 2 + 1), 0); + + return 0; +} + +static void +wrp_test_reg_one(struct nfp_prog *nfp_prog, u8 dst, enum alu_op alu_op, u8 src, + enum br_mask br_mask, u16 off) +{ + emit_alu(nfp_prog, reg_none(), reg_a(dst), alu_op, reg_b(src)); + emit_br(nfp_prog, br_mask, off, 0); +} + +static int +wrp_test_reg(struct nfp_prog *nfp_prog, struct nfp_insn_meta *meta, + enum alu_op alu_op, enum br_mask br_mask) +{ + const struct bpf_insn *insn = &meta->insn; + + wrp_test_reg_one(nfp_prog, insn->dst_reg * 2, alu_op, + insn->src_reg * 2, br_mask, insn->off); + wrp_test_reg_one(nfp_prog, insn->dst_reg * 2 + 1, alu_op, + insn->src_reg * 2 + 1, br_mask, insn->off); + + return 0; +} + +static const struct jmp_code_map { + enum br_mask br_mask; + bool swap; +} jmp_code_map[] = { + [BPF_JGT >> 4] = { BR_BLO, true }, + [BPF_JGE >> 4] = { BR_BHS, false }, + [BPF_JLT >> 4] = { BR_BLO, false }, + [BPF_JLE >> 4] = { BR_BHS, true }, + [BPF_JSGT >> 4] = { BR_BLT, true }, + [BPF_JSGE >> 4] = { BR_BGE, false }, + [BPF_JSLT >> 4] = { BR_BLT, false }, + [BPF_JSLE >> 4] = { BR_BGE, true }, +}; + +static const struct jmp_code_map *nfp_jmp_code_get(struct nfp_insn_meta *meta) +{ + unsigned int op; + + op = BPF_OP(meta->insn.code) >> 4; + /* br_mask of 0 is BR_BEQ which we don't use in jump code table */ + if (WARN_ONCE(op >= ARRAY_SIZE(jmp_code_map) || + !jmp_code_map[op].br_mask, + "no code found for jump instruction")) + return NULL; + + return &jmp_code_map[op]; +} + +static int cmp_imm(struct nfp_prog *nfp_prog, struct nfp_insn_meta *meta) +{ + const struct bpf_insn *insn = &meta->insn; + u64 imm = insn->imm; /* sign extend */ + const struct jmp_code_map *code; + enum alu_op alu_op, carry_op; + u8 reg = insn->dst_reg * 2; + swreg tmp_reg; + + code = nfp_jmp_code_get(meta); + if (!code) + return -EINVAL; + + alu_op = meta->jump_neg_op ? ALU_OP_ADD : ALU_OP_SUB; + carry_op = meta->jump_neg_op ? ALU_OP_ADD_C : ALU_OP_SUB_C; + + tmp_reg = ur_load_imm_any(nfp_prog, imm & ~0U, imm_b(nfp_prog)); + if (!code->swap) + emit_alu(nfp_prog, reg_none(), reg_a(reg), alu_op, tmp_reg); + else + emit_alu(nfp_prog, reg_none(), tmp_reg, alu_op, reg_a(reg)); + + tmp_reg = ur_load_imm_any(nfp_prog, imm >> 32, imm_b(nfp_prog)); + if (!code->swap) + emit_alu(nfp_prog, reg_none(), + reg_a(reg + 1), carry_op, tmp_reg); + else + emit_alu(nfp_prog, reg_none(), + tmp_reg, carry_op, reg_a(reg + 1)); + + emit_br(nfp_prog, code->br_mask, insn->off, 0); + + return 0; +} + +static int cmp_reg(struct nfp_prog *nfp_prog, struct nfp_insn_meta *meta) +{ + const struct bpf_insn *insn = &meta->insn; + const struct jmp_code_map *code; + u8 areg, breg; + + code = nfp_jmp_code_get(meta); + if (!code) + return -EINVAL; + + areg = insn->dst_reg * 2; + breg = insn->src_reg * 2; + + if (code->swap) { + areg ^= breg; + breg ^= areg; + areg ^= breg; + } + + emit_alu(nfp_prog, reg_none(), reg_a(areg), ALU_OP_SUB, reg_b(breg)); + emit_alu(nfp_prog, reg_none(), + reg_a(areg + 1), ALU_OP_SUB_C, reg_b(breg + 1)); + emit_br(nfp_prog, code->br_mask, insn->off, 0); + + return 0; +} + +static void wrp_end32(struct nfp_prog *nfp_prog, swreg reg_in, u8 gpr_out) +{ + emit_ld_field(nfp_prog, reg_both(gpr_out), 0xf, reg_in, + SHF_SC_R_ROT, 8); + emit_ld_field(nfp_prog, reg_both(gpr_out), 0x5, reg_a(gpr_out), + SHF_SC_R_ROT, 16); +} + +static void +wrp_mul_u32(struct nfp_prog *nfp_prog, swreg dst_hi, swreg dst_lo, swreg lreg, + swreg rreg, bool gen_high_half) +{ + emit_mul(nfp_prog, lreg, MUL_TYPE_START, MUL_STEP_NONE, rreg); + emit_mul(nfp_prog, lreg, MUL_TYPE_STEP_32x32, MUL_STEP_1, rreg); + emit_mul(nfp_prog, lreg, MUL_TYPE_STEP_32x32, MUL_STEP_2, rreg); + emit_mul(nfp_prog, lreg, MUL_TYPE_STEP_32x32, MUL_STEP_3, rreg); + emit_mul(nfp_prog, lreg, MUL_TYPE_STEP_32x32, MUL_STEP_4, rreg); + emit_mul(nfp_prog, dst_lo, MUL_TYPE_STEP_32x32, MUL_LAST, reg_none()); + if (gen_high_half) + emit_mul(nfp_prog, dst_hi, MUL_TYPE_STEP_32x32, MUL_LAST_2, + reg_none()); + else + wrp_immed(nfp_prog, dst_hi, 0); +} + +static void +wrp_mul_u16(struct nfp_prog *nfp_prog, swreg dst_hi, swreg dst_lo, swreg lreg, + swreg rreg) +{ + emit_mul(nfp_prog, lreg, MUL_TYPE_START, MUL_STEP_NONE, rreg); + emit_mul(nfp_prog, lreg, MUL_TYPE_STEP_16x16, MUL_STEP_1, rreg); + emit_mul(nfp_prog, lreg, MUL_TYPE_STEP_16x16, MUL_STEP_2, rreg); + emit_mul(nfp_prog, dst_lo, MUL_TYPE_STEP_16x16, MUL_LAST, reg_none()); +} + +static int +wrp_mul(struct nfp_prog *nfp_prog, struct nfp_insn_meta *meta, + bool gen_high_half, bool ropnd_from_reg) +{ + swreg multiplier, multiplicand, dst_hi, dst_lo; + const struct bpf_insn *insn = &meta->insn; + u32 lopnd_max, ropnd_max; + u8 dst_reg; + + dst_reg = insn->dst_reg; + multiplicand = reg_a(dst_reg * 2); + dst_hi = reg_both(dst_reg * 2 + 1); + dst_lo = reg_both(dst_reg * 2); + lopnd_max = meta->umax_dst; + if (ropnd_from_reg) { + multiplier = reg_b(insn->src_reg * 2); + ropnd_max = meta->umax_src; + } else { + u32 imm = insn->imm; + + multiplier = ur_load_imm_any(nfp_prog, imm, imm_b(nfp_prog)); + ropnd_max = imm; + } + if (lopnd_max > U16_MAX || ropnd_max > U16_MAX) + wrp_mul_u32(nfp_prog, dst_hi, dst_lo, multiplicand, multiplier, + gen_high_half); + else + wrp_mul_u16(nfp_prog, dst_hi, dst_lo, multiplicand, multiplier); + + return 0; +} + +static int wrp_div_imm(struct nfp_prog *nfp_prog, u8 dst, u64 imm) +{ + swreg dst_both = reg_both(dst), dst_a = reg_a(dst), dst_b = reg_a(dst); + struct reciprocal_value_adv rvalue; + u8 pre_shift, exp; + swreg magic; + + if (imm > U32_MAX) { + wrp_immed(nfp_prog, dst_both, 0); + return 0; + } + + /* NOTE: because we are using "reciprocal_value_adv" which doesn't + * support "divisor > (1u << 31)", we need to JIT separate NFP sequence + * to handle such case which actually equals to the result of unsigned + * comparison "dst >= imm" which could be calculated using the following + * NFP sequence: + * + * alu[--, dst, -, imm] + * immed[imm, 0] + * alu[dst, imm, +carry, 0] + * + */ + if (imm > 1U << 31) { + swreg tmp_b = ur_load_imm_any(nfp_prog, imm, imm_b(nfp_prog)); + + emit_alu(nfp_prog, reg_none(), dst_a, ALU_OP_SUB, tmp_b); + wrp_immed(nfp_prog, imm_a(nfp_prog), 0); + emit_alu(nfp_prog, dst_both, imm_a(nfp_prog), ALU_OP_ADD_C, + reg_imm(0)); + return 0; + } + + rvalue = reciprocal_value_adv(imm, 32); + exp = rvalue.exp; + if (rvalue.is_wide_m && !(imm & 1)) { + pre_shift = fls(imm & -imm) - 1; + rvalue = reciprocal_value_adv(imm >> pre_shift, 32 - pre_shift); + } else { + pre_shift = 0; + } + magic = ur_load_imm_any(nfp_prog, rvalue.m, imm_b(nfp_prog)); + if (imm == 1U << exp) { + emit_shf(nfp_prog, dst_both, reg_none(), SHF_OP_NONE, dst_b, + SHF_SC_R_SHF, exp); + } else if (rvalue.is_wide_m) { + wrp_mul_u32(nfp_prog, imm_both(nfp_prog), reg_none(), dst_a, + magic, true); + emit_alu(nfp_prog, dst_both, dst_a, ALU_OP_SUB, + imm_b(nfp_prog)); + emit_shf(nfp_prog, dst_both, reg_none(), SHF_OP_NONE, dst_b, + SHF_SC_R_SHF, 1); + emit_alu(nfp_prog, dst_both, dst_a, ALU_OP_ADD, + imm_b(nfp_prog)); + emit_shf(nfp_prog, dst_both, reg_none(), SHF_OP_NONE, dst_b, + SHF_SC_R_SHF, rvalue.sh - 1); + } else { + if (pre_shift) + emit_shf(nfp_prog, dst_both, reg_none(), SHF_OP_NONE, + dst_b, SHF_SC_R_SHF, pre_shift); + wrp_mul_u32(nfp_prog, dst_both, reg_none(), dst_a, magic, true); + emit_shf(nfp_prog, dst_both, reg_none(), SHF_OP_NONE, + dst_b, SHF_SC_R_SHF, rvalue.sh); + } + + return 0; +} + +static int adjust_head(struct nfp_prog *nfp_prog, struct nfp_insn_meta *meta) +{ + swreg tmp = imm_a(nfp_prog), tmp_len = imm_b(nfp_prog); + struct nfp_bpf_cap_adjust_head *adjust_head; + u32 ret_einval, end; + + adjust_head = &nfp_prog->bpf->adjust_head; + + /* Optimized version - 5 vs 14 cycles */ + if (nfp_prog->adjust_head_location != UINT_MAX) { + if (WARN_ON_ONCE(nfp_prog->adjust_head_location != meta->n)) + return -EINVAL; + + emit_alu(nfp_prog, pptr_reg(nfp_prog), + reg_a(2 * 2), ALU_OP_ADD, pptr_reg(nfp_prog)); + emit_alu(nfp_prog, plen_reg(nfp_prog), + plen_reg(nfp_prog), ALU_OP_SUB, reg_a(2 * 2)); + emit_alu(nfp_prog, pv_len(nfp_prog), + pv_len(nfp_prog), ALU_OP_SUB, reg_a(2 * 2)); + + wrp_immed(nfp_prog, reg_both(0), 0); + wrp_immed(nfp_prog, reg_both(1), 0); + + /* TODO: when adjust head is guaranteed to succeed we can + * also eliminate the following if (r0 == 0) branch. + */ + + return 0; + } + + ret_einval = nfp_prog_current_offset(nfp_prog) + 14; + end = ret_einval + 2; + + /* We need to use a temp because offset is just a part of the pkt ptr */ + emit_alu(nfp_prog, tmp, + reg_a(2 * 2), ALU_OP_ADD_2B, pptr_reg(nfp_prog)); + + /* Validate result will fit within FW datapath constraints */ + emit_alu(nfp_prog, reg_none(), + tmp, ALU_OP_SUB, reg_imm(adjust_head->off_min)); + emit_br(nfp_prog, BR_BLO, ret_einval, 0); + emit_alu(nfp_prog, reg_none(), + reg_imm(adjust_head->off_max), ALU_OP_SUB, tmp); + emit_br(nfp_prog, BR_BLO, ret_einval, 0); + + /* Validate the length is at least ETH_HLEN */ + emit_alu(nfp_prog, tmp_len, + plen_reg(nfp_prog), ALU_OP_SUB, reg_a(2 * 2)); + emit_alu(nfp_prog, reg_none(), + tmp_len, ALU_OP_SUB, reg_imm(ETH_HLEN)); + emit_br(nfp_prog, BR_BMI, ret_einval, 0); + + /* Load the ret code */ + wrp_immed(nfp_prog, reg_both(0), 0); + wrp_immed(nfp_prog, reg_both(1), 0); + + /* Modify the packet metadata */ + emit_ld_field(nfp_prog, pptr_reg(nfp_prog), 0x3, tmp, SHF_SC_NONE, 0); + + /* Skip over the -EINVAL ret code (defer 2) */ + emit_br(nfp_prog, BR_UNC, end, 2); + + emit_alu(nfp_prog, plen_reg(nfp_prog), + plen_reg(nfp_prog), ALU_OP_SUB, reg_a(2 * 2)); + emit_alu(nfp_prog, pv_len(nfp_prog), + pv_len(nfp_prog), ALU_OP_SUB, reg_a(2 * 2)); + + /* return -EINVAL target */ + if (!nfp_prog_confirm_current_offset(nfp_prog, ret_einval)) + return -EINVAL; + + wrp_immed(nfp_prog, reg_both(0), -22); + wrp_immed(nfp_prog, reg_both(1), ~0); + + if (!nfp_prog_confirm_current_offset(nfp_prog, end)) + return -EINVAL; + + return 0; +} + +static int adjust_tail(struct nfp_prog *nfp_prog, struct nfp_insn_meta *meta) +{ + u32 ret_einval, end; + swreg plen, delta; + + BUILD_BUG_ON(plen_reg(nfp_prog) != reg_b(STATIC_REG_PKT_LEN)); + + plen = imm_a(nfp_prog); + delta = reg_a(2 * 2); + + ret_einval = nfp_prog_current_offset(nfp_prog) + 9; + end = nfp_prog_current_offset(nfp_prog) + 11; + + /* Calculate resulting length */ + emit_alu(nfp_prog, plen, plen_reg(nfp_prog), ALU_OP_ADD, delta); + /* delta == 0 is not allowed by the kernel, add must overflow to make + * length smaller. + */ + emit_br(nfp_prog, BR_BCC, ret_einval, 0); + + /* if (new_len < 14) then -EINVAL */ + emit_alu(nfp_prog, reg_none(), plen, ALU_OP_SUB, reg_imm(ETH_HLEN)); + emit_br(nfp_prog, BR_BMI, ret_einval, 0); + + emit_alu(nfp_prog, plen_reg(nfp_prog), + plen_reg(nfp_prog), ALU_OP_ADD, delta); + emit_alu(nfp_prog, pv_len(nfp_prog), + pv_len(nfp_prog), ALU_OP_ADD, delta); + + emit_br(nfp_prog, BR_UNC, end, 2); + wrp_immed(nfp_prog, reg_both(0), 0); + wrp_immed(nfp_prog, reg_both(1), 0); + + if (!nfp_prog_confirm_current_offset(nfp_prog, ret_einval)) + return -EINVAL; + + wrp_immed(nfp_prog, reg_both(0), -22); + wrp_immed(nfp_prog, reg_both(1), ~0); + + if (!nfp_prog_confirm_current_offset(nfp_prog, end)) + return -EINVAL; + + return 0; +} + +static int +map_call_stack_common(struct nfp_prog *nfp_prog, struct nfp_insn_meta *meta) +{ + bool load_lm_ptr; + u32 ret_tgt; + s64 lm_off; + + /* We only have to reload LM0 if the key is not at start of stack */ + lm_off = nfp_prog->stack_depth; + lm_off += meta->arg2.reg.var_off.value + meta->arg2.reg.off; + load_lm_ptr = meta->arg2.var_off || lm_off; + + /* Set LM0 to start of key */ + if (load_lm_ptr) + emit_csr_wr(nfp_prog, reg_b(2 * 2), NFP_CSR_ACT_LM_ADDR0); + if (meta->func_id == BPF_FUNC_map_update_elem) + emit_csr_wr(nfp_prog, reg_b(3 * 2), NFP_CSR_ACT_LM_ADDR2); + + emit_br_relo(nfp_prog, BR_UNC, BR_OFF_RELO + meta->func_id, + 2, RELO_BR_HELPER); + ret_tgt = nfp_prog_current_offset(nfp_prog) + 2; + + /* Load map ID into A0 */ + wrp_mov(nfp_prog, reg_a(0), reg_a(2)); + + /* Load the return address into B0 */ + wrp_immed_relo(nfp_prog, reg_b(0), ret_tgt, RELO_IMMED_REL); + + if (!nfp_prog_confirm_current_offset(nfp_prog, ret_tgt)) + return -EINVAL; + + /* Reset the LM0 pointer */ + if (!load_lm_ptr) + return 0; + + emit_csr_wr(nfp_prog, stack_reg(nfp_prog), NFP_CSR_ACT_LM_ADDR0); + wrp_nops(nfp_prog, 3); + + return 0; +} + +static int +nfp_get_prandom_u32(struct nfp_prog *nfp_prog, struct nfp_insn_meta *meta) +{ + __emit_csr_rd(nfp_prog, NFP_CSR_PSEUDO_RND_NUM); + /* CSR value is read in following immed[gpr, 0] */ + emit_immed(nfp_prog, reg_both(0), 0, + IMMED_WIDTH_ALL, false, IMMED_SHIFT_0B); + emit_immed(nfp_prog, reg_both(1), 0, + IMMED_WIDTH_ALL, false, IMMED_SHIFT_0B); + return 0; +} + +static int +nfp_perf_event_output(struct nfp_prog *nfp_prog, struct nfp_insn_meta *meta) +{ + swreg ptr_type; + u32 ret_tgt; + + ptr_type = ur_load_imm_any(nfp_prog, meta->arg1.type, imm_a(nfp_prog)); + + ret_tgt = nfp_prog_current_offset(nfp_prog) + 3; + + emit_br_relo(nfp_prog, BR_UNC, BR_OFF_RELO + meta->func_id, + 2, RELO_BR_HELPER); + + /* Load ptr type into A1 */ + wrp_mov(nfp_prog, reg_a(1), ptr_type); + + /* Load the return address into B0 */ + wrp_immed_relo(nfp_prog, reg_b(0), ret_tgt, RELO_IMMED_REL); + + if (!nfp_prog_confirm_current_offset(nfp_prog, ret_tgt)) + return -EINVAL; + + return 0; +} + +static int +nfp_queue_select(struct nfp_prog *nfp_prog, struct nfp_insn_meta *meta) +{ + u32 jmp_tgt; + + jmp_tgt = nfp_prog_current_offset(nfp_prog) + 5; + + /* Make sure the queue id fits into FW field */ + emit_alu(nfp_prog, reg_none(), reg_a(meta->insn.src_reg * 2), + ALU_OP_AND_NOT_B, reg_imm(0xff)); + emit_br(nfp_prog, BR_BEQ, jmp_tgt, 2); + + /* Set the 'queue selected' bit and the queue value */ + emit_shf(nfp_prog, pv_qsel_set(nfp_prog), + pv_qsel_set(nfp_prog), SHF_OP_OR, reg_imm(1), + SHF_SC_L_SHF, PKT_VEL_QSEL_SET_BIT); + emit_ld_field(nfp_prog, + pv_qsel_val(nfp_prog), 0x1, reg_b(meta->insn.src_reg * 2), + SHF_SC_NONE, 0); + /* Delay slots end here, we will jump over next instruction if queue + * value fits into the field. + */ + emit_ld_field(nfp_prog, + pv_qsel_val(nfp_prog), 0x1, reg_imm(NFP_NET_RXR_MAX), + SHF_SC_NONE, 0); + + if (!nfp_prog_confirm_current_offset(nfp_prog, jmp_tgt)) + return -EINVAL; + + return 0; +} + +/* --- Callbacks --- */ +static int mov_reg64(struct nfp_prog *nfp_prog, struct nfp_insn_meta *meta) +{ + const struct bpf_insn *insn = &meta->insn; + u8 dst = insn->dst_reg * 2; + u8 src = insn->src_reg * 2; + + if (insn->src_reg == BPF_REG_10) { + swreg stack_depth_reg; + + stack_depth_reg = ur_load_imm_any(nfp_prog, + nfp_prog->stack_depth, + stack_imm(nfp_prog)); + emit_alu(nfp_prog, reg_both(dst), + stack_reg(nfp_prog), ALU_OP_ADD, stack_depth_reg); + wrp_immed(nfp_prog, reg_both(dst + 1), 0); + } else { + wrp_reg_mov(nfp_prog, dst, src); + wrp_reg_mov(nfp_prog, dst + 1, src + 1); + } + + return 0; +} + +static int mov_imm64(struct nfp_prog *nfp_prog, struct nfp_insn_meta *meta) +{ + u64 imm = meta->insn.imm; /* sign extend */ + + wrp_immed(nfp_prog, reg_both(meta->insn.dst_reg * 2), imm & ~0U); + wrp_immed(nfp_prog, reg_both(meta->insn.dst_reg * 2 + 1), imm >> 32); + + return 0; +} + +static int xor_reg64(struct nfp_prog *nfp_prog, struct nfp_insn_meta *meta) +{ + return wrp_alu64_reg(nfp_prog, meta, ALU_OP_XOR); +} + +static int xor_imm64(struct nfp_prog *nfp_prog, struct nfp_insn_meta *meta) +{ + return wrp_alu64_imm(nfp_prog, meta, ALU_OP_XOR, !meta->insn.imm); +} + +static int and_reg64(struct nfp_prog *nfp_prog, struct nfp_insn_meta *meta) +{ + return wrp_alu64_reg(nfp_prog, meta, ALU_OP_AND); +} + +static int and_imm64(struct nfp_prog *nfp_prog, struct nfp_insn_meta *meta) +{ + return wrp_alu64_imm(nfp_prog, meta, ALU_OP_AND, !~meta->insn.imm); +} + +static int or_reg64(struct nfp_prog *nfp_prog, struct nfp_insn_meta *meta) +{ + return wrp_alu64_reg(nfp_prog, meta, ALU_OP_OR); +} + +static int or_imm64(struct nfp_prog *nfp_prog, struct nfp_insn_meta *meta) +{ + return wrp_alu64_imm(nfp_prog, meta, ALU_OP_OR, !meta->insn.imm); +} + +static int add_reg64(struct nfp_prog *nfp_prog, struct nfp_insn_meta *meta) +{ + const struct bpf_insn *insn = &meta->insn; + + emit_alu(nfp_prog, reg_both(insn->dst_reg * 2), + reg_a(insn->dst_reg * 2), ALU_OP_ADD, + reg_b(insn->src_reg * 2)); + emit_alu(nfp_prog, reg_both(insn->dst_reg * 2 + 1), + reg_a(insn->dst_reg * 2 + 1), ALU_OP_ADD_C, + reg_b(insn->src_reg * 2 + 1)); + + return 0; +} + +static int add_imm64(struct nfp_prog *nfp_prog, struct nfp_insn_meta *meta) +{ + const struct bpf_insn *insn = &meta->insn; + u64 imm = insn->imm; /* sign extend */ + + wrp_alu_imm(nfp_prog, insn->dst_reg * 2, ALU_OP_ADD, imm & ~0U); + wrp_alu_imm(nfp_prog, insn->dst_reg * 2 + 1, ALU_OP_ADD_C, imm >> 32); + + return 0; +} + +static int sub_reg64(struct nfp_prog *nfp_prog, struct nfp_insn_meta *meta) +{ + const struct bpf_insn *insn = &meta->insn; + + emit_alu(nfp_prog, reg_both(insn->dst_reg * 2), + reg_a(insn->dst_reg * 2), ALU_OP_SUB, + reg_b(insn->src_reg * 2)); + emit_alu(nfp_prog, reg_both(insn->dst_reg * 2 + 1), + reg_a(insn->dst_reg * 2 + 1), ALU_OP_SUB_C, + reg_b(insn->src_reg * 2 + 1)); + + return 0; +} + +static int sub_imm64(struct nfp_prog *nfp_prog, struct nfp_insn_meta *meta) +{ + const struct bpf_insn *insn = &meta->insn; + u64 imm = insn->imm; /* sign extend */ + + wrp_alu_imm(nfp_prog, insn->dst_reg * 2, ALU_OP_SUB, imm & ~0U); + wrp_alu_imm(nfp_prog, insn->dst_reg * 2 + 1, ALU_OP_SUB_C, imm >> 32); + + return 0; +} + +static int mul_reg64(struct nfp_prog *nfp_prog, struct nfp_insn_meta *meta) +{ + return wrp_mul(nfp_prog, meta, true, true); +} + +static int mul_imm64(struct nfp_prog *nfp_prog, struct nfp_insn_meta *meta) +{ + return wrp_mul(nfp_prog, meta, true, false); +} + +static int div_imm64(struct nfp_prog *nfp_prog, struct nfp_insn_meta *meta) +{ + const struct bpf_insn *insn = &meta->insn; + + return wrp_div_imm(nfp_prog, insn->dst_reg * 2, insn->imm); +} + +static int div_reg64(struct nfp_prog *nfp_prog, struct nfp_insn_meta *meta) +{ + /* NOTE: verifier hook has rejected cases for which verifier doesn't + * know whether the source operand is constant or not. + */ + return wrp_div_imm(nfp_prog, meta->insn.dst_reg * 2, meta->umin_src); +} + +static int neg_reg64(struct nfp_prog *nfp_prog, struct nfp_insn_meta *meta) +{ + const struct bpf_insn *insn = &meta->insn; + + emit_alu(nfp_prog, reg_both(insn->dst_reg * 2), reg_imm(0), + ALU_OP_SUB, reg_b(insn->dst_reg * 2)); + emit_alu(nfp_prog, reg_both(insn->dst_reg * 2 + 1), reg_imm(0), + ALU_OP_SUB_C, reg_b(insn->dst_reg * 2 + 1)); + + return 0; +} + +/* Pseudo code: + * if shift_amt >= 32 + * dst_high = dst_low << shift_amt[4:0] + * dst_low = 0; + * else + * dst_high = (dst_high, dst_low) >> (32 - shift_amt) + * dst_low = dst_low << shift_amt + * + * The indirect shift will use the same logic at runtime. + */ +static int __shl_imm64(struct nfp_prog *nfp_prog, u8 dst, u8 shift_amt) +{ + if (shift_amt < 32) { + emit_shf(nfp_prog, reg_both(dst + 1), reg_a(dst + 1), + SHF_OP_NONE, reg_b(dst), SHF_SC_R_DSHF, + 32 - shift_amt); + emit_shf(nfp_prog, reg_both(dst), reg_none(), SHF_OP_NONE, + reg_b(dst), SHF_SC_L_SHF, shift_amt); + } else if (shift_amt == 32) { + wrp_reg_mov(nfp_prog, dst + 1, dst); + wrp_immed(nfp_prog, reg_both(dst), 0); + } else if (shift_amt > 32) { + emit_shf(nfp_prog, reg_both(dst + 1), reg_none(), SHF_OP_NONE, + reg_b(dst), SHF_SC_L_SHF, shift_amt - 32); + wrp_immed(nfp_prog, reg_both(dst), 0); + } + + return 0; +} + +static int shl_imm64(struct nfp_prog *nfp_prog, struct nfp_insn_meta *meta) +{ + const struct bpf_insn *insn = &meta->insn; + u8 dst = insn->dst_reg * 2; + + return __shl_imm64(nfp_prog, dst, insn->imm); +} + +static void shl_reg64_lt32_high(struct nfp_prog *nfp_prog, u8 dst, u8 src) +{ + emit_alu(nfp_prog, imm_both(nfp_prog), reg_imm(32), ALU_OP_SUB, + reg_b(src)); + emit_alu(nfp_prog, reg_none(), imm_a(nfp_prog), ALU_OP_OR, reg_imm(0)); + emit_shf_indir(nfp_prog, reg_both(dst + 1), reg_a(dst + 1), SHF_OP_NONE, + reg_b(dst), SHF_SC_R_DSHF); +} + +/* NOTE: for indirect left shift, HIGH part should be calculated first. */ +static void shl_reg64_lt32_low(struct nfp_prog *nfp_prog, u8 dst, u8 src) +{ + emit_alu(nfp_prog, reg_none(), reg_a(src), ALU_OP_OR, reg_imm(0)); + emit_shf_indir(nfp_prog, reg_both(dst), reg_none(), SHF_OP_NONE, + reg_b(dst), SHF_SC_L_SHF); +} + +static void shl_reg64_lt32(struct nfp_prog *nfp_prog, u8 dst, u8 src) +{ + shl_reg64_lt32_high(nfp_prog, dst, src); + shl_reg64_lt32_low(nfp_prog, dst, src); +} + +static void shl_reg64_ge32(struct nfp_prog *nfp_prog, u8 dst, u8 src) +{ + emit_alu(nfp_prog, reg_none(), reg_a(src), ALU_OP_OR, reg_imm(0)); + emit_shf_indir(nfp_prog, reg_both(dst + 1), reg_none(), SHF_OP_NONE, + reg_b(dst), SHF_SC_L_SHF); + wrp_immed(nfp_prog, reg_both(dst), 0); +} + +static int shl_reg64(struct nfp_prog *nfp_prog, struct nfp_insn_meta *meta) +{ + const struct bpf_insn *insn = &meta->insn; + u64 umin, umax; + u8 dst, src; + + dst = insn->dst_reg * 2; + umin = meta->umin_src; + umax = meta->umax_src; + if (umin == umax) + return __shl_imm64(nfp_prog, dst, umin); + + src = insn->src_reg * 2; + if (umax < 32) { + shl_reg64_lt32(nfp_prog, dst, src); + } else if (umin >= 32) { + shl_reg64_ge32(nfp_prog, dst, src); + } else { + /* Generate different instruction sequences depending on runtime + * value of shift amount. + */ + u16 label_ge32, label_end; + + label_ge32 = nfp_prog_current_offset(nfp_prog) + 7; + emit_br_bset(nfp_prog, reg_a(src), 5, label_ge32, 0); + + shl_reg64_lt32_high(nfp_prog, dst, src); + label_end = nfp_prog_current_offset(nfp_prog) + 6; + emit_br(nfp_prog, BR_UNC, label_end, 2); + /* shl_reg64_lt32_low packed in delay slot. */ + shl_reg64_lt32_low(nfp_prog, dst, src); + + if (!nfp_prog_confirm_current_offset(nfp_prog, label_ge32)) + return -EINVAL; + shl_reg64_ge32(nfp_prog, dst, src); + + if (!nfp_prog_confirm_current_offset(nfp_prog, label_end)) + return -EINVAL; + } + + return 0; +} + +/* Pseudo code: + * if shift_amt >= 32 + * dst_high = 0; + * dst_low = dst_high >> shift_amt[4:0] + * else + * dst_high = dst_high >> shift_amt + * dst_low = (dst_high, dst_low) >> shift_amt + * + * The indirect shift will use the same logic at runtime. + */ +static int __shr_imm64(struct nfp_prog *nfp_prog, u8 dst, u8 shift_amt) +{ + if (shift_amt < 32) { + emit_shf(nfp_prog, reg_both(dst), reg_a(dst + 1), SHF_OP_NONE, + reg_b(dst), SHF_SC_R_DSHF, shift_amt); + emit_shf(nfp_prog, reg_both(dst + 1), reg_none(), SHF_OP_NONE, + reg_b(dst + 1), SHF_SC_R_SHF, shift_amt); + } else if (shift_amt == 32) { + wrp_reg_mov(nfp_prog, dst, dst + 1); + wrp_immed(nfp_prog, reg_both(dst + 1), 0); + } else if (shift_amt > 32) { + emit_shf(nfp_prog, reg_both(dst), reg_none(), SHF_OP_NONE, + reg_b(dst + 1), SHF_SC_R_SHF, shift_amt - 32); + wrp_immed(nfp_prog, reg_both(dst + 1), 0); + } + + return 0; +} + +static int shr_imm64(struct nfp_prog *nfp_prog, struct nfp_insn_meta *meta) +{ + const struct bpf_insn *insn = &meta->insn; + u8 dst = insn->dst_reg * 2; + + return __shr_imm64(nfp_prog, dst, insn->imm); +} + +/* NOTE: for indirect right shift, LOW part should be calculated first. */ +static void shr_reg64_lt32_high(struct nfp_prog *nfp_prog, u8 dst, u8 src) +{ + emit_alu(nfp_prog, reg_none(), reg_a(src), ALU_OP_OR, reg_imm(0)); + emit_shf_indir(nfp_prog, reg_both(dst + 1), reg_none(), SHF_OP_NONE, + reg_b(dst + 1), SHF_SC_R_SHF); +} + +static void shr_reg64_lt32_low(struct nfp_prog *nfp_prog, u8 dst, u8 src) +{ + emit_alu(nfp_prog, reg_none(), reg_a(src), ALU_OP_OR, reg_imm(0)); + emit_shf_indir(nfp_prog, reg_both(dst), reg_a(dst + 1), SHF_OP_NONE, + reg_b(dst), SHF_SC_R_DSHF); +} + +static void shr_reg64_lt32(struct nfp_prog *nfp_prog, u8 dst, u8 src) +{ + shr_reg64_lt32_low(nfp_prog, dst, src); + shr_reg64_lt32_high(nfp_prog, dst, src); +} + +static void shr_reg64_ge32(struct nfp_prog *nfp_prog, u8 dst, u8 src) +{ + emit_alu(nfp_prog, reg_none(), reg_a(src), ALU_OP_OR, reg_imm(0)); + emit_shf_indir(nfp_prog, reg_both(dst), reg_none(), SHF_OP_NONE, + reg_b(dst + 1), SHF_SC_R_SHF); + wrp_immed(nfp_prog, reg_both(dst + 1), 0); +} + +static int shr_reg64(struct nfp_prog *nfp_prog, struct nfp_insn_meta *meta) +{ + const struct bpf_insn *insn = &meta->insn; + u64 umin, umax; + u8 dst, src; + + dst = insn->dst_reg * 2; + umin = meta->umin_src; + umax = meta->umax_src; + if (umin == umax) + return __shr_imm64(nfp_prog, dst, umin); + + src = insn->src_reg * 2; + if (umax < 32) { + shr_reg64_lt32(nfp_prog, dst, src); + } else if (umin >= 32) { + shr_reg64_ge32(nfp_prog, dst, src); + } else { + /* Generate different instruction sequences depending on runtime + * value of shift amount. + */ + u16 label_ge32, label_end; + + label_ge32 = nfp_prog_current_offset(nfp_prog) + 6; + emit_br_bset(nfp_prog, reg_a(src), 5, label_ge32, 0); + shr_reg64_lt32_low(nfp_prog, dst, src); + label_end = nfp_prog_current_offset(nfp_prog) + 6; + emit_br(nfp_prog, BR_UNC, label_end, 2); + /* shr_reg64_lt32_high packed in delay slot. */ + shr_reg64_lt32_high(nfp_prog, dst, src); + + if (!nfp_prog_confirm_current_offset(nfp_prog, label_ge32)) + return -EINVAL; + shr_reg64_ge32(nfp_prog, dst, src); + + if (!nfp_prog_confirm_current_offset(nfp_prog, label_end)) + return -EINVAL; + } + + return 0; +} + +/* Code logic is the same as __shr_imm64 except ashr requires signedness bit + * told through PREV_ALU result. + */ +static int __ashr_imm64(struct nfp_prog *nfp_prog, u8 dst, u8 shift_amt) +{ + if (shift_amt < 32) { + emit_shf(nfp_prog, reg_both(dst), reg_a(dst + 1), SHF_OP_NONE, + reg_b(dst), SHF_SC_R_DSHF, shift_amt); + /* Set signedness bit. */ + emit_alu(nfp_prog, reg_none(), reg_a(dst + 1), ALU_OP_OR, + reg_imm(0)); + emit_shf(nfp_prog, reg_both(dst + 1), reg_none(), SHF_OP_ASHR, + reg_b(dst + 1), SHF_SC_R_SHF, shift_amt); + } else if (shift_amt == 32) { + /* NOTE: this also helps setting signedness bit. */ + wrp_reg_mov(nfp_prog, dst, dst + 1); + emit_shf(nfp_prog, reg_both(dst + 1), reg_none(), SHF_OP_ASHR, + reg_b(dst + 1), SHF_SC_R_SHF, 31); + } else if (shift_amt > 32) { + emit_alu(nfp_prog, reg_none(), reg_a(dst + 1), ALU_OP_OR, + reg_imm(0)); + emit_shf(nfp_prog, reg_both(dst), reg_none(), SHF_OP_ASHR, + reg_b(dst + 1), SHF_SC_R_SHF, shift_amt - 32); + emit_shf(nfp_prog, reg_both(dst + 1), reg_none(), SHF_OP_ASHR, + reg_b(dst + 1), SHF_SC_R_SHF, 31); + } + + return 0; +} + +static int ashr_imm64(struct nfp_prog *nfp_prog, struct nfp_insn_meta *meta) +{ + const struct bpf_insn *insn = &meta->insn; + u8 dst = insn->dst_reg * 2; + + return __ashr_imm64(nfp_prog, dst, insn->imm); +} + +static void ashr_reg64_lt32_high(struct nfp_prog *nfp_prog, u8 dst, u8 src) +{ + /* NOTE: the first insn will set both indirect shift amount (source A) + * and signedness bit (MSB of result). + */ + emit_alu(nfp_prog, reg_none(), reg_a(src), ALU_OP_OR, reg_b(dst + 1)); + emit_shf_indir(nfp_prog, reg_both(dst + 1), reg_none(), SHF_OP_ASHR, + reg_b(dst + 1), SHF_SC_R_SHF); +} + +static void ashr_reg64_lt32_low(struct nfp_prog *nfp_prog, u8 dst, u8 src) +{ + /* NOTE: it is the same as logic shift because we don't need to shift in + * signedness bit when the shift amount is less than 32. + */ + return shr_reg64_lt32_low(nfp_prog, dst, src); +} + +static void ashr_reg64_lt32(struct nfp_prog *nfp_prog, u8 dst, u8 src) +{ + ashr_reg64_lt32_low(nfp_prog, dst, src); + ashr_reg64_lt32_high(nfp_prog, dst, src); +} + +static void ashr_reg64_ge32(struct nfp_prog *nfp_prog, u8 dst, u8 src) +{ + emit_alu(nfp_prog, reg_none(), reg_a(src), ALU_OP_OR, reg_b(dst + 1)); + emit_shf_indir(nfp_prog, reg_both(dst), reg_none(), SHF_OP_ASHR, + reg_b(dst + 1), SHF_SC_R_SHF); + emit_shf(nfp_prog, reg_both(dst + 1), reg_none(), SHF_OP_ASHR, + reg_b(dst + 1), SHF_SC_R_SHF, 31); +} + +/* Like ashr_imm64, but need to use indirect shift. */ +static int ashr_reg64(struct nfp_prog *nfp_prog, struct nfp_insn_meta *meta) +{ + const struct bpf_insn *insn = &meta->insn; + u64 umin, umax; + u8 dst, src; + + dst = insn->dst_reg * 2; + umin = meta->umin_src; + umax = meta->umax_src; + if (umin == umax) + return __ashr_imm64(nfp_prog, dst, umin); + + src = insn->src_reg * 2; + if (umax < 32) { + ashr_reg64_lt32(nfp_prog, dst, src); + } else if (umin >= 32) { + ashr_reg64_ge32(nfp_prog, dst, src); + } else { + u16 label_ge32, label_end; + + label_ge32 = nfp_prog_current_offset(nfp_prog) + 6; + emit_br_bset(nfp_prog, reg_a(src), 5, label_ge32, 0); + ashr_reg64_lt32_low(nfp_prog, dst, src); + label_end = nfp_prog_current_offset(nfp_prog) + 6; + emit_br(nfp_prog, BR_UNC, label_end, 2); + /* ashr_reg64_lt32_high packed in delay slot. */ + ashr_reg64_lt32_high(nfp_prog, dst, src); + + if (!nfp_prog_confirm_current_offset(nfp_prog, label_ge32)) + return -EINVAL; + ashr_reg64_ge32(nfp_prog, dst, src); + + if (!nfp_prog_confirm_current_offset(nfp_prog, label_end)) + return -EINVAL; + } + + return 0; +} + +static int mov_reg(struct nfp_prog *nfp_prog, struct nfp_insn_meta *meta) +{ + const struct bpf_insn *insn = &meta->insn; + + wrp_reg_mov(nfp_prog, insn->dst_reg * 2, insn->src_reg * 2); + wrp_immed(nfp_prog, reg_both(insn->dst_reg * 2 + 1), 0); + + return 0; +} + +static int mov_imm(struct nfp_prog *nfp_prog, struct nfp_insn_meta *meta) +{ + const struct bpf_insn *insn = &meta->insn; + + wrp_immed(nfp_prog, reg_both(insn->dst_reg * 2), insn->imm); + wrp_immed(nfp_prog, reg_both(insn->dst_reg * 2 + 1), 0); + + return 0; +} + +static int xor_reg(struct nfp_prog *nfp_prog, struct nfp_insn_meta *meta) +{ + return wrp_alu32_reg(nfp_prog, meta, ALU_OP_XOR); +} + +static int xor_imm(struct nfp_prog *nfp_prog, struct nfp_insn_meta *meta) +{ + return wrp_alu32_imm(nfp_prog, meta, ALU_OP_XOR); +} + +static int and_reg(struct nfp_prog *nfp_prog, struct nfp_insn_meta *meta) +{ + return wrp_alu32_reg(nfp_prog, meta, ALU_OP_AND); +} + +static int and_imm(struct nfp_prog *nfp_prog, struct nfp_insn_meta *meta) +{ + return wrp_alu32_imm(nfp_prog, meta, ALU_OP_AND); +} + +static int or_reg(struct nfp_prog *nfp_prog, struct nfp_insn_meta *meta) +{ + return wrp_alu32_reg(nfp_prog, meta, ALU_OP_OR); +} + +static int or_imm(struct nfp_prog *nfp_prog, struct nfp_insn_meta *meta) +{ + return wrp_alu32_imm(nfp_prog, meta, ALU_OP_OR); +} + +static int add_reg(struct nfp_prog *nfp_prog, struct nfp_insn_meta *meta) +{ + return wrp_alu32_reg(nfp_prog, meta, ALU_OP_ADD); +} + +static int add_imm(struct nfp_prog *nfp_prog, struct nfp_insn_meta *meta) +{ + return wrp_alu32_imm(nfp_prog, meta, ALU_OP_ADD); +} + +static int sub_reg(struct nfp_prog *nfp_prog, struct nfp_insn_meta *meta) +{ + return wrp_alu32_reg(nfp_prog, meta, ALU_OP_SUB); +} + +static int sub_imm(struct nfp_prog *nfp_prog, struct nfp_insn_meta *meta) +{ + return wrp_alu32_imm(nfp_prog, meta, ALU_OP_SUB); +} + +static int mul_reg(struct nfp_prog *nfp_prog, struct nfp_insn_meta *meta) +{ + return wrp_mul(nfp_prog, meta, false, true); +} + +static int mul_imm(struct nfp_prog *nfp_prog, struct nfp_insn_meta *meta) +{ + return wrp_mul(nfp_prog, meta, false, false); +} + +static int div_reg(struct nfp_prog *nfp_prog, struct nfp_insn_meta *meta) +{ + return div_reg64(nfp_prog, meta); +} + +static int div_imm(struct nfp_prog *nfp_prog, struct nfp_insn_meta *meta) +{ + return div_imm64(nfp_prog, meta); +} + +static int neg_reg(struct nfp_prog *nfp_prog, struct nfp_insn_meta *meta) +{ + u8 dst = meta->insn.dst_reg * 2; + + emit_alu(nfp_prog, reg_both(dst), reg_imm(0), ALU_OP_SUB, reg_b(dst)); + wrp_immed(nfp_prog, reg_both(meta->insn.dst_reg * 2 + 1), 0); + + return 0; +} + +static int shl_imm(struct nfp_prog *nfp_prog, struct nfp_insn_meta *meta) +{ + const struct bpf_insn *insn = &meta->insn; + + if (!insn->imm) + return 1; /* TODO: zero shift means indirect */ + + emit_shf(nfp_prog, reg_both(insn->dst_reg * 2), + reg_none(), SHF_OP_NONE, reg_b(insn->dst_reg * 2), + SHF_SC_L_SHF, insn->imm); + wrp_immed(nfp_prog, reg_both(insn->dst_reg * 2 + 1), 0); + + return 0; +} + +static int end_reg32(struct nfp_prog *nfp_prog, struct nfp_insn_meta *meta) +{ + const struct bpf_insn *insn = &meta->insn; + u8 gpr = insn->dst_reg * 2; + + switch (insn->imm) { + case 16: + emit_ld_field(nfp_prog, reg_both(gpr), 0x9, reg_b(gpr), + SHF_SC_R_ROT, 8); + emit_ld_field(nfp_prog, reg_both(gpr), 0xe, reg_a(gpr), + SHF_SC_R_SHF, 16); + + wrp_immed(nfp_prog, reg_both(gpr + 1), 0); + break; + case 32: + wrp_end32(nfp_prog, reg_a(gpr), gpr); + wrp_immed(nfp_prog, reg_both(gpr + 1), 0); + break; + case 64: + wrp_mov(nfp_prog, imm_a(nfp_prog), reg_b(gpr + 1)); + + wrp_end32(nfp_prog, reg_a(gpr), gpr + 1); + wrp_end32(nfp_prog, imm_a(nfp_prog), gpr); + break; + } + + return 0; +} + +static int imm_ld8_part2(struct nfp_prog *nfp_prog, struct nfp_insn_meta *meta) +{ + struct nfp_insn_meta *prev = nfp_meta_prev(meta); + u32 imm_lo, imm_hi; + u8 dst; + + dst = prev->insn.dst_reg * 2; + imm_lo = prev->insn.imm; + imm_hi = meta->insn.imm; + + wrp_immed(nfp_prog, reg_both(dst), imm_lo); + + /* mov is always 1 insn, load imm may be two, so try to use mov */ + if (imm_hi == imm_lo) + wrp_mov(nfp_prog, reg_both(dst + 1), reg_a(dst)); + else + wrp_immed(nfp_prog, reg_both(dst + 1), imm_hi); + + return 0; +} + +static int imm_ld8(struct nfp_prog *nfp_prog, struct nfp_insn_meta *meta) +{ + meta->double_cb = imm_ld8_part2; + return 0; +} + +static int data_ld1(struct nfp_prog *nfp_prog, struct nfp_insn_meta *meta) +{ + return construct_data_ld(nfp_prog, meta->insn.imm, 1); +} + +static int data_ld2(struct nfp_prog *nfp_prog, struct nfp_insn_meta *meta) +{ + return construct_data_ld(nfp_prog, meta->insn.imm, 2); +} + +static int data_ld4(struct nfp_prog *nfp_prog, struct nfp_insn_meta *meta) +{ + return construct_data_ld(nfp_prog, meta->insn.imm, 4); +} + +static int data_ind_ld1(struct nfp_prog *nfp_prog, struct nfp_insn_meta *meta) +{ + return construct_data_ind_ld(nfp_prog, meta->insn.imm, + meta->insn.src_reg * 2, 1); +} + +static int data_ind_ld2(struct nfp_prog *nfp_prog, struct nfp_insn_meta *meta) +{ + return construct_data_ind_ld(nfp_prog, meta->insn.imm, + meta->insn.src_reg * 2, 2); +} + +static int data_ind_ld4(struct nfp_prog *nfp_prog, struct nfp_insn_meta *meta) +{ + return construct_data_ind_ld(nfp_prog, meta->insn.imm, + meta->insn.src_reg * 2, 4); +} + +static int +mem_ldx_stack(struct nfp_prog *nfp_prog, struct nfp_insn_meta *meta, + unsigned int size, unsigned int ptr_off) +{ + return mem_op_stack(nfp_prog, meta, size, ptr_off, + meta->insn.dst_reg * 2, meta->insn.src_reg * 2, + true, wrp_lmem_load); +} + +static int mem_ldx_skb(struct nfp_prog *nfp_prog, struct nfp_insn_meta *meta, + u8 size) +{ + swreg dst = reg_both(meta->insn.dst_reg * 2); + + switch (meta->insn.off) { + case offsetof(struct __sk_buff, len): + if (size != FIELD_SIZEOF(struct __sk_buff, len)) + return -EOPNOTSUPP; + wrp_mov(nfp_prog, dst, plen_reg(nfp_prog)); + break; + case offsetof(struct __sk_buff, data): + if (size != FIELD_SIZEOF(struct __sk_buff, data)) + return -EOPNOTSUPP; + wrp_mov(nfp_prog, dst, pptr_reg(nfp_prog)); + break; + case offsetof(struct __sk_buff, data_end): + if (size != FIELD_SIZEOF(struct __sk_buff, data_end)) + return -EOPNOTSUPP; + emit_alu(nfp_prog, dst, + plen_reg(nfp_prog), ALU_OP_ADD, pptr_reg(nfp_prog)); + break; + default: + return -EOPNOTSUPP; + } + + wrp_immed(nfp_prog, reg_both(meta->insn.dst_reg * 2 + 1), 0); + + return 0; +} + +static int mem_ldx_xdp(struct nfp_prog *nfp_prog, struct nfp_insn_meta *meta, + u8 size) +{ + swreg dst = reg_both(meta->insn.dst_reg * 2); + + switch (meta->insn.off) { + case offsetof(struct xdp_md, data): + if (size != FIELD_SIZEOF(struct xdp_md, data)) + return -EOPNOTSUPP; + wrp_mov(nfp_prog, dst, pptr_reg(nfp_prog)); + break; + case offsetof(struct xdp_md, data_end): + if (size != FIELD_SIZEOF(struct xdp_md, data_end)) + return -EOPNOTSUPP; + emit_alu(nfp_prog, dst, + plen_reg(nfp_prog), ALU_OP_ADD, pptr_reg(nfp_prog)); + break; + default: + return -EOPNOTSUPP; + } + + wrp_immed(nfp_prog, reg_both(meta->insn.dst_reg * 2 + 1), 0); + + return 0; +} + +static int +mem_ldx_data(struct nfp_prog *nfp_prog, struct nfp_insn_meta *meta, + unsigned int size) +{ + swreg tmp_reg; + + tmp_reg = re_load_imm_any(nfp_prog, meta->insn.off, imm_b(nfp_prog)); + + return data_ld_host_order_addr32(nfp_prog, meta->insn.src_reg * 2, + tmp_reg, meta->insn.dst_reg * 2, size); +} + +static int +mem_ldx_emem(struct nfp_prog *nfp_prog, struct nfp_insn_meta *meta, + unsigned int size) +{ + swreg tmp_reg; + + tmp_reg = re_load_imm_any(nfp_prog, meta->insn.off, imm_b(nfp_prog)); + + return data_ld_host_order_addr40(nfp_prog, meta->insn.src_reg * 2, + tmp_reg, meta->insn.dst_reg * 2, size); +} + +static void +mem_ldx_data_init_pktcache(struct nfp_prog *nfp_prog, + struct nfp_insn_meta *meta) +{ + s16 range_start = meta->pkt_cache.range_start; + s16 range_end = meta->pkt_cache.range_end; + swreg src_base, off; + u8 xfer_num, len; + bool indir; + + off = re_load_imm_any(nfp_prog, range_start, imm_b(nfp_prog)); + src_base = reg_a(meta->insn.src_reg * 2); + len = range_end - range_start; + xfer_num = round_up(len, REG_WIDTH) / REG_WIDTH; + + indir = len > 8 * REG_WIDTH; + /* Setup PREV_ALU for indirect mode. */ + if (indir) + wrp_immed(nfp_prog, reg_none(), + CMD_OVE_LEN | FIELD_PREP(CMD_OV_LEN, xfer_num - 1)); + + /* Cache memory into transfer-in registers. */ + emit_cmd_any(nfp_prog, CMD_TGT_READ32_SWAP, CMD_MODE_32b, 0, src_base, + off, xfer_num - 1, CMD_CTX_SWAP, indir); +} + +static int +mem_ldx_data_from_pktcache_unaligned(struct nfp_prog *nfp_prog, + struct nfp_insn_meta *meta, + unsigned int size) +{ + s16 range_start = meta->pkt_cache.range_start; + s16 insn_off = meta->insn.off - range_start; + swreg dst_lo, dst_hi, src_lo, src_mid; + u8 dst_gpr = meta->insn.dst_reg * 2; + u8 len_lo = size, len_mid = 0; + u8 idx = insn_off / REG_WIDTH; + u8 off = insn_off % REG_WIDTH; + + dst_hi = reg_both(dst_gpr + 1); + dst_lo = reg_both(dst_gpr); + src_lo = reg_xfer(idx); + + /* The read length could involve as many as three registers. */ + if (size > REG_WIDTH - off) { + /* Calculate the part in the second register. */ + len_lo = REG_WIDTH - off; + len_mid = size - len_lo; + + /* Calculate the part in the third register. */ + if (size > 2 * REG_WIDTH - off) + len_mid = REG_WIDTH; + } + + wrp_reg_subpart(nfp_prog, dst_lo, src_lo, len_lo, off); + + if (!len_mid) { + wrp_immed(nfp_prog, dst_hi, 0); + return 0; + } + + src_mid = reg_xfer(idx + 1); + + if (size <= REG_WIDTH) { + wrp_reg_or_subpart(nfp_prog, dst_lo, src_mid, len_mid, len_lo); + wrp_immed(nfp_prog, dst_hi, 0); + } else { + swreg src_hi = reg_xfer(idx + 2); + + wrp_reg_or_subpart(nfp_prog, dst_lo, src_mid, + REG_WIDTH - len_lo, len_lo); + wrp_reg_subpart(nfp_prog, dst_hi, src_mid, len_lo, + REG_WIDTH - len_lo); + wrp_reg_or_subpart(nfp_prog, dst_hi, src_hi, REG_WIDTH - len_lo, + len_lo); + } + + return 0; +} + +static int +mem_ldx_data_from_pktcache_aligned(struct nfp_prog *nfp_prog, + struct nfp_insn_meta *meta, + unsigned int size) +{ + swreg dst_lo, dst_hi, src_lo; + u8 dst_gpr, idx; + + idx = (meta->insn.off - meta->pkt_cache.range_start) / REG_WIDTH; + dst_gpr = meta->insn.dst_reg * 2; + dst_hi = reg_both(dst_gpr + 1); + dst_lo = reg_both(dst_gpr); + src_lo = reg_xfer(idx); + + if (size < REG_WIDTH) { + wrp_reg_subpart(nfp_prog, dst_lo, src_lo, size, 0); + wrp_immed(nfp_prog, dst_hi, 0); + } else if (size == REG_WIDTH) { + wrp_mov(nfp_prog, dst_lo, src_lo); + wrp_immed(nfp_prog, dst_hi, 0); + } else { + swreg src_hi = reg_xfer(idx + 1); + + wrp_mov(nfp_prog, dst_lo, src_lo); + wrp_mov(nfp_prog, dst_hi, src_hi); + } + + return 0; +} + +static int +mem_ldx_data_from_pktcache(struct nfp_prog *nfp_prog, + struct nfp_insn_meta *meta, unsigned int size) +{ + u8 off = meta->insn.off - meta->pkt_cache.range_start; + + if (IS_ALIGNED(off, REG_WIDTH)) + return mem_ldx_data_from_pktcache_aligned(nfp_prog, meta, size); + + return mem_ldx_data_from_pktcache_unaligned(nfp_prog, meta, size); +} + +static int +mem_ldx(struct nfp_prog *nfp_prog, struct nfp_insn_meta *meta, + unsigned int size) +{ + if (meta->ldst_gather_len) + return nfp_cpp_memcpy(nfp_prog, meta); + + if (meta->ptr.type == PTR_TO_CTX) { + if (nfp_prog->type == BPF_PROG_TYPE_XDP) + return mem_ldx_xdp(nfp_prog, meta, size); + else + return mem_ldx_skb(nfp_prog, meta, size); + } + + if (meta->ptr.type == PTR_TO_PACKET) { + if (meta->pkt_cache.range_end) { + if (meta->pkt_cache.do_init) + mem_ldx_data_init_pktcache(nfp_prog, meta); + + return mem_ldx_data_from_pktcache(nfp_prog, meta, size); + } else { + return mem_ldx_data(nfp_prog, meta, size); + } + } + + if (meta->ptr.type == PTR_TO_STACK) + return mem_ldx_stack(nfp_prog, meta, size, + meta->ptr.off + meta->ptr.var_off.value); + + if (meta->ptr.type == PTR_TO_MAP_VALUE) + return mem_ldx_emem(nfp_prog, meta, size); + + return -EOPNOTSUPP; +} + +static int mem_ldx1(struct nfp_prog *nfp_prog, struct nfp_insn_meta *meta) +{ + return mem_ldx(nfp_prog, meta, 1); +} + +static int mem_ldx2(struct nfp_prog *nfp_prog, struct nfp_insn_meta *meta) +{ + return mem_ldx(nfp_prog, meta, 2); +} + +static int mem_ldx4(struct nfp_prog *nfp_prog, struct nfp_insn_meta *meta) +{ + return mem_ldx(nfp_prog, meta, 4); +} + +static int mem_ldx8(struct nfp_prog *nfp_prog, struct nfp_insn_meta *meta) +{ + return mem_ldx(nfp_prog, meta, 8); +} + +static int +mem_st_data(struct nfp_prog *nfp_prog, struct nfp_insn_meta *meta, + unsigned int size) +{ + u64 imm = meta->insn.imm; /* sign extend */ + swreg off_reg; + + off_reg = re_load_imm_any(nfp_prog, meta->insn.off, imm_b(nfp_prog)); + + return data_st_host_order(nfp_prog, meta->insn.dst_reg * 2, off_reg, + imm, size); +} + +static int mem_st(struct nfp_prog *nfp_prog, struct nfp_insn_meta *meta, + unsigned int size) +{ + if (meta->ptr.type == PTR_TO_PACKET) + return mem_st_data(nfp_prog, meta, size); + + return -EOPNOTSUPP; +} + +static int mem_st1(struct nfp_prog *nfp_prog, struct nfp_insn_meta *meta) +{ + return mem_st(nfp_prog, meta, 1); +} + +static int mem_st2(struct nfp_prog *nfp_prog, struct nfp_insn_meta *meta) +{ + return mem_st(nfp_prog, meta, 2); +} + +static int mem_st4(struct nfp_prog *nfp_prog, struct nfp_insn_meta *meta) +{ + return mem_st(nfp_prog, meta, 4); +} + +static int mem_st8(struct nfp_prog *nfp_prog, struct nfp_insn_meta *meta) +{ + return mem_st(nfp_prog, meta, 8); +} + +static int +mem_stx_data(struct nfp_prog *nfp_prog, struct nfp_insn_meta *meta, + unsigned int size) +{ + swreg off_reg; + + off_reg = re_load_imm_any(nfp_prog, meta->insn.off, imm_b(nfp_prog)); + + return data_stx_host_order(nfp_prog, meta->insn.dst_reg * 2, off_reg, + meta->insn.src_reg * 2, size); +} + +static int +mem_stx_stack(struct nfp_prog *nfp_prog, struct nfp_insn_meta *meta, + unsigned int size, unsigned int ptr_off) +{ + return mem_op_stack(nfp_prog, meta, size, ptr_off, + meta->insn.src_reg * 2, meta->insn.dst_reg * 2, + false, wrp_lmem_store); +} + +static int mem_stx_xdp(struct nfp_prog *nfp_prog, struct nfp_insn_meta *meta) +{ + switch (meta->insn.off) { + case offsetof(struct xdp_md, rx_queue_index): + return nfp_queue_select(nfp_prog, meta); + } + + WARN_ON_ONCE(1); /* verifier should have rejected bad accesses */ + return -EOPNOTSUPP; +} + +static int +mem_stx(struct nfp_prog *nfp_prog, struct nfp_insn_meta *meta, + unsigned int size) +{ + if (meta->ptr.type == PTR_TO_PACKET) + return mem_stx_data(nfp_prog, meta, size); + + if (meta->ptr.type == PTR_TO_STACK) + return mem_stx_stack(nfp_prog, meta, size, + meta->ptr.off + meta->ptr.var_off.value); + + return -EOPNOTSUPP; +} + +static int mem_stx1(struct nfp_prog *nfp_prog, struct nfp_insn_meta *meta) +{ + return mem_stx(nfp_prog, meta, 1); +} + +static int mem_stx2(struct nfp_prog *nfp_prog, struct nfp_insn_meta *meta) +{ + return mem_stx(nfp_prog, meta, 2); +} + +static int mem_stx4(struct nfp_prog *nfp_prog, struct nfp_insn_meta *meta) +{ + if (meta->ptr.type == PTR_TO_CTX) + if (nfp_prog->type == BPF_PROG_TYPE_XDP) + return mem_stx_xdp(nfp_prog, meta); + return mem_stx(nfp_prog, meta, 4); +} + +static int mem_stx8(struct nfp_prog *nfp_prog, struct nfp_insn_meta *meta) +{ + return mem_stx(nfp_prog, meta, 8); +} + +static int +mem_xadd(struct nfp_prog *nfp_prog, struct nfp_insn_meta *meta, bool is64) +{ + u8 dst_gpr = meta->insn.dst_reg * 2; + u8 src_gpr = meta->insn.src_reg * 2; + unsigned int full_add, out; + swreg addra, addrb, off; + + off = ur_load_imm_any(nfp_prog, meta->insn.off, imm_b(nfp_prog)); + + /* We can fit 16 bits into command immediate, if we know the immediate + * is guaranteed to either always or never fit into 16 bit we only + * generate code to handle that particular case, otherwise generate + * code for both. + */ + out = nfp_prog_current_offset(nfp_prog); + full_add = nfp_prog_current_offset(nfp_prog); + + if (meta->insn.off) { + out += 2; + full_add += 2; + } + if (meta->xadd_maybe_16bit) { + out += 3; + full_add += 3; + } + if (meta->xadd_over_16bit) + out += 2 + is64; + if (meta->xadd_maybe_16bit && meta->xadd_over_16bit) { + out += 5; + full_add += 5; + } + + /* Generate the branch for choosing add_imm vs add */ + if (meta->xadd_maybe_16bit && meta->xadd_over_16bit) { + swreg max_imm = imm_a(nfp_prog); + + wrp_immed(nfp_prog, max_imm, 0xffff); + emit_alu(nfp_prog, reg_none(), + max_imm, ALU_OP_SUB, reg_b(src_gpr)); + emit_alu(nfp_prog, reg_none(), + reg_imm(0), ALU_OP_SUB_C, reg_b(src_gpr + 1)); + emit_br(nfp_prog, BR_BLO, full_add, meta->insn.off ? 2 : 0); + /* defer for add */ + } + + /* If insn has an offset add to the address */ + if (!meta->insn.off) { + addra = reg_a(dst_gpr); + addrb = reg_b(dst_gpr + 1); + } else { + emit_alu(nfp_prog, imma_a(nfp_prog), + reg_a(dst_gpr), ALU_OP_ADD, off); + emit_alu(nfp_prog, imma_b(nfp_prog), + reg_a(dst_gpr + 1), ALU_OP_ADD_C, reg_imm(0)); + addra = imma_a(nfp_prog); + addrb = imma_b(nfp_prog); + } + + /* Generate the add_imm if 16 bits are possible */ + if (meta->xadd_maybe_16bit) { + swreg prev_alu = imm_a(nfp_prog); + + wrp_immed(nfp_prog, prev_alu, + FIELD_PREP(CMD_OVE_DATA, 2) | + CMD_OVE_LEN | + FIELD_PREP(CMD_OV_LEN, 0x8 | is64 << 2)); + wrp_reg_or_subpart(nfp_prog, prev_alu, reg_b(src_gpr), 2, 2); + emit_cmd_indir(nfp_prog, CMD_TGT_ADD_IMM, CMD_MODE_40b_BA, 0, + addra, addrb, 0, CMD_CTX_NO_SWAP); + + if (meta->xadd_over_16bit) + emit_br(nfp_prog, BR_UNC, out, 0); + } + + if (!nfp_prog_confirm_current_offset(nfp_prog, full_add)) + return -EINVAL; + + /* Generate the add if 16 bits are not guaranteed */ + if (meta->xadd_over_16bit) { + emit_cmd(nfp_prog, CMD_TGT_ADD, CMD_MODE_40b_BA, 0, + addra, addrb, is64 << 2, + is64 ? CMD_CTX_SWAP_DEFER2 : CMD_CTX_SWAP_DEFER1); + + wrp_mov(nfp_prog, reg_xfer(0), reg_a(src_gpr)); + if (is64) + wrp_mov(nfp_prog, reg_xfer(1), reg_a(src_gpr + 1)); + } + + if (!nfp_prog_confirm_current_offset(nfp_prog, out)) + return -EINVAL; + + return 0; +} + +static int mem_xadd4(struct nfp_prog *nfp_prog, struct nfp_insn_meta *meta) +{ + return mem_xadd(nfp_prog, meta, false); +} + +static int mem_xadd8(struct nfp_prog *nfp_prog, struct nfp_insn_meta *meta) +{ + return mem_xadd(nfp_prog, meta, true); +} + +static int jump(struct nfp_prog *nfp_prog, struct nfp_insn_meta *meta) +{ + emit_br(nfp_prog, BR_UNC, meta->insn.off, 0); + + return 0; +} + +static int jeq_imm(struct nfp_prog *nfp_prog, struct nfp_insn_meta *meta) +{ + const struct bpf_insn *insn = &meta->insn; + u64 imm = insn->imm; /* sign extend */ + swreg or1, or2, tmp_reg; + + or1 = reg_a(insn->dst_reg * 2); + or2 = reg_b(insn->dst_reg * 2 + 1); + + if (imm & ~0U) { + tmp_reg = ur_load_imm_any(nfp_prog, imm & ~0U, imm_b(nfp_prog)); + emit_alu(nfp_prog, imm_a(nfp_prog), + reg_a(insn->dst_reg * 2), ALU_OP_XOR, tmp_reg); + or1 = imm_a(nfp_prog); + } + + if (imm >> 32) { + tmp_reg = ur_load_imm_any(nfp_prog, imm >> 32, imm_b(nfp_prog)); + emit_alu(nfp_prog, imm_b(nfp_prog), + reg_a(insn->dst_reg * 2 + 1), ALU_OP_XOR, tmp_reg); + or2 = imm_b(nfp_prog); + } + + emit_alu(nfp_prog, reg_none(), or1, ALU_OP_OR, or2); + emit_br(nfp_prog, BR_BEQ, insn->off, 0); + + return 0; +} + +static int jset_imm(struct nfp_prog *nfp_prog, struct nfp_insn_meta *meta) +{ + const struct bpf_insn *insn = &meta->insn; + u64 imm = insn->imm; /* sign extend */ + swreg tmp_reg; + + if (!imm) { + meta->skip = true; + return 0; + } + + if (imm & ~0U) { + tmp_reg = ur_load_imm_any(nfp_prog, imm & ~0U, imm_b(nfp_prog)); + emit_alu(nfp_prog, reg_none(), + reg_a(insn->dst_reg * 2), ALU_OP_AND, tmp_reg); + emit_br(nfp_prog, BR_BNE, insn->off, 0); + } + + if (imm >> 32) { + tmp_reg = ur_load_imm_any(nfp_prog, imm >> 32, imm_b(nfp_prog)); + emit_alu(nfp_prog, reg_none(), + reg_a(insn->dst_reg * 2 + 1), ALU_OP_AND, tmp_reg); + emit_br(nfp_prog, BR_BNE, insn->off, 0); + } + + return 0; +} + +static int jne_imm(struct nfp_prog *nfp_prog, struct nfp_insn_meta *meta) +{ + const struct bpf_insn *insn = &meta->insn; + u64 imm = insn->imm; /* sign extend */ + swreg tmp_reg; + + if (!imm) { + emit_alu(nfp_prog, reg_none(), reg_a(insn->dst_reg * 2), + ALU_OP_OR, reg_b(insn->dst_reg * 2 + 1)); + emit_br(nfp_prog, BR_BNE, insn->off, 0); + return 0; + } + + tmp_reg = ur_load_imm_any(nfp_prog, imm & ~0U, imm_b(nfp_prog)); + emit_alu(nfp_prog, reg_none(), + reg_a(insn->dst_reg * 2), ALU_OP_XOR, tmp_reg); + emit_br(nfp_prog, BR_BNE, insn->off, 0); + + tmp_reg = ur_load_imm_any(nfp_prog, imm >> 32, imm_b(nfp_prog)); + emit_alu(nfp_prog, reg_none(), + reg_a(insn->dst_reg * 2 + 1), ALU_OP_XOR, tmp_reg); + emit_br(nfp_prog, BR_BNE, insn->off, 0); + + return 0; +} + +static int jeq_reg(struct nfp_prog *nfp_prog, struct nfp_insn_meta *meta) +{ + const struct bpf_insn *insn = &meta->insn; + + emit_alu(nfp_prog, imm_a(nfp_prog), reg_a(insn->dst_reg * 2), + ALU_OP_XOR, reg_b(insn->src_reg * 2)); + emit_alu(nfp_prog, imm_b(nfp_prog), reg_a(insn->dst_reg * 2 + 1), + ALU_OP_XOR, reg_b(insn->src_reg * 2 + 1)); + emit_alu(nfp_prog, reg_none(), + imm_a(nfp_prog), ALU_OP_OR, imm_b(nfp_prog)); + emit_br(nfp_prog, BR_BEQ, insn->off, 0); + + return 0; +} + +static int jset_reg(struct nfp_prog *nfp_prog, struct nfp_insn_meta *meta) +{ + return wrp_test_reg(nfp_prog, meta, ALU_OP_AND, BR_BNE); +} + +static int jne_reg(struct nfp_prog *nfp_prog, struct nfp_insn_meta *meta) +{ + return wrp_test_reg(nfp_prog, meta, ALU_OP_XOR, BR_BNE); +} + +static int call(struct nfp_prog *nfp_prog, struct nfp_insn_meta *meta) +{ + switch (meta->insn.imm) { + case BPF_FUNC_xdp_adjust_head: + return adjust_head(nfp_prog, meta); + case BPF_FUNC_xdp_adjust_tail: + return adjust_tail(nfp_prog, meta); + case BPF_FUNC_map_lookup_elem: + case BPF_FUNC_map_update_elem: + case BPF_FUNC_map_delete_elem: + return map_call_stack_common(nfp_prog, meta); + case BPF_FUNC_get_prandom_u32: + return nfp_get_prandom_u32(nfp_prog, meta); + case BPF_FUNC_perf_event_output: + return nfp_perf_event_output(nfp_prog, meta); + default: + WARN_ONCE(1, "verifier allowed unsupported function\n"); + return -EOPNOTSUPP; + } +} + +static int goto_out(struct nfp_prog *nfp_prog, struct nfp_insn_meta *meta) +{ + emit_br_relo(nfp_prog, BR_UNC, BR_OFF_RELO, 0, RELO_BR_GO_OUT); + + return 0; +} + +static const instr_cb_t instr_cb[256] = { + [BPF_ALU64 | BPF_MOV | BPF_X] = mov_reg64, + [BPF_ALU64 | BPF_MOV | BPF_K] = mov_imm64, + [BPF_ALU64 | BPF_XOR | BPF_X] = xor_reg64, + [BPF_ALU64 | BPF_XOR | BPF_K] = xor_imm64, + [BPF_ALU64 | BPF_AND | BPF_X] = and_reg64, + [BPF_ALU64 | BPF_AND | BPF_K] = and_imm64, + [BPF_ALU64 | BPF_OR | BPF_X] = or_reg64, + [BPF_ALU64 | BPF_OR | BPF_K] = or_imm64, + [BPF_ALU64 | BPF_ADD | BPF_X] = add_reg64, + [BPF_ALU64 | BPF_ADD | BPF_K] = add_imm64, + [BPF_ALU64 | BPF_SUB | BPF_X] = sub_reg64, + [BPF_ALU64 | BPF_SUB | BPF_K] = sub_imm64, + [BPF_ALU64 | BPF_MUL | BPF_X] = mul_reg64, + [BPF_ALU64 | BPF_MUL | BPF_K] = mul_imm64, + [BPF_ALU64 | BPF_DIV | BPF_X] = div_reg64, + [BPF_ALU64 | BPF_DIV | BPF_K] = div_imm64, + [BPF_ALU64 | BPF_NEG] = neg_reg64, + [BPF_ALU64 | BPF_LSH | BPF_X] = shl_reg64, + [BPF_ALU64 | BPF_LSH | BPF_K] = shl_imm64, + [BPF_ALU64 | BPF_RSH | BPF_X] = shr_reg64, + [BPF_ALU64 | BPF_RSH | BPF_K] = shr_imm64, + [BPF_ALU64 | BPF_ARSH | BPF_X] = ashr_reg64, + [BPF_ALU64 | BPF_ARSH | BPF_K] = ashr_imm64, + [BPF_ALU | BPF_MOV | BPF_X] = mov_reg, + [BPF_ALU | BPF_MOV | BPF_K] = mov_imm, + [BPF_ALU | BPF_XOR | BPF_X] = xor_reg, + [BPF_ALU | BPF_XOR | BPF_K] = xor_imm, + [BPF_ALU | BPF_AND | BPF_X] = and_reg, + [BPF_ALU | BPF_AND | BPF_K] = and_imm, + [BPF_ALU | BPF_OR | BPF_X] = or_reg, + [BPF_ALU | BPF_OR | BPF_K] = or_imm, + [BPF_ALU | BPF_ADD | BPF_X] = add_reg, + [BPF_ALU | BPF_ADD | BPF_K] = add_imm, + [BPF_ALU | BPF_SUB | BPF_X] = sub_reg, + [BPF_ALU | BPF_SUB | BPF_K] = sub_imm, + [BPF_ALU | BPF_MUL | BPF_X] = mul_reg, + [BPF_ALU | BPF_MUL | BPF_K] = mul_imm, + [BPF_ALU | BPF_DIV | BPF_X] = div_reg, + [BPF_ALU | BPF_DIV | BPF_K] = div_imm, + [BPF_ALU | BPF_NEG] = neg_reg, + [BPF_ALU | BPF_LSH | BPF_K] = shl_imm, + [BPF_ALU | BPF_END | BPF_X] = end_reg32, + [BPF_LD | BPF_IMM | BPF_DW] = imm_ld8, + [BPF_LD | BPF_ABS | BPF_B] = data_ld1, + [BPF_LD | BPF_ABS | BPF_H] = data_ld2, + [BPF_LD | BPF_ABS | BPF_W] = data_ld4, + [BPF_LD | BPF_IND | BPF_B] = data_ind_ld1, + [BPF_LD | BPF_IND | BPF_H] = data_ind_ld2, + [BPF_LD | BPF_IND | BPF_W] = data_ind_ld4, + [BPF_LDX | BPF_MEM | BPF_B] = mem_ldx1, + [BPF_LDX | BPF_MEM | BPF_H] = mem_ldx2, + [BPF_LDX | BPF_MEM | BPF_W] = mem_ldx4, + [BPF_LDX | BPF_MEM | BPF_DW] = mem_ldx8, + [BPF_STX | BPF_MEM | BPF_B] = mem_stx1, + [BPF_STX | BPF_MEM | BPF_H] = mem_stx2, + [BPF_STX | BPF_MEM | BPF_W] = mem_stx4, + [BPF_STX | BPF_MEM | BPF_DW] = mem_stx8, + [BPF_STX | BPF_XADD | BPF_W] = mem_xadd4, + [BPF_STX | BPF_XADD | BPF_DW] = mem_xadd8, + [BPF_ST | BPF_MEM | BPF_B] = mem_st1, + [BPF_ST | BPF_MEM | BPF_H] = mem_st2, + [BPF_ST | BPF_MEM | BPF_W] = mem_st4, + [BPF_ST | BPF_MEM | BPF_DW] = mem_st8, + [BPF_JMP | BPF_JA | BPF_K] = jump, + [BPF_JMP | BPF_JEQ | BPF_K] = jeq_imm, + [BPF_JMP | BPF_JGT | BPF_K] = cmp_imm, + [BPF_JMP | BPF_JGE | BPF_K] = cmp_imm, + [BPF_JMP | BPF_JLT | BPF_K] = cmp_imm, + [BPF_JMP | BPF_JLE | BPF_K] = cmp_imm, + [BPF_JMP | BPF_JSGT | BPF_K] = cmp_imm, + [BPF_JMP | BPF_JSGE | BPF_K] = cmp_imm, + [BPF_JMP | BPF_JSLT | BPF_K] = cmp_imm, + [BPF_JMP | BPF_JSLE | BPF_K] = cmp_imm, + [BPF_JMP | BPF_JSET | BPF_K] = jset_imm, + [BPF_JMP | BPF_JNE | BPF_K] = jne_imm, + [BPF_JMP | BPF_JEQ | BPF_X] = jeq_reg, + [BPF_JMP | BPF_JGT | BPF_X] = cmp_reg, + [BPF_JMP | BPF_JGE | BPF_X] = cmp_reg, + [BPF_JMP | BPF_JLT | BPF_X] = cmp_reg, + [BPF_JMP | BPF_JLE | BPF_X] = cmp_reg, + [BPF_JMP | BPF_JSGT | BPF_X] = cmp_reg, + [BPF_JMP | BPF_JSGE | BPF_X] = cmp_reg, + [BPF_JMP | BPF_JSLT | BPF_X] = cmp_reg, + [BPF_JMP | BPF_JSLE | BPF_X] = cmp_reg, + [BPF_JMP | BPF_JSET | BPF_X] = jset_reg, + [BPF_JMP | BPF_JNE | BPF_X] = jne_reg, + [BPF_JMP | BPF_CALL] = call, + [BPF_JMP | BPF_EXIT] = goto_out, +}; + +/* --- Assembler logic --- */ +static int nfp_fixup_branches(struct nfp_prog *nfp_prog) +{ + struct nfp_insn_meta *meta, *jmp_dst; + u32 idx, br_idx; + + list_for_each_entry(meta, &nfp_prog->insns, l) { + if (meta->skip) + continue; + if (meta->insn.code == (BPF_JMP | BPF_CALL)) + continue; + if (BPF_CLASS(meta->insn.code) != BPF_JMP) + continue; + + if (list_is_last(&meta->l, &nfp_prog->insns)) + br_idx = nfp_prog->last_bpf_off; + else + br_idx = list_next_entry(meta, l)->off - 1; + + if (!nfp_is_br(nfp_prog->prog[br_idx])) { + pr_err("Fixup found block not ending in branch %d %02x %016llx!!\n", + br_idx, meta->insn.code, nfp_prog->prog[br_idx]); + return -ELOOP; + } + /* Leave special branches for later */ + if (FIELD_GET(OP_RELO_TYPE, nfp_prog->prog[br_idx]) != + RELO_BR_REL) + continue; + + if (!meta->jmp_dst) { + pr_err("Non-exit jump doesn't have destination info recorded!!\n"); + return -ELOOP; + } + + jmp_dst = meta->jmp_dst; + + if (jmp_dst->skip) { + pr_err("Branch landing on removed instruction!!\n"); + return -ELOOP; + } + + for (idx = meta->off; idx <= br_idx; idx++) { + if (!nfp_is_br(nfp_prog->prog[idx])) + continue; + br_set_offset(&nfp_prog->prog[idx], jmp_dst->off); + } + } + + return 0; +} + +static void nfp_intro(struct nfp_prog *nfp_prog) +{ + wrp_immed(nfp_prog, plen_reg(nfp_prog), GENMASK(13, 0)); + emit_alu(nfp_prog, plen_reg(nfp_prog), + plen_reg(nfp_prog), ALU_OP_AND, pv_len(nfp_prog)); +} + +static void nfp_outro_tc_da(struct nfp_prog *nfp_prog) +{ + /* TC direct-action mode: + * 0,1 ok NOT SUPPORTED[1] + * 2 drop 0x22 -> drop, count as stat1 + * 4,5 nuke 0x02 -> drop + * 7 redir 0x44 -> redir, count as stat2 + * * unspec 0x11 -> pass, count as stat0 + * + * [1] We can't support OK and RECLASSIFY because we can't tell TC + * the exact decision made. We are forced to support UNSPEC + * to handle aborts so that's the only one we handle for passing + * packets up the stack. + */ + /* Target for aborts */ + nfp_prog->tgt_abort = nfp_prog_current_offset(nfp_prog); + + emit_br_relo(nfp_prog, BR_UNC, BR_OFF_RELO, 2, RELO_BR_NEXT_PKT); + + wrp_mov(nfp_prog, reg_a(0), NFP_BPF_ABI_FLAGS); + emit_ld_field(nfp_prog, reg_a(0), 0xc, reg_imm(0x11), SHF_SC_L_SHF, 16); + + /* Target for normal exits */ + nfp_prog->tgt_out = nfp_prog_current_offset(nfp_prog); + + /* if R0 > 7 jump to abort */ + emit_alu(nfp_prog, reg_none(), reg_imm(7), ALU_OP_SUB, reg_b(0)); + emit_br(nfp_prog, BR_BLO, nfp_prog->tgt_abort, 0); + wrp_mov(nfp_prog, reg_a(0), NFP_BPF_ABI_FLAGS); + + wrp_immed(nfp_prog, reg_b(2), 0x41221211); + wrp_immed(nfp_prog, reg_b(3), 0x41001211); + + emit_shf(nfp_prog, reg_a(1), + reg_none(), SHF_OP_NONE, reg_b(0), SHF_SC_L_SHF, 2); + + emit_alu(nfp_prog, reg_none(), reg_a(1), ALU_OP_OR, reg_imm(0)); + emit_shf(nfp_prog, reg_a(2), + reg_imm(0xf), SHF_OP_AND, reg_b(2), SHF_SC_R_SHF, 0); + + emit_alu(nfp_prog, reg_none(), reg_a(1), ALU_OP_OR, reg_imm(0)); + emit_shf(nfp_prog, reg_b(2), + reg_imm(0xf), SHF_OP_AND, reg_b(3), SHF_SC_R_SHF, 0); + + emit_br_relo(nfp_prog, BR_UNC, BR_OFF_RELO, 2, RELO_BR_NEXT_PKT); + + emit_shf(nfp_prog, reg_b(2), + reg_a(2), SHF_OP_OR, reg_b(2), SHF_SC_L_SHF, 4); + emit_ld_field(nfp_prog, reg_a(0), 0xc, reg_b(2), SHF_SC_L_SHF, 16); +} + +static void nfp_outro_xdp(struct nfp_prog *nfp_prog) +{ + /* XDP return codes: + * 0 aborted 0x82 -> drop, count as stat3 + * 1 drop 0x22 -> drop, count as stat1 + * 2 pass 0x11 -> pass, count as stat0 + * 3 tx 0x44 -> redir, count as stat2 + * * unknown 0x82 -> drop, count as stat3 + */ + /* Target for aborts */ + nfp_prog->tgt_abort = nfp_prog_current_offset(nfp_prog); + + emit_br_relo(nfp_prog, BR_UNC, BR_OFF_RELO, 2, RELO_BR_NEXT_PKT); + + wrp_mov(nfp_prog, reg_a(0), NFP_BPF_ABI_FLAGS); + emit_ld_field(nfp_prog, reg_a(0), 0xc, reg_imm(0x82), SHF_SC_L_SHF, 16); + + /* Target for normal exits */ + nfp_prog->tgt_out = nfp_prog_current_offset(nfp_prog); + + /* if R0 > 3 jump to abort */ + emit_alu(nfp_prog, reg_none(), reg_imm(3), ALU_OP_SUB, reg_b(0)); + emit_br(nfp_prog, BR_BLO, nfp_prog->tgt_abort, 0); + + wrp_immed(nfp_prog, reg_b(2), 0x44112282); + + emit_shf(nfp_prog, reg_a(1), + reg_none(), SHF_OP_NONE, reg_b(0), SHF_SC_L_SHF, 3); + + emit_alu(nfp_prog, reg_none(), reg_a(1), ALU_OP_OR, reg_imm(0)); + emit_shf(nfp_prog, reg_b(2), + reg_imm(0xff), SHF_OP_AND, reg_b(2), SHF_SC_R_SHF, 0); + + emit_br_relo(nfp_prog, BR_UNC, BR_OFF_RELO, 2, RELO_BR_NEXT_PKT); + + wrp_mov(nfp_prog, reg_a(0), NFP_BPF_ABI_FLAGS); + emit_ld_field(nfp_prog, reg_a(0), 0xc, reg_b(2), SHF_SC_L_SHF, 16); +} + +static void nfp_outro(struct nfp_prog *nfp_prog) +{ + switch (nfp_prog->type) { + case BPF_PROG_TYPE_SCHED_CLS: + nfp_outro_tc_da(nfp_prog); + break; + case BPF_PROG_TYPE_XDP: + nfp_outro_xdp(nfp_prog); + break; + default: + WARN_ON(1); + } +} + +static int nfp_translate(struct nfp_prog *nfp_prog) +{ + struct nfp_insn_meta *meta; + int err; + + nfp_intro(nfp_prog); + if (nfp_prog->error) + return nfp_prog->error; + + list_for_each_entry(meta, &nfp_prog->insns, l) { + instr_cb_t cb = instr_cb[meta->insn.code]; + + meta->off = nfp_prog_current_offset(nfp_prog); + + if (meta->skip) { + nfp_prog->n_translated++; + continue; + } + + if (nfp_meta_has_prev(nfp_prog, meta) && + nfp_meta_prev(meta)->double_cb) + cb = nfp_meta_prev(meta)->double_cb; + if (!cb) + return -ENOENT; + err = cb(nfp_prog, meta); + if (err) + return err; + if (nfp_prog->error) + return nfp_prog->error; + + nfp_prog->n_translated++; + } + + nfp_prog->last_bpf_off = nfp_prog_current_offset(nfp_prog) - 1; + + nfp_outro(nfp_prog); + if (nfp_prog->error) + return nfp_prog->error; + + wrp_nops(nfp_prog, NFP_USTORE_PREFETCH_WINDOW); + if (nfp_prog->error) + return nfp_prog->error; + + return nfp_fixup_branches(nfp_prog); +} + +/* --- Optimizations --- */ +static void nfp_bpf_opt_reg_init(struct nfp_prog *nfp_prog) +{ + struct nfp_insn_meta *meta; + + list_for_each_entry(meta, &nfp_prog->insns, l) { + struct bpf_insn insn = meta->insn; + + /* Programs converted from cBPF start with register xoring */ + if (insn.code == (BPF_ALU64 | BPF_XOR | BPF_X) && + insn.src_reg == insn.dst_reg) + continue; + + /* Programs start with R6 = R1 but we ignore the skb pointer */ + if (insn.code == (BPF_ALU64 | BPF_MOV | BPF_X) && + insn.src_reg == 1 && insn.dst_reg == 6) + meta->skip = true; + + /* Return as soon as something doesn't match */ + if (!meta->skip) + return; + } +} + +/* abs(insn.imm) will fit better into unrestricted reg immediate - + * convert add/sub of a negative number into a sub/add of a positive one. + */ +static void nfp_bpf_opt_neg_add_sub(struct nfp_prog *nfp_prog) +{ + struct nfp_insn_meta *meta; + + list_for_each_entry(meta, &nfp_prog->insns, l) { + struct bpf_insn insn = meta->insn; + + if (meta->skip) + continue; + + if (BPF_CLASS(insn.code) != BPF_ALU && + BPF_CLASS(insn.code) != BPF_ALU64 && + BPF_CLASS(insn.code) != BPF_JMP) + continue; + if (BPF_SRC(insn.code) != BPF_K) + continue; + if (insn.imm >= 0) + continue; + + if (BPF_CLASS(insn.code) == BPF_JMP) { + switch (BPF_OP(insn.code)) { + case BPF_JGE: + case BPF_JSGE: + case BPF_JLT: + case BPF_JSLT: + meta->jump_neg_op = true; + break; + default: + continue; + } + } else { + if (BPF_OP(insn.code) == BPF_ADD) + insn.code = BPF_CLASS(insn.code) | BPF_SUB; + else if (BPF_OP(insn.code) == BPF_SUB) + insn.code = BPF_CLASS(insn.code) | BPF_ADD; + else + continue; + + meta->insn.code = insn.code | BPF_K; + } + + meta->insn.imm = -insn.imm; + } +} + +/* Remove masking after load since our load guarantees this is not needed */ +static void nfp_bpf_opt_ld_mask(struct nfp_prog *nfp_prog) +{ + struct nfp_insn_meta *meta1, *meta2; + const s32 exp_mask[] = { + [BPF_B] = 0x000000ffU, + [BPF_H] = 0x0000ffffU, + [BPF_W] = 0xffffffffU, + }; + + nfp_for_each_insn_walk2(nfp_prog, meta1, meta2) { + struct bpf_insn insn, next; + + insn = meta1->insn; + next = meta2->insn; + + if (BPF_CLASS(insn.code) != BPF_LD) + continue; + if (BPF_MODE(insn.code) != BPF_ABS && + BPF_MODE(insn.code) != BPF_IND) + continue; + + if (next.code != (BPF_ALU64 | BPF_AND | BPF_K)) + continue; + + if (!exp_mask[BPF_SIZE(insn.code)]) + continue; + if (exp_mask[BPF_SIZE(insn.code)] != next.imm) + continue; + + if (next.src_reg || next.dst_reg) + continue; + + if (meta2->flags & FLAG_INSN_IS_JUMP_DST) + continue; + + meta2->skip = true; + } +} + +static void nfp_bpf_opt_ld_shift(struct nfp_prog *nfp_prog) +{ + struct nfp_insn_meta *meta1, *meta2, *meta3; + + nfp_for_each_insn_walk3(nfp_prog, meta1, meta2, meta3) { + struct bpf_insn insn, next1, next2; + + insn = meta1->insn; + next1 = meta2->insn; + next2 = meta3->insn; + + if (BPF_CLASS(insn.code) != BPF_LD) + continue; + if (BPF_MODE(insn.code) != BPF_ABS && + BPF_MODE(insn.code) != BPF_IND) + continue; + if (BPF_SIZE(insn.code) != BPF_W) + continue; + + if (!(next1.code == (BPF_LSH | BPF_K | BPF_ALU64) && + next2.code == (BPF_RSH | BPF_K | BPF_ALU64)) && + !(next1.code == (BPF_RSH | BPF_K | BPF_ALU64) && + next2.code == (BPF_LSH | BPF_K | BPF_ALU64))) + continue; + + if (next1.src_reg || next1.dst_reg || + next2.src_reg || next2.dst_reg) + continue; + + if (next1.imm != 0x20 || next2.imm != 0x20) + continue; + + if (meta2->flags & FLAG_INSN_IS_JUMP_DST || + meta3->flags & FLAG_INSN_IS_JUMP_DST) + continue; + + meta2->skip = true; + meta3->skip = true; + } +} + +/* load/store pair that forms memory copy sould look like the following: + * + * ld_width R, [addr_src + offset_src] + * st_width [addr_dest + offset_dest], R + * + * The destination register of load and source register of store should + * be the same, load and store should also perform at the same width. + * If either of addr_src or addr_dest is stack pointer, we don't do the + * CPP optimization as stack is modelled by registers on NFP. + */ +static bool +curr_pair_is_memcpy(struct nfp_insn_meta *ld_meta, + struct nfp_insn_meta *st_meta) +{ + struct bpf_insn *ld = &ld_meta->insn; + struct bpf_insn *st = &st_meta->insn; + + if (!is_mbpf_load(ld_meta) || !is_mbpf_store(st_meta)) + return false; + + if (ld_meta->ptr.type != PTR_TO_PACKET && + ld_meta->ptr.type != PTR_TO_MAP_VALUE) + return false; + + if (st_meta->ptr.type != PTR_TO_PACKET) + return false; + + if (BPF_SIZE(ld->code) != BPF_SIZE(st->code)) + return false; + + if (ld->dst_reg != st->src_reg) + return false; + + /* There is jump to the store insn in this pair. */ + if (st_meta->flags & FLAG_INSN_IS_JUMP_DST) + return false; + + return true; +} + +/* Currently, we only support chaining load/store pairs if: + * + * - Their address base registers are the same. + * - Their address offsets are in the same order. + * - They operate at the same memory width. + * - There is no jump into the middle of them. + */ +static bool +curr_pair_chain_with_previous(struct nfp_insn_meta *ld_meta, + struct nfp_insn_meta *st_meta, + struct bpf_insn *prev_ld, + struct bpf_insn *prev_st) +{ + u8 prev_size, curr_size, prev_ld_base, prev_st_base, prev_ld_dst; + struct bpf_insn *ld = &ld_meta->insn; + struct bpf_insn *st = &st_meta->insn; + s16 prev_ld_off, prev_st_off; + + /* This pair is the start pair. */ + if (!prev_ld) + return true; + + prev_size = BPF_LDST_BYTES(prev_ld); + curr_size = BPF_LDST_BYTES(ld); + prev_ld_base = prev_ld->src_reg; + prev_st_base = prev_st->dst_reg; + prev_ld_dst = prev_ld->dst_reg; + prev_ld_off = prev_ld->off; + prev_st_off = prev_st->off; + + if (ld->dst_reg != prev_ld_dst) + return false; + + if (ld->src_reg != prev_ld_base || st->dst_reg != prev_st_base) + return false; + + if (curr_size != prev_size) + return false; + + /* There is jump to the head of this pair. */ + if (ld_meta->flags & FLAG_INSN_IS_JUMP_DST) + return false; + + /* Both in ascending order. */ + if (prev_ld_off + prev_size == ld->off && + prev_st_off + prev_size == st->off) + return true; + + /* Both in descending order. */ + if (ld->off + curr_size == prev_ld_off && + st->off + curr_size == prev_st_off) + return true; + + return false; +} + +/* Return TRUE if cross memory access happens. Cross memory access means + * store area is overlapping with load area that a later load might load + * the value from previous store, for this case we can't treat the sequence + * as an memory copy. + */ +static bool +cross_mem_access(struct bpf_insn *ld, struct nfp_insn_meta *head_ld_meta, + struct nfp_insn_meta *head_st_meta) +{ + s16 head_ld_off, head_st_off, ld_off; + + /* Different pointer types does not overlap. */ + if (head_ld_meta->ptr.type != head_st_meta->ptr.type) + return false; + + /* load and store are both PTR_TO_PACKET, check ID info. */ + if (head_ld_meta->ptr.id != head_st_meta->ptr.id) + return true; + + /* Canonicalize the offsets. Turn all of them against the original + * base register. + */ + head_ld_off = head_ld_meta->insn.off + head_ld_meta->ptr.off; + head_st_off = head_st_meta->insn.off + head_st_meta->ptr.off; + ld_off = ld->off + head_ld_meta->ptr.off; + + /* Ascending order cross. */ + if (ld_off > head_ld_off && + head_ld_off < head_st_off && ld_off >= head_st_off) + return true; + + /* Descending order cross. */ + if (ld_off < head_ld_off && + head_ld_off > head_st_off && ld_off <= head_st_off) + return true; + + return false; +} + +/* This pass try to identify the following instructoin sequences. + * + * load R, [regA + offA] + * store [regB + offB], R + * load R, [regA + offA + const_imm_A] + * store [regB + offB + const_imm_A], R + * load R, [regA + offA + 2 * const_imm_A] + * store [regB + offB + 2 * const_imm_A], R + * ... + * + * Above sequence is typically generated by compiler when lowering + * memcpy. NFP prefer using CPP instructions to accelerate it. + */ +static void nfp_bpf_opt_ldst_gather(struct nfp_prog *nfp_prog) +{ + struct nfp_insn_meta *head_ld_meta = NULL; + struct nfp_insn_meta *head_st_meta = NULL; + struct nfp_insn_meta *meta1, *meta2; + struct bpf_insn *prev_ld = NULL; + struct bpf_insn *prev_st = NULL; + u8 count = 0; + + nfp_for_each_insn_walk2(nfp_prog, meta1, meta2) { + struct bpf_insn *ld = &meta1->insn; + struct bpf_insn *st = &meta2->insn; + + /* Reset record status if any of the following if true: + * - The current insn pair is not load/store. + * - The load/store pair doesn't chain with previous one. + * - The chained load/store pair crossed with previous pair. + * - The chained load/store pair has a total size of memory + * copy beyond 128 bytes which is the maximum length a + * single NFP CPP command can transfer. + */ + if (!curr_pair_is_memcpy(meta1, meta2) || + !curr_pair_chain_with_previous(meta1, meta2, prev_ld, + prev_st) || + (head_ld_meta && (cross_mem_access(ld, head_ld_meta, + head_st_meta) || + head_ld_meta->ldst_gather_len >= 128))) { + if (!count) + continue; + + if (count > 1) { + s16 prev_ld_off = prev_ld->off; + s16 prev_st_off = prev_st->off; + s16 head_ld_off = head_ld_meta->insn.off; + + if (prev_ld_off < head_ld_off) { + head_ld_meta->insn.off = prev_ld_off; + head_st_meta->insn.off = prev_st_off; + head_ld_meta->ldst_gather_len = + -head_ld_meta->ldst_gather_len; + } + + head_ld_meta->paired_st = &head_st_meta->insn; + head_st_meta->skip = true; + } else { + head_ld_meta->ldst_gather_len = 0; + } + + /* If the chain is ended by an load/store pair then this + * could serve as the new head of the the next chain. + */ + if (curr_pair_is_memcpy(meta1, meta2)) { + head_ld_meta = meta1; + head_st_meta = meta2; + head_ld_meta->ldst_gather_len = + BPF_LDST_BYTES(ld); + meta1 = nfp_meta_next(meta1); + meta2 = nfp_meta_next(meta2); + prev_ld = ld; + prev_st = st; + count = 1; + } else { + head_ld_meta = NULL; + head_st_meta = NULL; + prev_ld = NULL; + prev_st = NULL; + count = 0; + } + + continue; + } + + if (!head_ld_meta) { + head_ld_meta = meta1; + head_st_meta = meta2; + } else { + meta1->skip = true; + meta2->skip = true; + } + + head_ld_meta->ldst_gather_len += BPF_LDST_BYTES(ld); + meta1 = nfp_meta_next(meta1); + meta2 = nfp_meta_next(meta2); + prev_ld = ld; + prev_st = st; + count++; + } +} + +static void nfp_bpf_opt_pkt_cache(struct nfp_prog *nfp_prog) +{ + struct nfp_insn_meta *meta, *range_node = NULL; + s16 range_start = 0, range_end = 0; + bool cache_avail = false; + struct bpf_insn *insn; + s32 range_ptr_off = 0; + u32 range_ptr_id = 0; + + list_for_each_entry(meta, &nfp_prog->insns, l) { + if (meta->flags & FLAG_INSN_IS_JUMP_DST) + cache_avail = false; + + if (meta->skip) + continue; + + insn = &meta->insn; + + if (is_mbpf_store_pkt(meta) || + insn->code == (BPF_JMP | BPF_CALL) || + is_mbpf_classic_store_pkt(meta) || + is_mbpf_classic_load(meta)) { + cache_avail = false; + continue; + } + + if (!is_mbpf_load(meta)) + continue; + + if (meta->ptr.type != PTR_TO_PACKET || meta->ldst_gather_len) { + cache_avail = false; + continue; + } + + if (!cache_avail) { + cache_avail = true; + if (range_node) + goto end_current_then_start_new; + goto start_new; + } + + /* Check ID to make sure two reads share the same + * variable offset against PTR_TO_PACKET, and check OFF + * to make sure they also share the same constant + * offset. + * + * OFFs don't really need to be the same, because they + * are the constant offsets against PTR_TO_PACKET, so + * for different OFFs, we could canonicalize them to + * offsets against original packet pointer. We don't + * support this. + */ + if (meta->ptr.id == range_ptr_id && + meta->ptr.off == range_ptr_off) { + s16 new_start = range_start; + s16 end, off = insn->off; + s16 new_end = range_end; + bool changed = false; + + if (off < range_start) { + new_start = off; + changed = true; + } + + end = off + BPF_LDST_BYTES(insn); + if (end > range_end) { + new_end = end; + changed = true; + } + + if (!changed) + continue; + + if (new_end - new_start <= 64) { + /* Install new range. */ + range_start = new_start; + range_end = new_end; + continue; + } + } + +end_current_then_start_new: + range_node->pkt_cache.range_start = range_start; + range_node->pkt_cache.range_end = range_end; +start_new: + range_node = meta; + range_node->pkt_cache.do_init = true; + range_ptr_id = range_node->ptr.id; + range_ptr_off = range_node->ptr.off; + range_start = insn->off; + range_end = insn->off + BPF_LDST_BYTES(insn); + } + + if (range_node) { + range_node->pkt_cache.range_start = range_start; + range_node->pkt_cache.range_end = range_end; + } + + list_for_each_entry(meta, &nfp_prog->insns, l) { + if (meta->skip) + continue; + + if (is_mbpf_load_pkt(meta) && !meta->ldst_gather_len) { + if (meta->pkt_cache.do_init) { + range_start = meta->pkt_cache.range_start; + range_end = meta->pkt_cache.range_end; + } else { + meta->pkt_cache.range_start = range_start; + meta->pkt_cache.range_end = range_end; + } + } + } +} + +static int nfp_bpf_optimize(struct nfp_prog *nfp_prog) +{ + nfp_bpf_opt_reg_init(nfp_prog); + + nfp_bpf_opt_neg_add_sub(nfp_prog); + nfp_bpf_opt_ld_mask(nfp_prog); + nfp_bpf_opt_ld_shift(nfp_prog); + nfp_bpf_opt_ldst_gather(nfp_prog); + nfp_bpf_opt_pkt_cache(nfp_prog); + + return 0; +} + +static int nfp_bpf_replace_map_ptrs(struct nfp_prog *nfp_prog) +{ + struct nfp_insn_meta *meta1, *meta2; + struct nfp_bpf_map *nfp_map; + struct bpf_map *map; + u32 id; + + nfp_for_each_insn_walk2(nfp_prog, meta1, meta2) { + if (meta1->skip || meta2->skip) + continue; + + if (meta1->insn.code != (BPF_LD | BPF_IMM | BPF_DW) || + meta1->insn.src_reg != BPF_PSEUDO_MAP_FD) + continue; + + map = (void *)(unsigned long)((u32)meta1->insn.imm | + (u64)meta2->insn.imm << 32); + if (bpf_map_offload_neutral(map)) { + id = map->id; + } else { + nfp_map = map_to_offmap(map)->dev_priv; + id = nfp_map->tid; + } + + meta1->insn.imm = id; + meta2->insn.imm = 0; + } + + return 0; +} + +static int nfp_bpf_ustore_calc(u64 *prog, unsigned int len) +{ + __le64 *ustore = (__force __le64 *)prog; + int i; + + for (i = 0; i < len; i++) { + int err; + + err = nfp_ustore_check_valid_no_ecc(prog[i]); + if (err) + return err; + + ustore[i] = cpu_to_le64(nfp_ustore_calc_ecc_insn(prog[i])); + } + + return 0; +} + +static void nfp_bpf_prog_trim(struct nfp_prog *nfp_prog) +{ + void *prog; + + prog = kvmalloc_array(nfp_prog->prog_len, sizeof(u64), GFP_KERNEL); + if (!prog) + return; + + nfp_prog->__prog_alloc_len = nfp_prog->prog_len * sizeof(u64); + memcpy(prog, nfp_prog->prog, nfp_prog->__prog_alloc_len); + kvfree(nfp_prog->prog); + nfp_prog->prog = prog; +} + +int nfp_bpf_jit(struct nfp_prog *nfp_prog) +{ + int ret; + + ret = nfp_bpf_replace_map_ptrs(nfp_prog); + if (ret) + return ret; + + ret = nfp_bpf_optimize(nfp_prog); + if (ret) + return ret; + + ret = nfp_translate(nfp_prog); + if (ret) { + pr_err("Translation failed with error %d (translated: %u)\n", + ret, nfp_prog->n_translated); + return -EINVAL; + } + + nfp_bpf_prog_trim(nfp_prog); + + return ret; +} + +void nfp_bpf_jit_prepare(struct nfp_prog *nfp_prog, unsigned int cnt) +{ + struct nfp_insn_meta *meta; + + /* Another pass to record jump information. */ + list_for_each_entry(meta, &nfp_prog->insns, l) { + u64 code = meta->insn.code; + + if (BPF_CLASS(code) == BPF_JMP && BPF_OP(code) != BPF_EXIT && + BPF_OP(code) != BPF_CALL) { + struct nfp_insn_meta *dst_meta; + unsigned short dst_indx; + + dst_indx = meta->n + 1 + meta->insn.off; + dst_meta = nfp_bpf_goto_meta(nfp_prog, meta, dst_indx, + cnt); + + meta->jmp_dst = dst_meta; + dst_meta->flags |= FLAG_INSN_IS_JUMP_DST; + } + } +} + +bool nfp_bpf_supported_opcode(u8 code) +{ + return !!instr_cb[code]; +} + +void *nfp_bpf_relo_for_vnic(struct nfp_prog *nfp_prog, struct nfp_bpf_vnic *bv) +{ + unsigned int i; + u64 *prog; + int err; + + prog = kmemdup(nfp_prog->prog, nfp_prog->prog_len * sizeof(u64), + GFP_KERNEL); + if (!prog) + return ERR_PTR(-ENOMEM); + + for (i = 0; i < nfp_prog->prog_len; i++) { + enum nfp_relo_type special; + u32 val; + + special = FIELD_GET(OP_RELO_TYPE, prog[i]); + switch (special) { + case RELO_NONE: + continue; + case RELO_BR_REL: + br_add_offset(&prog[i], bv->start_off); + break; + case RELO_BR_GO_OUT: + br_set_offset(&prog[i], + nfp_prog->tgt_out + bv->start_off); + break; + case RELO_BR_GO_ABORT: + br_set_offset(&prog[i], + nfp_prog->tgt_abort + bv->start_off); + break; + case RELO_BR_NEXT_PKT: + br_set_offset(&prog[i], bv->tgt_done); + break; + case RELO_BR_HELPER: + val = br_get_offset(prog[i]); + val -= BR_OFF_RELO; + switch (val) { + case BPF_FUNC_map_lookup_elem: + val = nfp_prog->bpf->helpers.map_lookup; + break; + case BPF_FUNC_map_update_elem: + val = nfp_prog->bpf->helpers.map_update; + break; + case BPF_FUNC_map_delete_elem: + val = nfp_prog->bpf->helpers.map_delete; + break; + case BPF_FUNC_perf_event_output: + val = nfp_prog->bpf->helpers.perf_event_output; + break; + default: + pr_err("relocation of unknown helper %d\n", + val); + err = -EINVAL; + goto err_free_prog; + } + br_set_offset(&prog[i], val); + break; + case RELO_IMMED_REL: + immed_add_value(&prog[i], bv->start_off); + break; + } + + prog[i] &= ~OP_RELO_TYPE; + } + + err = nfp_bpf_ustore_calc(prog, nfp_prog->prog_len); + if (err) + goto err_free_prog; + + return prog; + +err_free_prog: + kfree(prog); + return ERR_PTR(err); +} diff --git a/drivers/net/ethernet/netronome/nfp/bpf/main.c b/drivers/net/ethernet/netronome/nfp/bpf/main.c new file mode 100644 index 000000000..970af07f4 --- /dev/null +++ b/drivers/net/ethernet/netronome/nfp/bpf/main.c @@ -0,0 +1,511 @@ +/* + * Copyright (C) 2017-2018 Netronome Systems, Inc. + * + * This software is dual licensed under the GNU General License Version 2, + * June 1991 as shown in the file COPYING in the top-level directory of this + * source tree or the BSD 2-Clause License provided below. You have the + * option to license this software under the complete terms of either license. + * + * The BSD 2-Clause License: + * + * Redistribution and use in source and binary forms, with or + * without modification, are permitted provided that the following + * conditions are met: + * + * 1. Redistributions of source code must retain the above + * copyright notice, this list of conditions and the following + * disclaimer. + * + * 2. Redistributions in binary form must reproduce the above + * copyright notice, this list of conditions and the following + * disclaimer in the documentation and/or other materials + * provided with the distribution. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, + * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND + * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS + * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN + * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN + * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. + */ + +#include <net/pkt_cls.h> + +#include "../nfpcore/nfp_cpp.h" +#include "../nfpcore/nfp_nffw.h" +#include "../nfpcore/nfp_nsp.h" +#include "../nfp_app.h" +#include "../nfp_main.h" +#include "../nfp_net.h" +#include "../nfp_port.h" +#include "fw.h" +#include "main.h" + +const struct rhashtable_params nfp_bpf_maps_neutral_params = { + .nelem_hint = 4, + .key_len = FIELD_SIZEOF(struct bpf_map, id), + .key_offset = offsetof(struct nfp_bpf_neutral_map, map_id), + .head_offset = offsetof(struct nfp_bpf_neutral_map, l), + .automatic_shrinking = true, +}; + +static bool nfp_net_ebpf_capable(struct nfp_net *nn) +{ +#ifdef __LITTLE_ENDIAN + if (nn->cap & NFP_NET_CFG_CTRL_BPF && + nn_readb(nn, NFP_NET_CFG_BPF_ABI) == NFP_NET_BPF_ABI) + return true; +#endif + return false; +} + +static int +nfp_bpf_xdp_offload(struct nfp_app *app, struct nfp_net *nn, + struct bpf_prog *prog, struct netlink_ext_ack *extack) +{ + bool running, xdp_running; + + if (!nfp_net_ebpf_capable(nn)) + return -EINVAL; + + running = nn->dp.ctrl & NFP_NET_CFG_CTRL_BPF; + xdp_running = running && nn->xdp_hw.prog; + + if (!prog && !xdp_running) + return 0; + if (prog && running && !xdp_running) + return -EBUSY; + + return nfp_net_bpf_offload(nn, prog, running, extack); +} + +static const char *nfp_bpf_extra_cap(struct nfp_app *app, struct nfp_net *nn) +{ + return nfp_net_ebpf_capable(nn) ? "BPF" : ""; +} + +static int +nfp_bpf_vnic_alloc(struct nfp_app *app, struct nfp_net *nn, unsigned int id) +{ + struct nfp_pf *pf = app->pf; + struct nfp_bpf_vnic *bv; + int err; + + if (!pf->eth_tbl) { + nfp_err(pf->cpp, "No ETH table\n"); + return -EINVAL; + } + if (pf->max_data_vnics != pf->eth_tbl->count) { + nfp_err(pf->cpp, "ETH entries don't match vNICs (%d vs %d)\n", + pf->max_data_vnics, pf->eth_tbl->count); + return -EINVAL; + } + + bv = kzalloc(sizeof(*bv), GFP_KERNEL); + if (!bv) + return -ENOMEM; + nn->app_priv = bv; + + err = nfp_app_nic_vnic_alloc(app, nn, id); + if (err) + goto err_free_priv; + + bv->start_off = nn_readw(nn, NFP_NET_CFG_BPF_START); + bv->tgt_done = nn_readw(nn, NFP_NET_CFG_BPF_DONE); + + return 0; +err_free_priv: + kfree(nn->app_priv); + return err; +} + +static void nfp_bpf_vnic_free(struct nfp_app *app, struct nfp_net *nn) +{ + struct nfp_bpf_vnic *bv = nn->app_priv; + + WARN_ON(bv->tc_prog); + kfree(bv); +} + +static int nfp_bpf_setup_tc_block_cb(enum tc_setup_type type, + void *type_data, void *cb_priv) +{ + struct tc_cls_bpf_offload *cls_bpf = type_data; + struct nfp_net *nn = cb_priv; + struct bpf_prog *oldprog; + struct nfp_bpf_vnic *bv; + int err; + + if (type != TC_SETUP_CLSBPF) { + NL_SET_ERR_MSG_MOD(cls_bpf->common.extack, + "only offload of BPF classifiers supported"); + return -EOPNOTSUPP; + } + if (!tc_cls_can_offload_and_chain0(nn->dp.netdev, &cls_bpf->common)) + return -EOPNOTSUPP; + if (!nfp_net_ebpf_capable(nn)) { + NL_SET_ERR_MSG_MOD(cls_bpf->common.extack, + "NFP firmware does not support eBPF offload"); + return -EOPNOTSUPP; + } + if (cls_bpf->common.protocol != htons(ETH_P_ALL)) { + NL_SET_ERR_MSG_MOD(cls_bpf->common.extack, + "only ETH_P_ALL supported as filter protocol"); + return -EOPNOTSUPP; + } + + /* Only support TC direct action */ + if (!cls_bpf->exts_integrated || + tcf_exts_has_actions(cls_bpf->exts)) { + NL_SET_ERR_MSG_MOD(cls_bpf->common.extack, + "only direct action with no legacy actions supported"); + return -EOPNOTSUPP; + } + + if (cls_bpf->command != TC_CLSBPF_OFFLOAD) + return -EOPNOTSUPP; + + bv = nn->app_priv; + oldprog = cls_bpf->oldprog; + + /* Don't remove if oldprog doesn't match driver's state */ + if (bv->tc_prog != oldprog) { + oldprog = NULL; + if (!cls_bpf->prog) + return 0; + } + + err = nfp_net_bpf_offload(nn, cls_bpf->prog, oldprog, + cls_bpf->common.extack); + if (err) + return err; + + bv->tc_prog = cls_bpf->prog; + nn->port->tc_offload_cnt = !!bv->tc_prog; + return 0; +} + +static int nfp_bpf_setup_tc_block(struct net_device *netdev, + struct tc_block_offload *f) +{ + struct nfp_net *nn = netdev_priv(netdev); + + if (f->binder_type != TCF_BLOCK_BINDER_TYPE_CLSACT_INGRESS) + return -EOPNOTSUPP; + + switch (f->command) { + case TC_BLOCK_BIND: + return tcf_block_cb_register(f->block, + nfp_bpf_setup_tc_block_cb, + nn, nn, f->extack); + case TC_BLOCK_UNBIND: + tcf_block_cb_unregister(f->block, + nfp_bpf_setup_tc_block_cb, + nn); + return 0; + default: + return -EOPNOTSUPP; + } +} + +static int nfp_bpf_setup_tc(struct nfp_app *app, struct net_device *netdev, + enum tc_setup_type type, void *type_data) +{ + switch (type) { + case TC_SETUP_BLOCK: + return nfp_bpf_setup_tc_block(netdev, type_data); + default: + return -EOPNOTSUPP; + } +} + +static int +nfp_bpf_check_mtu(struct nfp_app *app, struct net_device *netdev, int new_mtu) +{ + struct nfp_net *nn = netdev_priv(netdev); + unsigned int max_mtu; + + if (~nn->dp.ctrl & NFP_NET_CFG_CTRL_BPF) + return 0; + + max_mtu = nn_readb(nn, NFP_NET_CFG_BPF_INL_MTU) * 64 - 32; + if (new_mtu > max_mtu) { + nn_info(nn, "BPF offload active, MTU over %u not supported\n", + max_mtu); + return -EBUSY; + } + return 0; +} + +static int +nfp_bpf_parse_cap_adjust_head(struct nfp_app_bpf *bpf, void __iomem *value, + u32 length) +{ + struct nfp_bpf_cap_tlv_adjust_head __iomem *cap = value; + struct nfp_cpp *cpp = bpf->app->pf->cpp; + + if (length < sizeof(*cap)) { + nfp_err(cpp, "truncated adjust_head TLV: %d\n", length); + return -EINVAL; + } + + bpf->adjust_head.flags = readl(&cap->flags); + bpf->adjust_head.off_min = readl(&cap->off_min); + bpf->adjust_head.off_max = readl(&cap->off_max); + bpf->adjust_head.guaranteed_sub = readl(&cap->guaranteed_sub); + bpf->adjust_head.guaranteed_add = readl(&cap->guaranteed_add); + + if (bpf->adjust_head.off_min > bpf->adjust_head.off_max) { + nfp_err(cpp, "invalid adjust_head TLV: min > max\n"); + return -EINVAL; + } + if (!FIELD_FIT(UR_REG_IMM_MAX, bpf->adjust_head.off_min) || + !FIELD_FIT(UR_REG_IMM_MAX, bpf->adjust_head.off_max)) { + nfp_warn(cpp, "disabling adjust_head - driver expects min/max to fit in as immediates\n"); + memset(&bpf->adjust_head, 0, sizeof(bpf->adjust_head)); + return 0; + } + + return 0; +} + +static int +nfp_bpf_parse_cap_func(struct nfp_app_bpf *bpf, void __iomem *value, u32 length) +{ + struct nfp_bpf_cap_tlv_func __iomem *cap = value; + + if (length < sizeof(*cap)) { + nfp_err(bpf->app->cpp, "truncated function TLV: %d\n", length); + return -EINVAL; + } + + switch (readl(&cap->func_id)) { + case BPF_FUNC_map_lookup_elem: + bpf->helpers.map_lookup = readl(&cap->func_addr); + break; + case BPF_FUNC_map_update_elem: + bpf->helpers.map_update = readl(&cap->func_addr); + break; + case BPF_FUNC_map_delete_elem: + bpf->helpers.map_delete = readl(&cap->func_addr); + break; + case BPF_FUNC_perf_event_output: + bpf->helpers.perf_event_output = readl(&cap->func_addr); + break; + } + + return 0; +} + +static int +nfp_bpf_parse_cap_maps(struct nfp_app_bpf *bpf, void __iomem *value, u32 length) +{ + struct nfp_bpf_cap_tlv_maps __iomem *cap = value; + + if (length < sizeof(*cap)) { + nfp_err(bpf->app->cpp, "truncated maps TLV: %d\n", length); + return -EINVAL; + } + + bpf->maps.types = readl(&cap->types); + bpf->maps.max_maps = readl(&cap->max_maps); + bpf->maps.max_elems = readl(&cap->max_elems); + bpf->maps.max_key_sz = readl(&cap->max_key_sz); + bpf->maps.max_val_sz = readl(&cap->max_val_sz); + bpf->maps.max_elem_sz = readl(&cap->max_elem_sz); + + return 0; +} + +static int +nfp_bpf_parse_cap_random(struct nfp_app_bpf *bpf, void __iomem *value, + u32 length) +{ + bpf->pseudo_random = true; + return 0; +} + +static int +nfp_bpf_parse_cap_qsel(struct nfp_app_bpf *bpf, void __iomem *value, u32 length) +{ + bpf->queue_select = true; + return 0; +} + +static int +nfp_bpf_parse_cap_adjust_tail(struct nfp_app_bpf *bpf, void __iomem *value, + u32 length) +{ + bpf->adjust_tail = true; + return 0; +} + +static int nfp_bpf_parse_capabilities(struct nfp_app *app) +{ + struct nfp_cpp *cpp = app->pf->cpp; + struct nfp_cpp_area *area; + u8 __iomem *mem, *start; + + mem = nfp_rtsym_map(app->pf->rtbl, "_abi_bpf_capabilities", "bpf.cap", + 8, &area); + if (IS_ERR(mem)) + return PTR_ERR(mem) == -ENOENT ? 0 : PTR_ERR(mem); + + start = mem; + while (mem - start + 8 <= nfp_cpp_area_size(area)) { + u8 __iomem *value; + u32 type, length; + + type = readl(mem); + length = readl(mem + 4); + value = mem + 8; + + mem += 8 + length; + if (mem - start > nfp_cpp_area_size(area)) + goto err_release_free; + + switch (type) { + case NFP_BPF_CAP_TYPE_FUNC: + if (nfp_bpf_parse_cap_func(app->priv, value, length)) + goto err_release_free; + break; + case NFP_BPF_CAP_TYPE_ADJUST_HEAD: + if (nfp_bpf_parse_cap_adjust_head(app->priv, value, + length)) + goto err_release_free; + break; + case NFP_BPF_CAP_TYPE_MAPS: + if (nfp_bpf_parse_cap_maps(app->priv, value, length)) + goto err_release_free; + break; + case NFP_BPF_CAP_TYPE_RANDOM: + if (nfp_bpf_parse_cap_random(app->priv, value, length)) + goto err_release_free; + break; + case NFP_BPF_CAP_TYPE_QUEUE_SELECT: + if (nfp_bpf_parse_cap_qsel(app->priv, value, length)) + goto err_release_free; + break; + case NFP_BPF_CAP_TYPE_ADJUST_TAIL: + if (nfp_bpf_parse_cap_adjust_tail(app->priv, value, + length)) + goto err_release_free; + break; + default: + nfp_dbg(cpp, "unknown BPF capability: %d\n", type); + break; + } + } + if (mem - start != nfp_cpp_area_size(area)) { + nfp_err(cpp, "BPF capabilities left after parsing, parsed:%zd total length:%zu\n", + mem - start, nfp_cpp_area_size(area)); + goto err_release_free; + } + + nfp_cpp_area_release_free(area); + + return 0; + +err_release_free: + nfp_err(cpp, "invalid BPF capabilities at offset:%zd\n", mem - start); + nfp_cpp_area_release_free(area); + return -EINVAL; +} + +static int nfp_bpf_ndo_init(struct nfp_app *app, struct net_device *netdev) +{ + struct nfp_app_bpf *bpf = app->priv; + + return bpf_offload_dev_netdev_register(bpf->bpf_dev, netdev); +} + +static void nfp_bpf_ndo_uninit(struct nfp_app *app, struct net_device *netdev) +{ + struct nfp_app_bpf *bpf = app->priv; + + bpf_offload_dev_netdev_unregister(bpf->bpf_dev, netdev); +} + +static int nfp_bpf_init(struct nfp_app *app) +{ + struct nfp_app_bpf *bpf; + int err; + + bpf = kzalloc(sizeof(*bpf), GFP_KERNEL); + if (!bpf) + return -ENOMEM; + bpf->app = app; + app->priv = bpf; + + skb_queue_head_init(&bpf->cmsg_replies); + init_waitqueue_head(&bpf->cmsg_wq); + INIT_LIST_HEAD(&bpf->map_list); + + err = rhashtable_init(&bpf->maps_neutral, &nfp_bpf_maps_neutral_params); + if (err) + goto err_free_bpf; + + err = nfp_bpf_parse_capabilities(app); + if (err) + goto err_free_neutral_maps; + + bpf->bpf_dev = bpf_offload_dev_create(); + err = PTR_ERR_OR_ZERO(bpf->bpf_dev); + if (err) + goto err_free_neutral_maps; + + return 0; + +err_free_neutral_maps: + rhashtable_destroy(&bpf->maps_neutral); +err_free_bpf: + kfree(bpf); + return err; +} + +static void nfp_check_rhashtable_empty(void *ptr, void *arg) +{ + WARN_ON_ONCE(1); +} + +static void nfp_bpf_clean(struct nfp_app *app) +{ + struct nfp_app_bpf *bpf = app->priv; + + bpf_offload_dev_destroy(bpf->bpf_dev); + WARN_ON(!skb_queue_empty(&bpf->cmsg_replies)); + WARN_ON(!list_empty(&bpf->map_list)); + WARN_ON(bpf->maps_in_use || bpf->map_elems_in_use); + rhashtable_free_and_destroy(&bpf->maps_neutral, + nfp_check_rhashtable_empty, NULL); + kfree(bpf); +} + +const struct nfp_app_type app_bpf = { + .id = NFP_APP_BPF_NIC, + .name = "ebpf", + + .ctrl_cap_mask = 0, + + .init = nfp_bpf_init, + .clean = nfp_bpf_clean, + + .check_mtu = nfp_bpf_check_mtu, + + .extra_cap = nfp_bpf_extra_cap, + + .ndo_init = nfp_bpf_ndo_init, + .ndo_uninit = nfp_bpf_ndo_uninit, + + .vnic_alloc = nfp_bpf_vnic_alloc, + .vnic_free = nfp_bpf_vnic_free, + + .ctrl_msg_rx = nfp_bpf_ctrl_msg_rx, + .ctrl_msg_rx_raw = nfp_bpf_ctrl_msg_rx_raw, + + .setup_tc = nfp_bpf_setup_tc, + .bpf = nfp_ndo_bpf, + .xdp_offload = nfp_bpf_xdp_offload, +}; diff --git a/drivers/net/ethernet/netronome/nfp/bpf/main.h b/drivers/net/ethernet/netronome/nfp/bpf/main.h new file mode 100644 index 000000000..2134045e1 --- /dev/null +++ b/drivers/net/ethernet/netronome/nfp/bpf/main.h @@ -0,0 +1,521 @@ +/* + * Copyright (C) 2016-2018 Netronome Systems, Inc. + * + * This software is dual licensed under the GNU General License Version 2, + * June 1991 as shown in the file COPYING in the top-level directory of this + * source tree or the BSD 2-Clause License provided below. You have the + * option to license this software under the complete terms of either license. + * + * The BSD 2-Clause License: + * + * Redistribution and use in source and binary forms, with or + * without modification, are permitted provided that the following + * conditions are met: + * + * 1. Redistributions of source code must retain the above + * copyright notice, this list of conditions and the following + * disclaimer. + * + * 2. Redistributions in binary form must reproduce the above + * copyright notice, this list of conditions and the following + * disclaimer in the documentation and/or other materials + * provided with the distribution. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, + * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND + * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS + * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN + * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN + * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. + */ + +#ifndef __NFP_BPF_H__ +#define __NFP_BPF_H__ 1 + +#include <linux/bitfield.h> +#include <linux/bpf.h> +#include <linux/bpf_verifier.h> +#include <linux/kernel.h> +#include <linux/list.h> +#include <linux/rhashtable.h> +#include <linux/skbuff.h> +#include <linux/types.h> +#include <linux/wait.h> + +#include "../nfp_asm.h" +#include "fw.h" + +#define cmsg_warn(bpf, msg...) nn_dp_warn(&(bpf)->app->ctrl->dp, msg) + +/* For relocation logic use up-most byte of branch instruction as scratch + * area. Remember to clear this before sending instructions to HW! + */ +#define OP_RELO_TYPE 0xff00000000000000ULL + +enum nfp_relo_type { + RELO_NONE = 0, + /* standard internal jumps */ + RELO_BR_REL, + /* internal jumps to parts of the outro */ + RELO_BR_GO_OUT, + RELO_BR_GO_ABORT, + /* external jumps to fixed addresses */ + RELO_BR_NEXT_PKT, + RELO_BR_HELPER, + /* immediate relocation against load address */ + RELO_IMMED_REL, +}; + +/* To make absolute relocated branches (branches other than RELO_BR_REL) + * distinguishable in user space dumps from normal jumps, add a large offset + * to them. + */ +#define BR_OFF_RELO 15000 + +enum static_regs { + STATIC_REG_IMMA = 20, /* Bank AB */ + STATIC_REG_IMM = 21, /* Bank AB */ + STATIC_REG_STACK = 22, /* Bank A */ + STATIC_REG_PKT_LEN = 22, /* Bank B */ +}; + +enum pkt_vec { + PKT_VEC_PKT_LEN = 0, + PKT_VEC_PKT_PTR = 2, + PKT_VEC_QSEL_SET = 4, + PKT_VEC_QSEL_VAL = 6, +}; + +#define PKT_VEL_QSEL_SET_BIT 4 + +#define pv_len(np) reg_lm(1, PKT_VEC_PKT_LEN) +#define pv_ctm_ptr(np) reg_lm(1, PKT_VEC_PKT_PTR) +#define pv_qsel_set(np) reg_lm(1, PKT_VEC_QSEL_SET) +#define pv_qsel_val(np) reg_lm(1, PKT_VEC_QSEL_VAL) + +#define stack_reg(np) reg_a(STATIC_REG_STACK) +#define stack_imm(np) imm_b(np) +#define plen_reg(np) reg_b(STATIC_REG_PKT_LEN) +#define pptr_reg(np) pv_ctm_ptr(np) +#define imm_a(np) reg_a(STATIC_REG_IMM) +#define imm_b(np) reg_b(STATIC_REG_IMM) +#define imma_a(np) reg_a(STATIC_REG_IMMA) +#define imma_b(np) reg_b(STATIC_REG_IMMA) +#define imm_both(np) reg_both(STATIC_REG_IMM) + +#define NFP_BPF_ABI_FLAGS reg_imm(0) +#define NFP_BPF_ABI_FLAG_MARK 1 + +/** + * struct nfp_app_bpf - bpf app priv structure + * @app: backpointer to the app + * + * @bpf_dev: BPF offload device handle + * + * @tag_allocator: bitmap of control message tags in use + * @tag_alloc_next: next tag bit to allocate + * @tag_alloc_last: next tag bit to be freed + * + * @cmsg_replies: received cmsg replies waiting to be consumed + * @cmsg_wq: work queue for waiting for cmsg replies + * + * @map_list: list of offloaded maps + * @maps_in_use: number of currently offloaded maps + * @map_elems_in_use: number of elements allocated to offloaded maps + * + * @maps_neutral: hash table of offload-neutral maps (on pointer) + * + * @adjust_head: adjust head capability + * @adjust_head.flags: extra flags for adjust head + * @adjust_head.off_min: minimal packet offset within buffer required + * @adjust_head.off_max: maximum packet offset within buffer required + * @adjust_head.guaranteed_sub: negative adjustment guaranteed possible + * @adjust_head.guaranteed_add: positive adjustment guaranteed possible + * + * @maps: map capability + * @maps.types: supported map types + * @maps.max_maps: max number of maps supported + * @maps.max_elems: max number of entries in each map + * @maps.max_key_sz: max size of map key + * @maps.max_val_sz: max size of map value + * @maps.max_elem_sz: max size of map entry (key + value) + * + * @helpers: helper addressess for various calls + * @helpers.map_lookup: map lookup helper address + * @helpers.map_update: map update helper address + * @helpers.map_delete: map delete helper address + * @helpers.perf_event_output: output perf event to a ring buffer + * + * @pseudo_random: FW initialized the pseudo-random machinery (CSRs) + * @queue_select: BPF can set the RX queue ID in packet vector + * @adjust_tail: BPF can simply trunc packet size for adjust tail + */ +struct nfp_app_bpf { + struct nfp_app *app; + + struct bpf_offload_dev *bpf_dev; + + DECLARE_BITMAP(tag_allocator, U16_MAX + 1); + u16 tag_alloc_next; + u16 tag_alloc_last; + + struct sk_buff_head cmsg_replies; + struct wait_queue_head cmsg_wq; + + struct list_head map_list; + unsigned int maps_in_use; + unsigned int map_elems_in_use; + + struct rhashtable maps_neutral; + + struct nfp_bpf_cap_adjust_head { + u32 flags; + int off_min; + int off_max; + int guaranteed_sub; + int guaranteed_add; + } adjust_head; + + struct { + u32 types; + u32 max_maps; + u32 max_elems; + u32 max_key_sz; + u32 max_val_sz; + u32 max_elem_sz; + } maps; + + struct { + u32 map_lookup; + u32 map_update; + u32 map_delete; + u32 perf_event_output; + } helpers; + + bool pseudo_random; + bool queue_select; + bool adjust_tail; +}; + +enum nfp_bpf_map_use { + NFP_MAP_UNUSED = 0, + NFP_MAP_USE_READ, + NFP_MAP_USE_WRITE, + NFP_MAP_USE_ATOMIC_CNT, +}; + +struct nfp_bpf_map_word { + unsigned char type :4; + unsigned char non_zero_update :1; +}; + +/** + * struct nfp_bpf_map - private per-map data attached to BPF maps for offload + * @offmap: pointer to the offloaded BPF map + * @bpf: back pointer to bpf app private structure + * @tid: table id identifying map on datapath + * @l: link on the nfp_app_bpf->map_list list + * @use_map: map of how the value is used (in 4B chunks) + */ +struct nfp_bpf_map { + struct bpf_offloaded_map *offmap; + struct nfp_app_bpf *bpf; + u32 tid; + struct list_head l; + struct nfp_bpf_map_word use_map[]; +}; + +struct nfp_bpf_neutral_map { + struct rhash_head l; + struct bpf_map *ptr; + u32 map_id; + u32 count; +}; + +extern const struct rhashtable_params nfp_bpf_maps_neutral_params; + +struct nfp_prog; +struct nfp_insn_meta; +typedef int (*instr_cb_t)(struct nfp_prog *, struct nfp_insn_meta *); + +#define nfp_prog_first_meta(nfp_prog) \ + list_first_entry(&(nfp_prog)->insns, struct nfp_insn_meta, l) +#define nfp_prog_last_meta(nfp_prog) \ + list_last_entry(&(nfp_prog)->insns, struct nfp_insn_meta, l) +#define nfp_meta_next(meta) list_next_entry(meta, l) +#define nfp_meta_prev(meta) list_prev_entry(meta, l) + +/** + * struct nfp_bpf_reg_state - register state for calls + * @reg: BPF register state from latest path + * @var_off: for stack arg - changes stack offset on different paths + */ +struct nfp_bpf_reg_state { + struct bpf_reg_state reg; + bool var_off; +}; + +#define FLAG_INSN_IS_JUMP_DST BIT(0) + +/** + * struct nfp_insn_meta - BPF instruction wrapper + * @insn: BPF instruction + * @ptr: pointer type for memory operations + * @ldst_gather_len: memcpy length gathered from load/store sequence + * @paired_st: the paired store insn at the head of the sequence + * @ptr_not_const: pointer is not always constant + * @pkt_cache: packet data cache information + * @pkt_cache.range_start: start offset for associated packet data cache + * @pkt_cache.range_end: end offset for associated packet data cache + * @pkt_cache.do_init: this read needs to initialize packet data cache + * @xadd_over_16bit: 16bit immediate is not guaranteed + * @xadd_maybe_16bit: 16bit immediate is possible + * @jmp_dst: destination info for jump instructions + * @jump_neg_op: jump instruction has inverted immediate, use ADD instead of SUB + * @func_id: function id for call instructions + * @arg1: arg1 for call instructions + * @arg2: arg2 for call instructions + * @umin_src: copy of core verifier umin_value for src opearnd. + * @umax_src: copy of core verifier umax_value for src operand. + * @umin_dst: copy of core verifier umin_value for dst opearnd. + * @umax_dst: copy of core verifier umax_value for dst operand. + * @off: index of first generated machine instruction (in nfp_prog.prog) + * @n: eBPF instruction number + * @flags: eBPF instruction extra optimization flags + * @skip: skip this instruction (optimized out) + * @double_cb: callback for second part of the instruction + * @l: link on nfp_prog->insns list + */ +struct nfp_insn_meta { + struct bpf_insn insn; + union { + /* pointer ops (ld/st/xadd) */ + struct { + struct bpf_reg_state ptr; + struct bpf_insn *paired_st; + s16 ldst_gather_len; + bool ptr_not_const; + struct { + s16 range_start; + s16 range_end; + bool do_init; + } pkt_cache; + bool xadd_over_16bit; + bool xadd_maybe_16bit; + }; + /* jump */ + struct { + struct nfp_insn_meta *jmp_dst; + bool jump_neg_op; + }; + /* function calls */ + struct { + u32 func_id; + struct bpf_reg_state arg1; + struct nfp_bpf_reg_state arg2; + }; + /* We are interested in range info for operands of ALU + * operations. For example, shift amount, multiplicand and + * multiplier etc. + */ + struct { + u64 umin_src; + u64 umax_src; + u64 umin_dst; + u64 umax_dst; + }; + }; + unsigned int off; + unsigned short n; + unsigned short flags; + bool skip; + instr_cb_t double_cb; + + struct list_head l; +}; + +#define BPF_SIZE_MASK 0x18 + +static inline u8 mbpf_class(const struct nfp_insn_meta *meta) +{ + return BPF_CLASS(meta->insn.code); +} + +static inline u8 mbpf_src(const struct nfp_insn_meta *meta) +{ + return BPF_SRC(meta->insn.code); +} + +static inline u8 mbpf_op(const struct nfp_insn_meta *meta) +{ + return BPF_OP(meta->insn.code); +} + +static inline u8 mbpf_mode(const struct nfp_insn_meta *meta) +{ + return BPF_MODE(meta->insn.code); +} + +static inline bool is_mbpf_alu(const struct nfp_insn_meta *meta) +{ + return mbpf_class(meta) == BPF_ALU64 || mbpf_class(meta) == BPF_ALU; +} + +static inline bool is_mbpf_load(const struct nfp_insn_meta *meta) +{ + return (meta->insn.code & ~BPF_SIZE_MASK) == (BPF_LDX | BPF_MEM); +} + +static inline bool is_mbpf_store(const struct nfp_insn_meta *meta) +{ + return (meta->insn.code & ~BPF_SIZE_MASK) == (BPF_STX | BPF_MEM); +} + +static inline bool is_mbpf_load_pkt(const struct nfp_insn_meta *meta) +{ + return is_mbpf_load(meta) && meta->ptr.type == PTR_TO_PACKET; +} + +static inline bool is_mbpf_store_pkt(const struct nfp_insn_meta *meta) +{ + return is_mbpf_store(meta) && meta->ptr.type == PTR_TO_PACKET; +} + +static inline bool is_mbpf_classic_load(const struct nfp_insn_meta *meta) +{ + u8 code = meta->insn.code; + + return BPF_CLASS(code) == BPF_LD && + (BPF_MODE(code) == BPF_ABS || BPF_MODE(code) == BPF_IND); +} + +static inline bool is_mbpf_classic_store(const struct nfp_insn_meta *meta) +{ + u8 code = meta->insn.code; + + return BPF_CLASS(code) == BPF_ST && BPF_MODE(code) == BPF_MEM; +} + +static inline bool is_mbpf_classic_store_pkt(const struct nfp_insn_meta *meta) +{ + return is_mbpf_classic_store(meta) && meta->ptr.type == PTR_TO_PACKET; +} + +static inline bool is_mbpf_xadd(const struct nfp_insn_meta *meta) +{ + return (meta->insn.code & ~BPF_SIZE_MASK) == (BPF_STX | BPF_XADD); +} + +static inline bool is_mbpf_mul(const struct nfp_insn_meta *meta) +{ + return is_mbpf_alu(meta) && mbpf_op(meta) == BPF_MUL; +} + +static inline bool is_mbpf_div(const struct nfp_insn_meta *meta) +{ + return is_mbpf_alu(meta) && mbpf_op(meta) == BPF_DIV; +} + +/** + * struct nfp_prog - nfp BPF program + * @bpf: backpointer to the bpf app priv structure + * @prog: machine code + * @prog_len: number of valid instructions in @prog array + * @__prog_alloc_len: alloc size of @prog array + * @verifier_meta: temporary storage for verifier's insn meta + * @type: BPF program type + * @last_bpf_off: address of the last instruction translated from BPF + * @tgt_out: jump target for normal exit + * @tgt_abort: jump target for abort (e.g. access outside of packet buffer) + * @n_translated: number of successfully translated instructions (for errors) + * @error: error code if something went wrong + * @stack_depth: max stack depth from the verifier + * @adjust_head_location: if program has single adjust head call - the insn no. + * @map_records_cnt: the number of map pointers recorded for this prog + * @map_records: the map record pointers from bpf->maps_neutral + * @insns: list of BPF instruction wrappers (struct nfp_insn_meta) + */ +struct nfp_prog { + struct nfp_app_bpf *bpf; + + u64 *prog; + unsigned int prog_len; + unsigned int __prog_alloc_len; + + struct nfp_insn_meta *verifier_meta; + + enum bpf_prog_type type; + + unsigned int last_bpf_off; + unsigned int tgt_out; + unsigned int tgt_abort; + + unsigned int n_translated; + int error; + + unsigned int stack_depth; + unsigned int adjust_head_location; + + unsigned int map_records_cnt; + struct nfp_bpf_neutral_map **map_records; + + struct list_head insns; +}; + +/** + * struct nfp_bpf_vnic - per-vNIC BPF priv structure + * @tc_prog: currently loaded cls_bpf program + * @start_off: address of the first instruction in the memory + * @tgt_done: jump target to get the next packet + */ +struct nfp_bpf_vnic { + struct bpf_prog *tc_prog; + unsigned int start_off; + unsigned int tgt_done; +}; + +void nfp_bpf_jit_prepare(struct nfp_prog *nfp_prog, unsigned int cnt); +int nfp_bpf_jit(struct nfp_prog *prog); +bool nfp_bpf_supported_opcode(u8 code); + +extern const struct bpf_prog_offload_ops nfp_bpf_analyzer_ops; + +struct netdev_bpf; +struct nfp_app; +struct nfp_net; + +int nfp_ndo_bpf(struct nfp_app *app, struct nfp_net *nn, + struct netdev_bpf *bpf); +int nfp_net_bpf_offload(struct nfp_net *nn, struct bpf_prog *prog, + bool old_prog, struct netlink_ext_ack *extack); + +struct nfp_insn_meta * +nfp_bpf_goto_meta(struct nfp_prog *nfp_prog, struct nfp_insn_meta *meta, + unsigned int insn_idx, unsigned int n_insns); + +void *nfp_bpf_relo_for_vnic(struct nfp_prog *nfp_prog, struct nfp_bpf_vnic *bv); + +long long int +nfp_bpf_ctrl_alloc_map(struct nfp_app_bpf *bpf, struct bpf_map *map); +void +nfp_bpf_ctrl_free_map(struct nfp_app_bpf *bpf, struct nfp_bpf_map *nfp_map); +int nfp_bpf_ctrl_getfirst_entry(struct bpf_offloaded_map *offmap, + void *next_key); +int nfp_bpf_ctrl_update_entry(struct bpf_offloaded_map *offmap, + void *key, void *value, u64 flags); +int nfp_bpf_ctrl_del_entry(struct bpf_offloaded_map *offmap, void *key); +int nfp_bpf_ctrl_lookup_entry(struct bpf_offloaded_map *offmap, + void *key, void *value); +int nfp_bpf_ctrl_getnext_entry(struct bpf_offloaded_map *offmap, + void *key, void *next_key); + +int nfp_bpf_event_output(struct nfp_app_bpf *bpf, const void *data, + unsigned int len); + +void nfp_bpf_ctrl_msg_rx(struct nfp_app *app, struct sk_buff *skb); +void +nfp_bpf_ctrl_msg_rx_raw(struct nfp_app *app, const void *data, + unsigned int len); +#endif diff --git a/drivers/net/ethernet/netronome/nfp/bpf/offload.c b/drivers/net/ethernet/netronome/nfp/bpf/offload.c new file mode 100644 index 000000000..6140e4650 --- /dev/null +++ b/drivers/net/ethernet/netronome/nfp/bpf/offload.c @@ -0,0 +1,627 @@ +/* + * Copyright (C) 2016-2018 Netronome Systems, Inc. + * + * This software is dual licensed under the GNU General License Version 2, + * June 1991 as shown in the file COPYING in the top-level directory of this + * source tree or the BSD 2-Clause License provided below. You have the + * option to license this software under the complete terms of either license. + * + * The BSD 2-Clause License: + * + * Redistribution and use in source and binary forms, with or + * without modification, are permitted provided that the following + * conditions are met: + * + * 1. Redistributions of source code must retain the above + * copyright notice, this list of conditions and the following + * disclaimer. + * + * 2. Redistributions in binary form must reproduce the above + * copyright notice, this list of conditions and the following + * disclaimer in the documentation and/or other materials + * provided with the distribution. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, + * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND + * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS + * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN + * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN + * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. + */ + +/* + * nfp_net_offload.c + * Netronome network device driver: TC offload functions for PF and VF + */ + +#define pr_fmt(fmt) "NFP net bpf: " fmt + +#include <linux/bpf.h> +#include <linux/kernel.h> +#include <linux/netdevice.h> +#include <linux/pci.h> +#include <linux/jiffies.h> +#include <linux/timer.h> +#include <linux/list.h> +#include <linux/mm.h> + +#include <net/pkt_cls.h> +#include <net/tc_act/tc_gact.h> +#include <net/tc_act/tc_mirred.h> + +#include "main.h" +#include "../nfp_app.h" +#include "../nfp_net_ctrl.h" +#include "../nfp_net.h" + +static int +nfp_map_ptr_record(struct nfp_app_bpf *bpf, struct nfp_prog *nfp_prog, + struct bpf_map *map) +{ + struct nfp_bpf_neutral_map *record; + int err; + + /* Map record paths are entered via ndo, update side is protected. */ + ASSERT_RTNL(); + + /* Reuse path - other offloaded program is already tracking this map. */ + record = rhashtable_lookup_fast(&bpf->maps_neutral, &map->id, + nfp_bpf_maps_neutral_params); + if (record) { + nfp_prog->map_records[nfp_prog->map_records_cnt++] = record; + record->count++; + return 0; + } + + /* Grab a single ref to the map for our record. The prog destroy ndo + * happens after free_used_maps(). + */ + map = bpf_map_inc(map, false); + if (IS_ERR(map)) + return PTR_ERR(map); + + record = kmalloc(sizeof(*record), GFP_KERNEL); + if (!record) { + err = -ENOMEM; + goto err_map_put; + } + + record->ptr = map; + record->map_id = map->id; + record->count = 1; + + err = rhashtable_insert_fast(&bpf->maps_neutral, &record->l, + nfp_bpf_maps_neutral_params); + if (err) + goto err_free_rec; + + nfp_prog->map_records[nfp_prog->map_records_cnt++] = record; + + return 0; + +err_free_rec: + kfree(record); +err_map_put: + bpf_map_put(map); + return err; +} + +static void +nfp_map_ptrs_forget(struct nfp_app_bpf *bpf, struct nfp_prog *nfp_prog) +{ + bool freed = false; + int i; + + ASSERT_RTNL(); + + for (i = 0; i < nfp_prog->map_records_cnt; i++) { + if (--nfp_prog->map_records[i]->count) { + nfp_prog->map_records[i] = NULL; + continue; + } + + WARN_ON(rhashtable_remove_fast(&bpf->maps_neutral, + &nfp_prog->map_records[i]->l, + nfp_bpf_maps_neutral_params)); + freed = true; + } + + if (freed) { + synchronize_rcu(); + + for (i = 0; i < nfp_prog->map_records_cnt; i++) + if (nfp_prog->map_records[i]) { + bpf_map_put(nfp_prog->map_records[i]->ptr); + kfree(nfp_prog->map_records[i]); + } + } + + kfree(nfp_prog->map_records); + nfp_prog->map_records = NULL; + nfp_prog->map_records_cnt = 0; +} + +static int +nfp_map_ptrs_record(struct nfp_app_bpf *bpf, struct nfp_prog *nfp_prog, + struct bpf_prog *prog) +{ + int i, cnt, err; + + /* Quickly count the maps we will have to remember */ + cnt = 0; + for (i = 0; i < prog->aux->used_map_cnt; i++) + if (bpf_map_offload_neutral(prog->aux->used_maps[i])) + cnt++; + if (!cnt) + return 0; + + nfp_prog->map_records = kmalloc_array(cnt, + sizeof(nfp_prog->map_records[0]), + GFP_KERNEL); + if (!nfp_prog->map_records) + return -ENOMEM; + + for (i = 0; i < prog->aux->used_map_cnt; i++) + if (bpf_map_offload_neutral(prog->aux->used_maps[i])) { + err = nfp_map_ptr_record(bpf, nfp_prog, + prog->aux->used_maps[i]); + if (err) { + nfp_map_ptrs_forget(bpf, nfp_prog); + return err; + } + } + WARN_ON(cnt != nfp_prog->map_records_cnt); + + return 0; +} + +static int +nfp_prog_prepare(struct nfp_prog *nfp_prog, const struct bpf_insn *prog, + unsigned int cnt) +{ + struct nfp_insn_meta *meta; + unsigned int i; + + for (i = 0; i < cnt; i++) { + meta = kzalloc(sizeof(*meta), GFP_KERNEL); + if (!meta) + return -ENOMEM; + + meta->insn = prog[i]; + meta->n = i; + if (is_mbpf_alu(meta)) { + meta->umin_src = U64_MAX; + meta->umin_dst = U64_MAX; + } + + list_add_tail(&meta->l, &nfp_prog->insns); + } + + nfp_bpf_jit_prepare(nfp_prog, cnt); + + return 0; +} + +static void nfp_prog_free(struct nfp_prog *nfp_prog) +{ + struct nfp_insn_meta *meta, *tmp; + + list_for_each_entry_safe(meta, tmp, &nfp_prog->insns, l) { + list_del(&meta->l); + kfree(meta); + } + kfree(nfp_prog); +} + +static int +nfp_bpf_verifier_prep(struct nfp_app *app, struct nfp_net *nn, + struct netdev_bpf *bpf) +{ + struct bpf_prog *prog = bpf->verifier.prog; + struct nfp_prog *nfp_prog; + int ret; + + nfp_prog = kzalloc(sizeof(*nfp_prog), GFP_KERNEL); + if (!nfp_prog) + return -ENOMEM; + prog->aux->offload->dev_priv = nfp_prog; + + INIT_LIST_HEAD(&nfp_prog->insns); + nfp_prog->type = prog->type; + nfp_prog->bpf = app->priv; + + ret = nfp_prog_prepare(nfp_prog, prog->insnsi, prog->len); + if (ret) + goto err_free; + + nfp_prog->verifier_meta = nfp_prog_first_meta(nfp_prog); + bpf->verifier.ops = &nfp_bpf_analyzer_ops; + + return 0; + +err_free: + nfp_prog_free(nfp_prog); + + return ret; +} + +static int nfp_bpf_translate(struct nfp_net *nn, struct bpf_prog *prog) +{ + struct nfp_prog *nfp_prog = prog->aux->offload->dev_priv; + unsigned int stack_size; + unsigned int max_instr; + int err; + + stack_size = nn_readb(nn, NFP_NET_CFG_BPF_STACK_SZ) * 64; + if (prog->aux->stack_depth > stack_size) { + nn_info(nn, "stack too large: program %dB > FW stack %dB\n", + prog->aux->stack_depth, stack_size); + return -EOPNOTSUPP; + } + nfp_prog->stack_depth = round_up(prog->aux->stack_depth, 4); + + max_instr = nn_readw(nn, NFP_NET_CFG_BPF_MAX_LEN); + nfp_prog->__prog_alloc_len = max_instr * sizeof(u64); + + nfp_prog->prog = kvmalloc(nfp_prog->__prog_alloc_len, GFP_KERNEL); + if (!nfp_prog->prog) + return -ENOMEM; + + err = nfp_bpf_jit(nfp_prog); + if (err) + return err; + + prog->aux->offload->jited_len = nfp_prog->prog_len * sizeof(u64); + prog->aux->offload->jited_image = nfp_prog->prog; + + return nfp_map_ptrs_record(nfp_prog->bpf, nfp_prog, prog); +} + +static int nfp_bpf_destroy(struct nfp_net *nn, struct bpf_prog *prog) +{ + struct nfp_prog *nfp_prog = prog->aux->offload->dev_priv; + + kvfree(nfp_prog->prog); + nfp_map_ptrs_forget(nfp_prog->bpf, nfp_prog); + nfp_prog_free(nfp_prog); + + return 0; +} + +/* Atomic engine requires values to be in big endian, we need to byte swap + * the value words used with xadd. + */ +static void nfp_map_bpf_byte_swap(struct nfp_bpf_map *nfp_map, void *value) +{ + u32 *word = value; + unsigned int i; + + for (i = 0; i < DIV_ROUND_UP(nfp_map->offmap->map.value_size, 4); i++) + if (nfp_map->use_map[i].type == NFP_MAP_USE_ATOMIC_CNT) + word[i] = (__force u32)cpu_to_be32(word[i]); +} + +/* Mark value as unsafely initialized in case it becomes atomic later + * and we didn't byte swap something non-byte swap neutral. + */ +static void +nfp_map_bpf_byte_swap_record(struct nfp_bpf_map *nfp_map, void *value) +{ + u32 *word = value; + unsigned int i; + + for (i = 0; i < DIV_ROUND_UP(nfp_map->offmap->map.value_size, 4); i++) + if (nfp_map->use_map[i].type == NFP_MAP_UNUSED && + word[i] != (__force u32)cpu_to_be32(word[i])) + nfp_map->use_map[i].non_zero_update = 1; +} + +static int +nfp_bpf_map_lookup_entry(struct bpf_offloaded_map *offmap, + void *key, void *value) +{ + int err; + + err = nfp_bpf_ctrl_lookup_entry(offmap, key, value); + if (err) + return err; + + nfp_map_bpf_byte_swap(offmap->dev_priv, value); + return 0; +} + +static int +nfp_bpf_map_update_entry(struct bpf_offloaded_map *offmap, + void *key, void *value, u64 flags) +{ + nfp_map_bpf_byte_swap(offmap->dev_priv, value); + nfp_map_bpf_byte_swap_record(offmap->dev_priv, value); + return nfp_bpf_ctrl_update_entry(offmap, key, value, flags); +} + +static int +nfp_bpf_map_get_next_key(struct bpf_offloaded_map *offmap, + void *key, void *next_key) +{ + if (!key) + return nfp_bpf_ctrl_getfirst_entry(offmap, next_key); + return nfp_bpf_ctrl_getnext_entry(offmap, key, next_key); +} + +static int +nfp_bpf_map_delete_elem(struct bpf_offloaded_map *offmap, void *key) +{ + if (offmap->map.map_type == BPF_MAP_TYPE_ARRAY) + return -EINVAL; + return nfp_bpf_ctrl_del_entry(offmap, key); +} + +static const struct bpf_map_dev_ops nfp_bpf_map_ops = { + .map_get_next_key = nfp_bpf_map_get_next_key, + .map_lookup_elem = nfp_bpf_map_lookup_entry, + .map_update_elem = nfp_bpf_map_update_entry, + .map_delete_elem = nfp_bpf_map_delete_elem, +}; + +static int +nfp_bpf_map_alloc(struct nfp_app_bpf *bpf, struct bpf_offloaded_map *offmap) +{ + struct nfp_bpf_map *nfp_map; + unsigned int use_map_size; + long long int res; + + if (!bpf->maps.types) + return -EOPNOTSUPP; + + if (offmap->map.map_flags || + offmap->map.numa_node != NUMA_NO_NODE) { + pr_info("map flags are not supported\n"); + return -EINVAL; + } + + if (!(bpf->maps.types & 1 << offmap->map.map_type)) { + pr_info("map type not supported\n"); + return -EOPNOTSUPP; + } + if (bpf->maps.max_maps == bpf->maps_in_use) { + pr_info("too many maps for a device\n"); + return -ENOMEM; + } + if (bpf->maps.max_elems - bpf->map_elems_in_use < + offmap->map.max_entries) { + pr_info("map with too many elements: %u, left: %u\n", + offmap->map.max_entries, + bpf->maps.max_elems - bpf->map_elems_in_use); + return -ENOMEM; + } + + if (round_up(offmap->map.key_size, 8) + + round_up(offmap->map.value_size, 8) > bpf->maps.max_elem_sz) { + pr_info("map elements too large: %u, FW max element size (key+value): %u\n", + round_up(offmap->map.key_size, 8) + + round_up(offmap->map.value_size, 8), + bpf->maps.max_elem_sz); + return -ENOMEM; + } + if (offmap->map.key_size > bpf->maps.max_key_sz) { + pr_info("map key size %u, FW max is %u\n", + offmap->map.key_size, bpf->maps.max_key_sz); + return -ENOMEM; + } + if (offmap->map.value_size > bpf->maps.max_val_sz) { + pr_info("map value size %u, FW max is %u\n", + offmap->map.value_size, bpf->maps.max_val_sz); + return -ENOMEM; + } + + use_map_size = DIV_ROUND_UP(offmap->map.value_size, 4) * + FIELD_SIZEOF(struct nfp_bpf_map, use_map[0]); + + nfp_map = kzalloc(sizeof(*nfp_map) + use_map_size, GFP_USER); + if (!nfp_map) + return -ENOMEM; + + offmap->dev_priv = nfp_map; + nfp_map->offmap = offmap; + nfp_map->bpf = bpf; + + res = nfp_bpf_ctrl_alloc_map(bpf, &offmap->map); + if (res < 0) { + kfree(nfp_map); + return res; + } + + nfp_map->tid = res; + offmap->dev_ops = &nfp_bpf_map_ops; + bpf->maps_in_use++; + bpf->map_elems_in_use += offmap->map.max_entries; + list_add_tail(&nfp_map->l, &bpf->map_list); + + return 0; +} + +static int +nfp_bpf_map_free(struct nfp_app_bpf *bpf, struct bpf_offloaded_map *offmap) +{ + struct nfp_bpf_map *nfp_map = offmap->dev_priv; + + nfp_bpf_ctrl_free_map(bpf, nfp_map); + list_del_init(&nfp_map->l); + bpf->map_elems_in_use -= offmap->map.max_entries; + bpf->maps_in_use--; + kfree(nfp_map); + + return 0; +} + +int nfp_ndo_bpf(struct nfp_app *app, struct nfp_net *nn, struct netdev_bpf *bpf) +{ + switch (bpf->command) { + case BPF_OFFLOAD_VERIFIER_PREP: + return nfp_bpf_verifier_prep(app, nn, bpf); + case BPF_OFFLOAD_TRANSLATE: + return nfp_bpf_translate(nn, bpf->offload.prog); + case BPF_OFFLOAD_DESTROY: + return nfp_bpf_destroy(nn, bpf->offload.prog); + case BPF_OFFLOAD_MAP_ALLOC: + return nfp_bpf_map_alloc(app->priv, bpf->offmap); + case BPF_OFFLOAD_MAP_FREE: + return nfp_bpf_map_free(app->priv, bpf->offmap); + default: + return -EINVAL; + } +} + +static unsigned long +nfp_bpf_perf_event_copy(void *dst, const void *src, + unsigned long off, unsigned long len) +{ + memcpy(dst, src + off, len); + return 0; +} + +int nfp_bpf_event_output(struct nfp_app_bpf *bpf, const void *data, + unsigned int len) +{ + struct cmsg_bpf_event *cbe = (void *)data; + struct nfp_bpf_neutral_map *record; + u32 pkt_size, data_size, map_id; + u64 map_id_full; + + if (len < sizeof(struct cmsg_bpf_event)) + return -EINVAL; + + pkt_size = be32_to_cpu(cbe->pkt_size); + data_size = be32_to_cpu(cbe->data_size); + map_id_full = be64_to_cpu(cbe->map_ptr); + map_id = map_id_full; + + if (len < sizeof(struct cmsg_bpf_event) + pkt_size + data_size) + return -EINVAL; + if (cbe->hdr.ver != CMSG_MAP_ABI_VERSION) + return -EINVAL; + + rcu_read_lock(); + record = rhashtable_lookup_fast(&bpf->maps_neutral, &map_id, + nfp_bpf_maps_neutral_params); + if (!record || map_id_full > U32_MAX) { + rcu_read_unlock(); + cmsg_warn(bpf, "perf event: map id %lld (0x%llx) not recognized, dropping event\n", + map_id_full, map_id_full); + return -EINVAL; + } + + bpf_event_output(record->ptr, be32_to_cpu(cbe->cpu_id), + &cbe->data[round_up(pkt_size, 4)], data_size, + cbe->data, pkt_size, nfp_bpf_perf_event_copy); + rcu_read_unlock(); + + return 0; +} + +static int +nfp_net_bpf_load(struct nfp_net *nn, struct bpf_prog *prog, + struct netlink_ext_ack *extack) +{ + struct nfp_prog *nfp_prog = prog->aux->offload->dev_priv; + unsigned int max_mtu; + dma_addr_t dma_addr; + void *img; + int err; + + max_mtu = nn_readb(nn, NFP_NET_CFG_BPF_INL_MTU) * 64 - 32; + if (max_mtu < nn->dp.netdev->mtu) { + NL_SET_ERR_MSG_MOD(extack, "BPF offload not supported with MTU larger than HW packet split boundary"); + return -EOPNOTSUPP; + } + + img = nfp_bpf_relo_for_vnic(nfp_prog, nn->app_priv); + if (IS_ERR(img)) + return PTR_ERR(img); + + dma_addr = dma_map_single(nn->dp.dev, img, + nfp_prog->prog_len * sizeof(u64), + DMA_TO_DEVICE); + if (dma_mapping_error(nn->dp.dev, dma_addr)) { + kfree(img); + return -ENOMEM; + } + + nn_writew(nn, NFP_NET_CFG_BPF_SIZE, nfp_prog->prog_len); + nn_writeq(nn, NFP_NET_CFG_BPF_ADDR, dma_addr); + + /* Load up the JITed code */ + err = nfp_net_reconfig(nn, NFP_NET_CFG_UPDATE_BPF); + if (err) + NL_SET_ERR_MSG_MOD(extack, + "FW command error while loading BPF"); + + dma_unmap_single(nn->dp.dev, dma_addr, nfp_prog->prog_len * sizeof(u64), + DMA_TO_DEVICE); + kfree(img); + + return err; +} + +static void +nfp_net_bpf_start(struct nfp_net *nn, struct netlink_ext_ack *extack) +{ + int err; + + /* Enable passing packets through BPF function */ + nn->dp.ctrl |= NFP_NET_CFG_CTRL_BPF; + nn_writel(nn, NFP_NET_CFG_CTRL, nn->dp.ctrl); + err = nfp_net_reconfig(nn, NFP_NET_CFG_UPDATE_GEN); + if (err) + NL_SET_ERR_MSG_MOD(extack, + "FW command error while enabling BPF"); +} + +static int nfp_net_bpf_stop(struct nfp_net *nn) +{ + if (!(nn->dp.ctrl & NFP_NET_CFG_CTRL_BPF)) + return 0; + + nn->dp.ctrl &= ~NFP_NET_CFG_CTRL_BPF; + nn_writel(nn, NFP_NET_CFG_CTRL, nn->dp.ctrl); + + return nfp_net_reconfig(nn, NFP_NET_CFG_UPDATE_GEN); +} + +int nfp_net_bpf_offload(struct nfp_net *nn, struct bpf_prog *prog, + bool old_prog, struct netlink_ext_ack *extack) +{ + int err; + + if (prog && !bpf_offload_dev_match(prog, nn->dp.netdev)) + return -EINVAL; + + if (prog && old_prog) { + u8 cap; + + cap = nn_readb(nn, NFP_NET_CFG_BPF_CAP); + if (!(cap & NFP_NET_BPF_CAP_RELO)) { + NL_SET_ERR_MSG_MOD(extack, + "FW does not support live reload"); + return -EBUSY; + } + } + + /* Something else is loaded, different program type? */ + if (!old_prog && nn->dp.ctrl & NFP_NET_CFG_CTRL_BPF) + return -EBUSY; + + if (old_prog && !prog) + return nfp_net_bpf_stop(nn); + + err = nfp_net_bpf_load(nn, prog, extack); + if (err) + return err; + + if (!old_prog) + nfp_net_bpf_start(nn, extack); + + return 0; +} diff --git a/drivers/net/ethernet/netronome/nfp/bpf/verifier.c b/drivers/net/ethernet/netronome/nfp/bpf/verifier.c new file mode 100644 index 000000000..db7e186da --- /dev/null +++ b/drivers/net/ethernet/netronome/nfp/bpf/verifier.c @@ -0,0 +1,693 @@ +/* + * Copyright (C) 2016-2018 Netronome Systems, Inc. + * + * This software is dual licensed under the GNU General License Version 2, + * June 1991 as shown in the file COPYING in the top-level directory of this + * source tree or the BSD 2-Clause License provided below. You have the + * option to license this software under the complete terms of either license. + * + * The BSD 2-Clause License: + * + * Redistribution and use in source and binary forms, with or + * without modification, are permitted provided that the following + * conditions are met: + * + * 1. Redistributions of source code must retain the above + * copyright notice, this list of conditions and the following + * disclaimer. + * + * 2. Redistributions in binary form must reproduce the above + * copyright notice, this list of conditions and the following + * disclaimer in the documentation and/or other materials + * provided with the distribution. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, + * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND + * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS + * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN + * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN + * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. + */ + +#include <linux/bpf.h> +#include <linux/bpf_verifier.h> +#include <linux/kernel.h> +#include <linux/pkt_cls.h> + +#include "../nfp_app.h" +#include "../nfp_main.h" +#include "fw.h" +#include "main.h" + +#define pr_vlog(env, fmt, ...) \ + bpf_verifier_log_write(env, "[nfp] " fmt, ##__VA_ARGS__) + +struct nfp_insn_meta * +nfp_bpf_goto_meta(struct nfp_prog *nfp_prog, struct nfp_insn_meta *meta, + unsigned int insn_idx, unsigned int n_insns) +{ + unsigned int forward, backward, i; + + backward = meta->n - insn_idx; + forward = insn_idx - meta->n; + + if (min(forward, backward) > n_insns - insn_idx - 1) { + backward = n_insns - insn_idx - 1; + meta = nfp_prog_last_meta(nfp_prog); + } + if (min(forward, backward) > insn_idx && backward > insn_idx) { + forward = insn_idx; + meta = nfp_prog_first_meta(nfp_prog); + } + + if (forward < backward) + for (i = 0; i < forward; i++) + meta = nfp_meta_next(meta); + else + for (i = 0; i < backward; i++) + meta = nfp_meta_prev(meta); + + return meta; +} + +static void +nfp_record_adjust_head(struct nfp_app_bpf *bpf, struct nfp_prog *nfp_prog, + struct nfp_insn_meta *meta, + const struct bpf_reg_state *reg2) +{ + unsigned int location = UINT_MAX; + int imm; + + /* Datapath usually can give us guarantees on how much adjust head + * can be done without the need for any checks. Optimize the simple + * case where there is only one adjust head by a constant. + */ + if (reg2->type != SCALAR_VALUE || !tnum_is_const(reg2->var_off)) + goto exit_set_location; + imm = reg2->var_off.value; + /* Translator will skip all checks, we need to guarantee min pkt len */ + if (imm > ETH_ZLEN - ETH_HLEN) + goto exit_set_location; + if (imm > (int)bpf->adjust_head.guaranteed_add || + imm < -bpf->adjust_head.guaranteed_sub) + goto exit_set_location; + + if (nfp_prog->adjust_head_location) { + /* Only one call per program allowed */ + if (nfp_prog->adjust_head_location != meta->n) + goto exit_set_location; + + if (meta->arg2.reg.var_off.value != imm) + goto exit_set_location; + } + + location = meta->n; +exit_set_location: + nfp_prog->adjust_head_location = location; +} + +static bool nfp_bpf_map_update_value_ok(struct bpf_verifier_env *env) +{ + const struct bpf_reg_state *reg1 = cur_regs(env) + BPF_REG_1; + const struct bpf_reg_state *reg3 = cur_regs(env) + BPF_REG_3; + struct bpf_offloaded_map *offmap; + struct bpf_func_state *state; + struct nfp_bpf_map *nfp_map; + int off, i; + + state = env->cur_state->frame[reg3->frameno]; + + /* We need to record each time update happens with non-zero words, + * in case such word is used in atomic operations. + * Implicitly depend on nfp_bpf_stack_arg_ok(reg3) being run before. + */ + + offmap = map_to_offmap(reg1->map_ptr); + nfp_map = offmap->dev_priv; + off = reg3->off + reg3->var_off.value; + + for (i = 0; i < offmap->map.value_size; i++) { + struct bpf_stack_state *stack_entry; + unsigned int soff; + + soff = -(off + i) - 1; + stack_entry = &state->stack[soff / BPF_REG_SIZE]; + if (stack_entry->slot_type[soff % BPF_REG_SIZE] == STACK_ZERO) + continue; + + if (nfp_map->use_map[i / 4].type == NFP_MAP_USE_ATOMIC_CNT) { + pr_vlog(env, "value at offset %d/%d may be non-zero, bpf_map_update_elem() is required to initialize atomic counters to zero to avoid offload endian issues\n", + i, soff); + return false; + } + nfp_map->use_map[i / 4].non_zero_update = 1; + } + + return true; +} + +static int +nfp_bpf_stack_arg_ok(const char *fname, struct bpf_verifier_env *env, + const struct bpf_reg_state *reg, + struct nfp_bpf_reg_state *old_arg) +{ + s64 off, old_off; + + if (reg->type != PTR_TO_STACK) { + pr_vlog(env, "%s: unsupported ptr type %d\n", + fname, reg->type); + return false; + } + if (!tnum_is_const(reg->var_off)) { + pr_vlog(env, "%s: variable pointer\n", fname); + return false; + } + + off = reg->var_off.value + reg->off; + if (-off % 4) { + pr_vlog(env, "%s: unaligned stack pointer %lld\n", fname, -off); + return false; + } + + /* Rest of the checks is only if we re-parse the same insn */ + if (!old_arg) + return true; + + old_off = old_arg->reg.var_off.value + old_arg->reg.off; + old_arg->var_off |= off != old_off; + + return true; +} + +static bool +nfp_bpf_map_call_ok(const char *fname, struct bpf_verifier_env *env, + struct nfp_insn_meta *meta, + u32 helper_tgt, const struct bpf_reg_state *reg1) +{ + if (!helper_tgt) { + pr_vlog(env, "%s: not supported by FW\n", fname); + return false; + } + + return true; +} + +static int +nfp_bpf_check_call(struct nfp_prog *nfp_prog, struct bpf_verifier_env *env, + struct nfp_insn_meta *meta) +{ + const struct bpf_reg_state *reg1 = cur_regs(env) + BPF_REG_1; + const struct bpf_reg_state *reg2 = cur_regs(env) + BPF_REG_2; + const struct bpf_reg_state *reg3 = cur_regs(env) + BPF_REG_3; + struct nfp_app_bpf *bpf = nfp_prog->bpf; + u32 func_id = meta->insn.imm; + + switch (func_id) { + case BPF_FUNC_xdp_adjust_head: + if (!bpf->adjust_head.off_max) { + pr_vlog(env, "adjust_head not supported by FW\n"); + return -EOPNOTSUPP; + } + if (!(bpf->adjust_head.flags & NFP_BPF_ADJUST_HEAD_NO_META)) { + pr_vlog(env, "adjust_head: FW requires shifting metadata, not supported by the driver\n"); + return -EOPNOTSUPP; + } + + nfp_record_adjust_head(bpf, nfp_prog, meta, reg2); + break; + + case BPF_FUNC_xdp_adjust_tail: + if (!bpf->adjust_tail) { + pr_vlog(env, "adjust_tail not supported by FW\n"); + return -EOPNOTSUPP; + } + break; + + case BPF_FUNC_map_lookup_elem: + if (!nfp_bpf_map_call_ok("map_lookup", env, meta, + bpf->helpers.map_lookup, reg1) || + !nfp_bpf_stack_arg_ok("map_lookup", env, reg2, + meta->func_id ? &meta->arg2 : NULL)) + return -EOPNOTSUPP; + break; + + case BPF_FUNC_map_update_elem: + if (!nfp_bpf_map_call_ok("map_update", env, meta, + bpf->helpers.map_update, reg1) || + !nfp_bpf_stack_arg_ok("map_update", env, reg2, + meta->func_id ? &meta->arg2 : NULL) || + !nfp_bpf_stack_arg_ok("map_update", env, reg3, NULL) || + !nfp_bpf_map_update_value_ok(env)) + return -EOPNOTSUPP; + break; + + case BPF_FUNC_map_delete_elem: + if (!nfp_bpf_map_call_ok("map_delete", env, meta, + bpf->helpers.map_delete, reg1) || + !nfp_bpf_stack_arg_ok("map_delete", env, reg2, + meta->func_id ? &meta->arg2 : NULL)) + return -EOPNOTSUPP; + break; + + case BPF_FUNC_get_prandom_u32: + if (bpf->pseudo_random) + break; + pr_vlog(env, "bpf_get_prandom_u32(): FW doesn't support random number generation\n"); + return -EOPNOTSUPP; + + case BPF_FUNC_perf_event_output: + BUILD_BUG_ON(NFP_BPF_SCALAR_VALUE != SCALAR_VALUE || + NFP_BPF_MAP_VALUE != PTR_TO_MAP_VALUE || + NFP_BPF_STACK != PTR_TO_STACK || + NFP_BPF_PACKET_DATA != PTR_TO_PACKET); + + if (!bpf->helpers.perf_event_output) { + pr_vlog(env, "event_output: not supported by FW\n"); + return -EOPNOTSUPP; + } + + /* Force current CPU to make sure we can report the event + * wherever we get the control message from FW. + */ + if (reg3->var_off.mask & BPF_F_INDEX_MASK || + (reg3->var_off.value & BPF_F_INDEX_MASK) != + BPF_F_CURRENT_CPU) { + char tn_buf[48]; + + tnum_strn(tn_buf, sizeof(tn_buf), reg3->var_off); + pr_vlog(env, "event_output: must use BPF_F_CURRENT_CPU, var_off: %s\n", + tn_buf); + return -EOPNOTSUPP; + } + + /* Save space in meta, we don't care about arguments other + * than 4th meta, shove it into arg1. + */ + reg1 = cur_regs(env) + BPF_REG_4; + + if (reg1->type != SCALAR_VALUE /* NULL ptr */ && + reg1->type != PTR_TO_STACK && + reg1->type != PTR_TO_MAP_VALUE && + reg1->type != PTR_TO_PACKET) { + pr_vlog(env, "event_output: unsupported ptr type: %d\n", + reg1->type); + return -EOPNOTSUPP; + } + + if (reg1->type == PTR_TO_STACK && + !nfp_bpf_stack_arg_ok("event_output", env, reg1, NULL)) + return -EOPNOTSUPP; + + /* Warn user that on offload NFP may return success even if map + * is not going to accept the event, since the event output is + * fully async and device won't know the state of the map. + * There is also FW limitation on the event length. + * + * Lost events will not show up on the perf ring, driver + * won't see them at all. Events may also get reordered. + */ + dev_warn_once(&nfp_prog->bpf->app->pf->pdev->dev, + "bpf: note: return codes and behavior of bpf_event_output() helper differs for offloaded programs!\n"); + pr_vlog(env, "warning: return codes and behavior of event_output helper differ for offload!\n"); + + if (!meta->func_id) + break; + + if (reg1->type != meta->arg1.type) { + pr_vlog(env, "event_output: ptr type changed: %d %d\n", + meta->arg1.type, reg1->type); + return -EINVAL; + } + break; + + default: + pr_vlog(env, "unsupported function id: %d\n", func_id); + return -EOPNOTSUPP; + } + + meta->func_id = func_id; + meta->arg1 = *reg1; + meta->arg2.reg = *reg2; + + return 0; +} + +static int +nfp_bpf_check_exit(struct nfp_prog *nfp_prog, + struct bpf_verifier_env *env) +{ + const struct bpf_reg_state *reg0 = cur_regs(env) + BPF_REG_0; + u64 imm; + + if (nfp_prog->type == BPF_PROG_TYPE_XDP) + return 0; + + if (!(reg0->type == SCALAR_VALUE && tnum_is_const(reg0->var_off))) { + char tn_buf[48]; + + tnum_strn(tn_buf, sizeof(tn_buf), reg0->var_off); + pr_vlog(env, "unsupported exit state: %d, var_off: %s\n", + reg0->type, tn_buf); + return -EINVAL; + } + + imm = reg0->var_off.value; + if (nfp_prog->type == BPF_PROG_TYPE_SCHED_CLS && + imm <= TC_ACT_REDIRECT && + imm != TC_ACT_SHOT && imm != TC_ACT_STOLEN && + imm != TC_ACT_QUEUED) { + pr_vlog(env, "unsupported exit state: %d, imm: %llx\n", + reg0->type, imm); + return -EINVAL; + } + + return 0; +} + +static int +nfp_bpf_check_stack_access(struct nfp_prog *nfp_prog, + struct nfp_insn_meta *meta, + const struct bpf_reg_state *reg, + struct bpf_verifier_env *env) +{ + s32 old_off, new_off; + + if (!tnum_is_const(reg->var_off)) { + pr_vlog(env, "variable ptr stack access\n"); + return -EINVAL; + } + + if (meta->ptr.type == NOT_INIT) + return 0; + + old_off = meta->ptr.off + meta->ptr.var_off.value; + new_off = reg->off + reg->var_off.value; + + meta->ptr_not_const |= old_off != new_off; + + if (!meta->ptr_not_const) + return 0; + + if (old_off % 4 == new_off % 4) + return 0; + + pr_vlog(env, "stack access changed location was:%d is:%d\n", + old_off, new_off); + return -EINVAL; +} + +static const char *nfp_bpf_map_use_name(enum nfp_bpf_map_use use) +{ + static const char * const names[] = { + [NFP_MAP_UNUSED] = "unused", + [NFP_MAP_USE_READ] = "read", + [NFP_MAP_USE_WRITE] = "write", + [NFP_MAP_USE_ATOMIC_CNT] = "atomic", + }; + + if (use >= ARRAY_SIZE(names) || !names[use]) + return "unknown"; + return names[use]; +} + +static int +nfp_bpf_map_mark_used_one(struct bpf_verifier_env *env, + struct nfp_bpf_map *nfp_map, + unsigned int off, enum nfp_bpf_map_use use) +{ + if (nfp_map->use_map[off / 4].type != NFP_MAP_UNUSED && + nfp_map->use_map[off / 4].type != use) { + pr_vlog(env, "map value use type conflict %s vs %s off: %u\n", + nfp_bpf_map_use_name(nfp_map->use_map[off / 4].type), + nfp_bpf_map_use_name(use), off); + return -EOPNOTSUPP; + } + + if (nfp_map->use_map[off / 4].non_zero_update && + use == NFP_MAP_USE_ATOMIC_CNT) { + pr_vlog(env, "atomic counter in map value may already be initialized to non-zero value off: %u\n", + off); + return -EOPNOTSUPP; + } + + nfp_map->use_map[off / 4].type = use; + + return 0; +} + +static int +nfp_bpf_map_mark_used(struct bpf_verifier_env *env, struct nfp_insn_meta *meta, + const struct bpf_reg_state *reg, + enum nfp_bpf_map_use use) +{ + struct bpf_offloaded_map *offmap; + struct nfp_bpf_map *nfp_map; + unsigned int size, off; + int i, err; + + if (!tnum_is_const(reg->var_off)) { + pr_vlog(env, "map value offset is variable\n"); + return -EOPNOTSUPP; + } + + off = reg->var_off.value + meta->insn.off + reg->off; + size = BPF_LDST_BYTES(&meta->insn); + offmap = map_to_offmap(reg->map_ptr); + nfp_map = offmap->dev_priv; + + if (off + size > offmap->map.value_size) { + pr_vlog(env, "map value access out-of-bounds\n"); + return -EINVAL; + } + + for (i = 0; i < size; i += 4 - (off + i) % 4) { + err = nfp_bpf_map_mark_used_one(env, nfp_map, off + i, use); + if (err) + return err; + } + + return 0; +} + +static int +nfp_bpf_check_ptr(struct nfp_prog *nfp_prog, struct nfp_insn_meta *meta, + struct bpf_verifier_env *env, u8 reg_no) +{ + const struct bpf_reg_state *reg = cur_regs(env) + reg_no; + int err; + + if (reg->type != PTR_TO_CTX && + reg->type != PTR_TO_STACK && + reg->type != PTR_TO_MAP_VALUE && + reg->type != PTR_TO_PACKET) { + pr_vlog(env, "unsupported ptr type: %d\n", reg->type); + return -EINVAL; + } + + if (reg->type == PTR_TO_STACK) { + err = nfp_bpf_check_stack_access(nfp_prog, meta, reg, env); + if (err) + return err; + } + + if (reg->type == PTR_TO_MAP_VALUE) { + if (is_mbpf_load(meta)) { + err = nfp_bpf_map_mark_used(env, meta, reg, + NFP_MAP_USE_READ); + if (err) + return err; + } + if (is_mbpf_store(meta)) { + pr_vlog(env, "map writes not supported\n"); + return -EOPNOTSUPP; + } + if (is_mbpf_xadd(meta)) { + err = nfp_bpf_map_mark_used(env, meta, reg, + NFP_MAP_USE_ATOMIC_CNT); + if (err) + return err; + } + } + + if (meta->ptr.type != NOT_INIT && meta->ptr.type != reg->type) { + pr_vlog(env, "ptr type changed for instruction %d -> %d\n", + meta->ptr.type, reg->type); + return -EINVAL; + } + + meta->ptr = *reg; + + return 0; +} + +static int +nfp_bpf_check_store(struct nfp_prog *nfp_prog, struct nfp_insn_meta *meta, + struct bpf_verifier_env *env) +{ + const struct bpf_reg_state *reg = cur_regs(env) + meta->insn.dst_reg; + + if (reg->type == PTR_TO_CTX) { + if (nfp_prog->type == BPF_PROG_TYPE_XDP) { + /* XDP ctx accesses must be 4B in size */ + switch (meta->insn.off) { + case offsetof(struct xdp_md, rx_queue_index): + if (nfp_prog->bpf->queue_select) + goto exit_check_ptr; + pr_vlog(env, "queue selection not supported by FW\n"); + return -EOPNOTSUPP; + } + } + pr_vlog(env, "unsupported store to context field\n"); + return -EOPNOTSUPP; + } +exit_check_ptr: + return nfp_bpf_check_ptr(nfp_prog, meta, env, meta->insn.dst_reg); +} + +static int +nfp_bpf_check_xadd(struct nfp_prog *nfp_prog, struct nfp_insn_meta *meta, + struct bpf_verifier_env *env) +{ + const struct bpf_reg_state *sreg = cur_regs(env) + meta->insn.src_reg; + const struct bpf_reg_state *dreg = cur_regs(env) + meta->insn.dst_reg; + + if (dreg->type != PTR_TO_MAP_VALUE) { + pr_vlog(env, "atomic add not to a map value pointer: %d\n", + dreg->type); + return -EOPNOTSUPP; + } + if (sreg->type != SCALAR_VALUE) { + pr_vlog(env, "atomic add not of a scalar: %d\n", sreg->type); + return -EOPNOTSUPP; + } + + meta->xadd_over_16bit |= + sreg->var_off.value > 0xffff || sreg->var_off.mask > 0xffff; + meta->xadd_maybe_16bit |= + (sreg->var_off.value & ~sreg->var_off.mask) <= 0xffff; + + return nfp_bpf_check_ptr(nfp_prog, meta, env, meta->insn.dst_reg); +} + +static int +nfp_bpf_check_alu(struct nfp_prog *nfp_prog, struct nfp_insn_meta *meta, + struct bpf_verifier_env *env) +{ + const struct bpf_reg_state *sreg = + cur_regs(env) + meta->insn.src_reg; + const struct bpf_reg_state *dreg = + cur_regs(env) + meta->insn.dst_reg; + + meta->umin_src = min(meta->umin_src, sreg->umin_value); + meta->umax_src = max(meta->umax_src, sreg->umax_value); + meta->umin_dst = min(meta->umin_dst, dreg->umin_value); + meta->umax_dst = max(meta->umax_dst, dreg->umax_value); + + /* NFP supports u16 and u32 multiplication. + * + * For ALU64, if either operand is beyond u32's value range, we reject + * it. One thing to note, if the source operand is BPF_K, then we need + * to check "imm" field directly, and we'd reject it if it is negative. + * Because for ALU64, "imm" (with s32 type) is expected to be sign + * extended to s64 which NFP mul doesn't support. + * + * For ALU32, it is fine for "imm" be negative though, because the + * result is 32-bits and there is no difference on the low halve of + * the result for signed/unsigned mul, so we will get correct result. + */ + if (is_mbpf_mul(meta)) { + if (meta->umax_dst > U32_MAX) { + pr_vlog(env, "multiplier is not within u32 value range\n"); + return -EINVAL; + } + if (mbpf_src(meta) == BPF_X && meta->umax_src > U32_MAX) { + pr_vlog(env, "multiplicand is not within u32 value range\n"); + return -EINVAL; + } + if (mbpf_class(meta) == BPF_ALU64 && + mbpf_src(meta) == BPF_K && meta->insn.imm < 0) { + pr_vlog(env, "sign extended multiplicand won't be within u32 value range\n"); + return -EINVAL; + } + } + + /* NFP doesn't have divide instructions, we support divide by constant + * through reciprocal multiplication. Given NFP support multiplication + * no bigger than u32, we'd require divisor and dividend no bigger than + * that as well. + * + * Also eBPF doesn't support signed divide and has enforced this on C + * language level by failing compilation. However LLVM assembler hasn't + * enforced this, so it is possible for negative constant to leak in as + * a BPF_K operand through assembly code, we reject such cases as well. + */ + if (is_mbpf_div(meta)) { + if (meta->umax_dst > U32_MAX) { + pr_vlog(env, "dividend is not within u32 value range\n"); + return -EINVAL; + } + if (mbpf_src(meta) == BPF_X) { + if (meta->umin_src != meta->umax_src) { + pr_vlog(env, "divisor is not constant\n"); + return -EINVAL; + } + if (meta->umax_src > U32_MAX) { + pr_vlog(env, "divisor is not within u32 value range\n"); + return -EINVAL; + } + } + if (mbpf_src(meta) == BPF_K && meta->insn.imm < 0) { + pr_vlog(env, "divide by negative constant is not supported\n"); + return -EINVAL; + } + } + + return 0; +} + +static int +nfp_verify_insn(struct bpf_verifier_env *env, int insn_idx, int prev_insn_idx) +{ + struct nfp_prog *nfp_prog = env->prog->aux->offload->dev_priv; + struct nfp_insn_meta *meta = nfp_prog->verifier_meta; + + meta = nfp_bpf_goto_meta(nfp_prog, meta, insn_idx, env->prog->len); + nfp_prog->verifier_meta = meta; + + if (!nfp_bpf_supported_opcode(meta->insn.code)) { + pr_vlog(env, "instruction %#02x not supported\n", + meta->insn.code); + return -EINVAL; + } + + if (meta->insn.src_reg >= MAX_BPF_REG || + meta->insn.dst_reg >= MAX_BPF_REG) { + pr_vlog(env, "program uses extended registers - jit hardening?\n"); + return -EINVAL; + } + + if (meta->insn.code == (BPF_JMP | BPF_CALL)) + return nfp_bpf_check_call(nfp_prog, env, meta); + if (meta->insn.code == (BPF_JMP | BPF_EXIT)) + return nfp_bpf_check_exit(nfp_prog, env); + + if (is_mbpf_load(meta)) + return nfp_bpf_check_ptr(nfp_prog, meta, env, + meta->insn.src_reg); + if (is_mbpf_store(meta)) + return nfp_bpf_check_store(nfp_prog, meta, env); + + if (is_mbpf_xadd(meta)) + return nfp_bpf_check_xadd(nfp_prog, meta, env); + + if (is_mbpf_alu(meta)) + return nfp_bpf_check_alu(nfp_prog, meta, env); + + return 0; +} + +const struct bpf_prog_offload_ops nfp_bpf_analyzer_ops = { + .insn_hook = nfp_verify_insn, +}; |