summaryrefslogtreecommitdiffstats
path: root/src/seastar/dpdk/lib/librte_ip_frag
diff options
context:
space:
mode:
authorDaniel Baumann <daniel.baumann@progress-linux.org>2024-04-21 11:54:28 +0000
committerDaniel Baumann <daniel.baumann@progress-linux.org>2024-04-21 11:54:28 +0000
commite6918187568dbd01842d8d1d2c808ce16a894239 (patch)
tree64f88b554b444a49f656b6c656111a145cbbaa28 /src/seastar/dpdk/lib/librte_ip_frag
parentInitial commit. (diff)
downloadceph-e6918187568dbd01842d8d1d2c808ce16a894239.tar.xz
ceph-e6918187568dbd01842d8d1d2c808ce16a894239.zip
Adding upstream version 18.2.2.upstream/18.2.2
Signed-off-by: Daniel Baumann <daniel.baumann@progress-linux.org>
Diffstat (limited to 'src/seastar/dpdk/lib/librte_ip_frag')
-rw-r--r--src/seastar/dpdk/lib/librte_ip_frag/Makefile29
-rw-r--r--src/seastar/dpdk/lib/librte_ip_frag/ip_frag_common.h165
-rw-r--r--src/seastar/dpdk/lib/librte_ip_frag/ip_frag_internal.c369
-rw-r--r--src/seastar/dpdk/lib/librte_ip_frag/meson.build11
-rw-r--r--src/seastar/dpdk/lib/librte_ip_frag/rte_ip_frag.h358
-rw-r--r--src/seastar/dpdk/lib/librte_ip_frag/rte_ip_frag_common.c144
-rw-r--r--src/seastar/dpdk/lib/librte_ip_frag/rte_ip_frag_version.map26
-rw-r--r--src/seastar/dpdk/lib/librte_ip_frag/rte_ipv4_fragmentation.c185
-rw-r--r--src/seastar/dpdk/lib/librte_ip_frag/rte_ipv4_reassembly.c171
-rw-r--r--src/seastar/dpdk/lib/librte_ip_frag/rte_ipv6_fragmentation.c182
-rw-r--r--src/seastar/dpdk/lib/librte_ip_frag/rte_ipv6_reassembly.c216
11 files changed, 1856 insertions, 0 deletions
diff --git a/src/seastar/dpdk/lib/librte_ip_frag/Makefile b/src/seastar/dpdk/lib/librte_ip_frag/Makefile
new file mode 100644
index 000000000..4c3dc4d37
--- /dev/null
+++ b/src/seastar/dpdk/lib/librte_ip_frag/Makefile
@@ -0,0 +1,29 @@
+# SPDX-License-Identifier: BSD-3-Clause
+# Copyright(c) 2010-2014 Intel Corporation
+
+include $(RTE_SDK)/mk/rte.vars.mk
+
+# library name
+LIB = librte_ip_frag.a
+
+CFLAGS += -O3
+CFLAGS += $(WERROR_FLAGS) -I$(SRCDIR)
+LDLIBS += -lrte_eal -lrte_mempool -lrte_mbuf -lrte_ethdev
+LDLIBS += -lrte_hash
+
+EXPORT_MAP := rte_ip_frag_version.map
+
+LIBABIVER := 1
+
+#source files
+SRCS-$(CONFIG_RTE_LIBRTE_IP_FRAG) += rte_ipv4_fragmentation.c
+SRCS-$(CONFIG_RTE_LIBRTE_IP_FRAG) += rte_ipv6_fragmentation.c
+SRCS-$(CONFIG_RTE_LIBRTE_IP_FRAG) += rte_ipv4_reassembly.c
+SRCS-$(CONFIG_RTE_LIBRTE_IP_FRAG) += rte_ipv6_reassembly.c
+SRCS-$(CONFIG_RTE_LIBRTE_IP_FRAG) += rte_ip_frag_common.c
+SRCS-$(CONFIG_RTE_LIBRTE_IP_FRAG) += ip_frag_internal.c
+
+# install this header file
+SYMLINK-$(CONFIG_RTE_LIBRTE_IP_FRAG)-include += rte_ip_frag.h
+
+include $(RTE_SDK)/mk/rte.lib.mk
diff --git a/src/seastar/dpdk/lib/librte_ip_frag/ip_frag_common.h b/src/seastar/dpdk/lib/librte_ip_frag/ip_frag_common.h
new file mode 100644
index 000000000..a17a74076
--- /dev/null
+++ b/src/seastar/dpdk/lib/librte_ip_frag/ip_frag_common.h
@@ -0,0 +1,165 @@
+/* SPDX-License-Identifier: BSD-3-Clause
+ * Copyright(c) 2010-2014 Intel Corporation
+ */
+
+#ifndef _IP_FRAG_COMMON_H_
+#define _IP_FRAG_COMMON_H_
+
+#include "rte_ip_frag.h"
+
+/* logging macros. */
+#ifdef RTE_LIBRTE_IP_FRAG_DEBUG
+#define IP_FRAG_LOG(lvl, fmt, args...) RTE_LOG(lvl, USER1, fmt, ##args)
+#else
+#define IP_FRAG_LOG(lvl, fmt, args...) do {} while(0)
+#endif /* IP_FRAG_DEBUG */
+
+#define IPV4_KEYLEN 1
+#define IPV6_KEYLEN 4
+
+/* helper macros */
+#define IP_FRAG_MBUF2DR(dr, mb) ((dr)->row[(dr)->cnt++] = (mb))
+
+#define IPv6_KEY_BYTES(key) \
+ (key)[0], (key)[1], (key)[2], (key)[3]
+#define IPv6_KEY_BYTES_FMT \
+ "%08" PRIx64 "%08" PRIx64 "%08" PRIx64 "%08" PRIx64
+
+#ifdef RTE_LIBRTE_IP_FRAG_TBL_STAT
+#define IP_FRAG_TBL_STAT_UPDATE(s, f, v) ((s)->f += (v))
+#else
+#define IP_FRAG_TBL_STAT_UPDATE(s, f, v) do {} while (0)
+#endif /* IP_FRAG_TBL_STAT */
+
+/* internal functions declarations */
+struct rte_mbuf * ip_frag_process(struct ip_frag_pkt *fp,
+ struct rte_ip_frag_death_row *dr, struct rte_mbuf *mb,
+ uint16_t ofs, uint16_t len, uint16_t more_frags);
+
+struct ip_frag_pkt * ip_frag_find(struct rte_ip_frag_tbl *tbl,
+ struct rte_ip_frag_death_row *dr,
+ const struct ip_frag_key *key, uint64_t tms);
+
+struct ip_frag_pkt * ip_frag_lookup(struct rte_ip_frag_tbl *tbl,
+ const struct ip_frag_key *key, uint64_t tms,
+ struct ip_frag_pkt **free, struct ip_frag_pkt **stale);
+
+/* these functions need to be declared here as ip_frag_process relies on them */
+struct rte_mbuf *ipv4_frag_reassemble(struct ip_frag_pkt *fp);
+struct rte_mbuf *ipv6_frag_reassemble(struct ip_frag_pkt *fp);
+
+
+
+/*
+ * misc frag key functions
+ */
+
+/*
+ * Tell whether a key slot is unused.
+ * A key whose key_len is zero is treated as empty: returns 1 in that case
+ * and 0 otherwise.
+ */
+static inline int
+ip_frag_key_is_empty(const struct ip_frag_key *key)
+{
+	const uint32_t len = key->key_len;
+
+	return len == 0 ? 1 : 0;
+}
+
+/*
+ * Mark a key as empty by zeroing its key_len.
+ * The packet id and the address words are left untouched.
+ */
+static inline void
+ip_frag_key_invalidate(struct ip_frag_key *key)
+{
+	key->key_len = 0U;
+}
+
+/*
+ * Compare two keys.
+ * Returns 0 when the combined id/key_len word and the first k1->key_len
+ * address words of both keys are identical, a non-zero value otherwise.
+ * Note that only k1's key_len bounds the walk over src_dst[].
+ */
+static inline uint64_t
+ip_frag_key_cmp(const struct ip_frag_key *k1, const struct ip_frag_key *k2)
+{
+	uint64_t diff = k1->id_key_len ^ k2->id_key_len;
+	uint32_t w = 0;
+
+	/* accumulate the XOR of every address word; any mismatch sets a bit */
+	while (w < k1->key_len) {
+		diff |= k1->src_dst[w] ^ k2->src_dst[w];
+		w++;
+	}
+
+	return diff;
+}
+
+/*
+ * misc fragment functions
+ */
+
+/*
+ * Hand every mbuf still held by a fragmented packet over to the death row.
+ * Slots [0, fp->last_idx) are scanned; each non-NULL mbuf is appended to
+ * dr->row[] and its slot is cleared. Afterwards fp->last_idx is zero.
+ * No bounds check against the size of dr->row[] is performed here.
+ */
+static inline void
+ip_frag_free(struct ip_frag_pkt *fp, struct rte_ip_frag_death_row *dr)
+{
+	uint32_t slot;
+	uint32_t row_pos = dr->cnt;
+
+	for (slot = 0; slot != fp->last_idx; slot++) {
+		struct rte_mbuf *held = fp->frags[slot].mb;
+
+		if (held == NULL)
+			continue;
+
+		dr->row[row_pos] = held;
+		row_pos++;
+		fp->frags[slot].mb = NULL;
+	}
+
+	dr->cnt = row_pos;
+	fp->last_idx = 0;
+}
+
+/*
+ * Release a fragmented packet's mbufs right away, bypassing the death row.
+ * Every non-NULL mbuf in slots [0, fp->last_idx) is freed with
+ * rte_pktmbuf_free() and its slot cleared; fp->last_idx ends up zero.
+ */
+static inline void
+ip_frag_free_immediate(struct ip_frag_pkt *fp)
+{
+	uint32_t slot = 0;
+
+	while (slot < fp->last_idx) {
+		struct rte_mbuf *held = fp->frags[slot].mb;
+
+		if (held != NULL) {
+			IP_FRAG_LOG(DEBUG, "%s:%d\n"
+				"mbuf: %p, tms: %" PRIu64", key: <%" PRIx64 ", %#x>\n",
+				__func__, __LINE__, held, fp->start,
+				fp->key.src_dst[0], fp->key.id);
+			rte_pktmbuf_free(held);
+			fp->frags[slot].mb = NULL;
+		}
+		slot++;
+	}
+
+	fp->last_idx = 0;
+}
+
+/*
+ * Drop an entry from the table's LRU accounting once its key is empty.
+ * When fp's key has been invalidated, fp is unlinked from tbl->lru and
+ * tbl->use_entries is decremented; an entry with a non-empty key is left
+ * alone.
+ */
+static inline void
+ip_frag_inuse(struct rte_ip_frag_tbl *tbl, const struct ip_frag_pkt *fp)
+{
+	if (!ip_frag_key_is_empty(&fp->key))
+		return;
+
+	TAILQ_REMOVE(&tbl->lru, fp, lru);
+	tbl->use_entries--;
+}
+
+/*
+ * Put a fragmented-packet entry back into its initial reassembly state.
+ * The first/last fragment slots are zeroed, last_idx is rewound to
+ * IP_MIN_FRAG_NUM, frag_size is cleared, total_size is set to UINT32_MAX
+ * (i.e. "not known yet") and the creation timestamp becomes tms.
+ * The key is not touched.
+ */
+static inline void
+ip_frag_reset(struct ip_frag_pkt *fp, uint64_t tms)
+{
+	static const struct ip_frag blank = {
+		.ofs = 0,
+		.len = 0,
+		.mb = NULL,
+	};
+
+	/* the two reserved slots */
+	fp->frags[IP_FIRST_FRAG_IDX] = blank;
+	fp->frags[IP_LAST_FRAG_IDX] = blank;
+
+	/* bookkeeping */
+	fp->last_idx = IP_MIN_FRAG_NUM;
+	fp->frag_size = 0;
+	fp->total_size = UINT32_MAX;
+	fp->start = tms;
+}
+
+/*
+ * Remove an entry from the fragmentation table.
+ * Its mbufs go to the death row, it is unlinked from the LRU list, the
+ * in-use counter is decremented and the key is invalidated so the slot
+ * reads as empty. The del_num statistic is bumped when stats are enabled.
+ */
+static inline void
+ip_frag_tbl_del(struct rte_ip_frag_tbl *tbl, struct rte_ip_frag_death_row *dr,
+	struct ip_frag_pkt *fp)
+{
+	/* release the mbufs first, then retire the slot itself */
+	ip_frag_free(fp, dr);
+
+	TAILQ_REMOVE(&tbl->lru, fp, lru);
+	tbl->use_entries -= 1;
+	ip_frag_key_invalidate(&fp->key);
+
+	IP_FRAG_TBL_STAT_UPDATE(&tbl->stat, del_num, 1);
+}
+
+#endif /* _IP_FRAG_COMMON_H_ */
diff --git a/src/seastar/dpdk/lib/librte_ip_frag/ip_frag_internal.c b/src/seastar/dpdk/lib/librte_ip_frag/ip_frag_internal.c
new file mode 100644
index 000000000..97470a872
--- /dev/null
+++ b/src/seastar/dpdk/lib/librte_ip_frag/ip_frag_internal.c
@@ -0,0 +1,369 @@
+/* SPDX-License-Identifier: BSD-3-Clause
+ * Copyright(c) 2010-2014 Intel Corporation
+ */
+
+#include <stddef.h>
+
+#include <rte_jhash.h>
+#include <rte_hash_crc.h>
+
+#include "ip_frag_common.h"
+
+#define PRIME_VALUE 0xeaad8405
+
+#define IP_FRAG_TBL_POS(tbl, sig) \
+ ((tbl)->pkt + ((sig) & (tbl)->entry_mask))
+
+/*
+ * Claim a table slot for a new flow: copy the key in, reset the reassembly
+ * state with timestamp tms, append the entry to the LRU tail and count it
+ * as in use.
+ */
+static inline void
+ip_frag_tbl_add(struct rte_ip_frag_tbl *tbl, struct ip_frag_pkt *fp,
+	const struct ip_frag_key *key, uint64_t tms)
+{
+	fp->key = *key;
+	ip_frag_reset(fp, tms);
+
+	tbl->use_entries += 1;
+	TAILQ_INSERT_TAIL(&tbl->lru, fp, lru);
+
+	IP_FRAG_TBL_STAT_UPDATE(&tbl->stat, add_num, 1);
+}
+
+/*
+ * Recycle an entry in place for the same key: queue its current mbufs on
+ * the death row, reset its state with timestamp tms and move it to the
+ * tail of the LRU list. The in-use counter does not change.
+ */
+static inline void
+ip_frag_tbl_reuse(struct rte_ip_frag_tbl *tbl, struct rte_ip_frag_death_row *dr,
+	struct ip_frag_pkt *fp, uint64_t tms)
+{
+	/* drop the old fragments and start over */
+	ip_frag_free(fp, dr);
+	ip_frag_reset(fp, tms);
+
+	/* re-queue as the most recently used entry */
+	TAILQ_REMOVE(&tbl->lru, fp, lru);
+	TAILQ_INSERT_TAIL(&tbl->lru, fp, lru);
+
+	IP_FRAG_TBL_STAT_UPDATE(&tbl->stat, reuse_num, 1);
+}
+
+
+/*
+ * Compute the two bucket signatures for an IPv4 key.
+ * The first two 32-bit words of src_dst plus the packet id are hashed
+ * (CRC on x86, jhash elsewhere), seeded with PRIME_VALUE. *v1 receives the
+ * hash itself, *v2 a value derived from it by shifting and adding.
+ */
+static inline void
+ipv4_frag_hash(const struct ip_frag_key *key, uint32_t *v1, uint32_t *v2)
+{
+	const uint32_t *words = (const uint32_t *)&key->src_dst;
+	uint32_t sig;
+
+#ifdef RTE_ARCH_X86
+	sig = rte_hash_crc_4byte(words[0], PRIME_VALUE);
+	sig = rte_hash_crc_4byte(words[1], sig);
+	sig = rte_hash_crc_4byte(key->id, sig);
+#else
+
+	sig = rte_jhash_3words(words[0], words[1], key->id, PRIME_VALUE);
+#endif /* RTE_ARCH_X86 */
+
+	*v1 = sig;
+	*v2 = (sig >> 14) + (sig << 7);
+}
+
+/*
+ * Compute the two bucket signatures for an IPv6 key.
+ * All eight 32-bit words of src_dst plus the packet id are hashed (CRC on
+ * x86, three chained jhash rounds elsewhere), seeded with PRIME_VALUE.
+ * *v1 receives the hash itself, *v2 a value derived from it by shifting
+ * and adding.
+ */
+static inline void
+ipv6_frag_hash(const struct ip_frag_key *key, uint32_t *v1, uint32_t *v2)
+{
+	const uint32_t *words = (const uint32_t *)&key->src_dst;
+	uint32_t sig;
+
+#ifdef RTE_ARCH_X86
+	uint32_t n;
+
+	/* fold every 32-bit address word, in order, then the id */
+	sig = PRIME_VALUE;
+	for (n = 0; n != sizeof(key->src_dst) / sizeof(words[0]); n++)
+		sig = rte_hash_crc_4byte(words[n], sig);
+	sig = rte_hash_crc_4byte(key->id, sig);
+#else
+
+	sig = rte_jhash_3words(words[0], words[1], words[2], PRIME_VALUE);
+	sig = rte_jhash_3words(words[3], words[4], words[5], sig);
+	sig = rte_jhash_3words(words[6], words[7], key->id, sig);
+#endif /* RTE_ARCH_X86 */
+
+	*v1 = sig;
+	*v2 = (sig >> 14) + (sig << 7);
+}
+
+/*
+ * Record one incoming fragment in a reassembly entry and, if the entry is
+ * now complete, reassemble the packet.
+ *
+ * fp         - table entry the fragment belongs to.
+ * dr         - death row that receives mbufs to be freed on error.
+ * mb         - mbuf holding the fragment.
+ * ofs        - fragment offset; 0 selects the "first fragment" slot.
+ * len        - fragment length, added to fp->frag_size.
+ * more_frags - 0 (with non-zero ofs) selects the "last fragment" slot and
+ *              fixes fp->total_size to ofs + len.
+ *
+ * Returns the reassembled mbuf when all fragments are present and
+ * reassembly succeeds; NULL when more fragments are awaited or on error.
+ * On error every stored fragment is moved to the death row. The entry's
+ * key is invalidated on every path except "still waiting for fragments".
+ */
+struct rte_mbuf *
+ip_frag_process(struct ip_frag_pkt *fp, struct rte_ip_frag_death_row *dr,
+ struct rte_mbuf *mb, uint16_t ofs, uint16_t len, uint16_t more_frags)
+{
+ uint32_t idx;
+
+ fp->frag_size += len;
+
+ /* this is the first fragment. */
+ if (ofs == 0) {
+ /* a second "first" fragment yields UINT32_MAX, caught below */
+ idx = (fp->frags[IP_FIRST_FRAG_IDX].mb == NULL) ?
+ IP_FIRST_FRAG_IDX : UINT32_MAX;
+
+ /* this is the last fragment. */
+ } else if (more_frags == 0) {
+ fp->total_size = ofs + len;
+ idx = (fp->frags[IP_LAST_FRAG_IDX].mb == NULL) ?
+ IP_LAST_FRAG_IDX : UINT32_MAX;
+
+ /* this is the intermediate fragment. */
+ } else if ((idx = fp->last_idx) <
+ sizeof (fp->frags) / sizeof (fp->frags[0])) {
+ /* idx keeps the slot to fill; advance only while in range */
+ fp->last_idx++;
+ }
+
+ /*
+ * erroneous packet: either exceed max allowed number of fragments,
+ * or duplicate first/last fragment encountered.
+ */
+ if (idx >= sizeof (fp->frags) / sizeof (fp->frags[0])) {
+
+ /* report an error. */
+ if (fp->key.key_len == IPV4_KEYLEN)
+ IP_FRAG_LOG(DEBUG, "%s:%d invalid fragmented packet:\n"
+ "ipv4_frag_pkt: %p, key: <%" PRIx64 ", %#x>, "
+ "total_size: %u, frag_size: %u, last_idx: %u\n"
+ "first fragment: ofs: %u, len: %u\n"
+ "last fragment: ofs: %u, len: %u\n\n",
+ __func__, __LINE__,
+ fp, fp->key.src_dst[0], fp->key.id,
+ fp->total_size, fp->frag_size, fp->last_idx,
+ fp->frags[IP_FIRST_FRAG_IDX].ofs,
+ fp->frags[IP_FIRST_FRAG_IDX].len,
+ fp->frags[IP_LAST_FRAG_IDX].ofs,
+ fp->frags[IP_LAST_FRAG_IDX].len);
+ else
+ IP_FRAG_LOG(DEBUG, "%s:%d invalid fragmented packet:\n"
+ "ipv6_frag_pkt: %p, key: <" IPv6_KEY_BYTES_FMT ", %#x>, "
+ "total_size: %u, frag_size: %u, last_idx: %u\n"
+ "first fragment: ofs: %u, len: %u\n"
+ "last fragment: ofs: %u, len: %u\n\n",
+ __func__, __LINE__,
+ fp, IPv6_KEY_BYTES(fp->key.src_dst), fp->key.id,
+ fp->total_size, fp->frag_size, fp->last_idx,
+ fp->frags[IP_FIRST_FRAG_IDX].ofs,
+ fp->frags[IP_FIRST_FRAG_IDX].len,
+ fp->frags[IP_LAST_FRAG_IDX].ofs,
+ fp->frags[IP_LAST_FRAG_IDX].len);
+
+ /* free all fragments, invalidate the entry. */
+ ip_frag_free(fp, dr);
+ ip_frag_key_invalidate(&fp->key);
+ /* the offending fragment was never stored, so queue it separately */
+ IP_FRAG_MBUF2DR(dr, mb);
+
+ return NULL;
+ }
+
+ fp->frags[idx].ofs = ofs;
+ fp->frags[idx].len = len;
+ fp->frags[idx].mb = mb;
+
+ /* from here on mb holds the return value, not the input fragment */
+ mb = NULL;
+
+ /* not all fragments are collected yet. */
+ if (likely (fp->frag_size < fp->total_size)) {
+ return mb;
+
+ /* if we collected all fragments, then try to reassemble. */
+ } else if (fp->frag_size == fp->total_size &&
+ fp->frags[IP_FIRST_FRAG_IDX].mb != NULL) {
+ if (fp->key.key_len == IPV4_KEYLEN)
+ mb = ipv4_frag_reassemble(fp);
+ else
+ mb = ipv6_frag_reassemble(fp);
+ }
+
+ /* erroneous set of fragments. */
+ if (mb == NULL) {
+
+ /* report an error. */
+ if (fp->key.key_len == IPV4_KEYLEN)
+ IP_FRAG_LOG(DEBUG, "%s:%d invalid fragmented packet:\n"
+ "ipv4_frag_pkt: %p, key: <%" PRIx64 ", %#x>, "
+ "total_size: %u, frag_size: %u, last_idx: %u\n"
+ "first fragment: ofs: %u, len: %u\n"
+ "last fragment: ofs: %u, len: %u\n\n",
+ __func__, __LINE__,
+ fp, fp->key.src_dst[0], fp->key.id,
+ fp->total_size, fp->frag_size, fp->last_idx,
+ fp->frags[IP_FIRST_FRAG_IDX].ofs,
+ fp->frags[IP_FIRST_FRAG_IDX].len,
+ fp->frags[IP_LAST_FRAG_IDX].ofs,
+ fp->frags[IP_LAST_FRAG_IDX].len);
+ else
+ IP_FRAG_LOG(DEBUG, "%s:%d invalid fragmented packet:\n"
+ "ipv6_frag_pkt: %p, key: <" IPv6_KEY_BYTES_FMT ", %#x>, "
+ "total_size: %u, frag_size: %u, last_idx: %u\n"
+ "first fragment: ofs: %u, len: %u\n"
+ "last fragment: ofs: %u, len: %u\n\n",
+ __func__, __LINE__,
+ fp, IPv6_KEY_BYTES(fp->key.src_dst), fp->key.id,
+ fp->total_size, fp->frag_size, fp->last_idx,
+ fp->frags[IP_FIRST_FRAG_IDX].ofs,
+ fp->frags[IP_FIRST_FRAG_IDX].len,
+ fp->frags[IP_LAST_FRAG_IDX].ofs,
+ fp->frags[IP_LAST_FRAG_IDX].len);
+
+ /* free associated resources. */
+ ip_frag_free(fp, dr);
+ }
+
+ /* we are done with that entry, invalidate it. */
+ ip_frag_key_invalidate(&fp->key);
+ return mb;
+}
+
+
+/*
+ * Find an entry in the table for the corresponding fragment.
+ * If such entry is not present, then allocate a new one.
+ * If the entry is stale, then free and reuse it.
+ *
+ * tbl - fragmentation table to search.
+ * dr  - death row receiving the mbufs of any entry that gets evicted.
+ * key - flow key of the incoming fragment.
+ * tms - current timestamp, compared against entry start + tbl->max_cycles.
+ *
+ * Returns the entry to use for this fragment, or NULL when no slot could
+ * be obtained. The result (NULL included) is cached in tbl->last.
+ */
+struct ip_frag_pkt *
+ip_frag_find(struct rte_ip_frag_tbl *tbl, struct rte_ip_frag_death_row *dr,
+	const struct ip_frag_key *key, uint64_t tms)
+{
+	struct ip_frag_pkt *pkt, *free, *stale, *lru;
+	uint64_t max_cycles;
+
+	/*
+	 * Actually the two lines below are totally redundant.
+	 * They are here just to make gcc 4.6 happy.
+	 */
+	free = NULL;
+	stale = NULL;
+	max_cycles = tbl->max_cycles;
+
+	IP_FRAG_TBL_STAT_UPDATE(&tbl->stat, find_num, 1);
+
+	if ((pkt = ip_frag_lookup(tbl, key, tms, &free, &stale)) == NULL) {
+
+		/* timed-out entry, free and invalidate it */
+		if (stale != NULL) {
+			ip_frag_tbl_del(tbl, dr, stale);
+			free = stale;
+
+		/*
+		 * we found a free entry, check if we can use it.
+		 * If we run out of free entries in the table, then
+		 * check if we have a timed out entry to delete.
+		 */
+		} else if (free != NULL &&
+				tbl->max_entries <= tbl->use_entries) {
+			lru = TAILQ_FIRST(&tbl->lru);
+
+			/*
+			 * The LRU list is empty when max_entries is 0;
+			 * treat that as "no space" rather than
+			 * dereferencing a NULL head.
+			 */
+			if (lru != NULL && max_cycles + lru->start < tms) {
+				ip_frag_tbl_del(tbl, dr, lru);
+			} else {
+				free = NULL;
+				IP_FRAG_TBL_STAT_UPDATE(&tbl->stat,
+					fail_nospace, 1);
+			}
+		}
+
+		/* found a free entry to reuse. */
+		if (free != NULL) {
+			ip_frag_tbl_add(tbl, free, key, tms);
+			pkt = free;
+		}
+
+	/*
+	 * we found the flow, but it is already timed out,
+	 * so free associated resources, reposition it in the LRU list,
+	 * and reuse it.
+	 */
+	} else if (max_cycles + pkt->start < tms) {
+		ip_frag_tbl_reuse(tbl, dr, pkt, tms);
+	}
+
+	IP_FRAG_TBL_STAT_UPDATE(&tbl->stat, fail_total, (pkt == NULL));
+
+	tbl->last = pkt;
+	return pkt;
+}
+
+/*
+ * Search the table for an entry matching key.
+ *
+ * The most recently returned entry (tbl->last) is checked first. Otherwise
+ * the key is hashed into two signatures and the two corresponding buckets
+ * of tbl->bucket_entries slots each are scanned side by side.
+ *
+ * tbl   - fragmentation table to search.
+ * key   - flow key to look for.
+ * tms   - current timestamp, used to spot timed-out entries.
+ * free  - on a miss, receives the first empty slot seen (or NULL).
+ * stale - on a miss, receives the first timed-out slot seen (or NULL).
+ *
+ * Returns the matching entry, or NULL on a miss. On a hit *free and *stale
+ * are not written.
+ */
+struct ip_frag_pkt *
+ip_frag_lookup(struct rte_ip_frag_tbl *tbl,
+	const struct ip_frag_key *key, uint64_t tms,
+	struct ip_frag_pkt **free, struct ip_frag_pkt **stale)
+{
+	struct ip_frag_pkt *p1, *p2;
+	struct ip_frag_pkt *empty, *old;
+	uint64_t max_cycles;
+	uint32_t i, assoc, sig1, sig2;
+
+	empty = NULL;
+	old = NULL;
+
+	max_cycles = tbl->max_cycles;
+	assoc = tbl->bucket_entries;
+
+	/* fast path: same flow as the previous lookup */
+	if (tbl->last != NULL && ip_frag_key_cmp(key, &tbl->last->key) == 0)
+		return tbl->last;
+
+	/* different hashing methods for IPv4 and IPv6 */
+	if (key->key_len == IPV4_KEYLEN)
+		ipv4_frag_hash(key, &sig1, &sig2);
+	else
+		ipv6_frag_hash(key, &sig1, &sig2);
+
+	p1 = IP_FRAG_TBL_POS(tbl, sig1);
+	p2 = IP_FRAG_TBL_POS(tbl, sig2);
+
+	for (i = 0; i != assoc; i++) {
+		/* pick the log format from the entry being printed */
+		if (p1[i].key.key_len == IPV4_KEYLEN)
+			IP_FRAG_LOG(DEBUG, "%s:%d:\n"
+				"tbl: %p, max_entries: %u, use_entries: %u\n"
+				"ipv4_frag_pkt line0: %p, index: %u from %u\n"
+				"key: <%" PRIx64 ", %#x>, start: %" PRIu64 "\n",
+				__func__, __LINE__,
+				tbl, tbl->max_entries, tbl->use_entries,
+				p1, i, assoc,
+				p1[i].key.src_dst[0], p1[i].key.id, p1[i].start);
+		else
+			IP_FRAG_LOG(DEBUG, "%s:%d:\n"
+				"tbl: %p, max_entries: %u, use_entries: %u\n"
+				"ipv6_frag_pkt line0: %p, index: %u from %u\n"
+				"key: <" IPv6_KEY_BYTES_FMT ", %#x>, start: %" PRIu64 "\n",
+				__func__, __LINE__,
+				tbl, tbl->max_entries, tbl->use_entries,
+				p1, i, assoc,
+				IPv6_KEY_BYTES(p1[i].key.src_dst), p1[i].key.id, p1[i].start);
+
+		/* match wins; otherwise remember the first empty / expired slot */
+		if (ip_frag_key_cmp(key, &p1[i].key) == 0)
+			return p1 + i;
+		else if (ip_frag_key_is_empty(&p1[i].key))
+			empty = (empty == NULL) ? (p1 + i) : empty;
+		else if (max_cycles + p1[i].start < tms)
+			old = (old == NULL) ? (p1 + i) : old;
+
+		if (p2[i].key.key_len == IPV4_KEYLEN)
+			IP_FRAG_LOG(DEBUG, "%s:%d:\n"
+				"tbl: %p, max_entries: %u, use_entries: %u\n"
+				"ipv4_frag_pkt line1: %p, index: %u from %u\n"
+				"key: <%" PRIx64 ", %#x>, start: %" PRIu64 "\n",
+				__func__, __LINE__,
+				tbl, tbl->max_entries, tbl->use_entries,
+				p2, i, assoc,
+				p2[i].key.src_dst[0], p2[i].key.id, p2[i].start);
+		else
+			IP_FRAG_LOG(DEBUG, "%s:%d:\n"
+				"tbl: %p, max_entries: %u, use_entries: %u\n"
+				"ipv6_frag_pkt line1: %p, index: %u from %u\n"
+				"key: <" IPv6_KEY_BYTES_FMT ", %#x>, start: %" PRIu64 "\n",
+				__func__, __LINE__,
+				tbl, tbl->max_entries, tbl->use_entries,
+				p2, i, assoc,
+				IPv6_KEY_BYTES(p2[i].key.src_dst), p2[i].key.id, p2[i].start);
+
+		if (ip_frag_key_cmp(key, &p2[i].key) == 0)
+			return p2 + i;
+		else if (ip_frag_key_is_empty(&p2[i].key))
+			empty = (empty == NULL) ? (p2 + i) : empty;
+		else if (max_cycles + p2[i].start < tms)
+			old = (old == NULL) ? (p2 + i) : old;
+	}
+
+	*free = empty;
+	*stale = old;
+	return NULL;
+}
diff --git a/src/seastar/dpdk/lib/librte_ip_frag/meson.build b/src/seastar/dpdk/lib/librte_ip_frag/meson.build
new file mode 100644
index 000000000..c5b9a4596
--- /dev/null
+++ b/src/seastar/dpdk/lib/librte_ip_frag/meson.build
@@ -0,0 +1,11 @@
+# SPDX-License-Identifier: BSD-3-Clause
+# Copyright(c) 2017 Intel Corporation
+
+sources = files('rte_ipv4_fragmentation.c',
+ 'rte_ipv6_fragmentation.c',
+ 'rte_ipv4_reassembly.c',
+ 'rte_ipv6_reassembly.c',
+ 'rte_ip_frag_common.c',
+ 'ip_frag_internal.c')
+headers = files('rte_ip_frag.h')
+deps += ['ethdev', 'hash']
diff --git a/src/seastar/dpdk/lib/librte_ip_frag/rte_ip_frag.h b/src/seastar/dpdk/lib/librte_ip_frag/rte_ip_frag.h
new file mode 100644
index 000000000..bc4c100f3
--- /dev/null
+++ b/src/seastar/dpdk/lib/librte_ip_frag/rte_ip_frag.h
@@ -0,0 +1,358 @@
+/* SPDX-License-Identifier: BSD-3-Clause
+ * Copyright(c) 2010-2014 Intel Corporation
+ */
+
+#ifndef _RTE_IP_FRAG_H_
+#define _RTE_IP_FRAG_H_
+
+/**
+ * @file
+ * RTE IP Fragmentation and Reassembly
+ *
+ * Implementation of IP packet fragmentation and reassembly.
+ */
+
+#ifdef __cplusplus
+extern "C" {
+#endif
+
+#include <stdint.h>
+#include <stdio.h>
+
+#include <rte_config.h>
+#include <rte_malloc.h>
+#include <rte_memory.h>
+#include <rte_ip.h>
+#include <rte_byteorder.h>
+
+struct rte_mbuf;
+
+/*
+ * Reserved positions in ip_frag_pkt.frags[] and its size limits.
+ * Slot 0 holds the last fragment and slot 1 the first one; intermediate
+ * fragments are stored from index IP_MIN_FRAG_NUM (2) upwards.
+ */
+enum {
+ IP_LAST_FRAG_IDX, /**< index of last fragment */
+ IP_FIRST_FRAG_IDX, /**< index of first fragment */
+ IP_MIN_FRAG_NUM, /**< minimum number of fragments */
+ IP_MAX_FRAG_NUM = RTE_LIBRTE_IP_FRAG_MAX_FRAG,
+ /**< maximum number of fragments per packet */
+};
+
+/** @internal fragmented mbuf */
+struct ip_frag {
+ uint16_t ofs; /**< offset into the packet */
+ uint16_t len; /**< length of fragment */
+ struct rte_mbuf *mb; /**< fragment mbuf */
+};
+
+/**
+ * @internal <src addr, dst_addr, id> to uniquely identify fragmented datagram.
+ *
+ * key_len is the number of 64-bit src_dst[] words that take part in key
+ * comparison (IPV4_KEYLEN is 1, IPV6_KEYLEN is 4); a key_len of 0 marks the
+ * key as empty. id and key_len overlay id_key_len so that both can be
+ * compared with a single 64-bit operation.
+ */
+struct ip_frag_key {
+ uint64_t src_dst[4];
+ /**< src and dst address, only first 8 bytes used for IPv4 */
+ RTE_STD_C11
+ union {
+ uint64_t id_key_len; /**< combined for easy fetch */
+ __extension__
+ struct {
+ uint32_t id; /**< packet id */
+ uint32_t key_len; /**< src/dst key length */
+ };
+ };
+};
+
+/**
+ * @internal Fragmented packet to reassemble.
+ * First two entries in the frags[] array are for the last and first fragments.
+ */
+struct ip_frag_pkt {
+ TAILQ_ENTRY(ip_frag_pkt) lru; /**< LRU list */
+ struct ip_frag_key key; /**< fragmentation key */
+ uint64_t start; /**< creation timestamp */
+ uint32_t total_size; /**< expected reassembled size */
+ uint32_t frag_size; /**< size of fragments received */
+ uint32_t last_idx; /**< index of next entry to fill */
+ struct ip_frag frags[IP_MAX_FRAG_NUM]; /**< fragments */
+} __rte_cache_aligned;
+
+#define IP_FRAG_DEATH_ROW_LEN 32 /**< death row size (in packets) */
+
+/* death row size in mbufs */
+#define IP_FRAG_DEATH_ROW_MBUF_LEN (IP_FRAG_DEATH_ROW_LEN * (IP_MAX_FRAG_NUM + 1))
+
+/** mbuf death row (packets to be freed) */
+struct rte_ip_frag_death_row {
+ uint32_t cnt; /**< number of mbufs currently on death row */
+ struct rte_mbuf *row[IP_FRAG_DEATH_ROW_MBUF_LEN];
+ /**< mbufs to be freed */
+};
+
+TAILQ_HEAD(ip_pkt_list, ip_frag_pkt); /**< @internal fragments tailq */
+
+/** fragmentation table statistics */
+struct ip_frag_tbl_stat {
+ uint64_t find_num; /**< total # of find/insert attempts. */
+ uint64_t add_num; /**< # of add ops. */
+ uint64_t del_num; /**< # of del ops. */
+ uint64_t reuse_num; /**< # of reuse (del/add) ops. */
+ uint64_t fail_total; /**< total # of add failures. */
+ uint64_t fail_nospace; /**< # of 'no space' add failures. */
+} __rte_cache_aligned;
+
+/**
+ * fragmentation table
+ *
+ * Allocated by rte_ip_frag_table_create() as one block: this header followed
+ * by nb_entries ip_frag_pkt slots in pkt[].
+ */
+struct rte_ip_frag_tbl {
+ uint64_t max_cycles; /**< ttl for table entries. */
+ uint32_t entry_mask; /**< hash value mask. */
+ uint32_t max_entries; /**< max entries allowed. */
+ uint32_t use_entries; /**< entries in use. */
+ uint32_t bucket_entries; /**< hash associativity. */
+ uint32_t nb_entries; /**< total size of the table. */
+ uint32_t nb_buckets; /**< num of associativity lines. */
+ struct ip_frag_pkt *last; /**< last used entry. */
+ struct ip_pkt_list lru; /**< LRU list for table entries. */
+ struct ip_frag_tbl_stat stat; /**< statistics counters. */
+ /* trailing variable-length storage, sized at allocation time */
+ __extension__ struct ip_frag_pkt pkt[0]; /**< hash table. */
+};
+
+/** IPv6 fragment extension header */
+#define RTE_IPV6_EHDR_MF_SHIFT 0
+#define RTE_IPV6_EHDR_MF_MASK 1
+#define RTE_IPV6_EHDR_FO_SHIFT 3
+#define RTE_IPV6_EHDR_FO_MASK (~((1 << RTE_IPV6_EHDR_FO_SHIFT) - 1))
+#define RTE_IPV6_EHDR_FO_ALIGN (1 << RTE_IPV6_EHDR_FO_SHIFT)
+
+#define RTE_IPV6_FRAG_USED_MASK \
+ (RTE_IPV6_EHDR_MF_MASK | RTE_IPV6_EHDR_FO_MASK)
+
+#define RTE_IPV6_GET_MF(x) ((x) & RTE_IPV6_EHDR_MF_MASK)
+#define RTE_IPV6_GET_FO(x) ((x) >> RTE_IPV6_EHDR_FO_SHIFT)
+
+#define RTE_IPV6_SET_FRAG_DATA(fo, mf) \
+ (((fo) & RTE_IPV6_EHDR_FO_MASK) | ((mf) & RTE_IPV6_EHDR_MF_MASK))
+
+struct ipv6_extension_fragment {
+ uint8_t next_header; /**< Next header type */
+ uint8_t reserved; /**< Reserved */
+ uint16_t frag_data; /**< All fragmentation data */
+ uint32_t id; /**< Packet ID */
+} __attribute__((__packed__));
+
+
+
+/**
+ * Create a new IP fragmentation table.
+ *
+ * @param bucket_num
+ * Number of buckets in the hash table.
+ * @param bucket_entries
+ * Number of entries per bucket (i.e. hash associativity).
+ * Should be power of two.
+ * @param max_entries
+ * Maximum number of entries that could be stored in the table.
+ * The value should be less than or equal to bucket_num * bucket_entries.
+ * @param max_cycles
+ * Maximum TTL in cycles for each fragmented packet.
+ * @param socket_id
+ * The *socket_id* argument is the socket identifier in the case of
+ * NUMA. The value can be *SOCKET_ID_ANY* if there is no NUMA constraints.
+ * @return
+ * The pointer to the new allocated fragmentation table, on success. NULL on error.
+ */
+struct rte_ip_frag_tbl * rte_ip_frag_table_create(uint32_t bucket_num,
+ uint32_t bucket_entries, uint32_t max_entries,
+ uint64_t max_cycles, int socket_id);
+
+/**
+ * Free allocated IP fragmentation table.
+ *
+ * @param tbl
+ * Fragmentation table to free.
+ */
+void
+rte_ip_frag_table_destroy(struct rte_ip_frag_tbl *tbl);
+
+/**
+ * This function implements the fragmentation of IPv6 packets.
+ *
+ * @param pkt_in
+ * The input packet.
+ * @param pkts_out
+ * Array storing the output fragments.
+ * @param nb_pkts_out
+ * Number of fragments.
+ * @param mtu_size
+ * Size in bytes of the Maximum Transfer Unit (MTU) for the outgoing IPv6
+ * datagrams. This value includes the size of the IPv6 header.
+ * @param pool_direct
+ * MBUF pool used for allocating direct buffers for the output fragments.
+ * @param pool_indirect
+ * MBUF pool used for allocating indirect buffers for the output fragments.
+ * @return
+ * Upon successful completion - number of output fragments placed
+ * in the pkts_out array.
+ * Otherwise - (-1) * errno.
+ */
+int32_t
+rte_ipv6_fragment_packet(struct rte_mbuf *pkt_in,
+ struct rte_mbuf **pkts_out,
+ uint16_t nb_pkts_out,
+ uint16_t mtu_size,
+ struct rte_mempool *pool_direct,
+ struct rte_mempool *pool_indirect);
+
+/**
+ * This function implements reassembly of fragmented IPv6 packets.
+ * Incoming mbuf should have its l2_len/l3_len fields setup correctly.
+ *
+ * @param tbl
+ * Table where to lookup/add the fragmented packet.
+ * @param dr
+ * Death row to free buffers to
+ * @param mb
+ * Incoming mbuf with IPv6 fragment.
+ * @param tms
+ * Fragment arrival timestamp.
+ * @param ip_hdr
+ * Pointer to the IPv6 header.
+ * @param frag_hdr
+ * Pointer to the IPv6 fragment extension header.
+ * @return
+ * Pointer to mbuf for reassembled packet, or NULL if:
+ * - an error occurred.
+ * - not all fragments of the packet are collected yet.
+ */
+struct rte_mbuf *rte_ipv6_frag_reassemble_packet(struct rte_ip_frag_tbl *tbl,
+ struct rte_ip_frag_death_row *dr,
+ struct rte_mbuf *mb, uint64_t tms, struct ipv6_hdr *ip_hdr,
+ struct ipv6_extension_fragment *frag_hdr);
+
+/**
+ * Return a pointer to the packet's fragment header, if found.
+ * It only looks at the extension header that's right after the fixed IPv6
+ * header, and doesn't follow the whole chain of extension headers.
+ *
+ * @param hdr
+ * Pointer to the IPv6 header.
+ * @return
+ * Pointer to the IPv6 fragment extension header, or NULL if it's not
+ * present.
+ */
+static inline struct ipv6_extension_fragment *
+rte_ipv6_frag_get_ipv6_fragment_header(struct ipv6_hdr *hdr)
+{
+	/* only the header directly after the fixed IPv6 header is examined */
+	if (hdr->proto != IPPROTO_FRAGMENT)
+		return NULL;
+
+	/* the extension header starts right past the fixed header */
+	return (struct ipv6_extension_fragment *)(hdr + 1);
+}
+
+/**
+ * IPv4 fragmentation.
+ *
+ * This function implements the fragmentation of IPv4 packets.
+ *
+ * @param pkt_in
+ * The input packet.
+ * @param pkts_out
+ * Array storing the output fragments.
+ * @param nb_pkts_out
+ * Number of fragments.
+ * @param mtu_size
+ * Size in bytes of the Maximum Transfer Unit (MTU) for the outgoing IPv4
+ * datagrams. This value includes the size of the IPv4 header.
+ * @param pool_direct
+ * MBUF pool used for allocating direct buffers for the output fragments.
+ * @param pool_indirect
+ * MBUF pool used for allocating indirect buffers for the output fragments.
+ * @return
+ * Upon successful completion - number of output fragments placed
+ * in the pkts_out array.
+ * Otherwise - (-1) * errno.
+ */
+int32_t rte_ipv4_fragment_packet(struct rte_mbuf *pkt_in,
+ struct rte_mbuf **pkts_out,
+ uint16_t nb_pkts_out, uint16_t mtu_size,
+ struct rte_mempool *pool_direct,
+ struct rte_mempool *pool_indirect);
+
+/**
+ * This function implements reassembly of fragmented IPv4 packets.
+ * Incoming mbufs should have its l2_len/l3_len fields setup correctly.
+ *
+ * @param tbl
+ * Table where to lookup/add the fragmented packet.
+ * @param dr
+ * Death row to free buffers to
+ * @param mb
+ * Incoming mbuf with IPv4 fragment.
+ * @param tms
+ * Fragment arrival timestamp.
+ * @param ip_hdr
+ * Pointer to the IPV4 header inside the fragment.
+ * @return
+ * Pointer to mbuf for reassembled packet, or NULL if:
+ * - an error occurred.
+ * - not all fragments of the packet are collected yet.
+ */
+struct rte_mbuf * rte_ipv4_frag_reassemble_packet(struct rte_ip_frag_tbl *tbl,
+ struct rte_ip_frag_death_row *dr,
+ struct rte_mbuf *mb, uint64_t tms, struct ipv4_hdr *ip_hdr);
+
+/**
+ * Check if the IPv4 packet is fragmented
+ *
+ * @param hdr
+ * IPv4 header of the packet
+ * @return
+ * 1 if fragmented, 0 if not fragmented
+ */
+static inline int
+rte_ipv4_frag_pkt_is_fragmented(const struct ipv4_hdr *hdr)
+{
+	/* flags and offset share one big-endian 16-bit header field */
+	const uint16_t frag_field = rte_be_to_cpu_16(hdr->fragment_offset);
+
+	/* "more fragments" set: certainly a fragment */
+	if ((uint16_t)(frag_field & IPV4_HDR_MF_FLAG) != 0)
+		return 1;
+
+	/* otherwise it is a fragment only if it carries a non-zero offset */
+	return (uint16_t)(frag_field & IPV4_HDR_OFFSET_MASK) != 0;
+}
+
+/**
+ * Free mbufs on a given death row.
+ *
+ * @param dr
+ * Death row to free mbufs in.
+ * @param prefetch
+ * How many buffers to prefetch before freeing.
+ */
+void rte_ip_frag_free_death_row(struct rte_ip_frag_death_row *dr,
+ uint32_t prefetch);
+
+
+/**
+ * Dump fragmentation table statistics to file.
+ *
+ * @param f
+ * File to dump statistics to
+ * @param tbl
+ * Fragmentation table to dump statistics from
+ */
+void
+rte_ip_frag_table_statistics_dump(FILE * f, const struct rte_ip_frag_tbl *tbl);
+
+/**
+ * Delete expired fragments
+ *
+ * @param tbl
+ * Table to delete expired fragments from
+ * @param dr
+ * Death row to free buffers to
+ * @param tms
+ * Current timestamp
+ */
+void __rte_experimental
+rte_frag_table_del_expired_entries(struct rte_ip_frag_tbl *tbl,
+ struct rte_ip_frag_death_row *dr, uint64_t tms);
+
+#ifdef __cplusplus
+}
+#endif
+
+#endif /* _RTE_IP_FRAG_H_ */
diff --git a/src/seastar/dpdk/lib/librte_ip_frag/rte_ip_frag_common.c b/src/seastar/dpdk/lib/librte_ip_frag/rte_ip_frag_common.c
new file mode 100644
index 000000000..a23f6f24f
--- /dev/null
+++ b/src/seastar/dpdk/lib/librte_ip_frag/rte_ip_frag_common.c
@@ -0,0 +1,144 @@
+/* SPDX-License-Identifier: BSD-3-Clause
+ * Copyright(c) 2010-2014 Intel Corporation
+ */
+
+#include <stddef.h>
+#include <stdio.h>
+
+#include <rte_memory.h>
+#include <rte_log.h>
+
+#include "ip_frag_common.h"
+
+#define IP_FRAG_HASH_FNUM 2
+
+/*
+ * Free every mbuf queued on the death row and reset its counter.
+ * Prefetching runs `prefetch` entries (capped at dr->cnt) ahead of the
+ * entry being freed.
+ */
+void
+rte_ip_frag_free_death_row(struct rte_ip_frag_death_row *dr,
+	uint32_t prefetch)
+{
+	const uint32_t total = dr->cnt;
+	const uint32_t ahead = RTE_MIN(prefetch, dr->cnt);
+	uint32_t pos;
+
+	/* warm up: prefetch the first `ahead` entries */
+	for (pos = 0; pos != ahead; pos++)
+		rte_prefetch0(dr->row[pos]);
+
+	/* free in order, prefetching `ahead` entries forward while any remain */
+	for (pos = 0; pos != total; pos++) {
+		if (pos < total - ahead)
+			rte_prefetch0(dr->row[pos + ahead]);
+		rte_pktmbuf_free(dr->row[pos]);
+	}
+
+	dr->cnt = 0;
+}
+
+/*
+ * Create a fragmentation table.
+ * The slot count is rte_align32pow2(bucket_num) * bucket_entries *
+ * IP_FRAG_HASH_FNUM. Returns NULL if bucket_entries is not a power of two,
+ * if the slot count is zero, exceeds UINT32_MAX or is smaller than
+ * max_entries, or if the zeroed allocation on socket_id fails.
+ */
+struct rte_ip_frag_tbl *
+rte_ip_frag_table_create(uint32_t bucket_num, uint32_t bucket_entries,
+	uint32_t max_entries, uint64_t max_cycles, int socket_id)
+{
+	struct rte_ip_frag_tbl *tbl;
+	uint64_t nb_entries;
+	size_t sz;
+
+	/* computed in 64 bits so that the range check below can see overflow */
+	nb_entries = (uint64_t)rte_align32pow2(bucket_num) * bucket_entries *
+		IP_FRAG_HASH_FNUM;
+
+	/* check input parameters. */
+	if (!rte_is_power_of_2(bucket_entries) ||
+			nb_entries == 0 || nb_entries > UINT32_MAX ||
+			max_entries > nb_entries) {
+		RTE_LOG(ERR, USER1, "%s: invalid input parameter\n", __func__);
+		return NULL;
+	}
+
+	/* table header plus the trailing array of slots */
+	sz = sizeof(*tbl) + nb_entries * sizeof(tbl->pkt[0]);
+	tbl = rte_zmalloc_socket(__func__, sz, RTE_CACHE_LINE_SIZE, socket_id);
+	if (tbl == NULL) {
+		RTE_LOG(ERR, USER1,
+			"%s: allocation of %zu bytes at socket %d failed do\n",
+			__func__, sz, socket_id);
+		return NULL;
+	}
+
+	RTE_LOG(INFO, USER1, "%s: allocated of %zu bytes at socket %d\n",
+		__func__, sz, socket_id);
+
+	tbl->nb_buckets = bucket_num;
+	tbl->bucket_entries = bucket_entries;
+	tbl->nb_entries = (uint32_t)nb_entries;
+	tbl->max_entries = max_entries;
+	tbl->max_cycles = max_cycles;
+
+	/* mask a signature down to the index of a bucket's first slot */
+	tbl->entry_mask = (tbl->nb_entries - 1) & ~(tbl->bucket_entries - 1);
+
+	TAILQ_INIT(&(tbl->lru));
+	return tbl;
+}
+
+/*
+ * Delete a fragmentation table.
+ * Every mbuf still held by an entry on the LRU list is freed immediately,
+ * then the table memory itself is released.
+ * A NULL tbl is accepted and ignored.
+ */
+void
+rte_ip_frag_table_destroy(struct rte_ip_frag_tbl *tbl)
+{
+	struct ip_frag_pkt *fp;
+
+	/* nothing to walk or free */
+	if (tbl == NULL)
+		return;
+
+	TAILQ_FOREACH(fp, &tbl->lru, lru) {
+		ip_frag_free_immediate(fp);
+	}
+
+	rte_free(tbl);
+}
+
+/*
+ * Print the table's limits and statistics counters to f.
+ * The "hash-collisions" figure is derived as total add failures minus
+ * no-space failures.
+ */
+void
+rte_ip_frag_table_statistics_dump(FILE *f, const struct rte_ip_frag_tbl *tbl)
+{
+	const struct ip_frag_tbl_stat *st = &tbl->stat;
+	const uint64_t nospace = st->fail_nospace;
+	const uint64_t total = st->fail_total;
+
+	fprintf(f, "max entries:\t%u;\n"
+		"entries in use:\t%u;\n"
+		"finds/inserts:\t%" PRIu64 ";\n"
+		"entries added:\t%" PRIu64 ";\n"
+		"entries deleted by timeout:\t%" PRIu64 ";\n"
+		"entries reused by timeout:\t%" PRIu64 ";\n"
+		"total add failures:\t%" PRIu64 ";\n"
+		"add no-space failures:\t%" PRIu64 ";\n"
+		"add hash-collisions failures:\t%" PRIu64 ";\n",
+		tbl->max_entries,
+		tbl->use_entries,
+		st->find_num,
+		st->add_num,
+		st->del_num,
+		st->reuse_num,
+		total,
+		nospace,
+		total - nospace);
+}
+
+/*
+ * Delete expired fragments.
+ * Entries are taken from the head of the LRU list and removed while they
+ * are timed out (start + tbl->max_cycles < tms). Processing stops at the
+ * first entry that has not expired, or as soon as the death row lacks room
+ * for the next entry's fragments.
+ */
+void __rte_experimental
+rte_frag_table_del_expired_entries(struct rte_ip_frag_tbl *tbl,
+	struct rte_ip_frag_death_row *dr, uint64_t tms)
+{
+	const uint64_t max_cycles = tbl->max_cycles;
+	struct ip_frag_pkt *fp;
+
+	/*
+	 * Always re-read the list head: the entry just deleted has been
+	 * unlinked, so its own link must not be used to advance.
+	 */
+	while ((fp = TAILQ_FIRST(&tbl->lru)) != NULL) {
+		/* not expired yet: stop at this entry */
+		if (max_cycles + fp->start >= tms)
+			return;
+
+		/* check that death row has enough space */
+		if (IP_FRAG_DEATH_ROW_MBUF_LEN - dr->cnt < fp->last_idx)
+			return;
+
+		ip_frag_tbl_del(tbl, dr, fp);
+	}
+}
diff --git a/src/seastar/dpdk/lib/librte_ip_frag/rte_ip_frag_version.map b/src/seastar/dpdk/lib/librte_ip_frag/rte_ip_frag_version.map
new file mode 100644
index 000000000..a193007c6
--- /dev/null
+++ b/src/seastar/dpdk/lib/librte_ip_frag/rte_ip_frag_version.map
@@ -0,0 +1,26 @@
+DPDK_2.0 {
+ global:
+
+ rte_ip_frag_free_death_row;
+ rte_ip_frag_table_create;
+ rte_ip_frag_table_statistics_dump;
+ rte_ipv4_frag_reassemble_packet;
+ rte_ipv4_fragment_packet;
+ rte_ipv6_frag_reassemble_packet;
+ rte_ipv6_fragment_packet;
+
+ local: *;
+};
+
+DPDK_17.08 {
+ global:
+
+ rte_ip_frag_table_destroy;
+
+} DPDK_2.0;
+
+EXPERIMENTAL {
+ global:
+
+ rte_frag_table_del_expired_entries;
+};
diff --git a/src/seastar/dpdk/lib/librte_ip_frag/rte_ipv4_fragmentation.c b/src/seastar/dpdk/lib/librte_ip_frag/rte_ipv4_fragmentation.c
new file mode 100644
index 000000000..a96fb03e4
--- /dev/null
+++ b/src/seastar/dpdk/lib/librte_ip_frag/rte_ipv4_fragmentation.c
@@ -0,0 +1,185 @@
+/* SPDX-License-Identifier: BSD-3-Clause
+ * Copyright(c) 2010-2014 Intel Corporation
+ */
+
+#include <stddef.h>
+#include <errno.h>
+
+#include <rte_memcpy.h>
+#include <rte_mempool.h>
+#include <rte_debug.h>
+
+#include "ip_frag_common.h"
+
+/* Bit layout of the 16-bit IPv4 flags / fragment offset header field */
+#define IPV4_HDR_DF_SHIFT 14 /* bit position of the Don't Fragment flag */
+#define IPV4_HDR_MF_SHIFT 13 /* bit position of the More Fragments flag */
+#define IPV4_HDR_FO_SHIFT 3 /* log2 of the fragment offset unit */
+
+#define IPV4_HDR_DF_MASK (1 << IPV4_HDR_DF_SHIFT)
+#define IPV4_HDR_MF_MASK (1 << IPV4_HDR_MF_SHIFT)
+
+#define IPV4_HDR_FO_ALIGN (1 << IPV4_HDR_FO_SHIFT) /* offset unit: 8 bytes */
+
+/*
+ * Build the IPv4 header of one output fragment.
+ *
+ * dst receives a copy of src with three fields rewritten: total_length is
+ * set to len, the flags/offset field is set to fofs advanced by dofs bytes
+ * (converted to 8-byte units) with the More Fragments bit taken from mf,
+ * and hdr_checksum is cleared.
+ */
+static inline void __fill_ipv4hdr_frag(struct ipv4_hdr *dst,
+	const struct ipv4_hdr *src, uint16_t len, uint16_t fofs,
+	uint16_t dofs, uint32_t mf)
+{
+	uint16_t flags_and_offset;
+
+	/* start from an exact copy of the original header */
+	rte_memcpy(dst, src, sizeof(*dst));
+
+	/* dofs is in bytes, the header field counts 8-byte units */
+	flags_and_offset = (uint16_t)(fofs + (dofs >> IPV4_HDR_FO_SHIFT));
+	flags_and_offset = (uint16_t)(flags_and_offset |
+		(mf << IPV4_HDR_MF_SHIFT));
+
+	dst->hdr_checksum = 0;
+	dst->total_length = rte_cpu_to_be_16(len);
+	dst->fragment_offset = rte_cpu_to_be_16(flags_and_offset);
+}
+
+/* Release the first num mbufs stored in mb[] with rte_pktmbuf_free(). */
+static inline void __free_fragments(struct rte_mbuf *mb[], uint32_t num)
+{
+	uint32_t idx = 0;
+
+	while (idx < num) {
+		rte_pktmbuf_free(mb[idx]);
+		idx++;
+	}
+}
+
+/**
+ * IPv4 fragmentation.
+ *
+ * This function implements the fragmentation of IPv4 packets.
+ *
+ * Each output fragment is one direct mbuf holding a freshly built IPv4
+ * header, followed by indirect mbufs attached to the input packet's data.
+ * The payload is taken to start sizeof(struct ipv4_hdr) bytes into the
+ * first input segment, i.e. IPv4 options are not accounted for.
+ *
+ * @param pkt_in
+ *   The input packet.
+ * @param pkts_out
+ *   Array storing the output fragments.
+ * @param nb_pkts_out
+ *   Number of entries available in the pkts_out array.
+ * @param mtu_size
+ *   Size in bytes of the Maximum Transfer Unit (MTU) for the outgoing IPv4
+ *   datagrams. This value includes the size of the IPv4 header.
+ * @param pool_direct
+ *   MBUF pool used for allocating direct buffers for the output fragments.
+ * @param pool_indirect
+ *   MBUF pool used for allocating indirect buffers for the output fragments.
+ * @return
+ *   Upon successful completion - number of output fragments placed
+ *   in the pkts_out array.
+ *   Otherwise - (-1) * <errno>:
+ *   -ENOTSUP if the Don't Fragment flag is set, -EINVAL if the input is
+ *   shorter than an IPv4 header, the MTU is smaller than an IPv4 header or
+ *   pkts_out cannot hold all fragments, -ENOMEM if an mbuf allocation fails.
+ */
+int32_t
+rte_ipv4_fragment_packet(struct rte_mbuf *pkt_in,
+	struct rte_mbuf **pkts_out,
+	uint16_t nb_pkts_out,
+	uint16_t mtu_size,
+	struct rte_mempool *pool_direct,
+	struct rte_mempool *pool_indirect)
+{
+	struct rte_mbuf *in_seg = NULL;
+	struct ipv4_hdr *in_hdr;
+	uint32_t out_pkt_pos, in_seg_data_pos;
+	uint32_t more_in_segs;
+	uint32_t payload_len;
+	uint16_t fragment_offset, flag_offset, frag_size;
+	uint16_t frag_bytes_remaining;
+
+	/*
+	 * The header is read from, and skipped in, the first segment; a
+	 * shorter segment would make the length arithmetic below wrap.
+	 */
+	if (unlikely(pkt_in->data_len < sizeof(struct ipv4_hdr) ||
+			pkt_in->pkt_len < sizeof(struct ipv4_hdr)))
+		return -EINVAL;
+
+	in_hdr = rte_pktmbuf_mtod(pkt_in, struct ipv4_hdr *);
+	/* a 16-bit byte swap is symmetric, so this yields host byte order */
+	flag_offset = rte_cpu_to_be_16(in_hdr->fragment_offset);
+
+	/* If Don't Fragment flag is set */
+	if (unlikely ((flag_offset & IPV4_HDR_DF_MASK) != 0))
+		return -ENOTSUP;
+
+	/* an MTU below the header size would wrap the subtraction below */
+	if (unlikely(mtu_size < sizeof(struct ipv4_hdr)))
+		return -EINVAL;
+
+	/*
+	 * Ensure the IP payload length of all fragments is aligned to a
+	 * multiple of 8 bytes as per RFC791 section 2.3.
+	 */
+	frag_size = RTE_ALIGN_FLOOR((mtu_size - sizeof(struct ipv4_hdr)),
+		IPV4_HDR_FO_ALIGN);
+
+	/*
+	 * Check that pkts_out is big enough to hold all fragments.
+	 * Done in 32 bits so the product cannot overflow and the payload
+	 * length is not truncated.
+	 */
+	payload_len = (uint32_t)(pkt_in->pkt_len - sizeof(struct ipv4_hdr));
+	if (unlikely((uint32_t)frag_size * nb_pkts_out < payload_len))
+		return -EINVAL;
+
+	in_seg = pkt_in;
+	in_seg_data_pos = sizeof(struct ipv4_hdr);
+	out_pkt_pos = 0;
+	fragment_offset = 0;
+
+	more_in_segs = 1;
+	while (likely(more_in_segs)) {
+		struct rte_mbuf *out_pkt = NULL, *out_seg_prev = NULL;
+		uint32_t more_out_segs;
+		struct ipv4_hdr *out_hdr;
+
+		/* Allocate direct buffer */
+		out_pkt = rte_pktmbuf_alloc(pool_direct);
+		if (unlikely(out_pkt == NULL)) {
+			__free_fragments(pkts_out, out_pkt_pos);
+			return -ENOMEM;
+		}
+
+		/* Reserve space for the IP header that will be built later */
+		out_pkt->data_len = sizeof(struct ipv4_hdr);
+		out_pkt->pkt_len = sizeof(struct ipv4_hdr);
+		frag_bytes_remaining = frag_size;
+
+		out_seg_prev = out_pkt;
+		more_out_segs = 1;
+		while (likely(more_out_segs && more_in_segs)) {
+			struct rte_mbuf *out_seg = NULL;
+			uint32_t len;
+
+			/* Allocate indirect buffer */
+			out_seg = rte_pktmbuf_alloc(pool_indirect);
+			if (unlikely(out_seg == NULL)) {
+				/* drop the partial fragment and all finished ones */
+				rte_pktmbuf_free(out_pkt);
+				__free_fragments(pkts_out, out_pkt_pos);
+				return -ENOMEM;
+			}
+			out_seg_prev->next = out_seg;
+			out_seg_prev = out_seg;
+
+			/* Prepare indirect buffer */
+			rte_pktmbuf_attach(out_seg, in_seg);
+			/* take what the fragment still needs, capped by the segment */
+			len = frag_bytes_remaining;
+			if (len > (in_seg->data_len - in_seg_data_pos)) {
+				len = in_seg->data_len - in_seg_data_pos;
+			}
+			out_seg->data_off = in_seg->data_off + in_seg_data_pos;
+			out_seg->data_len = (uint16_t)len;
+			out_pkt->pkt_len = (uint16_t)(len +
+				out_pkt->pkt_len);
+			out_pkt->nb_segs += 1;
+			in_seg_data_pos += len;
+			frag_bytes_remaining -= len;
+
+			/* Current output packet (i.e. fragment) done ? */
+			if (unlikely(frag_bytes_remaining == 0))
+				more_out_segs = 0;
+
+			/* Current input segment done ? */
+			if (unlikely(in_seg_data_pos == in_seg->data_len)) {
+				in_seg = in_seg->next;
+				in_seg_data_pos = 0;
+
+				if (unlikely(in_seg == NULL))
+					more_in_segs = 0;
+			}
+		}
+
+		/* Build the IP header */
+
+		out_hdr = rte_pktmbuf_mtod(out_pkt, struct ipv4_hdr *);
+
+		/* more_in_segs doubles as the More Fragments flag */
+		__fill_ipv4hdr_frag(out_hdr, in_hdr,
+			(uint16_t)out_pkt->pkt_len,
+			flag_offset, fragment_offset, more_in_segs);
+
+		fragment_offset = (uint16_t)(fragment_offset +
+			out_pkt->pkt_len - sizeof(struct ipv4_hdr));
+
+		out_pkt->ol_flags |= PKT_TX_IP_CKSUM;
+		out_pkt->l3_len = sizeof(struct ipv4_hdr);
+
+		/* Write the fragment to the output list */
+		pkts_out[out_pkt_pos] = out_pkt;
+		out_pkt_pos ++;
+	}
+
+	return out_pkt_pos;
+}
diff --git a/src/seastar/dpdk/lib/librte_ip_frag/rte_ipv4_reassembly.c b/src/seastar/dpdk/lib/librte_ip_frag/rte_ipv4_reassembly.c
new file mode 100644
index 000000000..1029b7abc
--- /dev/null
+++ b/src/seastar/dpdk/lib/librte_ip_frag/rte_ipv4_reassembly.c
@@ -0,0 +1,171 @@
+/* SPDX-License-Identifier: BSD-3-Clause
+ * Copyright(c) 2010-2014 Intel Corporation
+ */
+
+#include <stddef.h>
+
+#include <rte_debug.h>
+
+#include "ip_frag_common.h"
+
+/*
+ * Reassemble fragments into one packet.
+ *
+ * Works backwards from the last fragment: the stored fragment whose end
+ * (ofs + len) equals the start of the current one is looked up, the
+ * current mbuf has its L2/L3 headers removed and is chained behind it.
+ * This repeats until the chain reaches the end of the first fragment,
+ * which is then put in front. The slot of every mbuf that got chained
+ * is cleared in fp->frags[].
+ *
+ * Returns the reassembled mbuf with its IPv4 header updated, or NULL when
+ * no fragment continues the chain (hole in the packet).
+ */
+struct rte_mbuf *
+ipv4_frag_reassemble(struct ip_frag_pkt *fp)
+{
+	struct ipv4_hdr *hdr;
+	struct rte_mbuf *tail, *tail_before, *head;
+	uint32_t idx, tail_ofs;
+	uint32_t tail_idx = IP_LAST_FRAG_IDX;
+	const uint32_t first_len = fp->frags[IP_FIRST_FRAG_IDX].len;
+	const uint32_t top_idx = fp->last_idx - 1;
+
+	/* start from the last fragment. */
+	tail = fp->frags[IP_LAST_FRAG_IDX].mb;
+	tail_ofs = fp->frags[IP_LAST_FRAG_IDX].ofs;
+
+	while (tail_ofs != first_len) {
+
+		tail_before = tail;
+
+		for (idx = top_idx;
+				idx != IP_FIRST_FRAG_IDX && tail_ofs != first_len;
+				idx--) {
+
+			/* only a fragment ending where the tail starts fits */
+			if (fp->frags[idx].ofs + fp->frags[idx].len != tail_ofs)
+				continue;
+
+			RTE_ASSERT(tail_idx != idx);
+
+			/* strip the headers of the tail before appending it */
+			rte_pktmbuf_adj(tail,
+				(uint16_t)(tail->l2_len + tail->l3_len));
+			rte_pktmbuf_chain(fp->frags[idx].mb, tail);
+
+			/* the chained mbuf must not be reached via its slot */
+			fp->frags[tail_idx].mb = NULL;
+			tail_idx = idx;
+
+			/* the predecessor becomes the new tail */
+			tail = fp->frags[idx].mb;
+			tail_ofs = fp->frags[idx].ofs;
+		}
+
+		/* error - hole in the packet. */
+		if (tail == tail_before)
+			return NULL;
+	}
+
+	/* chain with the first fragment. */
+	rte_pktmbuf_adj(tail, (uint16_t)(tail->l2_len + tail->l3_len));
+	rte_pktmbuf_chain(fp->frags[IP_FIRST_FRAG_IDX].mb, tail);
+	fp->frags[tail_idx].mb = NULL;
+	head = fp->frags[IP_FIRST_FRAG_IDX].mb;
+	fp->frags[IP_FIRST_FRAG_IDX].mb = NULL;
+
+	/* update mbuf fields for reassembled packet. */
+	head->ol_flags |= PKT_TX_IP_CKSUM;
+
+	/* update ipv4 header for the reassembled packet */
+	hdr = rte_pktmbuf_mtod_offset(head, struct ipv4_hdr *, head->l2_len);
+
+	hdr->total_length = rte_cpu_to_be_16((uint16_t)(fp->total_size +
+		head->l3_len));
+	/* keep only the DF flag: offset and remaining flag bits are cleared */
+	hdr->fragment_offset = (uint16_t)(hdr->fragment_offset &
+		rte_cpu_to_be_16(IPV4_HDR_DF_FLAG));
+	hdr->hdr_checksum = 0;
+
+	return head;
+}
+
+/*
+ * Process new mbuf with fragment of IPV4 packet.
+ * Incoming mbuf should have its l2_len/l3_len fields set up correctly.
+ * @param tbl
+ *   Table where to lookup/add the fragmented packet.
+ * @param dr
+ *   Death row that receives the mbuf when the fragment is rejected; it is
+ *   also passed on to the table lookup and the fragment processing.
+ * @param mb
+ *   Incoming mbuf with IPV4 fragment.
+ * @param tms
+ *   Fragment arrival timestamp.
+ * @param ip_hdr
+ *   Pointer to the IPV4 header inside the fragment.
+ * @return
+ *   Pointer to mbuf for reassembled packet, or NULL if:
+ *   - an error occurred.
+ *   - not all fragments of the packet are collected yet.
+ */
+struct rte_mbuf *
+rte_ipv4_frag_reassemble_packet(struct rte_ip_frag_tbl *tbl,
+	struct rte_ip_frag_death_row *dr, struct rte_mbuf *mb, uint64_t tms,
+	struct ipv4_hdr *ip_hdr)
+{
+	struct ip_frag_pkt *entry;
+	struct ip_frag_key key;
+	struct rte_mbuf *result;
+	const unaligned_uint64_t *addr_pair;
+	uint16_t frag_field, ip_ofs, ip_flag;
+	int32_t ip_len;
+
+	/* split the flags/offset field into byte offset and MF flag */
+	frag_field = rte_be_to_cpu_16(ip_hdr->fragment_offset);
+	ip_flag = (uint16_t)(frag_field & IPV4_HDR_MF_FLAG);
+	ip_ofs = (uint16_t)((frag_field & IPV4_HDR_OFFSET_MASK) *
+		IPV4_HDR_OFFSET_UNITS);
+
+	/* payload length of this fragment, i.e. without the IP header */
+	ip_len = rte_be_to_cpu_16(ip_hdr->total_length) - mb->l3_len;
+
+	/* lookup key: the 8 bytes starting at src_addr plus the packet id */
+	addr_pair = (unaligned_uint64_t *)&ip_hdr->src_addr;
+	key.key_len = IPV4_KEYLEN;
+	key.id = ip_hdr->packet_id;
+	/* use first 8 bytes only */
+	key.src_dst[0] = addr_pair[0];
+
+	IP_FRAG_LOG(DEBUG, "%s:%d:\n"
+		"mbuf: %p, tms: %" PRIu64
+		", key: <%" PRIx64 ", %#x>, ofs: %u, len: %d, flags: %#x\n"
+		"tbl: %p, max_cycles: %" PRIu64 ", entry_mask: %#x, "
+		"max_entries: %u, use_entries: %u\n\n",
+		__func__, __LINE__,
+		mb, tms, key.src_dst[0], key.id, ip_ofs, ip_len, ip_flag,
+		tbl, tbl->max_cycles, tbl->entry_mask, tbl->max_entries,
+		tbl->use_entries);
+
+	/* check that fragment length is greater than zero. */
+	if (ip_len <= 0) {
+		IP_FRAG_MBUF2DR(dr, mb);
+		return NULL;
+	}
+
+	/* try to find/add entry into the fragment's table. */
+	entry = ip_frag_find(tbl, dr, &key, tms);
+	if (entry == NULL) {
+		IP_FRAG_MBUF2DR(dr, mb);
+		return NULL;
+	}
+
+	IP_FRAG_LOG(DEBUG, "%s:%d:\n"
+		"tbl: %p, max_entries: %u, use_entries: %u\n"
+		"ipv4_frag_pkt: %p, key: <%" PRIx64 ", %#x>, start: %" PRIu64
+		", total_size: %u, frag_size: %u, last_idx: %u\n\n",
+		__func__, __LINE__,
+		tbl, tbl->max_entries, tbl->use_entries,
+		entry, entry->key.src_dst[0], entry->key.id, entry->start,
+		entry->total_size, entry->frag_size, entry->last_idx);
+
+
+	/* process the fragmented packet. */
+	result = ip_frag_process(entry, dr, mb, ip_ofs, ip_len, ip_flag);
+	ip_frag_inuse(tbl, entry);
+
+	IP_FRAG_LOG(DEBUG, "%s:%d:\n"
+		"mbuf: %p\n"
+		"tbl: %p, max_entries: %u, use_entries: %u\n"
+		"ipv4_frag_pkt: %p, key: <%" PRIx64 ", %#x>, start: %" PRIu64
+		", total_size: %u, frag_size: %u, last_idx: %u\n\n",
+		__func__, __LINE__, result,
+		tbl, tbl->max_entries, tbl->use_entries,
+		entry, entry->key.src_dst[0], entry->key.id, entry->start,
+		entry->total_size, entry->frag_size, entry->last_idx);
+
+	return result;
+}
diff --git a/src/seastar/dpdk/lib/librte_ip_frag/rte_ipv6_fragmentation.c b/src/seastar/dpdk/lib/librte_ip_frag/rte_ipv6_fragmentation.c
new file mode 100644
index 000000000..b9437eb11
--- /dev/null
+++ b/src/seastar/dpdk/lib/librte_ip_frag/rte_ipv6_fragmentation.c
@@ -0,0 +1,182 @@
+/* SPDX-License-Identifier: BSD-3-Clause
+ * Copyright(c) 2010-2014 Intel Corporation
+ */
+
+#include <stddef.h>
+#include <errno.h>
+
+#include <rte_memcpy.h>
+
+#include "ip_frag_common.h"
+
+/**
+ * @file
+ * RTE IPv6 Fragmentation
+ *
+ * Implementation of IPv6 fragmentation.
+ *
+ */
+
+/*
+ * Build the IPv6 header and fragment extension header of one fragment.
+ *
+ * dst receives a copy of src with payload_len set to len and proto set to
+ * IPPROTO_FRAGMENT. The fragment header is written directly behind it:
+ * it carries the original proto as next header, the offset/more-fragments
+ * word built from fofs and mf, and an identification of 0.
+ */
+static inline void
+__fill_ipv6hdr_frag(struct ipv6_hdr *dst,
+	const struct ipv6_hdr *src, uint16_t len, uint16_t fofs,
+	uint32_t mf)
+{
+	/* the extension header starts right after the fixed header */
+	struct ipv6_extension_fragment *frag_ext =
+		(struct ipv6_extension_fragment *)(dst + 1);
+
+	rte_memcpy(dst, src, sizeof(*dst));
+	dst->proto = IPPROTO_FRAGMENT;
+	dst->payload_len = rte_cpu_to_be_16(len);
+
+	frag_ext->id = 0;
+	frag_ext->reserved = 0;
+	frag_ext->next_header = src->proto;
+	frag_ext->frag_data =
+		rte_cpu_to_be_16(RTE_IPV6_SET_FRAG_DATA(fofs, mf));
+}
+
+/* Release the first num mbufs stored in mb[] with rte_pktmbuf_free(). */
+static inline void
+__free_fragments(struct rte_mbuf *mb[], uint32_t num)
+{
+	uint32_t idx = 0;
+
+	while (idx < num) {
+		rte_pktmbuf_free(mb[idx]);
+		idx++;
+	}
+}
+
+/**
+ * IPv6 fragmentation.
+ *
+ * This function implements the fragmentation of IPv6 packets.
+ *
+ * Each output fragment is one direct mbuf holding a freshly built IPv6
+ * header plus fragment extension header, followed by indirect mbufs
+ * attached to the input packet's data. The payload is taken to start
+ * sizeof(struct ipv6_hdr) bytes into the first input segment, i.e. other
+ * extension headers are not accounted for. The identification field of
+ * the fragment header is written as 0 by __fill_ipv6hdr_frag().
+ *
+ * @param pkt_in
+ *   The input packet.
+ * @param pkts_out
+ *   Array storing the output fragments.
+ * @param nb_pkts_out
+ *   Number of entries available in the pkts_out array.
+ * @param mtu_size
+ *   Size in bytes of the Maximum Transfer Unit (MTU) for the outgoing IPv6
+ *   datagrams. This value includes the size of the IPv6 header.
+ * @param pool_direct
+ *   MBUF pool used for allocating direct buffers for the output fragments.
+ * @param pool_indirect
+ *   MBUF pool used for allocating indirect buffers for the output fragments.
+ * @return
+ *   Upon successful completion - number of output fragments placed
+ *   in the pkts_out array.
+ *   Otherwise - (-1) * <errno>:
+ *   -EINVAL if the MTU cannot hold the IPv6 and fragment headers, the input
+ *   is shorter than an IPv6 header or pkts_out cannot hold all fragments,
+ *   -ENOMEM if an mbuf allocation fails.
+ */
+int32_t
+rte_ipv6_fragment_packet(struct rte_mbuf *pkt_in,
+	struct rte_mbuf **pkts_out,
+	uint16_t nb_pkts_out,
+	uint16_t mtu_size,
+	struct rte_mempool *pool_direct,
+	struct rte_mempool *pool_indirect)
+{
+	struct rte_mbuf *in_seg = NULL;
+	struct ipv6_hdr *in_hdr;
+	uint32_t out_pkt_pos, in_seg_data_pos;
+	uint32_t more_in_segs;
+	uint32_t payload_len;
+	uint16_t fragment_offset, frag_size;
+	uint64_t frag_bytes_remaining;
+
+	/* an MTU below the two headers would wrap the subtraction below */
+	if (unlikely(mtu_size < sizeof(struct ipv6_hdr) +
+			sizeof(struct ipv6_extension_fragment)))
+		return -EINVAL;
+
+	/*
+	 * The header is read from, and skipped in, the first segment; a
+	 * shorter segment would make the length arithmetic below wrap.
+	 */
+	if (unlikely(pkt_in->data_len < sizeof(struct ipv6_hdr) ||
+			pkt_in->pkt_len < sizeof(struct ipv6_hdr)))
+		return -EINVAL;
+
+	/*
+	 * Ensure the IP payload length of all fragments (except the
+	 * last fragment) is a multiple of 8 bytes per RFC2460.
+	 * Every fragment carries the fragment extension header in addition
+	 * to the IPv6 header, so both are taken off the MTU; otherwise each
+	 * fragment could exceed the MTU by the size of the extension header.
+	 */
+	frag_size = RTE_ALIGN_FLOOR(mtu_size - sizeof(struct ipv6_hdr) -
+			sizeof(struct ipv6_extension_fragment),
+		RTE_IPV6_EHDR_FO_ALIGN);
+
+	/*
+	 * Check that pkts_out is big enough to hold all fragments.
+	 * Done in 32 bits so the product cannot overflow and the payload
+	 * length is not truncated.
+	 */
+	payload_len = (uint32_t)(pkt_in->pkt_len - sizeof(struct ipv6_hdr));
+	if (unlikely((uint32_t)frag_size * nb_pkts_out < payload_len))
+		return -EINVAL;
+
+	in_hdr = rte_pktmbuf_mtod(pkt_in, struct ipv6_hdr *);
+
+	in_seg = pkt_in;
+	in_seg_data_pos = sizeof(struct ipv6_hdr);
+	out_pkt_pos = 0;
+	fragment_offset = 0;
+
+	more_in_segs = 1;
+	while (likely(more_in_segs)) {
+		struct rte_mbuf *out_pkt = NULL, *out_seg_prev = NULL;
+		uint32_t more_out_segs;
+		struct ipv6_hdr *out_hdr;
+
+		/* Allocate direct buffer */
+		out_pkt = rte_pktmbuf_alloc(pool_direct);
+		if (unlikely(out_pkt == NULL)) {
+			__free_fragments(pkts_out, out_pkt_pos);
+			return -ENOMEM;
+		}
+
+		/* Reserve space for the IP header that will be built later */
+		out_pkt->data_len = sizeof(struct ipv6_hdr) + sizeof(struct ipv6_extension_fragment);
+		out_pkt->pkt_len = sizeof(struct ipv6_hdr) + sizeof(struct ipv6_extension_fragment);
+		frag_bytes_remaining = frag_size;
+
+		out_seg_prev = out_pkt;
+		more_out_segs = 1;
+		while (likely(more_out_segs && more_in_segs)) {
+			struct rte_mbuf *out_seg = NULL;
+			uint32_t len;
+
+			/* Allocate indirect buffer */
+			out_seg = rte_pktmbuf_alloc(pool_indirect);
+			if (unlikely(out_seg == NULL)) {
+				/* drop the partial fragment and all finished ones */
+				rte_pktmbuf_free(out_pkt);
+				__free_fragments(pkts_out, out_pkt_pos);
+				return -ENOMEM;
+			}
+			out_seg_prev->next = out_seg;
+			out_seg_prev = out_seg;
+
+			/* Prepare indirect buffer */
+			rte_pktmbuf_attach(out_seg, in_seg);
+			/* take what the fragment still needs, capped by the segment */
+			len = frag_bytes_remaining;
+			if (len > (in_seg->data_len - in_seg_data_pos)) {
+				len = in_seg->data_len - in_seg_data_pos;
+			}
+			out_seg->data_off = in_seg->data_off + in_seg_data_pos;
+			out_seg->data_len = (uint16_t)len;
+			out_pkt->pkt_len = (uint16_t)(len +
+				out_pkt->pkt_len);
+			out_pkt->nb_segs += 1;
+			in_seg_data_pos += len;
+			frag_bytes_remaining -= len;
+
+			/* Current output packet (i.e. fragment) done ? */
+			if (unlikely(frag_bytes_remaining == 0))
+				more_out_segs = 0;
+
+			/* Current input segment done ? */
+			if (unlikely(in_seg_data_pos == in_seg->data_len)) {
+				in_seg = in_seg->next;
+				in_seg_data_pos = 0;
+
+				if (unlikely(in_seg == NULL)) {
+					more_in_segs = 0;
+				}
+			}
+		}
+
+		/* Build the IP header */
+
+		out_hdr = rte_pktmbuf_mtod(out_pkt, struct ipv6_hdr *);
+
+		/*
+		 * payload_len covers everything behind the IPv6 header,
+		 * fragment header included; more_in_segs doubles as the
+		 * More Fragments flag.
+		 */
+		__fill_ipv6hdr_frag(out_hdr, in_hdr,
+			(uint16_t)(out_pkt->pkt_len - sizeof(struct ipv6_hdr)),
+			fragment_offset, more_in_segs);
+
+		fragment_offset = (uint16_t)(fragment_offset +
+			out_pkt->pkt_len - sizeof(struct ipv6_hdr)
+			- sizeof(struct ipv6_extension_fragment));
+
+		/* Write the fragment to the output list */
+		pkts_out[out_pkt_pos] = out_pkt;
+		out_pkt_pos ++;
+	}
+
+	return out_pkt_pos;
+}
diff --git a/src/seastar/dpdk/lib/librte_ip_frag/rte_ipv6_reassembly.c b/src/seastar/dpdk/lib/librte_ip_frag/rte_ipv6_reassembly.c
new file mode 100644
index 000000000..855e3f740
--- /dev/null
+++ b/src/seastar/dpdk/lib/librte_ip_frag/rte_ipv6_reassembly.c
@@ -0,0 +1,216 @@
+/* SPDX-License-Identifier: BSD-3-Clause
+ * Copyright(c) 2010-2014 Intel Corporation
+ */
+
+#include <stddef.h>
+
+#include <rte_memcpy.h>
+
+#include "ip_frag_common.h"
+
+/**
+ * @file
+ * IPv6 reassemble
+ *
+ * Implementation of IPv6 reassembly.
+ *
+ */
+
+/*
+ * Copy len bytes from src to dst one byte at a time, highest index first.
+ * Copying from the end keeps the source intact when the two ranges
+ * overlap and dst lies above src. A len of zero or less copies nothing.
+ */
+static inline void
+ip_frag_memmove(char *dst, char *src, int len)
+{
+	int remaining = len;
+
+	/* go backwards to make sure we don't overwrite anything important */
+	while (remaining > 0) {
+		remaining--;
+		dst[remaining] = src[remaining];
+	}
+}
+
+/*
+ * Reassemble fragments into one packet.
+ *
+ * Works backwards from the last fragment: the stored fragment whose end
+ * (ofs + len) equals the start of the current one is looked up, the
+ * current mbuf has its L2/L3 headers removed and is chained behind it.
+ * This repeats until the chain reaches the end of the first fragment,
+ * which is then put in front. Finally the payload length is rewritten and
+ * the fragment extension header is cut out of the first mbuf.
+ *
+ * Returns the reassembled mbuf, or NULL when no fragment continues the
+ * chain (hole in the packet).
+ */
+struct rte_mbuf *
+ipv6_frag_reassemble(struct ip_frag_pkt *fp)
+{
+	struct ipv6_hdr *hdr;
+	struct ipv6_extension_fragment *frag_ext;
+	struct rte_mbuf *tail, *tail_before, *head;
+	uint32_t idx, tail_ofs;
+	uint32_t hdrs_len, payload_len;
+	uint32_t tail_idx = IP_LAST_FRAG_IDX;
+	const uint32_t first_len = fp->frags[IP_FIRST_FRAG_IDX].len;
+	const uint32_t top_idx = fp->last_idx - 1;
+
+	/* start from the last fragment. */
+	tail = fp->frags[IP_LAST_FRAG_IDX].mb;
+	tail_ofs = fp->frags[IP_LAST_FRAG_IDX].ofs;
+
+	/* the end of the last fragment is the length of the whole payload */
+	payload_len = tail_ofs + fp->frags[IP_LAST_FRAG_IDX].len;
+
+	while (tail_ofs != first_len) {
+
+		tail_before = tail;
+
+		for (idx = top_idx;
+				idx != IP_FIRST_FRAG_IDX && tail_ofs != first_len;
+				idx--) {
+
+			/* only a fragment ending where the tail starts fits */
+			if (fp->frags[idx].ofs + fp->frags[idx].len != tail_ofs)
+				continue;
+
+			RTE_ASSERT(tail_idx != idx);
+
+			/* strip the headers of the tail before appending it */
+			rte_pktmbuf_adj(tail,
+				(uint16_t)(tail->l2_len + tail->l3_len));
+			rte_pktmbuf_chain(fp->frags[idx].mb, tail);
+
+			/* the chained mbuf must not be reached via its slot */
+			fp->frags[tail_idx].mb = NULL;
+			tail_idx = idx;
+
+			/* the predecessor becomes the new tail */
+			tail = fp->frags[idx].mb;
+			tail_ofs = fp->frags[idx].ofs;
+		}
+
+		/* error - hole in the packet. */
+		if (tail == tail_before)
+			return NULL;
+	}
+
+	/* chain with the first fragment. */
+	rte_pktmbuf_adj(tail, (uint16_t)(tail->l2_len + tail->l3_len));
+	rte_pktmbuf_chain(fp->frags[IP_FIRST_FRAG_IDX].mb, tail);
+	fp->frags[tail_idx].mb = NULL;
+	head = fp->frags[IP_FIRST_FRAG_IDX].mb;
+	fp->frags[IP_FIRST_FRAG_IDX].mb = NULL;
+
+	/* update mbuf fields for reassembled packet. */
+	head->ol_flags |= PKT_TX_IP_CKSUM;
+
+	/* update ipv6 header for the reassembled datagram */
+	hdr = rte_pktmbuf_mtod_offset(head, struct ipv6_hdr *, head->l2_len);
+
+	hdr->payload_len = rte_cpu_to_be_16(payload_len);
+
+	/*
+	 * remove fragmentation header. note that per RFC2460, we need to update
+	 * the last non-fragmentable header with the "next header" field to contain
+	 * type of the first fragmentable header, but we currently don't support
+	 * other headers, so we assume there are no other headers and thus update
+	 * the main IPv6 header instead.
+	 */
+	frag_ext = (struct ipv6_extension_fragment *) (hdr + 1);
+	hdr->proto = frag_ext->next_header;
+
+	/* bytes in front of the fragment header: L2 plus fixed IPv6 header */
+	hdrs_len = head->l2_len + head->l3_len - sizeof(*frag_ext);
+
+	/* slide those bytes forward over the fragment header ... */
+	ip_frag_memmove(rte_pktmbuf_mtod_offset(head, char *, sizeof(*frag_ext)),
+		rte_pktmbuf_mtod(head, char*), hdrs_len);
+
+	/* ... and drop the now unused bytes at the front of the mbuf */
+	rte_pktmbuf_adj(head, sizeof(*frag_ext));
+
+	return head;
+}
+
+/*
+ * Process new mbuf with fragment of IPV6 datagram.
+ * Incoming mbuf should have its l2_len/l3_len fields setup correctly.
+ * @param tbl
+ *   Table where to lookup/add the fragmented packet.
+ * @param dr
+ *   Death row that receives the mbuf when the fragment is rejected; it is
+ *   also passed on to the table lookup and the fragment processing.
+ * @param mb
+ *   Incoming mbuf with IPV6 fragment.
+ * @param tms
+ *   Fragment arrival timestamp.
+ * @param ip_hdr
+ *   Pointer to the IPV6 header.
+ * @param frag_hdr
+ *   Pointer to the IPV6 fragment extension header.
+ * @return
+ *   Pointer to mbuf for reassembled packet, or NULL if:
+ *   - an error occurred.
+ *   - not all fragments of the packet are collected yet.
+ */
+/*
+ * Both helpers take the frag_data field exactly as stored in the header
+ * (big endian) and convert it to host order before extracting bits, so the
+ * result does not depend on the byte order of the host.
+ */
+#define MORE_FRAGS(x) (rte_be_to_cpu_16(x) & 0x1)
+#define FRAG_OFFSET(x) (rte_be_to_cpu_16(x) >> 3)
+struct rte_mbuf *
+rte_ipv6_frag_reassemble_packet(struct rte_ip_frag_tbl *tbl,
+	struct rte_ip_frag_death_row *dr, struct rte_mbuf *mb, uint64_t tms,
+	struct ipv6_hdr *ip_hdr, struct ipv6_extension_fragment *frag_hdr)
+{
+	struct ip_frag_pkt *fp;
+	struct ip_frag_key key;
+	uint16_t ip_ofs;
+	int32_t ip_len;
+
+	/* lookup key: source and destination address plus fragment id */
+	rte_memcpy(&key.src_dst[0], ip_hdr->src_addr, 16);
+	rte_memcpy(&key.src_dst[2], ip_hdr->dst_addr, 16);
+
+	key.id = frag_hdr->id;
+	key.key_len = IPV6_KEYLEN;
+
+	/* the header stores the offset in 8-byte units; convert to bytes */
+	ip_ofs = FRAG_OFFSET(frag_hdr->frag_data) * 8;
+
+	/*
+	 * as per RFC2460, payload length contains all extension headers
+	 * as well.
+	 * since we don't support anything but frag headers,
+	 * this is what we remove from the payload len.
+	 * computed in signed 32 bits so a too short payload goes negative
+	 * instead of wrapping.
+	 */
+	ip_len = (int32_t)rte_be_to_cpu_16(ip_hdr->payload_len) -
+		(int32_t)sizeof(*frag_hdr);
+
+	IP_FRAG_LOG(DEBUG, "%s:%d:\n"
+		"mbuf: %p, tms: %" PRIu64
+		", key: <" IPv6_KEY_BYTES_FMT ", %#x>, "
+		"ofs: %u, len: %d, flags: %#x\n"
+		"tbl: %p, max_cycles: %" PRIu64 ", entry_mask: %#x, "
+		"max_entries: %u, use_entries: %u\n\n",
+		__func__, __LINE__,
+		mb, tms, IPv6_KEY_BYTES(key.src_dst), key.id, ip_ofs, ip_len,
+		MORE_FRAGS(frag_hdr->frag_data),
+		tbl, tbl->max_cycles, tbl->entry_mask, tbl->max_entries,
+		tbl->use_entries);
+
+	/* check that fragment length is greater than zero. */
+	if (ip_len <= 0) {
+		IP_FRAG_MBUF2DR(dr, mb);
+		return NULL;
+	}
+
+	/* try to find/add entry into the fragment's table. */
+	fp = ip_frag_find(tbl, dr, &key, tms);
+	if (fp == NULL) {
+		IP_FRAG_MBUF2DR(dr, mb);
+		return NULL;
+	}
+
+	IP_FRAG_LOG(DEBUG, "%s:%d:\n"
+		"tbl: %p, max_entries: %u, use_entries: %u\n"
+		"ipv6_frag_pkt: %p, key: <" IPv6_KEY_BYTES_FMT ", %#x>, start: %" PRIu64
+		", total_size: %u, frag_size: %u, last_idx: %u\n\n",
+		__func__, __LINE__,
+		tbl, tbl->max_entries, tbl->use_entries,
+		fp, IPv6_KEY_BYTES(fp->key.src_dst), fp->key.id, fp->start,
+		fp->total_size, fp->frag_size, fp->last_idx);
+
+
+	/* process the fragmented packet. */
+	mb = ip_frag_process(fp, dr, mb, ip_ofs, ip_len,
+		MORE_FRAGS(frag_hdr->frag_data));
+	ip_frag_inuse(tbl, fp);
+
+	IP_FRAG_LOG(DEBUG, "%s:%d:\n"
+		"mbuf: %p\n"
+		"tbl: %p, max_entries: %u, use_entries: %u\n"
+		"ipv6_frag_pkt: %p, key: <" IPv6_KEY_BYTES_FMT ", %#x>, start: %" PRIu64
+		", total_size: %u, frag_size: %u, last_idx: %u\n\n",
+		__func__, __LINE__, mb,
+		tbl, tbl->max_entries, tbl->use_entries,
+		fp, IPv6_KEY_BYTES(fp->key.src_dst), fp->key.id, fp->start,
+		fp->total_size, fp->frag_size, fp->last_idx);
+
+	return mb;
+}