diff options
author | Daniel Baumann <daniel.baumann@progress-linux.org> | 2024-04-21 11:54:28 +0000 |
---|---|---|
committer | Daniel Baumann <daniel.baumann@progress-linux.org> | 2024-04-21 11:54:28 +0000 |
commit | e6918187568dbd01842d8d1d2c808ce16a894239 (patch) | |
tree | 64f88b554b444a49f656b6c656111a145cbbaa28 /src/seastar/dpdk/lib/librte_ip_frag | |
parent | Initial commit. (diff) | |
download | ceph-e6918187568dbd01842d8d1d2c808ce16a894239.tar.xz ceph-e6918187568dbd01842d8d1d2c808ce16a894239.zip |
Adding upstream version 18.2.2.upstream/18.2.2
Signed-off-by: Daniel Baumann <daniel.baumann@progress-linux.org>
Diffstat (limited to 'src/seastar/dpdk/lib/librte_ip_frag')
-rw-r--r-- | src/seastar/dpdk/lib/librte_ip_frag/Makefile | 29 | ||||
-rw-r--r-- | src/seastar/dpdk/lib/librte_ip_frag/ip_frag_common.h | 165 | ||||
-rw-r--r-- | src/seastar/dpdk/lib/librte_ip_frag/ip_frag_internal.c | 369 | ||||
-rw-r--r-- | src/seastar/dpdk/lib/librte_ip_frag/meson.build | 11 | ||||
-rw-r--r-- | src/seastar/dpdk/lib/librte_ip_frag/rte_ip_frag.h | 358 | ||||
-rw-r--r-- | src/seastar/dpdk/lib/librte_ip_frag/rte_ip_frag_common.c | 144 | ||||
-rw-r--r-- | src/seastar/dpdk/lib/librte_ip_frag/rte_ip_frag_version.map | 26 | ||||
-rw-r--r-- | src/seastar/dpdk/lib/librte_ip_frag/rte_ipv4_fragmentation.c | 185 | ||||
-rw-r--r-- | src/seastar/dpdk/lib/librte_ip_frag/rte_ipv4_reassembly.c | 171 | ||||
-rw-r--r-- | src/seastar/dpdk/lib/librte_ip_frag/rte_ipv6_fragmentation.c | 182 | ||||
-rw-r--r-- | src/seastar/dpdk/lib/librte_ip_frag/rte_ipv6_reassembly.c | 216 |
11 files changed, 1856 insertions, 0 deletions
diff --git a/src/seastar/dpdk/lib/librte_ip_frag/Makefile b/src/seastar/dpdk/lib/librte_ip_frag/Makefile
new file mode 100644
index 000000000..4c3dc4d37
--- /dev/null
+++ b/src/seastar/dpdk/lib/librte_ip_frag/Makefile
@@ -0,0 +1,29 @@
+# SPDX-License-Identifier: BSD-3-Clause
+# Copyright(c) 2010-2014 Intel Corporation
+
+include $(RTE_SDK)/mk/rte.vars.mk
+
+# library name
+LIB = librte_ip_frag.a
+
+CFLAGS += -O3
+CFLAGS += $(WERROR_FLAGS) -I$(SRCDIR)
+LDLIBS += -lrte_eal -lrte_mempool -lrte_mbuf -lrte_ethdev
+LDLIBS += -lrte_hash
+
+EXPORT_MAP := rte_ip_frag_version.map
+
+LIBABIVER := 1
+
+#source files
+SRCS-$(CONFIG_RTE_LIBRTE_IP_FRAG) += rte_ipv4_fragmentation.c
+SRCS-$(CONFIG_RTE_LIBRTE_IP_FRAG) += rte_ipv6_fragmentation.c
+SRCS-$(CONFIG_RTE_LIBRTE_IP_FRAG) += rte_ipv4_reassembly.c
+SRCS-$(CONFIG_RTE_LIBRTE_IP_FRAG) += rte_ipv6_reassembly.c
+SRCS-$(CONFIG_RTE_LIBRTE_IP_FRAG) += rte_ip_frag_common.c
+SRCS-$(CONFIG_RTE_LIBRTE_IP_FRAG) += ip_frag_internal.c
+
+# install this header file
+SYMLINK-$(CONFIG_RTE_LIBRTE_IP_FRAG)-include += rte_ip_frag.h
+
+include $(RTE_SDK)/mk/rte.lib.mk
diff --git a/src/seastar/dpdk/lib/librte_ip_frag/ip_frag_common.h b/src/seastar/dpdk/lib/librte_ip_frag/ip_frag_common.h
new file mode 100644
index 000000000..a17a74076
--- /dev/null
+++ b/src/seastar/dpdk/lib/librte_ip_frag/ip_frag_common.h
@@ -0,0 +1,165 @@
+/* SPDX-License-Identifier: BSD-3-Clause
+ * Copyright(c) 2010-2014 Intel Corporation
+ */
+
+#ifndef _IP_FRAG_COMMON_H_
+#define _IP_FRAG_COMMON_H_
+
+#include "rte_ip_frag.h"
+
+/* logging macro: forwards to RTE_LOG only when RTE_LIBRTE_IP_FRAG_DEBUG is defined, otherwise expands to a no-op. */
+#ifdef RTE_LIBRTE_IP_FRAG_DEBUG
+#define IP_FRAG_LOG(lvl, fmt, args...)	RTE_LOG(lvl, USER1, fmt, ##args)
+#else
+#define IP_FRAG_LOG(lvl, fmt, args...)	do {} while(0)
+#endif /* IP_FRAG_DEBUG */
+
+#define IPV4_KEYLEN 1
+#define IPV6_KEYLEN 4
+
+/* helper macros: IP_FRAG_MBUF2DR appends mb to the death row and bumps its count (no capacity check here). */
+#define	IP_FRAG_MBUF2DR(dr, mb)	((dr)->row[(dr)->cnt++] = (mb))
+/* NOTE(review): the key words printed below are 64-bit but the format pads to only 8 hex digits - confirm intent. */
+#define IPv6_KEY_BYTES(key) \
+	(key)[0], (key)[1], (key)[2], (key)[3]
+#define IPv6_KEY_BYTES_FMT \
+	"%08" PRIx64 "%08" PRIx64 "%08" PRIx64 "%08" PRIx64
+
+#ifdef RTE_LIBRTE_IP_FRAG_TBL_STAT
+#define	IP_FRAG_TBL_STAT_UPDATE(s, f, v)	((s)->f += (v))
+#else
+#define	IP_FRAG_TBL_STAT_UPDATE(s, f, v)	do {} while (0)
+#endif /* IP_FRAG_TBL_STAT */
+
+/* internal functions declarations */
+struct rte_mbuf * ip_frag_process(struct ip_frag_pkt *fp,
+		struct rte_ip_frag_death_row *dr, struct rte_mbuf *mb,
+		uint16_t ofs, uint16_t len, uint16_t more_frags);
+
+struct ip_frag_pkt * ip_frag_find(struct rte_ip_frag_tbl *tbl,
+		struct rte_ip_frag_death_row *dr,
+		const struct ip_frag_key *key, uint64_t tms);
+
+struct ip_frag_pkt * ip_frag_lookup(struct rte_ip_frag_tbl *tbl,
+	const struct ip_frag_key *key, uint64_t tms,
+	struct ip_frag_pkt **free, struct ip_frag_pkt **stale);
+
+/* these functions need to be declared here as ip_frag_process relies on them */
+struct rte_mbuf *ipv4_frag_reassemble(struct ip_frag_pkt *fp);
+struct rte_mbuf *ipv6_frag_reassemble(struct ip_frag_pkt *fp);
+
+
+
+/*
+ * misc frag key functions
+ */
+
+/* check if key is empty: a key with key_len == 0 marks an unused table entry */
+static inline int
+ip_frag_key_is_empty(const struct ip_frag_key * key)
+{
+	return (key->key_len == 0);
+}
+
+/* invalidate the key by zeroing key_len; the address words and id are left as they were */
+static inline void
+ip_frag_key_invalidate(struct ip_frag_key * key)
+{
+	key->key_len = 0;
+}
+
+/* compare two keys: returns 0 when id, key_len and the first k1->key_len address words match, non-zero otherwise */
+static inline uint64_t
+ip_frag_key_cmp(const struct ip_frag_key * k1, const struct ip_frag_key * k2)
+{
+	uint32_t i;
+	uint64_t val;
+	val = k1->id_key_len ^ k2->id_key_len;
+	for (i = 0; i < k1->key_len; i++)
+		val |= k1->src_dst[i] ^ k2->src_dst[i];
+	return val;
+}
+
+/*
+ * misc fragment functions
+ */
+
+/* put fragment on death row: move every non-NULL mbuf of fp to dr and reset fp->last_idx to 0 */
+static inline void
+ip_frag_free(struct ip_frag_pkt *fp, struct rte_ip_frag_death_row *dr)
+{
+	uint32_t i, k;
+	/* NOTE(review): dr->row capacity is not checked here; callers presumably guarantee room - confirm. */
+	k = dr->cnt;
+	for (i = 0; i != fp->last_idx; i++) {
+		if (fp->frags[i].mb != NULL) {
+			dr->row[k++] = fp->frags[i].mb;
+			fp->frags[i].mb = NULL;
+		}
+	}
+
+	fp->last_idx = 0;
+	dr->cnt = k;
+}
+
+/* delete fragment's mbufs immediately (rte_pktmbuf_free) instead of using death row, then reset last_idx */
+static inline void
+ip_frag_free_immediate(struct ip_frag_pkt *fp)
+{
+	uint32_t i;
+
+	for (i = 0; i < fp->last_idx; i++) {
+		if (fp->frags[i].mb != NULL) {
+			IP_FRAG_LOG(DEBUG, "%s:%d\n"
+			    "mbuf: %p, tms: %" PRIu64", key: <%" PRIx64 ", %#x>\n",
+			    __func__, __LINE__, fp->frags[i].mb, fp->start,
+			    fp->key.src_dst[0], fp->key.id);
+			rte_pktmbuf_free(fp->frags[i].mb);
+			fp->frags[i].mb = NULL;
+		}
+	}
+
+	fp->last_idx = 0;
+}
+
+/* if the entry's key is empty, unlink the entry from the table LRU list and decrement use_entries */
+static inline void
+ip_frag_inuse(struct rte_ip_frag_tbl *tbl, const struct  ip_frag_pkt *fp)
+{
+	if (ip_frag_key_is_empty(&fp->key)) {
+		TAILQ_REMOVE(&tbl->lru, fp, lru);
+		tbl->use_entries--;
+	}
+}
+
+/* reset the fragment: stamp start = tms, clear sizes and the first/last slots; the key is not touched */
+static inline void
+ip_frag_reset(struct ip_frag_pkt *fp, uint64_t tms)
+{
+	static const struct ip_frag zero_frag = {
+		.ofs = 0,
+		.len = 0,
+		.mb = NULL,
+	};
+	/* total_size stays UINT32_MAX until ip_frag_process sees the last fragment and sets it. */
+	fp->start = tms;
+	fp->total_size = UINT32_MAX;
+	fp->frag_size = 0;
+	fp->last_idx = IP_MIN_FRAG_NUM;
+	fp->frags[IP_LAST_FRAG_IDX] = zero_frag;
+	fp->frags[IP_FIRST_FRAG_IDX] = zero_frag;
+}
+
+/* local frag table helper: send fp's mbufs to death row, invalidate its key, unlink it from the LRU list */
+static inline void
+ip_frag_tbl_del(struct rte_ip_frag_tbl *tbl, struct rte_ip_frag_death_row *dr,
+	struct ip_frag_pkt *fp)
+{
+	ip_frag_free(fp, dr);
+	ip_frag_key_invalidate(&fp->key);
+	TAILQ_REMOVE(&tbl->lru, fp, lru);
+	tbl->use_entries--;
+	IP_FRAG_TBL_STAT_UPDATE(&tbl->stat, del_num, 1);
+}
+
+#endif /* _IP_FRAG_COMMON_H_ */
diff --git a/src/seastar/dpdk/lib/librte_ip_frag/ip_frag_internal.c b/src/seastar/dpdk/lib/librte_ip_frag/ip_frag_internal.c
new file mode 100644
index 000000000..97470a872
--- /dev/null
+++ 
b/src/seastar/dpdk/lib/librte_ip_frag/ip_frag_internal.c
@@ -0,0 +1,369 @@
+/* SPDX-License-Identifier: BSD-3-Clause
+ * Copyright(c) 2010-2014 Intel Corporation
+ */
+
+#include <stddef.h>
+
+#include <rte_jhash.h>
+#include <rte_hash_crc.h>
+
+#include "ip_frag_common.h"
+
+#define	PRIME_VALUE	0xeaad8405
+
+#define	IP_FRAG_TBL_POS(tbl, sig)	\
+	((tbl)->pkt + ((sig) & (tbl)->entry_mask))
+/* Claim entry fp for key: copy the key, reset the entry at tms, append it to the LRU tail. */
+static inline void
+ip_frag_tbl_add(struct rte_ip_frag_tbl *tbl,  struct ip_frag_pkt *fp,
+	const struct ip_frag_key *key, uint64_t tms)
+{
+	fp->key = key[0];
+	ip_frag_reset(fp, tms);
+	TAILQ_INSERT_TAIL(&tbl->lru, fp, lru);
+	tbl->use_entries++;
+	IP_FRAG_TBL_STAT_UPDATE(&tbl->stat, add_num, 1);
+}
+/* Recycle a timed-out entry for the same key: drop its mbufs, reset it, move it to the LRU tail. */
+static inline void
+ip_frag_tbl_reuse(struct rte_ip_frag_tbl *tbl, struct rte_ip_frag_death_row *dr,
+	struct ip_frag_pkt *fp, uint64_t tms)
+{
+	ip_frag_free(fp, dr);
+	ip_frag_reset(fp, tms);
+	TAILQ_REMOVE(&tbl->lru, fp, lru);
+	TAILQ_INSERT_TAIL(&tbl->lru, fp, lru);
+	IP_FRAG_TBL_STAT_UPDATE(&tbl->stat, reuse_num, 1);
+}
+
+/* Hash an IPv4 key (two 32-bit address words + id) into two signatures: *v1 and a mix of it in *v2. */
+static inline void
+ipv4_frag_hash(const struct ip_frag_key *key, uint32_t *v1, uint32_t *v2)
+{
+	uint32_t v;
+	const uint32_t *p;
+
+	p = (const uint32_t *)&key->src_dst;
+
+#ifdef RTE_ARCH_X86
+	v = rte_hash_crc_4byte(p[0], PRIME_VALUE);
+	v = rte_hash_crc_4byte(p[1], v);
+	v = rte_hash_crc_4byte(key->id, v);
+#else
+
+	v = rte_jhash_3words(p[0], p[1], key->id, PRIME_VALUE);
+#endif /* RTE_ARCH_X86 */
+
+	*v1 =  v;
+	*v2 = (v << 7) + (v >> 14);
+}
+/* Hash an IPv6 key (eight 32-bit address words + id) into two signatures, same scheme as IPv4. */
+static inline void
+ipv6_frag_hash(const struct ip_frag_key *key, uint32_t *v1, uint32_t *v2)
+{
+	uint32_t v;
+	const uint32_t *p;
+
+	p = (const uint32_t *) &key->src_dst;
+
+#ifdef RTE_ARCH_X86
+	v = rte_hash_crc_4byte(p[0], PRIME_VALUE);
+	v = rte_hash_crc_4byte(p[1], v);
+	v = rte_hash_crc_4byte(p[2], v);
+	v = rte_hash_crc_4byte(p[3], v);
+	v = rte_hash_crc_4byte(p[4], v);
+	v = rte_hash_crc_4byte(p[5], v);
+	v = rte_hash_crc_4byte(p[6], v);
+	v = rte_hash_crc_4byte(p[7], v);
+	v = rte_hash_crc_4byte(key->id, v);
+#else
+
+	v = rte_jhash_3words(p[0], p[1], p[2], PRIME_VALUE);
+	v = rte_jhash_3words(p[3], p[4], p[5], v);
+	v = rte_jhash_3words(p[6], p[7], key->id, v);
+#endif /* RTE_ARCH_X86 */
+
+	*v1 =  v;
+	*v2 = (v << 7) + (v >> 14);
+}
+/* Store fragment mb in fp; returns the reassembled packet once complete, else NULL (errors go to dr). */
+struct rte_mbuf *
+ip_frag_process(struct ip_frag_pkt *fp, struct rte_ip_frag_death_row *dr,
+	struct rte_mbuf *mb, uint16_t ofs, uint16_t len, uint16_t more_frags)
+{
+	uint32_t idx;
+
+	fp->frag_size += len;
+
+	/* this is the first fragment. */
+	if (ofs == 0) {
+		idx = (fp->frags[IP_FIRST_FRAG_IDX].mb == NULL) ?
+				IP_FIRST_FRAG_IDX : UINT32_MAX;
+
+	/* this is the last fragment. */
+	} else if (more_frags == 0) {
+		fp->total_size = ofs + len;
+		idx = (fp->frags[IP_LAST_FRAG_IDX].mb == NULL) ?
+				IP_LAST_FRAG_IDX : UINT32_MAX;
+
+	/* this is the intermediate fragment. */
+	} else if ((idx = fp->last_idx) <
+		sizeof (fp->frags) / sizeof (fp->frags[0])) {
+		fp->last_idx++;
+	}
+
+	/*
+	 * erroneous packet: either exceed max allowed number of fragments,
+	 * or duplicate first/last fragment encountered.
+	 */
+	if (idx >= sizeof (fp->frags) / sizeof (fp->frags[0])) {
+
+		/* report an error. */
+		if (fp->key.key_len == IPV4_KEYLEN)
+			IP_FRAG_LOG(DEBUG, "%s:%d invalid fragmented packet:\n"
+				"ipv4_frag_pkt: %p, key: <%" PRIx64 ", %#x>, "
+				"total_size: %u, frag_size: %u, last_idx: %u\n"
+				"first fragment: ofs: %u, len: %u\n"
+				"last fragment: ofs: %u, len: %u\n\n",
+				__func__, __LINE__,
+				fp, fp->key.src_dst[0], fp->key.id,
+				fp->total_size, fp->frag_size, fp->last_idx,
+				fp->frags[IP_FIRST_FRAG_IDX].ofs,
+				fp->frags[IP_FIRST_FRAG_IDX].len,
+				fp->frags[IP_LAST_FRAG_IDX].ofs,
+				fp->frags[IP_LAST_FRAG_IDX].len);
+		else
+			IP_FRAG_LOG(DEBUG, "%s:%d invalid fragmented packet:\n"
+				"ipv6_frag_pkt: %p, key: <" IPv6_KEY_BYTES_FMT ", %#x>, "
+				"total_size: %u, frag_size: %u, last_idx: %u\n"
+				"first fragment: ofs: %u, len: %u\n"
+				"last fragment: ofs: %u, len: %u\n\n",
+				__func__, __LINE__,
+				fp, IPv6_KEY_BYTES(fp->key.src_dst), fp->key.id,
+				fp->total_size, fp->frag_size, fp->last_idx,
+				fp->frags[IP_FIRST_FRAG_IDX].ofs,
+				fp->frags[IP_FIRST_FRAG_IDX].len,
+				fp->frags[IP_LAST_FRAG_IDX].ofs,
+				fp->frags[IP_LAST_FRAG_IDX].len);
+
+		/* free all fragments, invalidate the entry. */
+		ip_frag_free(fp, dr);
+		ip_frag_key_invalidate(&fp->key);
+		IP_FRAG_MBUF2DR(dr, mb);
+
+		return NULL;
+	}
+
+	fp->frags[idx].ofs = ofs;
+	fp->frags[idx].len = len;
+	fp->frags[idx].mb = mb;
+
+	mb = NULL;
+
+	/* not all fragments are collected yet. */
+	if (likely (fp->frag_size < fp->total_size)) {
+		return mb;
+
+	/* if we collected all fragments, then try to reassemble. */
+	} else if (fp->frag_size == fp->total_size &&
+			fp->frags[IP_FIRST_FRAG_IDX].mb != NULL) {
+		if (fp->key.key_len == IPV4_KEYLEN)
+			mb = ipv4_frag_reassemble(fp);
+		else
+			mb = ipv6_frag_reassemble(fp);
+	}
+
+	/* erroneous set of fragments. */
+	if (mb == NULL) {
+
+		/* report an error. */
+		if (fp->key.key_len == IPV4_KEYLEN)
+			IP_FRAG_LOG(DEBUG, "%s:%d invalid fragmented packet:\n"
+				"ipv4_frag_pkt: %p, key: <%" PRIx64 ", %#x>, "
+				"total_size: %u, frag_size: %u, last_idx: %u\n"
+				"first fragment: ofs: %u, len: %u\n"
+				"last fragment: ofs: %u, len: %u\n\n",
+				__func__, __LINE__,
+				fp, fp->key.src_dst[0], fp->key.id,
+				fp->total_size, fp->frag_size, fp->last_idx,
+				fp->frags[IP_FIRST_FRAG_IDX].ofs,
+				fp->frags[IP_FIRST_FRAG_IDX].len,
+				fp->frags[IP_LAST_FRAG_IDX].ofs,
+				fp->frags[IP_LAST_FRAG_IDX].len);
+		else
+			IP_FRAG_LOG(DEBUG, "%s:%d invalid fragmented packet:\n"
+				"ipv6_frag_pkt: %p, key: <" IPv6_KEY_BYTES_FMT ", %#x>, "
+				"total_size: %u, frag_size: %u, last_idx: %u\n"
+				"first fragment: ofs: %u, len: %u\n"
+				"last fragment: ofs: %u, len: %u\n\n",
+				__func__, __LINE__,
+				fp, IPv6_KEY_BYTES(fp->key.src_dst), fp->key.id,
+				fp->total_size, fp->frag_size, fp->last_idx,
+				fp->frags[IP_FIRST_FRAG_IDX].ofs,
+				fp->frags[IP_FIRST_FRAG_IDX].len,
+				fp->frags[IP_LAST_FRAG_IDX].ofs,
+				fp->frags[IP_LAST_FRAG_IDX].len);
+
+		/* free associated resources. */
+		ip_frag_free(fp, dr);
+	}
+
+	/* we are done with that entry, invalidate it. */
+	ip_frag_key_invalidate(&fp->key);
+	return mb;
+}
+
+
+/*
+ * Find an entry in the table for the corresponding fragment.
+ * If such entry is not present, then allocate a new one.
+ * If the entry is stale, then free and reuse it.
+ */
+struct ip_frag_pkt *
+ip_frag_find(struct rte_ip_frag_tbl *tbl, struct rte_ip_frag_death_row *dr,
+	const struct ip_frag_key *key, uint64_t tms)
+{
+	struct ip_frag_pkt *pkt, *free, *stale, *lru;
+	uint64_t max_cycles;
+
+	/*
+	 * The two assignments below are redundant (ip_frag_lookup sets both
+	 * on a miss); they are only here to keep gcc 4.6 happy.
+	 */
+	free = NULL;
+	stale = NULL;
+	max_cycles = tbl->max_cycles;
+
+	IP_FRAG_TBL_STAT_UPDATE(&tbl->stat, find_num, 1);
+
+	if ((pkt = ip_frag_lookup(tbl, key, tms, &free, &stale)) == NULL) {
+
+		/*timed-out entry, free and invalidate it*/
+		if (stale != NULL) {
+			ip_frag_tbl_del(tbl, dr, stale);
+			free = stale;
+
+		/*
+		 * we found a free entry, but the table is already at
+		 * max_entries: evict the oldest LRU entry if it timed out,
+		 * otherwise (or when the LRU list is empty) report no space.
+		 */
+		} else if (free != NULL &&
+				tbl->max_entries <= tbl->use_entries) {
+			lru = TAILQ_FIRST(&tbl->lru);
+			if (lru != NULL && max_cycles + lru->start < tms) {
+				ip_frag_tbl_del(tbl, dr, lru);
+			} else {
+				free = NULL;
+				IP_FRAG_TBL_STAT_UPDATE(&tbl->stat,
+					fail_nospace, 1);
+			}
+		}
+
+		/* found a free entry to reuse. */
+		if (free != NULL) {
+			ip_frag_tbl_add(tbl,  free, key, tms);
+			pkt = free;
+		}
+
+	/*
+	 * we found the flow, but it is already timed out,
+	 * so free associated resources, reposition it in the LRU list,
+	 * and reuse it.
+	 */
+	} else if (max_cycles + pkt->start < tms) {
+		ip_frag_tbl_reuse(tbl, dr, pkt, tms);
+	}
+
+	IP_FRAG_TBL_STAT_UPDATE(&tbl->stat, fail_total, (pkt == NULL));
+
+	tbl->last = pkt;
+	return pkt;
+}
+/* Probe both candidate buckets for key; on a miss return NULL and report empty/timed-out slots via *free, *stale. */
+struct ip_frag_pkt *
+ip_frag_lookup(struct rte_ip_frag_tbl *tbl,
+	const struct ip_frag_key *key, uint64_t tms,
+	struct ip_frag_pkt **free, struct ip_frag_pkt **stale)
+{
+	struct ip_frag_pkt *p1, *p2;
+	struct ip_frag_pkt *empty, *old;
+	uint64_t max_cycles;
+	uint32_t i, assoc, sig1, sig2;
+
+	empty = NULL;
+	old = NULL;
+
+	max_cycles = tbl->max_cycles;
+	assoc = tbl->bucket_entries;
+
+	if (tbl->last != NULL && ip_frag_key_cmp(key, &tbl->last->key) == 0)
+		return tbl->last;
+
+	/* different hashing methods for IPv4 and IPv6 */
+	if (key->key_len == IPV4_KEYLEN)
+		ipv4_frag_hash(key, &sig1, &sig2);
+	else
+		ipv6_frag_hash(key, &sig1, &sig2);
+
+	p1 = IP_FRAG_TBL_POS(tbl, sig1);
+	p2 = IP_FRAG_TBL_POS(tbl, sig2);
+
+	for (i = 0; i != assoc; i++) {
+		if (p1[i].key.key_len == IPV4_KEYLEN)
+			IP_FRAG_LOG(DEBUG, "%s:%d:\n"
+					"tbl: %p, max_entries: %u, use_entries: %u\n"
+					"ipv4_frag_pkt line0: %p, index: %u from %u\n"
+			"key: <%" PRIx64 ", %#x>, start: %" PRIu64 "\n",
+					__func__, __LINE__,
+					tbl, tbl->max_entries, tbl->use_entries,
+					p1, i, assoc,
+			p1[i].key.src_dst[0], p1[i].key.id, p1[i].start);
+		else
+			IP_FRAG_LOG(DEBUG, "%s:%d:\n"
+					"tbl: %p, max_entries: %u, use_entries: %u\n"
+					"ipv6_frag_pkt line0: %p, index: %u from %u\n"
+			"key: <" IPv6_KEY_BYTES_FMT ", %#x>, start: %" PRIu64 "\n",
+					__func__, __LINE__,
+					tbl, tbl->max_entries, tbl->use_entries,
+					p1, i, assoc,
+			IPv6_KEY_BYTES(p1[i].key.src_dst), p1[i].key.id, p1[i].start);
+
+		if (ip_frag_key_cmp(key, &p1[i].key) == 0)
+			return p1 + i;
+		else if (ip_frag_key_is_empty(&p1[i].key))
+			empty = (empty == NULL) ? (p1 + i) : empty;
+		else if (max_cycles + p1[i].start < tms)
+			old = (old == NULL) ? (p1 + i) : old;
+
+		if (p2[i].key.key_len == IPV4_KEYLEN)
+			IP_FRAG_LOG(DEBUG, "%s:%d:\n"
+					"tbl: %p, max_entries: %u, use_entries: %u\n"
+					"ipv4_frag_pkt line1: %p, index: %u from %u\n"
+			"key: <%" PRIx64 ", %#x>, start: %" PRIu64 "\n",
+					__func__, __LINE__,
+					tbl, tbl->max_entries, tbl->use_entries,
+					p2, i, assoc,
+			p2[i].key.src_dst[0], p2[i].key.id, p2[i].start);
+		else
+			IP_FRAG_LOG(DEBUG, "%s:%d:\n"
+					"tbl: %p, max_entries: %u, use_entries: %u\n"
+					"ipv6_frag_pkt line1: %p, index: %u from %u\n"
+			"key: <" IPv6_KEY_BYTES_FMT ", %#x>, start: %" PRIu64 "\n",
+					__func__, __LINE__,
+					tbl, tbl->max_entries, tbl->use_entries,
+					p2, i, assoc,
+			IPv6_KEY_BYTES(p2[i].key.src_dst), p2[i].key.id, p2[i].start);
+
+		if (ip_frag_key_cmp(key, &p2[i].key) == 0)
+			return p2 + i;
+		else if (ip_frag_key_is_empty(&p2[i].key))
+			empty = (empty == NULL) ?( p2 + i) : empty;
+		else if (max_cycles + p2[i].start < tms)
+			old = (old == NULL) ? (p2 + i) : old;
+	}
+
+	*free = empty;
+	*stale = old;
+	return NULL;
+}
diff --git a/src/seastar/dpdk/lib/librte_ip_frag/meson.build b/src/seastar/dpdk/lib/librte_ip_frag/meson.build
new file mode 100644
index 000000000..c5b9a4596
--- /dev/null
+++ b/src/seastar/dpdk/lib/librte_ip_frag/meson.build
@@ -0,0 +1,11 @@
+# SPDX-License-Identifier: BSD-3-Clause
+# Copyright(c) 2017 Intel Corporation
+
+sources = files('rte_ipv4_fragmentation.c',
+		'rte_ipv6_fragmentation.c',
+		'rte_ipv4_reassembly.c',
+		'rte_ipv6_reassembly.c',
+		'rte_ip_frag_common.c',
+		'ip_frag_internal.c')
+headers = files('rte_ip_frag.h')
+deps += ['ethdev', 'hash']
diff --git a/src/seastar/dpdk/lib/librte_ip_frag/rte_ip_frag.h b/src/seastar/dpdk/lib/librte_ip_frag/rte_ip_frag.h
new file mode 100644
index 000000000..bc4c100f3
--- /dev/null
+++ b/src/seastar/dpdk/lib/librte_ip_frag/rte_ip_frag.h
@@ -0,0 +1,358 @@
+/* SPDX-License-Identifier: BSD-3-Clause
+ * Copyright(c) 2010-2014 Intel Corporation
+ */
+
+#ifndef _RTE_IP_FRAG_H_
+#define _RTE_IP_FRAG_H_
+
+/**
+ * @file
+ * RTE IP 
Fragmentation and Reassembly
+ *
+ * Implementation of IP packet fragmentation and reassembly.
+ */
+
+#ifdef __cplusplus
+extern "C" {
+#endif
+
+#include <stdint.h>
+#include <stdio.h>
+
+#include <rte_config.h>
+#include <rte_malloc.h>
+#include <rte_memory.h>
+#include <rte_ip.h>
+#include <rte_byteorder.h>
+
+struct rte_mbuf;
+
+enum {
+	IP_LAST_FRAG_IDX,    /**< index of last fragment */
+	IP_FIRST_FRAG_IDX,   /**< index of first fragment */
+	IP_MIN_FRAG_NUM,     /**< minimum number of fragments */
+	IP_MAX_FRAG_NUM = RTE_LIBRTE_IP_FRAG_MAX_FRAG,
+	/**< maximum number of fragments per packet */
+};
+
+/** @internal fragmented mbuf */
+struct ip_frag {
+	uint16_t ofs;          /**< offset into the packet */
+	uint16_t len;          /**< length of fragment */
+	struct rte_mbuf *mb;   /**< fragment mbuf */
+};
+
+/** @internal <src addr, dst_addr, id> to uniquely identify fragmented datagram. */
+struct ip_frag_key {
+	uint64_t src_dst[4];
+	/**< src and dst address, only first 8 bytes used for IPv4 */
+	RTE_STD_C11
+	union {
+		uint64_t id_key_len; /**< combined for easy fetch */
+		__extension__
+		struct {
+			uint32_t id;       /**< packet id */
+			uint32_t key_len;  /**< src/dst key length; 0 marks an empty key */
+		};
+	};
+};
+
+/**
+ * @internal Fragmented packet to reassemble.
+ * First two entries in the frags[] array are for the last and first fragments.
+ */
+struct ip_frag_pkt {
+	TAILQ_ENTRY(ip_frag_pkt) lru;   /**< LRU list */
+	struct ip_frag_key key;           /**< fragmentation key */
+	uint64_t             start;       /**< creation timestamp */
+	uint32_t             total_size;  /**< expected reassembled size */
+	uint32_t             frag_size;   /**< size of fragments received */
+	uint32_t             last_idx;    /**< index of next entry to fill */
+	struct ip_frag       frags[IP_MAX_FRAG_NUM]; /**< fragments */
+} __rte_cache_aligned;
+
+#define IP_FRAG_DEATH_ROW_LEN 32 /**< death row size (in packets) */
+
+/* death row size in mbufs */
+#define IP_FRAG_DEATH_ROW_MBUF_LEN (IP_FRAG_DEATH_ROW_LEN * (IP_MAX_FRAG_NUM + 1))
+
+/** mbuf death row (packets to be freed) */
+struct rte_ip_frag_death_row {
+	uint32_t cnt;          /**< number of mbufs currently on death row */
+	struct rte_mbuf *row[IP_FRAG_DEATH_ROW_MBUF_LEN];
+	/**< mbufs to be freed */
+};
+
+TAILQ_HEAD(ip_pkt_list, ip_frag_pkt); /**< @internal fragments tailq */
+
+/** fragmentation table statistics */
+struct ip_frag_tbl_stat {
+	uint64_t find_num;      /**< total # of find/insert attempts. */
+	uint64_t add_num;       /**< # of add ops. */
+	uint64_t del_num;       /**< # of del ops. */
+	uint64_t reuse_num;     /**< # of reuse (del/add) ops. */
+	uint64_t fail_total;    /**< total # of add failures. */
+	uint64_t fail_nospace;  /**< # of 'no space' add failures. */
+} __rte_cache_aligned;
+
+/** fragmentation table */
+struct rte_ip_frag_tbl {
+	uint64_t             max_cycles;      /**< ttl for table entries. */
+	uint32_t             entry_mask;      /**< hash value mask. */
+	uint32_t             max_entries;     /**< max entries allowed. */
+	uint32_t             use_entries;     /**< entries in use. */
+	uint32_t             bucket_entries;  /**< hash associativity. */
+	uint32_t             nb_entries;      /**< total size of the table. */
+	uint32_t             nb_buckets;      /**< num of associativity lines. */
+	struct ip_frag_pkt *last;         /**< last used entry. */
+	struct ip_pkt_list lru;           /**< LRU list for table entries. */
+	struct ip_frag_tbl_stat stat;     /**< statistics counters. */
+	__extension__ struct ip_frag_pkt pkt[0]; /**< hash table; trailing zero-length array, storage follows the struct. */
+};
+
+/** IPv6 fragment extension header */
+#define	RTE_IPV6_EHDR_MF_SHIFT			0
+#define	RTE_IPV6_EHDR_MF_MASK			1
+#define	RTE_IPV6_EHDR_FO_SHIFT			3
+#define	RTE_IPV6_EHDR_FO_MASK			(~((1 << RTE_IPV6_EHDR_FO_SHIFT) - 1))
+#define	RTE_IPV6_EHDR_FO_ALIGN			(1 << RTE_IPV6_EHDR_FO_SHIFT)
+
+#define RTE_IPV6_FRAG_USED_MASK			\
+	(RTE_IPV6_EHDR_MF_MASK | RTE_IPV6_EHDR_FO_MASK)
+
+#define RTE_IPV6_GET_MF(x)				((x) & RTE_IPV6_EHDR_MF_MASK)
+#define RTE_IPV6_GET_FO(x)				((x) >> RTE_IPV6_EHDR_FO_SHIFT)
+
+#define RTE_IPV6_SET_FRAG_DATA(fo, mf)	\
+	(((fo) & RTE_IPV6_EHDR_FO_MASK) | ((mf) & RTE_IPV6_EHDR_MF_MASK))
+
+struct ipv6_extension_fragment {
+	uint8_t next_header;            /**< Next header type */
+	uint8_t reserved;               /**< Reserved */
+	uint16_t frag_data;             /**< All fragmentation data */
+	uint32_t id;                    /**< Packet ID */
+} __attribute__((__packed__));
+
+
+
+/**
+ * Create a new IP fragmentation table.
+ *
+ * @param bucket_num
+ *   Number of buckets in the hash table.
+ * @param bucket_entries
+ *   Number of entries per bucket (i.e. hash associativity).
+ *   Should be power of two.
+ * @param max_entries
+ *   Maximum number of entries that could be stored in the table.
+ *   The value should be less than or equal to bucket_num * bucket_entries.
+ * @param max_cycles
+ *   Maximum TTL in cycles for each fragmented packet.
+ * @param socket_id
+ *   The *socket_id* argument is the socket identifier in the case of
+ *   NUMA. The value can be *SOCKET_ID_ANY* if there are no NUMA constraints.
+ * @return
+ *   The pointer to the newly allocated fragmentation table, on success. NULL on error.
+ */
+struct rte_ip_frag_tbl * rte_ip_frag_table_create(uint32_t bucket_num,
+		uint32_t bucket_entries,  uint32_t max_entries,
+		uint64_t max_cycles, int socket_id);
+
+/**
+ * Free allocated IP fragmentation table.
+ *
+ * @param tbl
+ *   Fragmentation table to free.
+ */
+void
+rte_ip_frag_table_destroy(struct rte_ip_frag_tbl *tbl);
+
+/**
+ * This function implements the fragmentation of IPv6 packets.
+ *
+ * @param pkt_in
+ *   The input packet.
+ * @param pkts_out
+ *   Array storing the output fragments.
+ * @param nb_pkts_out
+ *   Number of fragments.
+ * @param mtu_size
+ *   Size in bytes of the Maximum Transfer Unit (MTU) for the outgoing IPv6
+ *   datagrams. This value includes the size of the IPv6 header.
+ * @param pool_direct
+ *   MBUF pool used for allocating direct buffers for the output fragments.
+ * @param pool_indirect
+ *   MBUF pool used for allocating indirect buffers for the output fragments.
+ * @return
+ *   Upon successful completion - number of output fragments placed
+ *   in the pkts_out array.
+ *   Otherwise - (-1) * errno.
+ */
+int32_t
+rte_ipv6_fragment_packet(struct rte_mbuf *pkt_in,
+		struct rte_mbuf **pkts_out,
+		uint16_t nb_pkts_out,
+		uint16_t mtu_size,
+		struct rte_mempool *pool_direct,
+		struct rte_mempool *pool_indirect);
+
+/**
+ * This function implements reassembly of fragmented IPv6 packets.
+ * Incoming mbuf should have its l2_len/l3_len fields set up correctly.
+ *
+ * @param tbl
+ *   Table where to lookup/add the fragmented packet.
+ * @param dr
+ *   Death row to which buffers to be freed are added.
+ * @param mb
+ *   Incoming mbuf with IPv6 fragment.
+ * @param tms
+ *   Fragment arrival timestamp.
+ * @param ip_hdr
+ *   Pointer to the IPv6 header.
+ * @param frag_hdr
+ *   Pointer to the IPv6 fragment extension header.
+ * @return
+ *   Pointer to mbuf for reassembled packet, or NULL if:
+ *   - an error occurred.
+ *   - not all fragments of the packet are collected yet.
+ */
+struct rte_mbuf *rte_ipv6_frag_reassemble_packet(struct rte_ip_frag_tbl *tbl,
+		struct rte_ip_frag_death_row *dr,
+		struct rte_mbuf *mb, uint64_t tms, struct ipv6_hdr *ip_hdr,
+		struct ipv6_extension_fragment *frag_hdr);
+
+/**
+ * Return a pointer to the packet's fragment header, if found.
+ * It only looks at the extension header that's right after the fixed IPv6
+ * header, and doesn't follow the whole chain of extension headers.
+ *
+ * @param hdr
+ *   Pointer to the IPv6 header.
+ * @return
+ *   Pointer to the IPv6 fragment extension header, or NULL if it's not
+ *   present.
+ */
+static inline struct ipv6_extension_fragment *
+rte_ipv6_frag_get_ipv6_fragment_header(struct ipv6_hdr *hdr)
+{
+	if (hdr->proto == IPPROTO_FRAGMENT) {
+		return (struct ipv6_extension_fragment *) ++hdr;
+	}
+	else
+		return NULL;
+}
+
+/**
+ * IPv4 fragmentation.
+ *
+ * This function implements the fragmentation of IPv4 packets.
+ *
+ * @param pkt_in
+ *   The input packet.
+ * @param pkts_out
+ *   Array storing the output fragments.
+ * @param nb_pkts_out
+ *   Number of fragments.
+ * @param mtu_size
+ *   Size in bytes of the Maximum Transfer Unit (MTU) for the outgoing IPv4
+ *   datagrams. This value includes the size of the IPv4 header.
+ * @param pool_direct
+ *   MBUF pool used for allocating direct buffers for the output fragments.
+ * @param pool_indirect
+ *   MBUF pool used for allocating indirect buffers for the output fragments.
+ * @return
+ *   Upon successful completion - number of output fragments placed
+ *   in the pkts_out array.
+ *   Otherwise - (-1) * errno.
+ */
+int32_t rte_ipv4_fragment_packet(struct rte_mbuf *pkt_in,
+			struct rte_mbuf **pkts_out,
+			uint16_t nb_pkts_out, uint16_t mtu_size,
+			struct rte_mempool *pool_direct,
+			struct rte_mempool *pool_indirect);
+
+/**
+ * This function implements reassembly of fragmented IPv4 packets.
+ * Incoming mbufs should have their l2_len/l3_len fields set up correctly.
+ *
+ * @param tbl
+ *   Table where to lookup/add the fragmented packet.
+ * @param dr
+ *   Death row to which buffers to be freed are added.
+ * @param mb
+ *   Incoming mbuf with IPv4 fragment.
+ * @param tms
+ *   Fragment arrival timestamp.
+ * @param ip_hdr
+ *   Pointer to the IPv4 header inside the fragment.
+ * @return
+ *   Pointer to mbuf for reassembled packet, or NULL if:
+ *   - an error occurred.
+ *   - not all fragments of the packet are collected yet.
+ */
+struct rte_mbuf * rte_ipv4_frag_reassemble_packet(struct rte_ip_frag_tbl *tbl,
+		struct rte_ip_frag_death_row *dr,
+		struct rte_mbuf *mb, uint64_t tms, struct ipv4_hdr *ip_hdr);
+
+/**
+ * Check if the IPv4 packet is fragmented
+ *
+ * @param hdr
+ *   IPv4 header of the packet
+ * @return
+ *   1 if fragmented (MF flag set or non-zero fragment offset), 0 if not fragmented
+ */
+static inline int
+rte_ipv4_frag_pkt_is_fragmented(const struct ipv4_hdr * hdr) {
+	uint16_t flag_offset, ip_flag, ip_ofs;
+
+	flag_offset = rte_be_to_cpu_16(hdr->fragment_offset);
+	ip_ofs = (uint16_t)(flag_offset & IPV4_HDR_OFFSET_MASK);
+	ip_flag = (uint16_t)(flag_offset & IPV4_HDR_MF_FLAG);
+
+	return ip_flag != 0 || ip_ofs != 0;
+}
+
+/**
+ * Free mbufs on a given death row.
+ *
+ * @param dr
+ *   Death row to free mbufs in.
+ * @param prefetch
+ *   How many buffers to prefetch before freeing.
+ */
+void rte_ip_frag_free_death_row(struct rte_ip_frag_death_row *dr,
+		uint32_t prefetch);
+
+
+/**
+ * Dump fragmentation table statistics to file.
+ *
+ * @param f
+ *   File to dump statistics to
+ * @param tbl
+ *   Fragmentation table to dump statistics from
+ */
+void
+rte_ip_frag_table_statistics_dump(FILE * f, const struct rte_ip_frag_tbl *tbl);
+
+/**
+ * Delete expired fragments
+ *
+ * @param tbl
+ *   Table to delete expired fragments from
+ * @param dr
+ *   Death row to which buffers to be freed are added
+ * @param tms
+ *   Current timestamp
+ */
+void __rte_experimental
+rte_frag_table_del_expired_entries(struct rte_ip_frag_tbl *tbl,
+	struct rte_ip_frag_death_row *dr, uint64_t tms);
+
+#ifdef __cplusplus
+}
+#endif
+
+#endif /* _RTE_IP_FRAG_H_ */
diff --git a/src/seastar/dpdk/lib/librte_ip_frag/rte_ip_frag_common.c b/src/seastar/dpdk/lib/librte_ip_frag/rte_ip_frag_common.c
new file mode 100644
index 000000000..a23f6f24f
--- /dev/null
+++ b/src/seastar/dpdk/lib/librte_ip_frag/rte_ip_frag_common.c
@@ -0,0 +1,144 @@
+/* SPDX-License-Identifier: BSD-3-Clause
+ * Copyright(c) 2010-2014 Intel Corporation
+ */
+
+#include <stddef.h>
+#include <stdio.h> 
+
+#include <rte_memory.h>
+#include <rte_log.h>
+
+#include "ip_frag_common.h"
+
+#define	IP_FRAG_HASH_FNUM	2
+
+/* free every mbuf on the death row, prefetching up to 'prefetch' entries ahead, then empty the row */
+void
+rte_ip_frag_free_death_row(struct rte_ip_frag_death_row *dr,
+		uint32_t prefetch)
+{
+	uint32_t i, k, n;
+
+	k = RTE_MIN(prefetch, dr->cnt);
+	n = dr->cnt;
+
+	for (i = 0; i != k; i++)
+		rte_prefetch0(dr->row[i]);
+
+	for (i = 0; i != n - k; i++) {
+		rte_prefetch0(dr->row[i + k]);
+		rte_pktmbuf_free(dr->row[i]);
+	}
+
+	for (; i != n; i++)
+		rte_pktmbuf_free(dr->row[i]);
+
+	dr->cnt = 0;
+}
+
+/* create fragmentation table; returns NULL on invalid parameters or allocation failure */
+struct rte_ip_frag_tbl *
+rte_ip_frag_table_create(uint32_t bucket_num, uint32_t bucket_entries,
+	uint32_t max_entries, uint64_t max_cycles, int socket_id)
+{
+	struct rte_ip_frag_tbl *tbl;
+	size_t sz;
+	uint64_t nb_entries;
+
+	nb_entries = rte_align32pow2(bucket_num);
+	nb_entries *= bucket_entries;
+	nb_entries *= IP_FRAG_HASH_FNUM;
+
+	/* check input parameters; nb_entries is 64-bit so the 32-bit overflow test is meaningful. */
+	if (rte_is_power_of_2(bucket_entries) == 0 ||
+			nb_entries > UINT32_MAX || nb_entries == 0 ||
+			nb_entries < max_entries) {
+		RTE_LOG(ERR, USER1, "%s: invalid input parameter\n", __func__);
+		return NULL;
+	}
+
+	sz = sizeof (*tbl) + nb_entries * sizeof (tbl->pkt[0]);
+	if ((tbl = rte_zmalloc_socket(__func__, sz, RTE_CACHE_LINE_SIZE,
+			socket_id)) == NULL) {
+		RTE_LOG(ERR, USER1,
+			"%s: allocation of %zu bytes at socket %d failed do\n",
+			__func__, sz, socket_id);
+		return NULL;
+	}
+
+	RTE_LOG(INFO, USER1, "%s: allocated of %zu bytes at socket %d\n",
+		__func__, sz, socket_id);
+
+	tbl->max_cycles = max_cycles;
+	tbl->max_entries = max_entries;
+	tbl->nb_entries = (uint32_t)nb_entries;
+	tbl->nb_buckets = bucket_num;
+	tbl->bucket_entries = bucket_entries;
+	tbl->entry_mask = (tbl->nb_entries - 1) & ~(tbl->bucket_entries  - 1);
+
+	TAILQ_INIT(&(tbl->lru));
+	return tbl;
+}
+
+/* delete fragmentation table: free the mbufs still held by LRU entries, then the table; NULL is a no-op */
+void
+rte_ip_frag_table_destroy(struct rte_ip_frag_tbl *tbl)
+{
+	struct ip_frag_pkt *fp;
+
+	if (tbl == NULL)
+		return;
+	TAILQ_FOREACH(fp, &tbl->lru, lru)
+		ip_frag_free_immediate(fp);
+	rte_free(tbl);
+}
+
+/* dump frag table statistics to file; the last figure is fail_total minus fail_nospace */
+void
+rte_ip_frag_table_statistics_dump(FILE *f, const struct rte_ip_frag_tbl *tbl)
+{
+	uint64_t fail_total, fail_nospace;
+
+	fail_total = tbl->stat.fail_total;
+	fail_nospace = tbl->stat.fail_nospace;
+
+	fprintf(f, "max entries:\t%u;\n"
+		"entries in use:\t%u;\n"
+		"finds/inserts:\t%" PRIu64 ";\n"
+		"entries added:\t%" PRIu64 ";\n"
+		"entries deleted by timeout:\t%" PRIu64 ";\n"
+		"entries reused by timeout:\t%" PRIu64 ";\n"
+		"total add failures:\t%" PRIu64 ";\n"
+		"add no-space failures:\t%" PRIu64 ";\n"
+		"add hash-collisions failures:\t%" PRIu64 ";\n",
+		tbl->max_entries,
+		tbl->use_entries,
+		tbl->stat.find_num,
+		tbl->stat.add_num,
+		tbl->stat.del_num,
+		tbl->stat.reuse_num,
+		fail_total,
+		fail_nospace,
+		fail_total - fail_nospace);
+}
+
+/* Delete expired fragments from the LRU head; stops at the first live entry or when dr lacks room */
+void __rte_experimental
+rte_frag_table_del_expired_entries(struct rte_ip_frag_tbl *tbl,
+	struct rte_ip_frag_death_row *dr, uint64_t tms)
+{
+	uint64_t max_cycles;
+	struct ip_frag_pkt *fp;
+
+	max_cycles = tbl->max_cycles;
+
+	while ((fp = TAILQ_FIRST(&tbl->lru)) != NULL) {
+		/* re-read the head each pass: never step through an unlinked entry */
+		if (max_cycles + fp->start >= tms)
+			return;
+		/* check that death row has enough space */
+		if (IP_FRAG_DEATH_ROW_MBUF_LEN - dr->cnt < fp->last_idx)
+			return;
+		ip_frag_tbl_del(tbl, dr, fp);
+	}
+}
diff --git a/src/seastar/dpdk/lib/librte_ip_frag/rte_ip_frag_version.map b/src/seastar/dpdk/lib/librte_ip_frag/rte_ip_frag_version.map
new file mode 100644
index 000000000..a193007c6
--- /dev/null
+++ b/src/seastar/dpdk/lib/librte_ip_frag/rte_ip_frag_version.map
@@ -0,0 +1,26 @@
+DPDK_2.0 {
+	global:
+
+	rte_ip_frag_free_death_row;
+	rte_ip_frag_table_create;
+	rte_ip_frag_table_statistics_dump;
+	rte_ipv4_frag_reassemble_packet;
+	rte_ipv4_fragment_packet;
+	rte_ipv6_frag_reassemble_packet;
+	rte_ipv6_fragment_packet;
+
+	local: *;
+};
+
+DPDK_17.08 {
+	global:
+
+	rte_ip_frag_table_destroy;
+
+} 
DPDK_2.0; + +EXPERIMENTAL { + global: + + rte_frag_table_del_expired_entries; +}; diff --git a/src/seastar/dpdk/lib/librte_ip_frag/rte_ipv4_fragmentation.c b/src/seastar/dpdk/lib/librte_ip_frag/rte_ipv4_fragmentation.c new file mode 100644 index 000000000..a96fb03e4 --- /dev/null +++ b/src/seastar/dpdk/lib/librte_ip_frag/rte_ipv4_fragmentation.c @@ -0,0 +1,185 @@ +/* SPDX-License-Identifier: BSD-3-Clause + * Copyright(c) 2010-2014 Intel Corporation + */ + +#include <stddef.h> +#include <errno.h> + +#include <rte_memcpy.h> +#include <rte_mempool.h> +#include <rte_debug.h> + +#include "ip_frag_common.h" + +/* Fragment Offset */ +#define IPV4_HDR_DF_SHIFT 14 +#define IPV4_HDR_MF_SHIFT 13 +#define IPV4_HDR_FO_SHIFT 3 + +#define IPV4_HDR_DF_MASK (1 << IPV4_HDR_DF_SHIFT) +#define IPV4_HDR_MF_MASK (1 << IPV4_HDR_MF_SHIFT) + +#define IPV4_HDR_FO_ALIGN (1 << IPV4_HDR_FO_SHIFT) + +static inline void __fill_ipv4hdr_frag(struct ipv4_hdr *dst, + const struct ipv4_hdr *src, uint16_t len, uint16_t fofs, + uint16_t dofs, uint32_t mf) +{ + rte_memcpy(dst, src, sizeof(*dst)); + fofs = (uint16_t)(fofs + (dofs >> IPV4_HDR_FO_SHIFT)); + fofs = (uint16_t)(fofs | mf << IPV4_HDR_MF_SHIFT); + dst->fragment_offset = rte_cpu_to_be_16(fofs); + dst->total_length = rte_cpu_to_be_16(len); + dst->hdr_checksum = 0; +} + +static inline void __free_fragments(struct rte_mbuf *mb[], uint32_t num) +{ + uint32_t i; + for (i = 0; i != num; i++) + rte_pktmbuf_free(mb[i]); +} + +/** + * IPv4 fragmentation. + * + * This function implements the fragmentation of IPv4 packets. + * + * @param pkt_in + * The input packet. + * @param pkts_out + * Array storing the output fragments. + * @param mtu_size + * Size in bytes of the Maximum Transfer Unit (MTU) for the outgoing IPv4 + * datagrams. This value includes the size of the IPv4 header. + * @param pool_direct + * MBUF pool used for allocating direct buffers for the output fragments. 
+ * @param pool_indirect + * MBUF pool used for allocating indirect buffers for the output fragments. + * @return + * Upon successful completion - number of output fragments placed + * in the pkts_out array. + * Otherwise - (-1) * <errno>. + */ +int32_t +rte_ipv4_fragment_packet(struct rte_mbuf *pkt_in, + struct rte_mbuf **pkts_out, + uint16_t nb_pkts_out, + uint16_t mtu_size, + struct rte_mempool *pool_direct, + struct rte_mempool *pool_indirect) +{ + struct rte_mbuf *in_seg = NULL; + struct ipv4_hdr *in_hdr; + uint32_t out_pkt_pos, in_seg_data_pos; + uint32_t more_in_segs; + uint16_t fragment_offset, flag_offset, frag_size; + uint16_t frag_bytes_remaining; + + /* + * Ensure the IP payload length of all fragments is aligned to a + * multiple of 8 bytes as per RFC791 section 2.3. + */ + frag_size = RTE_ALIGN_FLOOR((mtu_size - sizeof(struct ipv4_hdr)), + IPV4_HDR_FO_ALIGN); + + in_hdr = rte_pktmbuf_mtod(pkt_in, struct ipv4_hdr *); + flag_offset = rte_cpu_to_be_16(in_hdr->fragment_offset); + + /* If Don't Fragment flag is set */ + if (unlikely ((flag_offset & IPV4_HDR_DF_MASK) != 0)) + return -ENOTSUP; + + /* Check that pkts_out is big enough to hold all fragments */ + if (unlikely(frag_size * nb_pkts_out < + (uint16_t)(pkt_in->pkt_len - sizeof (struct ipv4_hdr)))) + return -EINVAL; + + in_seg = pkt_in; + in_seg_data_pos = sizeof(struct ipv4_hdr); + out_pkt_pos = 0; + fragment_offset = 0; + + more_in_segs = 1; + while (likely(more_in_segs)) { + struct rte_mbuf *out_pkt = NULL, *out_seg_prev = NULL; + uint32_t more_out_segs; + struct ipv4_hdr *out_hdr; + + /* Allocate direct buffer */ + out_pkt = rte_pktmbuf_alloc(pool_direct); + if (unlikely(out_pkt == NULL)) { + __free_fragments(pkts_out, out_pkt_pos); + return -ENOMEM; + } + + /* Reserve space for the IP header that will be built later */ + out_pkt->data_len = sizeof(struct ipv4_hdr); + out_pkt->pkt_len = sizeof(struct ipv4_hdr); + frag_bytes_remaining = frag_size; + + out_seg_prev = out_pkt; + more_out_segs = 1; + 
while (likely(more_out_segs && more_in_segs)) { + struct rte_mbuf *out_seg = NULL; + uint32_t len; + + /* Allocate indirect buffer */ + out_seg = rte_pktmbuf_alloc(pool_indirect); + if (unlikely(out_seg == NULL)) { + rte_pktmbuf_free(out_pkt); + __free_fragments(pkts_out, out_pkt_pos); + return -ENOMEM; + } + out_seg_prev->next = out_seg; + out_seg_prev = out_seg; + + /* Prepare indirect buffer */ + rte_pktmbuf_attach(out_seg, in_seg); + len = frag_bytes_remaining; + if (len > (in_seg->data_len - in_seg_data_pos)) { + len = in_seg->data_len - in_seg_data_pos; + } + out_seg->data_off = in_seg->data_off + in_seg_data_pos; + out_seg->data_len = (uint16_t)len; + out_pkt->pkt_len = (uint16_t)(len + + out_pkt->pkt_len); + out_pkt->nb_segs += 1; + in_seg_data_pos += len; + frag_bytes_remaining -= len; + + /* Current output packet (i.e. fragment) done ? */ + if (unlikely(frag_bytes_remaining == 0)) + more_out_segs = 0; + + /* Current input segment done ? */ + if (unlikely(in_seg_data_pos == in_seg->data_len)) { + in_seg = in_seg->next; + in_seg_data_pos = 0; + + if (unlikely(in_seg == NULL)) + more_in_segs = 0; + } + } + + /* Build the IP header */ + + out_hdr = rte_pktmbuf_mtod(out_pkt, struct ipv4_hdr *); + + __fill_ipv4hdr_frag(out_hdr, in_hdr, + (uint16_t)out_pkt->pkt_len, + flag_offset, fragment_offset, more_in_segs); + + fragment_offset = (uint16_t)(fragment_offset + + out_pkt->pkt_len - sizeof(struct ipv4_hdr)); + + out_pkt->ol_flags |= PKT_TX_IP_CKSUM; + out_pkt->l3_len = sizeof(struct ipv4_hdr); + + /* Write the fragment to the output list */ + pkts_out[out_pkt_pos] = out_pkt; + out_pkt_pos ++; + } + + return out_pkt_pos; +} diff --git a/src/seastar/dpdk/lib/librte_ip_frag/rte_ipv4_reassembly.c b/src/seastar/dpdk/lib/librte_ip_frag/rte_ipv4_reassembly.c new file mode 100644 index 000000000..1029b7abc --- /dev/null +++ b/src/seastar/dpdk/lib/librte_ip_frag/rte_ipv4_reassembly.c @@ -0,0 +1,171 @@ +/* SPDX-License-Identifier: BSD-3-Clause + * Copyright(c) 2010-2014 
Intel Corporation + */ + +#include <stddef.h> + +#include <rte_debug.h> + +#include "ip_frag_common.h" + +/* + * Reassemble fragments into one packet. + */ +struct rte_mbuf * +ipv4_frag_reassemble(struct ip_frag_pkt *fp) +{ + struct ipv4_hdr *ip_hdr; + struct rte_mbuf *m, *prev; + uint32_t i, n, ofs, first_len; + uint32_t curr_idx = 0; + + first_len = fp->frags[IP_FIRST_FRAG_IDX].len; + n = fp->last_idx - 1; + + /*start from the last fragment. */ + m = fp->frags[IP_LAST_FRAG_IDX].mb; + ofs = fp->frags[IP_LAST_FRAG_IDX].ofs; + curr_idx = IP_LAST_FRAG_IDX; + + while (ofs != first_len) { + + prev = m; + + for (i = n; i != IP_FIRST_FRAG_IDX && ofs != first_len; i--) { + + /* previous fragment found. */ + if(fp->frags[i].ofs + fp->frags[i].len == ofs) { + + RTE_ASSERT(curr_idx != i); + + /* adjust start of the last fragment data. */ + rte_pktmbuf_adj(m, + (uint16_t)(m->l2_len + m->l3_len)); + rte_pktmbuf_chain(fp->frags[i].mb, m); + + /* this mbuf should not be accessed directly */ + fp->frags[curr_idx].mb = NULL; + curr_idx = i; + + /* update our last fragment and offset. */ + m = fp->frags[i].mb; + ofs = fp->frags[i].ofs; + } + } + + /* error - hole in the packet. */ + if (m == prev) { + return NULL; + } + } + + /* chain with the first fragment. */ + rte_pktmbuf_adj(m, (uint16_t)(m->l2_len + m->l3_len)); + rte_pktmbuf_chain(fp->frags[IP_FIRST_FRAG_IDX].mb, m); + fp->frags[curr_idx].mb = NULL; + m = fp->frags[IP_FIRST_FRAG_IDX].mb; + fp->frags[IP_FIRST_FRAG_IDX].mb = NULL; + + /* update mbuf fields for reassembled packet. */ + m->ol_flags |= PKT_TX_IP_CKSUM; + + /* update ipv4 header for the reassembled packet */ + ip_hdr = rte_pktmbuf_mtod_offset(m, struct ipv4_hdr *, m->l2_len); + + ip_hdr->total_length = rte_cpu_to_be_16((uint16_t)(fp->total_size + + m->l3_len)); + ip_hdr->fragment_offset = (uint16_t)(ip_hdr->fragment_offset & + rte_cpu_to_be_16(IPV4_HDR_DF_FLAG)); + ip_hdr->hdr_checksum = 0; + + return m; +} + +/* + * Process new mbuf with fragment of IPV4 packet. 
+ * Incoming mbuf should have it's l2_len/l3_len fields setuped correclty. + * @param tbl + * Table where to lookup/add the fragmented packet. + * @param mb + * Incoming mbuf with IPV4 fragment. + * @param tms + * Fragment arrival timestamp. + * @param ip_hdr + * Pointer to the IPV4 header inside the fragment. + * @return + * Pointer to mbuf for reassembled packet, or NULL if: + * - an error occurred. + * - not all fragments of the packet are collected yet. + */ +struct rte_mbuf * +rte_ipv4_frag_reassemble_packet(struct rte_ip_frag_tbl *tbl, + struct rte_ip_frag_death_row *dr, struct rte_mbuf *mb, uint64_t tms, + struct ipv4_hdr *ip_hdr) +{ + struct ip_frag_pkt *fp; + struct ip_frag_key key; + const unaligned_uint64_t *psd; + uint16_t flag_offset, ip_ofs, ip_flag; + int32_t ip_len; + + flag_offset = rte_be_to_cpu_16(ip_hdr->fragment_offset); + ip_ofs = (uint16_t)(flag_offset & IPV4_HDR_OFFSET_MASK); + ip_flag = (uint16_t)(flag_offset & IPV4_HDR_MF_FLAG); + + psd = (unaligned_uint64_t *)&ip_hdr->src_addr; + /* use first 8 bytes only */ + key.src_dst[0] = psd[0]; + key.id = ip_hdr->packet_id; + key.key_len = IPV4_KEYLEN; + + ip_ofs *= IPV4_HDR_OFFSET_UNITS; + ip_len = rte_be_to_cpu_16(ip_hdr->total_length) - mb->l3_len; + + IP_FRAG_LOG(DEBUG, "%s:%d:\n" + "mbuf: %p, tms: %" PRIu64 + ", key: <%" PRIx64 ", %#x>, ofs: %u, len: %d, flags: %#x\n" + "tbl: %p, max_cycles: %" PRIu64 ", entry_mask: %#x, " + "max_entries: %u, use_entries: %u\n\n", + __func__, __LINE__, + mb, tms, key.src_dst[0], key.id, ip_ofs, ip_len, ip_flag, + tbl, tbl->max_cycles, tbl->entry_mask, tbl->max_entries, + tbl->use_entries); + + /* check that fragment length is greater then zero. */ + if (ip_len <= 0) { + IP_FRAG_MBUF2DR(dr, mb); + return NULL; + } + + /* try to find/add entry into the fragment's table. 
*/ + if ((fp = ip_frag_find(tbl, dr, &key, tms)) == NULL) { + IP_FRAG_MBUF2DR(dr, mb); + return NULL; + } + + IP_FRAG_LOG(DEBUG, "%s:%d:\n" + "tbl: %p, max_entries: %u, use_entries: %u\n" + "ipv4_frag_pkt: %p, key: <%" PRIx64 ", %#x>, start: %" PRIu64 + ", total_size: %u, frag_size: %u, last_idx: %u\n\n", + __func__, __LINE__, + tbl, tbl->max_entries, tbl->use_entries, + fp, fp->key.src_dst[0], fp->key.id, fp->start, + fp->total_size, fp->frag_size, fp->last_idx); + + + /* process the fragmented packet. */ + mb = ip_frag_process(fp, dr, mb, ip_ofs, ip_len, ip_flag); + ip_frag_inuse(tbl, fp); + + IP_FRAG_LOG(DEBUG, "%s:%d:\n" + "mbuf: %p\n" + "tbl: %p, max_entries: %u, use_entries: %u\n" + "ipv4_frag_pkt: %p, key: <%" PRIx64 ", %#x>, start: %" PRIu64 + ", total_size: %u, frag_size: %u, last_idx: %u\n\n", + __func__, __LINE__, mb, + tbl, tbl->max_entries, tbl->use_entries, + fp, fp->key.src_dst[0], fp->key.id, fp->start, + fp->total_size, fp->frag_size, fp->last_idx); + + return mb; +} diff --git a/src/seastar/dpdk/lib/librte_ip_frag/rte_ipv6_fragmentation.c b/src/seastar/dpdk/lib/librte_ip_frag/rte_ipv6_fragmentation.c new file mode 100644 index 000000000..b9437eb11 --- /dev/null +++ b/src/seastar/dpdk/lib/librte_ip_frag/rte_ipv6_fragmentation.c @@ -0,0 +1,182 @@ +/* SPDX-License-Identifier: BSD-3-Clause + * Copyright(c) 2010-2014 Intel Corporation + */ + +#include <stddef.h> +#include <errno.h> + +#include <rte_memcpy.h> + +#include "ip_frag_common.h" + +/** + * @file + * RTE IPv6 Fragmentation + * + * Implementation of IPv6 fragmentation. 
+ * + */ + +static inline void +__fill_ipv6hdr_frag(struct ipv6_hdr *dst, + const struct ipv6_hdr *src, uint16_t len, uint16_t fofs, + uint32_t mf) +{ + struct ipv6_extension_fragment *fh; + + rte_memcpy(dst, src, sizeof(*dst)); + dst->payload_len = rte_cpu_to_be_16(len); + dst->proto = IPPROTO_FRAGMENT; + + fh = (struct ipv6_extension_fragment *) ++dst; + fh->next_header = src->proto; + fh->reserved = 0; + fh->frag_data = rte_cpu_to_be_16(RTE_IPV6_SET_FRAG_DATA(fofs, mf)); + fh->id = 0; +} + +static inline void +__free_fragments(struct rte_mbuf *mb[], uint32_t num) +{ + uint32_t i; + for (i = 0; i < num; i++) + rte_pktmbuf_free(mb[i]); +} + +/** + * IPv6 fragmentation. + * + * This function implements the fragmentation of IPv6 packets. + * + * @param pkt_in + * The input packet. + * @param pkts_out + * Array storing the output fragments. + * @param mtu_size + * Size in bytes of the Maximum Transfer Unit (MTU) for the outgoing IPv6 + * datagrams. This value includes the size of the IPv6 header. + * @param pool_direct + * MBUF pool used for allocating direct buffers for the output fragments. + * @param pool_indirect + * MBUF pool used for allocating indirect buffers for the output fragments. + * @return + * Upon successful completion - number of output fragments placed + * in the pkts_out array. + * Otherwise - (-1) * <errno>. + */ +int32_t +rte_ipv6_fragment_packet(struct rte_mbuf *pkt_in, + struct rte_mbuf **pkts_out, + uint16_t nb_pkts_out, + uint16_t mtu_size, + struct rte_mempool *pool_direct, + struct rte_mempool *pool_indirect) +{ + struct rte_mbuf *in_seg = NULL; + struct ipv6_hdr *in_hdr; + uint32_t out_pkt_pos, in_seg_data_pos; + uint32_t more_in_segs; + uint16_t fragment_offset, frag_size; + uint64_t frag_bytes_remaining; + + /* + * Ensure the IP payload length of all fragments (except the + * the last fragment) are a multiple of 8 bytes per RFC2460. 
+ */ + frag_size = RTE_ALIGN_FLOOR(mtu_size - sizeof(struct ipv6_hdr), + RTE_IPV6_EHDR_FO_ALIGN); + + /* Check that pkts_out is big enough to hold all fragments */ + if (unlikely (frag_size * nb_pkts_out < + (uint16_t)(pkt_in->pkt_len - sizeof (struct ipv6_hdr)))) + return -EINVAL; + + in_hdr = rte_pktmbuf_mtod(pkt_in, struct ipv6_hdr *); + + in_seg = pkt_in; + in_seg_data_pos = sizeof(struct ipv6_hdr); + out_pkt_pos = 0; + fragment_offset = 0; + + more_in_segs = 1; + while (likely(more_in_segs)) { + struct rte_mbuf *out_pkt = NULL, *out_seg_prev = NULL; + uint32_t more_out_segs; + struct ipv6_hdr *out_hdr; + + /* Allocate direct buffer */ + out_pkt = rte_pktmbuf_alloc(pool_direct); + if (unlikely(out_pkt == NULL)) { + __free_fragments(pkts_out, out_pkt_pos); + return -ENOMEM; + } + + /* Reserve space for the IP header that will be built later */ + out_pkt->data_len = sizeof(struct ipv6_hdr) + sizeof(struct ipv6_extension_fragment); + out_pkt->pkt_len = sizeof(struct ipv6_hdr) + sizeof(struct ipv6_extension_fragment); + frag_bytes_remaining = frag_size; + + out_seg_prev = out_pkt; + more_out_segs = 1; + while (likely(more_out_segs && more_in_segs)) { + struct rte_mbuf *out_seg = NULL; + uint32_t len; + + /* Allocate indirect buffer */ + out_seg = rte_pktmbuf_alloc(pool_indirect); + if (unlikely(out_seg == NULL)) { + rte_pktmbuf_free(out_pkt); + __free_fragments(pkts_out, out_pkt_pos); + return -ENOMEM; + } + out_seg_prev->next = out_seg; + out_seg_prev = out_seg; + + /* Prepare indirect buffer */ + rte_pktmbuf_attach(out_seg, in_seg); + len = frag_bytes_remaining; + if (len > (in_seg->data_len - in_seg_data_pos)) { + len = in_seg->data_len - in_seg_data_pos; + } + out_seg->data_off = in_seg->data_off + in_seg_data_pos; + out_seg->data_len = (uint16_t)len; + out_pkt->pkt_len = (uint16_t)(len + + out_pkt->pkt_len); + out_pkt->nb_segs += 1; + in_seg_data_pos += len; + frag_bytes_remaining -= len; + + /* Current output packet (i.e. fragment) done ? 
*/ + if (unlikely(frag_bytes_remaining == 0)) + more_out_segs = 0; + + /* Current input segment done ? */ + if (unlikely(in_seg_data_pos == in_seg->data_len)) { + in_seg = in_seg->next; + in_seg_data_pos = 0; + + if (unlikely(in_seg == NULL)) { + more_in_segs = 0; + } + } + } + + /* Build the IP header */ + + out_hdr = rte_pktmbuf_mtod(out_pkt, struct ipv6_hdr *); + + __fill_ipv6hdr_frag(out_hdr, in_hdr, + (uint16_t) out_pkt->pkt_len - sizeof(struct ipv6_hdr), + fragment_offset, more_in_segs); + + fragment_offset = (uint16_t)(fragment_offset + + out_pkt->pkt_len - sizeof(struct ipv6_hdr) + - sizeof(struct ipv6_extension_fragment)); + + /* Write the fragment to the output list */ + pkts_out[out_pkt_pos] = out_pkt; + out_pkt_pos ++; + } + + return out_pkt_pos; +} diff --git a/src/seastar/dpdk/lib/librte_ip_frag/rte_ipv6_reassembly.c b/src/seastar/dpdk/lib/librte_ip_frag/rte_ipv6_reassembly.c new file mode 100644 index 000000000..855e3f740 --- /dev/null +++ b/src/seastar/dpdk/lib/librte_ip_frag/rte_ipv6_reassembly.c @@ -0,0 +1,216 @@ +/* SPDX-License-Identifier: BSD-3-Clause + * Copyright(c) 2010-2014 Intel Corporation + */ + +#include <stddef.h> + +#include <rte_memcpy.h> + +#include "ip_frag_common.h" + +/** + * @file + * IPv6 reassemble + * + * Implementation of IPv6 reassembly. + * + */ + +static inline void +ip_frag_memmove(char *dst, char *src, int len) +{ + int i; + + /* go backwards to make sure we don't overwrite anything important */ + for (i = len - 1; i >= 0; i--) + dst[i] = src[i]; +} + +/* + * Reassemble fragments into one packet. + */ +struct rte_mbuf * +ipv6_frag_reassemble(struct ip_frag_pkt *fp) +{ + struct ipv6_hdr *ip_hdr; + struct ipv6_extension_fragment *frag_hdr; + struct rte_mbuf *m, *prev; + uint32_t i, n, ofs, first_len; + uint32_t last_len, move_len, payload_len; + uint32_t curr_idx = 0; + + first_len = fp->frags[IP_FIRST_FRAG_IDX].len; + n = fp->last_idx - 1; + + /*start from the last fragment. 
*/ + m = fp->frags[IP_LAST_FRAG_IDX].mb; + ofs = fp->frags[IP_LAST_FRAG_IDX].ofs; + last_len = fp->frags[IP_LAST_FRAG_IDX].len; + curr_idx = IP_LAST_FRAG_IDX; + + payload_len = ofs + last_len; + + while (ofs != first_len) { + + prev = m; + + for (i = n; i != IP_FIRST_FRAG_IDX && ofs != first_len; i--) { + + /* previous fragment found. */ + if (fp->frags[i].ofs + fp->frags[i].len == ofs) { + + RTE_ASSERT(curr_idx != i); + + /* adjust start of the last fragment data. */ + rte_pktmbuf_adj(m, + (uint16_t)(m->l2_len + m->l3_len)); + rte_pktmbuf_chain(fp->frags[i].mb, m); + + /* this mbuf should not be accessed directly */ + fp->frags[curr_idx].mb = NULL; + curr_idx = i; + + /* update our last fragment and offset. */ + m = fp->frags[i].mb; + ofs = fp->frags[i].ofs; + } + } + + /* error - hole in the packet. */ + if (m == prev) { + return NULL; + } + } + + /* chain with the first fragment. */ + rte_pktmbuf_adj(m, (uint16_t)(m->l2_len + m->l3_len)); + rte_pktmbuf_chain(fp->frags[IP_FIRST_FRAG_IDX].mb, m); + fp->frags[curr_idx].mb = NULL; + m = fp->frags[IP_FIRST_FRAG_IDX].mb; + fp->frags[IP_FIRST_FRAG_IDX].mb = NULL; + + /* update mbuf fields for reassembled packet. */ + m->ol_flags |= PKT_TX_IP_CKSUM; + + /* update ipv6 header for the reassembled datagram */ + ip_hdr = rte_pktmbuf_mtod_offset(m, struct ipv6_hdr *, m->l2_len); + + ip_hdr->payload_len = rte_cpu_to_be_16(payload_len); + + /* + * remove fragmentation header. note that per RFC2460, we need to update + * the last non-fragmentable header with the "next header" field to contain + * type of the first fragmentable header, but we currently don't support + * other headers, so we assume there are no other headers and thus update + * the main IPv6 header instead. 
+ */ + move_len = m->l2_len + m->l3_len - sizeof(*frag_hdr); + frag_hdr = (struct ipv6_extension_fragment *) (ip_hdr + 1); + ip_hdr->proto = frag_hdr->next_header; + + ip_frag_memmove(rte_pktmbuf_mtod_offset(m, char *, sizeof(*frag_hdr)), + rte_pktmbuf_mtod(m, char*), move_len); + + rte_pktmbuf_adj(m, sizeof(*frag_hdr)); + + return m; +} + +/* + * Process new mbuf with fragment of IPV6 datagram. + * Incoming mbuf should have its l2_len/l3_len fields setup correctly. + * @param tbl + * Table where to lookup/add the fragmented packet. + * @param mb + * Incoming mbuf with IPV6 fragment. + * @param tms + * Fragment arrival timestamp. + * @param ip_hdr + * Pointer to the IPV6 header. + * @param frag_hdr + * Pointer to the IPV6 fragment extension header. + * @return + * Pointer to mbuf for reassembled packet, or NULL if: + * - an error occurred. + * - not all fragments of the packet are collected yet. + */ +#define MORE_FRAGS(x) (((x) & 0x100) >> 8) +#define FRAG_OFFSET(x) (rte_cpu_to_be_16(x) >> 3) +struct rte_mbuf * +rte_ipv6_frag_reassemble_packet(struct rte_ip_frag_tbl *tbl, + struct rte_ip_frag_death_row *dr, struct rte_mbuf *mb, uint64_t tms, + struct ipv6_hdr *ip_hdr, struct ipv6_extension_fragment *frag_hdr) +{ + struct ip_frag_pkt *fp; + struct ip_frag_key key; + uint16_t ip_ofs; + int32_t ip_len; + + rte_memcpy(&key.src_dst[0], ip_hdr->src_addr, 16); + rte_memcpy(&key.src_dst[2], ip_hdr->dst_addr, 16); + + key.id = frag_hdr->id; + key.key_len = IPV6_KEYLEN; + + ip_ofs = FRAG_OFFSET(frag_hdr->frag_data) * 8; + + /* + * as per RFC2460, payload length contains all extension headers + * as well. + * since we don't support anything but frag headers, + * this is what we remove from the payload len. 
+ */ + ip_len = rte_be_to_cpu_16(ip_hdr->payload_len) - sizeof(*frag_hdr); + + IP_FRAG_LOG(DEBUG, "%s:%d:\n" + "mbuf: %p, tms: %" PRIu64 + ", key: <" IPv6_KEY_BYTES_FMT ", %#x>, " + "ofs: %u, len: %d, flags: %#x\n" + "tbl: %p, max_cycles: %" PRIu64 ", entry_mask: %#x, " + "max_entries: %u, use_entries: %u\n\n", + __func__, __LINE__, + mb, tms, IPv6_KEY_BYTES(key.src_dst), key.id, ip_ofs, ip_len, + RTE_IPV6_GET_MF(frag_hdr->frag_data), + tbl, tbl->max_cycles, tbl->entry_mask, tbl->max_entries, + tbl->use_entries); + + /* check that fragment length is greater then zero. */ + if (ip_len <= 0) { + IP_FRAG_MBUF2DR(dr, mb); + return NULL; + } + + /* try to find/add entry into the fragment's table. */ + fp = ip_frag_find(tbl, dr, &key, tms); + if (fp == NULL) { + IP_FRAG_MBUF2DR(dr, mb); + return NULL; + } + + IP_FRAG_LOG(DEBUG, "%s:%d:\n" + "tbl: %p, max_entries: %u, use_entries: %u\n" + "ipv6_frag_pkt: %p, key: <" IPv6_KEY_BYTES_FMT ", %#x>, start: %" PRIu64 + ", total_size: %u, frag_size: %u, last_idx: %u\n\n", + __func__, __LINE__, + tbl, tbl->max_entries, tbl->use_entries, + fp, IPv6_KEY_BYTES(fp->key.src_dst), fp->key.id, fp->start, + fp->total_size, fp->frag_size, fp->last_idx); + + + /* process the fragmented packet. */ + mb = ip_frag_process(fp, dr, mb, ip_ofs, ip_len, + MORE_FRAGS(frag_hdr->frag_data)); + ip_frag_inuse(tbl, fp); + + IP_FRAG_LOG(DEBUG, "%s:%d:\n" + "mbuf: %p\n" + "tbl: %p, max_entries: %u, use_entries: %u\n" + "ipv6_frag_pkt: %p, key: <" IPv6_KEY_BYTES_FMT ", %#x>, start: %" PRIu64 + ", total_size: %u, frag_size: %u, last_idx: %u\n\n", + __func__, __LINE__, mb, + tbl, tbl->max_entries, tbl->use_entries, + fp, IPv6_KEY_BYTES(fp->key.src_dst), fp->key.id, fp->start, + fp->total_size, fp->frag_size, fp->last_idx); + + return mb; +} |