diff options
author | Daniel Baumann <daniel.baumann@progress-linux.org> | 2024-04-07 18:45:59 +0000 |
---|---|---|
committer | Daniel Baumann <daniel.baumann@progress-linux.org> | 2024-04-07 18:45:59 +0000 |
commit | 19fcec84d8d7d21e796c7624e521b60d28ee21ed (patch) | |
tree | 42d26aa27d1e3f7c0b8bd3fd14e7d7082f5008dc /src/spdk/dpdk/lib/librte_member | |
parent | Initial commit. (diff) | |
download | ceph-19fcec84d8d7d21e796c7624e521b60d28ee21ed.tar.xz ceph-19fcec84d8d7d21e796c7624e521b60d28ee21ed.zip |
Adding upstream version 16.2.11+ds.upstream/16.2.11+dsupstream
Signed-off-by: Daniel Baumann <daniel.baumann@progress-linux.org>
Diffstat (limited to 'src/spdk/dpdk/lib/librte_member')
-rw-r--r-- | src/spdk/dpdk/lib/librte_member/Makefile | 22 | ||||
-rw-r--r-- | src/spdk/dpdk/lib/librte_member/meson.build | 8 | ||||
-rw-r--r-- | src/spdk/dpdk/lib/librte_member/rte_member.c | 307 | ||||
-rw-r--r-- | src/spdk/dpdk/lib/librte_member/rte_member.h | 490 | ||||
-rw-r--r-- | src/spdk/dpdk/lib/librte_member/rte_member_ht.c | 557 | ||||
-rw-r--r-- | src/spdk/dpdk/lib/librte_member/rte_member_ht.h | 65 | ||||
-rw-r--r-- | src/spdk/dpdk/lib/librte_member/rte_member_vbf.c | 321 | ||||
-rw-r--r-- | src/spdk/dpdk/lib/librte_member/rte_member_vbf.h | 53 | ||||
-rw-r--r-- | src/spdk/dpdk/lib/librte_member/rte_member_version.map | 16 | ||||
-rw-r--r-- | src/spdk/dpdk/lib/librte_member/rte_member_x86.h | 78 |
10 files changed, 1917 insertions, 0 deletions
diff --git a/src/spdk/dpdk/lib/librte_member/Makefile b/src/spdk/dpdk/lib/librte_member/Makefile new file mode 100644 index 000000000..ef9e2faea --- /dev/null +++ b/src/spdk/dpdk/lib/librte_member/Makefile @@ -0,0 +1,22 @@ +# SPDX-License-Identifier: BSD-3-Clause +# Copyright(c) 2017 Intel Corporation + +include $(RTE_SDK)/mk/rte.vars.mk + +# library name +LIB = librte_member.a + +CFLAGS := -I$(SRCDIR) $(CFLAGS) +CFLAGS += $(WERROR_FLAGS) -O3 + +LDLIBS += -lm +LDLIBS += -lrte_eal -lrte_hash + +EXPORT_MAP := rte_member_version.map + +# all source are stored in SRCS-y +SRCS-$(CONFIG_RTE_LIBRTE_MEMBER) += rte_member.c rte_member_ht.c rte_member_vbf.c +# install includes +SYMLINK-$(CONFIG_RTE_LIBRTE_MEMBER)-include := rte_member.h + +include $(RTE_SDK)/mk/rte.lib.mk diff --git a/src/spdk/dpdk/lib/librte_member/meson.build b/src/spdk/dpdk/lib/librte_member/meson.build new file mode 100644 index 000000000..058584b19 --- /dev/null +++ b/src/spdk/dpdk/lib/librte_member/meson.build @@ -0,0 +1,8 @@ +# SPDX-License-Identifier: BSD-3-Clause +# Copyright(c) 2017 Intel Corporation + +sources = files('rte_member.c', 'rte_member_ht.c', 'rte_member_vbf.c') +headers = files('rte_member.h') +deps += ['hash'] +build = false +reason = 'not needed by SPDK' diff --git a/src/spdk/dpdk/lib/librte_member/rte_member.c b/src/spdk/dpdk/lib/librte_member/rte_member.c new file mode 100644 index 000000000..e0e7f127e --- /dev/null +++ b/src/spdk/dpdk/lib/librte_member/rte_member.c @@ -0,0 +1,307 @@ +/* SPDX-License-Identifier: BSD-3-Clause + * Copyright(c) 2017 Intel Corporation + */ + +#include <string.h> + +#include <rte_string_fns.h> +#include <rte_eal.h> +#include <rte_eal_memconfig.h> +#include <rte_memory.h> +#include <rte_malloc.h> +#include <rte_errno.h> +#include <rte_tailq.h> + +#include "rte_member.h" +#include "rte_member_ht.h" +#include "rte_member_vbf.h" + +int librte_member_logtype; + +TAILQ_HEAD(rte_member_list, rte_tailq_entry); +static struct rte_tailq_elem rte_member_tailq = { + .name = "RTE_MEMBER", +}; +EAL_REGISTER_TAILQ(rte_member_tailq) + +struct rte_member_setsum * +rte_member_find_existing(const char *name) +{ + struct rte_member_setsum *setsum = NULL; + struct rte_tailq_entry *te; + struct rte_member_list *member_list; + + member_list = RTE_TAILQ_CAST(rte_member_tailq.head, rte_member_list); + + rte_mcfg_tailq_read_lock(); + TAILQ_FOREACH(te, member_list, next) { + setsum = (struct rte_member_setsum *) te->data; + if (strncmp(name, setsum->name, RTE_MEMBER_NAMESIZE) == 0) + break; + } + rte_mcfg_tailq_read_unlock(); + + if (te == NULL) { + rte_errno = ENOENT; + return NULL; + } + return setsum; +} + +void +rte_member_free(struct rte_member_setsum *setsum) +{ + struct rte_member_list *member_list; + struct rte_tailq_entry *te; + + if (setsum == NULL) + return; + member_list = RTE_TAILQ_CAST(rte_member_tailq.head, rte_member_list); + rte_mcfg_tailq_write_lock(); + TAILQ_FOREACH(te, member_list, next) { + if (te->data == (void *)setsum) + break; + } + if (te == NULL) { + rte_mcfg_tailq_write_unlock(); + return; + } + TAILQ_REMOVE(member_list, te, next); + rte_mcfg_tailq_write_unlock(); + + switch (setsum->type) { + case RTE_MEMBER_TYPE_HT: + rte_member_free_ht(setsum); + break; + case RTE_MEMBER_TYPE_VBF: + rte_member_free_vbf(setsum); + break; + default: + break; + } + rte_free(setsum); + rte_free(te); +} + +struct rte_member_setsum * +rte_member_create(const struct rte_member_parameters *params) +{ + struct rte_tailq_entry *te; + struct rte_member_list *member_list; + struct rte_member_setsum *setsum; + int ret; + + if (params == NULL) { + rte_errno = EINVAL; + return NULL; + } + + if (params->key_len == 0 || + params->prim_hash_seed == params->sec_hash_seed) { + rte_errno = EINVAL; + RTE_MEMBER_LOG(ERR, "Create setsummary with " + "invalid parameters\n"); + return NULL; + } + + member_list = RTE_TAILQ_CAST(rte_member_tailq.head, rte_member_list); + + rte_mcfg_tailq_write_lock(); + + TAILQ_FOREACH(te, member_list, next) { + setsum = te->data; + if (strncmp(params->name, setsum->name, + RTE_MEMBER_NAMESIZE) == 0) + break; + } + setsum = NULL; + if (te != NULL) { + rte_errno = EEXIST; + te = NULL; + goto error_unlock_exit; + } + te = rte_zmalloc("MEMBER_TAILQ_ENTRY", sizeof(*te), 0); + if (te == NULL) { + RTE_MEMBER_LOG(ERR, "tailq entry allocation failed\n"); + goto error_unlock_exit; + } + + /* Create a new setsum structure */ + setsum = rte_zmalloc_socket(params->name, + sizeof(struct rte_member_setsum), RTE_CACHE_LINE_SIZE, + params->socket_id); + if (setsum == NULL) { + RTE_MEMBER_LOG(ERR, "Create setsummary failed\n"); + goto error_unlock_exit; + } + strlcpy(setsum->name, params->name, sizeof(setsum->name)); + setsum->type = params->type; + setsum->socket_id = params->socket_id; + setsum->key_len = params->key_len; + setsum->num_set = params->num_set; + setsum->prim_hash_seed = params->prim_hash_seed; + setsum->sec_hash_seed = params->sec_hash_seed; + + switch (setsum->type) { + case RTE_MEMBER_TYPE_HT: + ret = rte_member_create_ht(setsum, params); + break; + case RTE_MEMBER_TYPE_VBF: + ret = rte_member_create_vbf(setsum, params); + break; + default: + goto error_unlock_exit; + } + if (ret < 0) + goto error_unlock_exit; + + RTE_MEMBER_LOG(DEBUG, "Creating a setsummary table with " + "mode %u\n", setsum->type); + + te->data = (void *)setsum; + TAILQ_INSERT_TAIL(member_list, te, next); + rte_mcfg_tailq_write_unlock(); + return setsum; + +error_unlock_exit: + rte_free(te); + rte_free(setsum); + rte_mcfg_tailq_write_unlock(); + return NULL; +} + +int +rte_member_add(const struct rte_member_setsum *setsum, const void *key, + member_set_t set_id) +{ + if (setsum == NULL || key == NULL) + return -EINVAL; + + switch (setsum->type) { + case RTE_MEMBER_TYPE_HT: + return rte_member_add_ht(setsum, key, set_id); + case RTE_MEMBER_TYPE_VBF: + return rte_member_add_vbf(setsum, key, set_id); + default: + return -EINVAL; + } +} + +int +rte_member_lookup(const struct rte_member_setsum *setsum, const void *key, + member_set_t *set_id) +{ + if (setsum == NULL || key == NULL || set_id == NULL) + return -EINVAL; + + switch (setsum->type) { + case RTE_MEMBER_TYPE_HT: + return rte_member_lookup_ht(setsum, key, set_id); + case RTE_MEMBER_TYPE_VBF: + return rte_member_lookup_vbf(setsum, key, set_id); + default: + return -EINVAL; + } +} + +int +rte_member_lookup_bulk(const struct rte_member_setsum *setsum, + const void **keys, uint32_t num_keys, + member_set_t *set_ids) +{ + if (setsum == NULL || keys == NULL || set_ids == NULL) + return -EINVAL; + + switch (setsum->type) { + case RTE_MEMBER_TYPE_HT: + return rte_member_lookup_bulk_ht(setsum, keys, num_keys, + set_ids); + case RTE_MEMBER_TYPE_VBF: + return rte_member_lookup_bulk_vbf(setsum, keys, num_keys, + set_ids); + default: + return -EINVAL; + } +} + +int +rte_member_lookup_multi(const struct rte_member_setsum *setsum, const void *key, + uint32_t match_per_key, member_set_t *set_id) +{ + if (setsum == NULL || key == NULL || set_id == NULL) + return -EINVAL; + + switch (setsum->type) { + case RTE_MEMBER_TYPE_HT: + return rte_member_lookup_multi_ht(setsum, key, match_per_key, + set_id); + case RTE_MEMBER_TYPE_VBF: + return rte_member_lookup_multi_vbf(setsum, key, match_per_key, + set_id); + default: + return -EINVAL; + } +} + +int +rte_member_lookup_multi_bulk(const struct rte_member_setsum *setsum, + const void **keys, uint32_t num_keys, + uint32_t max_match_per_key, uint32_t *match_count, + member_set_t *set_ids) +{ + if (setsum == NULL || keys == NULL || set_ids == NULL || + match_count == NULL) + return -EINVAL; + + switch (setsum->type) { + case RTE_MEMBER_TYPE_HT: + return rte_member_lookup_multi_bulk_ht(setsum, keys, num_keys, + max_match_per_key, match_count, set_ids); + case RTE_MEMBER_TYPE_VBF: + return rte_member_lookup_multi_bulk_vbf(setsum, keys, num_keys, + max_match_per_key, match_count, set_ids); + default: + return -EINVAL; + } +} + +int +rte_member_delete(const struct rte_member_setsum *setsum, const void *key, + member_set_t set_id) +{ + if (setsum == NULL || key == NULL) + return -EINVAL; + + switch (setsum->type) { + case RTE_MEMBER_TYPE_HT: + return rte_member_delete_ht(setsum, key, set_id); + /* current vBF implementation does not support delete function */ + case RTE_MEMBER_TYPE_VBF: + default: + return -EINVAL; + } +} + +void +rte_member_reset(const struct rte_member_setsum *setsum) +{ + if (setsum == NULL) + return; + switch (setsum->type) { + case RTE_MEMBER_TYPE_HT: + rte_member_reset_ht(setsum); + return; + case RTE_MEMBER_TYPE_VBF: + rte_member_reset_vbf(setsum); + return; + default: + return; + } +} + +RTE_INIT(librte_member_init_log) +{ + librte_member_logtype = rte_log_register("lib.member"); + if (librte_member_logtype >= 0) + rte_log_set_level(librte_member_logtype, RTE_LOG_DEBUG); +} diff --git a/src/spdk/dpdk/lib/librte_member/rte_member.h b/src/spdk/dpdk/lib/librte_member/rte_member.h new file mode 100644 index 000000000..ab2b23217 --- /dev/null +++ b/src/spdk/dpdk/lib/librte_member/rte_member.h @@ -0,0 +1,490 @@ +/* SPDX-License-Identifier: BSD-3-Clause + * Copyright(c) 2017 Intel Corporation + */ + +/** + * @file + * + * RTE Membership Library + * + * The Membership Library is an extension and generalization of a traditional + * filter (for example Bloom Filter and cuckoo filter) structure that has + * multiple usages in a variety of workloads and applications. The library is + * used to test if a key belongs to certain sets. Two types of such + * "set-summary" structures are implemented: hash-table based (HT) and vector + * bloom filter (vBF). For HT setsummary, two subtypes or modes are available, + * cache and non-cache modes. The table below summarize some properties of + * the different implementations. + * + * @warning + * @b EXPERIMENTAL: this API may change without prior notice + */ + +/** + * <!-- + * +==========+=====================+================+=========================+ + * | type | vbf | HT-cache | HT-non-cache | + * +==========+=====================+==========================================+ + * |structure | bloom-filter array | hash-table like without storing key | + * +----------+---------------------+------------------------------------------+ + * |set id | limited by bf count | [1, 0x7fff] | + * | | up to 32. | | + * +----------+---------------------+------------------------------------------+ + * |usages & | small set range, | can delete, | cache most recent keys, | + * |properties| user-specified | big set range, | have both false-positive| + * | | false-positive rate,| small false | and false-negative | + * | | no deletion support.| positive depend| depend on table size, | + * | | | on table size, | automatic overwritten. | + * | | | new key does | | + * | | | not overwrite | | + * | | | existing key. | | + * +----------+---------------------+----------------+-------------------------+ + * --> + */ + +#ifndef _RTE_MEMBER_H_ +#define _RTE_MEMBER_H_ + +#ifdef __cplusplus +extern "C" { +#endif + +#include <stdint.h> + +#include <rte_common.h> +#include <rte_config.h> + +/** The set ID type that stored internally in hash table based set summary. */ +typedef uint16_t member_set_t; +/** Invalid set ID used to mean no match found. */ +#define RTE_MEMBER_NO_MATCH 0 +/** Maximum size of hash table that can be created. */ +#define RTE_MEMBER_ENTRIES_MAX (1 << 30) +/** Maximum number of keys that can be searched as a bulk */ +#define RTE_MEMBER_LOOKUP_BULK_MAX 64 +/** Entry count per bucket in hash table based mode. */ +#define RTE_MEMBER_BUCKET_ENTRIES 16 +/** Maximum number of characters in setsum name. */ +#define RTE_MEMBER_NAMESIZE 32 + +/** @internal Hash function used by membership library. */ +#if defined(RTE_ARCH_X86) || defined(RTE_MACHINE_CPUFLAG_CRC32) +#include <rte_hash_crc.h> +#define MEMBER_HASH_FUNC rte_hash_crc +#else +#include <rte_jhash.h> +#define MEMBER_HASH_FUNC rte_jhash +#endif + +extern int librte_member_logtype; + +#define RTE_MEMBER_LOG(level, ...) \ + rte_log(RTE_LOG_ ## level, \ + librte_member_logtype, \ + RTE_FMT("%s(): " RTE_FMT_HEAD(__VA_ARGS__,), \ + __func__, \ + RTE_FMT_TAIL(__VA_ARGS__,))) + +/** @internal setsummary structure. */ +struct rte_member_setsum; + +/** + * @warning + * @b EXPERIMENTAL: this API may change without prior notice + * + * Parameter struct used to create set summary + */ +struct rte_member_parameters; + +/** + * @warning + * @b EXPERIMENTAL: this API may change without prior notice + * + * Define different set summary types + */ +enum rte_member_setsum_type { + RTE_MEMBER_TYPE_HT = 0, /**< Hash table based set summary. */ + RTE_MEMBER_TYPE_VBF, /**< Vector of bloom filters. */ + RTE_MEMBER_NUM_TYPE +}; + +/** @internal compare function for different arch. */ +enum rte_member_sig_compare_function { + RTE_MEMBER_COMPARE_SCALAR = 0, + RTE_MEMBER_COMPARE_AVX2, + RTE_MEMBER_COMPARE_NUM +}; + +/** @internal setsummary structure. */ +struct rte_member_setsum { + enum rte_member_setsum_type type; /* Type of the set summary. */ + uint32_t key_len; /* Length of key. */ + uint32_t prim_hash_seed; /* Primary hash function seed. */ + uint32_t sec_hash_seed; /* Secondary hash function seed. */ + + /* Hash table based. */ + uint32_t bucket_cnt; /* Number of buckets. */ + uint32_t bucket_mask; /* Bit mask to get bucket index. */ + /* For runtime selecting AVX, scalar, etc for signature comparison. */ + enum rte_member_sig_compare_function sig_cmp_fn; + uint8_t cache; /* If it is cache mode for ht based. */ + + /* Vector bloom filter. */ + uint32_t num_set; /* Number of set (bf) in vbf. */ + uint32_t bits; /* Number of bits in each bf. */ + uint32_t bit_mask; /* Bit mask to get bit location in bf. */ + uint32_t num_hashes; /* Number of hash values to index bf. */ + + uint32_t mul_shift; /* vbf internal variable used during bit test. */ + uint32_t div_shift; /* vbf internal variable used during bit test. */ + + void *table; /* This is the handler of hash table or vBF array. */ + + + /* Second cache line should start here. */ + uint32_t socket_id; /* NUMA Socket ID for memory. */ + char name[RTE_MEMBER_NAMESIZE]; /* Name of this set summary. */ +} __rte_cache_aligned; + +/** + * @warning + * @b EXPERIMENTAL: this API may change without prior notice + * + * Parameters used when create the set summary table. Currently user can + * specify two types of setsummary: HT based and vBF. For HT based, user can + * specify cache or non-cache mode. Here is a table to describe some differences + * + */ +struct rte_member_parameters { + const char *name; /**< Name of the hash. */ + + /** + * User to specify the type of the setsummary from one of + * rte_member_setsum_type. + * + * HT based setsummary is implemented like a hash table. User should use + * this type when there are many sets. + * + * vBF setsummary is a vector of bloom filters. It is used when number + * of sets is not big (less than 32 for current implementation). + */ + enum rte_member_setsum_type type; + + /** + * is_cache is only used for HT based setsummary. + * + * If it is HT based setsummary, user to specify the subtype or mode + * of the setsummary. It could be cache, or non-cache mode. + * Set is_cache to be 1 if to use as cache mode. + * + * For cache mode, keys can be evicted out of the HT setsummary. Keys + * with the same signature and map to the same bucket + * will overwrite each other in the setsummary table. + * This mode is useful for the case that the set-summary only + * needs to keep record of the recently inserted keys. Both + * false-negative and false-positive could happen. + * + * For non-cache mode, keys cannot be evicted out of the cache. So for + * this mode the setsummary will become full eventually. Keys with the + * same signature but map to the same bucket will still occupy multiple + * entries. This mode does not give false-negative result. + */ + uint8_t is_cache; + + /** + * For HT setsummary, num_keys equals to the number of entries of the + * table. When the number of keys inserted in the HT setsummary + * approaches this number, eviction could happen. For cache mode, + * keys could be evicted out of the table. For non-cache mode, keys will + * be evicted to other buckets like cuckoo hash. The table will also + * likely to become full before the number of inserted keys equal to the + * total number of entries. + * + * For vBF, num_keys equal to the expected number of keys that will + * be inserted into the vBF. The implementation assumes the keys are + * evenly distributed to each BF in vBF. This is used to calculate the + * number of bits we need for each BF. User does not specify the size of + * each BF directly because the optimal size depends on the num_keys + * and false positive rate. + */ + uint32_t num_keys; + + /** + * The length of key is used for hash calculation. Since key is not + * stored in set-summary, large key does not require more memory space. + */ + uint32_t key_len; + + /** + * num_set is only used for vBF, but not used for HT setsummary. + * + * num_set is equal to the number of BFs in vBF. For current + * implementation, it only supports 1,2,4,8,16,32 BFs in one vBF set + * summary. If other number of sets are needed, for example 5, the user + * should allocate the minimum available value that larger than 5, + * which is 8. + */ + uint32_t num_set; + + /** + * false_positive_rate is only used for vBF, but not used for HT + * setsummary. + * + * For vBF, false_positive_rate is the user-defined false positive rate + * given expected number of inserted keys (num_keys). It is used to + * calculate the total number of bits for each BF, and the number of + * hash values used during lookup and insertion. For details please + * refer to vBF implementation and membership library documentation. + * + * For HT, This parameter is not directly set by users. + * HT setsummary's false positive rate is in the order of: + * false_pos = (1/bucket_count)*(1/2^16), since we use 16-bit signature. + * This is because two keys needs to map to same bucket and same + * signature to have a collision (false positive). bucket_count is equal + * to number of entries (num_keys) divided by entry count per bucket + * (RTE_MEMBER_BUCKET_ENTRIES). Thus, the false_positive_rate is not + * directly set by users for HT mode. + */ + float false_positive_rate; + + /** + * We use two seeds to calculate two independent hashes for each key. + * + * For HT type, one hash is used as signature, and the other is used + * for bucket location. + * For vBF type, these two hashes and their combinations are used as + * hash locations to index the bit array. + */ + uint32_t prim_hash_seed; + + /** + * The secondary seed should be a different value from the primary seed. + */ + uint32_t sec_hash_seed; + + int socket_id; /**< NUMA Socket ID for memory. */ +}; + +/** + * @warning + * @b EXPERIMENTAL: this API may change without prior notice + * + * Find an existing set-summary and return a pointer to it. + * + * @param name + * Name of the set-summary. + * @return + * Pointer to the set-summary or NULL if object not found + * with rte_errno set appropriately. Possible rte_errno values include: + * - ENOENT - value not available for return + */ +struct rte_member_setsum * +rte_member_find_existing(const char *name); + +/** + * @warning + * @b EXPERIMENTAL: this API may change without prior notice + * + * Create set-summary (SS). + * + * @param params + * Parameters to initialize the setsummary. + * @return + * Return the pointer to the setsummary. + * Return value is NULL if the creation failed. + */ +struct rte_member_setsum * +rte_member_create(const struct rte_member_parameters *params); + +/** + * @warning + * @b EXPERIMENTAL: this API may change without prior notice + * + * Lookup key in set-summary (SS). + * Single key lookup and return as soon as the first match found + * + * @param setsum + * Pointer of a setsummary. + * @param key + * Pointer of the key to be looked up. + * @param set_id + * Output the set id matches the key. + * @return + * Return 1 for found a match and 0 for not found a match. + */ +int +rte_member_lookup(const struct rte_member_setsum *setsum, const void *key, + member_set_t *set_id); + +/** + * @warning + * @b EXPERIMENTAL: this API may change without prior notice + * + * Lookup bulk of keys in set-summary (SS). + * Each key lookup returns as soon as the first match found + * + * @param setsum + * Pointer of a setsummary. + * @param keys + * Pointer of the bulk of keys to be looked up. + * @param num_keys + * Number of keys that will be lookup. + * @param set_ids + * Output set ids for all the keys to this array. + * User should preallocate array that can contain all results, which size is + * the num_keys. + * @return + * The number of keys that found a match. + */ +int +rte_member_lookup_bulk(const struct rte_member_setsum *setsum, + const void **keys, uint32_t num_keys, + member_set_t *set_ids); + +/** + * @warning + * @b EXPERIMENTAL: this API may change without prior notice + * + * Lookup a key in set-summary (SS) for multiple matches. + * The key lookup will find all matched entries (multiple match). + * Note that for cache mode of HT, each key can have at most one match. This is + * because keys with same signature that maps to same bucket will overwrite + * each other. So multi-match lookup should be used for vBF and non-cache HT. + * + * @param setsum + * Pointer of a set-summary. + * @param key + * Pointer of the key that to be looked up. + * @param max_match_per_key + * User specified maximum number of matches for each key. The function returns + * as soon as this number of matches found for the key. + * @param set_id + * Output set ids for all the matches of the key. User needs to preallocate + * the array that can contain max_match_per_key number of results. + * @return + * The number of matches that found for the key. + * For cache mode HT set-summary, the number should be at most 1. + */ +int +rte_member_lookup_multi(const struct rte_member_setsum *setsum, + const void *key, uint32_t max_match_per_key, + member_set_t *set_id); + +/** + * @warning + * @b EXPERIMENTAL: this API may change without prior notice + * + * Lookup a bulk of keys in set-summary (SS) for multiple matches each key. + * Each key lookup will find all matched entries (multiple match). + * Note that for cache mode HT, each key can have at most one match. So + * multi-match function is mainly used for vBF and non-cache mode HT. + * + * @param setsum + * Pointer of a setsummary. + * @param keys + * Pointer of the keys to be looked up. + * @param num_keys + * The number of keys that will be lookup. + * @param max_match_per_key + * The possible maximum number of matches for each key. + * @param match_count + * Output the number of matches for each key in an array. + * @param set_ids + * Return set ids for all the matches of all keys. Users pass in a + * preallocated 2D array with first dimension as key index and second + * dimension as match index. For example set_ids[bulk_size][max_match_per_key] + * @return + * The number of keys that found one or more matches in the set-summary. + */ +int +rte_member_lookup_multi_bulk(const struct rte_member_setsum *setsum, + const void **keys, uint32_t num_keys, + uint32_t max_match_per_key, + uint32_t *match_count, + member_set_t *set_ids); + +/** + * @warning + * @b EXPERIMENTAL: this API may change without prior notice + * + * Insert key into set-summary (SS). + * + * @param setsum + * Pointer of a set-summary. + * @param key + * Pointer of the key to be added. + * @param set_id + * The set id associated with the key that needs to be added. Different mode + * supports different set_id ranges. 0 cannot be used as set_id since + * RTE_MEMBER_NO_MATCH by default is set as 0. + * For HT mode, the set_id has range as [1, 0x7FFF], MSB is reserved. + * For vBF mode the set id is limited by the num_set parameter when create + * the set-summary. + * @return + * HT (cache mode) and vBF should never fail unless the set_id is not in the + * valid range. In such case -EINVAL is returned. + * For HT (non-cache mode) it could fail with -ENOSPC error code when table is + * full. + * For success it returns different values for different modes to provide + * extra information for users. + * Return 0 for HT (cache mode) if the add does not cause + * eviction, return 1 otherwise. Return 0 for non-cache mode if success, + * -ENOSPC for full, and 1 if cuckoo eviction happens. + * Always returns 0 for vBF mode. + */ +int +rte_member_add(const struct rte_member_setsum *setsum, const void *key, + member_set_t set_id); + +/** + * @warning + * @b EXPERIMENTAL: this API may change without prior notice + * + * De-allocate memory used by set-summary. + * + * @param setsum + * Pointer to the set summary. + */ +void +rte_member_free(struct rte_member_setsum *setsum); + +/** + * @warning + * @b EXPERIMENTAL: this API may change without prior notice + * + * Reset the set-summary tables. E.g. reset bits to be 0 in BF, + * reset set_id in each entry to be RTE_MEMBER_NO_MATCH in HT based SS. + * + * @param setsum + * Pointer to the set-summary. + */ +void +rte_member_reset(const struct rte_member_setsum *setsum); + +/** + * @warning + * @b EXPERIMENTAL: this API may change without prior notice + * + * Delete items from the set-summary. Note that vBF does not support deletion + * in current implementation. For vBF, error code of -EINVAL will be returned. + * + * @param setsum + * Pointer to the set-summary. + * @param key + * Pointer of the key to be deleted. + * @param set_id + * For HT mode, we need both key and its corresponding set_id to + * properly delete the key. Without set_id, we may delete other keys with the + * same signature. + * @return + * If no entry found to delete, an error code of -ENOENT could be returned. + */ +int +rte_member_delete(const struct rte_member_setsum *setsum, const void *key, + member_set_t set_id); + +#ifdef __cplusplus +} +#endif + +#endif /* _RTE_MEMBER_H_ */ diff --git a/src/spdk/dpdk/lib/librte_member/rte_member_ht.c b/src/spdk/dpdk/lib/librte_member/rte_member_ht.c new file mode 100644 index 000000000..cbcd0d440 --- /dev/null +++ b/src/spdk/dpdk/lib/librte_member/rte_member_ht.c @@ -0,0 +1,557 @@ +/* SPDX-License-Identifier: BSD-3-Clause + * Copyright(c) 2017 Intel Corporation + */ + +#include <rte_errno.h> +#include <rte_malloc.h> +#include <rte_prefetch.h> +#include <rte_random.h> +#include <rte_log.h> + +#include "rte_member.h" +#include "rte_member_ht.h" + +#if defined(RTE_ARCH_X86) +#include "rte_member_x86.h" +#endif + +/* Search bucket for entry with tmp_sig and update set_id */ +static inline int +update_entry_search(uint32_t bucket_id, member_sig_t tmp_sig, + struct member_ht_bucket *buckets, + member_set_t set_id) +{ + uint32_t i; + + for (i = 0; i < RTE_MEMBER_BUCKET_ENTRIES; i++) { + if (buckets[bucket_id].sigs[i] == tmp_sig) { + buckets[bucket_id].sets[i] = set_id; + return 1; + } + } + return 0; +} + +static inline int +search_bucket_single(uint32_t bucket_id, member_sig_t tmp_sig, + struct member_ht_bucket *buckets, + member_set_t *set_id) +{ + uint32_t iter; + + for (iter = 0; iter < RTE_MEMBER_BUCKET_ENTRIES; iter++) { + if (tmp_sig == buckets[bucket_id].sigs[iter] && + buckets[bucket_id].sets[iter] != + RTE_MEMBER_NO_MATCH) { + *set_id = buckets[bucket_id].sets[iter]; + return 1; + } + } + return 0; +} + +static inline void +search_bucket_multi(uint32_t bucket_id, member_sig_t tmp_sig, + struct member_ht_bucket *buckets, + uint32_t *counter, + uint32_t matches_per_key, + member_set_t *set_id) +{ + uint32_t iter; + + for (iter = 0; iter < RTE_MEMBER_BUCKET_ENTRIES; iter++) { + if (tmp_sig == buckets[bucket_id].sigs[iter] && + buckets[bucket_id].sets[iter] != + RTE_MEMBER_NO_MATCH) { + set_id[*counter] = buckets[bucket_id].sets[iter]; + (*counter)++; + if (*counter >= matches_per_key) + return; + } + } +} + +int +rte_member_create_ht(struct rte_member_setsum *ss, + const struct rte_member_parameters *params) +{ + uint32_t i, j; + uint32_t size_bucket_t; + uint32_t num_entries = rte_align32pow2(params->num_keys); + + if ((num_entries > RTE_MEMBER_ENTRIES_MAX) || + !rte_is_power_of_2(RTE_MEMBER_BUCKET_ENTRIES) || + num_entries < RTE_MEMBER_BUCKET_ENTRIES) { + rte_errno = EINVAL; + RTE_MEMBER_LOG(ERR, + "Membership HT create with invalid parameters\n"); + return -EINVAL; + } + + uint32_t num_buckets = num_entries / RTE_MEMBER_BUCKET_ENTRIES; + + size_bucket_t = sizeof(struct member_ht_bucket); + + struct member_ht_bucket *buckets = rte_zmalloc_socket(NULL, + num_buckets * size_bucket_t, + RTE_CACHE_LINE_SIZE, ss->socket_id); + + if (buckets == NULL) { + RTE_MEMBER_LOG(ERR, "memory allocation failed for HT " + "setsummary\n"); + return -ENOMEM; + } + + ss->table = buckets; + ss->bucket_cnt = num_buckets; + ss->bucket_mask = num_buckets - 1; + ss->cache = params->is_cache; + + for (i = 0; i < num_buckets; i++) { + for (j = 0; j < RTE_MEMBER_BUCKET_ENTRIES; j++) + buckets[i].sets[j] = RTE_MEMBER_NO_MATCH; + } +#if defined(RTE_ARCH_X86) + if (rte_cpu_get_flag_enabled(RTE_CPUFLAG_AVX2) && + RTE_MEMBER_BUCKET_ENTRIES == 16) + ss->sig_cmp_fn = RTE_MEMBER_COMPARE_AVX2; + else +#endif + ss->sig_cmp_fn = RTE_MEMBER_COMPARE_SCALAR; + + RTE_MEMBER_LOG(DEBUG, "Hash table based filter created, " + "the table has %u entries, %u buckets\n", + num_entries, num_buckets); + return 0; +} + +static inline void +get_buckets_index(const struct rte_member_setsum *ss, const void *key, + uint32_t *prim_bkt, uint32_t *sec_bkt, member_sig_t *sig) +{ + uint32_t first_hash = MEMBER_HASH_FUNC(key, ss->key_len, + ss->prim_hash_seed); + uint32_t sec_hash = MEMBER_HASH_FUNC(&first_hash, sizeof(uint32_t), + ss->sec_hash_seed); + /* + * We use the first hash value for the signature, and the second hash + * value to derive the primary and secondary bucket locations. + * + * For non-cache mode, we use the lower bits for the primary bucket + * location. Then we xor primary bucket location and the signature + * to get the secondary bucket location. This is called "partial-key + * cuckoo hashing" proposed by B. Fan, et al's paper + * "Cuckoo Filter: Practically Better Than Bloom". The benefit to use + * xor is that one could derive the alternative bucket location + * by only using the current bucket location and the signature. This is + * generally required by non-cache mode's eviction and deletion + * process without the need to store alternative hash value nor the full + * key. + * + * For cache mode, we use the lower bits for the primary bucket + * location and the higher bits for the secondary bucket location. In + * cache mode, keys are simply overwritten if bucket is full. We do not + * use xor since lower/higher bits are more independent hash values thus + * should provide slightly better table load. + */ + *sig = first_hash; + if (ss->cache) { + *prim_bkt = sec_hash & ss->bucket_mask; + *sec_bkt = (sec_hash >> 16) & ss->bucket_mask; + } else { + *prim_bkt = sec_hash & ss->bucket_mask; + *sec_bkt = (*prim_bkt ^ *sig) & ss->bucket_mask; + } +} + +int +rte_member_lookup_ht(const struct rte_member_setsum *ss, + const void *key, member_set_t *set_id) +{ + uint32_t prim_bucket, sec_bucket; + member_sig_t tmp_sig; + struct member_ht_bucket *buckets = ss->table; + + *set_id = RTE_MEMBER_NO_MATCH; + get_buckets_index(ss, key, &prim_bucket, &sec_bucket, &tmp_sig); + + switch (ss->sig_cmp_fn) { +#if defined(RTE_ARCH_X86) && defined(RTE_MACHINE_CPUFLAG_AVX2) + case RTE_MEMBER_COMPARE_AVX2: + if (search_bucket_single_avx(prim_bucket, tmp_sig, buckets, + set_id) || + search_bucket_single_avx(sec_bucket, tmp_sig, + buckets, set_id)) + return 1; + break; +#endif + default: + if (search_bucket_single(prim_bucket, tmp_sig, buckets, + set_id) || + search_bucket_single(sec_bucket, tmp_sig, + buckets, set_id)) + return 1; + } + + return 0; +} + +uint32_t +rte_member_lookup_bulk_ht(const struct rte_member_setsum *ss, + const void **keys, uint32_t num_keys, member_set_t *set_id) +{ + uint32_t i; + uint32_t num_matches = 0; + struct member_ht_bucket *buckets = ss->table; + member_sig_t tmp_sig[RTE_MEMBER_LOOKUP_BULK_MAX]; + uint32_t prim_buckets[RTE_MEMBER_LOOKUP_BULK_MAX]; + uint32_t sec_buckets[RTE_MEMBER_LOOKUP_BULK_MAX]; + + for (i = 0; i < num_keys; i++) { + get_buckets_index(ss, keys[i], &prim_buckets[i], + &sec_buckets[i], &tmp_sig[i]); + rte_prefetch0(&buckets[prim_buckets[i]]); + rte_prefetch0(&buckets[sec_buckets[i]]); + } + + for (i = 0; i < num_keys; i++) { + switch (ss->sig_cmp_fn) { +#if defined(RTE_ARCH_X86) && defined(RTE_MACHINE_CPUFLAG_AVX2) + case RTE_MEMBER_COMPARE_AVX2: + if (search_bucket_single_avx(prim_buckets[i], + tmp_sig[i], buckets, &set_id[i]) || + search_bucket_single_avx(sec_buckets[i], + tmp_sig[i], buckets, &set_id[i])) + num_matches++; + else + set_id[i] = RTE_MEMBER_NO_MATCH; + break; +#endif + default: + if (search_bucket_single(prim_buckets[i], tmp_sig[i], + buckets, &set_id[i]) || + search_bucket_single(sec_buckets[i], + tmp_sig[i], buckets, &set_id[i])) + num_matches++; + else + set_id[i] = RTE_MEMBER_NO_MATCH; + } + } + return num_matches; +} + +uint32_t +rte_member_lookup_multi_ht(const struct rte_member_setsum *ss, + const void *key, uint32_t match_per_key, + member_set_t *set_id) +{ + uint32_t num_matches = 0; + uint32_t prim_bucket, sec_bucket; + member_sig_t tmp_sig; + struct member_ht_bucket *buckets = ss->table; + + get_buckets_index(ss, key, &prim_bucket, &sec_bucket, &tmp_sig); + + switch (ss->sig_cmp_fn) { +#if defined(RTE_ARCH_X86) && defined(RTE_MACHINE_CPUFLAG_AVX2) + case RTE_MEMBER_COMPARE_AVX2: + search_bucket_multi_avx(prim_bucket, tmp_sig, buckets, + &num_matches, match_per_key, set_id); + if (num_matches < match_per_key) + search_bucket_multi_avx(sec_bucket, tmp_sig, + buckets, &num_matches, match_per_key, set_id); + return num_matches; +#endif + default: + search_bucket_multi(prim_bucket, tmp_sig, buckets, &num_matches, + match_per_key, set_id); + if (num_matches < match_per_key) + search_bucket_multi(sec_bucket, tmp_sig, + buckets, &num_matches, match_per_key, set_id); + return num_matches; + } +} + +uint32_t +rte_member_lookup_multi_bulk_ht(const struct rte_member_setsum *ss, + const void **keys, uint32_t num_keys, uint32_t match_per_key, + uint32_t *match_count, + member_set_t *set_ids) +{ + uint32_t i; + uint32_t num_matches = 0; + struct member_ht_bucket *buckets = ss->table; + uint32_t match_cnt_tmp; + member_sig_t tmp_sig[RTE_MEMBER_LOOKUP_BULK_MAX]; + uint32_t prim_buckets[RTE_MEMBER_LOOKUP_BULK_MAX]; + uint32_t sec_buckets[RTE_MEMBER_LOOKUP_BULK_MAX]; + + for (i = 0; i < num_keys; i++) { + get_buckets_index(ss, keys[i], &prim_buckets[i], + &sec_buckets[i], &tmp_sig[i]); + rte_prefetch0(&buckets[prim_buckets[i]]); + rte_prefetch0(&buckets[sec_buckets[i]]); + } + for (i = 0; i < num_keys; i++) { + match_cnt_tmp = 0; + + switch (ss->sig_cmp_fn) { +#if defined(RTE_ARCH_X86) && defined(RTE_MACHINE_CPUFLAG_AVX2) + case RTE_MEMBER_COMPARE_AVX2: + search_bucket_multi_avx(prim_buckets[i], tmp_sig[i], + buckets, &match_cnt_tmp, match_per_key, + &set_ids[i*match_per_key]); + if (match_cnt_tmp < match_per_key) + search_bucket_multi_avx(sec_buckets[i], + tmp_sig[i], buckets, &match_cnt_tmp, + match_per_key, + &set_ids[i*match_per_key]); + match_count[i] = match_cnt_tmp; + if (match_cnt_tmp != 0) + num_matches++; + break; +#endif + default: + search_bucket_multi(prim_buckets[i], tmp_sig[i], + buckets, &match_cnt_tmp, match_per_key, + &set_ids[i*match_per_key]); + if (match_cnt_tmp < match_per_key) + search_bucket_multi(sec_buckets[i], tmp_sig[i], + buckets, &match_cnt_tmp, match_per_key, + &set_ids[i*match_per_key]); + match_count[i] = match_cnt_tmp; + if (match_cnt_tmp != 0) + num_matches++; + } + } + return num_matches; +} + +static inline int +try_insert(struct member_ht_bucket *buckets, uint32_t prim, uint32_t sec, + member_sig_t sig, member_set_t set_id) +{ + int i; + /* If not full then insert into one slot */ + for (i = 0; i < RTE_MEMBER_BUCKET_ENTRIES; i++) { + if (buckets[prim].sets[i] == RTE_MEMBER_NO_MATCH) { + buckets[prim].sigs[i] = sig; + buckets[prim].sets[i] = set_id; + return 0; + } + } + /* If prim failed, we need to access second bucket */ + for (i = 0; i < RTE_MEMBER_BUCKET_ENTRIES; i++) { + if (buckets[sec].sets[i] == RTE_MEMBER_NO_MATCH) { + buckets[sec].sigs[i] = sig; + buckets[sec].sets[i] = set_id; + return 0; + } + } + return -1; +} + +static inline int +try_update(struct member_ht_bucket *buckets, uint32_t prim, uint32_t sec, + member_sig_t sig, member_set_t set_id, + enum rte_member_sig_compare_function cmp_fn) +{ + switch (cmp_fn) { +#if defined(RTE_ARCH_X86) && defined(RTE_MACHINE_CPUFLAG_AVX2) + case RTE_MEMBER_COMPARE_AVX2: + if (update_entry_search_avx(prim, sig, buckets, set_id) || + update_entry_search_avx(sec, sig, buckets, + set_id)) + return 0; + break; +#endif + default: + if (update_entry_search(prim, sig, buckets, set_id) || + update_entry_search(sec, sig, buckets, + set_id)) + return 0; + } + return -1; +} + +static inline int +evict_from_bucket(void) +{ + /* For now, we randomly pick one entry to evict */ + return rte_rand() & (RTE_MEMBER_BUCKET_ENTRIES - 1); +} + +/* + * This function is similar to the cuckoo hash make_space function in hash + * library + */ +static inline int +make_space_bucket(const struct rte_member_setsum *ss, uint32_t bkt_idx, + unsigned int *nr_pushes) +{ + unsigned int i, j; + int ret; + struct member_ht_bucket *buckets = ss->table; + uint32_t next_bucket_idx; + struct member_ht_bucket *next_bkt[RTE_MEMBER_BUCKET_ENTRIES]; + struct member_ht_bucket *bkt = &buckets[bkt_idx]; + /* MSB is set to indicate if an entry has been already pushed */ + member_set_t flag_mask = 1U << (sizeof(member_set_t) * 8 - 1); + + /* + * Push existing item (search for bucket with space in + * alternative locations) to its alternative location + */ + for (i = 0; i < RTE_MEMBER_BUCKET_ENTRIES; i++) { + /* Search for space in alternative locations */ + next_bucket_idx = (bkt->sigs[i] ^ bkt_idx) & ss->bucket_mask; + next_bkt[i] = &buckets[next_bucket_idx]; + for (j = 0; j < RTE_MEMBER_BUCKET_ENTRIES; j++) { + if (next_bkt[i]->sets[j] == RTE_MEMBER_NO_MATCH) + break; + } + + if (j != RTE_MEMBER_BUCKET_ENTRIES) + break; + } + + /* Alternative location has spare room (end of recursive function) */ + if (i != RTE_MEMBER_BUCKET_ENTRIES) { + next_bkt[i]->sigs[j] = bkt->sigs[i]; + next_bkt[i]->sets[j] = bkt->sets[i]; + return i; + } + + /* Pick entry that has not been pushed yet */ + for (i = 0; i < RTE_MEMBER_BUCKET_ENTRIES; i++) + if ((bkt->sets[i] & flag_mask) == 0) + break; + + /* All entries have been pushed, so entry cannot be added */ + if (i == RTE_MEMBER_BUCKET_ENTRIES || + ++(*nr_pushes) > RTE_MEMBER_MAX_PUSHES) + return -ENOSPC; + + next_bucket_idx = (bkt->sigs[i] ^ bkt_idx) & ss->bucket_mask; + /* Set flag to indicate that this entry is going to be pushed */ + bkt->sets[i] |= flag_mask; + + /* Need room in alternative bucket to insert the pushed entry */ + ret = make_space_bucket(ss, next_bucket_idx, nr_pushes); + /* + * After recursive function. + * Clear flags and insert the pushed entry + * in its alternative location if successful, + * or return error + */ + bkt->sets[i] &= ~flag_mask; + if (ret >= 0) { + next_bkt[i]->sigs[ret] = bkt->sigs[i]; + next_bkt[i]->sets[ret] = bkt->sets[i]; + return i; + } else + return ret; +} + +int +rte_member_add_ht(const struct rte_member_setsum *ss, + const void *key, member_set_t set_id) +{ + int ret; + unsigned int nr_pushes = 0; + uint32_t prim_bucket, sec_bucket; + member_sig_t tmp_sig; + struct member_ht_bucket *buckets = ss->table; + member_set_t flag_mask = 1U << (sizeof(member_set_t) * 8 - 1); + + if (set_id == RTE_MEMBER_NO_MATCH || (set_id & flag_mask) != 0) + return -EINVAL; + + get_buckets_index(ss, key, &prim_bucket, &sec_bucket, &tmp_sig); + + /* + * If it is cache based setsummary, we try overwriting (updating) + * existing entry with the same signature first. In cache mode, we allow + * false negatives and only cache the most recent keys. + * + * For non-cache mode, we do not update existing entry with the same + * signature. This is because if two keys with same signature update + * each other, false negative may happen, which is not the expected + * behavior for non-cache setsummary. + */ + if (ss->cache) { + ret = try_update(buckets, prim_bucket, sec_bucket, tmp_sig, + set_id, ss->sig_cmp_fn); + if (ret != -1) + return ret; + } + /* If not full then insert into one slot */ + ret = try_insert(buckets, prim_bucket, sec_bucket, tmp_sig, set_id); + if (ret != -1) + return ret; + + /* Random pick prim or sec for recursive displacement */ + uint32_t select_bucket = (tmp_sig && 1U) ? prim_bucket : sec_bucket; + if (ss->cache) { + ret = evict_from_bucket(); + buckets[select_bucket].sigs[ret] = tmp_sig; + buckets[select_bucket].sets[ret] = set_id; + return 1; + } + + ret = make_space_bucket(ss, select_bucket, &nr_pushes); + if (ret >= 0) { + buckets[select_bucket].sigs[ret] = tmp_sig; + buckets[select_bucket].sets[ret] = set_id; + ret = 1; + } + + return ret; +} + +void +rte_member_free_ht(struct rte_member_setsum *ss) +{ + rte_free(ss->table); +} + +int +rte_member_delete_ht(const struct rte_member_setsum *ss, const void *key, + member_set_t set_id) +{ + int i; + uint32_t prim_bucket, sec_bucket; + member_sig_t tmp_sig; + struct member_ht_bucket *buckets = ss->table; + + get_buckets_index(ss, key, &prim_bucket, &sec_bucket, &tmp_sig); + + for (i = 0; i < RTE_MEMBER_BUCKET_ENTRIES; i++) { + if (tmp_sig == buckets[prim_bucket].sigs[i] && + set_id == buckets[prim_bucket].sets[i]) { + buckets[prim_bucket].sets[i] = RTE_MEMBER_NO_MATCH; + return 0; + } + } + + for (i = 0; i < RTE_MEMBER_BUCKET_ENTRIES; i++) { + if (tmp_sig == buckets[sec_bucket].sigs[i] && + set_id == buckets[sec_bucket].sets[i]) { + buckets[sec_bucket].sets[i] = RTE_MEMBER_NO_MATCH; + return 0; + } + } + return -ENOENT; +} + +void +rte_member_reset_ht(const struct rte_member_setsum *ss) +{ + uint32_t i, j; + struct member_ht_bucket *buckets = ss->table; + + for (i = 0; i < ss->bucket_cnt; i++) { + for (j = 0; j < RTE_MEMBER_BUCKET_ENTRIES; j++) + buckets[i].sets[j] = RTE_MEMBER_NO_MATCH; + } +} diff --git a/src/spdk/dpdk/lib/librte_member/rte_member_ht.h b/src/spdk/dpdk/lib/librte_member/rte_member_ht.h new file mode 100644 index 000000000..9e24ccdc2 --- /dev/null +++ b/src/spdk/dpdk/lib/librte_member/rte_member_ht.h @@ -0,0 +1,65 @@ +/* SPDX-License-Identifier: BSD-3-Clause + * Copyright(c) 2017 Intel Corporation + */ + +#ifndef _RTE_MEMBER_HT_H_ +#define _RTE_MEMBER_HT_H_ + +#ifdef __cplusplus +extern "C" { +#endif + +/* Maximum number of pushes for cuckoo path in HT mode. */ +#define RTE_MEMBER_MAX_PUSHES 50 + +typedef uint16_t member_sig_t; /* signature size is 16 bit */ + +/* The bucket struct for ht setsum */ +struct member_ht_bucket { + member_sig_t sigs[RTE_MEMBER_BUCKET_ENTRIES]; /* 2-byte signature */ + member_set_t sets[RTE_MEMBER_BUCKET_ENTRIES]; /* 2-byte set */ +} __rte_cache_aligned; + +int +rte_member_create_ht(struct rte_member_setsum *ss, + const struct rte_member_parameters *params); + +int +rte_member_lookup_ht(const struct rte_member_setsum *setsum, + const void *key, member_set_t *set_id); + +uint32_t +rte_member_lookup_bulk_ht(const struct rte_member_setsum *setsum, + const void **keys, uint32_t num_keys, + member_set_t *set_ids); + +uint32_t +rte_member_lookup_multi_ht(const struct rte_member_setsum *setsum, + const void *key, uint32_t match_per_key, + member_set_t *set_id); + +uint32_t +rte_member_lookup_multi_bulk_ht(const struct rte_member_setsum *setsum, + const void **keys, uint32_t num_keys, uint32_t match_per_key, + uint32_t *match_count, + member_set_t *set_ids); + +int +rte_member_add_ht(const struct rte_member_setsum *setsum, + const void *key, member_set_t set_id); + +void +rte_member_free_ht(struct rte_member_setsum *setsum); + +int +rte_member_delete_ht(const struct rte_member_setsum *ss, const void *key, + member_set_t set_id); + +void +rte_member_reset_ht(const struct rte_member_setsum *setsum); + +#ifdef __cplusplus +} +#endif + +#endif /* _RTE_MEMBER_HT_H_ */ diff --git a/src/spdk/dpdk/lib/librte_member/rte_member_vbf.c b/src/spdk/dpdk/lib/librte_member/rte_member_vbf.c new file mode 100644 index 000000000..8a232bae0 --- /dev/null +++ b/src/spdk/dpdk/lib/librte_member/rte_member_vbf.c @@ -0,0 +1,321 @@ +/* SPDX-License-Identifier: BSD-3-Clause + * Copyright(c) 2017 Intel Corporation + */ + +#include <math.h> +#include <string.h> + +#include <rte_malloc.h> +#include <rte_memory.h> +#include <rte_errno.h> +#include <rte_log.h> + +#include "rte_member.h" +#include "rte_member_vbf.h" + +/* + * vBF currently implemented as a big array. + * The BFs have a vertical layout. Bits in same location of all bfs will stay + * in the same cache line. + * For example, if we have 32 bloom filters, we use a uint32_t array to + * represent all of them. array[0] represent the first location of all the + * bloom filters, array[1] represents the second location of all the + * bloom filters, etc. The advantage of this layout is to minimize the average + * number of memory accesses to test all bloom filters. + * + * Currently the implementation supports vBF containing 1,2,4,8,16,32 BFs. + */ +int +rte_member_create_vbf(struct rte_member_setsum *ss, + const struct rte_member_parameters *params) +{ + + if (params->num_set > RTE_MEMBER_MAX_BF || + !rte_is_power_of_2(params->num_set) || + params->num_keys == 0 || + params->false_positive_rate == 0 || + params->false_positive_rate > 1) { + rte_errno = EINVAL; + RTE_MEMBER_LOG(ERR, "Membership vBF create with invalid parameters\n"); + return -EINVAL; + } + + /* We assume expected keys evenly distribute to all BFs */ + uint32_t num_keys_per_bf = 1 + (params->num_keys - 1) / ss->num_set; + + /* + * Note that the false positive rate is for all BFs in the vBF + * such that the single BF's false positive rate needs to be + * calculated. + * Assume each BF's False positive rate is fp_one_bf. The total false + * positive rate is fp = 1-(1-fp_one_bf)^n. + * => fp_one_bf = 1 - (1-fp)^(1/n) + */ + + float fp_one_bf = 1 - pow((1 - params->false_positive_rate), + 1.0 / ss->num_set); + + if (fp_one_bf == 0) { + rte_errno = EINVAL; + RTE_MEMBER_LOG(ERR, "Membership BF false positive rate is too small\n"); + return -EINVAL; + } + + uint32_t bits = ceil((num_keys_per_bf * + log(fp_one_bf)) / + log(1.0 / (pow(2.0, log(2.0))))); + + /* We round to power of 2 for performance during lookup */ + ss->bits = rte_align32pow2(bits); + + ss->num_hashes = (uint32_t)(log(2.0) * bits / num_keys_per_bf); + ss->bit_mask = ss->bits - 1; + + /* + * Since we round the bits to power of 2, the final false positive + * rate will probably not be same as the user specified. We log the + * new value as debug message. + */ + float new_fp = pow((1 - pow((1 - 1.0 / ss->bits), num_keys_per_bf * + ss->num_hashes)), ss->num_hashes); + new_fp = 1 - pow((1 - new_fp), ss->num_set); + + /* + * Reduce hash function count, until we approach the user specified + * false-positive rate. Otherwise it is too conservative + */ + int tmp_num_hash = ss->num_hashes; + + while (tmp_num_hash > 1) { + float tmp_fp = new_fp; + + tmp_num_hash--; + new_fp = pow((1 - pow((1 - 1.0 / ss->bits), num_keys_per_bf * + tmp_num_hash)), tmp_num_hash); + new_fp = 1 - pow((1 - new_fp), ss->num_set); + + if (new_fp > params->false_positive_rate) { + new_fp = tmp_fp; + tmp_num_hash++; + break; + } + } + + ss->num_hashes = tmp_num_hash; + + /* + * To avoid multiplication and division: + * mul_shift is used for multiplication shift during bit test + * div_shift is used for division shift, to be divided by number of bits + * represented by a uint32_t variable + */ + ss->mul_shift = __builtin_ctzl(ss->num_set); + ss->div_shift = __builtin_ctzl(32 >> ss->mul_shift); + + RTE_MEMBER_LOG(DEBUG, "vector bloom filter created, " + "each bloom filter expects %u keys, needs %u bits, %u hashes, " + "with false positive rate set as %.5f, " + "The new calculated vBF false positive rate is %.5f\n", + num_keys_per_bf, ss->bits, ss->num_hashes, fp_one_bf, new_fp); + + ss->table = rte_zmalloc_socket(NULL, ss->num_set * (ss->bits >> 3), + RTE_CACHE_LINE_SIZE, ss->socket_id); + if (ss->table == NULL) + return -ENOMEM; + + return 0; +} + +static inline uint32_t +test_bit(uint32_t bit_loc, const struct rte_member_setsum *ss) +{ + uint32_t *vbf = ss->table; + uint32_t n = ss->num_set; + uint32_t div_shift = ss->div_shift; + uint32_t mul_shift = ss->mul_shift; + /* + * a is how many bits in one BF are represented by one 32bit + * variable. + */ + uint32_t a = 32 >> mul_shift; + /* + * x>>b is the divide, x & (a-1) is the mod, & (1<<n-1) to mask out bits + * we do not need + */ + return (vbf[bit_loc >> div_shift] >> + ((bit_loc & (a - 1)) << mul_shift)) & ((1ULL << n) - 1); +} + +static inline void +set_bit(uint32_t bit_loc, const struct rte_member_setsum *ss, int32_t set) +{ + uint32_t *vbf = ss->table; + uint32_t div_shift = ss->div_shift; + uint32_t mul_shift = ss->mul_shift; + uint32_t a = 32 >> mul_shift; + + vbf[bit_loc >> div_shift] |= + 1UL << (((bit_loc & (a - 1)) << mul_shift) + set - 1); +} + +int +rte_member_lookup_vbf(const struct rte_member_setsum *ss, const void *key, + member_set_t *set_id) +{ + uint32_t j; + uint32_t h1 = MEMBER_HASH_FUNC(key, ss->key_len, ss->prim_hash_seed); + uint32_t h2 = MEMBER_HASH_FUNC(&h1, sizeof(uint32_t), + ss->sec_hash_seed); + uint32_t mask = ~0; + uint32_t bit_loc; + + for (j = 0; j < ss->num_hashes; j++) { + bit_loc = (h1 + j * h2) & ss->bit_mask; + mask &= test_bit(bit_loc, ss); + } + + if (mask) { + *set_id = __builtin_ctzl(mask) + 1; + return 1; + } + + *set_id = RTE_MEMBER_NO_MATCH; + return 0; +} + +uint32_t +rte_member_lookup_bulk_vbf(const struct rte_member_setsum *ss, + const void **keys, uint32_t num_keys, member_set_t *set_ids) +{ + uint32_t i, k; + uint32_t num_matches = 0; + uint32_t mask[RTE_MEMBER_LOOKUP_BULK_MAX]; + uint32_t h1[RTE_MEMBER_LOOKUP_BULK_MAX], h2[RTE_MEMBER_LOOKUP_BULK_MAX]; + uint32_t bit_loc; + + for (i = 0; i < num_keys; i++) + h1[i] = MEMBER_HASH_FUNC(keys[i], ss->key_len, + ss->prim_hash_seed); + for (i = 0; i < num_keys; i++) + h2[i] = MEMBER_HASH_FUNC(&h1[i], sizeof(uint32_t), + ss->sec_hash_seed); + for (i = 0; i < num_keys; i++) { + mask[i] = ~0; + for (k = 0; k < ss->num_hashes; k++) { + bit_loc = (h1[i] + k * h2[i]) & ss->bit_mask; + mask[i] &= test_bit(bit_loc, ss); + } + } + for (i = 0; i < num_keys; i++) { + if (mask[i]) { + set_ids[i] = __builtin_ctzl(mask[i]) + 1; + num_matches++; + } else + set_ids[i] = RTE_MEMBER_NO_MATCH; + } + return num_matches; +} + +uint32_t +rte_member_lookup_multi_vbf(const struct rte_member_setsum *ss, + const void *key, uint32_t match_per_key, + member_set_t *set_id) +{ + uint32_t num_matches = 0; + uint32_t j; + uint32_t h1 = MEMBER_HASH_FUNC(key, ss->key_len, ss->prim_hash_seed); + uint32_t h2 = MEMBER_HASH_FUNC(&h1, sizeof(uint32_t), + ss->sec_hash_seed); + uint32_t mask = ~0; + uint32_t bit_loc; + + for (j = 0; j < ss->num_hashes; j++) { + bit_loc = (h1 + j * h2) & ss->bit_mask; + mask &= test_bit(bit_loc, ss); + } + while (mask) { + uint32_t loc = __builtin_ctzl(mask); + set_id[num_matches] = loc + 1; + num_matches++; + if (num_matches >= match_per_key) + return num_matches; + mask &= ~(1UL << loc); + } + return num_matches; +} + +uint32_t +rte_member_lookup_multi_bulk_vbf(const struct rte_member_setsum *ss, + const void **keys, uint32_t num_keys, uint32_t match_per_key, + uint32_t *match_count, + member_set_t *set_ids) +{ + uint32_t i, k; + uint32_t num_matches = 0; + uint32_t match_cnt_t; + uint32_t mask[RTE_MEMBER_LOOKUP_BULK_MAX]; + uint32_t h1[RTE_MEMBER_LOOKUP_BULK_MAX], h2[RTE_MEMBER_LOOKUP_BULK_MAX]; + uint32_t bit_loc; + + for (i = 0; i < num_keys; i++) + h1[i] = MEMBER_HASH_FUNC(keys[i], ss->key_len, + ss->prim_hash_seed); + for (i = 0; i < num_keys; i++) + h2[i] = MEMBER_HASH_FUNC(&h1[i], sizeof(uint32_t), + ss->sec_hash_seed); + for (i = 0; i < num_keys; i++) { + mask[i] = ~0; + for (k = 0; k < ss->num_hashes; k++) { + bit_loc = (h1[i] + k * h2[i]) & ss->bit_mask; + mask[i] &= test_bit(bit_loc, ss); + } + } + for (i = 0; i < num_keys; i++) { + match_cnt_t = 0; + while (mask[i]) { + uint32_t loc = __builtin_ctzl(mask[i]); + set_ids[i * match_per_key + match_cnt_t] = loc + 1; + match_cnt_t++; + if (match_cnt_t >= match_per_key) + break; + mask[i] &= ~(1UL << loc); + } + match_count[i] = match_cnt_t; + if (match_cnt_t != 0) + num_matches++; + } + return num_matches; +} + +int +rte_member_add_vbf(const struct rte_member_setsum *ss, + const void *key, member_set_t set_id) +{ + uint32_t i, h1, h2; + uint32_t bit_loc; + + if (set_id > ss->num_set || set_id == RTE_MEMBER_NO_MATCH) + return -EINVAL; + + h1 = MEMBER_HASH_FUNC(key, ss->key_len, ss->prim_hash_seed); + h2 = MEMBER_HASH_FUNC(&h1, sizeof(uint32_t), ss->sec_hash_seed); + + for (i = 0; i < ss->num_hashes; i++) { + bit_loc = (h1 + i * h2) & ss->bit_mask; + set_bit(bit_loc, ss, set_id); + } + return 0; +} + +void +rte_member_free_vbf(struct rte_member_setsum *ss) +{ + rte_free(ss->table); +} + +void +rte_member_reset_vbf(const struct rte_member_setsum *ss) +{ + uint32_t *vbf = ss->table; + memset(vbf, 0, (ss->num_set * ss->bits) >> 3); +} diff --git a/src/spdk/dpdk/lib/librte_member/rte_member_vbf.h b/src/spdk/dpdk/lib/librte_member/rte_member_vbf.h new file mode 100644 index 000000000..d49525d55 --- /dev/null +++ b/src/spdk/dpdk/lib/librte_member/rte_member_vbf.h @@ -0,0 +1,53 @@ +/* SPDX-License-Identifier: BSD-3-Clause + * Copyright(c) 2017 Intel Corporation + */ + +#ifndef _RTE_MEMBER_VBF_H_ +#define _RTE_MEMBER_VBF_H_ + +#ifdef __cplusplus +extern "C" { +#endif + +/* Currently we only support up to 32 sets in vBF */ +#define RTE_MEMBER_MAX_BF 32 + +int +rte_member_create_vbf(struct rte_member_setsum *ss, + const struct rte_member_parameters *params); + +int +rte_member_lookup_vbf(const struct rte_member_setsum *setsum, + const void *key, member_set_t *set_id); + +uint32_t +rte_member_lookup_bulk_vbf(const struct rte_member_setsum *setsum, + const void **keys, uint32_t num_keys, + member_set_t *set_ids); + +uint32_t +rte_member_lookup_multi_vbf(const struct rte_member_setsum *setsum, + const void *key, uint32_t match_per_key, + member_set_t *set_id); + +uint32_t +rte_member_lookup_multi_bulk_vbf(const struct rte_member_setsum *setsum, + const void **keys, uint32_t num_keys, uint32_t match_per_key, + uint32_t *match_count, + member_set_t *set_ids); + +int +rte_member_add_vbf(const struct rte_member_setsum *setsum, + const void *key, member_set_t set_id); + +void +rte_member_free_vbf(struct rte_member_setsum *ss); + +void +rte_member_reset_vbf(const struct rte_member_setsum *setsum); + +#ifdef __cplusplus +} +#endif + +#endif /* _RTE_MEMBER_VBF_H_ */ diff --git a/src/spdk/dpdk/lib/librte_member/rte_member_version.map b/src/spdk/dpdk/lib/librte_member/rte_member_version.map new file mode 100644 index 000000000..87780ae61 --- /dev/null +++ b/src/spdk/dpdk/lib/librte_member/rte_member_version.map @@ -0,0 +1,16 @@ +DPDK_20.0 { + global: + + rte_member_add; + rte_member_create; + rte_member_delete; + rte_member_find_existing; + rte_member_free; + rte_member_lookup; + rte_member_lookup_bulk; + rte_member_lookup_multi; + rte_member_lookup_multi_bulk; + rte_member_reset; + + local: *; +}; diff --git a/src/spdk/dpdk/lib/librte_member/rte_member_x86.h b/src/spdk/dpdk/lib/librte_member/rte_member_x86.h new file mode 100644 index 000000000..21a498ef0 --- /dev/null +++ b/src/spdk/dpdk/lib/librte_member/rte_member_x86.h @@ -0,0 +1,78 @@ +/* SPDX-License-Identifier: BSD-3-Clause + * Copyright(c) 2017 Intel Corporation + */ + +#ifndef _RTE_MEMBER_X86_H_ +#define _RTE_MEMBER_X86_H_ + +#ifdef __cplusplus +extern "C" { +#endif + +#include <x86intrin.h> + +#if defined(RTE_MACHINE_CPUFLAG_AVX2) + +static inline int +update_entry_search_avx(uint32_t bucket_id, member_sig_t tmp_sig, + struct member_ht_bucket *buckets, + member_set_t set_id) +{ + uint32_t hitmask = _mm256_movemask_epi8((__m256i)_mm256_cmpeq_epi16( + _mm256_load_si256((__m256i const *)buckets[bucket_id].sigs), + _mm256_set1_epi16(tmp_sig))); + if (hitmask) { + uint32_t hit_idx = __builtin_ctzl(hitmask) >> 1; + buckets[bucket_id].sets[hit_idx] = set_id; + return 1; + } + return 0; +} + +static inline int +search_bucket_single_avx(uint32_t bucket_id, member_sig_t tmp_sig, + struct member_ht_bucket *buckets, + member_set_t *set_id) +{ + uint32_t hitmask = _mm256_movemask_epi8((__m256i)_mm256_cmpeq_epi16( + _mm256_load_si256((__m256i const *)buckets[bucket_id].sigs), + _mm256_set1_epi16(tmp_sig))); + while (hitmask) { + uint32_t hit_idx = __builtin_ctzl(hitmask) >> 1; + if (buckets[bucket_id].sets[hit_idx] != RTE_MEMBER_NO_MATCH) { + *set_id = buckets[bucket_id].sets[hit_idx]; + return 1; + } + hitmask &= ~(3U << ((hit_idx) << 1)); + } + return 0; +} + +static inline void +search_bucket_multi_avx(uint32_t bucket_id, member_sig_t tmp_sig, + struct member_ht_bucket *buckets, + uint32_t *counter, + uint32_t match_per_key, + member_set_t *set_id) +{ + uint32_t hitmask = _mm256_movemask_epi8((__m256i)_mm256_cmpeq_epi16( + _mm256_load_si256((__m256i const *)buckets[bucket_id].sigs), + _mm256_set1_epi16(tmp_sig))); + while (hitmask) { + uint32_t hit_idx = __builtin_ctzl(hitmask) >> 1; + if (buckets[bucket_id].sets[hit_idx] != RTE_MEMBER_NO_MATCH) { + set_id[*counter] = buckets[bucket_id].sets[hit_idx]; + (*counter)++; + if (*counter >= match_per_key) + return; + } + hitmask &= ~(3U << ((hit_idx) << 1)); + } +} +#endif + +#ifdef __cplusplus +} +#endif + +#endif /* _RTE_MEMBER_X86_H_ */ |