Diffstat (limited to 'src/fluent-bit/lib/jemalloc-5.3.0/include/jemalloc/internal/bit_util.h')
-rw-r--r--  src/fluent-bit/lib/jemalloc-5.3.0/include/jemalloc/internal/bit_util.h | 422
1 file changed, 422 insertions, 0 deletions
diff --git a/src/fluent-bit/lib/jemalloc-5.3.0/include/jemalloc/internal/bit_util.h b/src/fluent-bit/lib/jemalloc-5.3.0/include/jemalloc/internal/bit_util.h
new file mode 100644
index 000000000..bac59140f
--- /dev/null
+++ b/src/fluent-bit/lib/jemalloc-5.3.0/include/jemalloc/internal/bit_util.h
@@ -0,0 +1,422 @@
+#ifndef JEMALLOC_INTERNAL_BIT_UTIL_H
+#define JEMALLOC_INTERNAL_BIT_UTIL_H
+
+#include "jemalloc/internal/assert.h"
+
+/* Sanity check. */
+#if !defined(JEMALLOC_INTERNAL_FFSLL) || !defined(JEMALLOC_INTERNAL_FFSL) \
+    || !defined(JEMALLOC_INTERNAL_FFS)
+# error JEMALLOC_INTERNAL_FFS{,L,LL} should have been defined by configure
+#endif
+
+/*
+ * Unlike the builtins and posix ffs functions, our ffs requires a non-zero
+ * input, and returns the position of the lowest bit set (as opposed to the
+ * posix versions, which return 1 larger than that position and use a return
+ * value of zero as a sentinel).  This tends to simplify logic in callers, and
+ * allows for consistency with the builtins we build fls on top of.
+ */
+static inline unsigned
+ffs_llu(unsigned long long x) {
+	util_assume(x != 0);
+	return JEMALLOC_INTERNAL_FFSLL(x) - 1;
+}
+
+static inline unsigned
+ffs_lu(unsigned long x) {
+	util_assume(x != 0);
+	return JEMALLOC_INTERNAL_FFSL(x) - 1;
+}
+
+static inline unsigned
+ffs_u(unsigned x) {
+	util_assume(x != 0);
+	return JEMALLOC_INTERNAL_FFS(x) - 1;
+}
+
+#define DO_FLS_SLOW(x, suffix) do { \
+	util_assume(x != 0); \
+	x |= (x >> 1); \
+	x |= (x >> 2); \
+	x |= (x >> 4); \
+	x |= (x >> 8); \
+	x |= (x >> 16); \
+	if (sizeof(x) > 4) { \
+		/* \
+		 * If sizeof(x) is 4, then the expression "x >> 32" \
+		 * will generate compiler warnings even if the code \
+		 * never executes.  This circumvents the warning, and \
+		 * gets compiled out in optimized builds. \
+		 */ \
+		int constant_32 = sizeof(x) * 4; \
+		x |= (x >> constant_32); \
+	} \
+	x++; \
+	if (x == 0) { \
+		return 8 * sizeof(x) - 1; \
+	} \
+	return ffs_##suffix(x) - 1; \
+} while(0)
+
+static inline unsigned
+fls_llu_slow(unsigned long long x) {
+	DO_FLS_SLOW(x, llu);
+}
+
+static inline unsigned
+fls_lu_slow(unsigned long x) {
+	DO_FLS_SLOW(x, lu);
+}
+
+static inline unsigned
+fls_u_slow(unsigned x) {
+	DO_FLS_SLOW(x, u);
+}
+
+#undef DO_FLS_SLOW
+
+#ifdef JEMALLOC_HAVE_BUILTIN_CLZ
+static inline unsigned
+fls_llu(unsigned long long x) {
+	util_assume(x != 0);
+	/*
+	 * Note that the xor here is more naturally written as subtraction;
+	 * the index of the last bit set is the number of bits in the type,
+	 * minus one, minus the number of leading zero bits.  But GCC
+	 * implements that as:
+	 *     bsr     edi, edi
+	 *     mov     eax, 31
+	 *     xor     edi, 31
+	 *     sub     eax, edi
+	 * If we write it as xor instead, then we get
+	 *     bsr     eax, edi
+	 * as desired.
+	 */
+	return (8 * sizeof(x) - 1) ^ __builtin_clzll(x);
+}
+
+static inline unsigned
+fls_lu(unsigned long x) {
+	util_assume(x != 0);
+	return (8 * sizeof(x) - 1) ^ __builtin_clzl(x);
+}
+
+static inline unsigned
+fls_u(unsigned x) {
+	util_assume(x != 0);
+	return (8 * sizeof(x) - 1) ^ __builtin_clz(x);
+}
+#elif defined(_MSC_VER)
+
+#if LG_SIZEOF_PTR == 3
+#define DO_BSR64(bit, x) _BitScanReverse64(&bit, x)
+#else
+/*
+ * This never actually runs; we're just dodging a compiler error for the
+ * never-taken branch where sizeof(void *) == 8.
+ */
+#define DO_BSR64(bit, x) bit = 0; unreachable()
+#endif
+
+#define DO_FLS(x) do { \
+	if (x == 0) { \
+		return 8 * sizeof(x); \
+	} \
+	unsigned long bit; \
+	if (sizeof(x) == 4) { \
+		_BitScanReverse(&bit, (unsigned)x); \
+		return (unsigned)bit; \
+	} \
+	if (sizeof(x) == 8 && sizeof(void *) == 8) { \
+		DO_BSR64(bit, x); \
+		return (unsigned)bit; \
+	} \
+	if (sizeof(x) == 8 && sizeof(void *) == 4) { \
+		/* Dodge a compiler warning, as above. */ \
+		int constant_32 = sizeof(x) * 4; \
+		if (_BitScanReverse(&bit, \
+		    (unsigned)(x >> constant_32))) { \
+			return 32 + (unsigned)bit; \
+		} else { \
+			_BitScanReverse(&bit, (unsigned)x); \
+			return (unsigned)bit; \
+		} \
+	} \
+	unreachable(); \
+} while (0)
+
+static inline unsigned
+fls_llu(unsigned long long x) {
+	DO_FLS(x);
+}
+
+static inline unsigned
+fls_lu(unsigned long x) {
+	DO_FLS(x);
+}
+
+static inline unsigned
+fls_u(unsigned x) {
+	DO_FLS(x);
+}
+
+#undef DO_FLS
+#undef DO_BSR64
+#else
+
+static inline unsigned
+fls_llu(unsigned long long x) {
+	return fls_llu_slow(x);
+}
+
+static inline unsigned
+fls_lu(unsigned long x) {
+	return fls_lu_slow(x);
+}
+
+static inline unsigned
+fls_u(unsigned x) {
+	return fls_u_slow(x);
+}
+#endif
+
+#if LG_SIZEOF_LONG_LONG > 3
+# error "Haven't implemented popcount for 16-byte ints."
+#endif
+
+#define DO_POPCOUNT(x, type) do { \
+	/* \
+	 * Algorithm from an old AMD optimization reference manual. \
+	 * We're putting a little bit more work than you might expect \
+	 * into the no-intrinsic case, since we only support the \
+	 * GCC intrinsics spelling of popcount (for now).  Detecting \
+	 * whether or not the popcount builtin is actually usable in \
+	 * MSVC is nontrivial. \
+	 */ \
+	\
+	type bmul = (type)0x0101010101010101ULL; \
+	\
+	/* \
+	 * Replace each 2 bits with the sideways sum of the original \
+	 * values.  0x5 = 0b0101. \
+	 * \
+	 * You might expect this to be: \
+	 *     x = (x & 0x55...) + ((x >> 1) & 0x55...). \
+	 * That costs an extra mask relative to this, though. \
+	 */ \
+	x = x - ((x >> 1) & (0x55U * bmul)); \
+	/* Replace each 4 bits with their sideways sum.  0x3 = 0b0011. */ \
+	x = (x & (bmul * 0x33U)) + ((x >> 2) & (bmul * 0x33U)); \
+	/* \
+	 * Replace each 8 bits with their sideways sum.  Note that we \
+	 * can't overflow within each 4-bit sum here, so we can skip \
+	 * the initial mask. \
+	 */ \
+	x = (x + (x >> 4)) & (bmul * 0x0FU); \
+	/* \
+	 * None of the partial sums in this multiplication (viewed in \
+	 * base-256) can overflow into the next digit.  So the least \
+	 * significant byte of the product will be the least \
+	 * significant byte of the original value, the second least \
+	 * significant byte will be the sum of the two least \
+	 * significant bytes of the original value, and so on. \
+	 * Importantly, the high byte will be the byte-wise sum of all \
+	 * the bytes of the original value. \
+	 */ \
+	x = x * bmul; \
+	x >>= ((sizeof(x) - 1) * 8); \
+	return (unsigned)x; \
+} while(0)
+
+static inline unsigned
+popcount_u_slow(unsigned bitmap) {
+	DO_POPCOUNT(bitmap, unsigned);
+}
+
+static inline unsigned
+popcount_lu_slow(unsigned long bitmap) {
+	DO_POPCOUNT(bitmap, unsigned long);
+}
+
+static inline unsigned
+popcount_llu_slow(unsigned long long bitmap) {
+	DO_POPCOUNT(bitmap, unsigned long long);
+}
+
+#undef DO_POPCOUNT
+
+static inline unsigned
+popcount_u(unsigned bitmap) {
+#ifdef JEMALLOC_INTERNAL_POPCOUNT
+	return JEMALLOC_INTERNAL_POPCOUNT(bitmap);
+#else
+	return popcount_u_slow(bitmap);
+#endif
+}
+
+static inline unsigned
+popcount_lu(unsigned long bitmap) {
+#ifdef JEMALLOC_INTERNAL_POPCOUNTL
+	return JEMALLOC_INTERNAL_POPCOUNTL(bitmap);
+#else
+	return popcount_lu_slow(bitmap);
+#endif
+}
+
+static inline unsigned
+popcount_llu(unsigned long long bitmap) {
+#ifdef JEMALLOC_INTERNAL_POPCOUNTLL
+	return JEMALLOC_INTERNAL_POPCOUNTLL(bitmap);
+#else
+	return popcount_llu_slow(bitmap);
+#endif
+}
+
+/*
+ * Clears the lowest set bit in bitmap, and returns
+ * the position of that bit.  bitmap *must not* be 0.
+ */
+
+static inline size_t
+cfs_lu(unsigned long* bitmap) {
+	util_assume(*bitmap != 0);
+	size_t bit = ffs_lu(*bitmap);
+	*bitmap ^= ZU(1) << bit;
+	return bit;
+}
+
+static inline unsigned
+ffs_zu(size_t x) {
+#if LG_SIZEOF_PTR == LG_SIZEOF_INT
+	return ffs_u(x);
+#elif LG_SIZEOF_PTR == LG_SIZEOF_LONG
+	return ffs_lu(x);
+#elif LG_SIZEOF_PTR == LG_SIZEOF_LONG_LONG
+	return ffs_llu(x);
+#else
+#error No implementation for size_t ffs()
+#endif
+}
+
+static inline unsigned
+fls_zu(size_t x) {
+#if LG_SIZEOF_PTR == LG_SIZEOF_INT
+	return fls_u(x);
+#elif LG_SIZEOF_PTR == LG_SIZEOF_LONG
+	return fls_lu(x);
+#elif LG_SIZEOF_PTR == LG_SIZEOF_LONG_LONG
+	return fls_llu(x);
+#else
+#error No implementation for size_t fls()
+#endif
+}
+
+
+static inline unsigned
+ffs_u64(uint64_t x) {
+#if LG_SIZEOF_LONG == 3
+	return ffs_lu(x);
+#elif LG_SIZEOF_LONG_LONG == 3
+	return ffs_llu(x);
+#else
+#error No implementation for 64-bit ffs()
+#endif
+}
+
+static inline unsigned
+fls_u64(uint64_t x) {
+#if LG_SIZEOF_LONG == 3
+	return fls_lu(x);
+#elif LG_SIZEOF_LONG_LONG == 3
+	return fls_llu(x);
+#else
+#error No implementation for 64-bit fls()
+#endif
+}
+
+static inline unsigned
+ffs_u32(uint32_t x) {
+#if LG_SIZEOF_INT == 2
+	return ffs_u(x);
+#else
+#error No implementation for 32-bit ffs()
+#endif
+	return ffs_u(x);
+}
+
+static inline unsigned
+fls_u32(uint32_t x) {
+#if LG_SIZEOF_INT == 2
+	return fls_u(x);
+#else
+#error No implementation for 32-bit fls()
+#endif
+	return fls_u(x);
+}
+
+static inline uint64_t
+pow2_ceil_u64(uint64_t x) {
+	if (unlikely(x <= 1)) {
+		return x;
+	}
+	size_t msb_on_index = fls_u64(x - 1);
+	/*
+	 * Range-check; it's on the callers to ensure that the result of this
+	 * call won't overflow.
+	 */
+	assert(msb_on_index < 63);
+	return 1ULL << (msb_on_index + 1);
+}
+
+static inline uint32_t
+pow2_ceil_u32(uint32_t x) {
+	if (unlikely(x <= 1)) {
+		return x;
+	}
+	size_t msb_on_index = fls_u32(x - 1);
+	/* As above. */
+	assert(msb_on_index < 31);
+	return 1U << (msb_on_index + 1);
+}
+
+/* Compute the smallest power of 2 that is >= x. */
+static inline size_t
+pow2_ceil_zu(size_t x) {
+#if (LG_SIZEOF_PTR == 3)
+	return pow2_ceil_u64(x);
+#else
+	return pow2_ceil_u32(x);
+#endif
+}
+
+static inline unsigned
+lg_floor(size_t x) {
+	util_assume(x != 0);
+#if (LG_SIZEOF_PTR == 3)
+	return fls_u64(x);
+#else
+	return fls_u32(x);
+#endif
+}
+
+static inline unsigned
+lg_ceil(size_t x) {
+	return lg_floor(x) + ((x & (x - 1)) == 0 ? 0 : 1);
+}
+
+/* A compile-time version of lg_floor and lg_ceil. */
+#define LG_FLOOR_1(x) 0
+#define LG_FLOOR_2(x) (x < (1ULL << 1) ? LG_FLOOR_1(x) : 1 + LG_FLOOR_1(x >> 1))
+#define LG_FLOOR_4(x) (x < (1ULL << 2) ? LG_FLOOR_2(x) : 2 + LG_FLOOR_2(x >> 2))
+#define LG_FLOOR_8(x) (x < (1ULL << 4) ? LG_FLOOR_4(x) : 4 + LG_FLOOR_4(x >> 4))
+#define LG_FLOOR_16(x) (x < (1ULL << 8) ? LG_FLOOR_8(x) : 8 + LG_FLOOR_8(x >> 8))
+#define LG_FLOOR_32(x) (x < (1ULL << 16) ? LG_FLOOR_16(x) : 16 + LG_FLOOR_16(x >> 16))
+#define LG_FLOOR_64(x) (x < (1ULL << 32) ? LG_FLOOR_32(x) : 32 + LG_FLOOR_32(x >> 32))
+#if LG_SIZEOF_PTR == 2
+# define LG_FLOOR(x) LG_FLOOR_32((x))
+#else
+# define LG_FLOOR(x) LG_FLOOR_64((x))
+#endif
+
+#define LG_CEIL(x) (LG_FLOOR(x) + (((x) & ((x) - 1)) == 0 ? 0 : 1))
+
+#endif /* JEMALLOC_INTERNAL_BIT_UTIL_H */
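For context only (not part of the diff above): a minimal standalone sketch of the zero-indexed ffs/fls convention and the SWAR popcount this header implements. The demo_* names are made up for illustration and do not exist in jemalloc; the sketch uses plain GCC/Clang builtins instead of the JEMALLOC_INTERNAL_* macros that configure defines, so it can be compiled and run on its own.

#include <assert.h>
#include <stdio.h>

/* Zero-based index of the lowest set bit; x must be non-zero.
 * Equivalent to POSIX ffs(x) - 1 for non-zero x, matching ffs_u above. */
static unsigned demo_ffs_u(unsigned x) {
	assert(x != 0);
	return (unsigned)__builtin_ctz(x);
}

/* Zero-based index of the highest set bit, via the clz/xor trick used above. */
static unsigned demo_fls_u(unsigned x) {
	assert(x != 0);
	return (8 * sizeof(x) - 1) ^ (unsigned)__builtin_clz(x);
}

/* The SWAR popcount from DO_POPCOUNT, specialized to 32-bit unsigned. */
static unsigned demo_popcount_u(unsigned x) {
	x = x - ((x >> 1) & 0x55555555U);                  /* 2-bit sideways sums */
	x = (x & 0x33333333U) + ((x >> 2) & 0x33333333U);  /* 4-bit sideways sums */
	x = (x + (x >> 4)) & 0x0F0F0F0FU;                  /* 8-bit sideways sums */
	return (x * 0x01010101U) >> 24;                    /* high byte = total */
}

int main(void) {
	unsigned v = 0xA0U; /* bits 5 and 7 set */
	printf("ffs=%u fls=%u popcount=%u\n",
	    demo_ffs_u(v), demo_fls_u(v), demo_popcount_u(v)); /* prints 5 7 2 */
	return 0;
}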