From cff6d757e3ba609c08ef2aaa00f07e53551e5bf6 Mon Sep 17 00:00:00 2001 From: Daniel Baumann Date: Mon, 3 Jun 2024 07:11:10 +0200 Subject: Adding upstream version 3.0.0. Signed-off-by: Daniel Baumann --- include/import/ebtree.h | 136 ++++++++++++++++++++++++++++++-------------- include/import/ist.h | 19 +++++++ include/import/slz-tables.h | 2 + include/import/xxhash.h | 2 +- 4 files changed, 114 insertions(+), 45 deletions(-) (limited to 'include/import') diff --git a/include/import/ebtree.h b/include/import/ebtree.h index d6e51d5..31a9cac 100644 --- a/include/import/ebtree.h +++ b/include/import/ebtree.h @@ -250,39 +250,84 @@ #include #include -static inline int flsnz8_generic(unsigned int x) +/* returns clz from 7 to 0 for 0x01 to 0xFF. Returns 7 for 0 as well. */ +static inline unsigned int clz8(unsigned char c) { - int ret = 0; - if (x >> 4) { x >>= 4; ret += 4; } - return ret + ((0xFFFFAA50U >> (x << 1)) & 3) + 1; + unsigned int r = 4; + + if (c & 0xf0) { + r = 0; + c >>= 4; + } + return r + ((0x000055afU >> (c * 2)) & 0x3); } -/* Note: we never need to run fls on null keys, so we can optimize the fls - * function by removing a conditional jump. +/* FLSNZ: find last set bit for non-zero value. "Last" here means the highest + * one. It returns a value from 1 to 32 for 1<<0 to 1<<31. */ -#if defined(__i386__) || defined(__x86_64__) -/* this code is similar on 32 and 64 bit */ -static inline int flsnz(int x) + +#if (defined(__i386__) || defined(__x86_64__)) && !defined(__atom__) +/* DO NOT USE ON ATOM! The instruction is emulated and is several times slower + * than doing the math by hand. + */ +static inline unsigned int flsnz32(unsigned int x) { - int r; + unsigned int r; __asm__("bsrl %1,%0\n" : "=r" (r) : "rm" (x)); - return r+1; + return r + 1; +} +#define flsnz32(x) flsnz32(x) + +# if defined(__x86_64__) +static inline unsigned int flsnz64(unsigned long long x) +{ + unsigned long long r; + __asm__("bsrq %1,%0\n" + : "=r" (r) : "rm" (x)); + return r + 1; +} +# define flsnz64(x) flsnz64(x) +# endif + +#elif !defined(__atom__) && defined(__GNUC__) && ((__GNUC__ > 4) || ((__GNUC__ == 4) && (__GNUC_MINOR__ >= 2))) +/* gcc >= 4.2 brings __builtin_clz() and __builtin_clzl(), usable for non-x86 */ + +static inline unsigned int flsnz32(unsigned int x) +{ + return 32 - __builtin_clz(x); +} +# define flsnz32(x) flsnz32(x) + +# if defined(__SIZEOF_LONG__) && (__SIZEOF_LONG__ > 4) +static inline unsigned int flsnz64(unsigned long x) +{ + return (__SIZEOF_LONG__ * 8) - __builtin_clzl(x); } +# define flsnz64(x) flsnz64(x) +# endif -static inline int flsnz8(unsigned char x) +#endif /* end of arch-specific implementations */ + +/*** Fallback versions below ***/ + +#ifndef flsnz8 +# if defined(flsnz32) +# define flsnz8(x) flsnz32((unsigned char)x) +# else +static inline unsigned int flsnz8(unsigned int x) { - int r; - __asm__("movzbl %%al, %%eax\n" - "bsrl %%eax,%0\n" - : "=r" (r) : "a" (x)); - return r+1; + unsigned int ret = 0; + if (x >> 4) { x >>= 4; ret += 4; } + return ret + ((0xFFFFAA50U >> (x << 1)) & 3) + 1; } +# define flsnz8(x) flsnz8(x) +# endif +#endif -#else -// returns 1 to 32 for 1<<0 to 1<<31. Undefined for 0. -#define flsnz(___a) ({ \ - register int ___x, ___bits = 0; \ +#ifndef flsnz32 +# define flsnz32(___a) ({ \ + register unsigned int ___x, ___bits = 0; \ ___x = (___a); \ if (___x & 0xffff0000) { ___x &= 0xffff0000; ___bits += 16;} \ if (___x & 0xff00ff00) { ___x &= 0xff00ff00; ___bits += 8;} \ @@ -291,16 +336,10 @@ static inline int flsnz8(unsigned char x) if (___x & 0xaaaaaaaa) { ___x &= 0xaaaaaaaa; ___bits += 1;} \ ___bits + 1; \ }) - -static inline int flsnz8(unsigned int x) -{ - return flsnz8_generic(x); -} - - #endif -static inline int fls64(unsigned long long x) +#ifndef flsnz64 +static inline unsigned int flsnz64(unsigned long long x) { unsigned int h; unsigned int bits = 32; @@ -310,10 +349,21 @@ static inline int fls64(unsigned long long x) h = x; bits = 0; } - return flsnz(h) + bits; + return flsnz32(h) + bits; } +# define flsnz64(x) flsnz64(x) +#endif + +#ifndef flsnz_long +# define flsnz_long(x) ((sizeof(long) > 4) ? flsnz64(x) : flsnz32(x)) +#endif -#define fls_auto(x) ((sizeof(x) > 4) ? fls64(x) : flsnz(x)) +#ifndef flsnz +# define flsnz(x) ((sizeof(x) > 4) ? flsnz64(x) : (sizeof(x) > 1) ? flsnz32(x) : flsnz8(x)) +#endif + +#define fls64(x) flsnz64(x) +#define fls_auto(x) ((x) ? flsnz(x) : 0) /* Linux-like "container_of". It returns a pointer to the structure of type * which has its member stored at address . @@ -720,9 +770,9 @@ static forceinline void __eb_delete(struct eb_node *node) * bytes. Note that parts or all of bits may be rechecked. It is only * passed here as a hint to speed up the check. */ -static forceinline int equal_bits(const unsigned char *a, - const unsigned char *b, - int ignore, int len) +static forceinline size_t equal_bits(const unsigned char *a, + const unsigned char *b, + size_t ignore, size_t len) { for (ignore >>= 3, a += ignore, b += ignore, ignore <<= 3; ignore < len; ) { @@ -738,7 +788,7 @@ static forceinline int equal_bits(const unsigned char *a, * it as the number of identical bits. Note that low bit numbers are * assigned to high positions in the byte, as we compare them as strings. */ - ignore -= flsnz8(c); + ignore -= flsnz_long(c); break; } } @@ -786,12 +836,12 @@ static forceinline int check_bits(const unsigned char *a, * permitted. Equal strings are reported as a negative number of bits, which * indicates the end was reached. */ -static forceinline int string_equal_bits(const unsigned char *a, - const unsigned char *b, - int ignore) +static forceinline size_t string_equal_bits(const unsigned char *a, + const unsigned char *b, + size_t ignore) { - int beg; - unsigned char c; + unsigned char c, d; + size_t beg; beg = ignore >> 3; @@ -799,8 +849,6 @@ static forceinline int string_equal_bits(const unsigned char *a, * or at the first zero we encounter on either side. */ while (1) { - unsigned char d; - c = a[beg]; d = b[beg]; beg++; @@ -809,14 +857,14 @@ static forceinline int string_equal_bits(const unsigned char *a, if (c) break; if (!d) - return -1; + return (size_t)-1; } /* OK now we know that a and b differ at byte , or that both are zero. * We have to find what bit is differing and report it as the number of * identical bits. Note that low bit numbers are assigned to high positions * in the byte, as we compare them as strings. */ - return (beg << 3) - flsnz8(c); + return (beg << 3) - flsnz(c); } static forceinline int cmp_bits(const unsigned char *a, const unsigned char *b, unsigned int pos) diff --git a/include/import/ist.h b/include/import/ist.h index e4e1425..962d63b 100644 --- a/include/import/ist.h +++ b/include/import/ist.h @@ -331,6 +331,25 @@ static inline struct ist istzero(const struct ist ist, size_t size) return ret; } +/* Remove trailing newline characters if present in by reducing its + * length. Both '\n', '\r' and '\n\r' match. Return the modified ist. + */ +static inline struct ist iststrip(const struct ist ist) +{ + struct ist ret = ist; + + if (ret.len) { + if (ret.ptr[ret.len - 1] == '\n') + --ret.len; + } + if (ret.len) { + if (ret.ptr[ret.len - 1] == '\r') + --ret.len; + } + + return ret; +} + /* returns the ordinal difference between two strings : * < 0 if ist1 < ist2 * = 0 if ist1 == ist2 diff --git a/include/import/slz-tables.h b/include/import/slz-tables.h index 0b3a5b9..6e6d658 100644 --- a/include/import/slz-tables.h +++ b/include/import/slz-tables.h @@ -1,3 +1,5 @@ +#include + /* Fixed Huffman table as per RFC1951. * * Lit Value Bits Codes diff --git a/include/import/xxhash.h b/include/import/xxhash.h index a18e8c7..7c3c3fc 100644 --- a/include/import/xxhash.h +++ b/include/import/xxhash.h @@ -3387,7 +3387,7 @@ XXH_PUBLIC_API XXH64_hash_t XXH64_hashFromCanonical(XXH_NOESCAPE const XXH64_can /* === Compiler specifics === */ -#if ((defined(sun) || defined(__sun)) && __cplusplus) /* Solaris includes __STDC_VERSION__ with C++. Tested with GCC 5.5 */ +#if ((defined(sun) || defined(__sun)) && defined(__cplusplus) && __cplusplus) /* Solaris includes __STDC_VERSION__ with C++. Tested with GCC 5.5 */ # define XXH_RESTRICT /* disable */ #elif defined (__STDC_VERSION__) && __STDC_VERSION__ >= 199901L /* >= C99 */ # define XXH_RESTRICT restrict -- cgit v1.2.3