diff options
Diffstat (limited to 'ccan/ccan/ilog')
l--------- | ccan/ccan/ilog/LICENSE | 1 | ||||
-rw-r--r-- | ccan/ccan/ilog/ilog.c | 141 | ||||
-rw-r--r-- | ccan/ccan/ilog/ilog.h | 154 |
3 files changed, 296 insertions, 0 deletions
diff --git a/ccan/ccan/ilog/LICENSE b/ccan/ccan/ilog/LICENSE new file mode 120000 index 0000000..b7951da --- /dev/null +++ b/ccan/ccan/ilog/LICENSE @@ -0,0 +1 @@ +../../licenses/CC0
\ No newline at end of file diff --git a/ccan/ccan/ilog/ilog.c b/ccan/ccan/ilog/ilog.c new file mode 100644 index 0000000..5f5122d --- /dev/null +++ b/ccan/ccan/ilog/ilog.c @@ -0,0 +1,141 @@ +/*(C) Timothy B. Terriberry (tterribe@xiph.org) 2001-2009 CC0 (Public domain). + * See LICENSE file for details. */ +#include "ilog.h" +#include <limits.h> + +/*The fastest fallback strategy for platforms with fast multiplication appears + to be based on de Bruijn sequences~\cite{LP98}. + Tests confirmed this to be true even on an ARM11, where it is actually faster + than using the native clz instruction. + Define ILOG_NODEBRUIJN to use a simpler fallback on platforms where + multiplication or table lookups are too expensive. + + @UNPUBLISHED{LP98, + author="Charles E. Leiserson and Harald Prokop", + title="Using de {Bruijn} Sequences to Index a 1 in a Computer Word", + month=Jun, + year=1998, + note="\url{http://supertech.csail.mit.edu/papers/debruijn.pdf}" + }*/ +static UNNEEDED const unsigned char DEBRUIJN_IDX32[32]={ + 0, 1,28, 2,29,14,24, 3,30,22,20,15,25,17, 4, 8, + 31,27,13,23,21,19,16, 7,26,12,18, 6,11, 5,10, 9 +}; + +/* We always compile these in, in case someone takes address of function. */ +#undef ilog32_nz +#undef ilog32 +#undef ilog64_nz +#undef ilog64 + +int ilog32(uint32_t _v){ +/*On a Pentium M, this branchless version tested as the fastest version without + multiplications on 1,000,000,000 random 32-bit integers, edging out a + similar version with branches, and a 256-entry LUT version.*/ +# if defined(ILOG_NODEBRUIJN) + int ret; + int m; + ret=_v>0; + m=(_v>0xFFFFU)<<4; + _v>>=m; + ret|=m; + m=(_v>0xFFU)<<3; + _v>>=m; + ret|=m; + m=(_v>0xFU)<<2; + _v>>=m; + ret|=m; + m=(_v>3)<<1; + _v>>=m; + ret|=m; + ret+=_v>1; + return ret; +/*This de Bruijn sequence version is faster if you have a fast multiplier.*/ +# else + int ret; + ret=_v>0; + _v|=_v>>1; + _v|=_v>>2; + _v|=_v>>4; + _v|=_v>>8; + _v|=_v>>16; + _v=(_v>>1)+1; + ret+=DEBRUIJN_IDX32[_v*0x77CB531U>>27&0x1F]; + return ret; +# endif +} + +int ilog32_nz(uint32_t _v) +{ + return ilog32(_v); +} + +int ilog64(uint64_t _v){ +# if defined(ILOG_NODEBRUIJN) + uint32_t v; + int ret; + int m; + ret=_v>0; + m=(_v>0xFFFFFFFFU)<<5; + v=(uint32_t)(_v>>m); + ret|=m; + m=(v>0xFFFFU)<<4; + v>>=m; + ret|=m; + m=(v>0xFFU)<<3; + v>>=m; + ret|=m; + m=(v>0xFU)<<2; + v>>=m; + ret|=m; + m=(v>3)<<1; + v>>=m; + ret|=m; + ret+=v>1; + return ret; +# else +/*If we don't have a 64-bit word, split it into two 32-bit halves.*/ +# if LONG_MAX<9223372036854775807LL + uint32_t v; + int ret; + int m; + ret=_v>0; + m=(_v>0xFFFFFFFFU)<<5; + v=(uint32_t)(_v>>m); + ret|=m; + v|=v>>1; + v|=v>>2; + v|=v>>4; + v|=v>>8; + v|=v>>16; + v=(v>>1)+1; + ret+=DEBRUIJN_IDX32[v*0x77CB531U>>27&0x1F]; + return ret; +/*Otherwise do it in one 64-bit operation.*/ +# else + static const unsigned char DEBRUIJN_IDX64[64]={ + 0, 1, 2, 7, 3,13, 8,19, 4,25,14,28, 9,34,20,40, + 5,17,26,38,15,46,29,48,10,31,35,54,21,50,41,57, + 63, 6,12,18,24,27,33,39,16,37,45,47,30,53,49,56, + 62,11,23,32,36,44,52,55,61,22,43,51,60,42,59,58 + }; + int ret; + ret=_v>0; + _v|=_v>>1; + _v|=_v>>2; + _v|=_v>>4; + _v|=_v>>8; + _v|=_v>>16; + _v|=_v>>32; + _v=(_v>>1)+1; + ret+=DEBRUIJN_IDX64[_v*0x218A392CD3D5DBF>>58&0x3F]; + return ret; +# endif +# endif +} + +int ilog64_nz(uint64_t _v) +{ + return ilog64(_v); +} + diff --git a/ccan/ccan/ilog/ilog.h b/ccan/ccan/ilog/ilog.h new file mode 100644 index 0000000..32702b1 --- /dev/null +++ b/ccan/ccan/ilog/ilog.h @@ -0,0 +1,154 @@ +/* CC0 (Public domain) - see LICENSE file for details */ +#if !defined(_ilog_H) +# define _ilog_H (1) +# include "config.h" +# include <stdint.h> +# include <limits.h> +# include <ccan/compiler/compiler.h> + +/** + * ilog32 - Integer binary logarithm of a 32-bit value. + * @_v: A 32-bit value. + * Returns floor(log2(_v))+1, or 0 if _v==0. + * This is the number of bits that would be required to represent _v in two's + * complement notation with all of the leading zeros stripped. + * Note that many uses will resolve to the fast macro version instead. + * + * See Also: + * ilog32_nz(), ilog64() + * + * Example: + * // Rounds up to next power of 2 (if not a power of 2). + * static uint32_t round_up32(uint32_t i) + * { + * assert(i != 0); + * return 1U << ilog32(i-1); + * } + */ +int ilog32(uint32_t _v) CONST_FUNCTION; + +/** + * ilog32_nz - Integer binary logarithm of a non-zero 32-bit value. + * @_v: A 32-bit value. + * Returns floor(log2(_v))+1, or undefined if _v==0. + * This is the number of bits that would be required to represent _v in two's + * complement notation with all of the leading zeros stripped. + * Note that many uses will resolve to the fast macro version instead. + * See Also: + * ilog32(), ilog64_nz() + * Example: + * // Find Last Set (ie. highest bit set, 0 to 31). + * static uint32_t fls32(uint32_t i) + * { + * assert(i != 0); + * return ilog32_nz(i) - 1; + * } + */ +int ilog32_nz(uint32_t _v) CONST_FUNCTION; + +/** + * ilog64 - Integer binary logarithm of a 64-bit value. + * @_v: A 64-bit value. + * Returns floor(log2(_v))+1, or 0 if _v==0. + * This is the number of bits that would be required to represent _v in two's + * complement notation with all of the leading zeros stripped. + * Note that many uses will resolve to the fast macro version instead. + * See Also: + * ilog64_nz(), ilog32() + */ +int ilog64(uint64_t _v) CONST_FUNCTION; + +/** + * ilog64_nz - Integer binary logarithm of a non-zero 64-bit value. + * @_v: A 64-bit value. + * Returns floor(log2(_v))+1, or undefined if _v==0. + * This is the number of bits that would be required to represent _v in two's + * complement notation with all of the leading zeros stripped. + * Note that many uses will resolve to the fast macro version instead. + * See Also: + * ilog64(), ilog32_nz() + */ +int ilog64_nz(uint64_t _v) CONST_FUNCTION; + +/** + * STATIC_ILOG_32 - The integer logarithm of an (unsigned, 32-bit) constant. + * @_v: A non-negative 32-bit constant. + * Returns floor(log2(_v))+1, or 0 if _v==0. + * This is the number of bits that would be required to represent _v in two's + * complement notation with all of the leading zeros stripped. + * This macro should only be used when you need a compile-time constant, + * otherwise ilog32 or ilog32_nz are just as fast and more flexible. + * + * Example: + * #define MY_PAGE_SIZE 4096 + * #define MY_PAGE_BITS (STATIC_ILOG_32(PAGE_SIZE) - 1) + */ +#define STATIC_ILOG_32(_v) (STATIC_ILOG5((uint32_t)(_v))) + +/** + * STATIC_ILOG_64 - The integer logarithm of an (unsigned, 64-bit) constant. + * @_v: A non-negative 64-bit constant. + * Returns floor(log2(_v))+1, or 0 if _v==0. + * This is the number of bits that would be required to represent _v in two's + * complement notation with all of the leading zeros stripped. + * This macro should only be used when you need a compile-time constant, + * otherwise ilog64 or ilog64_nz are just as fast and more flexible. + */ +#define STATIC_ILOG_64(_v) (STATIC_ILOG6((uint64_t)(_v))) + +/* Private implementation details */ + +/*Note the casts to (int) below: this prevents "upgrading" + the type of an entire expression to an (unsigned) size_t.*/ +#if INT_MAX>=2147483647 && HAVE_BUILTIN_CLZ +#define builtin_ilog32_nz(v) \ + (((int)sizeof(unsigned)*CHAR_BIT) - __builtin_clz(v)) +#elif LONG_MAX>=2147483647L && HAVE_BUILTIN_CLZL +#define builtin_ilog32_nz(v) \ + (((int)sizeof(unsigned)*CHAR_BIT) - __builtin_clzl(v)) +#endif + +#if INT_MAX>=9223372036854775807LL && HAVE_BUILTIN_CLZ +#define builtin_ilog64_nz(v) \ + (((int)sizeof(unsigned)*CHAR_BIT) - __builtin_clz(v)) +#elif LONG_MAX>=9223372036854775807LL && HAVE_BUILTIN_CLZL +#define builtin_ilog64_nz(v) \ + (((int)sizeof(unsigned long)*CHAR_BIT) - __builtin_clzl(v)) +#elif HAVE_BUILTIN_CLZLL +#define builtin_ilog64_nz(v) \ + (((int)sizeof(unsigned long long)*CHAR_BIT) - __builtin_clzll(v)) +#endif + +#ifdef builtin_ilog32_nz +/* This used to be builtin_ilog32_nz(_v)&-!!(_v), which means it zeroes out + * the undefined builtin_ilog32_nz(0) return. But clang UndefinedBehaviorSantizer + * complains, so do the branch: */ +#define ilog32(_v) ((_v) ? builtin_ilog32_nz(_v) : 0) +#define ilog32_nz(_v) builtin_ilog32_nz(_v) +#else +#define ilog32_nz(_v) ilog32(_v) +#define ilog32(_v) (IS_COMPILE_CONSTANT(_v) ? STATIC_ILOG_32(_v) : ilog32(_v)) +#endif /* builtin_ilog32_nz */ + +#ifdef builtin_ilog64_nz +#define ilog32(_v) ((_v) ? builtin_ilog32_nz(_v) : 0) +#define ilog64_nz(_v) builtin_ilog64_nz(_v) +#else +#define ilog64_nz(_v) ilog64(_v) +#define ilog64(_v) (IS_COMPILE_CONSTANT(_v) ? STATIC_ILOG_64(_v) : ilog64(_v)) +#endif /* builtin_ilog64_nz */ + +/* Macros for evaluating compile-time constant ilog. */ +# define STATIC_ILOG0(_v) (!!(_v)) +# define STATIC_ILOG1(_v) (((_v)&0x2)?2:STATIC_ILOG0(_v)) +# define STATIC_ILOG2(_v) (((_v)&0xC)?2+STATIC_ILOG1((_v)>>2):STATIC_ILOG1(_v)) +# define STATIC_ILOG3(_v) \ + (((_v)&0xF0)?4+STATIC_ILOG2((_v)>>4):STATIC_ILOG2(_v)) +# define STATIC_ILOG4(_v) \ + (((_v)&0xFF00)?8+STATIC_ILOG3((_v)>>8):STATIC_ILOG3(_v)) +# define STATIC_ILOG5(_v) \ + (((_v)&0xFFFF0000)?16+STATIC_ILOG4((_v)>>16):STATIC_ILOG4(_v)) +# define STATIC_ILOG6(_v) \ + (((_v)&0xFFFFFFFF00000000ULL)?32+STATIC_ILOG5((_v)>>32):STATIC_ILOG5(_v)) + +#endif /* _ilog_H */ |