diff options
author | Daniel Baumann <daniel.baumann@progress-linux.org> | 2024-05-18 13:22:53 +0000 |
---|---|---|
committer | Daniel Baumann <daniel.baumann@progress-linux.org> | 2024-05-18 13:22:53 +0000 |
commit | 347c164c35eddab388009470e6848cb361ac93f8 (patch) | |
tree | 2c0c44eac690f510bb0a35b2a13b36d606b77b6b /mysys | |
parent | Releasing progress-linux version 1:10.11.7-4~progress7.99u1. (diff) | |
download | mariadb-347c164c35eddab388009470e6848cb361ac93f8.tar.xz mariadb-347c164c35eddab388009470e6848cb361ac93f8.zip |
Merging upstream version 1:10.11.8.
Signed-off-by: Daniel Baumann <daniel.baumann@progress-linux.org>
Diffstat (limited to '')
-rw-r--r-- | mysys/CMakeLists.txt | 34 | ||||
-rw-r--r-- | mysys/crc32/crc32_arm64.c | 338 | ||||
-rw-r--r-- | mysys/crc32/crc32_x86.c | 16 | ||||
-rw-r--r-- | mysys/crc32/crc32c.cc | 261 | ||||
-rw-r--r-- | mysys/crc32/crc32c_amd64.cc | 8 | ||||
-rw-r--r-- | mysys/crc32/crc32c_ppc.h | 3 | ||||
-rw-r--r-- | mysys/crc32/crc32c_x86.cc | 457 | ||||
-rw-r--r-- | mysys/crc32/crc_ppc64.h | 7 | ||||
-rw-r--r-- | mysys/crc32ieee.cc | 19 | ||||
-rw-r--r-- | mysys/errors.c | 9 | ||||
-rw-r--r-- | mysys/lf_alloc-pin.c | 7 | ||||
-rw-r--r-- | mysys/mf_tempfile.c | 37 | ||||
-rw-r--r-- | mysys/my_bitmap.c | 645 | ||||
-rw-r--r-- | mysys/my_getopt.c | 84 | ||||
-rw-r--r-- | mysys/my_thr_init.c | 8 |
15 files changed, 1220 insertions, 713 deletions
diff --git a/mysys/CMakeLists.txt b/mysys/CMakeLists.txt index 758243df..ab6d9edf 100644 --- a/mysys/CMakeLists.txt +++ b/mysys/CMakeLists.txt @@ -14,7 +14,7 @@ # along with this program; if not, write to the Free Software # Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1335 USA -INCLUDE_DIRECTORIES(${ZLIB_INCLUDE_DIR} ${CMAKE_SOURCE_DIR}/include ${CMAKE_SOURCE_DIR}/mysys) +INCLUDE_DIRECTORIES(${ZLIB_INCLUDE_DIRS} ${CMAKE_SOURCE_DIR}/include ${CMAKE_SOURCE_DIR}/mysys) SET(MYSYS_SOURCES array.c charset-def.c charset.c my_default.c get_password.c @@ -59,29 +59,21 @@ IF (WIN32) ENDIF() IF(MSVC_INTEL) - SET(MYSYS_SOURCES ${MYSYS_SOURCES} crc32/crc32_x86.c) + SET(MYSYS_SOURCES ${MYSYS_SOURCES} crc32/crc32_x86.c crc32/crc32c_x86.cc) IF(CMAKE_SIZEOF_VOID_P EQUAL 8) SET (MYSYS_SOURCES ${MYSYS_SOURCES} crc32/crc32c_amd64.cc) ENDIF() - ADD_DEFINITIONS(-DHAVE_SSE42 -DHAVE_PCLMUL) - IF(CLANG_CL) - SET_SOURCE_FILES_PROPERTIES(crc32/crc32_x86.c PROPERTIES COMPILE_FLAGS "-msse4.2 -mpclmul") - ENDIF() ELSEIF(CMAKE_SYSTEM_PROCESSOR MATCHES "x86_64|amd64|i386|i686") - MY_CHECK_CXX_COMPILER_FLAG(-msse4.2) - MY_CHECK_CXX_COMPILER_FLAG(-mpclmul) - CHECK_INCLUDE_FILE(cpuid.h HAVE_CPUID_H) - CHECK_INCLUDE_FILE(x86intrin.h HAVE_X86INTRIN_H) - IF(have_CXX__msse4.2 AND HAVE_CPUID_H) - ADD_DEFINITIONS(-DHAVE_SSE42) - IF (have_CXX__mpclmul AND HAVE_X86INTRIN_H) - ADD_DEFINITIONS(-DHAVE_PCLMUL) - SET(MYSYS_SOURCES ${MYSYS_SOURCES} crc32/crc32_x86.c) - SET_SOURCE_FILES_PROPERTIES(crc32/crc32_x86.c PROPERTIES COMPILE_FLAGS "-msse4.2 -mpclmul") - IF(CMAKE_SIZEOF_VOID_P EQUAL 8) - SET(MYSYS_SOURCES ${MYSYS_SOURCES} crc32/crc32c_amd64.cc) - SET_SOURCE_FILES_PROPERTIES(crc32/crc32c_amd64.cc PROPERTIES COMPILE_FLAGS "-msse4.2 -mpclmul") - ENDIF() + SET(MYSYS_SOURCES ${MYSYS_SOURCES} crc32/crc32_x86.c crc32/crc32c_x86.cc) + IF(CMAKE_COMPILER_IS_GNUCC AND CMAKE_C_COMPILER_VERSION VERSION_LESS "5") + SET_SOURCE_FILES_PROPERTIES(crc32/crc32_x86.c PROPERTIES + COMPILE_FLAGS "-msse4.2 -mpclmul") + ENDIF() + IF(CMAKE_SIZEOF_VOID_P EQUAL 8) + SET(MYSYS_SOURCES ${MYSYS_SOURCES} crc32/crc32c_amd64.cc) + IF(CMAKE_COMPILER_IS_GNUCC AND CMAKE_C_COMPILER_VERSION VERSION_LESS "5") + SET_SOURCE_FILES_PROPERTIES(crc32/crc32c_amd64.cc PROPERTIES + COMPILE_FLAGS "-msse4.2 -mpclmul") ENDIF() ENDIF() ELSEIF(CMAKE_SYSTEM_PROCESSOR MATCHES "aarch64|AARCH64") @@ -164,7 +156,7 @@ ENDIF() ADD_CONVENIENCE_LIBRARY(mysys ${MYSYS_SOURCES}) MAYBE_DISABLE_IPO(mysys) -TARGET_LINK_LIBRARIES(mysys dbug strings ${ZLIB_LIBRARY} +TARGET_LINK_LIBRARIES(mysys dbug strings ${ZLIB_LIBRARIES} ${LIBNSL} ${LIBM} ${LIBRT} ${CMAKE_DL_LIBS} ${LIBSOCKET} ${LIBEXECINFO}) DTRACE_INSTRUMENT(mysys) diff --git a/mysys/crc32/crc32_arm64.c b/mysys/crc32/crc32_arm64.c index 0e70c218..6588606a 100644 --- a/mysys/crc32/crc32_arm64.c +++ b/mysys/crc32/crc32_arm64.c @@ -1,13 +1,18 @@ #include <my_global.h> #include <string.h> #include <stdint.h> +#include <stddef.h> -static int pmull_supported; +typedef unsigned (*my_crc32_t)(unsigned, const void *, size_t); -#if defined(HAVE_ARMV8_CRC) +#ifdef HAVE_ARMV8_CRC -#if defined(__APPLE__) -#include <sys/sysctl.h> +# ifdef HAVE_ARMV8_CRYPTO +static unsigned crc32c_aarch64_pmull(unsigned, const void *, size_t); +# endif + +# ifdef __APPLE__ +# include <sys/sysctl.h> int crc32_aarch64_available(void) { @@ -18,17 +23,17 @@ int crc32_aarch64_available(void) return ret; } -const char *crc32c_aarch64_available(void) +my_crc32_t crc32c_aarch64_available(void) { - if (crc32_aarch64_available() == 0) - return NULL; - pmull_supported = 1; - return "Using ARMv8 crc32 + pmull instructions"; +# ifdef HAVE_ARMV8_CRYPTO + if (crc32_aarch64_available()) + return crc32c_aarch64_pmull; +# endif + return NULL; } - -#else -#include <sys/auxv.h> -#if defined(__FreeBSD__) +# else +# include <sys/auxv.h> +# ifdef __FreeBSD__ static unsigned long getauxval(unsigned int key) { unsigned long val; @@ -36,17 +41,17 @@ static unsigned long getauxval(unsigned int key) return 0ul; return val; } -#else -# include <asm/hwcap.h> -#endif +# else +# include <asm/hwcap.h> +# endif -#ifndef HWCAP_CRC32 -# define HWCAP_CRC32 (1 << 7) -#endif +# ifndef HWCAP_CRC32 +# define HWCAP_CRC32 (1 << 7) +# endif -#ifndef HWCAP_PMULL -# define HWCAP_PMULL (1 << 4) -#endif +# ifndef HWCAP_PMULL +# define HWCAP_PMULL (1 << 4) +# endif /* ARM made crc32 default from ARMv8.1 but optional in ARMv8A * Runtime check API. @@ -56,22 +61,37 @@ int crc32_aarch64_available(void) unsigned long auxv= getauxval(AT_HWCAP); return (auxv & HWCAP_CRC32) != 0; } +# endif + +# ifndef __APPLE__ +static unsigned crc32c_aarch64(unsigned, const void *, size_t); -const char *crc32c_aarch64_available(void) +my_crc32_t crc32c_aarch64_available(void) { unsigned long auxv= getauxval(AT_HWCAP); - if (!(auxv & HWCAP_CRC32)) return NULL; +# ifdef HAVE_ARMV8_CRYPTO + /* Raspberry Pi 4 supports crc32 but doesn't support pmull (MDEV-23030). */ + if (auxv & HWCAP_PMULL) + return crc32c_aarch64_pmull; +# endif + return crc32c_aarch64; +} +# endif - pmull_supported= (auxv & HWCAP_PMULL) != 0; - if (pmull_supported) +const char *crc32c_aarch64_impl(my_crc32_t c) +{ +# ifdef HAVE_ARMV8_CRYPTO + if (c == crc32c_aarch64_pmull) return "Using ARMv8 crc32 + pmull instructions"; - else +# endif +# ifndef __APPLE__ + if (c == crc32c_aarch64) return "Using ARMv8 crc32 instructions"; +# endif + return NULL; } - -#endif /* __APPLE__ */ #endif /* HAVE_ARMV8_CRC */ #ifndef HAVE_ARMV8_CRC_CRYPTO_INTRINSICS @@ -157,131 +177,14 @@ asm(".arch_extension crypto"); PREF4X64L2(buffer,(PREF_OFFSET), 8) \ PREF4X64L2(buffer,(PREF_OFFSET), 12) -uint32_t crc32c_aarch64(uint32_t crc, const unsigned char *buffer, uint64_t len) +#ifndef __APPLE__ +static unsigned crc32c_aarch64(unsigned crc, const void *buf, size_t len) { - uint32_t crc0, crc1, crc2; int64_t length= (int64_t)len; + const unsigned char *buffer= buf; crc^= 0xffffffff; - /* Pmull runtime check here. - * Raspberry Pi 4 supports crc32 but doesn't support pmull (MDEV-23030). - * - * Consider the condition that the target platform does support hardware crc32 - * but not support PMULL. In this condition, it should leverage the aarch64 - * crc32 instruction (__crc32c) and just only skip parallel computation (pmull/vmull) - * rather than skip all hardware crc32 instruction of computation. - */ - if (pmull_supported) - { -/* The following Macro (HAVE_ARMV8_CRYPTO) is used for compiling check */ -#ifdef HAVE_ARMV8_CRYPTO - -/* Crypto extension Support - * Parallel computation with 1024 Bytes (per block) - * Intrinsics Support - */ -# ifdef HAVE_ARMV8_CRC_CRYPTO_INTRINSICS - const poly64_t k1= 0xe417f38a, k2= 0x8f158014; - uint64_t t0, t1; - - /* Process per block size of 1024 Bytes - * A block size = 8 + 42*3*sizeof(uint64_t) + 8 - */ - while ((length-= 1024) >= 0) - { - /* Prefetch 3*1024 data for avoiding L2 cache miss */ - PREF1KL2(buffer, 1024*3); - /* Do first 8 bytes here for better pipelining */ - crc0= __crc32cd(crc, *(const uint64_t *)buffer); - crc1= 0; - crc2= 0; - buffer+= sizeof(uint64_t); - - /* Process block inline - * Process crc0 last to avoid dependency with above - */ - CRC32C7X3X8(buffer, 0); - CRC32C7X3X8(buffer, 1); - CRC32C7X3X8(buffer, 2); - CRC32C7X3X8(buffer, 3); - CRC32C7X3X8(buffer, 4); - CRC32C7X3X8(buffer, 5); - - buffer+= 42*3*sizeof(uint64_t); - /* Prefetch data for following block to avoid L1 cache miss */ - PREF1KL1(buffer, 1024); - - /* Last 8 bytes - * Merge crc0 and crc1 into crc2 - * crc1 multiply by K2 - * crc0 multiply by K1 - */ - t1= (uint64_t)vmull_p64(crc1, k2); - t0= (uint64_t)vmull_p64(crc0, k1); - crc= __crc32cd(crc2, *(const uint64_t *)buffer); - crc1= __crc32cd(0, t1); - crc^= crc1; - crc0= __crc32cd(0, t0); - crc^= crc0; - - buffer+= sizeof(uint64_t); - } - -# else /* HAVE_ARMV8_CRC_CRYPTO_INTRINSICS */ - - /*No intrinsics*/ - __asm__("mov x16, #0xf38a \n\t" - "movk x16, #0xe417, lsl 16 \n\t" - "mov v1.2d[0], x16 \n\t" - "mov x16, #0x8014 \n\t" - "movk x16, #0x8f15, lsl 16 \n\t" - "mov v0.2d[0], x16 \n\t" - :::"x16"); - - while ((length-= 1024) >= 0) - { - PREF1KL2(buffer, 1024*3); - __asm__("crc32cx %w[c0], %w[c], %x[v]\n\t" - :[c0]"=r"(crc0):[c]"r"(crc), [v]"r"(*(const uint64_t *)buffer):); - crc1= 0; - crc2= 0; - buffer+= sizeof(uint64_t); - - CRC32C7X3X8(buffer, 0); - CRC32C7X3X8(buffer, 1); - CRC32C7X3X8(buffer, 2); - CRC32C7X3X8(buffer, 3); - CRC32C7X3X8(buffer, 4); - CRC32C7X3X8(buffer, 5); - - buffer+= 42*3*sizeof(uint64_t); - PREF1KL1(buffer, 1024); - __asm__("mov v2.2d[0], %x[c1] \n\t" - "pmull v2.1q, v2.1d, v0.1d \n\t" - "mov v3.2d[0], %x[c0] \n\t" - "pmull v3.1q, v3.1d, v1.1d \n\t" - "crc32cx %w[c], %w[c2], %x[v] \n\t" - "mov %x[c1], v2.2d[0] \n\t" - "crc32cx %w[c1], wzr, %x[c1] \n\t" - "eor %w[c], %w[c], %w[c1] \n\t" - "mov %x[c0], v3.2d[0] \n\t" - "crc32cx %w[c0], wzr, %x[c0] \n\t" - "eor %w[c], %w[c], %w[c0] \n\t" - :[c1]"+r"(crc1), [c0]"+r"(crc0), [c2]"+r"(crc2), [c]"+r"(crc) - :[v]"r"(*((const uint64_t *)buffer))); - buffer+= sizeof(uint64_t); - } -# endif /* HAVE_ARMV8_CRC_CRYPTO_INTRINSICS */ - - /* Done if Input data size is aligned with 1024 */ - if (!(length+= 1024)) - return ~crc; - -#endif /* HAVE_ARMV8_CRYPTO */ - - } // end if pmull_supported - while ((length-= sizeof(uint64_t)) >= 0) { CRC32CX(crc, *(uint64_t *)buffer); @@ -306,6 +209,143 @@ uint32_t crc32c_aarch64(uint32_t crc, const unsigned char *buffer, uint64_t len) return ~crc; } +#endif + +#ifdef HAVE_ARMV8_CRYPTO +static unsigned crc32c_aarch64_pmull(unsigned crc, const void *buf, size_t len) +{ + int64_t length= (int64_t)len; + const unsigned char *buffer= buf; + + crc^= 0xffffffff; + + /* Crypto extension Support + * Parallel computation with 1024 Bytes (per block) + * Intrinsics Support + */ +# ifdef HAVE_ARMV8_CRC_CRYPTO_INTRINSICS + /* Process per block size of 1024 Bytes + * A block size = 8 + 42*3*sizeof(uint64_t) + 8 + */ + for (const poly64_t k1= 0xe417f38a, k2= 0x8f158014; (length-= 1024) >= 0; ) + { + uint32_t crc0, crc1, crc2; + uint64_t t0, t1; + /* Prefetch 3*1024 data for avoiding L2 cache miss */ + PREF1KL2(buffer, 1024*3); + /* Do first 8 bytes here for better pipelining */ + crc0= __crc32cd(crc, *(const uint64_t *)buffer); + crc1= 0; + crc2= 0; + buffer+= sizeof(uint64_t); + + /* Process block inline + * Process crc0 last to avoid dependency with above + */ + CRC32C7X3X8(buffer, 0); + CRC32C7X3X8(buffer, 1); + CRC32C7X3X8(buffer, 2); + CRC32C7X3X8(buffer, 3); + CRC32C7X3X8(buffer, 4); + CRC32C7X3X8(buffer, 5); + + buffer+= 42*3*sizeof(uint64_t); + /* Prefetch data for following block to avoid L1 cache miss */ + PREF1KL1(buffer, 1024); + + /* Last 8 bytes + * Merge crc0 and crc1 into crc2 + * crc1 multiply by K2 + * crc0 multiply by K1 + */ + t1= (uint64_t)vmull_p64(crc1, k2); + t0= (uint64_t)vmull_p64(crc0, k1); + crc= __crc32cd(crc2, *(const uint64_t *)buffer); + crc1= __crc32cd(0, t1); + crc^= crc1; + crc0= __crc32cd(0, t0); + crc^= crc0; + + buffer+= sizeof(uint64_t); + } + +# else /* HAVE_ARMV8_CRC_CRYPTO_INTRINSICS */ + /*No intrinsics*/ + __asm__("mov x16, #0xf38a \n\t" + "movk x16, #0xe417, lsl 16 \n\t" + "mov v1.2d[0], x16 \n\t" + "mov x16, #0x8014 \n\t" + "movk x16, #0x8f15, lsl 16 \n\t" + "mov v0.2d[0], x16 \n\t" + :::"x16"); + + while ((length-= 1024) >= 0) + { + uint32_t crc0, crc1, crc2; + + PREF1KL2(buffer, 1024*3); + __asm__("crc32cx %w[c0], %w[c], %x[v]\n\t" + :[c0]"=r"(crc0):[c]"r"(crc), [v]"r"(*(const uint64_t *)buffer):); + crc1= 0; + crc2= 0; + buffer+= sizeof(uint64_t); + + CRC32C7X3X8(buffer, 0); + CRC32C7X3X8(buffer, 1); + CRC32C7X3X8(buffer, 2); + CRC32C7X3X8(buffer, 3); + CRC32C7X3X8(buffer, 4); + CRC32C7X3X8(buffer, 5); + + buffer+= 42*3*sizeof(uint64_t); + PREF1KL1(buffer, 1024); + __asm__("mov v2.2d[0], %x[c1] \n\t" + "pmull v2.1q, v2.1d, v0.1d \n\t" + "mov v3.2d[0], %x[c0] \n\t" + "pmull v3.1q, v3.1d, v1.1d \n\t" + "crc32cx %w[c], %w[c2], %x[v] \n\t" + "mov %x[c1], v2.2d[0] \n\t" + "crc32cx %w[c1], wzr, %x[c1] \n\t" + "eor %w[c], %w[c], %w[c1] \n\t" + "mov %x[c0], v3.2d[0] \n\t" + "crc32cx %w[c0], wzr, %x[c0] \n\t" + "eor %w[c], %w[c], %w[c0] \n\t" + :[c1]"+r"(crc1), [c0]"+r"(crc0), [c2]"+r"(crc2), [c]"+r"(crc) + :[v]"r"(*((const uint64_t *)buffer))); + buffer+= sizeof(uint64_t); + } +# endif /* HAVE_ARMV8_CRC_CRYPTO_INTRINSICS */ + + /* Done if Input data size is aligned with 1024 */ + length+= 1024; + if (length) + { + while ((length-= sizeof(uint64_t)) >= 0) + { + CRC32CX(crc, *(uint64_t *)buffer); + buffer+= sizeof(uint64_t); + } + + /* The following is more efficient than the straight loop */ + if (length & sizeof(uint32_t)) + { + CRC32CW(crc, *(uint32_t *)buffer); + buffer+= sizeof(uint32_t); + } + + if (length & sizeof(uint16_t)) + { + CRC32CH(crc, *(uint16_t *)buffer); + buffer+= sizeof(uint16_t); + } + + if (length & sizeof(uint8_t)) + CRC32CB(crc, *buffer); + } + + return ~crc; +} +#endif /* HAVE_ARMV8_CRYPTO */ /* There are multiple approaches to calculate crc. Approach-1: Process 8 bytes then 4 bytes then 2 bytes and then 1 bytes diff --git a/mysys/crc32/crc32_x86.c b/mysys/crc32/crc32_x86.c index f077399c..ab2522d6 100644 --- a/mysys/crc32/crc32_x86.c +++ b/mysys/crc32/crc32_x86.c @@ -56,11 +56,16 @@ #include <stddef.h> #ifdef __GNUC__ -#include <x86intrin.h> +# include <emmintrin.h> +# include <smmintrin.h> +# include <tmmintrin.h> +# include <wmmintrin.h> +# define USE_PCLMUL __attribute__((target("sse4.2,pclmul"))) #elif defined(_MSC_VER) -#include <intrin.h> +# include <intrin.h> +# define USE_PCLMUL /* nothing */ #else -#error "unknown compiler" +# error "unknown compiler" #endif /** @@ -71,6 +76,7 @@ * * @return \a reg << (\a num * 8) */ +USE_PCLMUL static inline __m128i xmm_shift_left(__m128i reg, const unsigned int num) { static const MY_ALIGNED(16) uint8_t crc_xmm_shift_tab[48]= { @@ -111,6 +117,7 @@ struct crcr_pclmulqdq_ctx * * @return New 16 byte folded data */ +USE_PCLMUL static inline __m128i crcr32_folding_round(const __m128i data_block, const __m128i precomp, const __m128i fold) { @@ -128,6 +135,7 @@ static inline __m128i crcr32_folding_round(const __m128i data_block, * * @return data reduced to 64 bits */ +USE_PCLMUL static inline __m128i crcr32_reduce_128_to_64(__m128i data128, const __m128i precomp) { __m128i tmp0, tmp1, tmp2; @@ -152,6 +160,7 @@ static inline __m128i crcr32_reduce_128_to_64(__m128i data128, const __m128i pre * * @return data reduced to 32 bits */ +USE_PCLMUL static inline uint32_t crcr32_reduce_64_to_32(__m128i data64, const __m128i precomp) { static const MY_ALIGNED(16) uint32_t mask1[4]= { @@ -188,6 +197,7 @@ static inline uint32_t crcr32_reduce_64_to_32(__m128i data64, const __m128i prec * * @return CRC for given \a data block (32 bits wide). */ +USE_PCLMUL static inline uint32_t crcr32_calc_pclmulqdq(const uint8_t *data, uint32_t data_len, uint32_t crc, const struct crcr_pclmulqdq_ctx *params) diff --git a/mysys/crc32/crc32c.cc b/mysys/crc32/crc32c.cc index 2bec041e..32a45478 100644 --- a/mysys/crc32/crc32c.cc +++ b/mysys/crc32/crc32c.cc @@ -19,52 +19,23 @@ #include <stddef.h> #include <stdint.h> #include <my_global.h> -#include <my_byteorder.h> -static inline uint32_t DecodeFixed32(const char *ptr) -{ - return uint4korr(ptr); -} - -#include <stdint.h> -#ifdef _MSC_VER -#include <intrin.h> -#endif - -#ifdef HAVE_SSE42 -# ifdef __GNUC__ -# include <cpuid.h> -# if __GNUC__ < 5 && !defined __clang__ -/* the headers do not really work in GCC before version 5 */ -# define _mm_crc32_u8(crc,data) __builtin_ia32_crc32qi(crc,data) -# define _mm_crc32_u32(crc,data) __builtin_ia32_crc32si(crc,data) -# define _mm_crc32_u64(crc,data) __builtin_ia32_crc32di(crc,data) -# else -# include <nmmintrin.h> -# endif -# define USE_SSE42 __attribute__((target("sse4.2"))) -# else -# define USE_SSE42 /* nothing */ -# endif -#endif - #ifdef __powerpc64__ -#include "crc32c_ppc.h" - -#if __linux__ -#include <sys/auxv.h> +# include "crc32c_ppc.h" +# ifdef __linux__ +# include <sys/auxv.h> -#ifndef PPC_FEATURE2_VEC_CRYPTO -#define PPC_FEATURE2_VEC_CRYPTO 0x02000000 -#endif +# ifndef PPC_FEATURE2_VEC_CRYPTO +# define PPC_FEATURE2_VEC_CRYPTO 0x02000000 +# endif -#ifndef AT_HWCAP2 -#define AT_HWCAP2 26 +# ifndef AT_HWCAP2 +# define AT_HWCAP2 26 +# endif +# endif #endif -#endif /* __linux__ */ - -#endif +typedef unsigned (*my_crc32_t)(unsigned, const void *, size_t); namespace mysys_namespace { namespace crc32c { @@ -75,6 +46,7 @@ static int arch_ppc_crc32 = 0; #endif /* __powerpc64__ */ #endif +alignas(CPU_LEVEL1_DCACHE_LINESIZE) static const uint32_t table0_[256] = { 0x00000000, 0xf26b8303, 0xe13b70f7, 0x1350f3f4, 0xc79a971f, 0x35f1141c, 0x26a1e7e8, 0xd4ca64eb, @@ -341,8 +313,9 @@ static const uint32_t table3_[256] = { }; // Used to fetch a naturally-aligned 32-bit word in little endian byte-order -static inline uint32_t LE_LOAD32(const uint8_t *p) { - return DecodeFixed32(reinterpret_cast<const char*>(p)); +static inline uint32_t LE_LOAD32(const uint8_t *p) +{ + return uint4korr(reinterpret_cast<const char*>(p)); } static inline void Slow_CRC32(uint64_t* l, uint8_t const **p) @@ -362,10 +335,7 @@ static inline void Slow_CRC32(uint64_t* l, uint8_t const **p) table0_[c >> 24]; } -#ifdef ALIGN #undef ALIGN -#endif - // Align n to (1 << m) byte boundary #define ALIGN(n, m) ((n + ((1 << m) - 1)) & ~((1 << m) - 1)) @@ -374,70 +344,30 @@ static inline void Slow_CRC32(uint64_t* l, uint8_t const **p) l = table0_[c] ^ (l >> 8); \ } while (0) -static uint32_t crc32c_slow(uint32_t crc, const char* buf, size_t size) -{ - const uint8_t *p = reinterpret_cast<const uint8_t *>(buf); - const uint8_t *e = p + size; - uint64_t l = crc ^ 0xffffffffu; - - // Point x at first 16-byte aligned byte in string. This might be - // just past the end of the string. - const uintptr_t pval = reinterpret_cast<uintptr_t>(p); - const uint8_t* x = reinterpret_cast<const uint8_t*>(ALIGN(pval, 4)); - if (x <= e) - // Process bytes until finished or p is 16-byte aligned - while (p != x) - STEP1; - // Process bytes 16 at a time - while ((e-p) >= 16) - { - Slow_CRC32(&l, &p); - Slow_CRC32(&l, &p); - } - // Process bytes 8 at a time - while ((e-p) >= 8) - Slow_CRC32(&l, &p); - // Process the last few bytes - while (p != e) - STEP1; - return static_cast<uint32_t>(l ^ 0xffffffffu); -} - -#if defined HAVE_POWER8 -#elif defined HAVE_ARMV8_CRC -#elif defined HAVE_SSE42 -constexpr uint32_t cpuid_ecx_SSE42= 1U << 20; -constexpr uint32_t cpuid_ecx_SSE42_AND_PCLMUL= cpuid_ecx_SSE42 | 1U<<1; - -static uint32_t cpuid_ecx() -{ -#ifdef __GNUC__ - uint32_t reax= 0, rebx= 0, recx= 0, redx= 0; - __cpuid(1, reax, rebx, recx, redx); - return recx; -#elif defined _MSC_VER - int regs[4]; - __cpuid(regs, 1); - return regs[2]; -#else -# error "unknown compiler" +#undef USE_SSE42 +#if defined _MSC_VER && (defined _M_X64 || defined _M_IX86) +# include <intrin.h> +# include <immintrin.h> +# define USE_SSE42 /* nothing */ +#elif defined __GNUC__ && (defined __i386__||defined __x86_64__) +# if __GNUC__ < 5 && !defined __clang_major__ +/* the headers do not really work in GCC before version 5 */ +# define _mm_crc32_u8(crc,data) __builtin_ia32_crc32qi(crc,data) +# define _mm_crc32_u32(crc,data) __builtin_ia32_crc32si(crc,data) +# define _mm_crc32_u64(crc,data) __builtin_ia32_crc32di(crc,data) +# else +# include <nmmintrin.h> +# endif +# define USE_SSE42 __attribute__((target("sse4.2"))) #endif -} - -extern "C" int crc32_pclmul_enabled(void) -{ - return !(~cpuid_ecx() & cpuid_ecx_SSE42_AND_PCLMUL); -} - -#if SIZEOF_SIZE_T == 8 -extern "C" uint32_t crc32c_3way(uint32_t crc, const char *buf, size_t len); -USE_SSE42 +#ifdef USE_SSE42 +# if SIZEOF_SIZE_T == 8 static inline uint64_t LE_LOAD64(const uint8_t *ptr) { return uint8korr(reinterpret_cast<const char*>(ptr)); } -#endif +# endif USE_SSE42 static inline void Fast_CRC32(uint64_t* l, uint8_t const **p) @@ -453,10 +383,11 @@ static inline void Fast_CRC32(uint64_t* l, uint8_t const **p) # endif } +extern "C" USE_SSE42 -static uint32_t crc32c_sse42(uint32_t crc, const char* buf, size_t size) +unsigned crc32c_sse42(unsigned crc, const void* buf, size_t size) { - const uint8_t *p = reinterpret_cast<const uint8_t *>(buf); + const uint8_t *p = static_cast<const uint8_t *>(buf); const uint8_t *e = p + size; uint64_t l = crc ^ 0xffffffffu; @@ -484,107 +415,111 @@ static uint32_t crc32c_sse42(uint32_t crc, const char* buf, size_t size) } #endif -typedef uint32_t (*Function)(uint32_t, const char*, size_t); +static unsigned crc32c_slow(unsigned crc, const void* buf, size_t size) +{ + const uint8_t *p = static_cast<const uint8_t *>(buf); + const uint8_t *e = p + size; + uint64_t l = crc ^ 0xffffffffu; -#if defined(HAVE_POWER8) && defined(HAS_ALTIVEC) -uint32_t ExtendPPCImpl(uint32_t crc, const char *buf, size_t size) { - return crc32c_ppc(crc, (const unsigned char *)buf, size); + // Point x at first 16-byte aligned byte in string. This might be + // just past the end of the string. + const uintptr_t pval = reinterpret_cast<uintptr_t>(p); + const uint8_t* x = reinterpret_cast<const uint8_t*>(ALIGN(pval, 4)); + if (x <= e) + // Process bytes until finished or p is 16-byte aligned + while (p != x) + STEP1; + // Process bytes 16 at a time + while ((e-p) >= 16) + { + Slow_CRC32(&l, &p); + Slow_CRC32(&l, &p); + } + // Process bytes 8 at a time + while ((e-p) >= 8) + Slow_CRC32(&l, &p); + // Process the last few bytes + while (p != e) + STEP1; + return static_cast<uint32_t>(l ^ 0xffffffffu); } -#if __linux__ +#if defined(HAVE_POWER8) && defined(HAS_ALTIVEC) +# ifdef __linux__ static int arch_ppc_probe(void) { arch_ppc_crc32 = 0; -#if defined(__powerpc64__) +# if defined(__powerpc64__) if (getauxval(AT_HWCAP2) & PPC_FEATURE2_VEC_CRYPTO) arch_ppc_crc32 = 1; -#endif /* __powerpc64__ */ +# endif /* __powerpc64__ */ return arch_ppc_crc32; } -#elif __FreeBSD_version >= 1200000 -#include <machine/cpu.h> -#include <sys/auxv.h> -#include <sys/elf_common.h> +# elif defined __FreeBSD_version && __FreeBSD_version >= 1200000 +# include <machine/cpu.h> +# include <sys/auxv.h> +# include <sys/elf_common.h> static int arch_ppc_probe(void) { unsigned long cpufeatures; arch_ppc_crc32 = 0; -#if defined(__powerpc64__) +# if defined(__powerpc64__) elf_aux_info(AT_HWCAP2, &cpufeatures, sizeof(cpufeatures)); if (cpufeatures & PPC_FEATURE2_HAS_VEC_CRYPTO) arch_ppc_crc32 = 1; -#endif /* __powerpc64__ */ +# endif /* __powerpc64__ */ return arch_ppc_crc32; } -#elif defined(_AIX) || defined(__OpenBSD__) +# elif defined(_AIX) || defined(__OpenBSD__) static int arch_ppc_probe(void) { arch_ppc_crc32 = 0; -#if defined(__powerpc64__) +# if defined(__powerpc64__) // AIX 7.1+/OpenBSD has vector crypto features on all POWER 8+ arch_ppc_crc32 = 1; -#endif /* __powerpc64__ */ +# endif /* __powerpc64__ */ return arch_ppc_crc32; } -#endif // __linux__ +# endif #endif #if defined(HAVE_ARMV8_CRC) -extern "C" const char *crc32c_aarch64_available(void); -extern "C" uint32_t crc32c_aarch64(uint32_t crc, const unsigned char *buffer, uint64_t len); - -static uint32_t ExtendARMImpl(uint32_t crc, const char *buf, size_t size) { - return crc32c_aarch64(crc, (const unsigned char *)buf, (size_t) size); -} +extern "C" my_crc32_t crc32c_aarch64_available(void); +extern "C" const char *crc32c_aarch64_impl(my_crc32_t); +#elif defined __i386__||defined __x86_64__||defined _M_X64||defined _M_IX86 +extern "C" my_crc32_t crc32c_x86_available(void); +extern "C" const char *crc32c_x86_impl(my_crc32_t); #endif -static inline Function Choose_Extend() +static inline my_crc32_t Choose_Extend() { #if defined HAVE_POWER8 && defined HAS_ALTIVEC if (arch_ppc_probe()) - return ExtendPPCImpl; -#elif defined(HAVE_ARMV8_CRC) - if (crc32c_aarch64_available()) - return ExtendARMImpl; -#elif HAVE_SSE42 -# if defined HAVE_PCLMUL && SIZEOF_SIZE_T == 8 - switch (cpuid_ecx() & cpuid_ecx_SSE42_AND_PCLMUL) { - case cpuid_ecx_SSE42_AND_PCLMUL: - return crc32c_3way; - case cpuid_ecx_SSE42: - return crc32c_sse42; - } -# else - if (cpuid_ecx() & cpuid_ecx_SSE42) - return crc32c_sse42; -# endif + return crc32c_ppc; +#elif defined HAVE_ARMV8_CRC + if (my_crc32_t crc= crc32c_aarch64_available()) + return crc; +#elif defined __i386__||defined __x86_64__||defined _M_X64||defined _M_IX86 + if (my_crc32_t crc= crc32c_x86_available()) + return crc; #endif return crc32c_slow; } -static const Function ChosenExtend= Choose_Extend(); - -static inline uint32_t Extend(uint32_t crc, const char* buf, size_t size) -{ - return ChosenExtend(crc, buf, size); -} +static const my_crc32_t ChosenExtend= Choose_Extend(); extern "C" const char *my_crc32c_implementation() { -#if defined(HAVE_POWER8) && defined(HAS_ALTIVEC) - if (ChosenExtend == ExtendPPCImpl) +#if defined HAVE_POWER8 && defined HAS_ALTIVEC + if (ChosenExtend == crc32c_ppc) return "Using POWER8 crc32 instructions"; -#elif defined(HAVE_ARMV8_CRC) - if (const char *ret= crc32c_aarch64_available()) +#elif defined HAVE_ARMV8_CRC + if (const char *ret= crc32c_aarch64_impl(ChosenExtend)) + return ret; +#elif defined __i386__||defined __x86_64__||defined _M_X64||defined _M_IX86 + if (const char *ret= crc32c_x86_impl(ChosenExtend)) return ret; -#elif HAVE_SSE42 -# if defined HAVE_PCLMUL && SIZEOF_SIZE_T == 8 - if (ChosenExtend == crc32c_3way) - return "Using crc32 + pclmulqdq instructions"; -# endif - if (ChosenExtend == crc32c_sse42) - return "Using SSE4.2 crc32 instructions"; #endif return "Using generic crc32 instructions"; } @@ -593,5 +528,5 @@ extern "C" const char *my_crc32c_implementation() extern "C" unsigned my_crc32c(unsigned int crc, const char *buf, size_t size) { - return mysys_namespace::crc32c::Extend(crc,buf, size); + return mysys_namespace::crc32c::ChosenExtend(crc,buf, size); } diff --git a/mysys/crc32/crc32c_amd64.cc b/mysys/crc32/crc32c_amd64.cc index 22c492b4..147c0cca 100644 --- a/mysys/crc32/crc32c_amd64.cc +++ b/mysys/crc32/crc32c_amd64.cc @@ -47,6 +47,11 @@ #include <nmmintrin.h> #include <wmmintrin.h> +#ifdef _MSC_VER +# define USE_PCLMUL /* nothing */ +#else +# define USE_PCLMUL __attribute__((target("sse4.2,pclmul"))) +#endif #define CRCtriplet(crc, buf, offset) \ crc##0 = _mm_crc32_u64(crc##0, *(buf##0 + offset)); \ @@ -131,6 +136,7 @@ static const uint64_t clmul_constants alignas(16) [] = { }; // Compute the crc32c value for buffer smaller than 8 +USE_PCLMUL static inline void align_to_8( size_t len, uint64_t& crc0, // crc so far, updated on return @@ -155,6 +161,7 @@ static inline void align_to_8( // CombineCRC performs pclmulqdq multiplication of 2 partial CRC's and a well // chosen constant and xor's these with the remaining CRC. // +USE_PCLMUL static inline uint64_t CombineCRC( size_t block_size, uint64_t crc0, @@ -176,6 +183,7 @@ static inline uint64_t CombineCRC( // Compute CRC-32C using the Intel hardware instruction. extern "C" +USE_PCLMUL uint32_t crc32c_3way(uint32_t crc, const char *buf, size_t len) { const unsigned char* next = (const unsigned char*)buf; diff --git a/mysys/crc32/crc32c_ppc.h b/mysys/crc32/crc32c_ppc.h index c359061c..797e849b 100644 --- a/mysys/crc32/crc32c_ppc.h +++ b/mysys/crc32/crc32c_ppc.h @@ -11,8 +11,7 @@ extern "C" { #endif -extern uint32_t crc32c_ppc(uint32_t crc, unsigned char const *buffer, - unsigned len); +extern unsigned crc32c_ppc(unsigned crc, const void *buffer, size_t len); #ifdef __cplusplus } diff --git a/mysys/crc32/crc32c_x86.cc b/mysys/crc32/crc32c_x86.cc new file mode 100644 index 00000000..02dbf292 --- /dev/null +++ b/mysys/crc32/crc32c_x86.cc @@ -0,0 +1,457 @@ +/* Copyright (c) 2024, MariaDB plc + + This program is free software; you can redistribute it and/or modify + it under the terms of the GNU General Public License as published by + the Free Software Foundation; version 2 of the License. + + This program is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + GNU General Public License for more details. + + You should have received a copy of the GNU General Public License + along with this program; if not, write to the Free Software + Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1335 USA */ + +#include <my_global.h> +#include <cstddef> +#include <cstdint> + +#ifdef _MSC_VER +# include <intrin.h> +# if 0 /* So far, we have no environment where this could be tested. */ +# define USE_VPCLMULQDQ /* nothing */ +# endif +#else +# include <cpuid.h> +# if __GNUC__ >= 11 || (defined __clang_major__ && __clang_major__ >= 8) +# define TARGET "pclmul,avx512f,avx512dq,avx512bw,avx512vl,vpclmulqdq" +# define USE_VPCLMULQDQ __attribute__((target(TARGET))) +# endif +#endif + +extern "C" unsigned crc32c_sse42(unsigned crc, const void* buf, size_t size); + +constexpr uint32_t cpuid_ecx_SSE42= 1U << 20; +constexpr uint32_t cpuid_ecx_SSE42_AND_PCLMUL= cpuid_ecx_SSE42 | 1U << 1; + +static uint32_t cpuid_ecx() +{ +#ifdef __GNUC__ + uint32_t reax= 0, rebx= 0, recx= 0, redx= 0; + __cpuid(1, reax, rebx, recx, redx); + return recx; +#elif defined _MSC_VER + int regs[4]; + __cpuid(regs, 1); + return regs[2]; +#else +# error "unknown compiler" +#endif +} + +typedef unsigned (*my_crc32_t)(unsigned, const void *, size_t); +extern "C" unsigned int crc32_pclmul(unsigned int, const void *, size_t); +extern "C" unsigned int crc32c_3way(unsigned int, const void *, size_t); + +#ifdef USE_VPCLMULQDQ +# include <immintrin.h> + +# ifdef _MSC_VER +/* MSVC does not seem to define this intrinsic for vmovdqa */ +# define _mm_load_epi32(x) *reinterpret_cast<const __m128i*>(x) +# endif + +/* + This implementation is based on + crc32_by16_vclmul_avx512 and crc32_refl_by16_vclmul_avx512 + in https://github.com/intel/intel-ipsec-mb/ with some optimizations. + The // comments in crc32_avx512() correspond to assembler labels. +*/ + +/** table of constants corresponding to a CRC polynomial up to degree 32 */ +struct alignas(64) crc32_tab +{ + const uint64_t b2048[2], b1024[2]; + alignas(64) const uint64_t b896[6]; /* includes b786, b640 */ + const uint64_t b512[2]; + const uint64_t b384[2], b256[2], b128[2], zeropad_for_b384[2]; + const uint64_t b64[2], b32[2]; +}; + +/** ISO 3309 CRC-32 (reflected polynomial 0x04C11DB7); zlib crc32() */ +static const crc32_tab refl32 = { + { 0x00000000e95c1271, 0x00000000ce3371cb }, + { 0x00000000910eeec1, 0x0000000033fff533 }, + { 0x000000000cbec0ed, 0x0000000031f8303f, + 0x0000000057c54819, 0x00000000df068dc2, + 0x00000000ae0b5394, 0x000000001c279815 }, + { 0x000000001d9513d7, 0x000000008f352d95 }, + { 0x00000000af449247, 0x000000003db1ecdc }, + { 0x0000000081256527, 0x00000000f1da05aa }, + { 0x00000000ccaa009e, 0x00000000ae689191 }, + { 0, 0 }, + { 0x00000000ccaa009e, 0x00000000b8bc6765 }, + { 0x00000001f7011640, 0x00000001db710640 } +}; + +/** Castagnoli CRC-32C (reflected polynomial 0x1EDC6F41) */ +static const crc32_tab refl32c = { + { 0x00000000b9e02b86, 0x00000000dcb17aa4 }, + { 0x000000000d3b6092, 0x000000006992cea2 }, + { 0x0000000047db8317, 0x000000002ad91c30, + 0x000000000715ce53, 0x00000000c49f4f67, + 0x0000000039d3b296, 0x00000000083a6eec }, + { 0x000000009e4addf8, 0x00000000740eef02 }, + { 0x00000000ddc0152b, 0x000000001c291d04 }, + { 0x00000000ba4fc28e, 0x000000003da6d0cb }, + { 0x00000000493c7d27, 0x00000000f20c0dfe }, + { 0, 0 }, + { 0x00000000493c7d27, 0x00000000dd45aab8 }, + { 0x00000000dea713f0, 0x0000000105ec76f0 } +}; + +/** Some ternary functions */ +class ternary +{ + static constexpr uint8_t A = 0b11110000; + static constexpr uint8_t B = 0b11001100; + static constexpr uint8_t C = 0b10101010; +public: + static constexpr uint8_t XOR3 = A ^ B ^ C; + static constexpr uint8_t XNOR3 = uint8_t(~(A ^ B ^ C)); + static constexpr uint8_t XOR2_AND = (A ^ B) & C; +}; + +USE_VPCLMULQDQ +/** @return a^b^c */ +static inline __m128i xor3_128(__m128i a, __m128i b, __m128i c) +{ + return _mm_ternarylogic_epi64(a, b, c, ternary::XOR3); +} + +USE_VPCLMULQDQ +/** @return ~(a^b^c) */ +static inline __m128i xnor3_128(__m128i a, __m128i b, __m128i c) +{ + return _mm_ternarylogic_epi64(a, b, c, ternary::XNOR3); +} + +USE_VPCLMULQDQ +/** @return a^b^c */ +static inline __m512i xor3_512(__m512i a, __m512i b, __m512i c) +{ + return _mm512_ternarylogic_epi64(a, b, c, ternary::XOR3); +} + +USE_VPCLMULQDQ +/** @return (a^b)&c */ +static inline __m128i xor2_and_128(__m128i a, __m128i b, __m128i c) +{ + return _mm_ternarylogic_epi64(a, b, c, ternary::XOR2_AND); +} + +USE_VPCLMULQDQ +/** Load 64 bytes */ +static inline __m512i load512(const char *b) { return _mm512_loadu_epi8(b); } + +USE_VPCLMULQDQ +/** Load 16 bytes */ +static inline __m128i load128(const char *b) { return _mm_loadu_epi64(b); } + +/** Combine 512 data bits with CRC */ +USE_VPCLMULQDQ +static inline __m512i combine512(__m512i a, __m512i tab, __m512i b) +{ + return xor3_512(b, _mm512_clmulepi64_epi128(a, tab, 0x01), + _mm512_clmulepi64_epi128(a, tab, 0x10)); +} + +# define xor512(a, b) _mm512_xor_epi64(a, b) +# define xor256(a, b) _mm256_xor_epi64(a, b) +# define xor128(a, b) _mm_xor_epi64(a, b) +# define and128(a, b) _mm_and_si128(a, b) + +template<uint8_t bits> USE_VPCLMULQDQ +/** Pick a 128-bit component of a 512-bit vector */ +static inline __m512i extract512_128(__m512i a) +{ + static_assert(bits <= 3, "usage"); +# if defined __GNUC__ && __GNUC__ >= 11 + /* While technically incorrect, this would seem to translate into a + vextracti32x4 instruction, which actually outputs a ZMM register + (anything above the XMM range is cleared). */ + return _mm512_castsi128_si512(_mm512_extracti64x2_epi64(a, bits)); +# else + /* On clang, this is needed in order to get a correct result. */ + return _mm512_maskz_shuffle_i64x2(3, a, a, bits); +# endif +} + +alignas(16) static const uint64_t shuffle128[4] = { + 0x8786858483828100, 0x8f8e8d8c8b8a8988, + 0x0706050403020100, 0x000e0d0c0b0a0908 +}; + +static const __mmask16 size_mask[16] = { + 0x0001, 0x0003, 0x0007, 0x000f, 0x001f, 0x003f, 0x007f, 0x00ff, + 0x01ff, 0x03ff, 0x07ff, 0x0fff, 0x1fff, 0x3fff, 0x7fff, 0xffff +}; + +alignas(16) static const uint64_t shift128[4] = { + 0x8786858483828100, 0x8f8e8d8c8b8a8988, + 0x0706050403020100, 0x000e0d0c0b0a0908 +}; + +static const char shift_1_to_3_reflect[7 + 11] = { + -1, -1, -1, -1, -1, -1, -1, + 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10 +}; + +USE_VPCLMULQDQ +static unsigned crc32_avx512(unsigned crc, const char *buf, size_t size, + const crc32_tab &tab) +{ + const __m512i crc_in = _mm512_castsi128_si512(_mm_cvtsi32_si128(~crc)), + b512 = _mm512_broadcast_i32x4(_mm_load_epi32(tab.b512)); + __m128i crc_out; + __m512i lo; + + if (size >= 256) { + lo = xor512(load512(buf), crc_in); + __m512i l1 = load512(buf + 64); + + const __m512i b1024 = _mm512_broadcast_i32x4(_mm_load_epi32(&tab.b1024)); + size -= 256; + if (size >= 256) { + __m512i h0 = load512(buf + 128), + hi = load512(buf + 192); + const __m512i b2048 = _mm512_broadcast_i32x4(_mm_load_epi32(&tab.b2048)); + size -= 256; + do { + buf += 256; + lo = combine512(lo, b2048, load512(buf)); + l1 = combine512(l1, b2048, load512(buf + 64)); + h0 = combine512(h0, b2048, load512(buf + 128)); + hi = combine512(hi, b2048, load512(buf + 192)); + size -= 256; + } while (ssize_t(size) >= 0); + + buf += 256; + lo = combine512(lo, b1024, h0); + l1 = combine512(l1, b1024, hi); + size += 128; + } else { + do { + buf += 128; + lo = combine512(lo, b1024, load512(buf)); + l1 = combine512(l1, b1024, load512(buf + 64)); + size -= 128; + } while (ssize_t(size) >= 0); + + buf += 128; + } + + if (ssize_t(size) >= -64) { + size += 128; + lo = combine512(lo, b512, l1); + goto fold_64_B_loop; + } + + const __m512i + b896 = _mm512_load_epi32(&tab.b896), + b384 = _mm512_load_epi32(&tab.b384); + + __m512i c4 = xor3_512(_mm512_clmulepi64_epi128(lo, b896, 1), + _mm512_clmulepi64_epi128(lo, b896, 0x10), + _mm512_clmulepi64_epi128(l1, b384, 1)); + c4 = xor3_512(c4, _mm512_clmulepi64_epi128(l1, b384, 0x10), + extract512_128<3>(l1)); + + __m256i c2 = _mm512_castsi512_si256(_mm512_shuffle_i64x2(c4, c4, 0b01001110)); + c2 = xor256(c2, _mm512_castsi512_si256(c4)); + crc_out = xor128(_mm256_extracti64x2_epi64(c2, 1), + _mm256_castsi256_si128(c2)); + size += 128 - 16; + goto final_reduction; + } + + __m128i b; + + // less_than_256 + if (size >= 32) { + if (size >= 64) { + lo = xor512(load512(buf), crc_in); + + while (buf += 64, (size -= 64) >= 64) + fold_64_B_loop: + lo = combine512(lo, b512, load512(buf)); + + // reduce_64B + const __m512i b384 = _mm512_load_epi32(&tab.b384); + __m512i crc512 = + xor3_512(_mm512_clmulepi64_epi128(lo, b384, 1), + _mm512_clmulepi64_epi128(lo, b384, 0x10), + extract512_128<3>(lo)); + crc512 = xor512(crc512, _mm512_shuffle_i64x2(crc512, crc512, 0b01001110)); + const __m256i crc256 = _mm512_castsi512_si256(crc512); + crc_out = xor128(_mm256_extracti64x2_epi64(crc256, 1), + _mm256_castsi256_si128(crc256)); + size -= 16; + } else { + // less_than_64 + crc_out = xor128(load128(buf), + _mm512_castsi512_si128(crc_in)); + buf += 16; + size -= 32; + } + + final_reduction: + b = _mm_load_epi32(&tab.b128); + + while (ssize_t(size) >= 0) { + // reduction_loop_16B + crc_out = xor3_128(load128(buf), + _mm_clmulepi64_si128(crc_out, b, 1), + _mm_clmulepi64_si128(crc_out, b, 0x10)); + buf += 16; + size -= 16; + } + // final_reduction_for_128 + + size += 16; + if (size) { + get_last_two_xmms: + const __m128i crc2 = crc_out, d = load128(buf + (size - 16)); + __m128i S = load128(reinterpret_cast<const char*>(shuffle128) + size); + crc_out = _mm_shuffle_epi8(crc_out, S); + S = xor128(S, _mm_set1_epi32(0x80808080)); + crc_out = xor3_128(_mm_blendv_epi8(_mm_shuffle_epi8(crc2, S), d, S), + _mm_clmulepi64_si128(crc_out, b, 1), + _mm_clmulepi64_si128(crc_out, b, 0x10)); + } + + done_128: + __m128i crc_tmp; + b = _mm_load_epi32(&tab.b64); + crc_tmp = xor128(_mm_clmulepi64_si128(crc_out, b, 0x00), + _mm_srli_si128(crc_out, 8)); + crc_out = _mm_slli_si128(crc_tmp, 4); + crc_out = _mm_clmulepi64_si128(crc_out, b, 0x10); + crc_out = xor128(crc_out, crc_tmp); + + barrett: + b = _mm_load_epi32(&tab.b32); + crc_tmp = crc_out; + crc_out = and128(crc_out, _mm_set_epi64x(~0ULL, ~0xFFFFFFFFULL)); + crc_out = _mm_clmulepi64_si128(crc_out, b, 0); + crc_out = xor2_and_128(crc_out, crc_tmp, _mm_set_epi64x(0, ~0ULL)); + crc_out = xnor3_128(crc_out, crc_tmp, + _mm_clmulepi64_si128(crc_out, b, 0x10)); + return _mm_extract_epi32(crc_out, 2); + } else { + // less_than_32 + if (size > 0) { + if (size > 16) { + crc_out = xor128(load128(buf), + _mm512_castsi512_si128(crc_in)); + buf += 16; + size -= 16; + b = _mm_load_epi32(&tab.b128); + goto get_last_two_xmms; + } else if (size < 16) { + crc_out = _mm_maskz_loadu_epi8(size_mask[size - 1], buf); + crc_out = xor128(crc_out, _mm512_castsi512_si128(crc_in)); + + if (size >= 4) { + crc_out = _mm_shuffle_epi8 + (crc_out, + load128(reinterpret_cast<const char*>(shift128) + size)); + goto done_128; + } else { + // only_less_than_4 + /* Shift, zero-filling 5 to 7 of the 8-byte crc_out */ + crc_out = _mm_shuffle_epi8(crc_out, + load128(shift_1_to_3_reflect + size - 1)); + goto barrett; + } + } else { + crc_out = xor128(load128(buf), _mm512_castsi512_si128(crc_in)); + goto done_128; + } + } else + return crc; + } +} + +static ATTRIBUTE_NOINLINE int have_vpclmulqdq() +{ +# ifdef _MSC_VER + int regs[4]; + __cpuidex(regs, 7, 0); + uint32_t ebx = regs[1], ecx = regs[2]; +# else + uint32_t eax = 0, ebx = 0, ecx = 0, edx = 0; + __cpuid_count(7, 0, eax, ebx, ecx, edx); +# endif + return ecx & 1U<<10/*VPCLMULQDQ*/ && + !(~ebx & ((1U<<16/*AVX512F*/ | 1U<<17/*AVX512DQ*/ | + 1U<<30/*AVX512BW*/ | 1U<<31/*AVX512VL*/))); +} + +static unsigned crc32_vpclmulqdq(unsigned crc, const void *buf, size_t size) +{ + return crc32_avx512(crc, static_cast<const char*>(buf), size, refl32); +} + +static unsigned crc32c_vpclmulqdq(unsigned crc, const void *buf, size_t size) +{ + return crc32_avx512(crc, static_cast<const char*>(buf), size, refl32c); +} +#endif + +extern "C" my_crc32_t crc32_pclmul_enabled(void) +{ + if (~cpuid_ecx() & cpuid_ecx_SSE42_AND_PCLMUL) + return nullptr; +#ifdef USE_VPCLMULQDQ + if (have_vpclmulqdq()) + return crc32_vpclmulqdq; +#endif + return crc32_pclmul; +} + +extern "C" my_crc32_t crc32c_x86_available(void) +{ +#ifdef USE_VPCLMULQDQ + if (have_vpclmulqdq()) + return crc32c_vpclmulqdq; +#endif +#if SIZEOF_SIZE_T == 8 + switch (cpuid_ecx() & cpuid_ecx_SSE42_AND_PCLMUL) { + case cpuid_ecx_SSE42_AND_PCLMUL: + return crc32c_3way; + case cpuid_ecx_SSE42: + return crc32c_sse42; + } +#else + if (cpuid_ecx() & cpuid_ecx_SSE42) + return crc32c_sse42; +#endif + return nullptr; +} + +extern "C" const char *crc32c_x86_impl(my_crc32_t c) +{ +#ifdef USE_VPCLMULQDQ + if (c == crc32c_vpclmulqdq) + return "Using AVX512 instructions"; +#endif +#if SIZEOF_SIZE_T == 8 + if (c == crc32c_3way) + return "Using crc32 + pclmulqdq instructions"; +#endif + if (c == crc32c_sse42) + return "Using SSE4.2 crc32 instructions"; + return nullptr; +} diff --git a/mysys/crc32/crc_ppc64.h b/mysys/crc32/crc_ppc64.h index eb9379ab..81bbc16d 100644 --- a/mysys/crc32/crc_ppc64.h +++ b/mysys/crc32/crc_ppc64.h @@ -28,7 +28,7 @@ * any later version, or * b) the Apache License, Version 2.0 */ - +#include <stddef.h> #include <altivec.h> @@ -57,12 +57,13 @@ static unsigned int __attribute__ ((aligned (32))) __crc32_vpmsum(unsigned int crc, const void* p, unsigned long len); -unsigned int CRC32_FUNCTION(unsigned int crc, const unsigned char *p, - unsigned long len) +unsigned CRC32_FUNCTION(unsigned crc, const void *buffer, size_t len) { unsigned int prealign; unsigned int tail; + const unsigned char *p = buffer; + #ifdef CRC_XOR crc ^= 0xffffffff; #endif diff --git a/mysys/crc32ieee.cc b/mysys/crc32ieee.cc index 14e8017d..c11bdacf 100644 --- a/mysys/crc32ieee.cc +++ b/mysys/crc32ieee.cc @@ -26,23 +26,22 @@ static unsigned int my_crc32_zlib(unsigned int crc, const void *data, return (unsigned int) crc32(crc, (const Bytef *)data, (unsigned int) len); } -#ifdef HAVE_PCLMUL -extern "C" int crc32_pclmul_enabled(); -extern "C" unsigned int crc32_pclmul(unsigned int, const void *, size_t); -#elif defined(__GNUC__) && defined(HAVE_ARMV8_CRC) +typedef unsigned int (*my_crc32_t)(unsigned int, const void *, size_t); + +#if defined _M_IX86 || defined _M_X64 || defined __i386__ || defined __x86_64__ +extern "C" my_crc32_t crc32_pclmul_enabled(); +#elif defined HAVE_ARMV8_CRC extern "C" int crc32_aarch64_available(); extern "C" unsigned int crc32_aarch64(unsigned int, const void *, size_t); #endif -typedef unsigned int (*my_crc32_t)(unsigned int, const void *, size_t); - static my_crc32_t init_crc32() { -#ifdef HAVE_PCLMUL - if (crc32_pclmul_enabled()) - return crc32_pclmul; -#elif defined(__GNUC__) && defined(HAVE_ARMV8_CRC) +#if defined _M_IX86 || defined _M_X64 || defined __i386__ || defined __x86_64__ + if (my_crc32_t crc= crc32_pclmul_enabled()) + return crc; +#elif defined HAVE_ARMV8_CRC if (crc32_aarch64_available()) return crc32_aarch64; #endif diff --git a/mysys/errors.c b/mysys/errors.c index d88540fe..a8be37bf 100644 --- a/mysys/errors.c +++ b/mysys/errors.c @@ -112,6 +112,13 @@ void init_glob_errs() } #endif +static void my_space_sleep(uint seconds) +{ + sleep(seconds); +} + +void (*my_sleep_for_space)(uint seconds)= my_space_sleep; + void wait_for_free_space(const char *filename, int errors) { if (errors == 0) @@ -123,7 +130,7 @@ void wait_for_free_space(const char *filename, int errors) MYF(ME_BELL | ME_ERROR_LOG | ME_WARNING), MY_WAIT_FOR_USER_TO_FIX_PANIC, MY_WAIT_GIVE_USER_A_MESSAGE * MY_WAIT_FOR_USER_TO_FIX_PANIC ); - (void) sleep(MY_WAIT_FOR_USER_TO_FIX_PANIC); + my_sleep_for_space(MY_WAIT_FOR_USER_TO_FIX_PANIC); } const char **get_global_errmsgs(int nr __attribute__((unused))) diff --git a/mysys/lf_alloc-pin.c b/mysys/lf_alloc-pin.c index fc3f320a..4dc41645 100644 --- a/mysys/lf_alloc-pin.c +++ b/mysys/lf_alloc-pin.c @@ -291,7 +291,7 @@ static int harvest_pins(LF_PINS *el, struct st_harvester *hv) { for (i= 0; i < LF_PINBOX_PINS; i++) { - void *p= el->pin[i]; + void *p= my_atomic_loadptr((void **)&el->pin[i]); if (p) *hv->granary++= p; } @@ -316,7 +316,7 @@ static int match_pins(LF_PINS *el, void *addr) LF_PINS *el_end= el+LF_DYNARRAY_LEVEL_LENGTH; for (; el < el_end; el++) for (i= 0; i < LF_PINBOX_PINS; i++) - if (el->pin[i] == addr) + if (my_atomic_loadptr((void **)&el->pin[i]) == addr) return 1; return 0; } @@ -501,7 +501,8 @@ void *lf_alloc_new(LF_PINS *pins) { node= allocator->top; lf_pin(pins, 0, node); - } while (node != allocator->top && LF_BACKOFF()); + } while (node != my_atomic_loadptr((void **)(char *)&allocator->top) + && LF_BACKOFF()); if (!node) { node= (void *)my_malloc(key_memory_lf_node, allocator->element_size, diff --git a/mysys/mf_tempfile.c b/mysys/mf_tempfile.c index 0f1c6d6b..3393b610 100644 --- a/mysys/mf_tempfile.c +++ b/mysys/mf_tempfile.c @@ -66,7 +66,11 @@ File create_temp_file(char *to, const char *dir, const char *prefix, DBUG_ENTER("create_temp_file"); DBUG_PRINT("enter", ("dir: %s, prefix: %s", dir ? dir : "(null)", prefix)); +#if !defined _WIN32 && defined O_DIRECT + DBUG_ASSERT((mode & (O_EXCL | O_TRUNC | O_CREAT | O_RDWR | O_DIRECT)) == 0); +#else DBUG_ASSERT((mode & (O_EXCL | O_TRUNC | O_CREAT | O_RDWR)) == 0); +#endif mode|= O_TRUNC | O_CREAT | O_RDWR; /* not O_EXCL, see Windows code below */ @@ -118,16 +122,41 @@ File create_temp_file(char *to, const char *dir, const char *prefix, if ((MyFlags & MY_TEMPORARY) && O_TMPFILE_works) { - /* explictly don't use O_EXCL here has it has a different - meaning with O_TMPFILE + /* + explicitly don't use O_EXCL here has it has a different + meaning with O_TMPFILE */ - if ((file= open(dir, (mode & ~O_CREAT) | O_TMPFILE | O_CLOEXEC, - S_IRUSR | S_IWUSR | S_IRGRP | S_IWGRP)) >= 0) + const int flags= (mode & ~O_CREAT) | O_TMPFILE | O_CLOEXEC; + const mode_t open_mode= S_IRUSR | S_IWUSR | S_IRGRP | S_IWGRP; +# ifdef O_DIRECT + static int O_TMPFILE_works_with_O_DIRECT= O_DIRECT; + const int try_O_DIRECT= mode & O_TMPFILE_works_with_O_DIRECT; + if (try_O_DIRECT) + file= open(dir, flags | O_DIRECT, open_mode); + else +# endif + file= open(dir, flags, open_mode); + + if (file >= 0) { +# ifdef O_DIRECT + O_TMPFILE_works: +# endif my_snprintf(to, FN_REFLEN, "%s/#sql/fd=%d", dir, file); file=my_register_filename(file, to, FILE_BY_O_TMPFILE, EE_CANTCREATEFILE, MyFlags); } +# ifdef O_DIRECT + else if (errno == EINVAL && try_O_DIRECT) + { + file= open(dir, flags, open_mode); + if (file >= 0) + { + O_TMPFILE_works_with_O_DIRECT= 0; + goto O_TMPFILE_works; + } + } +# endif else if (errno == EOPNOTSUPP || errno == EINVAL) { my_printf_error(EE_CANTCREATEFILE, "O_TMPFILE is not supported on %s " diff --git a/mysys/my_bitmap.c b/mysys/my_bitmap.c index 9893c7e4..c9bbcc4b 100644 --- a/mysys/my_bitmap.c +++ b/mysys/my_bitmap.c @@ -13,17 +13,28 @@ You should have received a copy of the GNU General Public License along with this program; if not, write to the Free Software - Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1335 USA */ + Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1335 + USA + */ /* - Handling of uchar arrays as large bitmaps. + Handling of my_bitmap_map (ulonglong) arrays as large bitmaps. API limitations (or, rather asserted safety assumptions, to encourage correct programming) - * the internal size is a set of 32 bit words + * the internal storage is a set of 64 bit words * the number of bits specified in creation can be any number > 0 + Implementation notes: + * MY_BITMAP includes a pointer, last_word_ptr, to the last word. + The implication is that if one copies bitmaps to another memory + location, one has to call create_last_bit_mask() on the bitmap to + fix the internal pointer. + * The not used part of a the last word should always be 0. + This avoids special handling of the last bitmap in several cases. + This is checked for most calls to bitmap functions. + TODO: Make assembler thread safe versions of these using test-and-set instructions @@ -31,117 +42,97 @@ New version written and test program added and some changes to the interface was made by Mikael Ronstrom 2005, with assistance of Tomas Ulin and Mats Kindahl. + Updated to 64 bits and use my_find_first_bit() to speed up + bitmap_get_next_set() by Monty in 2024 */ #include "mysys_priv.h" #include <my_bitmap.h> #include <m_string.h> #include <my_bit.h> +#include <my_byteorder.h> + + +/* Defines to check bitmaps */ + +#define DBUG_ASSERT_BITMAP(M) \ + DBUG_ASSERT((M)->bitmap); \ + DBUG_ASSERT((M)->n_bits > 0); \ + DBUG_ASSERT((M)->last_word_ptr == (M)->bitmap + no_words_in_map(M)-1); \ + DBUG_ASSERT((*(M)->last_word_ptr & (M)->last_bit_mask) == 0); + +#define DBUG_ASSERT_BITMAP_AND_BIT(M,B) \ + DBUG_ASSERT_BITMAP(M); \ + DBUG_ASSERT((B) < (M)->n_bits); + +#define DBUG_ASSERT_DIFFERENT_BITMAPS(M,N) \ + DBUG_ASSERT_BITMAP(M); \ + DBUG_ASSERT_BITMAP(N); + +#define DBUG_ASSERT_IDENTICAL_BITMAPS(M,N) \ + DBUG_ASSERT_BITMAP(M); \ + DBUG_ASSERT_BITMAP(N); \ + DBUG_ASSERT((M)->n_bits == (N)->n_bits); /* - Create a mask with the upper 'unused' bits set and the lower 'used' - bits clear. The bits within each byte is stored in big-endian order. + Create a mask for the usable bits on the LAST my_bitmap_map position for + a bitmap with 'bits' number of bits. + + The lowest 'bits' bits are set to zero and the rest bits are set to 1. + For (bits & 63) == 0 , 0 is returned as in this case all bits in the + my_bitmap_position are significant. (This example assumes the + storage is ulonglong). + + For 'bits & 63' it will return values from the series + 0, 0xfffffffffffffffe,.... 0x8000000000000000 */ -static inline uchar invers_last_byte_mask(uint bits) -{ - return last_byte_mask(bits) ^ 255; -} - - -void create_last_word_mask(MY_BITMAP *map) -{ - unsigned char const mask= invers_last_byte_mask(map->n_bits); - - /* - The first bytes are to be set to zero since they represent real bits - in the bitvector. The last bytes are set to 0xFF since they represent - bytes not used by the bitvector. Finally the last byte contains bits - as set by the mask above. - */ - unsigned char *ptr= (unsigned char*)&map->last_word_mask; - - map->last_word_ptr= map->bitmap + no_words_in_map(map)-1; - switch (no_bytes_in_map(map) & 3) { - case 1: - map->last_word_mask= ~0U; - ptr[0]= mask; - return; - case 2: - map->last_word_mask= ~0U; - ptr[0]= 0; - ptr[1]= mask; - return; - case 3: - map->last_word_mask= 0U; - ptr[2]= mask; - ptr[3]= 0xFFU; - return; - case 0: - map->last_word_mask= 0U; - ptr[3]= mask; - return; - } +static inline my_bitmap_map last_bit_mask(uint bits) +{ + uint bits_in_last_map= (bits & (my_bitmap_map_bits-1)); + return bits_in_last_map ? ~((1ULL << bits_in_last_map)-1) : 0ULL; } -static inline my_bitmap_map last_word_mask(uint bit) -{ - my_bitmap_map last_word_mask; - uint n_bits= bit + 1; - unsigned char const mask= invers_last_byte_mask(n_bits); - - /* - The first bytes are to be set to zero since they represent real bits - in the bitvector. The last bytes are set to 0xFF since they represent - bytes not used by the bitvector. Finally the last byte contains bits - as set by the mask above. - */ - unsigned char *ptr= (unsigned char*)&last_word_mask; - - switch ((n_bits + 7)/8 & 3) { - case 1: - last_word_mask= ~0U; - ptr[0]= mask; - break; - case 2: - last_word_mask= ~0U; - ptr[0]= 0; - ptr[1]= mask; - break; - case 3: - last_word_mask= 0U; - ptr[2]= mask; - ptr[3]= 0xFFU; - break; - case 0: - last_word_mask= 0U; - ptr[3]= mask; - break; - } - return last_word_mask; -} +/* + Get a mask of the bits that are to be considered as 'on' at location + starting with 'bits'. + This function has _inv in it's name as it's usage is invers compared + to last_bit_mask(). + For (bits & 63) it will return values from the series + 0xffffffffffffffff, 0xfffffffffffffffe,.... 0x8000000000000000 +*/ -static inline uint get_first_set(my_bitmap_map value, uint word_pos) +static inline my_bitmap_map first_bit_mask_inv(uint bits) { - uchar *byte_ptr= (uchar*)&value; - uchar byte_value; - uint byte_pos, bit_pos; + uint bits_in_last_map= (bits & (my_bitmap_map_bits-1)); + return ~((1ULL << bits_in_last_map)-1); +} + + +/* + Update the bitmap's last_word_ptr and last_bit_mask + Also ensure that the last world is all zero to make it + easy to find the next set bit. + + Note that if n_bits is 0, then last_word_ptr will point to + bitmap (safely). The bitmap will not be usable for almost any operation. +*/ - DBUG_ASSERT(value); - for (byte_pos=0; ; byte_pos++, byte_ptr++) +void create_last_bit_mask(MY_BITMAP *map) +{ + my_bitmap_map mask= last_bit_mask(map->n_bits); + map->last_bit_mask= mask; + map->last_word_ptr= map->bitmap + MY_MAX(no_words_in_map(map),1) -1; + if (map->n_bits > 0) { - if ((byte_value= *byte_ptr)) - { - for (bit_pos=0; ; bit_pos++) - if (byte_value & (1 << bit_pos)) - return (word_pos*32) + (byte_pos*8) + bit_pos; - } + *map->last_word_ptr&= ~mask; /* Set not used bits to 0 */ + DBUG_ASSERT_BITMAP(map); } - return MY_BIT_NONE; /* Impossible */ } + /* Initialize a bitmap object. All bits will be set to zero */ @@ -149,17 +140,24 @@ static inline uint get_first_set(my_bitmap_map value, uint word_pos) my_bool my_bitmap_init(MY_BITMAP *map, my_bitmap_map *buf, uint n_bits) { DBUG_ENTER("my_bitmap_init"); + if (!buf) { uint size_in_bytes= bitmap_buffer_size(n_bits); if (!(buf= (my_bitmap_map*) my_malloc(key_memory_MY_BITMAP_bitmap, size_in_bytes, MYF(MY_WME)))) + { + map->bitmap= 0; DBUG_RETURN(1); + } + map->bitmap_allocated= 1; } + else + map->bitmap_allocated= 0; map->bitmap= buf; map->n_bits= n_bits; - create_last_word_mask(map); + create_last_bit_mask(map); bitmap_clear_all(map); DBUG_RETURN(0); } @@ -170,7 +168,8 @@ void my_bitmap_free(MY_BITMAP *map) DBUG_ENTER("my_bitmap_free"); if (map->bitmap) { - my_free(map->bitmap); + if (map->bitmap_allocated) + my_free(map->bitmap); map->bitmap=0; } DBUG_VOID_RETURN; @@ -192,11 +191,14 @@ void my_bitmap_free(MY_BITMAP *map) my_bool bitmap_fast_test_and_set(MY_BITMAP *map, uint bitmap_bit) { - uchar *value= ((uchar*) map->bitmap) + (bitmap_bit / 8); - uchar bit= 1 << ((bitmap_bit) & 7); - uchar res= (*value) & bit; + my_bitmap_map *value, bit, res; + DBUG_ASSERT_BITMAP_AND_BIT(map, bitmap_bit); + + value= map->bitmap + (bitmap_bit/my_bitmap_map_bits); + bit= 1ULL << (bitmap_bit & (my_bitmap_map_bits-1)); + res= *value & bit; *value|= bit; - return res; + return MY_TEST(res); } @@ -215,8 +217,7 @@ my_bool bitmap_fast_test_and_set(MY_BITMAP *map, uint bitmap_bit) my_bool bitmap_test_and_set(MY_BITMAP *map, uint bitmap_bit) { - DBUG_ASSERT(map->bitmap); - DBUG_ASSERT(bitmap_bit < map->n_bits); + DBUG_ASSERT_BITMAP_AND_BIT(map, bitmap_bit); return bitmap_fast_test_and_set(map, bitmap_bit); } @@ -235,18 +236,20 @@ my_bool bitmap_test_and_set(MY_BITMAP *map, uint bitmap_bit) my_bool bitmap_fast_test_and_clear(MY_BITMAP *map, uint bitmap_bit) { - uchar *byte= (uchar*) map->bitmap + (bitmap_bit / 8); - uchar bit= 1 << ((bitmap_bit) & 7); - uchar res= (*byte) & bit; - *byte&= ~bit; - return res; + my_bitmap_map *value, bit, res; + DBUG_ASSERT_BITMAP_AND_BIT(map, bitmap_bit); + + value= map->bitmap + (bitmap_bit/my_bitmap_map_bits); + bit= 1ULL << (bitmap_bit & (my_bitmap_map_bits-1)); + res= *value & bit; + *value&= ~bit; + return MY_TEST(res); } my_bool bitmap_test_and_clear(MY_BITMAP *map, uint bitmap_bit) { - DBUG_ASSERT(map->bitmap); - DBUG_ASSERT(bitmap_bit < map->n_bits); + DBUG_ASSERT_BITMAP_AND_BIT(map, bitmap_bit); return bitmap_fast_test_and_clear(map, bitmap_bit); } @@ -254,8 +257,8 @@ my_bool bitmap_test_and_clear(MY_BITMAP *map, uint bitmap_bit) uint bitmap_set_next(MY_BITMAP *map) { uint bit_found; - DBUG_ASSERT(map->bitmap); - if ((bit_found= bitmap_get_first(map)) != MY_BIT_NONE) + DBUG_ASSERT_BITMAP(map); + if ((bit_found= bitmap_get_first_clear(map)) != MY_BIT_NONE) bitmap_set_bit(map, bit_found); return bit_found; } @@ -265,58 +268,69 @@ uint bitmap_set_next(MY_BITMAP *map) Set the specified number of bits in the bitmap buffer. @param map [IN] Bitmap - @param prefix_size [IN] Number of bits to be set + @param prefix_size [IN] Number of bits to be set or (uint) ~0 for all */ + void bitmap_set_prefix(MY_BITMAP *map, uint prefix_size) { - uint prefix_bytes, prefix_bits, d; - uchar *m= (uchar *)map->bitmap; - - DBUG_ASSERT(map->bitmap); + uint prefix, prefix_bits; + my_bitmap_map *value= map->bitmap; + DBUG_ASSERT_BITMAP(map); DBUG_ASSERT(prefix_size <= map->n_bits || prefix_size == (uint) ~0); set_if_smaller(prefix_size, map->n_bits); - if ((prefix_bytes= prefix_size / 8)) - memset(m, 0xff, prefix_bytes); - m+= prefix_bytes; - if ((prefix_bits= prefix_size & 7)) + + if ((prefix= prefix_size / my_bitmap_map_bits)) { - *(m++)= (1 << prefix_bits)-1; - // As the prefix bits are set, lets count this byte too as a prefix byte. - prefix_bytes ++; + my_bitmap_map *end= value+prefix; + do + { + *value++= ~(my_bitmap_map) 0; + } while (value < end); } - if ((d= no_bytes_in_map(map)-prefix_bytes)) - memset(m, 0, d); + if ((prefix_bits= prefix_size & (my_bitmap_map_bits-1))) + *value++= (1ULL << prefix_bits)-1; + while (value <= map->last_word_ptr) + *value++= 0; + DBUG_ASSERT_BITMAP(map); } +/** + Check if bitmap is a bitmap of prefix bits set in the beginning + + @param map bitmap + @param prefix_size number of bits that should be set. 0 is allowed. + + @return 1 Yes, prefix bits where set or prefix_size == 0. + @return 0 No +*/ + my_bool bitmap_is_prefix(const MY_BITMAP *map, uint prefix_size) { - uint prefix_mask= last_byte_mask(prefix_size); - uchar *m= (uchar*) map->bitmap; - uchar *end_prefix= m+(prefix_size-1)/8; - uchar *end; - DBUG_ASSERT(m); - DBUG_ASSERT(prefix_size <= map->n_bits); + my_bitmap_map *value= map->bitmap; + my_bitmap_map *end= value+ (prefix_size/my_bitmap_map_bits); + uint prefix_bits; /* Empty prefix is always true */ if (!prefix_size) return 1; - while (m < end_prefix) - if (*m++ != 0xff) - return 0; + DBUG_ASSERT_BITMAP_AND_BIT(map, prefix_size-1); - end= ((uchar*) map->bitmap) + no_bytes_in_map(map) - 1; - if (m == end) - return ((*m & last_byte_mask(map->n_bits)) == prefix_mask); - - if (*m != prefix_mask) - return 0; + while (value < end) + if (*value++ != ~(my_bitmap_map) 0) + return 0; - while (++m < end) - if (*m != 0) + if ((prefix_bits= prefix_size & (my_bitmap_map_bits-1))) + { + if (*value++ != (1ULL << prefix_bits)-1) + return 0; + } + end= map->last_word_ptr; + while (value <= end) + if (*value++ != 0) return 0; - return ((*m & last_byte_mask(map->n_bits)) == 0); + return 1; } @@ -324,10 +338,12 @@ my_bool bitmap_is_set_all(const MY_BITMAP *map) { my_bitmap_map *data_ptr= map->bitmap; my_bitmap_map *end= map->last_word_ptr; + DBUG_ASSERT_BITMAP(map); + for (; data_ptr < end; data_ptr++) - if (*data_ptr != 0xFFFFFFFF) + if (*data_ptr != ~(my_bitmap_map)0) return FALSE; - return (*data_ptr | map->last_word_mask) == 0xFFFFFFFF; + return (*data_ptr | map->last_bit_mask) == ~(my_bitmap_map)0; } @@ -335,61 +351,58 @@ my_bool bitmap_is_clear_all(const MY_BITMAP *map) { my_bitmap_map *data_ptr= map->bitmap; my_bitmap_map *end= map->last_word_ptr; + DBUG_ASSERT_BITMAP(map); - DBUG_ASSERT(map->n_bits > 0); - for (; data_ptr < end; data_ptr++) + for (; data_ptr <= end; data_ptr++) if (*data_ptr) return FALSE; - return (*data_ptr & ~map->last_word_mask) == 0; + return TRUE; } + /* Return TRUE if map1 is a subset of map2 */ my_bool bitmap_is_subset(const MY_BITMAP *map1, const MY_BITMAP *map2) { - my_bitmap_map *m1= map1->bitmap, *m2= map2->bitmap, *end; + my_bitmap_map *m1= map1->bitmap, *m2= map2->bitmap, *end= map1->last_word_ptr; + DBUG_ASSERT_IDENTICAL_BITMAPS(map1,map2); - DBUG_ASSERT(map1->bitmap && map2->bitmap); - DBUG_ASSERT(map1->n_bits==map2->n_bits); - - end= map1->last_word_ptr; - while (m1 < end) + while (m1 <= end) { if ((*m1++) & ~(*m2++)) return 0; } - /* here both maps have the same number of bits - see assert above */ - return ((*m1 & ~*m2 & ~map1->last_word_mask) ? 0 : 1); + return 1; } /* True if bitmaps has any common bits */ my_bool bitmap_is_overlapping(const MY_BITMAP *map1, const MY_BITMAP *map2) { - my_bitmap_map *m1= map1->bitmap, *m2= map2->bitmap, *end; + my_bitmap_map *m1= map1->bitmap, *m2= map2->bitmap, *end= map1->last_word_ptr; + DBUG_ASSERT_IDENTICAL_BITMAPS(map1,map2); - DBUG_ASSERT(map1->bitmap); - DBUG_ASSERT(map2->bitmap); - DBUG_ASSERT(map1->n_bits==map2->n_bits); - - end= map1->last_word_ptr; - while (m1 < end) + while (m1 <= end) { if ((*m1++) & (*m2++)) return 1; } - /* here both maps have the same number of bits - see assert above */ - return ((*m1 & *m2 & ~map1->last_word_mask) ? 1 : 0); + return 0; } +/* + Create intersection of two bitmaps + + @param map map1. Result is stored here + @param map2 map2 +*/ + void bitmap_intersect(MY_BITMAP *map, const MY_BITMAP *map2) { my_bitmap_map *to= map->bitmap, *from= map2->bitmap, *end; uint len= no_words_in_map(map), len2 = no_words_in_map(map2); - - DBUG_ASSERT(map->bitmap); - DBUG_ASSERT(map2->bitmap); + DBUG_ASSERT_DIFFERENT_BITMAPS(map,map2); end= to+MY_MIN(len,len2); while (to < end) @@ -397,7 +410,7 @@ void bitmap_intersect(MY_BITMAP *map, const MY_BITMAP *map2) if (len2 <= len) { - to[-1]&= ~map2->last_word_mask; /* Clear last not relevant bits */ + to[-1]&= ~map2->last_bit_mask; /* Clear last not relevant bits */ end+= len-len2; while (to < end) *to++= 0; @@ -407,50 +420,51 @@ void bitmap_intersect(MY_BITMAP *map, const MY_BITMAP *map2) /* Check if there is some bit index between start_bit and end_bit, such that - this is bit is set for all bitmaps in bitmap_list. + this is at least on bit that set for all bitmaps in bitmap_list. SYNOPSIS bitmap_exists_intersection() bitmpap_array [in] a set of MY_BITMAPs - bitmap_count [in] number of elements in bitmpap_array + bitmap_count [in] number of elements in bitmap_array start_bit [in] beginning (inclusive) of the range of bits to search end_bit [in] end (inclusive) of the range of bits to search, must be no bigger than the bits of the shortest bitmap. - NOTES - This function assumes that for at least one of the bitmaps in bitmap_array all - bits outside the range [start_bit, end_bit] are 0. As a result is not - necessary to take care of the bits outside the range [start_bit, end_bit]. - RETURN TRUE if an intersecion exists FALSE no intersection */ -my_bool bitmap_exists_intersection(const MY_BITMAP **bitmap_array, +my_bool bitmap_exists_intersection(MY_BITMAP **bitmap_array, uint bitmap_count, uint start_bit, uint end_bit) { uint i, j, start_idx, end_idx; - my_bitmap_map cur_res; + my_bitmap_map cur_res, first_map; DBUG_ASSERT(bitmap_count); DBUG_ASSERT(end_bit >= start_bit); for (j= 0; j < bitmap_count; j++) - DBUG_ASSERT(end_bit < bitmap_array[j]->n_bits); + { + DBUG_ASSERT_BITMAP_AND_BIT(bitmap_array[j], end_bit); + } start_idx= start_bit/8/sizeof(my_bitmap_map); end_idx= end_bit/8/sizeof(my_bitmap_map); + first_map= first_bit_mask_inv(start_bit); + cur_res= first_map; for (i= start_idx; i < end_idx; i++) { - cur_res= ~0; for (j= 0; cur_res && j < bitmap_count; j++) cur_res &= bitmap_array[j]->bitmap[i]; if (cur_res) return TRUE; + cur_res= ~(my_bitmap_map) 0; } - cur_res= ~last_word_mask(end_bit); + cur_res= ~last_bit_mask(end_bit+1); + if (start_idx == end_idx) + cur_res&= first_map; for (j= 0; cur_res && j < bitmap_count; j++) cur_res &= bitmap_array[j]->bitmap[end_idx]; return cur_res != 0; @@ -461,60 +475,21 @@ my_bool bitmap_exists_intersection(const MY_BITMAP **bitmap_array, my_bool bitmap_union_is_set_all(const MY_BITMAP *map1, const MY_BITMAP *map2) { - my_bitmap_map *m1= map1->bitmap, *m2= map2->bitmap, *end; + my_bitmap_map *m1= map1->bitmap, *m2= map2->bitmap, *end= map1->last_word_ptr; + DBUG_ASSERT_IDENTICAL_BITMAPS(map1,map2); - DBUG_ASSERT(map1->bitmap); - DBUG_ASSERT(map2->bitmap); - DBUG_ASSERT(map1->n_bits==map2->n_bits); - end= map1->last_word_ptr; while ( m1 < end) - if ((*m1++ | *m2++) != 0xFFFFFFFF) + if ((*m1++ | *m2++) != ~(my_bitmap_map)0) return FALSE; /* here both maps have the same number of bits - see assert above */ - return ((*m1 | *m2 | map1->last_word_mask) != 0xFFFFFFFF); -} - - - -/* - Set/clear all bits above a bit. - - SYNOPSIS - bitmap_set_above() - map RETURN The bitmap to change. - from_byte The bitmap buffer byte offset to start with. - use_bit The bit value (1/0) to use for all upper bits. - - NOTE - You can only set/clear full bytes. - The function is meant for the situation that you copy a smaller bitmap - to a bigger bitmap. Bitmap lengths are always multiple of eigth (the - size of a byte). Using 'from_byte' saves multiplication and division - by eight during parameter passing. - - RETURN - void -*/ - -void bitmap_set_above(MY_BITMAP *map, uint from_byte, uint use_bit) -{ - uchar use_byte= use_bit ? 0xff : 0; - uchar *to= (uchar *)map->bitmap + from_byte; - uchar *end= (uchar *)map->bitmap + (map->n_bits+7)/8; - - while (to < end) - *to++= use_byte; + return ((*m1 | *m2 | map1->last_bit_mask) != ~(my_bitmap_map)0); } void bitmap_subtract(MY_BITMAP *map, const MY_BITMAP *map2) { - my_bitmap_map *to= map->bitmap, *from= map2->bitmap, *end; - DBUG_ASSERT(map->bitmap); - DBUG_ASSERT(map2->bitmap); - DBUG_ASSERT(map->n_bits==map2->n_bits); - - end= map->last_word_ptr; + my_bitmap_map *to= map->bitmap, *from= map2->bitmap, *end= map->last_word_ptr; + DBUG_ASSERT_IDENTICAL_BITMAPS(map,map2); while (to <= end) *to++ &= ~(*from++); @@ -523,12 +498,8 @@ void bitmap_subtract(MY_BITMAP *map, const MY_BITMAP *map2) void bitmap_union(MY_BITMAP *map, const MY_BITMAP *map2) { - my_bitmap_map *to= map->bitmap, *from= map2->bitmap, *end; - - DBUG_ASSERT(map->bitmap); - DBUG_ASSERT(map2->bitmap); - DBUG_ASSERT(map->n_bits == map2->n_bits); - end= map->last_word_ptr; + my_bitmap_map *to= map->bitmap, *from= map2->bitmap, *end= map->last_word_ptr; + DBUG_ASSERT_IDENTICAL_BITMAPS(map,map2); while (to <= end) *to++ |= *from++; @@ -538,9 +509,8 @@ void bitmap_union(MY_BITMAP *map, const MY_BITMAP *map2) void bitmap_xor(MY_BITMAP *map, const MY_BITMAP *map2) { my_bitmap_map *to= map->bitmap, *from= map2->bitmap, *end= map->last_word_ptr; - DBUG_ASSERT(map->bitmap); - DBUG_ASSERT(map2->bitmap); - DBUG_ASSERT(map->n_bits == map2->n_bits); + DBUG_ASSERT_IDENTICAL_BITMAPS(map,map2); + while (to <= end) *to++ ^= *from++; } @@ -548,13 +518,14 @@ void bitmap_xor(MY_BITMAP *map, const MY_BITMAP *map2) void bitmap_invert(MY_BITMAP *map) { - my_bitmap_map *to= map->bitmap, *end; + my_bitmap_map *to= map->bitmap, *end= map->last_word_ptr; + DBUG_ASSERT_BITMAP(map); - DBUG_ASSERT(map->bitmap); - end= map->last_word_ptr; + while (to < end) + *to++ ^= ~(my_bitmap_map)0; - while (to <= end) - *to++ ^= 0xFFFFFFFF; + *to ^= (~(my_bitmap_map)0 & ~map->last_bit_mask); + DBUG_ASSERT_BITMAP(map); } @@ -563,45 +534,54 @@ uint bitmap_bits_set(const MY_BITMAP *map) my_bitmap_map *data_ptr= map->bitmap; my_bitmap_map *end= map->last_word_ptr; uint res= 0; - DBUG_ASSERT(map->bitmap); + DBUG_ASSERT_BITMAP(map); - for (; data_ptr < end; data_ptr++) - res+= my_count_bits_uint32(*data_ptr); + for (; data_ptr <= end; data_ptr++) + res+= my_count_bits(*data_ptr); - /*Reset last bits to zero*/ - res+= my_count_bits_uint32(*map->last_word_ptr & ~map->last_word_mask); return res; } -void bitmap_copy(MY_BITMAP *map, const MY_BITMAP *map2) -{ - my_bitmap_map *to= map->bitmap, *from= map2->bitmap, *end; - DBUG_ASSERT(map->bitmap); - DBUG_ASSERT(map2->bitmap); - DBUG_ASSERT(map->n_bits == map2->n_bits); - end= map->last_word_ptr; +/** + Copy bitmaps - while (to <= end) - *to++ = *from++; + @param map1 to-bitmap + @param map2 from-bitmap + + @notes + Code will work even of the bitmaps are of different size. + In this case, only up to to->n_bits will be copied. +*/ + +void bitmap_copy(MY_BITMAP *map1, const MY_BITMAP *map2) +{ + my_bitmap_map *to= map1->bitmap, *from= map2->bitmap; + uint map1_length= no_words_in_map(map1)*sizeof(my_bitmap_map); + uint map2_length= no_words_in_map(map2)*sizeof(my_bitmap_map); + uint length= MY_MIN(map1_length, map2_length); + DBUG_ASSERT_DIFFERENT_BITMAPS(map1,map2); + + memcpy(to, from, length); + if (length < map1_length) + bzero(to + length, map1_length - length); + *map1->last_word_ptr&= ~map1->last_bit_mask; } +/* + Find first set bit in the bitmap +*/ + uint bitmap_get_first_set(const MY_BITMAP *map) { - uint i; my_bitmap_map *data_ptr= map->bitmap, *end= map->last_word_ptr; + DBUG_ASSERT_BITMAP(map); - DBUG_ASSERT(map->bitmap); - - for (i=0; data_ptr < end; data_ptr++, i++) + for (uint i=0; data_ptr <= end; data_ptr++, i++) if (*data_ptr) - goto found; - if (!(*data_ptr & ~map->last_word_mask)) - return MY_BIT_NONE; - -found: - return get_first_set(*data_ptr, i); + return my_find_first_bit(*data_ptr) + i * sizeof(my_bitmap_map)*8; + return MY_BIT_NONE; } @@ -616,80 +596,113 @@ found: uint bitmap_get_next_set(const MY_BITMAP *map, uint bitmap_bit) { - uint word_pos, byte_to_mask, i; - union { my_bitmap_map bitmap ; uchar bitmap_buff[sizeof(my_bitmap_map)]; } - first_word; - uchar *ptr= &first_word.bitmap_buff[0]; - my_bitmap_map *data_ptr, *end= map->last_word_ptr; - - DBUG_ASSERT(map->bitmap); + uint word_pos; + my_bitmap_map first_word, *data_ptr, *end= map->last_word_ptr; + DBUG_ASSERT_BITMAP(map); /* Look for the next bit */ bitmap_bit++; if (bitmap_bit >= map->n_bits) return MY_BIT_NONE; - word_pos= bitmap_bit / 32; + + word_pos= bitmap_bit / 64; data_ptr= map->bitmap + word_pos; - first_word.bitmap= *data_ptr; - /* Mask out previous bits from first_word */ - byte_to_mask= (bitmap_bit % 32) / 8; - for (i= 0; i < byte_to_mask; i++) - ptr[i]= 0; - ptr[byte_to_mask]&= 0xFFU << (bitmap_bit & 7); + first_word= *data_ptr & first_bit_mask_inv(bitmap_bit); - if (data_ptr == end) + if (first_word) { - if (first_word.bitmap & ~map->last_word_mask) - return get_first_set(first_word.bitmap, word_pos); - else - return MY_BIT_NONE; + /* Optimize common case when most bits are set */ + if (first_word & (1ULL << ((bitmap_bit & (my_bitmap_map_bits-1))))) + return bitmap_bit; + return my_find_first_bit(first_word) + (bitmap_bit & ~(my_bitmap_map_bits-1)); } - - if (first_word.bitmap) - return get_first_set(first_word.bitmap, word_pos); - for (data_ptr++, word_pos++; data_ptr < end; data_ptr++, word_pos++) + for (data_ptr++; data_ptr <= end; data_ptr++) + { + bitmap_bit+= 64; if (*data_ptr) - return get_first_set(*data_ptr, word_pos); - - if (!(*end & ~map->last_word_mask)) - return MY_BIT_NONE; - return get_first_set(*end, word_pos); + return my_find_first_bit(*data_ptr) + (bitmap_bit & ~(my_bitmap_map_bits-1)); + } + return MY_BIT_NONE; } -/* Get first free bit */ +/* Get first clear bit */ -uint bitmap_get_first(const MY_BITMAP *map) +uint bitmap_get_first_clear(const MY_BITMAP *map) { - uchar *byte_ptr; - uint i,j,k; - my_bitmap_map *data_ptr, *end= map->last_word_ptr; - - DBUG_ASSERT(map->bitmap); - data_ptr= map->bitmap; - *map->last_word_ptr|= map->last_word_mask; + uint i; + my_bitmap_map *data_ptr= map->bitmap, *end= map->last_word_ptr; + DBUG_ASSERT_BITMAP(map); - for (i=0; data_ptr < end; data_ptr++, i++) - if (*data_ptr != 0xFFFFFFFF) + for (i= 0; data_ptr < end; data_ptr++, i++) + if (*data_ptr != ~(my_bitmap_map)0) goto found; - if ((*data_ptr | map->last_word_mask) == 0xFFFFFFFF) + if ((*data_ptr | map->last_bit_mask) == ~(my_bitmap_map)0) return MY_BIT_NONE; - found: - byte_ptr= (uchar*)data_ptr; - for (j=0; ; j++, byte_ptr++) + /* find first zero bit by reverting all bits and find first bit */ + return my_find_first_bit(~*data_ptr) + i * sizeof(my_bitmap_map)*8; +} +/* + Functions to export/import bitmaps to an architecture independent format + (low_byte_first) +*/ + +#ifdef WORDS_BIGENDIAN +/* Big endian machines, like powerpc or s390x */ + +void bitmap_export(uchar *to, MY_BITMAP *map) +{ + my_bitmap_map *value; + uint length; + uchar buff[my_bitmap_map_bytes]; + + for (value= map->bitmap ; value < map->last_word_ptr ; value++) { - if (*byte_ptr != 0xFF) - { - for (k=0; ; k++) - { - if (!(*byte_ptr & (1 << k))) - return (i*32) + (j*8) + k; - } - } + int8store(to, *value); + to+= 8; } - DBUG_ASSERT(0); - return MY_BIT_NONE; /* Impossible */ + int8store(buff, *value); + + /* We want length & 7 to return a serie 8,2,3,4,5,6,7, 8,2,3,... */ + length= 1+ ((no_bytes_in_export_map(map) + 7) & 7); + memcpy(to, buff, length); +} + + +void bitmap_import(MY_BITMAP *map, uchar *from) +{ + my_bitmap_map *value; + uint length; + uchar buff[my_bitmap_map_bytes]; + + for (value= map->bitmap ; value < map->last_word_ptr ; value++) + { + *value= uint8korr(from); + from+= 8; + } + bzero(buff, sizeof(buff)); + + /* We want length & 7 to return a serie 8,2,3,4,5,6,7, 8,2,3,... */ + length= 1+ ((no_bytes_in_export_map(map) + 7) & 7); + memcpy(buff, from, length); + *value= uint8korr(buff) & ~map->last_bit_mask; +} + +#else + +/* Little endian machines, like intel and amd */ + +void bitmap_export(uchar *to, MY_BITMAP *map) +{ + memcpy(to, (uchar*) map->bitmap, no_bytes_in_export_map(map)); +} + +void bitmap_import(MY_BITMAP *map, uchar *from) +{ + memcpy((uchar*) map->bitmap, from, no_bytes_in_export_map(map)); + *map->last_word_ptr&= ~map->last_bit_mask; } +#endif /* WORDS_BIGENDIAN */ diff --git a/mysys/my_getopt.c b/mysys/my_getopt.c index eb665b1e..6b480dae 100644 --- a/mysys/my_getopt.c +++ b/mysys/my_getopt.c @@ -172,6 +172,8 @@ static void validate_value(const char *key, const char *value, #define validate_value(key, value, filename) (void)filename #endif +#define SET_HO_ERROR_AND_CONTINUE(e) { ho_error= (e); (*argc)--; continue; } + /** Handle command line options. Sort options. @@ -241,7 +243,7 @@ int handle_options(int *argc, char ***argv, const struct my_option *longopts, const char *UNINIT_VAR(prev_found); const struct my_option *optp; void *value; - int error, i; + int ho_error= 0, error, i; my_bool is_cmdline_arg= 1; DBUG_ENTER("handle_options"); @@ -255,7 +257,7 @@ int handle_options(int *argc, char ***argv, const struct my_option *longopts, is_cmdline_arg= !is_file_marker(**argv); - for (pos= *argv, pos_end=pos+ *argc; pos != pos_end ; pos++) + for (pos= *argv, pos_end=pos+ *argc; pos < pos_end ; pos++) { char **first= pos; char *cur_arg= *pos; @@ -344,7 +346,7 @@ int handle_options(int *argc, char ***argv, const struct my_option *longopts, my_progname, special_opt_prefix[i], opt_str, special_opt_prefix[i], prev_found); - DBUG_RETURN(EXIT_AMBIGUOUS_OPTION); + SET_HO_ERROR_AND_CONTINUE(EXIT_AMBIGUOUS_OPTION) } switch (i) { case OPT_SKIP: @@ -389,7 +391,7 @@ int handle_options(int *argc, char ***argv, const struct my_option *longopts, "%s: unknown variable '%s'", my_progname, cur_arg); if (!option_is_loose) - DBUG_RETURN(EXIT_UNKNOWN_VARIABLE); + SET_HO_ERROR_AND_CONTINUE(EXIT_UNKNOWN_VARIABLE) } else { @@ -399,7 +401,7 @@ int handle_options(int *argc, char ***argv, const struct my_option *longopts, "%s: unknown option '--%s'", my_progname, cur_arg); if (!option_is_loose) - DBUG_RETURN(EXIT_UNKNOWN_OPTION); + SET_HO_ERROR_AND_CONTINUE(EXIT_UNKNOWN_OPTION) } if (option_is_loose) { @@ -416,7 +418,7 @@ int handle_options(int *argc, char ***argv, const struct my_option *longopts, my_getopt_error_reporter(ERROR_LEVEL, "%s: variable prefix '%s' is not unique", my_progname, opt_str); - DBUG_RETURN(EXIT_VAR_PREFIX_NOT_UNIQUE); + SET_HO_ERROR_AND_CONTINUE(EXIT_VAR_PREFIX_NOT_UNIQUE) } else { @@ -425,7 +427,7 @@ int handle_options(int *argc, char ***argv, const struct my_option *longopts, "%s: ambiguous option '--%s' (%s, %s)", my_progname, opt_str, prev_found, optp->name); - DBUG_RETURN(EXIT_AMBIGUOUS_OPTION); + SET_HO_ERROR_AND_CONTINUE(EXIT_AMBIGUOUS_OPTION) } } if ((optp->var_type & GET_TYPE_MASK) == GET_DISABLED) @@ -439,14 +441,14 @@ int handle_options(int *argc, char ***argv, const struct my_option *longopts, (*argc)--; continue; } - DBUG_RETURN(EXIT_OPTION_DISABLED); + SET_HO_ERROR_AND_CONTINUE(EXIT_OPTION_DISABLED) } error= 0; value= optp->var_type & GET_ASK_ADDR ? (*my_getopt_get_addr)(key_name, (uint)strlen(key_name), optp, &error) : optp->value; if (error) - DBUG_RETURN(error); + SET_HO_ERROR_AND_CONTINUE(error) if (optp->arg_type == NO_ARG) { @@ -461,7 +463,7 @@ int handle_options(int *argc, char ***argv, const struct my_option *longopts, my_getopt_error_reporter(ERROR_LEVEL, "%s: option '--%s' cannot take an argument", my_progname, optp->name); - DBUG_RETURN(EXIT_NO_ARGUMENT_ALLOWED); + SET_HO_ERROR_AND_CONTINUE(EXIT_NO_ARGUMENT_ALLOWED) } if ((optp->var_type & GET_TYPE_MASK) == GET_BOOL) { @@ -490,7 +492,7 @@ int handle_options(int *argc, char ***argv, const struct my_option *longopts, if (get_one_option(optp, *((my_bool*) value) ? enabled_my_option : disabled_my_option, filename)) - DBUG_RETURN(EXIT_ARGUMENT_INVALID); + SET_HO_ERROR_AND_CONTINUE(EXIT_ARGUMENT_INVALID) continue; } argument= optend; @@ -504,7 +506,7 @@ int handle_options(int *argc, char ***argv, const struct my_option *longopts, "option '--%s' cannot take an argument", my_progname, optp->name); - DBUG_RETURN(EXIT_NO_ARGUMENT_ALLOWED); + SET_HO_ERROR_AND_CONTINUE(EXIT_NO_ARGUMENT_ALLOWED) } if (!(optp->var_type & GET_AUTO)) { @@ -514,7 +516,7 @@ int handle_options(int *argc, char ***argv, const struct my_option *longopts, "unsupported by option '--%s'", my_progname, optp->name); if (!option_is_loose) - DBUG_RETURN(EXIT_ARGUMENT_INVALID); + SET_HO_ERROR_AND_CONTINUE(EXIT_ARGUMENT_INVALID) continue; } else @@ -533,7 +535,7 @@ int handle_options(int *argc, char ***argv, const struct my_option *longopts, my_getopt_error_reporter(ERROR_LEVEL, "%s: option '--%s' requires an argument", my_progname, optp->name); - DBUG_RETURN(EXIT_ARGUMENT_REQUIRED); + SET_HO_ERROR_AND_CONTINUE(EXIT_ARGUMENT_REQUIRED) } argument= *pos; (*argc)--; @@ -558,14 +560,14 @@ int handle_options(int *argc, char ***argv, const struct my_option *longopts, fprintf(stderr, "%s: ERROR: Option '-%c' used, but is disabled\n", my_progname, optp->id); - DBUG_RETURN(EXIT_OPTION_DISABLED); + SET_HO_ERROR_AND_CONTINUE(EXIT_OPTION_DISABLED) } if ((optp->var_type & GET_TYPE_MASK) == GET_BOOL && optp->arg_type == NO_ARG) { *((my_bool*) optp->value)= (my_bool) 1; if (get_one_option(optp, argument, filename)) - DBUG_RETURN(EXIT_UNSPECIFIED_ERROR); + SET_HO_ERROR_AND_CONTINUE(EXIT_UNSPECIFIED_ERROR) continue; } else if (optp->arg_type == REQUIRED_ARG || @@ -585,7 +587,7 @@ int handle_options(int *argc, char ***argv, const struct my_option *longopts, if (optp->var_type == GET_BOOL) *((my_bool*) optp->value)= (my_bool) 1; if (get_one_option(optp, argument, filename)) - DBUG_RETURN(EXIT_UNSPECIFIED_ERROR); + SET_HO_ERROR_AND_CONTINUE(EXIT_UNSPECIFIED_ERROR) continue; } /* Check if there are more arguments after this one */ @@ -595,7 +597,7 @@ int handle_options(int *argc, char ***argv, const struct my_option *longopts, my_getopt_error_reporter(ERROR_LEVEL, "%s: option '-%c' requires an argument", my_progname, optp->id); - DBUG_RETURN(EXIT_ARGUMENT_REQUIRED); + SET_HO_ERROR_AND_CONTINUE(EXIT_ARGUMENT_REQUIRED) } argument= *++pos; (*argc)--; @@ -603,10 +605,10 @@ int handle_options(int *argc, char ***argv, const struct my_option *longopts, } } if ((error= setval(optp, optp->value, argument, - set_maximum_value,filename))) - DBUG_RETURN(error); + set_maximum_value,filename))) + SET_HO_ERROR_AND_CONTINUE(error) if (get_one_option(optp, argument, filename)) - DBUG_RETURN(EXIT_UNSPECIFIED_ERROR); + SET_HO_ERROR_AND_CONTINUE(EXIT_UNSPECIFIED_ERROR) break; } } @@ -640,7 +642,7 @@ int handle_options(int *argc, char ***argv, const struct my_option *longopts, my_getopt_error_reporter(ERROR_LEVEL, "%s: unknown option '-%c'", my_progname, *optend); - DBUG_RETURN(EXIT_UNKNOWN_OPTION); + SET_HO_ERROR_AND_CONTINUE(EXIT_UNKNOWN_OPTION) } } } @@ -651,15 +653,17 @@ int handle_options(int *argc, char ***argv, const struct my_option *longopts, if ((!option_is_autoset) && ((error= setval(optp, value, argument, set_maximum_value,filename))) && !option_is_loose) - DBUG_RETURN(error); + SET_HO_ERROR_AND_CONTINUE(error) if (get_one_option(optp, argument, filename)) - DBUG_RETURN(EXIT_UNSPECIFIED_ERROR); + SET_HO_ERROR_AND_CONTINUE(EXIT_UNSPECIFIED_ERROR) (*argc)--; /* option handled (long), decrease argument count */ } else /* non-option found */ (*argv)[argvpos++]= cur_arg; } + if (ho_error) + DBUG_RETURN(ho_error); /* Destroy the first, already handled option, so that programs that look for arguments in 'argv', without checking 'argc', know when to stop. @@ -899,7 +903,7 @@ static int setval(const struct my_option *opts, void *value, char *argument, } if (err) { - res= EXIT_UNKNOWN_SUFFIX; + res= err; goto ret; }; } @@ -1034,7 +1038,7 @@ static inline ulonglong eval_num_suffix(char *suffix, int *error) case 'E': return 1ULL << 60; default: - *error= 1; + *error= EXIT_UNKNOWN_SUFFIX; return 0ULL; } } @@ -1060,15 +1064,18 @@ static longlong eval_num_suffix_ll(char *argument, if (errno == ERANGE) { my_getopt_error_reporter(ERROR_LEVEL, - "Incorrect integer value: '%s'", argument); - *error= 1; + "Integer value out of range for int64:" + " '%s' for %s", + argument, option_name); + *error= EXIT_ARGUMENT_INVALID; DBUG_RETURN(0); } num*= eval_num_suffix(endchar, error); if (*error) - fprintf(stderr, - "Unknown suffix '%c' used for variable '%s' (value '%s')\n", - *endchar, option_name, argument); + my_getopt_error_reporter(ERROR_LEVEL, + "Unknown suffix '%c' used for variable '%s' (value '%s'). " + "Legal suffix characters are: K, M, G, T, P, E", + *endchar, option_name, argument); DBUG_RETURN(num); } @@ -1091,7 +1098,7 @@ static ulonglong eval_num_suffix_ull(char *argument, my_getopt_error_reporter(ERROR_LEVEL, "Incorrect unsigned value: '%s' for %s", argument, option_name); - *error= 1; + *error= EXIT_ARGUMENT_INVALID; DBUG_RETURN(0); } *error= 0; @@ -1100,15 +1107,18 @@ static ulonglong eval_num_suffix_ull(char *argument, if (errno == ERANGE) { my_getopt_error_reporter(ERROR_LEVEL, - "Incorrect integer value: '%s' for %s", + "Integer value out of range for uint64:" + " '%s' for %s", argument, option_name); - *error= 1; + *error= EXIT_ARGUMENT_INVALID; DBUG_RETURN(0); } num*= eval_num_suffix(endchar, error); if (*error) my_getopt_error_reporter(ERROR_LEVEL, - "Unknown suffix '%c' used for variable '%s' (value '%s')", + "Unknown suffix '%c' used for variable '%s'" + " (value '%s')." + " Legal suffix characters are: K, M, G, T, P, E", *endchar, option_name, argument); DBUG_RETURN(num); } @@ -1128,6 +1138,8 @@ static ulonglong eval_num_suffix_ull(char *argument, static longlong getopt_ll(char *arg, const struct my_option *optp, int *err) { longlong num=eval_num_suffix_ll(arg, err, (char*) optp->name); + if (*err) + return(0); return getopt_ll_limit_value(num, optp, NULL); } @@ -1205,6 +1217,8 @@ longlong getopt_ll_limit_value(longlong num, const struct my_option *optp, static ulonglong getopt_ull(char *arg, const struct my_option *optp, int *err) { ulonglong num= eval_num_suffix_ull(arg, err, (char*) optp->name); + if (*err) + return(0); return getopt_ull_limit_value(num, optp, NULL); } diff --git a/mysys/my_thr_init.c b/mysys/my_thr_init.c index 2e8decd7..b2436d2b 100644 --- a/mysys/my_thr_init.c +++ b/mysys/my_thr_init.c @@ -220,7 +220,11 @@ void my_thread_global_end(void) fprintf(stderr, "Error in my_thread_global_end(): %d threads didn't exit\n", THR_thread_count); -#endif +#endif /* HAVE_PTHREAD_KILL */ +#ifdef SAFEMALLOC + /* We know we will have memoryleaks, suppress the leak report */ + sf_leaking_memory= 1; +#endif /* SAFEMALLOC */ all_threads_killed= 0; break; } @@ -234,9 +238,7 @@ void my_thread_global_end(void) that could use them. */ if (all_threads_killed) - { my_thread_destroy_internal_mutex(); - } my_thread_global_init_done= 0; } |