summaryrefslogtreecommitdiffstats
path: root/mysys
diff options
context:
space:
mode:
authorDaniel Baumann <daniel.baumann@progress-linux.org>2024-05-18 13:22:53 +0000
committerDaniel Baumann <daniel.baumann@progress-linux.org>2024-05-18 13:22:53 +0000
commit347c164c35eddab388009470e6848cb361ac93f8 (patch)
tree2c0c44eac690f510bb0a35b2a13b36d606b77b6b /mysys
parentReleasing progress-linux version 1:10.11.7-4~progress7.99u1. (diff)
downloadmariadb-347c164c35eddab388009470e6848cb361ac93f8.tar.xz
mariadb-347c164c35eddab388009470e6848cb361ac93f8.zip
Merging upstream version 1:10.11.8.
Signed-off-by: Daniel Baumann <daniel.baumann@progress-linux.org>
Diffstat (limited to '')
-rw-r--r--mysys/CMakeLists.txt34
-rw-r--r--mysys/crc32/crc32_arm64.c338
-rw-r--r--mysys/crc32/crc32_x86.c16
-rw-r--r--mysys/crc32/crc32c.cc261
-rw-r--r--mysys/crc32/crc32c_amd64.cc8
-rw-r--r--mysys/crc32/crc32c_ppc.h3
-rw-r--r--mysys/crc32/crc32c_x86.cc457
-rw-r--r--mysys/crc32/crc_ppc64.h7
-rw-r--r--mysys/crc32ieee.cc19
-rw-r--r--mysys/errors.c9
-rw-r--r--mysys/lf_alloc-pin.c7
-rw-r--r--mysys/mf_tempfile.c37
-rw-r--r--mysys/my_bitmap.c645
-rw-r--r--mysys/my_getopt.c84
-rw-r--r--mysys/my_thr_init.c8
15 files changed, 1220 insertions, 713 deletions
diff --git a/mysys/CMakeLists.txt b/mysys/CMakeLists.txt
index 758243df..ab6d9edf 100644
--- a/mysys/CMakeLists.txt
+++ b/mysys/CMakeLists.txt
@@ -14,7 +14,7 @@
# along with this program; if not, write to the Free Software
# Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1335 USA
-INCLUDE_DIRECTORIES(${ZLIB_INCLUDE_DIR} ${CMAKE_SOURCE_DIR}/include ${CMAKE_SOURCE_DIR}/mysys)
+INCLUDE_DIRECTORIES(${ZLIB_INCLUDE_DIRS} ${CMAKE_SOURCE_DIR}/include ${CMAKE_SOURCE_DIR}/mysys)
SET(MYSYS_SOURCES array.c charset-def.c charset.c my_default.c
get_password.c
@@ -59,29 +59,21 @@ IF (WIN32)
ENDIF()
IF(MSVC_INTEL)
- SET(MYSYS_SOURCES ${MYSYS_SOURCES} crc32/crc32_x86.c)
+ SET(MYSYS_SOURCES ${MYSYS_SOURCES} crc32/crc32_x86.c crc32/crc32c_x86.cc)
IF(CMAKE_SIZEOF_VOID_P EQUAL 8)
SET (MYSYS_SOURCES ${MYSYS_SOURCES} crc32/crc32c_amd64.cc)
ENDIF()
- ADD_DEFINITIONS(-DHAVE_SSE42 -DHAVE_PCLMUL)
- IF(CLANG_CL)
- SET_SOURCE_FILES_PROPERTIES(crc32/crc32_x86.c PROPERTIES COMPILE_FLAGS "-msse4.2 -mpclmul")
- ENDIF()
ELSEIF(CMAKE_SYSTEM_PROCESSOR MATCHES "x86_64|amd64|i386|i686")
- MY_CHECK_CXX_COMPILER_FLAG(-msse4.2)
- MY_CHECK_CXX_COMPILER_FLAG(-mpclmul)
- CHECK_INCLUDE_FILE(cpuid.h HAVE_CPUID_H)
- CHECK_INCLUDE_FILE(x86intrin.h HAVE_X86INTRIN_H)
- IF(have_CXX__msse4.2 AND HAVE_CPUID_H)
- ADD_DEFINITIONS(-DHAVE_SSE42)
- IF (have_CXX__mpclmul AND HAVE_X86INTRIN_H)
- ADD_DEFINITIONS(-DHAVE_PCLMUL)
- SET(MYSYS_SOURCES ${MYSYS_SOURCES} crc32/crc32_x86.c)
- SET_SOURCE_FILES_PROPERTIES(crc32/crc32_x86.c PROPERTIES COMPILE_FLAGS "-msse4.2 -mpclmul")
- IF(CMAKE_SIZEOF_VOID_P EQUAL 8)
- SET(MYSYS_SOURCES ${MYSYS_SOURCES} crc32/crc32c_amd64.cc)
- SET_SOURCE_FILES_PROPERTIES(crc32/crc32c_amd64.cc PROPERTIES COMPILE_FLAGS "-msse4.2 -mpclmul")
- ENDIF()
+ SET(MYSYS_SOURCES ${MYSYS_SOURCES} crc32/crc32_x86.c crc32/crc32c_x86.cc)
+ IF(CMAKE_COMPILER_IS_GNUCC AND CMAKE_C_COMPILER_VERSION VERSION_LESS "5")
+ SET_SOURCE_FILES_PROPERTIES(crc32/crc32_x86.c PROPERTIES
+ COMPILE_FLAGS "-msse4.2 -mpclmul")
+ ENDIF()
+ IF(CMAKE_SIZEOF_VOID_P EQUAL 8)
+ SET(MYSYS_SOURCES ${MYSYS_SOURCES} crc32/crc32c_amd64.cc)
+ IF(CMAKE_COMPILER_IS_GNUCC AND CMAKE_C_COMPILER_VERSION VERSION_LESS "5")
+ SET_SOURCE_FILES_PROPERTIES(crc32/crc32c_amd64.cc PROPERTIES
+ COMPILE_FLAGS "-msse4.2 -mpclmul")
ENDIF()
ENDIF()
ELSEIF(CMAKE_SYSTEM_PROCESSOR MATCHES "aarch64|AARCH64")
@@ -164,7 +156,7 @@ ENDIF()
ADD_CONVENIENCE_LIBRARY(mysys ${MYSYS_SOURCES})
MAYBE_DISABLE_IPO(mysys)
-TARGET_LINK_LIBRARIES(mysys dbug strings ${ZLIB_LIBRARY}
+TARGET_LINK_LIBRARIES(mysys dbug strings ${ZLIB_LIBRARIES}
${LIBNSL} ${LIBM} ${LIBRT} ${CMAKE_DL_LIBS} ${LIBSOCKET} ${LIBEXECINFO})
DTRACE_INSTRUMENT(mysys)
diff --git a/mysys/crc32/crc32_arm64.c b/mysys/crc32/crc32_arm64.c
index 0e70c218..6588606a 100644
--- a/mysys/crc32/crc32_arm64.c
+++ b/mysys/crc32/crc32_arm64.c
@@ -1,13 +1,18 @@
#include <my_global.h>
#include <string.h>
#include <stdint.h>
+#include <stddef.h>
-static int pmull_supported;
+typedef unsigned (*my_crc32_t)(unsigned, const void *, size_t);
-#if defined(HAVE_ARMV8_CRC)
+#ifdef HAVE_ARMV8_CRC
-#if defined(__APPLE__)
-#include <sys/sysctl.h>
+# ifdef HAVE_ARMV8_CRYPTO
+static unsigned crc32c_aarch64_pmull(unsigned, const void *, size_t);
+# endif
+
+# ifdef __APPLE__
+# include <sys/sysctl.h>
int crc32_aarch64_available(void)
{
@@ -18,17 +23,17 @@ int crc32_aarch64_available(void)
return ret;
}
-const char *crc32c_aarch64_available(void)
+my_crc32_t crc32c_aarch64_available(void)
{
- if (crc32_aarch64_available() == 0)
- return NULL;
- pmull_supported = 1;
- return "Using ARMv8 crc32 + pmull instructions";
+# ifdef HAVE_ARMV8_CRYPTO
+ if (crc32_aarch64_available())
+ return crc32c_aarch64_pmull;
+# endif
+ return NULL;
}
-
-#else
-#include <sys/auxv.h>
-#if defined(__FreeBSD__)
+# else
+# include <sys/auxv.h>
+# ifdef __FreeBSD__
static unsigned long getauxval(unsigned int key)
{
unsigned long val;
@@ -36,17 +41,17 @@ static unsigned long getauxval(unsigned int key)
return 0ul;
return val;
}
-#else
-# include <asm/hwcap.h>
-#endif
+# else
+# include <asm/hwcap.h>
+# endif
-#ifndef HWCAP_CRC32
-# define HWCAP_CRC32 (1 << 7)
-#endif
+# ifndef HWCAP_CRC32
+# define HWCAP_CRC32 (1 << 7)
+# endif
-#ifndef HWCAP_PMULL
-# define HWCAP_PMULL (1 << 4)
-#endif
+# ifndef HWCAP_PMULL
+# define HWCAP_PMULL (1 << 4)
+# endif
/* ARM made crc32 default from ARMv8.1 but optional in ARMv8A
* Runtime check API.
@@ -56,22 +61,37 @@ int crc32_aarch64_available(void)
unsigned long auxv= getauxval(AT_HWCAP);
return (auxv & HWCAP_CRC32) != 0;
}
+# endif
+
+# ifndef __APPLE__
+static unsigned crc32c_aarch64(unsigned, const void *, size_t);
-const char *crc32c_aarch64_available(void)
+my_crc32_t crc32c_aarch64_available(void)
{
unsigned long auxv= getauxval(AT_HWCAP);
-
if (!(auxv & HWCAP_CRC32))
return NULL;
+# ifdef HAVE_ARMV8_CRYPTO
+ /* Raspberry Pi 4 supports crc32 but doesn't support pmull (MDEV-23030). */
+ if (auxv & HWCAP_PMULL)
+ return crc32c_aarch64_pmull;
+# endif
+ return crc32c_aarch64;
+}
+# endif
- pmull_supported= (auxv & HWCAP_PMULL) != 0;
- if (pmull_supported)
+const char *crc32c_aarch64_impl(my_crc32_t c)
+{
+# ifdef HAVE_ARMV8_CRYPTO
+ if (c == crc32c_aarch64_pmull)
return "Using ARMv8 crc32 + pmull instructions";
- else
+# endif
+# ifndef __APPLE__
+ if (c == crc32c_aarch64)
return "Using ARMv8 crc32 instructions";
+# endif
+ return NULL;
}
-
-#endif /* __APPLE__ */
#endif /* HAVE_ARMV8_CRC */
#ifndef HAVE_ARMV8_CRC_CRYPTO_INTRINSICS
@@ -157,131 +177,14 @@ asm(".arch_extension crypto");
PREF4X64L2(buffer,(PREF_OFFSET), 8) \
PREF4X64L2(buffer,(PREF_OFFSET), 12)
-uint32_t crc32c_aarch64(uint32_t crc, const unsigned char *buffer, uint64_t len)
+#ifndef __APPLE__
+static unsigned crc32c_aarch64(unsigned crc, const void *buf, size_t len)
{
- uint32_t crc0, crc1, crc2;
int64_t length= (int64_t)len;
+ const unsigned char *buffer= buf;
crc^= 0xffffffff;
- /* Pmull runtime check here.
- * Raspberry Pi 4 supports crc32 but doesn't support pmull (MDEV-23030).
- *
- * Consider the condition that the target platform does support hardware crc32
- * but not support PMULL. In this condition, it should leverage the aarch64
- * crc32 instruction (__crc32c) and just only skip parallel computation (pmull/vmull)
- * rather than skip all hardware crc32 instruction of computation.
- */
- if (pmull_supported)
- {
-/* The following Macro (HAVE_ARMV8_CRYPTO) is used for compiling check */
-#ifdef HAVE_ARMV8_CRYPTO
-
-/* Crypto extension Support
- * Parallel computation with 1024 Bytes (per block)
- * Intrinsics Support
- */
-# ifdef HAVE_ARMV8_CRC_CRYPTO_INTRINSICS
- const poly64_t k1= 0xe417f38a, k2= 0x8f158014;
- uint64_t t0, t1;
-
- /* Process per block size of 1024 Bytes
- * A block size = 8 + 42*3*sizeof(uint64_t) + 8
- */
- while ((length-= 1024) >= 0)
- {
- /* Prefetch 3*1024 data for avoiding L2 cache miss */
- PREF1KL2(buffer, 1024*3);
- /* Do first 8 bytes here for better pipelining */
- crc0= __crc32cd(crc, *(const uint64_t *)buffer);
- crc1= 0;
- crc2= 0;
- buffer+= sizeof(uint64_t);
-
- /* Process block inline
- * Process crc0 last to avoid dependency with above
- */
- CRC32C7X3X8(buffer, 0);
- CRC32C7X3X8(buffer, 1);
- CRC32C7X3X8(buffer, 2);
- CRC32C7X3X8(buffer, 3);
- CRC32C7X3X8(buffer, 4);
- CRC32C7X3X8(buffer, 5);
-
- buffer+= 42*3*sizeof(uint64_t);
- /* Prefetch data for following block to avoid L1 cache miss */
- PREF1KL1(buffer, 1024);
-
- /* Last 8 bytes
- * Merge crc0 and crc1 into crc2
- * crc1 multiply by K2
- * crc0 multiply by K1
- */
- t1= (uint64_t)vmull_p64(crc1, k2);
- t0= (uint64_t)vmull_p64(crc0, k1);
- crc= __crc32cd(crc2, *(const uint64_t *)buffer);
- crc1= __crc32cd(0, t1);
- crc^= crc1;
- crc0= __crc32cd(0, t0);
- crc^= crc0;
-
- buffer+= sizeof(uint64_t);
- }
-
-# else /* HAVE_ARMV8_CRC_CRYPTO_INTRINSICS */
-
- /*No intrinsics*/
- __asm__("mov x16, #0xf38a \n\t"
- "movk x16, #0xe417, lsl 16 \n\t"
- "mov v1.2d[0], x16 \n\t"
- "mov x16, #0x8014 \n\t"
- "movk x16, #0x8f15, lsl 16 \n\t"
- "mov v0.2d[0], x16 \n\t"
- :::"x16");
-
- while ((length-= 1024) >= 0)
- {
- PREF1KL2(buffer, 1024*3);
- __asm__("crc32cx %w[c0], %w[c], %x[v]\n\t"
- :[c0]"=r"(crc0):[c]"r"(crc), [v]"r"(*(const uint64_t *)buffer):);
- crc1= 0;
- crc2= 0;
- buffer+= sizeof(uint64_t);
-
- CRC32C7X3X8(buffer, 0);
- CRC32C7X3X8(buffer, 1);
- CRC32C7X3X8(buffer, 2);
- CRC32C7X3X8(buffer, 3);
- CRC32C7X3X8(buffer, 4);
- CRC32C7X3X8(buffer, 5);
-
- buffer+= 42*3*sizeof(uint64_t);
- PREF1KL1(buffer, 1024);
- __asm__("mov v2.2d[0], %x[c1] \n\t"
- "pmull v2.1q, v2.1d, v0.1d \n\t"
- "mov v3.2d[0], %x[c0] \n\t"
- "pmull v3.1q, v3.1d, v1.1d \n\t"
- "crc32cx %w[c], %w[c2], %x[v] \n\t"
- "mov %x[c1], v2.2d[0] \n\t"
- "crc32cx %w[c1], wzr, %x[c1] \n\t"
- "eor %w[c], %w[c], %w[c1] \n\t"
- "mov %x[c0], v3.2d[0] \n\t"
- "crc32cx %w[c0], wzr, %x[c0] \n\t"
- "eor %w[c], %w[c], %w[c0] \n\t"
- :[c1]"+r"(crc1), [c0]"+r"(crc0), [c2]"+r"(crc2), [c]"+r"(crc)
- :[v]"r"(*((const uint64_t *)buffer)));
- buffer+= sizeof(uint64_t);
- }
-# endif /* HAVE_ARMV8_CRC_CRYPTO_INTRINSICS */
-
- /* Done if Input data size is aligned with 1024 */
- if (!(length+= 1024))
- return ~crc;
-
-#endif /* HAVE_ARMV8_CRYPTO */
-
- } // end if pmull_supported
-
while ((length-= sizeof(uint64_t)) >= 0)
{
CRC32CX(crc, *(uint64_t *)buffer);
@@ -306,6 +209,143 @@ uint32_t crc32c_aarch64(uint32_t crc, const unsigned char *buffer, uint64_t len)
return ~crc;
}
+#endif
+
+#ifdef HAVE_ARMV8_CRYPTO
+static unsigned crc32c_aarch64_pmull(unsigned crc, const void *buf, size_t len)
+{
+ int64_t length= (int64_t)len;
+ const unsigned char *buffer= buf;
+
+ crc^= 0xffffffff;
+
+ /* Crypto extension Support
+ * Parallel computation with 1024 Bytes (per block)
+ * Intrinsics Support
+ */
+# ifdef HAVE_ARMV8_CRC_CRYPTO_INTRINSICS
+ /* Process per block size of 1024 Bytes
+ * A block size = 8 + 42*3*sizeof(uint64_t) + 8
+ */
+ for (const poly64_t k1= 0xe417f38a, k2= 0x8f158014; (length-= 1024) >= 0; )
+ {
+ uint32_t crc0, crc1, crc2;
+ uint64_t t0, t1;
+ /* Prefetch 3*1024 data for avoiding L2 cache miss */
+ PREF1KL2(buffer, 1024*3);
+ /* Do first 8 bytes here for better pipelining */
+ crc0= __crc32cd(crc, *(const uint64_t *)buffer);
+ crc1= 0;
+ crc2= 0;
+ buffer+= sizeof(uint64_t);
+
+ /* Process block inline
+ * Process crc0 last to avoid dependency with above
+ */
+ CRC32C7X3X8(buffer, 0);
+ CRC32C7X3X8(buffer, 1);
+ CRC32C7X3X8(buffer, 2);
+ CRC32C7X3X8(buffer, 3);
+ CRC32C7X3X8(buffer, 4);
+ CRC32C7X3X8(buffer, 5);
+
+ buffer+= 42*3*sizeof(uint64_t);
+ /* Prefetch data for following block to avoid L1 cache miss */
+ PREF1KL1(buffer, 1024);
+
+ /* Last 8 bytes
+ * Merge crc0 and crc1 into crc2
+ * crc1 multiply by K2
+ * crc0 multiply by K1
+ */
+ t1= (uint64_t)vmull_p64(crc1, k2);
+ t0= (uint64_t)vmull_p64(crc0, k1);
+ crc= __crc32cd(crc2, *(const uint64_t *)buffer);
+ crc1= __crc32cd(0, t1);
+ crc^= crc1;
+ crc0= __crc32cd(0, t0);
+ crc^= crc0;
+
+ buffer+= sizeof(uint64_t);
+ }
+
+# else /* HAVE_ARMV8_CRC_CRYPTO_INTRINSICS */
+ /*No intrinsics*/
+ __asm__("mov x16, #0xf38a \n\t"
+ "movk x16, #0xe417, lsl 16 \n\t"
+ "mov v1.2d[0], x16 \n\t"
+ "mov x16, #0x8014 \n\t"
+ "movk x16, #0x8f15, lsl 16 \n\t"
+ "mov v0.2d[0], x16 \n\t"
+ :::"x16");
+
+ while ((length-= 1024) >= 0)
+ {
+ uint32_t crc0, crc1, crc2;
+
+ PREF1KL2(buffer, 1024*3);
+ __asm__("crc32cx %w[c0], %w[c], %x[v]\n\t"
+ :[c0]"=r"(crc0):[c]"r"(crc), [v]"r"(*(const uint64_t *)buffer):);
+ crc1= 0;
+ crc2= 0;
+ buffer+= sizeof(uint64_t);
+
+ CRC32C7X3X8(buffer, 0);
+ CRC32C7X3X8(buffer, 1);
+ CRC32C7X3X8(buffer, 2);
+ CRC32C7X3X8(buffer, 3);
+ CRC32C7X3X8(buffer, 4);
+ CRC32C7X3X8(buffer, 5);
+
+ buffer+= 42*3*sizeof(uint64_t);
+ PREF1KL1(buffer, 1024);
+ __asm__("mov v2.2d[0], %x[c1] \n\t"
+ "pmull v2.1q, v2.1d, v0.1d \n\t"
+ "mov v3.2d[0], %x[c0] \n\t"
+ "pmull v3.1q, v3.1d, v1.1d \n\t"
+ "crc32cx %w[c], %w[c2], %x[v] \n\t"
+ "mov %x[c1], v2.2d[0] \n\t"
+ "crc32cx %w[c1], wzr, %x[c1] \n\t"
+ "eor %w[c], %w[c], %w[c1] \n\t"
+ "mov %x[c0], v3.2d[0] \n\t"
+ "crc32cx %w[c0], wzr, %x[c0] \n\t"
+ "eor %w[c], %w[c], %w[c0] \n\t"
+ :[c1]"+r"(crc1), [c0]"+r"(crc0), [c2]"+r"(crc2), [c]"+r"(crc)
+ :[v]"r"(*((const uint64_t *)buffer)));
+ buffer+= sizeof(uint64_t);
+ }
+# endif /* HAVE_ARMV8_CRC_CRYPTO_INTRINSICS */
+
+ /* Done if Input data size is aligned with 1024 */
+ length+= 1024;
+ if (length)
+ {
+ while ((length-= sizeof(uint64_t)) >= 0)
+ {
+ CRC32CX(crc, *(uint64_t *)buffer);
+ buffer+= sizeof(uint64_t);
+ }
+
+ /* The following is more efficient than the straight loop */
+ if (length & sizeof(uint32_t))
+ {
+ CRC32CW(crc, *(uint32_t *)buffer);
+ buffer+= sizeof(uint32_t);
+ }
+
+ if (length & sizeof(uint16_t))
+ {
+ CRC32CH(crc, *(uint16_t *)buffer);
+ buffer+= sizeof(uint16_t);
+ }
+
+ if (length & sizeof(uint8_t))
+ CRC32CB(crc, *buffer);
+ }
+
+ return ~crc;
+}
+#endif /* HAVE_ARMV8_CRYPTO */
/* There are multiple approaches to calculate crc.
Approach-1: Process 8 bytes then 4 bytes then 2 bytes and then 1 bytes
diff --git a/mysys/crc32/crc32_x86.c b/mysys/crc32/crc32_x86.c
index f077399c..ab2522d6 100644
--- a/mysys/crc32/crc32_x86.c
+++ b/mysys/crc32/crc32_x86.c
@@ -56,11 +56,16 @@
#include <stddef.h>
#ifdef __GNUC__
-#include <x86intrin.h>
+# include <emmintrin.h>
+# include <smmintrin.h>
+# include <tmmintrin.h>
+# include <wmmintrin.h>
+# define USE_PCLMUL __attribute__((target("sse4.2,pclmul")))
#elif defined(_MSC_VER)
-#include <intrin.h>
+# include <intrin.h>
+# define USE_PCLMUL /* nothing */
#else
-#error "unknown compiler"
+# error "unknown compiler"
#endif
/**
@@ -71,6 +76,7 @@
*
* @return \a reg << (\a num * 8)
*/
+USE_PCLMUL
static inline __m128i xmm_shift_left(__m128i reg, const unsigned int num)
{
static const MY_ALIGNED(16) uint8_t crc_xmm_shift_tab[48]= {
@@ -111,6 +117,7 @@ struct crcr_pclmulqdq_ctx
*
* @return New 16 byte folded data
*/
+USE_PCLMUL
static inline __m128i crcr32_folding_round(const __m128i data_block,
const __m128i precomp, const __m128i fold)
{
@@ -128,6 +135,7 @@ static inline __m128i crcr32_folding_round(const __m128i data_block,
*
* @return data reduced to 64 bits
*/
+USE_PCLMUL
static inline __m128i crcr32_reduce_128_to_64(__m128i data128, const __m128i precomp)
{
__m128i tmp0, tmp1, tmp2;
@@ -152,6 +160,7 @@ static inline __m128i crcr32_reduce_128_to_64(__m128i data128, const __m128i pre
*
* @return data reduced to 32 bits
*/
+USE_PCLMUL
static inline uint32_t crcr32_reduce_64_to_32(__m128i data64, const __m128i precomp)
{
static const MY_ALIGNED(16) uint32_t mask1[4]= {
@@ -188,6 +197,7 @@ static inline uint32_t crcr32_reduce_64_to_32(__m128i data64, const __m128i prec
*
* @return CRC for given \a data block (32 bits wide).
*/
+USE_PCLMUL
static inline uint32_t crcr32_calc_pclmulqdq(const uint8_t *data, uint32_t data_len,
uint32_t crc,
const struct crcr_pclmulqdq_ctx *params)
diff --git a/mysys/crc32/crc32c.cc b/mysys/crc32/crc32c.cc
index 2bec041e..32a45478 100644
--- a/mysys/crc32/crc32c.cc
+++ b/mysys/crc32/crc32c.cc
@@ -19,52 +19,23 @@
#include <stddef.h>
#include <stdint.h>
#include <my_global.h>
-#include <my_byteorder.h>
-static inline uint32_t DecodeFixed32(const char *ptr)
-{
- return uint4korr(ptr);
-}
-
-#include <stdint.h>
-#ifdef _MSC_VER
-#include <intrin.h>
-#endif
-
-#ifdef HAVE_SSE42
-# ifdef __GNUC__
-# include <cpuid.h>
-# if __GNUC__ < 5 && !defined __clang__
-/* the headers do not really work in GCC before version 5 */
-# define _mm_crc32_u8(crc,data) __builtin_ia32_crc32qi(crc,data)
-# define _mm_crc32_u32(crc,data) __builtin_ia32_crc32si(crc,data)
-# define _mm_crc32_u64(crc,data) __builtin_ia32_crc32di(crc,data)
-# else
-# include <nmmintrin.h>
-# endif
-# define USE_SSE42 __attribute__((target("sse4.2")))
-# else
-# define USE_SSE42 /* nothing */
-# endif
-#endif
-
#ifdef __powerpc64__
-#include "crc32c_ppc.h"
-
-#if __linux__
-#include <sys/auxv.h>
+# include "crc32c_ppc.h"
+# ifdef __linux__
+# include <sys/auxv.h>
-#ifndef PPC_FEATURE2_VEC_CRYPTO
-#define PPC_FEATURE2_VEC_CRYPTO 0x02000000
-#endif
+# ifndef PPC_FEATURE2_VEC_CRYPTO
+# define PPC_FEATURE2_VEC_CRYPTO 0x02000000
+# endif
-#ifndef AT_HWCAP2
-#define AT_HWCAP2 26
+# ifndef AT_HWCAP2
+# define AT_HWCAP2 26
+# endif
+# endif
#endif
-#endif /* __linux__ */
-
-#endif
+typedef unsigned (*my_crc32_t)(unsigned, const void *, size_t);
namespace mysys_namespace {
namespace crc32c {
@@ -75,6 +46,7 @@ static int arch_ppc_crc32 = 0;
#endif /* __powerpc64__ */
#endif
+alignas(CPU_LEVEL1_DCACHE_LINESIZE)
static const uint32_t table0_[256] = {
0x00000000, 0xf26b8303, 0xe13b70f7, 0x1350f3f4,
0xc79a971f, 0x35f1141c, 0x26a1e7e8, 0xd4ca64eb,
@@ -341,8 +313,9 @@ static const uint32_t table3_[256] = {
};
// Used to fetch a naturally-aligned 32-bit word in little endian byte-order
-static inline uint32_t LE_LOAD32(const uint8_t *p) {
- return DecodeFixed32(reinterpret_cast<const char*>(p));
+static inline uint32_t LE_LOAD32(const uint8_t *p)
+{
+ return uint4korr(reinterpret_cast<const char*>(p));
}
static inline void Slow_CRC32(uint64_t* l, uint8_t const **p)
@@ -362,10 +335,7 @@ static inline void Slow_CRC32(uint64_t* l, uint8_t const **p)
table0_[c >> 24];
}
-#ifdef ALIGN
#undef ALIGN
-#endif
-
// Align n to (1 << m) byte boundary
#define ALIGN(n, m) ((n + ((1 << m) - 1)) & ~((1 << m) - 1))
@@ -374,70 +344,30 @@ static inline void Slow_CRC32(uint64_t* l, uint8_t const **p)
l = table0_[c] ^ (l >> 8); \
} while (0)
-static uint32_t crc32c_slow(uint32_t crc, const char* buf, size_t size)
-{
- const uint8_t *p = reinterpret_cast<const uint8_t *>(buf);
- const uint8_t *e = p + size;
- uint64_t l = crc ^ 0xffffffffu;
-
- // Point x at first 16-byte aligned byte in string. This might be
- // just past the end of the string.
- const uintptr_t pval = reinterpret_cast<uintptr_t>(p);
- const uint8_t* x = reinterpret_cast<const uint8_t*>(ALIGN(pval, 4));
- if (x <= e)
- // Process bytes until finished or p is 16-byte aligned
- while (p != x)
- STEP1;
- // Process bytes 16 at a time
- while ((e-p) >= 16)
- {
- Slow_CRC32(&l, &p);
- Slow_CRC32(&l, &p);
- }
- // Process bytes 8 at a time
- while ((e-p) >= 8)
- Slow_CRC32(&l, &p);
- // Process the last few bytes
- while (p != e)
- STEP1;
- return static_cast<uint32_t>(l ^ 0xffffffffu);
-}
-
-#if defined HAVE_POWER8
-#elif defined HAVE_ARMV8_CRC
-#elif defined HAVE_SSE42
-constexpr uint32_t cpuid_ecx_SSE42= 1U << 20;
-constexpr uint32_t cpuid_ecx_SSE42_AND_PCLMUL= cpuid_ecx_SSE42 | 1U<<1;
-
-static uint32_t cpuid_ecx()
-{
-#ifdef __GNUC__
- uint32_t reax= 0, rebx= 0, recx= 0, redx= 0;
- __cpuid(1, reax, rebx, recx, redx);
- return recx;
-#elif defined _MSC_VER
- int regs[4];
- __cpuid(regs, 1);
- return regs[2];
-#else
-# error "unknown compiler"
+#undef USE_SSE42
+#if defined _MSC_VER && (defined _M_X64 || defined _M_IX86)
+# include <intrin.h>
+# include <immintrin.h>
+# define USE_SSE42 /* nothing */
+#elif defined __GNUC__ && (defined __i386__||defined __x86_64__)
+# if __GNUC__ < 5 && !defined __clang_major__
+/* the headers do not really work in GCC before version 5 */
+# define _mm_crc32_u8(crc,data) __builtin_ia32_crc32qi(crc,data)
+# define _mm_crc32_u32(crc,data) __builtin_ia32_crc32si(crc,data)
+# define _mm_crc32_u64(crc,data) __builtin_ia32_crc32di(crc,data)
+# else
+# include <nmmintrin.h>
+# endif
+# define USE_SSE42 __attribute__((target("sse4.2")))
#endif
-}
-
-extern "C" int crc32_pclmul_enabled(void)
-{
- return !(~cpuid_ecx() & cpuid_ecx_SSE42_AND_PCLMUL);
-}
-
-#if SIZEOF_SIZE_T == 8
-extern "C" uint32_t crc32c_3way(uint32_t crc, const char *buf, size_t len);
-USE_SSE42
+#ifdef USE_SSE42
+# if SIZEOF_SIZE_T == 8
static inline uint64_t LE_LOAD64(const uint8_t *ptr)
{
return uint8korr(reinterpret_cast<const char*>(ptr));
}
-#endif
+# endif
USE_SSE42
static inline void Fast_CRC32(uint64_t* l, uint8_t const **p)
@@ -453,10 +383,11 @@ static inline void Fast_CRC32(uint64_t* l, uint8_t const **p)
# endif
}
+extern "C"
USE_SSE42
-static uint32_t crc32c_sse42(uint32_t crc, const char* buf, size_t size)
+unsigned crc32c_sse42(unsigned crc, const void* buf, size_t size)
{
- const uint8_t *p = reinterpret_cast<const uint8_t *>(buf);
+ const uint8_t *p = static_cast<const uint8_t *>(buf);
const uint8_t *e = p + size;
uint64_t l = crc ^ 0xffffffffu;
@@ -484,107 +415,111 @@ static uint32_t crc32c_sse42(uint32_t crc, const char* buf, size_t size)
}
#endif
-typedef uint32_t (*Function)(uint32_t, const char*, size_t);
+static unsigned crc32c_slow(unsigned crc, const void* buf, size_t size)
+{
+ const uint8_t *p = static_cast<const uint8_t *>(buf);
+ const uint8_t *e = p + size;
+ uint64_t l = crc ^ 0xffffffffu;
-#if defined(HAVE_POWER8) && defined(HAS_ALTIVEC)
-uint32_t ExtendPPCImpl(uint32_t crc, const char *buf, size_t size) {
- return crc32c_ppc(crc, (const unsigned char *)buf, size);
+ // Point x at first 16-byte aligned byte in string. This might be
+ // just past the end of the string.
+ const uintptr_t pval = reinterpret_cast<uintptr_t>(p);
+ const uint8_t* x = reinterpret_cast<const uint8_t*>(ALIGN(pval, 4));
+ if (x <= e)
+ // Process bytes until finished or p is 16-byte aligned
+ while (p != x)
+ STEP1;
+ // Process bytes 16 at a time
+ while ((e-p) >= 16)
+ {
+ Slow_CRC32(&l, &p);
+ Slow_CRC32(&l, &p);
+ }
+ // Process bytes 8 at a time
+ while ((e-p) >= 8)
+ Slow_CRC32(&l, &p);
+ // Process the last few bytes
+ while (p != e)
+ STEP1;
+ return static_cast<uint32_t>(l ^ 0xffffffffu);
}
-#if __linux__
+#if defined(HAVE_POWER8) && defined(HAS_ALTIVEC)
+# ifdef __linux__
static int arch_ppc_probe(void) {
arch_ppc_crc32 = 0;
-#if defined(__powerpc64__)
+# if defined(__powerpc64__)
if (getauxval(AT_HWCAP2) & PPC_FEATURE2_VEC_CRYPTO) arch_ppc_crc32 = 1;
-#endif /* __powerpc64__ */
+# endif /* __powerpc64__ */
return arch_ppc_crc32;
}
-#elif __FreeBSD_version >= 1200000
-#include <machine/cpu.h>
-#include <sys/auxv.h>
-#include <sys/elf_common.h>
+# elif defined __FreeBSD_version && __FreeBSD_version >= 1200000
+# include <machine/cpu.h>
+# include <sys/auxv.h>
+# include <sys/elf_common.h>
static int arch_ppc_probe(void) {
unsigned long cpufeatures;
arch_ppc_crc32 = 0;
-#if defined(__powerpc64__)
+# if defined(__powerpc64__)
elf_aux_info(AT_HWCAP2, &cpufeatures, sizeof(cpufeatures));
if (cpufeatures & PPC_FEATURE2_HAS_VEC_CRYPTO) arch_ppc_crc32 = 1;
-#endif /* __powerpc64__ */
+# endif /* __powerpc64__ */
return arch_ppc_crc32;
}
-#elif defined(_AIX) || defined(__OpenBSD__)
+# elif defined(_AIX) || defined(__OpenBSD__)
static int arch_ppc_probe(void) {
arch_ppc_crc32 = 0;
-#if defined(__powerpc64__)
+# if defined(__powerpc64__)
// AIX 7.1+/OpenBSD has vector crypto features on all POWER 8+
arch_ppc_crc32 = 1;
-#endif /* __powerpc64__ */
+# endif /* __powerpc64__ */
return arch_ppc_crc32;
}
-#endif // __linux__
+# endif
#endif
#if defined(HAVE_ARMV8_CRC)
-extern "C" const char *crc32c_aarch64_available(void);
-extern "C" uint32_t crc32c_aarch64(uint32_t crc, const unsigned char *buffer, uint64_t len);
-
-static uint32_t ExtendARMImpl(uint32_t crc, const char *buf, size_t size) {
- return crc32c_aarch64(crc, (const unsigned char *)buf, (size_t) size);
-}
+extern "C" my_crc32_t crc32c_aarch64_available(void);
+extern "C" const char *crc32c_aarch64_impl(my_crc32_t);
+#elif defined __i386__||defined __x86_64__||defined _M_X64||defined _M_IX86
+extern "C" my_crc32_t crc32c_x86_available(void);
+extern "C" const char *crc32c_x86_impl(my_crc32_t);
#endif
-static inline Function Choose_Extend()
+static inline my_crc32_t Choose_Extend()
{
#if defined HAVE_POWER8 && defined HAS_ALTIVEC
if (arch_ppc_probe())
- return ExtendPPCImpl;
-#elif defined(HAVE_ARMV8_CRC)
- if (crc32c_aarch64_available())
- return ExtendARMImpl;
-#elif HAVE_SSE42
-# if defined HAVE_PCLMUL && SIZEOF_SIZE_T == 8
- switch (cpuid_ecx() & cpuid_ecx_SSE42_AND_PCLMUL) {
- case cpuid_ecx_SSE42_AND_PCLMUL:
- return crc32c_3way;
- case cpuid_ecx_SSE42:
- return crc32c_sse42;
- }
-# else
- if (cpuid_ecx() & cpuid_ecx_SSE42)
- return crc32c_sse42;
-# endif
+ return crc32c_ppc;
+#elif defined HAVE_ARMV8_CRC
+ if (my_crc32_t crc= crc32c_aarch64_available())
+ return crc;
+#elif defined __i386__||defined __x86_64__||defined _M_X64||defined _M_IX86
+ if (my_crc32_t crc= crc32c_x86_available())
+ return crc;
#endif
return crc32c_slow;
}
-static const Function ChosenExtend= Choose_Extend();
-
-static inline uint32_t Extend(uint32_t crc, const char* buf, size_t size)
-{
- return ChosenExtend(crc, buf, size);
-}
+static const my_crc32_t ChosenExtend= Choose_Extend();
extern "C" const char *my_crc32c_implementation()
{
-#if defined(HAVE_POWER8) && defined(HAS_ALTIVEC)
- if (ChosenExtend == ExtendPPCImpl)
+#if defined HAVE_POWER8 && defined HAS_ALTIVEC
+ if (ChosenExtend == crc32c_ppc)
return "Using POWER8 crc32 instructions";
-#elif defined(HAVE_ARMV8_CRC)
- if (const char *ret= crc32c_aarch64_available())
+#elif defined HAVE_ARMV8_CRC
+ if (const char *ret= crc32c_aarch64_impl(ChosenExtend))
+ return ret;
+#elif defined __i386__||defined __x86_64__||defined _M_X64||defined _M_IX86
+ if (const char *ret= crc32c_x86_impl(ChosenExtend))
return ret;
-#elif HAVE_SSE42
-# if defined HAVE_PCLMUL && SIZEOF_SIZE_T == 8
- if (ChosenExtend == crc32c_3way)
- return "Using crc32 + pclmulqdq instructions";
-# endif
- if (ChosenExtend == crc32c_sse42)
- return "Using SSE4.2 crc32 instructions";
#endif
return "Using generic crc32 instructions";
}
@@ -593,5 +528,5 @@ extern "C" const char *my_crc32c_implementation()
extern "C" unsigned my_crc32c(unsigned int crc, const char *buf, size_t size)
{
- return mysys_namespace::crc32c::Extend(crc,buf, size);
+ return mysys_namespace::crc32c::ChosenExtend(crc,buf, size);
}
diff --git a/mysys/crc32/crc32c_amd64.cc b/mysys/crc32/crc32c_amd64.cc
index 22c492b4..147c0cca 100644
--- a/mysys/crc32/crc32c_amd64.cc
+++ b/mysys/crc32/crc32c_amd64.cc
@@ -47,6 +47,11 @@
#include <nmmintrin.h>
#include <wmmintrin.h>
+#ifdef _MSC_VER
+# define USE_PCLMUL /* nothing */
+#else
+# define USE_PCLMUL __attribute__((target("sse4.2,pclmul")))
+#endif
#define CRCtriplet(crc, buf, offset) \
crc##0 = _mm_crc32_u64(crc##0, *(buf##0 + offset)); \
@@ -131,6 +136,7 @@ static const uint64_t clmul_constants alignas(16) [] = {
};
// Compute the crc32c value for buffer smaller than 8
+USE_PCLMUL
static inline void align_to_8(
size_t len,
uint64_t& crc0, // crc so far, updated on return
@@ -155,6 +161,7 @@ static inline void align_to_8(
// CombineCRC performs pclmulqdq multiplication of 2 partial CRC's and a well
// chosen constant and xor's these with the remaining CRC.
//
+USE_PCLMUL
static inline uint64_t CombineCRC(
size_t block_size,
uint64_t crc0,
@@ -176,6 +183,7 @@ static inline uint64_t CombineCRC(
// Compute CRC-32C using the Intel hardware instruction.
extern "C"
+USE_PCLMUL
uint32_t crc32c_3way(uint32_t crc, const char *buf, size_t len)
{
const unsigned char* next = (const unsigned char*)buf;
diff --git a/mysys/crc32/crc32c_ppc.h b/mysys/crc32/crc32c_ppc.h
index c359061c..797e849b 100644
--- a/mysys/crc32/crc32c_ppc.h
+++ b/mysys/crc32/crc32c_ppc.h
@@ -11,8 +11,7 @@
extern "C" {
#endif
-extern uint32_t crc32c_ppc(uint32_t crc, unsigned char const *buffer,
- unsigned len);
+extern unsigned crc32c_ppc(unsigned crc, const void *buffer, size_t len);
#ifdef __cplusplus
}
diff --git a/mysys/crc32/crc32c_x86.cc b/mysys/crc32/crc32c_x86.cc
new file mode 100644
index 00000000..02dbf292
--- /dev/null
+++ b/mysys/crc32/crc32c_x86.cc
@@ -0,0 +1,457 @@
+/* Copyright (c) 2024, MariaDB plc
+
+ This program is free software; you can redistribute it and/or modify
+ it under the terms of the GNU General Public License as published by
+ the Free Software Foundation; version 2 of the License.
+
+ This program is distributed in the hope that it will be useful,
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ GNU General Public License for more details.
+
+ You should have received a copy of the GNU General Public License
+ along with this program; if not, write to the Free Software
+ Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1335 USA */
+
+#include <my_global.h>
+#include <cstddef>
+#include <cstdint>
+
+#ifdef _MSC_VER
+# include <intrin.h>
+# if 0 /* So far, we have no environment where this could be tested. */
+# define USE_VPCLMULQDQ /* nothing */
+# endif
+#else
+# include <cpuid.h>
+# if __GNUC__ >= 11 || (defined __clang_major__ && __clang_major__ >= 8)
+# define TARGET "pclmul,avx512f,avx512dq,avx512bw,avx512vl,vpclmulqdq"
+# define USE_VPCLMULQDQ __attribute__((target(TARGET)))
+# endif
+#endif
+
+extern "C" unsigned crc32c_sse42(unsigned crc, const void* buf, size_t size);
+
+constexpr uint32_t cpuid_ecx_SSE42= 1U << 20;
+constexpr uint32_t cpuid_ecx_SSE42_AND_PCLMUL= cpuid_ecx_SSE42 | 1U << 1;
+
+static uint32_t cpuid_ecx()
+{
+#ifdef __GNUC__
+ uint32_t reax= 0, rebx= 0, recx= 0, redx= 0;
+ __cpuid(1, reax, rebx, recx, redx);
+ return recx;
+#elif defined _MSC_VER
+ int regs[4];
+ __cpuid(regs, 1);
+ return regs[2];
+#else
+# error "unknown compiler"
+#endif
+}
+
+typedef unsigned (*my_crc32_t)(unsigned, const void *, size_t);
+extern "C" unsigned int crc32_pclmul(unsigned int, const void *, size_t);
+extern "C" unsigned int crc32c_3way(unsigned int, const void *, size_t);
+
+#ifdef USE_VPCLMULQDQ
+# include <immintrin.h>
+
+# ifdef _MSC_VER
+/* MSVC does not seem to define this intrinsic for vmovdqa */
+# define _mm_load_epi32(x) *reinterpret_cast<const __m128i*>(x)
+# endif
+
+/*
+ This implementation is based on
+ crc32_by16_vclmul_avx512 and crc32_refl_by16_vclmul_avx512
+ in https://github.com/intel/intel-ipsec-mb/ with some optimizations.
+ The // comments in crc32_avx512() correspond to assembler labels.
+*/
+
+/** table of constants corresponding to a CRC polynomial up to degree 32 */
+struct alignas(64) crc32_tab
+{
+ const uint64_t b2048[2], b1024[2];
+ alignas(64) const uint64_t b896[6]; /* includes b786, b640 */
+ const uint64_t b512[2];
+ const uint64_t b384[2], b256[2], b128[2], zeropad_for_b384[2];
+ const uint64_t b64[2], b32[2];
+};
+
+/** ISO 3309 CRC-32 (reflected polynomial 0x04C11DB7); zlib crc32() */
+static const crc32_tab refl32 = {
+ { 0x00000000e95c1271, 0x00000000ce3371cb },
+ { 0x00000000910eeec1, 0x0000000033fff533 },
+ { 0x000000000cbec0ed, 0x0000000031f8303f,
+ 0x0000000057c54819, 0x00000000df068dc2,
+ 0x00000000ae0b5394, 0x000000001c279815 },
+ { 0x000000001d9513d7, 0x000000008f352d95 },
+ { 0x00000000af449247, 0x000000003db1ecdc },
+ { 0x0000000081256527, 0x00000000f1da05aa },
+ { 0x00000000ccaa009e, 0x00000000ae689191 },
+ { 0, 0 },
+ { 0x00000000ccaa009e, 0x00000000b8bc6765 },
+ { 0x00000001f7011640, 0x00000001db710640 }
+};
+
+/** Castagnoli CRC-32C (reflected polynomial 0x1EDC6F41) */
+static const crc32_tab refl32c = {
+ { 0x00000000b9e02b86, 0x00000000dcb17aa4 },
+ { 0x000000000d3b6092, 0x000000006992cea2 },
+ { 0x0000000047db8317, 0x000000002ad91c30,
+ 0x000000000715ce53, 0x00000000c49f4f67,
+ 0x0000000039d3b296, 0x00000000083a6eec },
+ { 0x000000009e4addf8, 0x00000000740eef02 },
+ { 0x00000000ddc0152b, 0x000000001c291d04 },
+ { 0x00000000ba4fc28e, 0x000000003da6d0cb },
+ { 0x00000000493c7d27, 0x00000000f20c0dfe },
+ { 0, 0 },
+ { 0x00000000493c7d27, 0x00000000dd45aab8 },
+ { 0x00000000dea713f0, 0x0000000105ec76f0 }
+};
+
+/** Some ternary functions */
+class ternary
+{
+ static constexpr uint8_t A = 0b11110000;
+ static constexpr uint8_t B = 0b11001100;
+ static constexpr uint8_t C = 0b10101010;
+public:
+ static constexpr uint8_t XOR3 = A ^ B ^ C;
+ static constexpr uint8_t XNOR3 = uint8_t(~(A ^ B ^ C));
+ static constexpr uint8_t XOR2_AND = (A ^ B) & C;
+};
+
+USE_VPCLMULQDQ
+/** @return a^b^c */
+static inline __m128i xor3_128(__m128i a, __m128i b, __m128i c)
+{
+ return _mm_ternarylogic_epi64(a, b, c, ternary::XOR3);
+}
+
+USE_VPCLMULQDQ
+/** @return ~(a^b^c) */
+static inline __m128i xnor3_128(__m128i a, __m128i b, __m128i c)
+{
+ return _mm_ternarylogic_epi64(a, b, c, ternary::XNOR3);
+}
+
+USE_VPCLMULQDQ
+/** @return a^b^c */
+static inline __m512i xor3_512(__m512i a, __m512i b, __m512i c)
+{
+ return _mm512_ternarylogic_epi64(a, b, c, ternary::XOR3);
+}
+
+USE_VPCLMULQDQ
+/** @return (a^b)&c */
+static inline __m128i xor2_and_128(__m128i a, __m128i b, __m128i c)
+{
+ return _mm_ternarylogic_epi64(a, b, c, ternary::XOR2_AND);
+}
+
+USE_VPCLMULQDQ
+/** Load 64 bytes */
+static inline __m512i load512(const char *b) { return _mm512_loadu_epi8(b); }
+
+USE_VPCLMULQDQ
+/** Load 16 bytes */
+static inline __m128i load128(const char *b) { return _mm_loadu_epi64(b); }
+
+/** Combine 512 data bits with CRC */
+USE_VPCLMULQDQ
+static inline __m512i combine512(__m512i a, __m512i tab, __m512i b)
+{
+ return xor3_512(b, _mm512_clmulepi64_epi128(a, tab, 0x01),
+ _mm512_clmulepi64_epi128(a, tab, 0x10));
+}
+
+# define xor512(a, b) _mm512_xor_epi64(a, b)
+# define xor256(a, b) _mm256_xor_epi64(a, b)
+# define xor128(a, b) _mm_xor_epi64(a, b)
+# define and128(a, b) _mm_and_si128(a, b)
+
+template<uint8_t bits> USE_VPCLMULQDQ
+/** Pick a 128-bit component of a 512-bit vector */
+static inline __m512i extract512_128(__m512i a)
+{
+ static_assert(bits <= 3, "usage");
+# if defined __GNUC__ && __GNUC__ >= 11
+ /* While technically incorrect, this would seem to translate into a
+ vextracti32x4 instruction, which actually outputs a ZMM register
+ (anything above the XMM range is cleared). */
+ return _mm512_castsi128_si512(_mm512_extracti64x2_epi64(a, bits));
+# else
+ /* On clang, this is needed in order to get a correct result. */
+ return _mm512_maskz_shuffle_i64x2(3, a, a, bits);
+# endif
+}
+
+alignas(16) static const uint64_t shuffle128[4] = {
+ 0x8786858483828100, 0x8f8e8d8c8b8a8988,
+ 0x0706050403020100, 0x000e0d0c0b0a0908
+};
+
+static const __mmask16 size_mask[16] = {
+ 0x0001, 0x0003, 0x0007, 0x000f, 0x001f, 0x003f, 0x007f, 0x00ff,
+ 0x01ff, 0x03ff, 0x07ff, 0x0fff, 0x1fff, 0x3fff, 0x7fff, 0xffff
+};
+
+alignas(16) static const uint64_t shift128[4] = {
+ 0x8786858483828100, 0x8f8e8d8c8b8a8988,
+ 0x0706050403020100, 0x000e0d0c0b0a0908
+};
+
+static const char shift_1_to_3_reflect[7 + 11] = {
+ -1, -1, -1, -1, -1, -1, -1,
+ 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10
+};
+
+USE_VPCLMULQDQ
+static unsigned crc32_avx512(unsigned crc, const char *buf, size_t size,
+ const crc32_tab &tab)
+{
+ const __m512i crc_in = _mm512_castsi128_si512(_mm_cvtsi32_si128(~crc)),
+ b512 = _mm512_broadcast_i32x4(_mm_load_epi32(tab.b512));
+ __m128i crc_out;
+ __m512i lo;
+
+ if (size >= 256) {
+ lo = xor512(load512(buf), crc_in);
+ __m512i l1 = load512(buf + 64);
+
+ const __m512i b1024 = _mm512_broadcast_i32x4(_mm_load_epi32(&tab.b1024));
+ size -= 256;
+ if (size >= 256) {
+ __m512i h0 = load512(buf + 128),
+ hi = load512(buf + 192);
+ const __m512i b2048 = _mm512_broadcast_i32x4(_mm_load_epi32(&tab.b2048));
+ size -= 256;
+ do {
+ buf += 256;
+ lo = combine512(lo, b2048, load512(buf));
+ l1 = combine512(l1, b2048, load512(buf + 64));
+ h0 = combine512(h0, b2048, load512(buf + 128));
+ hi = combine512(hi, b2048, load512(buf + 192));
+ size -= 256;
+ } while (ssize_t(size) >= 0);
+
+ buf += 256;
+ lo = combine512(lo, b1024, h0);
+ l1 = combine512(l1, b1024, hi);
+ size += 128;
+ } else {
+ do {
+ buf += 128;
+ lo = combine512(lo, b1024, load512(buf));
+ l1 = combine512(l1, b1024, load512(buf + 64));
+ size -= 128;
+ } while (ssize_t(size) >= 0);
+
+ buf += 128;
+ }
+
+ if (ssize_t(size) >= -64) {
+ size += 128;
+ lo = combine512(lo, b512, l1);
+ goto fold_64_B_loop;
+ }
+
+ const __m512i
+ b896 = _mm512_load_epi32(&tab.b896),
+ b384 = _mm512_load_epi32(&tab.b384);
+
+ __m512i c4 = xor3_512(_mm512_clmulepi64_epi128(lo, b896, 1),
+ _mm512_clmulepi64_epi128(lo, b896, 0x10),
+ _mm512_clmulepi64_epi128(l1, b384, 1));
+ c4 = xor3_512(c4, _mm512_clmulepi64_epi128(l1, b384, 0x10),
+ extract512_128<3>(l1));
+
+ __m256i c2 = _mm512_castsi512_si256(_mm512_shuffle_i64x2(c4, c4, 0b01001110));
+ c2 = xor256(c2, _mm512_castsi512_si256(c4));
+ crc_out = xor128(_mm256_extracti64x2_epi64(c2, 1),
+ _mm256_castsi256_si128(c2));
+ size += 128 - 16;
+ goto final_reduction;
+ }
+
+ __m128i b;
+
+ // less_than_256
+ if (size >= 32) {
+ if (size >= 64) {
+ lo = xor512(load512(buf), crc_in);
+
+ while (buf += 64, (size -= 64) >= 64)
+ fold_64_B_loop:
+ lo = combine512(lo, b512, load512(buf));
+
+ // reduce_64B
+ const __m512i b384 = _mm512_load_epi32(&tab.b384);
+ __m512i crc512 =
+ xor3_512(_mm512_clmulepi64_epi128(lo, b384, 1),
+ _mm512_clmulepi64_epi128(lo, b384, 0x10),
+ extract512_128<3>(lo));
+ crc512 = xor512(crc512, _mm512_shuffle_i64x2(crc512, crc512, 0b01001110));
+ const __m256i crc256 = _mm512_castsi512_si256(crc512);
+ crc_out = xor128(_mm256_extracti64x2_epi64(crc256, 1),
+ _mm256_castsi256_si128(crc256));
+ size -= 16;
+ } else {
+ // less_than_64
+ crc_out = xor128(load128(buf),
+ _mm512_castsi512_si128(crc_in));
+ buf += 16;
+ size -= 32;
+ }
+
+ final_reduction:
+ b = _mm_load_epi32(&tab.b128);
+
+ while (ssize_t(size) >= 0) {
+ // reduction_loop_16B
+ crc_out = xor3_128(load128(buf),
+ _mm_clmulepi64_si128(crc_out, b, 1),
+ _mm_clmulepi64_si128(crc_out, b, 0x10));
+ buf += 16;
+ size -= 16;
+ }
+ // final_reduction_for_128
+
+ size += 16;
+ if (size) {
+ get_last_two_xmms:
+ const __m128i crc2 = crc_out, d = load128(buf + (size - 16));
+ __m128i S = load128(reinterpret_cast<const char*>(shuffle128) + size);
+ crc_out = _mm_shuffle_epi8(crc_out, S);
+ S = xor128(S, _mm_set1_epi32(0x80808080));
+ crc_out = xor3_128(_mm_blendv_epi8(_mm_shuffle_epi8(crc2, S), d, S),
+ _mm_clmulepi64_si128(crc_out, b, 1),
+ _mm_clmulepi64_si128(crc_out, b, 0x10));
+ }
+
+ done_128:
+ __m128i crc_tmp;
+ b = _mm_load_epi32(&tab.b64);
+ crc_tmp = xor128(_mm_clmulepi64_si128(crc_out, b, 0x00),
+ _mm_srli_si128(crc_out, 8));
+ crc_out = _mm_slli_si128(crc_tmp, 4);
+ crc_out = _mm_clmulepi64_si128(crc_out, b, 0x10);
+ crc_out = xor128(crc_out, crc_tmp);
+
+ barrett:
+ b = _mm_load_epi32(&tab.b32);
+ crc_tmp = crc_out;
+ crc_out = and128(crc_out, _mm_set_epi64x(~0ULL, ~0xFFFFFFFFULL));
+ crc_out = _mm_clmulepi64_si128(crc_out, b, 0);
+ crc_out = xor2_and_128(crc_out, crc_tmp, _mm_set_epi64x(0, ~0ULL));
+ crc_out = xnor3_128(crc_out, crc_tmp,
+ _mm_clmulepi64_si128(crc_out, b, 0x10));
+ return _mm_extract_epi32(crc_out, 2);
+ } else {
+ // less_than_32
+ if (size > 0) {
+ if (size > 16) {
+ crc_out = xor128(load128(buf),
+ _mm512_castsi512_si128(crc_in));
+ buf += 16;
+ size -= 16;
+ b = _mm_load_epi32(&tab.b128);
+ goto get_last_two_xmms;
+ } else if (size < 16) {
+ crc_out = _mm_maskz_loadu_epi8(size_mask[size - 1], buf);
+ crc_out = xor128(crc_out, _mm512_castsi512_si128(crc_in));
+
+ if (size >= 4) {
+ crc_out = _mm_shuffle_epi8
+ (crc_out,
+ load128(reinterpret_cast<const char*>(shift128) + size));
+ goto done_128;
+ } else {
+ // only_less_than_4
+ /* Shift, zero-filling 5 to 7 of the 8-byte crc_out */
+ crc_out = _mm_shuffle_epi8(crc_out,
+ load128(shift_1_to_3_reflect + size - 1));
+ goto barrett;
+ }
+ } else {
+ crc_out = xor128(load128(buf), _mm512_castsi512_si128(crc_in));
+ goto done_128;
+ }
+ } else
+ return crc;
+ }
+}
+
+static ATTRIBUTE_NOINLINE int have_vpclmulqdq()
+{
+# ifdef _MSC_VER
+ int regs[4];
+ __cpuidex(regs, 7, 0);
+ uint32_t ebx = regs[1], ecx = regs[2];
+# else
+ uint32_t eax = 0, ebx = 0, ecx = 0, edx = 0;
+ __cpuid_count(7, 0, eax, ebx, ecx, edx);
+# endif
+ return ecx & 1U<<10/*VPCLMULQDQ*/ &&
+ !(~ebx & ((1U<<16/*AVX512F*/ | 1U<<17/*AVX512DQ*/ |
+ 1U<<30/*AVX512BW*/ | 1U<<31/*AVX512VL*/)));
+}
+
+static unsigned crc32_vpclmulqdq(unsigned crc, const void *buf, size_t size)
+{
+ return crc32_avx512(crc, static_cast<const char*>(buf), size, refl32);
+}
+
+static unsigned crc32c_vpclmulqdq(unsigned crc, const void *buf, size_t size)
+{
+ return crc32_avx512(crc, static_cast<const char*>(buf), size, refl32c);
+}
+#endif
+
+extern "C" my_crc32_t crc32_pclmul_enabled(void)
+{
+ if (~cpuid_ecx() & cpuid_ecx_SSE42_AND_PCLMUL)
+ return nullptr;
+#ifdef USE_VPCLMULQDQ
+ if (have_vpclmulqdq())
+ return crc32_vpclmulqdq;
+#endif
+ return crc32_pclmul;
+}
+
+extern "C" my_crc32_t crc32c_x86_available(void)
+{
+#ifdef USE_VPCLMULQDQ
+ if (have_vpclmulqdq())
+ return crc32c_vpclmulqdq;
+#endif
+#if SIZEOF_SIZE_T == 8
+ switch (cpuid_ecx() & cpuid_ecx_SSE42_AND_PCLMUL) {
+ case cpuid_ecx_SSE42_AND_PCLMUL:
+ return crc32c_3way;
+ case cpuid_ecx_SSE42:
+ return crc32c_sse42;
+ }
+#else
+ if (cpuid_ecx() & cpuid_ecx_SSE42)
+ return crc32c_sse42;
+#endif
+ return nullptr;
+}
+
+extern "C" const char *crc32c_x86_impl(my_crc32_t c)
+{
+#ifdef USE_VPCLMULQDQ
+ if (c == crc32c_vpclmulqdq)
+ return "Using AVX512 instructions";
+#endif
+#if SIZEOF_SIZE_T == 8
+ if (c == crc32c_3way)
+ return "Using crc32 + pclmulqdq instructions";
+#endif
+ if (c == crc32c_sse42)
+ return "Using SSE4.2 crc32 instructions";
+ return nullptr;
+}
diff --git a/mysys/crc32/crc_ppc64.h b/mysys/crc32/crc_ppc64.h
index eb9379ab..81bbc16d 100644
--- a/mysys/crc32/crc_ppc64.h
+++ b/mysys/crc32/crc_ppc64.h
@@ -28,7 +28,7 @@
* any later version, or
* b) the Apache License, Version 2.0
*/
-
+#include <stddef.h>
#include <altivec.h>
@@ -57,12 +57,13 @@ static unsigned int __attribute__ ((aligned (32)))
__crc32_vpmsum(unsigned int crc, const void* p, unsigned long len);
-unsigned int CRC32_FUNCTION(unsigned int crc, const unsigned char *p,
- unsigned long len)
+unsigned CRC32_FUNCTION(unsigned crc, const void *buffer, size_t len)
{
unsigned int prealign;
unsigned int tail;
+ const unsigned char *p = buffer;
+
#ifdef CRC_XOR
crc ^= 0xffffffff;
#endif
diff --git a/mysys/crc32ieee.cc b/mysys/crc32ieee.cc
index 14e8017d..c11bdacf 100644
--- a/mysys/crc32ieee.cc
+++ b/mysys/crc32ieee.cc
@@ -26,23 +26,22 @@ static unsigned int my_crc32_zlib(unsigned int crc, const void *data,
return (unsigned int) crc32(crc, (const Bytef *)data, (unsigned int) len);
}
-#ifdef HAVE_PCLMUL
-extern "C" int crc32_pclmul_enabled();
-extern "C" unsigned int crc32_pclmul(unsigned int, const void *, size_t);
-#elif defined(__GNUC__) && defined(HAVE_ARMV8_CRC)
+typedef unsigned int (*my_crc32_t)(unsigned int, const void *, size_t);
+
+#if defined _M_IX86 || defined _M_X64 || defined __i386__ || defined __x86_64__
+extern "C" my_crc32_t crc32_pclmul_enabled();
+#elif defined HAVE_ARMV8_CRC
extern "C" int crc32_aarch64_available();
extern "C" unsigned int crc32_aarch64(unsigned int, const void *, size_t);
#endif
-typedef unsigned int (*my_crc32_t)(unsigned int, const void *, size_t);
-
static my_crc32_t init_crc32()
{
-#ifdef HAVE_PCLMUL
- if (crc32_pclmul_enabled())
- return crc32_pclmul;
-#elif defined(__GNUC__) && defined(HAVE_ARMV8_CRC)
+#if defined _M_IX86 || defined _M_X64 || defined __i386__ || defined __x86_64__
+ if (my_crc32_t crc= crc32_pclmul_enabled())
+ return crc;
+#elif defined HAVE_ARMV8_CRC
if (crc32_aarch64_available())
return crc32_aarch64;
#endif
diff --git a/mysys/errors.c b/mysys/errors.c
index d88540fe..a8be37bf 100644
--- a/mysys/errors.c
+++ b/mysys/errors.c
@@ -112,6 +112,13 @@ void init_glob_errs()
}
#endif
+static void my_space_sleep(uint seconds)
+{
+ sleep(seconds);
+}
+
+void (*my_sleep_for_space)(uint seconds)= my_space_sleep;
+
void wait_for_free_space(const char *filename, int errors)
{
if (errors == 0)
@@ -123,7 +130,7 @@ void wait_for_free_space(const char *filename, int errors)
MYF(ME_BELL | ME_ERROR_LOG | ME_WARNING),
MY_WAIT_FOR_USER_TO_FIX_PANIC,
MY_WAIT_GIVE_USER_A_MESSAGE * MY_WAIT_FOR_USER_TO_FIX_PANIC );
- (void) sleep(MY_WAIT_FOR_USER_TO_FIX_PANIC);
+ my_sleep_for_space(MY_WAIT_FOR_USER_TO_FIX_PANIC);
}
const char **get_global_errmsgs(int nr __attribute__((unused)))
diff --git a/mysys/lf_alloc-pin.c b/mysys/lf_alloc-pin.c
index fc3f320a..4dc41645 100644
--- a/mysys/lf_alloc-pin.c
+++ b/mysys/lf_alloc-pin.c
@@ -291,7 +291,7 @@ static int harvest_pins(LF_PINS *el, struct st_harvester *hv)
{
for (i= 0; i < LF_PINBOX_PINS; i++)
{
- void *p= el->pin[i];
+ void *p= my_atomic_loadptr((void **)&el->pin[i]);
if (p)
*hv->granary++= p;
}
@@ -316,7 +316,7 @@ static int match_pins(LF_PINS *el, void *addr)
LF_PINS *el_end= el+LF_DYNARRAY_LEVEL_LENGTH;
for (; el < el_end; el++)
for (i= 0; i < LF_PINBOX_PINS; i++)
- if (el->pin[i] == addr)
+ if (my_atomic_loadptr((void **)&el->pin[i]) == addr)
return 1;
return 0;
}
@@ -501,7 +501,8 @@ void *lf_alloc_new(LF_PINS *pins)
{
node= allocator->top;
lf_pin(pins, 0, node);
- } while (node != allocator->top && LF_BACKOFF());
+ } while (node != my_atomic_loadptr((void **)(char *)&allocator->top)
+ && LF_BACKOFF());
if (!node)
{
node= (void *)my_malloc(key_memory_lf_node, allocator->element_size,
diff --git a/mysys/mf_tempfile.c b/mysys/mf_tempfile.c
index 0f1c6d6b..3393b610 100644
--- a/mysys/mf_tempfile.c
+++ b/mysys/mf_tempfile.c
@@ -66,7 +66,11 @@ File create_temp_file(char *to, const char *dir, const char *prefix,
DBUG_ENTER("create_temp_file");
DBUG_PRINT("enter", ("dir: %s, prefix: %s", dir ? dir : "(null)", prefix));
+#if !defined _WIN32 && defined O_DIRECT
+ DBUG_ASSERT((mode & (O_EXCL | O_TRUNC | O_CREAT | O_RDWR | O_DIRECT)) == 0);
+#else
DBUG_ASSERT((mode & (O_EXCL | O_TRUNC | O_CREAT | O_RDWR)) == 0);
+#endif
mode|= O_TRUNC | O_CREAT | O_RDWR; /* not O_EXCL, see Windows code below */
@@ -118,16 +122,41 @@ File create_temp_file(char *to, const char *dir, const char *prefix,
if ((MyFlags & MY_TEMPORARY) && O_TMPFILE_works)
{
- /* explictly don't use O_EXCL here has it has a different
- meaning with O_TMPFILE
+ /*
+ explicitly don't use O_EXCL here has it has a different
+ meaning with O_TMPFILE
*/
- if ((file= open(dir, (mode & ~O_CREAT) | O_TMPFILE | O_CLOEXEC,
- S_IRUSR | S_IWUSR | S_IRGRP | S_IWGRP)) >= 0)
+ const int flags= (mode & ~O_CREAT) | O_TMPFILE | O_CLOEXEC;
+ const mode_t open_mode= S_IRUSR | S_IWUSR | S_IRGRP | S_IWGRP;
+# ifdef O_DIRECT
+ static int O_TMPFILE_works_with_O_DIRECT= O_DIRECT;
+ const int try_O_DIRECT= mode & O_TMPFILE_works_with_O_DIRECT;
+ if (try_O_DIRECT)
+ file= open(dir, flags | O_DIRECT, open_mode);
+ else
+# endif
+ file= open(dir, flags, open_mode);
+
+ if (file >= 0)
{
+# ifdef O_DIRECT
+ O_TMPFILE_works:
+# endif
my_snprintf(to, FN_REFLEN, "%s/#sql/fd=%d", dir, file);
file=my_register_filename(file, to, FILE_BY_O_TMPFILE,
EE_CANTCREATEFILE, MyFlags);
}
+# ifdef O_DIRECT
+ else if (errno == EINVAL && try_O_DIRECT)
+ {
+ file= open(dir, flags, open_mode);
+ if (file >= 0)
+ {
+ O_TMPFILE_works_with_O_DIRECT= 0;
+ goto O_TMPFILE_works;
+ }
+ }
+# endif
else if (errno == EOPNOTSUPP || errno == EINVAL)
{
my_printf_error(EE_CANTCREATEFILE, "O_TMPFILE is not supported on %s "
diff --git a/mysys/my_bitmap.c b/mysys/my_bitmap.c
index 9893c7e4..c9bbcc4b 100644
--- a/mysys/my_bitmap.c
+++ b/mysys/my_bitmap.c
@@ -13,17 +13,28 @@
You should have received a copy of the GNU General Public License
along with this program; if not, write to the Free Software
- Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1335 USA */
+ Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1335
+ USA
+ */
/*
- Handling of uchar arrays as large bitmaps.
+ Handling of my_bitmap_map (ulonglong) arrays as large bitmaps.
API limitations (or, rather asserted safety assumptions,
to encourage correct programming)
- * the internal size is a set of 32 bit words
+ * the internal storage is a set of 64 bit words
* the number of bits specified in creation can be any number > 0
+ Implementation notes:
+ * MY_BITMAP includes a pointer, last_word_ptr, to the last word.
+ The implication is that if one copies bitmaps to another memory
+ location, one has to call create_last_bit_mask() on the bitmap to
+ fix the internal pointer.
+ * The not used part of a the last word should always be 0.
+ This avoids special handling of the last bitmap in several cases.
+ This is checked for most calls to bitmap functions.
+
TODO:
Make assembler thread safe versions of these using test-and-set instructions
@@ -31,117 +42,97 @@
New version written and test program added and some changes to the interface
was made by Mikael Ronstrom 2005, with assistance of Tomas Ulin and Mats
Kindahl.
+ Updated to 64 bits and use my_find_first_bit() to speed up
+ bitmap_get_next_set() by Monty in 2024
*/
#include "mysys_priv.h"
#include <my_bitmap.h>
#include <m_string.h>
#include <my_bit.h>
+#include <my_byteorder.h>
+
+
+/* Defines to check bitmaps */
+
+#define DBUG_ASSERT_BITMAP(M) \
+ DBUG_ASSERT((M)->bitmap); \
+ DBUG_ASSERT((M)->n_bits > 0); \
+ DBUG_ASSERT((M)->last_word_ptr == (M)->bitmap + no_words_in_map(M)-1); \
+ DBUG_ASSERT((*(M)->last_word_ptr & (M)->last_bit_mask) == 0);
+
+#define DBUG_ASSERT_BITMAP_AND_BIT(M,B) \
+ DBUG_ASSERT_BITMAP(M); \
+ DBUG_ASSERT((B) < (M)->n_bits);
+
+#define DBUG_ASSERT_DIFFERENT_BITMAPS(M,N) \
+ DBUG_ASSERT_BITMAP(M); \
+ DBUG_ASSERT_BITMAP(N);
+
+#define DBUG_ASSERT_IDENTICAL_BITMAPS(M,N) \
+ DBUG_ASSERT_BITMAP(M); \
+ DBUG_ASSERT_BITMAP(N); \
+ DBUG_ASSERT((M)->n_bits == (N)->n_bits);
/*
- Create a mask with the upper 'unused' bits set and the lower 'used'
- bits clear. The bits within each byte is stored in big-endian order.
+ Create a mask for the usable bits on the LAST my_bitmap_map position for
+ a bitmap with 'bits' number of bits.
+
+ The lowest 'bits' bits are set to zero and the rest bits are set to 1.
+ For (bits & 63) == 0 , 0 is returned as in this case all bits in the
+ my_bitmap_position are significant. (This example assumes the
+ storage is ulonglong).
+
+ For 'bits & 63' it will return values from the series
+ 0, 0xfffffffffffffffe,.... 0x8000000000000000
*/
-static inline uchar invers_last_byte_mask(uint bits)
-{
- return last_byte_mask(bits) ^ 255;
-}
-
-
-void create_last_word_mask(MY_BITMAP *map)
-{
- unsigned char const mask= invers_last_byte_mask(map->n_bits);
-
- /*
- The first bytes are to be set to zero since they represent real bits
- in the bitvector. The last bytes are set to 0xFF since they represent
- bytes not used by the bitvector. Finally the last byte contains bits
- as set by the mask above.
- */
- unsigned char *ptr= (unsigned char*)&map->last_word_mask;
-
- map->last_word_ptr= map->bitmap + no_words_in_map(map)-1;
- switch (no_bytes_in_map(map) & 3) {
- case 1:
- map->last_word_mask= ~0U;
- ptr[0]= mask;
- return;
- case 2:
- map->last_word_mask= ~0U;
- ptr[0]= 0;
- ptr[1]= mask;
- return;
- case 3:
- map->last_word_mask= 0U;
- ptr[2]= mask;
- ptr[3]= 0xFFU;
- return;
- case 0:
- map->last_word_mask= 0U;
- ptr[3]= mask;
- return;
- }
+static inline my_bitmap_map last_bit_mask(uint bits)
+{
+ uint bits_in_last_map= (bits & (my_bitmap_map_bits-1));
+ return bits_in_last_map ? ~((1ULL << bits_in_last_map)-1) : 0ULL;
}
-static inline my_bitmap_map last_word_mask(uint bit)
-{
- my_bitmap_map last_word_mask;
- uint n_bits= bit + 1;
- unsigned char const mask= invers_last_byte_mask(n_bits);
-
- /*
- The first bytes are to be set to zero since they represent real bits
- in the bitvector. The last bytes are set to 0xFF since they represent
- bytes not used by the bitvector. Finally the last byte contains bits
- as set by the mask above.
- */
- unsigned char *ptr= (unsigned char*)&last_word_mask;
-
- switch ((n_bits + 7)/8 & 3) {
- case 1:
- last_word_mask= ~0U;
- ptr[0]= mask;
- break;
- case 2:
- last_word_mask= ~0U;
- ptr[0]= 0;
- ptr[1]= mask;
- break;
- case 3:
- last_word_mask= 0U;
- ptr[2]= mask;
- ptr[3]= 0xFFU;
- break;
- case 0:
- last_word_mask= 0U;
- ptr[3]= mask;
- break;
- }
- return last_word_mask;
-}
+/*
+ Get a mask of the bits that are to be considered as 'on' at location
+ starting with 'bits'.
+ This function has _inv in it's name as it's usage is invers compared
+ to last_bit_mask().
+ For (bits & 63) it will return values from the series
+ 0xffffffffffffffff, 0xfffffffffffffffe,.... 0x8000000000000000
+*/
-static inline uint get_first_set(my_bitmap_map value, uint word_pos)
+static inline my_bitmap_map first_bit_mask_inv(uint bits)
{
- uchar *byte_ptr= (uchar*)&value;
- uchar byte_value;
- uint byte_pos, bit_pos;
+ uint bits_in_last_map= (bits & (my_bitmap_map_bits-1));
+ return ~((1ULL << bits_in_last_map)-1);
+}
+
+
+/*
+ Update the bitmap's last_word_ptr and last_bit_mask
+ Also ensure that the last world is all zero to make it
+ easy to find the next set bit.
+
+ Note that if n_bits is 0, then last_word_ptr will point to
+ bitmap (safely). The bitmap will not be usable for almost any operation.
+*/
- DBUG_ASSERT(value);
- for (byte_pos=0; ; byte_pos++, byte_ptr++)
+void create_last_bit_mask(MY_BITMAP *map)
+{
+ my_bitmap_map mask= last_bit_mask(map->n_bits);
+ map->last_bit_mask= mask;
+ map->last_word_ptr= map->bitmap + MY_MAX(no_words_in_map(map),1) -1;
+ if (map->n_bits > 0)
{
- if ((byte_value= *byte_ptr))
- {
- for (bit_pos=0; ; bit_pos++)
- if (byte_value & (1 << bit_pos))
- return (word_pos*32) + (byte_pos*8) + bit_pos;
- }
+ *map->last_word_ptr&= ~mask; /* Set not used bits to 0 */
+ DBUG_ASSERT_BITMAP(map);
}
- return MY_BIT_NONE; /* Impossible */
}
+
/*
Initialize a bitmap object. All bits will be set to zero
*/
@@ -149,17 +140,24 @@ static inline uint get_first_set(my_bitmap_map value, uint word_pos)
my_bool my_bitmap_init(MY_BITMAP *map, my_bitmap_map *buf, uint n_bits)
{
DBUG_ENTER("my_bitmap_init");
+
if (!buf)
{
uint size_in_bytes= bitmap_buffer_size(n_bits);
if (!(buf= (my_bitmap_map*) my_malloc(key_memory_MY_BITMAP_bitmap,
size_in_bytes, MYF(MY_WME))))
+ {
+ map->bitmap= 0;
DBUG_RETURN(1);
+ }
+ map->bitmap_allocated= 1;
}
+ else
+ map->bitmap_allocated= 0;
map->bitmap= buf;
map->n_bits= n_bits;
- create_last_word_mask(map);
+ create_last_bit_mask(map);
bitmap_clear_all(map);
DBUG_RETURN(0);
}
@@ -170,7 +168,8 @@ void my_bitmap_free(MY_BITMAP *map)
DBUG_ENTER("my_bitmap_free");
if (map->bitmap)
{
- my_free(map->bitmap);
+ if (map->bitmap_allocated)
+ my_free(map->bitmap);
map->bitmap=0;
}
DBUG_VOID_RETURN;
@@ -192,11 +191,14 @@ void my_bitmap_free(MY_BITMAP *map)
my_bool bitmap_fast_test_and_set(MY_BITMAP *map, uint bitmap_bit)
{
- uchar *value= ((uchar*) map->bitmap) + (bitmap_bit / 8);
- uchar bit= 1 << ((bitmap_bit) & 7);
- uchar res= (*value) & bit;
+ my_bitmap_map *value, bit, res;
+ DBUG_ASSERT_BITMAP_AND_BIT(map, bitmap_bit);
+
+ value= map->bitmap + (bitmap_bit/my_bitmap_map_bits);
+ bit= 1ULL << (bitmap_bit & (my_bitmap_map_bits-1));
+ res= *value & bit;
*value|= bit;
- return res;
+ return MY_TEST(res);
}
@@ -215,8 +217,7 @@ my_bool bitmap_fast_test_and_set(MY_BITMAP *map, uint bitmap_bit)
my_bool bitmap_test_and_set(MY_BITMAP *map, uint bitmap_bit)
{
- DBUG_ASSERT(map->bitmap);
- DBUG_ASSERT(bitmap_bit < map->n_bits);
+ DBUG_ASSERT_BITMAP_AND_BIT(map, bitmap_bit);
return bitmap_fast_test_and_set(map, bitmap_bit);
}
@@ -235,18 +236,20 @@ my_bool bitmap_test_and_set(MY_BITMAP *map, uint bitmap_bit)
my_bool bitmap_fast_test_and_clear(MY_BITMAP *map, uint bitmap_bit)
{
- uchar *byte= (uchar*) map->bitmap + (bitmap_bit / 8);
- uchar bit= 1 << ((bitmap_bit) & 7);
- uchar res= (*byte) & bit;
- *byte&= ~bit;
- return res;
+ my_bitmap_map *value, bit, res;
+ DBUG_ASSERT_BITMAP_AND_BIT(map, bitmap_bit);
+
+ value= map->bitmap + (bitmap_bit/my_bitmap_map_bits);
+ bit= 1ULL << (bitmap_bit & (my_bitmap_map_bits-1));
+ res= *value & bit;
+ *value&= ~bit;
+ return MY_TEST(res);
}
my_bool bitmap_test_and_clear(MY_BITMAP *map, uint bitmap_bit)
{
- DBUG_ASSERT(map->bitmap);
- DBUG_ASSERT(bitmap_bit < map->n_bits);
+ DBUG_ASSERT_BITMAP_AND_BIT(map, bitmap_bit);
return bitmap_fast_test_and_clear(map, bitmap_bit);
}
@@ -254,8 +257,8 @@ my_bool bitmap_test_and_clear(MY_BITMAP *map, uint bitmap_bit)
uint bitmap_set_next(MY_BITMAP *map)
{
uint bit_found;
- DBUG_ASSERT(map->bitmap);
- if ((bit_found= bitmap_get_first(map)) != MY_BIT_NONE)
+ DBUG_ASSERT_BITMAP(map);
+ if ((bit_found= bitmap_get_first_clear(map)) != MY_BIT_NONE)
bitmap_set_bit(map, bit_found);
return bit_found;
}
@@ -265,58 +268,69 @@ uint bitmap_set_next(MY_BITMAP *map)
Set the specified number of bits in the bitmap buffer.
@param map [IN] Bitmap
- @param prefix_size [IN] Number of bits to be set
+ @param prefix_size [IN] Number of bits to be set or (uint) ~0 for all
*/
+
void bitmap_set_prefix(MY_BITMAP *map, uint prefix_size)
{
- uint prefix_bytes, prefix_bits, d;
- uchar *m= (uchar *)map->bitmap;
-
- DBUG_ASSERT(map->bitmap);
+ uint prefix, prefix_bits;
+ my_bitmap_map *value= map->bitmap;
+ DBUG_ASSERT_BITMAP(map);
DBUG_ASSERT(prefix_size <= map->n_bits || prefix_size == (uint) ~0);
set_if_smaller(prefix_size, map->n_bits);
- if ((prefix_bytes= prefix_size / 8))
- memset(m, 0xff, prefix_bytes);
- m+= prefix_bytes;
- if ((prefix_bits= prefix_size & 7))
+
+ if ((prefix= prefix_size / my_bitmap_map_bits))
{
- *(m++)= (1 << prefix_bits)-1;
- // As the prefix bits are set, lets count this byte too as a prefix byte.
- prefix_bytes ++;
+ my_bitmap_map *end= value+prefix;
+ do
+ {
+ *value++= ~(my_bitmap_map) 0;
+ } while (value < end);
}
- if ((d= no_bytes_in_map(map)-prefix_bytes))
- memset(m, 0, d);
+ if ((prefix_bits= prefix_size & (my_bitmap_map_bits-1)))
+ *value++= (1ULL << prefix_bits)-1;
+ while (value <= map->last_word_ptr)
+ *value++= 0;
+ DBUG_ASSERT_BITMAP(map);
}
+/**
+ Check if bitmap is a bitmap of prefix bits set in the beginning
+
+ @param map bitmap
+ @param prefix_size number of bits that should be set. 0 is allowed.
+
+ @return 1 Yes, prefix bits where set or prefix_size == 0.
+ @return 0 No
+*/
+
my_bool bitmap_is_prefix(const MY_BITMAP *map, uint prefix_size)
{
- uint prefix_mask= last_byte_mask(prefix_size);
- uchar *m= (uchar*) map->bitmap;
- uchar *end_prefix= m+(prefix_size-1)/8;
- uchar *end;
- DBUG_ASSERT(m);
- DBUG_ASSERT(prefix_size <= map->n_bits);
+ my_bitmap_map *value= map->bitmap;
+ my_bitmap_map *end= value+ (prefix_size/my_bitmap_map_bits);
+ uint prefix_bits;
/* Empty prefix is always true */
if (!prefix_size)
return 1;
- while (m < end_prefix)
- if (*m++ != 0xff)
- return 0;
+ DBUG_ASSERT_BITMAP_AND_BIT(map, prefix_size-1);
- end= ((uchar*) map->bitmap) + no_bytes_in_map(map) - 1;
- if (m == end)
- return ((*m & last_byte_mask(map->n_bits)) == prefix_mask);
-
- if (*m != prefix_mask)
- return 0;
+ while (value < end)
+ if (*value++ != ~(my_bitmap_map) 0)
+ return 0;
- while (++m < end)
- if (*m != 0)
+ if ((prefix_bits= prefix_size & (my_bitmap_map_bits-1)))
+ {
+ if (*value++ != (1ULL << prefix_bits)-1)
+ return 0;
+ }
+ end= map->last_word_ptr;
+ while (value <= end)
+ if (*value++ != 0)
return 0;
- return ((*m & last_byte_mask(map->n_bits)) == 0);
+ return 1;
}
@@ -324,10 +338,12 @@ my_bool bitmap_is_set_all(const MY_BITMAP *map)
{
my_bitmap_map *data_ptr= map->bitmap;
my_bitmap_map *end= map->last_word_ptr;
+ DBUG_ASSERT_BITMAP(map);
+
for (; data_ptr < end; data_ptr++)
- if (*data_ptr != 0xFFFFFFFF)
+ if (*data_ptr != ~(my_bitmap_map)0)
return FALSE;
- return (*data_ptr | map->last_word_mask) == 0xFFFFFFFF;
+ return (*data_ptr | map->last_bit_mask) == ~(my_bitmap_map)0;
}
@@ -335,61 +351,58 @@ my_bool bitmap_is_clear_all(const MY_BITMAP *map)
{
my_bitmap_map *data_ptr= map->bitmap;
my_bitmap_map *end= map->last_word_ptr;
+ DBUG_ASSERT_BITMAP(map);
- DBUG_ASSERT(map->n_bits > 0);
- for (; data_ptr < end; data_ptr++)
+ for (; data_ptr <= end; data_ptr++)
if (*data_ptr)
return FALSE;
- return (*data_ptr & ~map->last_word_mask) == 0;
+ return TRUE;
}
+
/* Return TRUE if map1 is a subset of map2 */
my_bool bitmap_is_subset(const MY_BITMAP *map1, const MY_BITMAP *map2)
{
- my_bitmap_map *m1= map1->bitmap, *m2= map2->bitmap, *end;
+ my_bitmap_map *m1= map1->bitmap, *m2= map2->bitmap, *end= map1->last_word_ptr;
+ DBUG_ASSERT_IDENTICAL_BITMAPS(map1,map2);
- DBUG_ASSERT(map1->bitmap && map2->bitmap);
- DBUG_ASSERT(map1->n_bits==map2->n_bits);
-
- end= map1->last_word_ptr;
- while (m1 < end)
+ while (m1 <= end)
{
if ((*m1++) & ~(*m2++))
return 0;
}
- /* here both maps have the same number of bits - see assert above */
- return ((*m1 & ~*m2 & ~map1->last_word_mask) ? 0 : 1);
+ return 1;
}
/* True if bitmaps has any common bits */
my_bool bitmap_is_overlapping(const MY_BITMAP *map1, const MY_BITMAP *map2)
{
- my_bitmap_map *m1= map1->bitmap, *m2= map2->bitmap, *end;
+ my_bitmap_map *m1= map1->bitmap, *m2= map2->bitmap, *end= map1->last_word_ptr;
+ DBUG_ASSERT_IDENTICAL_BITMAPS(map1,map2);
- DBUG_ASSERT(map1->bitmap);
- DBUG_ASSERT(map2->bitmap);
- DBUG_ASSERT(map1->n_bits==map2->n_bits);
-
- end= map1->last_word_ptr;
- while (m1 < end)
+ while (m1 <= end)
{
if ((*m1++) & (*m2++))
return 1;
}
- /* here both maps have the same number of bits - see assert above */
- return ((*m1 & *m2 & ~map1->last_word_mask) ? 1 : 0);
+ return 0;
}
+/*
+ Create intersection of two bitmaps
+
+ @param map map1. Result is stored here
+ @param map2 map2
+*/
+
void bitmap_intersect(MY_BITMAP *map, const MY_BITMAP *map2)
{
my_bitmap_map *to= map->bitmap, *from= map2->bitmap, *end;
uint len= no_words_in_map(map), len2 = no_words_in_map(map2);
-
- DBUG_ASSERT(map->bitmap);
- DBUG_ASSERT(map2->bitmap);
+ DBUG_ASSERT_DIFFERENT_BITMAPS(map,map2);
end= to+MY_MIN(len,len2);
while (to < end)
@@ -397,7 +410,7 @@ void bitmap_intersect(MY_BITMAP *map, const MY_BITMAP *map2)
if (len2 <= len)
{
- to[-1]&= ~map2->last_word_mask; /* Clear last not relevant bits */
+ to[-1]&= ~map2->last_bit_mask; /* Clear last not relevant bits */
end+= len-len2;
while (to < end)
*to++= 0;
@@ -407,50 +420,51 @@ void bitmap_intersect(MY_BITMAP *map, const MY_BITMAP *map2)
/*
Check if there is some bit index between start_bit and end_bit, such that
- this is bit is set for all bitmaps in bitmap_list.
+ this is at least on bit that set for all bitmaps in bitmap_list.
SYNOPSIS
bitmap_exists_intersection()
bitmpap_array [in] a set of MY_BITMAPs
- bitmap_count [in] number of elements in bitmpap_array
+ bitmap_count [in] number of elements in bitmap_array
start_bit [in] beginning (inclusive) of the range of bits to search
end_bit [in] end (inclusive) of the range of bits to search, must be
no bigger than the bits of the shortest bitmap.
- NOTES
- This function assumes that for at least one of the bitmaps in bitmap_array all
- bits outside the range [start_bit, end_bit] are 0. As a result is not
- necessary to take care of the bits outside the range [start_bit, end_bit].
-
RETURN
TRUE if an intersecion exists
FALSE no intersection
*/
-my_bool bitmap_exists_intersection(const MY_BITMAP **bitmap_array,
+my_bool bitmap_exists_intersection(MY_BITMAP **bitmap_array,
uint bitmap_count,
uint start_bit, uint end_bit)
{
uint i, j, start_idx, end_idx;
- my_bitmap_map cur_res;
+ my_bitmap_map cur_res, first_map;
DBUG_ASSERT(bitmap_count);
DBUG_ASSERT(end_bit >= start_bit);
for (j= 0; j < bitmap_count; j++)
- DBUG_ASSERT(end_bit < bitmap_array[j]->n_bits);
+ {
+ DBUG_ASSERT_BITMAP_AND_BIT(bitmap_array[j], end_bit);
+ }
start_idx= start_bit/8/sizeof(my_bitmap_map);
end_idx= end_bit/8/sizeof(my_bitmap_map);
+ first_map= first_bit_mask_inv(start_bit);
+ cur_res= first_map;
for (i= start_idx; i < end_idx; i++)
{
- cur_res= ~0;
for (j= 0; cur_res && j < bitmap_count; j++)
cur_res &= bitmap_array[j]->bitmap[i];
if (cur_res)
return TRUE;
+ cur_res= ~(my_bitmap_map) 0;
}
- cur_res= ~last_word_mask(end_bit);
+ cur_res= ~last_bit_mask(end_bit+1);
+ if (start_idx == end_idx)
+ cur_res&= first_map;
for (j= 0; cur_res && j < bitmap_count; j++)
cur_res &= bitmap_array[j]->bitmap[end_idx];
return cur_res != 0;
@@ -461,60 +475,21 @@ my_bool bitmap_exists_intersection(const MY_BITMAP **bitmap_array,
my_bool bitmap_union_is_set_all(const MY_BITMAP *map1, const MY_BITMAP *map2)
{
- my_bitmap_map *m1= map1->bitmap, *m2= map2->bitmap, *end;
+ my_bitmap_map *m1= map1->bitmap, *m2= map2->bitmap, *end= map1->last_word_ptr;
+ DBUG_ASSERT_IDENTICAL_BITMAPS(map1,map2);
- DBUG_ASSERT(map1->bitmap);
- DBUG_ASSERT(map2->bitmap);
- DBUG_ASSERT(map1->n_bits==map2->n_bits);
- end= map1->last_word_ptr;
while ( m1 < end)
- if ((*m1++ | *m2++) != 0xFFFFFFFF)
+ if ((*m1++ | *m2++) != ~(my_bitmap_map)0)
return FALSE;
/* here both maps have the same number of bits - see assert above */
- return ((*m1 | *m2 | map1->last_word_mask) != 0xFFFFFFFF);
-}
-
-
-
-/*
- Set/clear all bits above a bit.
-
- SYNOPSIS
- bitmap_set_above()
- map RETURN The bitmap to change.
- from_byte The bitmap buffer byte offset to start with.
- use_bit The bit value (1/0) to use for all upper bits.
-
- NOTE
- You can only set/clear full bytes.
- The function is meant for the situation that you copy a smaller bitmap
- to a bigger bitmap. Bitmap lengths are always multiple of eigth (the
- size of a byte). Using 'from_byte' saves multiplication and division
- by eight during parameter passing.
-
- RETURN
- void
-*/
-
-void bitmap_set_above(MY_BITMAP *map, uint from_byte, uint use_bit)
-{
- uchar use_byte= use_bit ? 0xff : 0;
- uchar *to= (uchar *)map->bitmap + from_byte;
- uchar *end= (uchar *)map->bitmap + (map->n_bits+7)/8;
-
- while (to < end)
- *to++= use_byte;
+ return ((*m1 | *m2 | map1->last_bit_mask) != ~(my_bitmap_map)0);
}
void bitmap_subtract(MY_BITMAP *map, const MY_BITMAP *map2)
{
- my_bitmap_map *to= map->bitmap, *from= map2->bitmap, *end;
- DBUG_ASSERT(map->bitmap);
- DBUG_ASSERT(map2->bitmap);
- DBUG_ASSERT(map->n_bits==map2->n_bits);
-
- end= map->last_word_ptr;
+ my_bitmap_map *to= map->bitmap, *from= map2->bitmap, *end= map->last_word_ptr;
+ DBUG_ASSERT_IDENTICAL_BITMAPS(map,map2);
while (to <= end)
*to++ &= ~(*from++);
@@ -523,12 +498,8 @@ void bitmap_subtract(MY_BITMAP *map, const MY_BITMAP *map2)
void bitmap_union(MY_BITMAP *map, const MY_BITMAP *map2)
{
- my_bitmap_map *to= map->bitmap, *from= map2->bitmap, *end;
-
- DBUG_ASSERT(map->bitmap);
- DBUG_ASSERT(map2->bitmap);
- DBUG_ASSERT(map->n_bits == map2->n_bits);
- end= map->last_word_ptr;
+ my_bitmap_map *to= map->bitmap, *from= map2->bitmap, *end= map->last_word_ptr;
+ DBUG_ASSERT_IDENTICAL_BITMAPS(map,map2);
while (to <= end)
*to++ |= *from++;
@@ -538,9 +509,8 @@ void bitmap_union(MY_BITMAP *map, const MY_BITMAP *map2)
void bitmap_xor(MY_BITMAP *map, const MY_BITMAP *map2)
{
my_bitmap_map *to= map->bitmap, *from= map2->bitmap, *end= map->last_word_ptr;
- DBUG_ASSERT(map->bitmap);
- DBUG_ASSERT(map2->bitmap);
- DBUG_ASSERT(map->n_bits == map2->n_bits);
+ DBUG_ASSERT_IDENTICAL_BITMAPS(map,map2);
+
while (to <= end)
*to++ ^= *from++;
}
@@ -548,13 +518,14 @@ void bitmap_xor(MY_BITMAP *map, const MY_BITMAP *map2)
void bitmap_invert(MY_BITMAP *map)
{
- my_bitmap_map *to= map->bitmap, *end;
+ my_bitmap_map *to= map->bitmap, *end= map->last_word_ptr;
+ DBUG_ASSERT_BITMAP(map);
- DBUG_ASSERT(map->bitmap);
- end= map->last_word_ptr;
+ while (to < end)
+ *to++ ^= ~(my_bitmap_map)0;
- while (to <= end)
- *to++ ^= 0xFFFFFFFF;
+ *to ^= (~(my_bitmap_map)0 & ~map->last_bit_mask);
+ DBUG_ASSERT_BITMAP(map);
}
@@ -563,45 +534,54 @@ uint bitmap_bits_set(const MY_BITMAP *map)
my_bitmap_map *data_ptr= map->bitmap;
my_bitmap_map *end= map->last_word_ptr;
uint res= 0;
- DBUG_ASSERT(map->bitmap);
+ DBUG_ASSERT_BITMAP(map);
- for (; data_ptr < end; data_ptr++)
- res+= my_count_bits_uint32(*data_ptr);
+ for (; data_ptr <= end; data_ptr++)
+ res+= my_count_bits(*data_ptr);
- /*Reset last bits to zero*/
- res+= my_count_bits_uint32(*map->last_word_ptr & ~map->last_word_mask);
return res;
}
-void bitmap_copy(MY_BITMAP *map, const MY_BITMAP *map2)
-{
- my_bitmap_map *to= map->bitmap, *from= map2->bitmap, *end;
- DBUG_ASSERT(map->bitmap);
- DBUG_ASSERT(map2->bitmap);
- DBUG_ASSERT(map->n_bits == map2->n_bits);
- end= map->last_word_ptr;
+/**
+ Copy bitmaps
- while (to <= end)
- *to++ = *from++;
+ @param map1 to-bitmap
+ @param map2 from-bitmap
+
+ @notes
+ Code will work even of the bitmaps are of different size.
+ In this case, only up to to->n_bits will be copied.
+*/
+
+void bitmap_copy(MY_BITMAP *map1, const MY_BITMAP *map2)
+{
+ my_bitmap_map *to= map1->bitmap, *from= map2->bitmap;
+ uint map1_length= no_words_in_map(map1)*sizeof(my_bitmap_map);
+ uint map2_length= no_words_in_map(map2)*sizeof(my_bitmap_map);
+ uint length= MY_MIN(map1_length, map2_length);
+ DBUG_ASSERT_DIFFERENT_BITMAPS(map1,map2);
+
+ memcpy(to, from, length);
+ if (length < map1_length)
+ bzero(to + length, map1_length - length);
+ *map1->last_word_ptr&= ~map1->last_bit_mask;
}
+/*
+ Find first set bit in the bitmap
+*/
+
uint bitmap_get_first_set(const MY_BITMAP *map)
{
- uint i;
my_bitmap_map *data_ptr= map->bitmap, *end= map->last_word_ptr;
+ DBUG_ASSERT_BITMAP(map);
- DBUG_ASSERT(map->bitmap);
-
- for (i=0; data_ptr < end; data_ptr++, i++)
+ for (uint i=0; data_ptr <= end; data_ptr++, i++)
if (*data_ptr)
- goto found;
- if (!(*data_ptr & ~map->last_word_mask))
- return MY_BIT_NONE;
-
-found:
- return get_first_set(*data_ptr, i);
+ return my_find_first_bit(*data_ptr) + i * sizeof(my_bitmap_map)*8;
+ return MY_BIT_NONE;
}
@@ -616,80 +596,113 @@ found:
uint bitmap_get_next_set(const MY_BITMAP *map, uint bitmap_bit)
{
- uint word_pos, byte_to_mask, i;
- union { my_bitmap_map bitmap ; uchar bitmap_buff[sizeof(my_bitmap_map)]; }
- first_word;
- uchar *ptr= &first_word.bitmap_buff[0];
- my_bitmap_map *data_ptr, *end= map->last_word_ptr;
-
- DBUG_ASSERT(map->bitmap);
+ uint word_pos;
+ my_bitmap_map first_word, *data_ptr, *end= map->last_word_ptr;
+ DBUG_ASSERT_BITMAP(map);
/* Look for the next bit */
bitmap_bit++;
if (bitmap_bit >= map->n_bits)
return MY_BIT_NONE;
- word_pos= bitmap_bit / 32;
+
+ word_pos= bitmap_bit / 64;
data_ptr= map->bitmap + word_pos;
- first_word.bitmap= *data_ptr;
- /* Mask out previous bits from first_word */
- byte_to_mask= (bitmap_bit % 32) / 8;
- for (i= 0; i < byte_to_mask; i++)
- ptr[i]= 0;
- ptr[byte_to_mask]&= 0xFFU << (bitmap_bit & 7);
+ first_word= *data_ptr & first_bit_mask_inv(bitmap_bit);
- if (data_ptr == end)
+ if (first_word)
{
- if (first_word.bitmap & ~map->last_word_mask)
- return get_first_set(first_word.bitmap, word_pos);
- else
- return MY_BIT_NONE;
+ /* Optimize common case when most bits are set */
+ if (first_word & (1ULL << ((bitmap_bit & (my_bitmap_map_bits-1)))))
+ return bitmap_bit;
+ return my_find_first_bit(first_word) + (bitmap_bit & ~(my_bitmap_map_bits-1));
}
-
- if (first_word.bitmap)
- return get_first_set(first_word.bitmap, word_pos);
- for (data_ptr++, word_pos++; data_ptr < end; data_ptr++, word_pos++)
+ for (data_ptr++; data_ptr <= end; data_ptr++)
+ {
+ bitmap_bit+= 64;
if (*data_ptr)
- return get_first_set(*data_ptr, word_pos);
-
- if (!(*end & ~map->last_word_mask))
- return MY_BIT_NONE;
- return get_first_set(*end, word_pos);
+ return my_find_first_bit(*data_ptr) + (bitmap_bit & ~(my_bitmap_map_bits-1));
+ }
+ return MY_BIT_NONE;
}
-/* Get first free bit */
+/* Get first clear bit */
-uint bitmap_get_first(const MY_BITMAP *map)
+uint bitmap_get_first_clear(const MY_BITMAP *map)
{
- uchar *byte_ptr;
- uint i,j,k;
- my_bitmap_map *data_ptr, *end= map->last_word_ptr;
-
- DBUG_ASSERT(map->bitmap);
- data_ptr= map->bitmap;
- *map->last_word_ptr|= map->last_word_mask;
+ uint i;
+ my_bitmap_map *data_ptr= map->bitmap, *end= map->last_word_ptr;
+ DBUG_ASSERT_BITMAP(map);
- for (i=0; data_ptr < end; data_ptr++, i++)
- if (*data_ptr != 0xFFFFFFFF)
+ for (i= 0; data_ptr < end; data_ptr++, i++)
+ if (*data_ptr != ~(my_bitmap_map)0)
goto found;
- if ((*data_ptr | map->last_word_mask) == 0xFFFFFFFF)
+ if ((*data_ptr | map->last_bit_mask) == ~(my_bitmap_map)0)
return MY_BIT_NONE;
-
found:
- byte_ptr= (uchar*)data_ptr;
- for (j=0; ; j++, byte_ptr++)
+ /* find first zero bit by reverting all bits and find first bit */
+ return my_find_first_bit(~*data_ptr) + i * sizeof(my_bitmap_map)*8;
+}
+/*
+ Functions to export/import bitmaps to an architecture independent format
+ (low_byte_first)
+*/
+
+#ifdef WORDS_BIGENDIAN
+/* Big endian machines, like powerpc or s390x */
+
+void bitmap_export(uchar *to, MY_BITMAP *map)
+{
+ my_bitmap_map *value;
+ uint length;
+ uchar buff[my_bitmap_map_bytes];
+
+ for (value= map->bitmap ; value < map->last_word_ptr ; value++)
{
- if (*byte_ptr != 0xFF)
- {
- for (k=0; ; k++)
- {
- if (!(*byte_ptr & (1 << k)))
- return (i*32) + (j*8) + k;
- }
- }
+ int8store(to, *value);
+ to+= 8;
}
- DBUG_ASSERT(0);
- return MY_BIT_NONE; /* Impossible */
+ int8store(buff, *value);
+
+ /* We want length & 7 to return a serie 8,2,3,4,5,6,7, 8,2,3,... */
+ length= 1+ ((no_bytes_in_export_map(map) + 7) & 7);
+ memcpy(to, buff, length);
+}
+
+
+void bitmap_import(MY_BITMAP *map, uchar *from)
+{
+ my_bitmap_map *value;
+ uint length;
+ uchar buff[my_bitmap_map_bytes];
+
+ for (value= map->bitmap ; value < map->last_word_ptr ; value++)
+ {
+ *value= uint8korr(from);
+ from+= 8;
+ }
+ bzero(buff, sizeof(buff));
+
+ /* We want length & 7 to return a serie 8,2,3,4,5,6,7, 8,2,3,... */
+ length= 1+ ((no_bytes_in_export_map(map) + 7) & 7);
+ memcpy(buff, from, length);
+ *value= uint8korr(buff) & ~map->last_bit_mask;
+}
+
+#else
+
+/* Little endian machines, like intel and amd */
+
+void bitmap_export(uchar *to, MY_BITMAP *map)
+{
+ memcpy(to, (uchar*) map->bitmap, no_bytes_in_export_map(map));
+}
+
+void bitmap_import(MY_BITMAP *map, uchar *from)
+{
+ memcpy((uchar*) map->bitmap, from, no_bytes_in_export_map(map));
+ *map->last_word_ptr&= ~map->last_bit_mask;
}
+#endif /* WORDS_BIGENDIAN */
diff --git a/mysys/my_getopt.c b/mysys/my_getopt.c
index eb665b1e..6b480dae 100644
--- a/mysys/my_getopt.c
+++ b/mysys/my_getopt.c
@@ -172,6 +172,8 @@ static void validate_value(const char *key, const char *value,
#define validate_value(key, value, filename) (void)filename
#endif
+#define SET_HO_ERROR_AND_CONTINUE(e) { ho_error= (e); (*argc)--; continue; }
+
/**
Handle command line options.
Sort options.
@@ -241,7 +243,7 @@ int handle_options(int *argc, char ***argv, const struct my_option *longopts,
const char *UNINIT_VAR(prev_found);
const struct my_option *optp;
void *value;
- int error, i;
+ int ho_error= 0, error, i;
my_bool is_cmdline_arg= 1;
DBUG_ENTER("handle_options");
@@ -255,7 +257,7 @@ int handle_options(int *argc, char ***argv, const struct my_option *longopts,
is_cmdline_arg= !is_file_marker(**argv);
- for (pos= *argv, pos_end=pos+ *argc; pos != pos_end ; pos++)
+ for (pos= *argv, pos_end=pos+ *argc; pos < pos_end ; pos++)
{
char **first= pos;
char *cur_arg= *pos;
@@ -344,7 +346,7 @@ int handle_options(int *argc, char ***argv, const struct my_option *longopts,
my_progname, special_opt_prefix[i],
opt_str, special_opt_prefix[i],
prev_found);
- DBUG_RETURN(EXIT_AMBIGUOUS_OPTION);
+ SET_HO_ERROR_AND_CONTINUE(EXIT_AMBIGUOUS_OPTION)
}
switch (i) {
case OPT_SKIP:
@@ -389,7 +391,7 @@ int handle_options(int *argc, char ***argv, const struct my_option *longopts,
"%s: unknown variable '%s'",
my_progname, cur_arg);
if (!option_is_loose)
- DBUG_RETURN(EXIT_UNKNOWN_VARIABLE);
+ SET_HO_ERROR_AND_CONTINUE(EXIT_UNKNOWN_VARIABLE)
}
else
{
@@ -399,7 +401,7 @@ int handle_options(int *argc, char ***argv, const struct my_option *longopts,
"%s: unknown option '--%s'",
my_progname, cur_arg);
if (!option_is_loose)
- DBUG_RETURN(EXIT_UNKNOWN_OPTION);
+ SET_HO_ERROR_AND_CONTINUE(EXIT_UNKNOWN_OPTION)
}
if (option_is_loose)
{
@@ -416,7 +418,7 @@ int handle_options(int *argc, char ***argv, const struct my_option *longopts,
my_getopt_error_reporter(ERROR_LEVEL,
"%s: variable prefix '%s' is not unique",
my_progname, opt_str);
- DBUG_RETURN(EXIT_VAR_PREFIX_NOT_UNIQUE);
+ SET_HO_ERROR_AND_CONTINUE(EXIT_VAR_PREFIX_NOT_UNIQUE)
}
else
{
@@ -425,7 +427,7 @@ int handle_options(int *argc, char ***argv, const struct my_option *longopts,
"%s: ambiguous option '--%s' (%s, %s)",
my_progname, opt_str, prev_found,
optp->name);
- DBUG_RETURN(EXIT_AMBIGUOUS_OPTION);
+ SET_HO_ERROR_AND_CONTINUE(EXIT_AMBIGUOUS_OPTION)
}
}
if ((optp->var_type & GET_TYPE_MASK) == GET_DISABLED)
@@ -439,14 +441,14 @@ int handle_options(int *argc, char ***argv, const struct my_option *longopts,
(*argc)--;
continue;
}
- DBUG_RETURN(EXIT_OPTION_DISABLED);
+ SET_HO_ERROR_AND_CONTINUE(EXIT_OPTION_DISABLED)
}
error= 0;
value= optp->var_type & GET_ASK_ADDR
? (*my_getopt_get_addr)(key_name, (uint)strlen(key_name), optp, &error)
: optp->value;
if (error)
- DBUG_RETURN(error);
+ SET_HO_ERROR_AND_CONTINUE(error)
if (optp->arg_type == NO_ARG)
{
@@ -461,7 +463,7 @@ int handle_options(int *argc, char ***argv, const struct my_option *longopts,
my_getopt_error_reporter(ERROR_LEVEL,
"%s: option '--%s' cannot take an argument",
my_progname, optp->name);
- DBUG_RETURN(EXIT_NO_ARGUMENT_ALLOWED);
+ SET_HO_ERROR_AND_CONTINUE(EXIT_NO_ARGUMENT_ALLOWED)
}
if ((optp->var_type & GET_TYPE_MASK) == GET_BOOL)
{
@@ -490,7 +492,7 @@ int handle_options(int *argc, char ***argv, const struct my_option *longopts,
if (get_one_option(optp, *((my_bool*) value) ?
enabled_my_option : disabled_my_option,
filename))
- DBUG_RETURN(EXIT_ARGUMENT_INVALID);
+ SET_HO_ERROR_AND_CONTINUE(EXIT_ARGUMENT_INVALID)
continue;
}
argument= optend;
@@ -504,7 +506,7 @@ int handle_options(int *argc, char ***argv, const struct my_option *longopts,
"option '--%s' cannot take an argument",
my_progname, optp->name);
- DBUG_RETURN(EXIT_NO_ARGUMENT_ALLOWED);
+ SET_HO_ERROR_AND_CONTINUE(EXIT_NO_ARGUMENT_ALLOWED)
}
if (!(optp->var_type & GET_AUTO))
{
@@ -514,7 +516,7 @@ int handle_options(int *argc, char ***argv, const struct my_option *longopts,
"unsupported by option '--%s'",
my_progname, optp->name);
if (!option_is_loose)
- DBUG_RETURN(EXIT_ARGUMENT_INVALID);
+ SET_HO_ERROR_AND_CONTINUE(EXIT_ARGUMENT_INVALID)
continue;
}
else
@@ -533,7 +535,7 @@ int handle_options(int *argc, char ***argv, const struct my_option *longopts,
my_getopt_error_reporter(ERROR_LEVEL,
"%s: option '--%s' requires an argument",
my_progname, optp->name);
- DBUG_RETURN(EXIT_ARGUMENT_REQUIRED);
+ SET_HO_ERROR_AND_CONTINUE(EXIT_ARGUMENT_REQUIRED)
}
argument= *pos;
(*argc)--;
@@ -558,14 +560,14 @@ int handle_options(int *argc, char ***argv, const struct my_option *longopts,
fprintf(stderr,
"%s: ERROR: Option '-%c' used, but is disabled\n",
my_progname, optp->id);
- DBUG_RETURN(EXIT_OPTION_DISABLED);
+ SET_HO_ERROR_AND_CONTINUE(EXIT_OPTION_DISABLED)
}
if ((optp->var_type & GET_TYPE_MASK) == GET_BOOL &&
optp->arg_type == NO_ARG)
{
*((my_bool*) optp->value)= (my_bool) 1;
if (get_one_option(optp, argument, filename))
- DBUG_RETURN(EXIT_UNSPECIFIED_ERROR);
+ SET_HO_ERROR_AND_CONTINUE(EXIT_UNSPECIFIED_ERROR)
continue;
}
else if (optp->arg_type == REQUIRED_ARG ||
@@ -585,7 +587,7 @@ int handle_options(int *argc, char ***argv, const struct my_option *longopts,
if (optp->var_type == GET_BOOL)
*((my_bool*) optp->value)= (my_bool) 1;
if (get_one_option(optp, argument, filename))
- DBUG_RETURN(EXIT_UNSPECIFIED_ERROR);
+ SET_HO_ERROR_AND_CONTINUE(EXIT_UNSPECIFIED_ERROR)
continue;
}
/* Check if there are more arguments after this one */
@@ -595,7 +597,7 @@ int handle_options(int *argc, char ***argv, const struct my_option *longopts,
my_getopt_error_reporter(ERROR_LEVEL,
"%s: option '-%c' requires an argument",
my_progname, optp->id);
- DBUG_RETURN(EXIT_ARGUMENT_REQUIRED);
+ SET_HO_ERROR_AND_CONTINUE(EXIT_ARGUMENT_REQUIRED)
}
argument= *++pos;
(*argc)--;
@@ -603,10 +605,10 @@ int handle_options(int *argc, char ***argv, const struct my_option *longopts,
}
}
if ((error= setval(optp, optp->value, argument,
- set_maximum_value,filename)))
- DBUG_RETURN(error);
+ set_maximum_value,filename)))
+ SET_HO_ERROR_AND_CONTINUE(error)
if (get_one_option(optp, argument, filename))
- DBUG_RETURN(EXIT_UNSPECIFIED_ERROR);
+ SET_HO_ERROR_AND_CONTINUE(EXIT_UNSPECIFIED_ERROR)
break;
}
}
@@ -640,7 +642,7 @@ int handle_options(int *argc, char ***argv, const struct my_option *longopts,
my_getopt_error_reporter(ERROR_LEVEL,
"%s: unknown option '-%c'",
my_progname, *optend);
- DBUG_RETURN(EXIT_UNKNOWN_OPTION);
+ SET_HO_ERROR_AND_CONTINUE(EXIT_UNKNOWN_OPTION)
}
}
}
@@ -651,15 +653,17 @@ int handle_options(int *argc, char ***argv, const struct my_option *longopts,
if ((!option_is_autoset) &&
((error= setval(optp, value, argument, set_maximum_value,filename))) &&
!option_is_loose)
- DBUG_RETURN(error);
+ SET_HO_ERROR_AND_CONTINUE(error)
if (get_one_option(optp, argument, filename))
- DBUG_RETURN(EXIT_UNSPECIFIED_ERROR);
+ SET_HO_ERROR_AND_CONTINUE(EXIT_UNSPECIFIED_ERROR)
(*argc)--; /* option handled (long), decrease argument count */
}
else /* non-option found */
(*argv)[argvpos++]= cur_arg;
}
+ if (ho_error)
+ DBUG_RETURN(ho_error);
/*
Destroy the first, already handled option, so that programs that look
for arguments in 'argv', without checking 'argc', know when to stop.
@@ -899,7 +903,7 @@ static int setval(const struct my_option *opts, void *value, char *argument,
}
if (err)
{
- res= EXIT_UNKNOWN_SUFFIX;
+ res= err;
goto ret;
};
}
@@ -1034,7 +1038,7 @@ static inline ulonglong eval_num_suffix(char *suffix, int *error)
case 'E':
return 1ULL << 60;
default:
- *error= 1;
+ *error= EXIT_UNKNOWN_SUFFIX;
return 0ULL;
}
}
@@ -1060,15 +1064,18 @@ static longlong eval_num_suffix_ll(char *argument,
if (errno == ERANGE)
{
my_getopt_error_reporter(ERROR_LEVEL,
- "Incorrect integer value: '%s'", argument);
- *error= 1;
+ "Integer value out of range for int64:"
+ " '%s' for %s",
+ argument, option_name);
+ *error= EXIT_ARGUMENT_INVALID;
DBUG_RETURN(0);
}
num*= eval_num_suffix(endchar, error);
if (*error)
- fprintf(stderr,
- "Unknown suffix '%c' used for variable '%s' (value '%s')\n",
- *endchar, option_name, argument);
+ my_getopt_error_reporter(ERROR_LEVEL,
+ "Unknown suffix '%c' used for variable '%s' (value '%s'). "
+ "Legal suffix characters are: K, M, G, T, P, E",
+ *endchar, option_name, argument);
DBUG_RETURN(num);
}
@@ -1091,7 +1098,7 @@ static ulonglong eval_num_suffix_ull(char *argument,
my_getopt_error_reporter(ERROR_LEVEL,
"Incorrect unsigned value: '%s' for %s",
argument, option_name);
- *error= 1;
+ *error= EXIT_ARGUMENT_INVALID;
DBUG_RETURN(0);
}
*error= 0;
@@ -1100,15 +1107,18 @@ static ulonglong eval_num_suffix_ull(char *argument,
if (errno == ERANGE)
{
my_getopt_error_reporter(ERROR_LEVEL,
- "Incorrect integer value: '%s' for %s",
+ "Integer value out of range for uint64:"
+ " '%s' for %s",
argument, option_name);
- *error= 1;
+ *error= EXIT_ARGUMENT_INVALID;
DBUG_RETURN(0);
}
num*= eval_num_suffix(endchar, error);
if (*error)
my_getopt_error_reporter(ERROR_LEVEL,
- "Unknown suffix '%c' used for variable '%s' (value '%s')",
+ "Unknown suffix '%c' used for variable '%s'"
+ " (value '%s')."
+ " Legal suffix characters are: K, M, G, T, P, E",
*endchar, option_name, argument);
DBUG_RETURN(num);
}
@@ -1128,6 +1138,8 @@ static ulonglong eval_num_suffix_ull(char *argument,
static longlong getopt_ll(char *arg, const struct my_option *optp, int *err)
{
longlong num=eval_num_suffix_ll(arg, err, (char*) optp->name);
+ if (*err)
+ return(0);
return getopt_ll_limit_value(num, optp, NULL);
}
@@ -1205,6 +1217,8 @@ longlong getopt_ll_limit_value(longlong num, const struct my_option *optp,
static ulonglong getopt_ull(char *arg, const struct my_option *optp, int *err)
{
ulonglong num= eval_num_suffix_ull(arg, err, (char*) optp->name);
+ if (*err)
+ return(0);
return getopt_ull_limit_value(num, optp, NULL);
}
diff --git a/mysys/my_thr_init.c b/mysys/my_thr_init.c
index 2e8decd7..b2436d2b 100644
--- a/mysys/my_thr_init.c
+++ b/mysys/my_thr_init.c
@@ -220,7 +220,11 @@ void my_thread_global_end(void)
fprintf(stderr,
"Error in my_thread_global_end(): %d threads didn't exit\n",
THR_thread_count);
-#endif
+#endif /* HAVE_PTHREAD_KILL */
+#ifdef SAFEMALLOC
+ /* We know we will have memoryleaks, suppress the leak report */
+ sf_leaking_memory= 1;
+#endif /* SAFEMALLOC */
all_threads_killed= 0;
break;
}
@@ -234,9 +238,7 @@ void my_thread_global_end(void)
that could use them.
*/
if (all_threads_killed)
- {
my_thread_destroy_internal_mutex();
- }
my_thread_global_init_done= 0;
}