From 06eaf7232e9a920468c0f8d74dcf2fe8b555501c Mon Sep 17 00:00:00 2001 From: Daniel Baumann Date: Sat, 13 Apr 2024 14:24:36 +0200 Subject: Adding upstream version 1:10.11.6. Signed-off-by: Daniel Baumann --- mysys/CMakeLists.txt | 198 ++ mysys/ChangeLog | 221 ++ mysys/array.c | 378 +++ mysys/base64.c | 443 +++ mysys/charset-def.c | 548 +++ mysys/charset.c | 1609 +++++++++ mysys/crc32/clang_workaround.h | 87 + mysys/crc32/crc32_arm64.c | 340 ++ mysys/crc32/crc32_ppc64.c | 5 + mysys/crc32/crc32_x86.c | 334 ++ mysys/crc32/crc32c.cc | 597 ++++ mysys/crc32/crc32c_amd64.cc | 711 ++++ mysys/crc32/crc32c_ppc.c | 5 + mysys/crc32/crc32c_ppc.h | 19 + mysys/crc32/crc_ppc64.h | 664 ++++ mysys/crc32/pcc_crc32_constants.h | 1206 +++++++ mysys/crc32/pcc_crc32c_constants.h | 1206 +++++++ mysys/crc32ieee.cc | 61 + mysys/errors.c | 132 + mysys/file_logger.c | 248 ++ mysys/get_password.c | 230 ++ mysys/guess_malloc_library.c | 65 + mysys/hash.c | 947 ++++++ mysys/lf_alloc-pin.c | 539 +++ mysys/lf_dynarray.c | 205 ++ mysys/lf_hash.cc | 591 ++++ mysys/list.c | 114 + mysys/ma_dyncol.c | 4433 ++++++++++++++++++++++++ mysys/mf_arr_appstr.c | 62 + mysys/mf_cache.c | 92 + mysys/mf_dirname.c | 155 + mysys/mf_fn_ext.c | 93 + mysys/mf_format.c | 139 + mysys/mf_getdate.c | 82 + mysys/mf_iocache.c | 1886 +++++++++++ mysys/mf_iocache2.c | 494 +++ mysys/mf_keycache.c | 6577 ++++++++++++++++++++++++++++++++++++ mysys/mf_keycaches.c | 107 + mysys/mf_loadpath.c | 59 + mysys/mf_pack.c | 456 +++ mysys/mf_path.c | 120 + mysys/mf_qsort.c | 217 ++ mysys/mf_qsort2.c | 20 + mysys/mf_radix.c | 60 + mysys/mf_same.c | 41 + mysys/mf_sort.c | 42 + mysys/mf_soundex.c | 106 + mysys/mf_tempdir.c | 96 + mysys/mf_tempfile.c | 177 + mysys/mf_unixpath.c | 36 + mysys/mf_wcomp.c | 90 + mysys/mulalloc.c | 127 + mysys/my_access.c | 267 ++ mysys/my_addr_resolve.c | 353 ++ mysys/my_alarm.c | 33 + mysys/my_alloc.c | 671 ++++ mysys/my_atomic_writes.c | 533 +++ mysys/my_basename.c | 42 + mysys/my_bit.c | 46 + mysys/my_bitmap.c | 
695 ++++ mysys/my_chmod.c | 48 + mysys/my_chsize.c | 101 + mysys/my_compare.c | 633 ++++ mysys/my_compress.c | 186 + mysys/my_copy.c | 151 + mysys/my_cpu.c | 79 + mysys/my_create.c | 60 + mysys/my_default.c | 1089 ++++++ mysys/my_delete.c | 261 ++ mysys/my_div.c | 38 + mysys/my_dlerror.c | 31 + mysys/my_error.c | 329 ++ mysys/my_file.c | 136 + mysys/my_fopen.c | 301 ++ mysys/my_fstream.c | 194 ++ mysys/my_gethwaddr.c | 214 ++ mysys/my_getncpus.c | 85 + mysys/my_getopt.c | 1768 ++++++++++ mysys/my_getpagesize.c | 41 + mysys/my_getsystime.c | 146 + mysys/my_getwd.c | 168 + mysys/my_init.c | 554 +++ mysys/my_largepage.c | 488 +++ mysys/my_lib.c | 381 +++ mysys/my_libwrap.c | 42 + mysys/my_likely.c | 172 + mysys/my_lock.c | 228 ++ mysys/my_lockmem.c | 101 + mysys/my_malloc.c | 260 ++ mysys/my_memmem.c | 84 + mysys/my_mess.c | 36 + mysys/my_minidump.cc | 115 + mysys/my_mkdir.c | 48 + mysys/my_mmap.c | 105 + mysys/my_new.cc | 117 + mysys/my_once.c | 119 + mysys/my_open.c | 158 + mysys/my_port.c | 40 + mysys/my_pread.c | 200 ++ mysys/my_pthread.c | 467 +++ mysys/my_quick.c | 82 + mysys/my_rdtsc.c | 796 +++++ mysys/my_read.c | 112 + mysys/my_redel.c | 155 + mysys/my_rename.c | 104 + mysys/my_rnd.c | 66 + mysys/my_safehash.c | 298 ++ mysys/my_safehash.h | 56 + mysys/my_seek.c | 103 + mysys/my_setuser.c | 82 + mysys/my_sleep.c | 35 + mysys/my_static.c | 139 + mysys/my_static.h | 48 + mysys/my_symlink.c | 266 ++ mysys/my_symlink2.c | 191 ++ mysys/my_sync.c | 188 ++ mysys/my_thr_init.c | 614 ++++ mysys/my_uuid.c | 226 ++ mysys/my_win_popen.cc | 170 + mysys/my_wincond.c | 114 + mysys/my_winerr.c | 126 + mysys/my_winfile.c | 738 ++++ mysys/my_winthread.c | 179 + mysys/my_wintoken.c | 42 + mysys/my_write.c | 120 + mysys/mysys_priv.h | 251 ++ mysys/psi_noop.c | 1074 ++++++ mysys/ptr_cmp.c | 231 ++ mysys/queues.c | 386 +++ mysys/safemalloc.c | 421 +++ mysys/stacktrace.c | 739 ++++ mysys/string.c | 229 ++ mysys/test_charset.c | 81 + mysys/test_dir.c | 48 + mysys/test_thr_mutex.c | 
162 + mysys/test_xml.c | 105 + mysys/testhash.c | 292 ++ mysys/thr_alarm.c | 845 +++++ mysys/thr_lock.c | 1837 ++++++++++ mysys/thr_mutex.c | 843 +++++ mysys/thr_rwlock.c | 377 +++ mysys/thr_timer.c | 599 ++++ mysys/tree.c | 804 +++++ mysys/typelib.c | 399 +++ mysys/waiting_threads.c | 1143 +++++++ mysys/wqueue.c | 242 ++ 146 files changed, 56042 insertions(+) create mode 100644 mysys/CMakeLists.txt create mode 100644 mysys/ChangeLog create mode 100644 mysys/array.c create mode 100644 mysys/base64.c create mode 100644 mysys/charset-def.c create mode 100644 mysys/charset.c create mode 100644 mysys/crc32/clang_workaround.h create mode 100644 mysys/crc32/crc32_arm64.c create mode 100644 mysys/crc32/crc32_ppc64.c create mode 100644 mysys/crc32/crc32_x86.c create mode 100644 mysys/crc32/crc32c.cc create mode 100644 mysys/crc32/crc32c_amd64.cc create mode 100644 mysys/crc32/crc32c_ppc.c create mode 100644 mysys/crc32/crc32c_ppc.h create mode 100644 mysys/crc32/crc_ppc64.h create mode 100644 mysys/crc32/pcc_crc32_constants.h create mode 100644 mysys/crc32/pcc_crc32c_constants.h create mode 100644 mysys/crc32ieee.cc create mode 100644 mysys/errors.c create mode 100644 mysys/file_logger.c create mode 100644 mysys/get_password.c create mode 100644 mysys/guess_malloc_library.c create mode 100644 mysys/hash.c create mode 100644 mysys/lf_alloc-pin.c create mode 100644 mysys/lf_dynarray.c create mode 100644 mysys/lf_hash.cc create mode 100644 mysys/list.c create mode 100644 mysys/ma_dyncol.c create mode 100644 mysys/mf_arr_appstr.c create mode 100644 mysys/mf_cache.c create mode 100644 mysys/mf_dirname.c create mode 100644 mysys/mf_fn_ext.c create mode 100644 mysys/mf_format.c create mode 100644 mysys/mf_getdate.c create mode 100644 mysys/mf_iocache.c create mode 100644 mysys/mf_iocache2.c create mode 100644 mysys/mf_keycache.c create mode 100644 mysys/mf_keycaches.c create mode 100644 mysys/mf_loadpath.c create mode 100644 mysys/mf_pack.c create mode 100644 mysys/mf_path.c 
create mode 100644 mysys/mf_qsort.c create mode 100644 mysys/mf_qsort2.c create mode 100644 mysys/mf_radix.c create mode 100644 mysys/mf_same.c create mode 100644 mysys/mf_sort.c create mode 100644 mysys/mf_soundex.c create mode 100644 mysys/mf_tempdir.c create mode 100644 mysys/mf_tempfile.c create mode 100644 mysys/mf_unixpath.c create mode 100644 mysys/mf_wcomp.c create mode 100644 mysys/mulalloc.c create mode 100644 mysys/my_access.c create mode 100644 mysys/my_addr_resolve.c create mode 100644 mysys/my_alarm.c create mode 100644 mysys/my_alloc.c create mode 100644 mysys/my_atomic_writes.c create mode 100644 mysys/my_basename.c create mode 100644 mysys/my_bit.c create mode 100644 mysys/my_bitmap.c create mode 100644 mysys/my_chmod.c create mode 100644 mysys/my_chsize.c create mode 100644 mysys/my_compare.c create mode 100644 mysys/my_compress.c create mode 100644 mysys/my_copy.c create mode 100644 mysys/my_cpu.c create mode 100644 mysys/my_create.c create mode 100644 mysys/my_default.c create mode 100644 mysys/my_delete.c create mode 100644 mysys/my_div.c create mode 100644 mysys/my_dlerror.c create mode 100644 mysys/my_error.c create mode 100644 mysys/my_file.c create mode 100644 mysys/my_fopen.c create mode 100644 mysys/my_fstream.c create mode 100644 mysys/my_gethwaddr.c create mode 100644 mysys/my_getncpus.c create mode 100644 mysys/my_getopt.c create mode 100644 mysys/my_getpagesize.c create mode 100644 mysys/my_getsystime.c create mode 100644 mysys/my_getwd.c create mode 100644 mysys/my_init.c create mode 100644 mysys/my_largepage.c create mode 100644 mysys/my_lib.c create mode 100644 mysys/my_libwrap.c create mode 100644 mysys/my_likely.c create mode 100644 mysys/my_lock.c create mode 100644 mysys/my_lockmem.c create mode 100644 mysys/my_malloc.c create mode 100644 mysys/my_memmem.c create mode 100644 mysys/my_mess.c create mode 100644 mysys/my_minidump.cc create mode 100644 mysys/my_mkdir.c create mode 100644 mysys/my_mmap.c create mode 100644 
mysys/my_new.cc create mode 100644 mysys/my_once.c create mode 100644 mysys/my_open.c create mode 100644 mysys/my_port.c create mode 100644 mysys/my_pread.c create mode 100644 mysys/my_pthread.c create mode 100644 mysys/my_quick.c create mode 100644 mysys/my_rdtsc.c create mode 100644 mysys/my_read.c create mode 100644 mysys/my_redel.c create mode 100644 mysys/my_rename.c create mode 100644 mysys/my_rnd.c create mode 100644 mysys/my_safehash.c create mode 100644 mysys/my_safehash.h create mode 100644 mysys/my_seek.c create mode 100644 mysys/my_setuser.c create mode 100644 mysys/my_sleep.c create mode 100644 mysys/my_static.c create mode 100644 mysys/my_static.h create mode 100644 mysys/my_symlink.c create mode 100644 mysys/my_symlink2.c create mode 100644 mysys/my_sync.c create mode 100644 mysys/my_thr_init.c create mode 100644 mysys/my_uuid.c create mode 100644 mysys/my_win_popen.cc create mode 100644 mysys/my_wincond.c create mode 100644 mysys/my_winerr.c create mode 100644 mysys/my_winfile.c create mode 100644 mysys/my_winthread.c create mode 100644 mysys/my_wintoken.c create mode 100644 mysys/my_write.c create mode 100644 mysys/mysys_priv.h create mode 100644 mysys/psi_noop.c create mode 100644 mysys/ptr_cmp.c create mode 100644 mysys/queues.c create mode 100644 mysys/safemalloc.c create mode 100644 mysys/stacktrace.c create mode 100644 mysys/string.c create mode 100644 mysys/test_charset.c create mode 100644 mysys/test_dir.c create mode 100644 mysys/test_thr_mutex.c create mode 100644 mysys/test_xml.c create mode 100644 mysys/testhash.c create mode 100644 mysys/thr_alarm.c create mode 100644 mysys/thr_lock.c create mode 100644 mysys/thr_mutex.c create mode 100644 mysys/thr_rwlock.c create mode 100644 mysys/thr_timer.c create mode 100644 mysys/tree.c create mode 100644 mysys/typelib.c create mode 100644 mysys/waiting_threads.c create mode 100644 mysys/wqueue.c (limited to 'mysys') diff --git a/mysys/CMakeLists.txt b/mysys/CMakeLists.txt new file mode 100644 
index 00000000..758243df --- /dev/null +++ b/mysys/CMakeLists.txt @@ -0,0 +1,198 @@ +# Copyright (c) 2006, 2014, Oracle and/or its affiliates +# Copyright (c) 2009, 2018, MariaDB Corporation +# +# This program is free software; you can redistribute it and/or modify +# it under the terms of the GNU General Public License as published by +# the Free Software Foundation; version 2 of the License. +# +# This program is distributed in the hope that it will be useful, +# but WITHOUT ANY WARRANTY; without even the implied warranty of +# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +# GNU General Public License for more details. +# +# You should have received a copy of the GNU General Public License +# along with this program; if not, write to the Free Software +# Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1335 USA + +INCLUDE_DIRECTORIES(${ZLIB_INCLUDE_DIR} ${CMAKE_SOURCE_DIR}/include ${CMAKE_SOURCE_DIR}/mysys) + +SET(MYSYS_SOURCES array.c charset-def.c charset.c my_default.c + get_password.c + errors.c hash.c list.c + mf_cache.c mf_dirname.c mf_fn_ext.c + mf_format.c mf_getdate.c mf_iocache.c mf_iocache2.c mf_keycache.c + mf_keycaches.c mf_loadpath.c mf_pack.c mf_path.c mf_qsort.c mf_qsort2.c + mf_radix.c mf_same.c mf_sort.c mf_soundex.c mf_arr_appstr.c mf_tempdir.c + mf_tempfile.c mf_unixpath.c mf_wcomp.c mulalloc.c my_access.c + my_alloc.c my_bit.c my_bitmap.c my_chsize.c + my_compress.c my_copy.c my_create.c my_delete.c + my_div.c my_error.c my_file.c my_fopen.c my_fstream.c + my_gethwaddr.c my_getopt.c my_getsystime.c my_getwd.c my_compare.c my_init.c + my_lib.c my_lock.c my_malloc.c my_mess.c + my_mkdir.c my_mmap.c my_once.c my_open.c my_pread.c my_pthread.c + my_quick.c my_read.c my_redel.c my_rename.c my_seek.c my_sleep.c + my_static.c my_symlink.c my_symlink2.c my_sync.c my_thr_init.c + my_basename.c + my_write.c ptr_cmp.c queues.c stacktrace.c + string.c thr_alarm.c thr_lock.c thr_mutex.c + thr_rwlock.c thr_timer.c + tree.c 
typelib.c base64.c my_memmem.c + my_getpagesize.c + guess_malloc_library.c + lf_alloc-pin.c lf_dynarray.c lf_hash.cc + safemalloc.c my_new.cc + my_getncpus.c my_safehash.c my_chmod.c my_rnd.c + my_uuid.c wqueue.c waiting_threads.c ma_dyncol.c ../sql-common/my_time.c + my_rdtsc.c psi_noop.c + my_atomic_writes.c my_cpu.c my_likely.c my_largepage.c + file_logger.c my_dlerror.c crc32/crc32c.cc) + +IF (WIN32) + SET (MYSYS_SOURCES ${MYSYS_SOURCES} + my_winthread.c + my_wintoken.c + my_wincond.c + my_winerr.c + my_winfile.c + my_minidump.cc + my_win_popen.cc) +ENDIF() + +IF(MSVC_INTEL) + SET(MYSYS_SOURCES ${MYSYS_SOURCES} crc32/crc32_x86.c) + IF(CMAKE_SIZEOF_VOID_P EQUAL 8) + SET (MYSYS_SOURCES ${MYSYS_SOURCES} crc32/crc32c_amd64.cc) + ENDIF() + ADD_DEFINITIONS(-DHAVE_SSE42 -DHAVE_PCLMUL) + IF(CLANG_CL) + SET_SOURCE_FILES_PROPERTIES(crc32/crc32_x86.c PROPERTIES COMPILE_FLAGS "-msse4.2 -mpclmul") + ENDIF() +ELSEIF(CMAKE_SYSTEM_PROCESSOR MATCHES "x86_64|amd64|i386|i686") + MY_CHECK_CXX_COMPILER_FLAG(-msse4.2) + MY_CHECK_CXX_COMPILER_FLAG(-mpclmul) + CHECK_INCLUDE_FILE(cpuid.h HAVE_CPUID_H) + CHECK_INCLUDE_FILE(x86intrin.h HAVE_X86INTRIN_H) + IF(have_CXX__msse4.2 AND HAVE_CPUID_H) + ADD_DEFINITIONS(-DHAVE_SSE42) + IF (have_CXX__mpclmul AND HAVE_X86INTRIN_H) + ADD_DEFINITIONS(-DHAVE_PCLMUL) + SET(MYSYS_SOURCES ${MYSYS_SOURCES} crc32/crc32_x86.c) + SET_SOURCE_FILES_PROPERTIES(crc32/crc32_x86.c PROPERTIES COMPILE_FLAGS "-msse4.2 -mpclmul") + IF(CMAKE_SIZEOF_VOID_P EQUAL 8) + SET(MYSYS_SOURCES ${MYSYS_SOURCES} crc32/crc32c_amd64.cc) + SET_SOURCE_FILES_PROPERTIES(crc32/crc32c_amd64.cc PROPERTIES COMPILE_FLAGS "-msse4.2 -mpclmul") + ENDIF() + ENDIF() + ENDIF() +ELSEIF(CMAKE_SYSTEM_PROCESSOR MATCHES "aarch64|AARCH64") + IF(CMAKE_COMPILER_IS_GNUCC) + include(CheckCXXSourceCompiles) + + CHECK_CXX_SOURCE_COMPILES(" + #define CRC32CX(crc, value) __asm__(\"crc32cx %w[c], %w[c], %x[v]\":[c]\"+r\"(crc):[v]\"r\"(value)) + asm(\".arch_extension crc\"); + unsigned int foo(unsigned int ret) { 
+ CRC32CX(ret, 0); + return ret; + } + #include <sys/auxv.h> + int main() { foo(0); + #ifdef __linux__ + getauxval(AT_HWCAP); + #else + unsigned long v; + elf_aux_info(AT_HWCAP, &v, sizeof(v)); + #endif + }" HAVE_ARMV8_CRC) + + CHECK_CXX_SOURCE_COMPILES(" + asm(\".arch_extension crypto\"); + unsigned int foo(unsigned int ret) { + __asm__(\"pmull v2.1q, v2.1d, v1.1d\"); + return ret; + } + #include <sys/auxv.h> + int main() { foo(0); + #ifdef __linux__ + getauxval(AT_HWCAP); + #else + unsigned long v; + elf_aux_info(AT_HWCAP, &v, sizeof(v)); + #endif + }" HAVE_ARMV8_CRYPTO) + + CHECK_C_COMPILER_FLAG(-march=armv8-a+crc+crypto HAVE_ARMV8_CRC_CRYPTO_MARCH) + + IF(HAVE_ARMV8_CRC_CRYPTO_MARCH) + CHECK_INCLUDE_FILE(arm_acle.h HAVE_ARM_ACLE_H -march=armv8-a+crc+crypto) + IF(HAVE_ARM_ACLE_H) + ADD_DEFINITIONS(-DHAVE_ARMV8_CRC_CRYPTO_INTRINSICS) + ENDIF() + IF(HAVE_ARMV8_CRC) + ADD_DEFINITIONS(-DHAVE_ARMV8_CRC) + ENDIF() + IF(HAVE_ARMV8_CRYPTO) + ADD_DEFINITIONS(-DHAVE_ARMV8_CRYPTO) + ENDIF() + SET(MYSYS_SOURCES ${MYSYS_SOURCES} crc32/crc32_arm64.c) + SET_SOURCE_FILES_PROPERTIES(crc32/crc32_arm64.c PROPERTIES + COMPILE_FLAGS "-march=armv8-a+crc+crypto") + ENDIF() + ENDIF() +ENDIF() + +IF(CMAKE_SYSTEM_PROCESSOR MATCHES "ppc64|powerpc64" OR CMAKE_SYSTEM_NAME MATCHES AIX) + SET(MYSYS_SOURCES ${MYSYS_SOURCES} crc32/crc32_ppc64.c crc32/crc32c_ppc.c) + SET_SOURCE_FILES_PROPERTIES(crc32/crc32_ppc64.c crc32/crc32c_ppc.c PROPERTIES + COMPILE_FLAGS "${COMPILE_FLAGS} -maltivec -mvsx -mpower8-vector -mcrypto -mpower8-vector") + ADD_DEFINITIONS(-DHAVE_POWER8 -DHAS_ALTIVEC) +ELSE() + SET (MYSYS_SOURCES ${MYSYS_SOURCES} crc32ieee.cc) +ENDIF() + +IF(UNIX) + SET (MYSYS_SOURCES ${MYSYS_SOURCES} my_addr_resolve.c my_setuser.c) +ENDIF() + +IF(HAVE_ALARM) + SET(MYSYS_SOURCES ${MYSYS_SOURCES} my_alarm.c) +ENDIF() + +IF(HAVE_MLOCK) + SET(MYSYS_SOURCES ${MYSYS_SOURCES} my_lockmem.c) +ENDIF() + +ADD_CONVENIENCE_LIBRARY(mysys ${MYSYS_SOURCES}) +MAYBE_DISABLE_IPO(mysys) +TARGET_LINK_LIBRARIES(mysys dbug strings ${ZLIB_LIBRARY} 
+ ${LIBNSL} ${LIBM} ${LIBRT} ${CMAKE_DL_LIBS} ${LIBSOCKET} ${LIBEXECINFO}) +DTRACE_INSTRUMENT(mysys) + +IF (HAVE_GCC_C11_ATOMICS_WITH_LIBATOMIC) + TARGET_LINK_LIBRARIES(mysys atomic) +ENDIF() + +IF(HAVE_BFD_H) + TARGET_LINK_LIBRARIES(mysys bfd) +ENDIF(HAVE_BFD_H) + +IF (WIN32) + TARGET_LINK_LIBRARIES(mysys iphlpapi dbghelp) +ENDIF(WIN32) + +# Need explicit pthread for gcc -fsanitize=address +IF(CMAKE_USE_PTHREADS_INIT AND CMAKE_C_FLAGS MATCHES "-fsanitize=") + TARGET_LINK_LIBRARIES(mysys pthread) +ENDIF() + +ADD_EXECUTABLE(thr_lock thr_lock.c) +TARGET_LINK_LIBRARIES(thr_lock mysys) +SET_TARGET_PROPERTIES(thr_lock PROPERTIES COMPILE_FLAGS "-DMAIN") + +ADD_EXECUTABLE(thr_timer thr_timer.c) +TARGET_LINK_LIBRARIES(thr_timer mysys) +SET_TARGET_PROPERTIES(thr_timer PROPERTIES COMPILE_FLAGS "-DMAIN") + +ADD_EXECUTABLE(test_hash hash.c) +TARGET_LINK_LIBRARIES(test_hash mysys) +SET_TARGET_PROPERTIES(test_hash PROPERTIES COMPILE_FLAGS "-DMAIN") diff --git a/mysys/ChangeLog b/mysys/ChangeLog new file mode 100644 index 00000000..7a426106 --- /dev/null +++ b/mysys/ChangeLog @@ -0,0 +1,221 @@ +2000-02-16 Michael Widenius + +* Added an extra argument to the compare routine for queues to allow + more advanced key compare functions. + +2000-02-10 Michael Widenius + +* Added THR_READ_NO_INSERT lock privilege to thr_lock. + +1999-08-21 Michael Widenius + +* Fix that '-1.49 or -1.49' is true +* Allow negative hexadecimal numbers (like -0x0f). +* Fixed problem with auto_increment on float and double. + +Wed Dec 17 02:13:58 1997 + +* Faster flush of keycache. + +Sat Dec 2 21:36:20 1995 Michael Widenius (monty@bitch) + + * array.c push_element & alloc_element. + +Wed Mar 3 00:54:20 1993 Michael Widenius (monty@bitch) + + * Removed automatic O_TRUNC from my_create. + +Wed Oct 28 02:10:56 1992 Michael Widenius (monty@bitch) + + * Enabled ASNYNC_IO on SUN. + +Mon Aug 31 23:51:13 1992 Michael Widenius (monty@bitch) + + * Changed tree_insert to return element if ok. 
+ * Added new define tree_set_pointer(). + * Chagned delete_queue() to not free if allready freed. + +Mon Aug 17 01:46:36 1992 Michael Widenius (monty@bitch) + + * Added ny cashing-rutine mf_iocash for quicker io. + +Wed Aug 12 13:41:18 1992 Michael Widenius (monty@bitch) + + * Added new function get_copy_of_memory for combined malloc/copy. + * Splitted my_malloc to three files. + +Thu Jan 23 22:02:37 1992 Michael Widenius (monty at LYNX) + + * Added range-checks and aligned checks on ptrs to + safe_malloc:free and safe_malloc:realloc to catch more + error nicely without core-dumps. + +Wed Nov 13 01:52:18 1991 Michael Widenius (monty at LYNX) + + * Added use of mysys as a shared library. + +Sat Nov 9 14:38:21 1991 Michael Widenius (monty at LYNX) + + * Added expand of ~username to unpack_dirname. + +Tue Sep 17 21:15:08 1991 Michael Widenius (monty at LYNX) + + * Don't free null-pointers if passed to my_free + +Fri May 17 20:11:27 1991 Michael Widenius (monty at LYNX) + + * Changed all char * to string. (Can't change const char * because + of bug in C-definition. + +Tue Apr 30 01:32:56 1991 Michael Widenius (monty at LYNX) + + * my_path now examines environment for posix variable "_" if + progname is given and has no path. + +Mon Apr 22 16:12:56 1991 Michael Widenius (monty at LYNX) + + * Added function my_load_path() to fix path to a hard-path. + +Mon Apr 15 22:08:58 1991 Michael Widenius (monty at LYNX) + + * Added more info on DBUG-stream when freeing unallocated data. + +Wed Apr 3 18:41:28 1991 Michael Widenius (monty at LYNX) + + * Added global flag sf_malloc_no_sanity to make it possibly + to avoid sanity-checks in right code with uses malloc a lot. + +Tue Mar 26 15:09:45 1991 Mikael WIDENIUS (monty at panther) + + * Made soundex look nicer + +Sat Mar 23 10:49:49 1991 Michael Widenius (monty at LYNX) + + * Added init of alarm variables to skip some warnings from gcc. 
+ +Tue Mar 5 16:50:34 1991 Michael Widenius (monty at LYNX) + + * Our qsort now only test if compare() function returns >= 0 + and is optimized for compare() returning > 0. + +Fri Nov 23 23:53:46 1990 Michael Widenius (monty at LYNX) + + * Added function my_set_alarm_variable to get a variable set + on some time. + my_alarm.h added for functions who want to print stat after + a given time or after a number of loops. + Changed my_lock to use new function and use defines in my_alarm.h + +Mon Oct 1 13:16:15 1990 Michael Widenius (monty at LYNX) + + * Added use of asynchronic io in read_cash_record(). + * Added write_cash and flush_write_cash to record cashing. + +Sun Sep 16 22:05:25 1990 Michael Widenius (monty at LYNX) + + * Added optional alarm to my_lock if one has FCNTL_LOCK. Added new + defines to my_sys.h. + +Mon Aug 27 22:20:38 1990 Michael Widenius (monty at lynx) + + * my_end() now can print output about executed program. + * Added parameter-defines for my_end in my_sys.h + +Sun Apr 1 23:29:47 1990 Monty (monty at monty) + + * Changed mf_keydisk.c to have separate functions for read and write. + Read can now return pointer to intern key-buffer to skip + unessessary memcpy-s. + +Fri Mar 23 23:03:39 1990 Monty (monty at monty) + + * function test_if_hard_pathname() added in dirname.c + * my_getwd now only saves changed current dir if dir is a + hard pathname. + * changed my_path() to use test_if_hard_pathname() + +Thu Mar 1 14:47:59 1990 Monty (monty at monty) + + * New function my_path(). + +Sat Feb 24 02:54:35 1990 Monty (monty at monty) + + * Added print of my_progname in my_mess.c + +Sun Feb 11 17:55:58 1990 David Axmark (davida at isil) + + * Concatenated libarys my_func and my_sys because of to much + crosswise dependencies. + * Fixed varagrs code in mf_fixadr.c + +Mon Dec 4 17:36:16 1989 Monty (monty at monty) + + * Changed safemalloc() to use my_message() if out of memory and + to check MY_WME if we want this error-messages. 
+ * Changed my_setwd() to use dos_setdrive() insted of system(). + +Wed Oct 25 02:56:07 1989 Monty (monty at monty) + + * Changed my_mktmp1() to work like tempnam() with default dirname. + * Changed name of my_mktmp1.c to my_tempnam.c + +Thu Oct 19 16:39:27 1989 David Axmark (davida at isil) + + * Removed libary mysysnc. Instead added a hook to my_error that + can call my_message if needed. + +Thu Oct 5 01:33:29 1989 David Axmark (davida at isil) + + * Use MY_SEEK_{SET,CUR,END} as arguments to my_seek + + * Added a a array of structs that holds properties of open files. + Removed include file extras.h + +Wed Jun 21 01:34:04 1989 Monty (monty at monty) + + * Added two new malloc-functions: my_once_alloc() and + my_once_free(). These give easyer and quicker startup. + +Mon May 22 14:03:44 1989 Monty (monty at monty) + + * Fixed my_getwd and my_setwd so they work. + * Added extern variabel curr_char[] with is set to current + directory after my_getwd() or my_setwd(); + +Mon Jan 23 03:38:50 1989 Monty (monty at monty) + + * Changed my_chsize to check if NO_CHSIZE is defined. If new file + should be shorter it fills unused part with null. + * Changed my_lock to not check for arg 0 (Functions should use + LK_TO_EOF to lock all file. + +Tue Dec 6 15:09:44 1988 Monty (monty at monty) + + * Added DBUG_PRINT if error in my_seek. + +Mon Dec 5 15:58:48 1988 Monty (monty at monty) + + * Added DBUG_PRINT if not all byte read/written in my_read(), + my_fread(), my_write() and my_fwrite(); + +Sat Dec 3 01:48:03 1988 Monty (monty at monty) + + * Fixed bug in Makefile; quick did't work. + * Changed safemalloc to use bmove, bfill and memcpy when handling + memoryblocks. + +Fri Dec 2 03:29:21 1988 Monty (monty at monty) + + * Added more defines under MEMORY in my_func.h + * Added functions to llib-lmysys. + * Removed RCS/* files and installed ewerything as stable. + (Because errors in old RCS-files. 
+ +Wed Nov 9 00:32:33 1988 Monty (monty at monty) + + * Changed realloc for MSDOS; Previous version freed old block on + * error, new version (of compiler) dosn't. + +Wed Oct 26 21:07:27 1988 Monty (monty at monty) + + * Fixed missing updateing of my_stream_opened; diff --git a/mysys/array.c b/mysys/array.c new file mode 100644 index 00000000..6e871ee6 --- /dev/null +++ b/mysys/array.c @@ -0,0 +1,378 @@ +/* Copyright (c) 2000, 2010, Oracle and/or its affiliates. All rights reserved. + + This program is free software; you can redistribute it and/or modify + it under the terms of the GNU General Public License as published by + the Free Software Foundation; version 2 of the License. + + This program is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + GNU General Public License for more details. + + You should have received a copy of the GNU General Public License + along with this program; if not, write to the Free Software + Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1335 USA */ + +/* Handling of arrays that can grow dynamicly. */ + +#include "mysys_priv.h" +#include "m_string.h" + +/* + Initiate dynamic array + + SYNOPSIS + init_dynamic_array2() + ps_key Key to register instrumented memory + array Pointer to an array + element_size Size of element + init_buffer Initial buffer pointer + init_alloc Number of initial elements + alloc_increment Increment for adding new elements + my_flags Flags to my_malloc + + DESCRIPTION + init_dynamic_array() initiates array and allocate space for + init_alloc eilements. + Array is usable even if space allocation failed, hence, the + function never returns TRUE. 
+ + RETURN VALUE + FALSE Ok +*/ + +my_bool init_dynamic_array2(PSI_memory_key psi_key, DYNAMIC_ARRAY *array, + size_t element_size, void *init_buffer, + size_t init_alloc, size_t alloc_increment, + myf my_flags) +{ + DBUG_ENTER("init_dynamic_array2"); + if (!alloc_increment) /* 0 means: derive a default growth step */ + { + alloc_increment=MY_MAX((8192-MALLOC_OVERHEAD)/element_size,16); /* as many elements as fit in ~8K, at least 16 */ + if (init_alloc > 8 && alloc_increment > init_alloc * 2) + alloc_increment=init_alloc*2; /* cap the step at twice the initial size */ + } + array->elements=0; + array->max_element=init_alloc; + array->alloc_increment=alloc_increment; + array->size_of_element=element_size; + array->m_psi_key= psi_key; + array->malloc_flags= my_flags; + DBUG_ASSERT((my_flags & MY_INIT_BUFFER_USED) == 0); /* the flag is set internally below; callers must not pass it */ + if ((array->buffer= init_buffer)) /* caller-supplied buffer: use it and flag it so delete_dynamic() will not free it */ + { + array->malloc_flags|= MY_INIT_BUFFER_USED; + DBUG_RETURN(FALSE); + } + /* + Since the dynamic array is usable even if allocation fails here, malloc + should not throw an error: max_element is reset to 0 and the next insert retries + */ + if (init_alloc && + !(array->buffer= (uchar*) my_malloc(psi_key, element_size*init_alloc, + MYF(my_flags)))) + array->max_element=0; + DBUG_RETURN(FALSE); +} + +/* + Insert element at the end of array. Allocate memory if needed. + + SYNOPSIS + insert_dynamic() + array Array to append to + element Pointer to the value to copy in (size_of_element bytes) + + RETURN VALUE + TRUE Insert failed + FALSE Ok +*/ + +my_bool insert_dynamic(DYNAMIC_ARRAY *array, const void * element) +{ + void *buffer; + if (array->elements == array->max_element) + { /* Array is full: alloc_dynamic() grows it and reserves the new slot */ + if (!(buffer=alloc_dynamic(array))) + return TRUE; + } + else + { + buffer=array->buffer+(array->elements * array->size_of_element); + array->elements++; + } + memcpy(buffer,element,(size_t) array->size_of_element); + return FALSE; +} + + +/* + Alloc space for next element(s) + + SYNOPSIS + alloc_dynamic() + array + + DESCRIPTION + alloc_dynamic() checks if there is empty space for at least + one element if not tries to allocate space for alloc_increment + elements at the end of array. 
+ + RETURN VALUE + pointer Pointer to empty space for element + 0 Error +*/ + +void *alloc_dynamic(DYNAMIC_ARRAY *array) +{ + DBUG_ENTER("alloc_dynamic"); + + DBUG_ASSERT(array->size_of_element); /* Ensure init() is called */ + if (array->elements == array->max_element) /* no free slot: grow by alloc_increment elements */ + { + char *new_ptr; + if (array->malloc_flags & MY_INIT_BUFFER_USED) + { + /* + In this scenario, the buffer is the preallocated one given at init, + so we have to create an all-new malloc and copy the elements since we overflowed + */ + if (!(new_ptr= (char *) my_malloc(array->m_psi_key, + (array->max_element+ + array->alloc_increment) * + array->size_of_element, + MYF(array->malloc_flags | MY_WME)))) + DBUG_RETURN(0); + if (array->elements) + memcpy(new_ptr, array->buffer, + array->elements * array->size_of_element); + array->malloc_flags&= ~MY_INIT_BUFFER_USED; /* from now on the buffer is a heap block */ + } + else if (!(new_ptr=(char*) + my_realloc(array->m_psi_key, array->buffer, + (array->max_element+ array->alloc_increment) * + array->size_of_element, + MYF(MY_WME | MY_ALLOW_ZERO_PTR | + array->malloc_flags)))) + DBUG_RETURN(0); /* array fields are not modified on failure */ + array->buffer= (uchar*) new_ptr; + array->max_element+=array->alloc_increment; + } + DBUG_RETURN(array->buffer+(array->elements++ * array->size_of_element)); /* hand out the next slot and count it as used */ +} + + +/* + Pop last element from array. + + SYNOPSIS + pop_dynamic() + array Array to remove the last element from + + RETURN VALUE + pointer Ok; points at the removed element, which is still stored in the buffer + 0 Array is empty +*/ + +void *pop_dynamic(DYNAMIC_ARRAY *array) +{ + if (array->elements) + return array->buffer+(--array->elements * array->size_of_element); + return 0; +} + +/* + Replace element in array with given element and index + + SYNOPSIS + set_dynamic() + array + element Element to be inserted + idx Index where element is to be inserted + + DESCRIPTION + set_dynamic() replaces element in array. + If idx > max_element insert new element. Allocate memory if needed. 
+ + RETURN VALUE + TRUE Idx was out of range and allocation of new memory failed + FALSE Ok +*/ + +my_bool set_dynamic(DYNAMIC_ARRAY *array, const void *element, size_t idx) +{ + if (idx >= array->elements) + { /* Writing at or past the end: grow if needed, zero the gap, extend the array */ + if (idx >= array->max_element && allocate_dynamic(array, idx)) + return TRUE; + bzero((uchar*) (array->buffer+array->elements*array->size_of_element), + (idx - array->elements)*array->size_of_element); + array->elements=idx+1; + } + memcpy(array->buffer+(idx * array->size_of_element),element, + array->size_of_element); + return FALSE; +} + + +/* + Ensure that dynamic array has enough elements + + SYNOPSIS + allocate_dynamic() + array Array to grow + max_elements Highest index that must fit; the capacity becomes the next multiple of alloc_increment above it + + NOTES + Any new allocated element are NOT initialized and array->elements is not changed + + RETURN VALUE + FALSE Ok + TRUE Allocation of new memory failed +*/ + +my_bool allocate_dynamic(DYNAMIC_ARRAY *array, size_t max_elements) +{ + DBUG_ENTER("allocate_dynamic"); + + if (max_elements >= array->max_element) + { + size_t size; + uchar *new_ptr; + size= (max_elements + array->alloc_increment)/array->alloc_increment; + size*= array->alloc_increment; + if (array->malloc_flags & MY_INIT_BUFFER_USED) + { + /* + In this scenario, the buffer is statically preallocated, + so we have to create an all-new malloc since we overflowed + */ + if (!(new_ptr= (uchar *) my_malloc(array->m_psi_key, size * + array->size_of_element, + MYF(array->malloc_flags | MY_WME)))) + DBUG_RETURN(TRUE); /* allocation failed: report it, like the my_realloc() branch below */ + memcpy(new_ptr, array->buffer, + array->elements * array->size_of_element); + array->malloc_flags&= ~MY_INIT_BUFFER_USED; /* the buffer is a heap block from now on */ + } + else if (!(new_ptr= (uchar*) my_realloc(array->m_psi_key, + array->buffer,size * + array->size_of_element, + MYF(MY_WME | MY_ALLOW_ZERO_PTR | + array->malloc_flags)))) + DBUG_RETURN(TRUE); + array->buffer= new_ptr; + array->max_element= size; + } + DBUG_RETURN(FALSE); +} + + +/* + Get an element from array by given index + + SYNOPSIS + get_dynamic() + array + uchar* Element to be returned. 
If idx > elements contain zeroes. + idx Index of element wanted. +*/ + +void get_dynamic(DYNAMIC_ARRAY *array, void *element, size_t idx) +{ + if (idx >= array->elements) + { + DBUG_PRINT("warning",("To big array idx: %d, array size is %d", + idx,array->elements)); + bzero(element,array->size_of_element); + return; + } + memcpy(element,array->buffer+idx*array->size_of_element, + (size_t) array->size_of_element); +} + + +/* + Empty array by freeing all memory + + SYNOPSIS + delete_dynamic() + array Array to be deleted +*/ + +void delete_dynamic(DYNAMIC_ARRAY *array) +{ + /* + Just mark as empty if we are using a static buffer + */ + if (!(array->malloc_flags & MY_INIT_BUFFER_USED) && array->buffer) + my_free(array->buffer); + + array->buffer= 0; + array->elements= array->max_element= 0; +} + +/* + Delete element by given index + + SYNOPSIS + delete_dynamic_element() + array + idx Index of element to be deleted +*/ + +void delete_dynamic_element(DYNAMIC_ARRAY *array, size_t idx) +{ + char *ptr= (char*) array->buffer+array->size_of_element*idx; + array->elements--; + memmove(ptr,ptr+array->size_of_element, + (array->elements-idx)*array->size_of_element); +} + +/* + Wrapper around delete_dynamic, calling a FREE function on every + element, before releasing the memory + + SYNOPSIS + delete_dynamic_with_callback() + array + f The function to be called on every element before + deleting the array; +*/ +void delete_dynamic_with_callback(DYNAMIC_ARRAY *array, FREE_FUNC f) { + size_t i; + char *ptr= (char*) array->buffer; + for (i= 0; i < array->elements; i++, ptr+= array->size_of_element) { + f(ptr); + } + delete_dynamic(array); +} +/* + Free unused memory + + SYNOPSIS + freeze_size() + array Array to be freed + +*/ + +void freeze_size(DYNAMIC_ARRAY *array) +{ + size_t elements; + + /* + Do nothing if we are using a static buffer + */ + if (array->malloc_flags & MY_INIT_BUFFER_USED) + return; + + elements= MY_MAX(array->elements, 1); + if (array->buffer && 
array->max_element > elements) + { + array->buffer=(uchar*) my_realloc(array->m_psi_key, array->buffer, + elements * array->size_of_element, + MYF(MY_WME | array->malloc_flags)); + array->max_element= elements; + } +} diff --git a/mysys/base64.c b/mysys/base64.c new file mode 100644 index 00000000..d46c9a22 --- /dev/null +++ b/mysys/base64.c @@ -0,0 +1,443 @@ +/* Copyright (c) 2003, 2010, Oracle and/or its affiliates. + Copyright (c) 2013, MariaDB Foundation. + + This program is free software; you can redistribute it and/or modify + it under the terms of the GNU General Public License as published by + the Free Software Foundation; version 2 of the License. + + This program is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + GNU General Public License for more details. + + You should have received a copy of the GNU General Public License + along with this program; if not, write to the Free Software + Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1335 USA */ + +#include +#include /* strchr() */ +#include /* my_isspace() */ + +#ifndef MAIN + +static char base64_table[] = "ABCDEFGHIJKLMNOPQRSTUVWXYZ" + "abcdefghijklmnopqrstuvwxyz" + "0123456789+/"; + +/** + * Maximum length my_base64_needed_encoded_length() + * can handle without signed integer overflow. + */ +int +my_base64_encode_max_arg_length() +{ + /* + my_base64_needed_encoded_length(1589695686) -> 2147483646 (7FFFFFFE) + my_base64_needed_encoded_length(1589695687) -> -2147483645 + */ + return 0x5EC0D4C6; /* 1589695686 */ +} + + +int +my_base64_needed_encoded_length(int length_of_data) +{ + int nb_base64_chars; + nb_base64_chars= (length_of_data + 2) / 3 * 4; + + return + nb_base64_chars + /* base64 char incl padding */ + (nb_base64_chars - 1)/ 76 + /* newlines */ + 1; /* NUL termination of string */ +} + + +/** + * Maximum length supported by my_base64_decode(). 
+ */ +int +my_base64_decode_max_arg_length() +{ + return 0x7FFFFFFF; +} + + +int +my_base64_needed_decoded_length(int length_of_encoded_data) +{ + return (int) ((longlong) length_of_encoded_data + 3) / 4 * 3; +} + + +/* + Encode a data as base64. + + Note: We require that dst is pre-allocated to correct size. + See my_base64_needed_encoded_length(). + + Note: We add line separators every 76 characters. + + Note: The output string is properly padded with the '=' character, + so the length of the output string is always divisable by 4. +*/ + +int +my_base64_encode(const void *src, size_t src_len, char *dst) +{ + const unsigned char *s= (const unsigned char*)src; + size_t i= 0; + size_t len= 0; + + for (; i < src_len; len += 4) + { + unsigned c; + + if (len == 76) + { + len= 0; + *dst++= '\n'; + } + + c= s[i++]; + c <<= 8; + + if (i < src_len) + c += s[i]; + c <<= 8; + i++; + + if (i < src_len) + c += s[i]; + i++; + + *dst++= base64_table[(c >> 18) & 0x3f]; + *dst++= base64_table[(c >> 12) & 0x3f]; + + if (i > (src_len + 1)) + *dst++= '='; + else + *dst++= base64_table[(c >> 6) & 0x3f]; + + if (i > src_len) + *dst++= '='; + else + *dst++= base64_table[(c >> 0) & 0x3f]; + } + *dst= '\0'; + + return 0; +} + + +/* + Base64 decoder stream +*/ +typedef struct my_base64_decoder_t +{ + const char *src; /* Pointer to the current input position */ + const char *end; /* Pointer to the end of input buffer */ + uint c; /* Collect bits into this number */ + int error; /* Error code */ + uchar state; /* Character number in the current group of 4 */ + uchar mark; /* Number of padding marks in the current group */ +} MY_BASE64_DECODER; + + +/* + Helper table for decoder. + -2 means "space character" + -1 means "bad character" + Non-negative values mean valid base64 encoding character. 
+*/ +static int8 +from_base64_table[]= +{ +/*00*/ -1,-1,-1,-1,-1,-1,-1,-1,-1,-2,-2,-2,-2,-2,-1,-1, +/*10*/ -1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1, +/*20*/ -2,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,62,-1,-1,-1,63, /* !"#$%&'()*+,-./ */ +/*30*/ 52,53,54,55,56,57,58,59,60,61,-1,-1,-1,-1,-1,-1, /* 0123456789:;<=>? */ +/*40*/ -1, 0, 1, 2, 3, 4, 5, 6, 7, 8, 9,10,11,12,13,14, /* @ABCDEFGHIJKLMNO */ +/*50*/ 15,16,17,18,19,20,21,22,23,24,25,-1,-1,-1,-1,-1, /* PQRSTUVWXYZ[\]^_ */ +/*60*/ -1,26,27,28,29,30,31,32,33,34,35,36,37,38,39,40, /* `abcdefghijklmno */ +/*70*/ 41,42,43,44,45,46,47,48,49,50,51,-1,-1,-1,-1,-1, /* pqrstuvwxyz{|}~ */ +/*80*/ -1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1, +/*90*/ -1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1, +/*A0*/ -2,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1, +/*B0*/ -1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1, +/*C0*/ -1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1, +/*D0*/ -1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1, +/*E0*/ -1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1, +/*F0*/ -1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1 +}; + + +/** + * Skip leading spaces in a base64 encoded stream + * and stop on the first non-space character. + * decoder->src will point to the first non-space character, + * or to the end of the input string. + * In case when end-of-input met on unexpected position, + * decoder->error is also set to 1. 
+ * + * See http://en.wikipedia.org/wiki/Base64 for the base64 encoding details + * + * @param decoder Pointer to MY_BASE64_DECODER + * + * @return + * FALSE on success (there are some more non-space input characters) + * TRUE on error (end-of-input found) + */ + +static inline my_bool +my_base64_decoder_skip_spaces(MY_BASE64_DECODER *decoder) +{ + for ( ; decoder->src < decoder->end; decoder->src++) + { + if (from_base64_table[(uchar) *decoder->src] != -2) + return FALSE; + } + if (decoder->state > 0) + decoder->error= 1; /* Unexpected end-of-input found */ + return TRUE; +} + + +/** + * Convert the next character in a base64 encoded stream + * to a number in the range [0..63] + * and mix it with the previously collected value in decoder->c. + * + * @param decode base64 decoding stream + * + * @return + * FALSE on success + * TRUE on error (invalid base64 character found) + */ +static inline my_bool +my_base64_add(MY_BASE64_DECODER *decoder) +{ + int res; + decoder->c <<= 6; + if ((res= from_base64_table[(uchar) *decoder->src++]) < 0) + return (decoder->error= TRUE); + decoder->c+= (uint) res; + return FALSE; +} + + +/** + * Get the next character from a base64 encoded stream. + * Skip spaces, then scan the next base64 character or a pad character + * and collect bits into decoder->c. 
+ * + * @param decoder Pointer to MY_BASE64_DECODER + * @return + * FALSE on success (a valid base64 encoding character found) + * TRUE on error (unexpected character or unexpected end-of-input found) + */ +static my_bool +my_base64_decoder_getch(MY_BASE64_DECODER *decoder) +{ + if (my_base64_decoder_skip_spaces(decoder)) + return TRUE; /* End-of-input */ + + if (!my_base64_add(decoder)) /* Valid base64 character found */ + { + if (decoder->mark) + { + /* If we have scanned '=' already, then only '=' is valid */ + DBUG_ASSERT(decoder->state == 3); + decoder->error= 1; + decoder->src--; + return TRUE; /* expected '=', but encoding character found */ + } + decoder->state++; + return FALSE; + } + + /* Process error */ + switch (decoder->state) + { + case 0: + case 1: + decoder->src--; + return TRUE; /* base64 character expected */ + break; + + case 2: + case 3: + if (decoder->src[-1] == '=') + { + decoder->error= 0; /* Not an error - it's a pad character */ + decoder->mark++; + } + else + { + decoder->src--; + return TRUE; /* base64 character or '=' expected */ + } + break; + + default: + DBUG_ASSERT(0); + return TRUE; /* Wrong state, should not happen */ + } + + decoder->state++; + return FALSE; +} + + +/** + * Decode a base64 string + * The base64-encoded data in the range ['src','*end_ptr') will be + * decoded and stored starting at 'dst'. The decoding will stop + * after 'len' characters have been read from 'src', or when padding + * occurs in the base64-encoded data. In either case: if 'end_ptr' is + * non-null, '*end_ptr' will be set to point to the character after + * the last read character, even in the presence of error. + * + * Note: 'dst' must have sufficient space to store the decoded data. + * Use my_base64_needed_decoded_length() to calculate the correct space size. + * + * Note: we allow spaces and line separators at any position. 
+ * + * @param src Pointer to base64-encoded string + * @param len Length of string at 'src' + * @param dst Pointer to location where decoded data will be stored + * @param end_ptr Pointer to variable that will refer to the character + * after the end of the encoded data that were decoded. + * Can be NULL. + * @flags flags e.g. allow multiple chunks + * @return Number of bytes written at 'dst', or -1 in case of failure + */ +int +my_base64_decode(const char *src_base, size_t len, + void *dst, const char **end_ptr, int flags) +{ + char *d= (char*) dst; + MY_BASE64_DECODER decoder; + + decoder.src= src_base; + decoder.end= src_base + len; + decoder.error= 0; + decoder.mark= 0; + + for ( ; ; ) + { + decoder.c= 0; + decoder.state= 0; + + if (my_base64_decoder_getch(&decoder) || + my_base64_decoder_getch(&decoder) || + my_base64_decoder_getch(&decoder) || + my_base64_decoder_getch(&decoder)) + break; + + *d++= (decoder.c >> 16) & 0xff; + *d++= (decoder.c >> 8) & 0xff; + *d++= (decoder.c >> 0) & 0xff; + + if (decoder.mark) + { + d-= decoder.mark; + if (!(flags & MY_BASE64_DECODE_ALLOW_MULTIPLE_CHUNKS)) + break; + decoder.mark= 0; + } + } + + /* Return error if there are more non-space characters */ + decoder.state= 0; + if (!my_base64_decoder_skip_spaces(&decoder)) + decoder.error= 1; + + if (end_ptr != NULL) + *end_ptr= decoder.src; + + return decoder.error ? 
-1 : (int) (d - (char*) dst); +} + + +#else /* MAIN */ + +#define require(b) { \ + if (!(b)) { \ + printf("Require failed at %s:%d\n", __FILE__, __LINE__); \ + abort(); \ + } \ +} + + +int +main(void) +{ + int i; + size_t j; + size_t k, l; + size_t dst_len; + size_t needed_length; + + for (i= 0; i < 500; i++) + { + /* Create source data */ + const size_t src_len= rand() % 1000 + 1; + + char * src= (char *) malloc(src_len); + char * s= src; + char * str; + char * dst; + + require(src); + for (j= 0; jcoll_name.str; cs++) + add_compiled_extra_collation((struct charset_info_st *) cs); + + if (my_uca1400_collation_definitions_add()) + return TRUE; + + return FALSE; +} diff --git a/mysys/charset.c b/mysys/charset.c new file mode 100644 index 00000000..67abfe62 --- /dev/null +++ b/mysys/charset.c @@ -0,0 +1,1609 @@ +/* + Copyright (c) 2000, 2011, Oracle and/or its affiliates + Copyright (c) 2009, 2020, MariaDB Corporation. + + This program is free software; you can redistribute it and/or modify + it under the terms of the GNU General Public License as published by + the Free Software Foundation; version 2 of the License. + + This program is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + GNU General Public License for more details. 
+ + You should have received a copy of the GNU General Public License + along with this program; if not, write to the Free Software + Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1335 USA */ + +#include "mysys_priv.h" +#include "mysys_err.h" +#include +#include +#include +#include +#include +#ifdef HAVE_LANGINFO_H +#include +#endif +#ifdef HAVE_LOCALE_H +#include +#endif + +extern HASH charset_name_hash; + +/* + The code below implements this functionality: + + - Initializing charset related structures + - Loading dynamic charsets + - Searching for a proper CHARSET_INFO + using charset name, collation name or collation ID + - Setting server default character set +*/ + +static uint +get_collation_number_internal(const char *name) +{ + + CHARSET_INFO **cs; + for (cs= all_charsets; + cs < all_charsets + array_elements(all_charsets); + cs++) + { + if (cs[0] && cs[0]->coll_name.str && + !my_strcasecmp(&my_charset_latin1, cs[0]->coll_name.str, name)) + return cs[0]->number; + } + return 0; +} + + +static my_bool is_multi_byte_ident(CHARSET_INFO *cs, uchar ch) +{ + int chlen= my_ci_charlen(cs, &ch, &ch + 1); + return MY_CS_IS_TOOSMALL(chlen) ? 
TRUE : FALSE; +} + +static my_bool init_state_maps(struct charset_info_st *cs) +{ + uint i; + uchar *state_map; + uchar *ident_map; + + if (!(cs->state_map= state_map= (uchar*) my_once_alloc(256*2, MYF(MY_WME)))) + return 1; + + cs->ident_map= ident_map= state_map + 256; + + /* Fill state_map with states to get a faster parser */ + for (i=0; i < 256 ; i++) + { + if (my_isalpha(cs,i)) + state_map[i]=(uchar) MY_LEX_IDENT; + else if (my_isdigit(cs,i)) + state_map[i]=(uchar) MY_LEX_NUMBER_IDENT; + else if (is_multi_byte_ident(cs, i)) + state_map[i]=(uchar) MY_LEX_IDENT; + else if (my_isspace(cs,i)) + state_map[i]=(uchar) MY_LEX_SKIP; + else + state_map[i]=(uchar) MY_LEX_CHAR; + } + state_map[(uchar)'_']=state_map[(uchar)'$']=(uchar) MY_LEX_IDENT; + state_map[(uchar)'\'']=(uchar) MY_LEX_STRING; + state_map[(uchar)'.']=(uchar) MY_LEX_REAL_OR_POINT; + state_map[(uchar)'>']=state_map[(uchar)'=']=state_map[(uchar)'!']= (uchar) MY_LEX_CMP_OP; + state_map[(uchar)'<']= (uchar) MY_LEX_LONG_CMP_OP; + state_map[(uchar)'&']=state_map[(uchar)'|']=(uchar) MY_LEX_BOOL; + state_map[(uchar)'#']=(uchar) MY_LEX_COMMENT; + state_map[(uchar)';']=(uchar) MY_LEX_SEMICOLON; + state_map[(uchar)':']=(uchar) MY_LEX_SET_VAR; + state_map[0]=(uchar) MY_LEX_EOL; + state_map[(uchar)'\\']= (uchar) MY_LEX_ESCAPE; + state_map[(uchar)'/']= (uchar) MY_LEX_LONG_COMMENT; + state_map[(uchar)'*']= (uchar) MY_LEX_END_LONG_COMMENT; + state_map[(uchar)'@']= (uchar) MY_LEX_USER_END; + state_map[(uchar) '`']= (uchar) MY_LEX_USER_VARIABLE_DELIMITER; + state_map[(uchar)'"']= (uchar) MY_LEX_STRING_OR_DELIMITER; + state_map[(uchar)'-']= (uchar) MY_LEX_MINUS_OR_COMMENT; + state_map[(uchar)',']= (uchar) MY_LEX_COMMA; + state_map[(uchar)'?']= (uchar) MY_LEX_PLACEHOLDER; + + /* + Create a second map to make it faster to find identifiers + */ + for (i=0; i < 256 ; i++) + { + ident_map[i]= (uchar) (state_map[i] == MY_LEX_IDENT || + state_map[i] == MY_LEX_NUMBER_IDENT); + } + + /* Special handling of hex and binary strings 
*/ + state_map[(uchar)'x']= state_map[(uchar)'X']= (uchar) MY_LEX_IDENT_OR_HEX; + state_map[(uchar)'b']= state_map[(uchar)'B']= (uchar) MY_LEX_IDENT_OR_BIN; + state_map[(uchar)'n']= state_map[(uchar)'N']= (uchar) MY_LEX_IDENT_OR_NCHAR; + return 0; +} + + +static MY_COLLATION_HANDLER *get_simple_collation_handler_by_flags(uint flags) +{ + return flags & MY_CS_BINSORT ? + (flags & MY_CS_NOPAD ? + &my_collation_8bit_nopad_bin_handler : + &my_collation_8bit_bin_handler) : + (flags & MY_CS_NOPAD ? + &my_collation_8bit_simple_nopad_ci_handler : + &my_collation_8bit_simple_ci_handler); +} + + +static void simple_cs_init_functions(struct charset_info_st *cs) +{ + cs->coll= get_simple_collation_handler_by_flags(cs->state); + cs->cset= &my_charset_8bit_handler; +} + + + +static int cs_copy_data(struct charset_info_st *to, CHARSET_INFO *from) +{ + to->number= from->number ? from->number : to->number; + + /* Don't replace csname if already set */ + if (from->cs_name.str && !to->cs_name.str) + { + if (!(to->cs_name.str= my_once_memdup(from->cs_name.str, + from->cs_name.length + 1, + MYF(MY_WME)))) + goto err; + to->cs_name.length= from->cs_name.length; + } + + if (from->coll_name.str) + { + if (!(to->coll_name.str= my_once_memdup(from->coll_name.str, + from->coll_name.length + 1, + MYF(MY_WME)))) + goto err; + to->coll_name.length= from->coll_name.length; + } + + if (from->comment) + if (!(to->comment= my_once_strdup(from->comment,MYF(MY_WME)))) + goto err; + + if (from->m_ctype) + { + if (!(to->m_ctype= (uchar*) my_once_memdup((char*) from->m_ctype, + MY_CS_CTYPE_TABLE_SIZE, + MYF(MY_WME)))) + goto err; + if (init_state_maps(to)) + goto err; + } + if (from->to_lower) + if (!(to->to_lower= (uchar*) my_once_memdup((char*) from->to_lower, + MY_CS_TO_LOWER_TABLE_SIZE, + MYF(MY_WME)))) + goto err; + + if (from->to_upper) + if (!(to->to_upper= (uchar*) my_once_memdup((char*) from->to_upper, + MY_CS_TO_UPPER_TABLE_SIZE, + MYF(MY_WME)))) + goto err; + if (from->sort_order) + { + if 
(!(to->sort_order= (uchar*) my_once_memdup((char*) from->sort_order, + MY_CS_SORT_ORDER_TABLE_SIZE, + MYF(MY_WME)))) + goto err; + + } + if (from->tab_to_uni) + { + uint sz= MY_CS_TO_UNI_TABLE_SIZE*sizeof(uint16); + if (!(to->tab_to_uni= (uint16*) my_once_memdup((char*)from->tab_to_uni, + sz, MYF(MY_WME)))) + goto err; + } + if (from->tailoring) + if (!(to->tailoring= my_once_strdup(from->tailoring,MYF(MY_WME)))) + goto err; + + return 0; + +err: + return 1; +} + + +static my_bool simple_8bit_charset_data_is_full(CHARSET_INFO *cs) +{ + return cs->m_ctype && cs->to_upper && cs->to_lower && cs->tab_to_uni; +} + + +/** + Inherit missing 8bit charset data from another collation. + Arrays pointed by refcs must be in the permanent memory already, + e.g. static memory, or allocated by my_once_xxx(). +*/ +static void +inherit_charset_data(struct charset_info_st *cs, CHARSET_INFO *refcs) +{ + if (!cs->to_upper) + cs->to_upper= refcs->to_upper; + if (!cs->to_lower) + cs->to_lower= refcs->to_lower; + if (!cs->m_ctype) + cs->m_ctype= refcs->m_ctype; + if (!cs->tab_to_uni) + cs->tab_to_uni= refcs->tab_to_uni; +} + + +static my_bool simple_8bit_collation_data_is_full(CHARSET_INFO *cs) +{ + return cs->sort_order || (cs->state & MY_CS_BINSORT); +} + + +/** + Inherit 8bit simple collation data from another collation. + refcs->sort_order must be in the permanent memory already, + e.g. static memory, or allocated by my_once_xxx(). +*/ +static void +inherit_collation_data(struct charset_info_st *cs, CHARSET_INFO *refcs) +{ + if (!simple_8bit_collation_data_is_full(cs)) + cs->sort_order= refcs->sort_order; +} + + +static my_bool simple_cs_is_full(CHARSET_INFO *cs) +{ + return cs->number && cs->cs_name.str && cs->coll_name.str && + simple_8bit_charset_data_is_full(cs) && + (simple_8bit_collation_data_is_full(cs) || cs->tailoring); +} + + +#if defined(HAVE_UCA_COLLATIONS) && (defined(HAVE_CHARSET_ucs2) || defined(HAVE_CHARSET_utf8mb3)) +/** + Initialize a loaded collation. 
+ @param [OUT] to - The new charset_info_st structure to initialize. + @param [IN] from - A template collation, to fill the missing data from. + @param [IN] loaded - The collation data loaded from the LDML file. + some data may be missing in "loaded". +*/ +static void +copy_uca_collation(struct charset_info_st *to, CHARSET_INFO *from, + CHARSET_INFO *loaded) +{ + to->cset= from->cset; + to->coll= from->coll; + /* + Single-level UCA collation have strnxfrm_multiple=8. + In case of a multi-level UCA collation we use strnxfrm_multiply=4. + That means MY_COLLATION_HANDLER::strnfrmlen() will request the caller + to allocate a buffer smaller size for each level, for performance purpose, + and to fit longer VARCHARs to @@max_sort_length. + This makes filesort produce non-precise order for some rare Unicode + characters that produce more than 4 weights (long expansions). + UCA requires 2 bytes per weight multiplied by the number of levels. + In case of a 2-level collation, each character requires 4*2=8 bytes. + Therefore, the longest VARCHAR that fits into the default @@max_sort_length + is 1024/8=VARCHAR(128). With strnxfrm_multiply==8, only VARCHAR(64) + would fit. + Note, the built-in collation utf8_thai_520_w2 also uses strnxfrm_multiply=4, + for the same purpose. + TODO: we could add a new LDML syntax to choose strxfrm_multiply value. + */ + to->strxfrm_multiply= loaded->levels_for_order > 1 ? 
+ 4 : from->strxfrm_multiply; + to->min_sort_char= from->min_sort_char; + to->max_sort_char= from->max_sort_char; + to->mbminlen= from->mbminlen; + to->mbmaxlen= from->mbmaxlen; + to->state|= MY_CS_AVAILABLE | MY_CS_LOADED | + MY_CS_STRNXFRM | MY_CS_UNICODE; +} +#endif + + +static int add_collation(struct charset_info_st *cs) +{ + if (cs->coll_name.str && + (cs->number || + (cs->number=get_collation_number_internal(cs->coll_name.str))) && + cs->number < array_elements(all_charsets)) + { + struct charset_info_st *newcs; + if (!(newcs= (struct charset_info_st*) all_charsets[cs->number])) + { + if (!(all_charsets[cs->number]= newcs= + (struct charset_info_st*) my_once_alloc(sizeof(CHARSET_INFO),MYF(0)))) + return MY_XML_ERROR; + bzero(newcs,sizeof(CHARSET_INFO)); + } + else + { + /* Don't allow change of csname */ + if (newcs->cs_name.str && strcmp(newcs->cs_name.str, cs->cs_name.str)) + { + my_error(EE_DUPLICATE_CHARSET, MYF(ME_WARNING), + cs->number, cs->cs_name.str, newcs->cs_name.str); + /* + Continue parsing rest of Index.xml. We got an warning in the log + so the user can fix the wrong character set definition. + */ + return MY_XML_OK; + } + } + + if (cs->primary_number == cs->number) + cs->state |= MY_CS_PRIMARY; + + if (cs->binary_number == cs->number) + cs->state |= MY_CS_BINSORT; + + newcs->state|= cs->state; + + if (!(newcs->state & MY_CS_COMPILED)) + { + if (cs_copy_data(newcs,cs)) + return MY_XML_ERROR; + + newcs->levels_for_order= 1; + + if (!strcmp(cs->cs_name.str,"ucs2") ) + { +#if defined(HAVE_CHARSET_ucs2) && defined(HAVE_UCA_COLLATIONS) + copy_uca_collation(newcs, newcs->state & MY_CS_NOPAD ? 
+ &my_charset_ucs2_unicode_nopad_ci : + &my_charset_ucs2_unicode_ci, + cs); + newcs->state|= MY_CS_AVAILABLE | MY_CS_LOADED | MY_CS_NONASCII; +#endif + } + else if (!strcmp(cs->cs_name.str, "utf8") || + !strcmp(cs->cs_name.str, "utf8mb3")) + { +#if defined (HAVE_CHARSET_utf8mb3) && defined(HAVE_UCA_COLLATIONS) + copy_uca_collation(newcs, newcs->state & MY_CS_NOPAD ? + &my_charset_utf8mb3_unicode_nopad_ci : + &my_charset_utf8mb3_unicode_ci, + cs); + newcs->m_ctype= my_charset_utf8mb3_unicode_ci.m_ctype; + if (init_state_maps(newcs)) + return MY_XML_ERROR; +#endif + } + else if (!strcmp(cs->cs_name.str, "utf8mb4")) + { +#if defined (HAVE_CHARSET_utf8mb4) && defined(HAVE_UCA_COLLATIONS) + copy_uca_collation(newcs, newcs->state & MY_CS_NOPAD ? + &my_charset_utf8mb4_unicode_nopad_ci : + &my_charset_utf8mb4_unicode_ci, + cs); + newcs->m_ctype= my_charset_utf8mb4_unicode_ci.m_ctype; + if (init_state_maps(newcs)) + return MY_XML_ERROR; + newcs->state|= MY_CS_AVAILABLE | MY_CS_LOADED; +#endif + } + else if (!strcmp(cs->cs_name.str, "utf16")) + { +#if defined (HAVE_CHARSET_utf16) && defined(HAVE_UCA_COLLATIONS) + copy_uca_collation(newcs, newcs->state & MY_CS_NOPAD ? + &my_charset_utf16_unicode_nopad_ci : + &my_charset_utf16_unicode_ci, + cs); + newcs->state|= MY_CS_AVAILABLE | MY_CS_LOADED | MY_CS_NONASCII; +#endif + } + else if (!strcmp(cs->cs_name.str, "utf32")) + { +#if defined (HAVE_CHARSET_utf32) && defined(HAVE_UCA_COLLATIONS) + copy_uca_collation(newcs, newcs->state & MY_CS_NOPAD ? 
+ &my_charset_utf32_unicode_nopad_ci : + &my_charset_utf32_unicode_ci, + cs); + newcs->state|= MY_CS_AVAILABLE | MY_CS_LOADED | MY_CS_NONASCII; +#endif + } + else + { + simple_cs_init_functions(newcs); + newcs->mbminlen= 1; + newcs->mbmaxlen= 1; + newcs->strxfrm_multiply= 1; + if (simple_cs_is_full(newcs)) + { + newcs->state |= MY_CS_LOADED; + } + } + add_compiled_extra_collation(newcs); + } + else + { + /* + We need the below to make get_charset_name() + and get_charset_number() working even if a + character set has not been really incompiled. + The above functions are used for example + in error message compiler extra/comp_err.c. + If a character set was compiled, this information + will get lost and overwritten in add_compiled_collation(). + */ + newcs->number= cs->number; + if (cs->comment) + if (!(newcs->comment= my_once_strdup(cs->comment,MYF(MY_WME)))) + return MY_XML_ERROR; + if (cs->cs_name.str && ! newcs->cs_name.str) + { + if (!(newcs->cs_name.str= my_once_memdup(cs->cs_name.str, + cs->cs_name.length+1, + MYF(MY_WME)))) + return MY_XML_ERROR; + newcs->cs_name.length= cs->cs_name.length; + } + if (cs->coll_name.str) + { + if (!(newcs->coll_name.str= my_once_memdup(cs->coll_name.str, + cs->coll_name.length+1, + MYF(MY_WME)))) + return MY_XML_ERROR; + newcs->coll_name.length= cs->coll_name.length; + } + } + cs->number= 0; + cs->primary_number= 0; + cs->binary_number= 0; + cs->coll_name.str= 0; + cs->coll_name.length= 0; + cs->state= 0; + cs->sort_order= NULL; + cs->tailoring= NULL; + } + return MY_XML_OK; +} + + +/** + Report character set initialization errors and warnings. + Be silent by default: no warnings on the client side. +*/ +static void +default_reporter(enum loglevel level __attribute__ ((unused)), + const char *format __attribute__ ((unused)), + ...) +{ +} +my_error_reporter my_charset_error_reporter= default_reporter; + + +/** + Wrappers for memory functions my_malloc (and friends) + with C-compatbile API without extra "myf" argument. 
+*/ +static void * +my_once_alloc_c(size_t size) +{ return my_once_alloc(size, MYF(MY_WME)); } + + +static void * +my_malloc_c(size_t size) +{ return my_malloc(key_memory_charset_loader, size, MYF(MY_WME)); } + + +static void * +my_realloc_c(void *old, size_t size) +{ return my_realloc(key_memory_charset_loader, old, size, MYF(MY_WME|MY_ALLOW_ZERO_PTR)); } + + +/** + Initialize character set loader to use mysys memory management functions. + @param loader Loader to initialize +*/ +void +my_charset_loader_init_mysys(MY_CHARSET_LOADER *loader) +{ + loader->error[0]= '\0'; + loader->once_alloc= my_once_alloc_c; + loader->malloc= my_malloc_c; + loader->realloc= my_realloc_c; + loader->free= my_free; + loader->reporter= my_charset_error_reporter; + loader->add_collation= add_collation; +} + + +#define MY_MAX_ALLOWED_BUF 1024*1024 +#define MY_CHARSET_INDEX "Index.xml" + +const char *charsets_dir= NULL; + + +static my_bool +my_read_charset_file(MY_CHARSET_LOADER *loader, + const char *filename, + myf myflags) +{ + uchar *buf; + int fd; + size_t len, tmp_len; + MY_STAT stat_info; + + if (!my_stat(filename, &stat_info, MYF(myflags)) || + ((len= (uint)stat_info.st_size) > MY_MAX_ALLOWED_BUF) || + !(buf= (uchar*) my_malloc(key_memory_charset_loader,len,myflags))) + return TRUE; + + if ((fd= mysql_file_open(key_file_charset, filename, O_RDONLY, myflags)) < 0) + goto error; + tmp_len= mysql_file_read(fd, buf, len, myflags); + mysql_file_close(fd, myflags); + if (tmp_len != len) + goto error; + + if (my_parse_charset_xml(loader, (char *) buf, len)) + { + my_printf_error(EE_UNKNOWN_CHARSET, "Error while parsing '%s': %s\n", + MYF(0), filename, loader->error); + goto error; + } + + my_free(buf); + return FALSE; + +error: + my_free(buf); + return TRUE; +} + + +char *get_charsets_dir(char *buf) +{ + const char *sharedir= SHAREDIR; + char *res; + DBUG_ENTER("get_charsets_dir"); + + if (charsets_dir != NULL) + strmake(buf, charsets_dir, FN_REFLEN-1); + else + { + if 
(test_if_hard_path(sharedir) || + is_prefix(sharedir, DEFAULT_CHARSET_HOME)) + strxmov(buf, sharedir, "/", CHARSET_DIR, NullS); + else + strxmov(buf, DEFAULT_CHARSET_HOME, "/", sharedir, "/", CHARSET_DIR, + NullS); + } + res= convert_dirname(buf,buf,NullS); + DBUG_PRINT("info",("charsets dir: '%s'", buf)); + DBUG_RETURN(res); +} + +CHARSET_INFO *all_charsets[MY_ALL_CHARSETS_SIZE]={NULL}; +CHARSET_INFO *default_charset_info = &my_charset_latin1; + + +/* + Add standard character set compiled into the application + All related character sets should share same cname +*/ + +void add_compiled_collation(struct charset_info_st *cs) +{ + DBUG_ASSERT(cs->number < array_elements(all_charsets)); + all_charsets[cs->number]= cs; + cs->state|= MY_CS_AVAILABLE; + if ((my_hash_insert(&charset_name_hash, (uchar*) cs))) + { +#ifndef DBUG_OFF + CHARSET_INFO *org= (CHARSET_INFO*) my_hash_search(&charset_name_hash, + (uchar*) cs->cs_name.str, + cs->cs_name.length); + DBUG_ASSERT(org); + DBUG_ASSERT(org->cs_name.str == cs->cs_name.str); + DBUG_ASSERT(org->cs_name.length == strlen(cs->cs_name.str)); +#endif + } +} + + +/* + Add optional characters sets from ctype-extra.c + + If cname is already in use, replace csname in new object with a pointer to + the already used csname to ensure that all csname's points to the same string + for the same character set. 
+*/ + + +void add_compiled_extra_collation(struct charset_info_st *cs) +{ + DBUG_ASSERT(cs->number < array_elements(all_charsets)); + all_charsets[cs->number]= cs; + cs->state|= MY_CS_AVAILABLE; + if ((my_hash_insert(&charset_name_hash, (uchar*) cs))) + { + CHARSET_INFO *org= (CHARSET_INFO*) my_hash_search(&charset_name_hash, + (uchar*) cs->cs_name.str, + cs->cs_name.length); + cs->cs_name= org->cs_name; + } +} + + + +static my_pthread_once_t charsets_initialized= MY_PTHREAD_ONCE_INIT; +static my_pthread_once_t charsets_template= MY_PTHREAD_ONCE_INIT; + +typedef struct +{ + ulonglong use_count; +} MY_COLLATION_STATISTICS; + + +static MY_COLLATION_STATISTICS my_collation_statistics[MY_ALL_CHARSETS_SIZE]; + + +my_bool my_collation_is_known_id(uint id) +{ + return id > 0 && id < array_elements(all_charsets) && all_charsets[id] ? + TRUE : FALSE; +} + + +/* + Collation use statistics functions do not lock + counters to avoid mutex contention. This can lose + some counter increments with high thread concurrency. + But this should be Ok, as we don't need exact numbers. +*/ +static inline void my_collation_statistics_inc_use_count(uint id) +{ + DBUG_ASSERT(my_collation_is_known_id(id)); + my_collation_statistics[id].use_count++; +} + + +ulonglong my_collation_statistics_get_use_count(uint id) +{ + DBUG_ASSERT(my_collation_is_known_id(id)); + return my_collation_statistics[id].use_count; +} + + +const char *my_collation_get_tailoring(uint id) +{ + /* all_charsets[id]->tailoring is never changed after server startup. 
*/ + DBUG_ASSERT(my_collation_is_known_id(id)); + return all_charsets[id]->tailoring; +} + + +HASH charset_name_hash; + +static uchar *get_charset_key(const uchar *object, + size_t *size, + my_bool not_used __attribute__((unused))) +{ + CHARSET_INFO *cs= (CHARSET_INFO*) object; + *size= cs->cs_name.length; + return (uchar*) cs->cs_name.str; +} + +static void init_available_charsets(void) +{ + char fname[FN_REFLEN + sizeof(MY_CHARSET_INDEX)]; + struct charset_info_st **cs; + MY_CHARSET_LOADER loader; + DBUG_ENTER("init_available_charsets"); + + bzero((char*) &all_charsets,sizeof(all_charsets)); + bzero((char*) &my_collation_statistics, sizeof(my_collation_statistics)); + + my_hash_init2(key_memory_charsets, &charset_name_hash, 16, + &my_charset_latin1, 64, 0, 0, get_charset_key, + 0, 0, HASH_UNIQUE); + + init_compiled_charsets(MYF(0)); + + /* Copy compiled charsets */ + for (cs= (struct charset_info_st**) all_charsets; + cs < (struct charset_info_st**) all_charsets + + array_elements(all_charsets)-1 ; + cs++) + { + if (*cs) + { + DBUG_ASSERT(cs[0]->mbmaxlen <= MY_CS_MBMAXLEN); + if (cs[0]->m_ctype) + if (init_state_maps(*cs)) + *cs= NULL; + } + } + + my_charset_loader_init_mysys(&loader); + strmov(get_charsets_dir(fname), MY_CHARSET_INDEX); + my_read_charset_file(&loader, fname, MYF(0)); + DBUG_VOID_RETURN; +} + + +void free_charsets(void) +{ + charsets_initialized= charsets_template; + my_hash_free(&charset_name_hash); +} + + +static const char* +get_collation_name_alias(const char *name, char *buf, size_t bufsize, myf flags) +{ + if (!strncasecmp(name, "utf8_", 5)) + { + my_snprintf(buf, bufsize, "utf8mb%c_%s", + flags & MY_UTF8_IS_UTF8MB3 ? 
'3' : '4', name + 5); + return buf; + } + return NULL; +} + + +uint get_collation_number(const char *name, myf flags) +{ + uint id; + char alias[64]; + my_pthread_once(&charsets_initialized, init_available_charsets); + if ((id= get_collation_number_internal(name))) + return id; + if ((name= get_collation_name_alias(name, alias, sizeof(alias),flags))) + return get_collation_number_internal(name); + return 0; +} + + +static uint +get_charset_number_internal(const char *charset_name, uint cs_flags) +{ + CHARSET_INFO **cs; + + for (cs= all_charsets; + cs < all_charsets + array_elements(all_charsets); + cs++) + { + if ( cs[0] && cs[0]->cs_name.str && (cs[0]->state & cs_flags) && + !my_strcasecmp(&my_charset_latin1, cs[0]->cs_name.str, charset_name)) + return cs[0]->number; + } + return 0; +} + + +uint get_charset_number(const char *charset_name, uint cs_flags, myf flags) +{ + uint id; + const char *new_charset_name= flags & MY_UTF8_IS_UTF8MB3 ? "utf8mb3" : + "utf8mb4"; + my_pthread_once(&charsets_initialized, init_available_charsets); + if ((id= get_charset_number_internal(charset_name, cs_flags))) + return id; + if ((charset_name= !my_strcasecmp(&my_charset_latin1, charset_name, "utf8") ? + new_charset_name : NULL)) + return get_charset_number_internal(charset_name, cs_flags); + return 0; +} + + +const char *get_charset_name(uint charset_number) +{ + my_pthread_once(&charsets_initialized, init_available_charsets); + + if (charset_number < array_elements(all_charsets)) + { + CHARSET_INFO *cs= all_charsets[charset_number]; + + if (cs && (cs->number == charset_number) && cs->coll_name.str) + return cs->coll_name.str; + } + + return "?"; /* this mimics find_type() */ +} + + +static CHARSET_INFO *inheritance_source_by_id(CHARSET_INFO *cs, uint refid) +{ + CHARSET_INFO *refcs; + return refid && refid != cs->number && + (refcs= all_charsets[refid]) && + (refcs->state & MY_CS_AVAILABLE) ? 
refcs : NULL; +} + + +static CHARSET_INFO *find_collation_data_inheritance_source(CHARSET_INFO *cs, myf flags) +{ + const char *beg, *end; + if (cs->tailoring && + !strncmp(cs->tailoring, "[import ", 8) && + (end= strchr(cs->tailoring + 8, ']')) && + (beg= cs->tailoring + 8) + MY_CS_COLLATION_NAME_SIZE > end) + { + char name[MY_CS_COLLATION_NAME_SIZE + 1]; + memcpy(name, beg, end - beg); + name[end - beg]= '\0'; + return inheritance_source_by_id(cs, get_collation_number(name,MYF(flags))); + } + return NULL; +} + + +static CHARSET_INFO *find_charset_data_inheritance_source(CHARSET_INFO *cs) +{ + uint refid= get_charset_number_internal(cs->cs_name.str, MY_CS_PRIMARY); + return inheritance_source_by_id(cs, refid); +} + + +static CHARSET_INFO * +get_internal_charset(MY_CHARSET_LOADER *loader, uint cs_number, myf flags) +{ + char buf[FN_REFLEN]; + struct charset_info_st *cs; + + DBUG_ASSERT(cs_number < array_elements(all_charsets)); + + if ((cs= (struct charset_info_st*) all_charsets[cs_number])) + { + if (cs->state & MY_CS_READY) /* if CS is already initialized */ + { + my_collation_statistics_inc_use_count(cs_number); + return cs; + } + + /* + To make things thread safe we are not allowing other threads to interfere + while we may changing the cs_info_table + */ + mysql_mutex_lock(&THR_LOCK_charset); + + if (!(cs->state & (MY_CS_COMPILED|MY_CS_LOADED))) /* if CS is not in memory */ + { + MY_CHARSET_LOADER loader; + strxmov(get_charsets_dir(buf), cs->cs_name.str, ".xml", NullS); + my_charset_loader_init_mysys(&loader); + my_read_charset_file(&loader, buf, flags); + } + + if (cs->state & MY_CS_AVAILABLE) + { + if (!(cs->state & MY_CS_READY)) + { + if (!simple_8bit_charset_data_is_full(cs)) + { + CHARSET_INFO *refcs= find_charset_data_inheritance_source(cs); + if (refcs) + inherit_charset_data(cs, refcs); + } + if (!simple_8bit_collation_data_is_full(cs)) + { + CHARSET_INFO *refcl= find_collation_data_inheritance_source(cs, flags); + if (refcl) + 
inherit_collation_data(cs, refcl); + } + + if (my_ci_init_charset(cs, loader) || + my_ci_init_collation(cs, loader)) + { + cs= NULL; + } + else + cs->state|= MY_CS_READY; + } + my_collation_statistics_inc_use_count(cs_number); + } + else + cs= NULL; + + mysql_mutex_unlock(&THR_LOCK_charset); + } + return cs; +} + + +CHARSET_INFO *get_charset(uint cs_number, myf flags) +{ + CHARSET_INFO *cs= NULL; + + if (cs_number == default_charset_info->number) + return default_charset_info; + + my_pthread_once(&charsets_initialized, init_available_charsets); + + if (cs_number < array_elements(all_charsets)) + { + MY_CHARSET_LOADER loader; + my_charset_loader_init_mysys(&loader); + cs= get_internal_charset(&loader, cs_number, flags); + } + + if (!cs && (flags & MY_WME)) + { + char index_file[FN_REFLEN + sizeof(MY_CHARSET_INDEX)], cs_string[23]; + strmov(get_charsets_dir(index_file),MY_CHARSET_INDEX); + cs_string[0]='#'; + int10_to_str(cs_number, cs_string+1, 10); + my_error(EE_UNKNOWN_CHARSET, MYF(ME_BELL), cs_string, index_file); + } + return cs; +} + + +/** + Find collation by name: extended version of get_charset_by_name() + to return error messages to the caller. + @param loader Character set loader + @param name Collation name + @param flags Flags + @return NULL on error, pointer to collation on success +*/ + +CHARSET_INFO * +my_collation_get_by_name(MY_CHARSET_LOADER *loader, + const char *name, myf flags) +{ + uint cs_number; + CHARSET_INFO *cs; + my_pthread_once(&charsets_initialized, init_available_charsets); + + cs_number= get_collation_number(name,flags); + my_charset_loader_init_mysys(loader); + cs= cs_number ? 
get_internal_charset(loader, cs_number, flags) : NULL; + + if (!cs && (flags & MY_WME)) + { + char index_file[FN_REFLEN + sizeof(MY_CHARSET_INDEX)]; + strmov(get_charsets_dir(index_file),MY_CHARSET_INDEX); + my_error(EE_UNKNOWN_COLLATION, MYF(ME_BELL), name, index_file); + } + return cs; +} + + +CHARSET_INFO *get_charset_by_name(const char *cs_name, myf flags) +{ + MY_CHARSET_LOADER loader; + my_charset_loader_init_mysys(&loader); + return my_collation_get_by_name(&loader, cs_name, flags); +} + + +/** + Find character set by name: extended version of get_charset_by_csname() + to return error messages to the caller. + @param loader Character set loader + @param name Collation name + @param cs_flags Character set flags (e.g. default or binary collation) + @param flags Flags + @return NULL on error, pointer to collation on success +*/ +CHARSET_INFO * +my_charset_get_by_name(MY_CHARSET_LOADER *loader, + const char *cs_name, uint cs_flags, myf flags) +{ + uint cs_number; + CHARSET_INFO *cs; + DBUG_ENTER("get_charset_by_csname"); + DBUG_PRINT("enter",("name: '%s'", cs_name)); + + my_pthread_once(&charsets_initialized, init_available_charsets); + + cs_number= get_charset_number(cs_name, cs_flags, flags); + cs= cs_number ? get_internal_charset(loader, cs_number, flags) : NULL; + + if (!cs && (flags & MY_WME)) + { + char index_file[FN_REFLEN + sizeof(MY_CHARSET_INDEX)]; + strmov(get_charsets_dir(index_file),MY_CHARSET_INDEX); + my_error(EE_UNKNOWN_CHARSET, MYF(ME_BELL), cs_name, index_file); + } + + DBUG_RETURN(cs); +} + + +CHARSET_INFO * +get_charset_by_csname(const char *cs_name, uint cs_flags, myf flags) +{ + MY_CHARSET_LOADER loader; + my_charset_loader_init_mysys(&loader); + return my_charset_get_by_name(&loader, cs_name, cs_flags, flags); +} + + +/** + Resolve character set by the character set name (utf8, latin1, ...). + + The function tries to resolve character set by the specified name. 
If + there is character set with the given name, it is assigned to the "cs" + parameter and FALSE is returned. If there is no such character set, + "default_cs" is assigned to the "cs" and TRUE is returned. + + @param[in] cs_name Character set name. + @param[in] default_cs Default character set. + @param[out] cs Variable to store character set. + + @return FALSE if character set was resolved successfully; TRUE if there + is no character set with given name. +*/ + +my_bool resolve_charset(const char *cs_name, + CHARSET_INFO *default_cs, + CHARSET_INFO **cs, + myf flags) +{ + *cs= get_charset_by_csname(cs_name, MY_CS_PRIMARY, flags); + + if (*cs == NULL) + { + *cs= default_cs; + return TRUE; + } + + return FALSE; +} + + +/** + Resolve collation by the collation name (utf8_general_ci, ...). + + The function tries to resolve collation by the specified name. If there + is collation with the given name, it is assigned to the "cl" parameter + and FALSE is returned. If there is no such collation, "default_cl" is + assigned to the "cl" and TRUE is returned. + + @param[out] cl Variable to store collation. + @param[in] cl_name Collation name. + @param[in] default_cl Default collation. + + @return FALSE if collation was resolved successfully; TRUE if there is no + collation with given name. 
+*/ + +my_bool resolve_collation(const char *cl_name, + CHARSET_INFO *default_cl, + CHARSET_INFO **cl, + myf my_flags) +{ + *cl= get_charset_by_name(cl_name, my_flags); + + if (*cl == NULL) + { + *cl= default_cl; + return TRUE; + } + + return FALSE; +} + + +/* + Escape string with backslashes (\) + + SYNOPSIS + escape_string_for_mysql() + charset_info Charset of the strings + to Buffer for escaped string + to_length Length of destination buffer, or 0 + from The string to escape + length The length of the string to escape + overflow Set to 1 if the escaped string did not fit in + the to buffer + + DESCRIPTION + This escapes the contents of a string by adding backslashes before special + characters, and turning others into specific escape sequences, such as + turning newlines into \n and null bytes into \0. + + NOTE + To maintain compatibility with the old C API, to_length may be 0 to mean + "big enough" + + RETURN VALUES + # The length of the escaped string +*/ + +size_t escape_string_for_mysql(CHARSET_INFO *charset_info, + char *to, size_t to_length, + const char *from, size_t length, + my_bool *overflow) +{ + const char *to_start= to; + const char *end, *to_end=to_start + (to_length ? to_length-1 : 2*length); + *overflow= FALSE; + for (end= from + length; from < end; from++) + { + char escape= 0; +#ifdef USE_MB + int tmp_length= my_ci_charlen(charset_info, (const uchar *) from, (const uchar *) end); + if (tmp_length > 1) + { + if (to + tmp_length > to_end) + { + *overflow= TRUE; + break; + } + while (tmp_length--) + *to++= *from++; + from--; + continue; + } + /* + If the next character appears to begin a multi-byte character, we + escape that first byte of that apparent multi-byte character. (The + character just looks like a multi-byte character -- if it were actually + a multi-byte character, it would have been passed through in the test + above.) + + Without this check, we can create a problem by converting an invalid + multi-byte character into a valid one. 
For example, 0xbf27 is not + a valid GBK character, but 0xbf5c is. (0x27 = ', 0x5c = \) + */ + if (tmp_length < 1) /* Bad byte sequence */ + escape= *from; + else +#endif + switch (*from) { + case 0: /* Must be escaped for 'mysql' */ + escape= '0'; + break; + case '\n': /* Must be escaped for logs */ + escape= 'n'; + break; + case '\r': + escape= 'r'; + break; + case '\\': + escape= '\\'; + break; + case '\'': + escape= '\''; + break; + case '"': /* Better safe than sorry */ + escape= '"'; + break; + case '\032': /* This gives problems on Win32 */ + escape= 'Z'; + break; + } + if (escape) + { + if (to + 2 > to_end) + { + *overflow= TRUE; + break; + } + *to++= '\\'; + *to++= escape; + } + else + { + if (to + 1 > to_end) + { + *overflow= TRUE; + break; + } + *to++= *from; + } + } + *to= 0; + return (size_t) (to - to_start); +} + + +#ifdef BACKSLASH_MBTAIL +CHARSET_INFO *fs_character_set() +{ + static CHARSET_INFO *fs_cset_cache; + if (fs_cset_cache) + return fs_cset_cache; +#ifdef HAVE_CHARSET_cp932 + else if (GetACP() == 932) + return fs_cset_cache= &my_charset_cp932_japanese_ci; +#endif + else + return fs_cset_cache= &my_charset_bin; +} +#endif + +/* + Escape apostrophes by doubling them up + + SYNOPSIS + escape_quotes_for_mysql() + charset_info Charset of the strings + to Buffer for escaped string + to_length Length of destination buffer, or 0 + from The string to escape + length The length of the string to escape + overflow Set to 1 if the buffer overflows + + DESCRIPTION + This escapes the contents of a string by doubling up any apostrophes that + it contains. This is used when the NO_BACKSLASH_ESCAPES SQL_MODE is in + effect on the server. 
+ + NOTE + To be consistent with escape_string_for_mysql(), to_length may be 0 to + mean "big enough" + + RETURN VALUES + The length of the escaped string +*/ + +size_t escape_quotes_for_mysql(CHARSET_INFO *charset_info, + char *to, size_t to_length, + const char *from, size_t length, + my_bool *overflow) +{ + const char *to_start= to; + const char *end, *to_end=to_start + (to_length ? to_length-1 : 2*length); +#ifdef USE_MB + my_bool use_mb_flag= my_ci_use_mb(charset_info); +#endif + *overflow= FALSE; + for (end= from + length; from < end; from++) + { +#ifdef USE_MB + int tmp_length; + if (use_mb_flag && (tmp_length= my_ismbchar(charset_info, from, end))) + { + if (to + tmp_length > to_end) + { + *overflow= TRUE; + break; + } + while (tmp_length--) + *to++= *from++; + from--; + continue; + } + /* + We don't have the same issue here with a non-multi-byte character being + turned into a multi-byte character by the addition of an escaping + character, because we are only escaping the ' character with itself. + */ +#endif + if (*from == '\'') + { + if (to + 2 > to_end) + { + *overflow= TRUE; + break; + } + *to++= '\''; + *to++= '\''; + } + else + { + if (to + 1 > to_end) + { + *overflow= TRUE; + break; + } + *to++= *from; + } + } + *to= 0; + return (size_t) (to - to_start); +} + + +typedef enum my_cs_match_type_enum +{ + /* MySQL and OS charsets are fully compatible */ + my_cs_exact, + /* MySQL charset is very close to OS charset */ + my_cs_approx, + /* + MySQL knows this charset, but it is not supported as client character set. 
+ */ + my_cs_unsupp +} my_cs_match_type; + + +typedef struct str2str_st +{ + const char* os_name; + const char* my_name; + my_cs_match_type param; +} MY_CSET_OS_NAME; + +static const MY_CSET_OS_NAME charsets[] = +{ +#ifdef _WIN32 + {"cp437", "cp850", my_cs_approx}, + {"cp850", "cp850", my_cs_exact}, + {"cp852", "cp852", my_cs_exact}, + {"cp858", "cp850", my_cs_approx}, + {"cp866", "cp866", my_cs_exact}, + {"cp874", "tis620", my_cs_approx}, + {"cp932", "cp932", my_cs_exact}, + {"cp936", "gbk", my_cs_approx}, + {"cp949", "euckr", my_cs_approx}, + {"cp950", "big5", my_cs_exact}, + {"cp1200", "utf16le", my_cs_unsupp}, + {"cp1201", "utf16", my_cs_unsupp}, + {"cp1250", "cp1250", my_cs_exact}, + {"cp1251", "cp1251", my_cs_exact}, + {"cp1252", "latin1", my_cs_exact}, + {"cp1253", "greek", my_cs_exact}, + {"cp1254", "latin5", my_cs_exact}, + {"cp1255", "hebrew", my_cs_approx}, + {"cp1256", "cp1256", my_cs_exact}, + {"cp1257", "cp1257", my_cs_exact}, + {"cp10000", "macroman", my_cs_exact}, + {"cp10001", "sjis", my_cs_approx}, + {"cp10002", "big5", my_cs_approx}, + {"cp10008", "gb2312", my_cs_approx}, + {"cp10021", "tis620", my_cs_approx}, + {"cp10029", "macce", my_cs_exact}, + {"cp12001", "utf32", my_cs_unsupp}, + {"cp20107", "swe7", my_cs_exact}, + {"cp20127", "latin1", my_cs_approx}, + {"cp20866", "koi8r", my_cs_exact}, + {"cp20932", "ujis", my_cs_exact}, + {"cp20936", "gb2312", my_cs_approx}, + {"cp20949", "euckr", my_cs_approx}, + {"cp21866", "koi8u", my_cs_exact}, + {"cp28591", "latin1", my_cs_approx}, + {"cp28592", "latin2", my_cs_exact}, + {"cp28597", "greek", my_cs_exact}, + {"cp28598", "hebrew", my_cs_exact}, + {"cp28599", "latin5", my_cs_exact}, + {"cp28603", "latin7", my_cs_exact}, +#ifdef UNCOMMENT_THIS_WHEN_WL_4579_IS_DONE + {"cp28605", "latin9", my_cs_exact}, +#endif + {"cp38598", "hebrew", my_cs_exact}, + {"cp51932", "ujis", my_cs_exact}, + {"cp51936", "gb2312", my_cs_exact}, + {"cp51949", "euckr", my_cs_exact}, + {"cp51950", "big5", my_cs_exact}, +#ifdef 
UNCOMMENT_THIS_WHEN_WL_WL_4024_IS_DONE + {"cp54936", "gb18030", my_cs_exact}, +#endif + {"cp65001", "utf8mb4", my_cs_exact}, + {"cp65001", "utf8mb3", my_cs_approx}, +#else /* not Windows */ + + {"646", "latin1", my_cs_approx}, /* Default on Solaris */ + {"ANSI_X3.4-1968", "latin1", my_cs_approx}, + {"ansi1251", "cp1251", my_cs_exact}, + {"armscii8", "armscii8", my_cs_exact}, + {"armscii-8", "armscii8", my_cs_exact}, + {"ASCII", "latin1", my_cs_approx}, + {"Big5", "big5", my_cs_exact}, + {"cp1251", "cp1251", my_cs_exact}, + {"cp1255", "hebrew", my_cs_approx}, + {"CP866", "cp866", my_cs_exact}, + {"eucCN", "gb2312", my_cs_exact}, + {"euc-CN", "gb2312", my_cs_exact}, + {"eucJP", "ujis", my_cs_exact}, + {"euc-JP", "ujis", my_cs_exact}, + {"eucKR", "euckr", my_cs_exact}, + {"euc-KR", "euckr", my_cs_exact}, +#ifdef UNCOMMENT_THIS_WHEN_WL_WL_4024_IS_DONE + {"gb18030", "gb18030", my_cs_exact}, +#endif + {"gb2312", "gb2312", my_cs_exact}, + {"gbk", "gbk", my_cs_exact}, + {"georgianps", "geostd8", my_cs_exact}, + {"georgian-ps", "geostd8", my_cs_exact}, + {"IBM-1252", "cp1252", my_cs_exact}, + + {"iso88591", "latin1", my_cs_approx}, + {"ISO_8859-1", "latin1", my_cs_approx}, + {"ISO8859-1", "latin1", my_cs_approx}, + {"ISO-8859-1", "latin1", my_cs_approx}, + + {"iso885913", "latin7", my_cs_exact}, + {"ISO_8859-13", "latin7", my_cs_exact}, + {"ISO8859-13", "latin7", my_cs_exact}, + {"ISO-8859-13", "latin7", my_cs_exact}, + +#ifdef UNCOMMENT_THIS_WHEN_WL_4579_IS_DONE + {"iso885915", "latin9", my_cs_exact}, + {"ISO_8859-15", "latin9", my_cs_exact}, + {"ISO8859-15", "latin9", my_cs_exact}, + {"ISO-8859-15", "latin9", my_cs_exact}, +#endif + + {"iso88592", "latin2", my_cs_exact}, + {"ISO_8859-2", "latin2", my_cs_exact}, + {"ISO8859-2", "latin2", my_cs_exact}, + {"ISO-8859-2", "latin2", my_cs_exact}, + + {"iso88597", "greek", my_cs_exact}, + {"ISO_8859-7", "greek", my_cs_exact}, + {"ISO8859-7", "greek", my_cs_exact}, + {"ISO-8859-7", "greek", my_cs_exact}, + + {"iso88598", 
"hebrew", my_cs_exact}, + {"ISO_8859-8", "hebrew", my_cs_exact}, + {"ISO8859-8", "hebrew", my_cs_exact}, + {"ISO-8859-8", "hebrew", my_cs_exact}, + + {"iso88599", "latin5", my_cs_exact}, + {"ISO_8859-9", "latin5", my_cs_exact}, + {"ISO8859-9", "latin5", my_cs_exact}, + {"ISO-8859-9", "latin5", my_cs_exact}, + + {"koi8r", "koi8r", my_cs_exact}, + {"KOI8-R", "koi8r", my_cs_exact}, + {"koi8u", "koi8u", my_cs_exact}, + {"KOI8-U", "koi8u", my_cs_exact}, + + {"roman8", "hp8", my_cs_exact}, /* Default on HP UX */ + + {"Shift_JIS", "sjis", my_cs_exact}, + {"SJIS", "sjis", my_cs_exact}, + {"shiftjisx0213", "sjis", my_cs_exact}, + + {"tis620", "tis620", my_cs_exact}, + {"tis-620", "tis620", my_cs_exact}, + + {"ujis", "ujis", my_cs_exact}, + + {"US-ASCII", "latin1", my_cs_approx}, + + {"utf8", "utf8", my_cs_exact}, + {"utf-8", "utf8", my_cs_exact}, +#endif + {NULL, NULL, 0} +}; + + +static const char* +my_os_charset_to_mysql_charset(const char* csname) +{ + const MY_CSET_OS_NAME* csp; + for (csp = charsets; csp->os_name; csp++) + { + if (!strcasecmp(csp->os_name, csname)) + { + switch (csp->param) + { + case my_cs_exact: + return csp->my_name; + + case my_cs_approx: + /* + Maybe we should print a warning eventually: + character set correspondence is not exact. + */ + return csp->my_name; + + default: + return NULL; + } + } + } + return NULL; +} + +const char* my_default_csname() +{ + const char* csname = NULL; +#ifdef _WIN32 + char cpbuf[64]; + UINT cp; + if (GetACP() == CP_UTF8) + cp= CP_UTF8; + else + { + cp= GetConsoleCP(); + if (cp == 0) + cp= GetACP(); + } + snprintf(cpbuf, sizeof(cpbuf), "cp%d", (int)cp); + csname = my_os_charset_to_mysql_charset(cpbuf); +#elif defined(HAVE_SETLOCALE) && defined(HAVE_NL_LANGINFO) + if (setlocale(LC_CTYPE, "") && (csname = nl_langinfo(CODESET))) + csname = my_os_charset_to_mysql_charset(csname); +#endif + return csname ? 
csname : MYSQL_DEFAULT_CHARSET_NAME; +} + + +#ifdef _WIN32 +/** + Extract codepage number from "cpNNNN" string, + and check that this codepage is supported. + + @return 0 - invalid codepage(or unsupported) + > 0 - valid codepage number. +*/ +static UINT get_codepage(const char *s) +{ + UINT cp; + if (s[0] != 'c' || s[1] != 'p') + { + DBUG_ASSERT(0); + return 0; + } + cp= strtoul(s + 2, NULL, 10); + if (!IsValidCodePage(cp)) + { + /* + Can happen also with documented CP, i.e 51936 + Perhaps differs from one machine to another. + */ + return 0; + } + return cp; +} + +static UINT mysql_charset_to_codepage(const char *my_cs_name) +{ + const MY_CSET_OS_NAME *csp; + UINT cp=0,tmp; + for (csp= charsets; csp->os_name; csp++) + { + if (!strcasecmp(csp->my_name, my_cs_name)) + { + switch (csp->param) + { + case my_cs_exact: + tmp= get_codepage(csp->os_name); + if (tmp) + return tmp; + break; + case my_cs_approx: + /* + don't return just yet, perhaps there is a better + (exact) match later. + */ + if (!cp) + cp= get_codepage(csp->os_name); + continue; + + default: + return 0; + } + } + } + return cp; +} + +/** Set console codepage for MariaDB's charset name */ +int my_set_console_cp(const char *csname) +{ + UINT cp; + if (fileno(stdout) < 0 || !isatty(fileno(stdout))) + return 0; + cp= mysql_charset_to_codepage(csname); + if (!cp) + { + /* No compatible os charset.*/ + return -1; + } + + if (GetConsoleOutputCP() != cp && !SetConsoleOutputCP(cp)) + { + return -1; + } + + if (GetConsoleCP() != cp && !SetConsoleCP(cp)) + { + return -1; + } + return 0; +} +#endif diff --git a/mysys/crc32/clang_workaround.h b/mysys/crc32/clang_workaround.h new file mode 100644 index 00000000..915f7e52 --- /dev/null +++ b/mysys/crc32/clang_workaround.h @@ -0,0 +1,87 @@ +#ifndef CLANG_WORKAROUNDS_H +#define CLANG_WORKAROUNDS_H + +/* + * These stubs fix clang incompatibilities with GCC builtins. 
+ */ + +#ifndef __builtin_crypto_vpmsumw +#define __builtin_crypto_vpmsumw __builtin_crypto_vpmsumb +#endif +#ifndef __builtin_crypto_vpmsumd +#define __builtin_crypto_vpmsumd __builtin_crypto_vpmsumb +#endif + +static inline +__vector unsigned long long __attribute__((overloadable)) +vec_ld(int __a, const __vector unsigned long long* __b) +{ + return (__vector unsigned long long)__builtin_altivec_lvx(__a, __b); +} + +/* + * GCC __builtin_pack_vector_int128 returns a vector __int128_t but Clang + * does not recognize this type. On GCC this builtin is translated to a + * xxpermdi instruction that only moves the registers __a, __b instead generates + * a load. + * + * Clang has vec_xxpermdi intrinsics. It was implemented in 4.0.0. + */ +static inline +__vector unsigned long long __builtin_pack_vector (unsigned long __a, + unsigned long __b) +{ + #if defined(__BIG_ENDIAN__) + __vector unsigned long long __v = {__a, __b}; + #else + __vector unsigned long long __v = {__b, __a}; + #endif + return __v; +} + +/* + * Clang 7 changed the behavior of vec_xxpermdi in order to provide the same + * behavior of GCC. That means code adapted to Clang >= 7 does not work on + * Clang <= 6. So, fallback to __builtin_unpack_vector() on Clang <= 6. 
+ */ +#if !defined vec_xxpermdi || __clang_major__ <= 6 + +static inline +unsigned long __builtin_unpack_vector (__vector unsigned long long __v, + int __o) +{ + return __v[__o]; +} + +#if defined(__BIG_ENDIAN__) +#define __builtin_unpack_vector_0(a) __builtin_unpack_vector ((a), 0) +#define __builtin_unpack_vector_1(a) __builtin_unpack_vector ((a), 1) +#else +#define __builtin_unpack_vector_0(a) __builtin_unpack_vector ((a), 1) +#define __builtin_unpack_vector_1(a) __builtin_unpack_vector ((a), 0) +#endif + +#else + +static inline +unsigned long __builtin_unpack_vector_0 (__vector unsigned long long __v) +{ + #if defined(__BIG_ENDIAN__) + return vec_xxpermdi(__v, __v, 0x0)[0]; + #else + return vec_xxpermdi(__v, __v, 0x3)[0]; + #endif +} + +static inline +unsigned long __builtin_unpack_vector_1 (__vector unsigned long long __v) +{ + #if defined(__BIG_ENDIAN__) + return vec_xxpermdi(__v, __v, 0x3)[0]; + #else + return vec_xxpermdi(__v, __v, 0x0)[0]; + #endif +} +#endif /* vec_xxpermdi */ + +#endif diff --git a/mysys/crc32/crc32_arm64.c b/mysys/crc32/crc32_arm64.c new file mode 100644 index 00000000..0e70c218 --- /dev/null +++ b/mysys/crc32/crc32_arm64.c @@ -0,0 +1,340 @@ +#include +#include +#include + +static int pmull_supported; + +#if defined(HAVE_ARMV8_CRC) + +#if defined(__APPLE__) +#include + +int crc32_aarch64_available(void) +{ + int ret; + size_t len = sizeof(ret); + if (sysctlbyname("hw.optional.armv8_crc32", &ret, &len, NULL, 0) == -1) + return 0; + return ret; +} + +const char *crc32c_aarch64_available(void) +{ + if (crc32_aarch64_available() == 0) + return NULL; + pmull_supported = 1; + return "Using ARMv8 crc32 + pmull instructions"; +} + +#else +#include +#if defined(__FreeBSD__) +/* + FreeBSD build: provide getauxval() on top of elf_aux_info(). + elf_aux_info() returns 0 on success, so any non-zero result is + reported to the caller as 0, i.e. no capability bits set. +*/ +static unsigned long getauxval(unsigned int key) +{ + unsigned long val; + if (elf_aux_info(key, (void *)&val, (int)sizeof(val)) != 0) + return 0ul; + return val; +} +#else +# include +#endif + +#ifndef HWCAP_CRC32 +# define HWCAP_CRC32 (1 << 7) +#endif + +#ifndef HWCAP_PMULL 
+# define HWCAP_PMULL (1 << 4) +#endif + +/* ARM made crc32 default from ARMv8.1 but optional in ARMv8A + * Runtime check API. + */ +int crc32_aarch64_available(void) +{ + unsigned long auxv= getauxval(AT_HWCAP); + return (auxv & HWCAP_CRC32) != 0; +} + +const char *crc32c_aarch64_available(void) +{ + unsigned long auxv= getauxval(AT_HWCAP); + + if (!(auxv & HWCAP_CRC32)) + return NULL; + + pmull_supported= (auxv & HWCAP_PMULL) != 0; + if (pmull_supported) + return "Using ARMv8 crc32 + pmull instructions"; + else + return "Using ARMv8 crc32 instructions"; +} + +#endif /* __APPLE__ */ +#endif /* HAVE_ARMV8_CRC */ + +#ifndef HAVE_ARMV8_CRC_CRYPTO_INTRINSICS + +/* Request crc extension capabilities from the assembler */ +asm(".arch_extension crc"); + +# ifdef HAVE_ARMV8_CRYPTO +/* crypto extension */ +asm(".arch_extension crypto"); +# endif + +#define CRC32CX(crc, value) __asm__("crc32cx %w[c], %w[c], %x[v]":[c]"+r"(crc):[v]"r"(value)) +#define CRC32CW(crc, value) __asm__("crc32cw %w[c], %w[c], %w[v]":[c]"+r"(crc):[v]"r"(value)) +#define CRC32CH(crc, value) __asm__("crc32ch %w[c], %w[c], %w[v]":[c]"+r"(crc):[v]"r"(value)) +#define CRC32CB(crc, value) __asm__("crc32cb %w[c], %w[c], %w[v]":[c]"+r"(crc):[v]"r"(value)) + +#define CRC32X(crc, value) __asm__("crc32x %w[c], %w[c], %x[v]":[c]"+r"(crc):[v]"r"(value)) +#define CRC32W(crc, value) __asm__("crc32w %w[c], %w[c], %w[v]":[c]"+r"(crc):[v]"r"(value)) +#define CRC32H(crc, value) __asm__("crc32h %w[c], %w[c], %w[v]":[c]"+r"(crc):[v]"r"(value)) +#define CRC32B(crc, value) __asm__("crc32b %w[c], %w[c], %w[v]":[c]"+r"(crc):[v]"r"(value)) + + +#define CRC32C3X8(buffer, ITR) \ + __asm__("crc32cx %w[c1], %w[c1], %x[v]":[c1]"+r"(crc1):[v]"r"(*((const uint64_t *)buffer + 42*1 + (ITR))));\ + __asm__("crc32cx %w[c2], %w[c2], %x[v]":[c2]"+r"(crc2):[v]"r"(*((const uint64_t *)buffer + 42*2 + (ITR))));\ + __asm__("crc32cx %w[c0], %w[c0], %x[v]":[c0]"+r"(crc0):[v]"r"(*((const uint64_t *)buffer + 42*0 + (ITR)))); + +#else /* 
HAVE_ARMV8_CRC_CRYPTO_INTRINSICS */ + +/* Intrinsics header*/ +#include +#include + +#define CRC32CX(crc, value) (crc) = __crc32cd((crc), (value)) +#define CRC32CW(crc, value) (crc) = __crc32cw((crc), (value)) +#define CRC32CH(crc, value) (crc) = __crc32ch((crc), (value)) +#define CRC32CB(crc, value) (crc) = __crc32cb((crc), (value)) + +#define CRC32X(crc, value) (crc) = __crc32d((crc), (value)) +#define CRC32W(crc, value) (crc) = __crc32w((crc), (value)) +#define CRC32H(crc, value) (crc) = __crc32h((crc), (value)) +#define CRC32B(crc, value) (crc) = __crc32b((crc), (value)) + +#define CRC32C3X8(buffer, ITR) \ + crc1 = __crc32cd(crc1, *((const uint64_t *)buffer + 42*1 + (ITR)));\ + crc2 = __crc32cd(crc2, *((const uint64_t *)buffer + 42*2 + (ITR)));\ + crc0 = __crc32cd(crc0, *((const uint64_t *)buffer + 42*0 + (ITR))); + +#endif /* HAVE_ARMV8_CRC_CRYPTO_INTRINSICS */ + +#define CRC32C7X3X8(buffer, ITR) do {\ + CRC32C3X8(buffer, ((ITR) * 7 + 0)) \ + CRC32C3X8(buffer, ((ITR) * 7 + 1)) \ + CRC32C3X8(buffer, ((ITR) * 7 + 2)) \ + CRC32C3X8(buffer, ((ITR) * 7 + 3)) \ + CRC32C3X8(buffer, ((ITR) * 7 + 4)) \ + CRC32C3X8(buffer, ((ITR) * 7 + 5)) \ + CRC32C3X8(buffer, ((ITR) * 7 + 6)) \ +} while(0) + +#define PREF4X64L1(buffer, PREF_OFFSET, ITR) \ + __asm__("PRFM PLDL1KEEP, [%x[v],%[c]]"::[v]"r"(buffer), [c]"I"((PREF_OFFSET) + ((ITR) + 0)*64));\ + __asm__("PRFM PLDL1KEEP, [%x[v],%[c]]"::[v]"r"(buffer), [c]"I"((PREF_OFFSET) + ((ITR) + 1)*64));\ + __asm__("PRFM PLDL1KEEP, [%x[v],%[c]]"::[v]"r"(buffer), [c]"I"((PREF_OFFSET) + ((ITR) + 2)*64));\ + __asm__("PRFM PLDL1KEEP, [%x[v],%[c]]"::[v]"r"(buffer), [c]"I"((PREF_OFFSET) + ((ITR) + 3)*64)); + +#define PREF1KL1(buffer, PREF_OFFSET) \ + PREF4X64L1(buffer,(PREF_OFFSET), 0) \ + PREF4X64L1(buffer,(PREF_OFFSET), 4) \ + PREF4X64L1(buffer,(PREF_OFFSET), 8) \ + PREF4X64L1(buffer,(PREF_OFFSET), 12) + +#define PREF4X64L2(buffer, PREF_OFFSET, ITR) \ + __asm__("PRFM PLDL2KEEP, [%x[v],%[c]]"::[v]"r"(buffer), [c]"I"((PREF_OFFSET) + ((ITR) + 
0)*64));\ + __asm__("PRFM PLDL2KEEP, [%x[v],%[c]]"::[v]"r"(buffer), [c]"I"((PREF_OFFSET) + ((ITR) + 1)*64));\ + __asm__("PRFM PLDL2KEEP, [%x[v],%[c]]"::[v]"r"(buffer), [c]"I"((PREF_OFFSET) + ((ITR) + 2)*64));\ + __asm__("PRFM PLDL2KEEP, [%x[v],%[c]]"::[v]"r"(buffer), [c]"I"((PREF_OFFSET) + ((ITR) + 3)*64)); + +#define PREF1KL2(buffer, PREF_OFFSET) \ + PREF4X64L2(buffer,(PREF_OFFSET), 0) \ + PREF4X64L2(buffer,(PREF_OFFSET), 4) \ + PREF4X64L2(buffer,(PREF_OFFSET), 8) \ + PREF4X64L2(buffer,(PREF_OFFSET), 12) + +uint32_t crc32c_aarch64(uint32_t crc, const unsigned char *buffer, uint64_t len) +{ + uint32_t crc0, crc1, crc2; + int64_t length= (int64_t)len; + + crc^= 0xffffffff; + + /* Pmull runtime check here. + * Raspberry Pi 4 supports crc32 but doesn't support pmull (MDEV-23030). + * + * Consider the condition that the target platform does support hardware crc32 + * but not support PMULL. In this condition, it should leverage the aarch64 + * crc32 instruction (__crc32c) and just only skip parallel computation (pmull/vmull) + * rather than skip all hardware crc32 instruction of computation. 
+ */ + if (pmull_supported) + { +/* The following Macro (HAVE_ARMV8_CRYPTO) is used for compiling check */ +#ifdef HAVE_ARMV8_CRYPTO + +/* Crypto extension Support + * Parallel computation with 1024 Bytes (per block) + * Intrinsics Support + */ +# ifdef HAVE_ARMV8_CRC_CRYPTO_INTRINSICS + const poly64_t k1= 0xe417f38a, k2= 0x8f158014; + uint64_t t0, t1; + + /* Process per block size of 1024 Bytes + * A block size = 8 + 42*3*sizeof(uint64_t) + 8 + */ + while ((length-= 1024) >= 0) + { + /* Prefetch 3*1024 data for avoiding L2 cache miss */ + PREF1KL2(buffer, 1024*3); + /* Do first 8 bytes here for better pipelining */ + crc0= __crc32cd(crc, *(const uint64_t *)buffer); + crc1= 0; + crc2= 0; + buffer+= sizeof(uint64_t); + + /* Process block inline + * Process crc0 last to avoid dependency with above + */ + CRC32C7X3X8(buffer, 0); + CRC32C7X3X8(buffer, 1); + CRC32C7X3X8(buffer, 2); + CRC32C7X3X8(buffer, 3); + CRC32C7X3X8(buffer, 4); + CRC32C7X3X8(buffer, 5); + + buffer+= 42*3*sizeof(uint64_t); + /* Prefetch data for following block to avoid L1 cache miss */ + PREF1KL1(buffer, 1024); + + /* Last 8 bytes + * Merge crc0 and crc1 into crc2 + * crc1 multiply by K2 + * crc0 multiply by K1 + */ + t1= (uint64_t)vmull_p64(crc1, k2); + t0= (uint64_t)vmull_p64(crc0, k1); + crc= __crc32cd(crc2, *(const uint64_t *)buffer); + crc1= __crc32cd(0, t1); + crc^= crc1; + crc0= __crc32cd(0, t0); + crc^= crc0; + + buffer+= sizeof(uint64_t); + } + +# else /* HAVE_ARMV8_CRC_CRYPTO_INTRINSICS */ + + /*No intrinsics*/ + __asm__("mov x16, #0xf38a \n\t" + "movk x16, #0xe417, lsl 16 \n\t" + "mov v1.2d[0], x16 \n\t" + "mov x16, #0x8014 \n\t" + "movk x16, #0x8f15, lsl 16 \n\t" + "mov v0.2d[0], x16 \n\t" + :::"x16"); + + while ((length-= 1024) >= 0) + { + PREF1KL2(buffer, 1024*3); + __asm__("crc32cx %w[c0], %w[c], %x[v]\n\t" + :[c0]"=r"(crc0):[c]"r"(crc), [v]"r"(*(const uint64_t *)buffer):); + crc1= 0; + crc2= 0; + buffer+= sizeof(uint64_t); + + CRC32C7X3X8(buffer, 0); + CRC32C7X3X8(buffer, 1); + 
CRC32C7X3X8(buffer, 2); + CRC32C7X3X8(buffer, 3); + CRC32C7X3X8(buffer, 4); + CRC32C7X3X8(buffer, 5); + + buffer+= 42*3*sizeof(uint64_t); + PREF1KL1(buffer, 1024); + __asm__("mov v2.2d[0], %x[c1] \n\t" + "pmull v2.1q, v2.1d, v0.1d \n\t" + "mov v3.2d[0], %x[c0] \n\t" + "pmull v3.1q, v3.1d, v1.1d \n\t" + "crc32cx %w[c], %w[c2], %x[v] \n\t" + "mov %x[c1], v2.2d[0] \n\t" + "crc32cx %w[c1], wzr, %x[c1] \n\t" + "eor %w[c], %w[c], %w[c1] \n\t" + "mov %x[c0], v3.2d[0] \n\t" + "crc32cx %w[c0], wzr, %x[c0] \n\t" + "eor %w[c], %w[c], %w[c0] \n\t" + :[c1]"+r"(crc1), [c0]"+r"(crc0), [c2]"+r"(crc2), [c]"+r"(crc) + :[v]"r"(*((const uint64_t *)buffer))); + buffer+= sizeof(uint64_t); + } +# endif /* HAVE_ARMV8_CRC_CRYPTO_INTRINSICS */ + + /* Done if Input data size is aligned with 1024 */ + if (!(length+= 1024)) + return ~crc; + +#endif /* HAVE_ARMV8_CRYPTO */ + + } // end if pmull_supported + + while ((length-= sizeof(uint64_t)) >= 0) + { + CRC32CX(crc, *(uint64_t *)buffer); + buffer+= sizeof(uint64_t); + } + + /* The following is more efficient than the straight loop */ + if (length & sizeof(uint32_t)) + { + CRC32CW(crc, *(uint32_t *)buffer); + buffer+= sizeof(uint32_t); + } + + if (length & sizeof(uint16_t)) + { + CRC32CH(crc, *(uint16_t *)buffer); + buffer+= sizeof(uint16_t); + } + + if (length & sizeof(uint8_t)) + CRC32CB(crc, *buffer); + + return ~crc; +} + +/* There are multiple approaches to calculate crc. +Approach-1: Process 8 bytes then 4 bytes then 2 bytes and then 1 byte +Approach-2: Process 8 bytes and remaining workload using 1 byte at a time +Approach-3: Process 64 bytes at once by issuing 8 crc calls and remaining + using 8/1 combination. + +Based on micro-benchmark testing we found that Approach-2 works best especially +given small chunk of variable data. 
*/ +unsigned int crc32_aarch64(unsigned int crc, const void *buf, size_t len) +{ + const uint8_t *buf1= buf; + const uint64_t *buf8= (const uint64_t *) (((uintptr_t) buf + 7) & ~7); + + crc= ~crc; + + /* if start pointer is not 8 bytes aligned */ + while ((buf1 != (const uint8_t *) buf8) && len) + { + CRC32B(crc, *buf1++); + len--; + } + + for (; len >= 8; len-= 8) + CRC32X(crc, *buf8++); + + buf1= (const uint8_t *) buf8; + while (len--) + CRC32B(crc, *buf1++); + + return ~crc; +} diff --git a/mysys/crc32/crc32_ppc64.c b/mysys/crc32/crc32_ppc64.c new file mode 100644 index 00000000..76df88ee --- /dev/null +++ b/mysys/crc32/crc32_ppc64.c @@ -0,0 +1,5 @@ +#define CRC32_FUNCTION my_checksum +#define CRC_TABLE +#define POWER8_INTRINSICS +#include "pcc_crc32_constants.h" +#include "crc_ppc64.h" diff --git a/mysys/crc32/crc32_x86.c b/mysys/crc32/crc32_x86.c new file mode 100644 index 00000000..f077399c --- /dev/null +++ b/mysys/crc32/crc32_x86.c @@ -0,0 +1,334 @@ +/* Copyright (c) 2020, 2021, MariaDB + + This program is free software; you can redistribute it and/or modify + it under the terms of the GNU General Public License as published by + the Free Software Foundation; version 2 of the License. + + This program is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + GNU General Public License for more details. 
+ + You should have received a copy of the GNU General Public License + along with this program; if not, write to the Free Software + Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1335 USA */ + +/* + Implementation of CRC32 (Ethernet) using Intel PCLMULQDQ + Ported from Intel's work, see https://github.com/intel/soft-crc +*/ + +/******************************************************************************* + Copyright (c) 2009-2018, Intel Corporation + + Redistribution and use in source and binary forms, with or without + modification, are permitted provided that the following conditions are met: + + * Redistributions of source code must retain the above copyright notice, + this list of conditions and the following disclaimer. + * Redistributions in binary form must reproduce the above copyright + notice, this list of conditions and the following disclaimer in the + documentation and/or other materials provided with the distribution. + * Neither the name of Intel Corporation nor the names of its contributors + may be used to endorse or promote products derived from this software + without specific prior written permission. + + THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" + AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE + DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE LIABLE + FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL + DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR + SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER + CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, + OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE + OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
+*******************************************************************************/ + + +#include +#include + +#include +#include +#include +#include +#include + +#ifdef __GNUC__ +#include +#elif defined(_MSC_VER) +#include +#else +#error "unknown compiler" +#endif + +/** + * @brief Shifts left 128 bit register by specified number of bytes + * + * Implemented as a byte shuffle whose control mask is the 16 byte window + * of \a crc_xmm_shift_tab that starts at offset (16 - \a num). + * + * @param reg 128 bit value + * @param num number of bytes to shift left \a reg by (0-16) + * + * @return \a reg << (\a num * 8) + */ +static inline __m128i xmm_shift_left(__m128i reg, const unsigned int num) +{ + /* Layout: 16 x 0xff, the identity indices 0x00..0x0f, 16 x 0xff */ + static const MY_ALIGNED(16) uint8_t crc_xmm_shift_tab[48]= { + 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, + 0xff, 0xff, 0xff, 0xff, 0x00, 0x01, 0x02, 0x03, 0x04, 0x05, 0x06, 0x07, + 0x08, 0x09, 0x0a, 0x0b, 0x0c, 0x0d, 0x0e, 0x0f, 0xff, 0xff, 0xff, 0xff, + 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff}; + + /* The window start depends on num, so it is read with an unaligned load */ + const __m128i *p= (const __m128i *) (crc_xmm_shift_tab + 16 - num); + + return _mm_shuffle_epi8(reg, _mm_loadu_si128(p)); +} + +/** + * @brief Precomputed constants consumed by crcr32_calc_pclmulqdq() + * + * The members are read two at a time with 128 bit aligned loads starting + * at rk1, rk5 and rk7, so the member order and the 16 byte alignment of + * the instance both matter. + */ +struct crcr_pclmulqdq_ctx +{ + uint64_t rk1; + uint64_t rk2; + uint64_t rk5; + uint64_t rk6; + uint64_t rk7; + uint64_t rk8; +}; + +/** + * @brief Performs one folding round + * + * Logically function operates as follows: + * DATA = READ_NEXT_16BYTES(); + * F1 = LSB8(FOLD) + * F2 = MSB8(FOLD) + * T1 = CLMUL(F1, RK1) + * T2 = CLMUL(F2, RK2) + * FOLD = XOR(T1, T2, DATA) + * + * @param data_block 16 byte data block + * @param precomp precomputed rk1 and rk2 constants (one 128 bit value) + * @param fold running 16 byte folded data + * + * @return New 16 byte folded data + */ +static inline __m128i crcr32_folding_round(const __m128i data_block, + const __m128i precomp, const __m128i fold) +{ + __m128i tmp0= _mm_clmulepi64_si128(fold, precomp, 0x01); + __m128i tmp1= _mm_clmulepi64_si128(fold, precomp, 0x10); + + return _mm_xor_si128(tmp1, _mm_xor_si128(data_block, tmp0)); +} + +/** + * @brief Performs reduction from 128 bits to 64 bits + * + * @param data128 128 bits 
data to be reduced + * @param precomp rk5 and rk6 precomputed constants + * + * @return data reduced to 64 bits + */ +static inline __m128i crcr32_reduce_128_to_64(__m128i data128, const __m128i precomp) +{ + __m128i tmp0, tmp1, tmp2; + + /* 64b fold: carry-less multiply of the low qwords of data128 and precomp, XORed with the high qword of data128 */ + tmp0= _mm_clmulepi64_si128(data128, precomp, 0x00); + tmp1= _mm_srli_si128(data128, 8); + tmp0= _mm_xor_si128(tmp0, tmp1); + + /* 32b fold: shift left by 4 bytes, then multiply the low qword by the high qword of precomp */ + tmp2= _mm_slli_si128(tmp0, 4); + tmp1= _mm_clmulepi64_si128(tmp2, precomp, 0x10); + + return _mm_xor_si128(tmp1, tmp0); +} + +/** + * @brief Performs Barrett's reduction from 64 bits to 32 bits + * + * @param data64 64 bits data to be reduced + * @param precomp rk7 and rk8 precomputed constants (both halves are used) + * + * @return data reduced to 32 bits + */ +static inline uint32_t crcr32_reduce_64_to_32(__m128i data64, const __m128i precomp) +{ + /* mask1 keeps the two lowest 32 bit lanes */ + static const MY_ALIGNED(16) uint32_t mask1[4]= { + 0xffffffff, 0xffffffff, 0x00000000, 0x00000000}; + /* mask2 clears the lowest 32 bit lane */ + static const MY_ALIGNED(16) uint32_t mask2[4]= { + 0x00000000, 0xffffffff, 0xffffffff, 0xffffffff}; + __m128i tmp0, tmp1, tmp2; + + tmp0= _mm_and_si128(data64, _mm_load_si128((__m128i *) mask2)); + + /* First multiplication uses the low qword of precomp */ + tmp1= _mm_clmulepi64_si128(tmp0, precomp, 0x00); + tmp1= _mm_xor_si128(tmp1, tmp0); + tmp1= _mm_and_si128(tmp1, _mm_load_si128((__m128i *) mask1)); + + /* Second multiplication uses the high qword of precomp */ + tmp2= _mm_clmulepi64_si128(tmp1, precomp, 0x10); + tmp2= _mm_xor_si128(tmp2, tmp1); + tmp2= _mm_xor_si128(tmp2, tmp0); + + /* The result is taken from 32 bit lane 2 */ + return _mm_extract_epi32(tmp2, 2); +} + +/** + * @brief Calculates reflected 32-bit CRC for given \a data block + * by applying folding and reduction methods. + * + * Algorithm operates on 32 bit CRCs. + * Polynomials and initial values may need to be promoted to + * 32 bits where required. + * + * @param crc initial CRC value (32 bit value) + * @param data pointer to data block + * @param data_len length of \a data block in bytes + * @param params pointer to PCLMULQDQ CRC calculation context + * + * @return CRC for given \a data block (32 bits wide). 
+ */ +/* + * Notes: + * - returns \a crc unchanged when \a data_len is 0; + * - no final inversion is applied here, the caller does that; + * - the returned value can be passed back in as \a crc to continue over + * the following bytes, which crc32_pclmul() relies on for large inputs. + */ +static inline uint32_t crcr32_calc_pclmulqdq(const uint8_t *data, uint32_t data_len, + uint32_t crc, + const struct crcr_pclmulqdq_ctx *params) +{ + __m128i temp, fold, k; + uint32_t n; + + DBUG_ASSERT(data != NULL || data_len == 0); + DBUG_ASSERT(params); + + if (unlikely(data_len == 0)) + return crc; + + /** + * Get CRC init value + */ + temp= _mm_insert_epi32(_mm_setzero_si128(), crc, 0); + + /** + * ------------------------------------------------- + * Folding all data into single 16 byte data block + * Assumes: \a fold holds first 16 bytes of data + */ + + if (unlikely(data_len < 32)) + { + if (unlikely(data_len == 16)) + { + /* 16 bytes */ + fold= _mm_loadu_si128((__m128i *) data); + fold= _mm_xor_si128(fold, temp); + goto reduction_128_64; + } + if (unlikely(data_len < 16)) + { + /* 1 to 15 bytes (0 was handled above): work on a zero padded copy */ + MY_ALIGNED(16) uint8_t buffer[16]; + + memset(buffer, 0, sizeof(buffer)); + memcpy(buffer, data, data_len); + + fold= _mm_load_si128((__m128i *) buffer); + fold= _mm_xor_si128(fold, temp); + if (data_len < 4) + { + fold= xmm_shift_left(fold, 8 - data_len); + goto barrett_reduction; + } + fold= xmm_shift_left(fold, 16 - data_len); + goto reduction_128_64; + } + /* 17 to 31 bytes */ + fold= _mm_loadu_si128((__m128i *) data); + fold= _mm_xor_si128(fold, temp); + n= 16; + k= _mm_load_si128((__m128i *) (&params->rk1)); + goto partial_bytes; + } + + /** + * At least 32 bytes in the buffer + */ + + /** + * Apply CRC initial value + */ + fold= _mm_loadu_si128((const __m128i *) data); + fold= _mm_xor_si128(fold, temp); + + /** + * Main folding loop + * - the last 16 bytes is processed separately + * - n never exceeds data_len, so (data_len - n) cannot wrap, + * unlike (n + 16) for lengths close to UINT32_MAX + */ + k= _mm_load_si128((__m128i *) (&params->rk1)); + for (n= 16; (data_len - n) >= 16; n+= 16) + { + temp= _mm_loadu_si128((__m128i *) &data[n]); + fold= crcr32_folding_round(temp, k, fold); + } + +partial_bytes: + if (likely(n < data_len)) + { + static const MY_ALIGNED(16) uint32_t mask3[4]= {0x80808080, 0x80808080, + 0x80808080, 0x80808080}; + static const MY_ALIGNED(16) 
uint8_t shf_table[32]= { + 0x00, 0x81, 0x82, 0x83, 0x84, 0x85, 0x86, 0x87, 0x88, 0x89, 0x8a, + 0x8b, 0x8c, 0x8d, 0x8e, 0x8f, 0x00, 0x01, 0x02, 0x03, 0x04, 0x05, + 0x06, 0x07, 0x08, 0x09, 0x0a, 0x0b, 0x0c, 0x0d, 0x0e, 0x0f}; + __m128i last16, a, b; + + /* The final 16 bytes of the input; they overlap data already folded */ + last16= _mm_loadu_si128((const __m128i *) &data[data_len - 16]); + + temp= _mm_loadu_si128((const __m128i *) &shf_table[data_len & 15]); + a= _mm_shuffle_epi8(fold, temp); + + temp= _mm_xor_si128(temp, _mm_load_si128((const __m128i *) mask3)); + b= _mm_shuffle_epi8(fold, temp); + b= _mm_blendv_epi8(b, last16, temp); + + /* k = rk1 & rk2 */ + temp= _mm_clmulepi64_si128(a, k, 0x01); + fold= _mm_clmulepi64_si128(a, k, 0x10); + + fold= _mm_xor_si128(fold, temp); + fold= _mm_xor_si128(fold, b); + } + + /** + * ------------------------------------------------- + * Reduction 128 -> 32 + * Assumes: \a fold holds 128bit folded data + */ +reduction_128_64: + k= _mm_load_si128((__m128i *) (&params->rk5)); + fold= crcr32_reduce_128_to_64(fold, k); + +barrett_reduction: + k= _mm_load_si128((__m128i *) (&params->rk7)); + n= crcr32_reduce_64_to_32(fold, k); + return n; +} + +/** Folding and reduction constants for the Ethernet CRC32 polynomial */ +static const MY_ALIGNED(16) struct crcr_pclmulqdq_ctx ether_crc32_clmul= { + 0xccaa009e, /**< rk1 */ + 0x1751997d0, /**< rk2 */ + 0xccaa009e, /**< rk5 */ + 0x163cd6124, /**< rk6 */ + 0x1f7011640, /**< rk7 */ + 0x1db710641 /**< rk8 */ +}; + +/** + * @brief Calculates Ethernet CRC32 using PCLMULQDQ method. + * + * The folding routine takes a 32 bit length, so inputs larger than 1 GiB + * are passed to it in 1 GiB pieces (a multiple of the 16 byte folding + * block) with the intermediate CRC chained from one piece to the next. + * Casting \a len to 32 bits in a single call would silently drop the + * upper bits for buffers of 4 GiB or more. + * + * @param crc32 CRC value to continue from + * @param buf pointer to data block to calculate CRC for + * @param len size of data block in bytes + * + * @return New CRC value + */ +unsigned int crc32_pclmul(unsigned int crc32, const void *buf, size_t len) +{ + const size_t max_chunk= (size_t) 1 << 30; + const uint8_t *data= buf; + uint32_t crc= ~crc32; + + while (len > max_chunk) + { + crc= crcr32_calc_pclmulqdq(data, (uint32_t) max_chunk, crc, &ether_crc32_clmul); + data+= max_chunk; + len-= max_chunk; + } + + return ~crcr32_calc_pclmulqdq(data, (uint32_t) len, crc, &ether_crc32_clmul); +} diff --git a/mysys/crc32/crc32c.cc b/mysys/crc32/crc32c.cc new file mode 100644 index 00000000..2bec041e --- /dev/null +++ b/mysys/crc32/crc32c.cc @@ -0,0 +1,597 @@ +// Copyright (c) 2011-present, Facebook, Inc. All rights reserved. 
+// This source code is licensed under both the GPLv2 (found in the +// COPYING file in the root directory) and Apache 2.0 License +// (found in the LICENSE.Apache file in the root directory). +// +// Copyright (c) 2011 The LevelDB Authors. All rights reserved. +// Use of this source code is governed by a BSD-style license that can be +// found in the LICENSE file. See the AUTHORS file for names of contributors. +// +// A portable implementation of crc32c, optimized to handle +// four bytes at a time. + +// +// Copyright (c) 2011 The LevelDB Authors. All rights reserved. +// Use of this source code is governed by a BSD-style license that can be +// found in the LICENSE file. See the AUTHORS file for names of contributors. + + +#include +#include +#include +#include +static inline uint32_t DecodeFixed32(const char *ptr) +{ + return uint4korr(ptr); +} + +#include +#ifdef _MSC_VER +#include +#endif + +#ifdef HAVE_SSE42 +# ifdef __GNUC__ +# include +# if __GNUC__ < 5 && !defined __clang__ +/* the headers do not really work in GCC before version 5 */ +# define _mm_crc32_u8(crc,data) __builtin_ia32_crc32qi(crc,data) +# define _mm_crc32_u32(crc,data) __builtin_ia32_crc32si(crc,data) +# define _mm_crc32_u64(crc,data) __builtin_ia32_crc32di(crc,data) +# else +# include +# endif +# define USE_SSE42 __attribute__((target("sse4.2"))) +# else +# define USE_SSE42 /* nothing */ +# endif +#endif + + +#ifdef __powerpc64__ +#include "crc32c_ppc.h" + +#if __linux__ +#include + +#ifndef PPC_FEATURE2_VEC_CRYPTO +#define PPC_FEATURE2_VEC_CRYPTO 0x02000000 +#endif + +#ifndef AT_HWCAP2 +#define AT_HWCAP2 26 +#endif + +#endif /* __linux__ */ + +#endif + +namespace mysys_namespace { +namespace crc32c { + +#if defined(HAVE_POWER8) && defined(HAS_ALTIVEC) +#ifdef __powerpc64__ +static int arch_ppc_crc32 = 0; +#endif /* __powerpc64__ */ +#endif + +static const uint32_t table0_[256] = { + 0x00000000, 0xf26b8303, 0xe13b70f7, 0x1350f3f4, + 0xc79a971f, 0x35f1141c, 0x26a1e7e8, 0xd4ca64eb, + 
0x8ad958cf, 0x78b2dbcc, 0x6be22838, 0x9989ab3b, + 0x4d43cfd0, 0xbf284cd3, 0xac78bf27, 0x5e133c24, + 0x105ec76f, 0xe235446c, 0xf165b798, 0x030e349b, + 0xd7c45070, 0x25afd373, 0x36ff2087, 0xc494a384, + 0x9a879fa0, 0x68ec1ca3, 0x7bbcef57, 0x89d76c54, + 0x5d1d08bf, 0xaf768bbc, 0xbc267848, 0x4e4dfb4b, + 0x20bd8ede, 0xd2d60ddd, 0xc186fe29, 0x33ed7d2a, + 0xe72719c1, 0x154c9ac2, 0x061c6936, 0xf477ea35, + 0xaa64d611, 0x580f5512, 0x4b5fa6e6, 0xb93425e5, + 0x6dfe410e, 0x9f95c20d, 0x8cc531f9, 0x7eaeb2fa, + 0x30e349b1, 0xc288cab2, 0xd1d83946, 0x23b3ba45, + 0xf779deae, 0x05125dad, 0x1642ae59, 0xe4292d5a, + 0xba3a117e, 0x4851927d, 0x5b016189, 0xa96ae28a, + 0x7da08661, 0x8fcb0562, 0x9c9bf696, 0x6ef07595, + 0x417b1dbc, 0xb3109ebf, 0xa0406d4b, 0x522bee48, + 0x86e18aa3, 0x748a09a0, 0x67dafa54, 0x95b17957, + 0xcba24573, 0x39c9c670, 0x2a993584, 0xd8f2b687, + 0x0c38d26c, 0xfe53516f, 0xed03a29b, 0x1f682198, + 0x5125dad3, 0xa34e59d0, 0xb01eaa24, 0x42752927, + 0x96bf4dcc, 0x64d4cecf, 0x77843d3b, 0x85efbe38, + 0xdbfc821c, 0x2997011f, 0x3ac7f2eb, 0xc8ac71e8, + 0x1c661503, 0xee0d9600, 0xfd5d65f4, 0x0f36e6f7, + 0x61c69362, 0x93ad1061, 0x80fde395, 0x72966096, + 0xa65c047d, 0x5437877e, 0x4767748a, 0xb50cf789, + 0xeb1fcbad, 0x197448ae, 0x0a24bb5a, 0xf84f3859, + 0x2c855cb2, 0xdeeedfb1, 0xcdbe2c45, 0x3fd5af46, + 0x7198540d, 0x83f3d70e, 0x90a324fa, 0x62c8a7f9, + 0xb602c312, 0x44694011, 0x5739b3e5, 0xa55230e6, + 0xfb410cc2, 0x092a8fc1, 0x1a7a7c35, 0xe811ff36, + 0x3cdb9bdd, 0xceb018de, 0xdde0eb2a, 0x2f8b6829, + 0x82f63b78, 0x709db87b, 0x63cd4b8f, 0x91a6c88c, + 0x456cac67, 0xb7072f64, 0xa457dc90, 0x563c5f93, + 0x082f63b7, 0xfa44e0b4, 0xe9141340, 0x1b7f9043, + 0xcfb5f4a8, 0x3dde77ab, 0x2e8e845f, 0xdce5075c, + 0x92a8fc17, 0x60c37f14, 0x73938ce0, 0x81f80fe3, + 0x55326b08, 0xa759e80b, 0xb4091bff, 0x466298fc, + 0x1871a4d8, 0xea1a27db, 0xf94ad42f, 0x0b21572c, + 0xdfeb33c7, 0x2d80b0c4, 0x3ed04330, 0xccbbc033, + 0xa24bb5a6, 0x502036a5, 0x4370c551, 0xb11b4652, + 0x65d122b9, 0x97baa1ba, 0x84ea524e, 0x7681d14d, + 
0x2892ed69, 0xdaf96e6a, 0xc9a99d9e, 0x3bc21e9d, + 0xef087a76, 0x1d63f975, 0x0e330a81, 0xfc588982, + 0xb21572c9, 0x407ef1ca, 0x532e023e, 0xa145813d, + 0x758fe5d6, 0x87e466d5, 0x94b49521, 0x66df1622, + 0x38cc2a06, 0xcaa7a905, 0xd9f75af1, 0x2b9cd9f2, + 0xff56bd19, 0x0d3d3e1a, 0x1e6dcdee, 0xec064eed, + 0xc38d26c4, 0x31e6a5c7, 0x22b65633, 0xd0ddd530, + 0x0417b1db, 0xf67c32d8, 0xe52cc12c, 0x1747422f, + 0x49547e0b, 0xbb3ffd08, 0xa86f0efc, 0x5a048dff, + 0x8ecee914, 0x7ca56a17, 0x6ff599e3, 0x9d9e1ae0, + 0xd3d3e1ab, 0x21b862a8, 0x32e8915c, 0xc083125f, + 0x144976b4, 0xe622f5b7, 0xf5720643, 0x07198540, + 0x590ab964, 0xab613a67, 0xb831c993, 0x4a5a4a90, + 0x9e902e7b, 0x6cfbad78, 0x7fab5e8c, 0x8dc0dd8f, + 0xe330a81a, 0x115b2b19, 0x020bd8ed, 0xf0605bee, + 0x24aa3f05, 0xd6c1bc06, 0xc5914ff2, 0x37faccf1, + 0x69e9f0d5, 0x9b8273d6, 0x88d28022, 0x7ab90321, + 0xae7367ca, 0x5c18e4c9, 0x4f48173d, 0xbd23943e, + 0xf36e6f75, 0x0105ec76, 0x12551f82, 0xe03e9c81, + 0x34f4f86a, 0xc69f7b69, 0xd5cf889d, 0x27a40b9e, + 0x79b737ba, 0x8bdcb4b9, 0x988c474d, 0x6ae7c44e, + 0xbe2da0a5, 0x4c4623a6, 0x5f16d052, 0xad7d5351 +}; +static const uint32_t table1_[256] = { + 0x00000000, 0x13a29877, 0x274530ee, 0x34e7a899, + 0x4e8a61dc, 0x5d28f9ab, 0x69cf5132, 0x7a6dc945, + 0x9d14c3b8, 0x8eb65bcf, 0xba51f356, 0xa9f36b21, + 0xd39ea264, 0xc03c3a13, 0xf4db928a, 0xe7790afd, + 0x3fc5f181, 0x2c6769f6, 0x1880c16f, 0x0b225918, + 0x714f905d, 0x62ed082a, 0x560aa0b3, 0x45a838c4, + 0xa2d13239, 0xb173aa4e, 0x859402d7, 0x96369aa0, + 0xec5b53e5, 0xfff9cb92, 0xcb1e630b, 0xd8bcfb7c, + 0x7f8be302, 0x6c297b75, 0x58ced3ec, 0x4b6c4b9b, + 0x310182de, 0x22a31aa9, 0x1644b230, 0x05e62a47, + 0xe29f20ba, 0xf13db8cd, 0xc5da1054, 0xd6788823, + 0xac154166, 0xbfb7d911, 0x8b507188, 0x98f2e9ff, + 0x404e1283, 0x53ec8af4, 0x670b226d, 0x74a9ba1a, + 0x0ec4735f, 0x1d66eb28, 0x298143b1, 0x3a23dbc6, + 0xdd5ad13b, 0xcef8494c, 0xfa1fe1d5, 0xe9bd79a2, + 0x93d0b0e7, 0x80722890, 0xb4958009, 0xa737187e, + 0xff17c604, 0xecb55e73, 0xd852f6ea, 0xcbf06e9d, + 
0xb19da7d8, 0xa23f3faf, 0x96d89736, 0x857a0f41, + 0x620305bc, 0x71a19dcb, 0x45463552, 0x56e4ad25, + 0x2c896460, 0x3f2bfc17, 0x0bcc548e, 0x186eccf9, + 0xc0d23785, 0xd370aff2, 0xe797076b, 0xf4359f1c, + 0x8e585659, 0x9dface2e, 0xa91d66b7, 0xbabffec0, + 0x5dc6f43d, 0x4e646c4a, 0x7a83c4d3, 0x69215ca4, + 0x134c95e1, 0x00ee0d96, 0x3409a50f, 0x27ab3d78, + 0x809c2506, 0x933ebd71, 0xa7d915e8, 0xb47b8d9f, + 0xce1644da, 0xddb4dcad, 0xe9537434, 0xfaf1ec43, + 0x1d88e6be, 0x0e2a7ec9, 0x3acdd650, 0x296f4e27, + 0x53028762, 0x40a01f15, 0x7447b78c, 0x67e52ffb, + 0xbf59d487, 0xacfb4cf0, 0x981ce469, 0x8bbe7c1e, + 0xf1d3b55b, 0xe2712d2c, 0xd69685b5, 0xc5341dc2, + 0x224d173f, 0x31ef8f48, 0x050827d1, 0x16aabfa6, + 0x6cc776e3, 0x7f65ee94, 0x4b82460d, 0x5820de7a, + 0xfbc3faf9, 0xe861628e, 0xdc86ca17, 0xcf245260, + 0xb5499b25, 0xa6eb0352, 0x920cabcb, 0x81ae33bc, + 0x66d73941, 0x7575a136, 0x419209af, 0x523091d8, + 0x285d589d, 0x3bffc0ea, 0x0f186873, 0x1cbaf004, + 0xc4060b78, 0xd7a4930f, 0xe3433b96, 0xf0e1a3e1, + 0x8a8c6aa4, 0x992ef2d3, 0xadc95a4a, 0xbe6bc23d, + 0x5912c8c0, 0x4ab050b7, 0x7e57f82e, 0x6df56059, + 0x1798a91c, 0x043a316b, 0x30dd99f2, 0x237f0185, + 0x844819fb, 0x97ea818c, 0xa30d2915, 0xb0afb162, + 0xcac27827, 0xd960e050, 0xed8748c9, 0xfe25d0be, + 0x195cda43, 0x0afe4234, 0x3e19eaad, 0x2dbb72da, + 0x57d6bb9f, 0x447423e8, 0x70938b71, 0x63311306, + 0xbb8de87a, 0xa82f700d, 0x9cc8d894, 0x8f6a40e3, + 0xf50789a6, 0xe6a511d1, 0xd242b948, 0xc1e0213f, + 0x26992bc2, 0x353bb3b5, 0x01dc1b2c, 0x127e835b, + 0x68134a1e, 0x7bb1d269, 0x4f567af0, 0x5cf4e287, + 0x04d43cfd, 0x1776a48a, 0x23910c13, 0x30339464, + 0x4a5e5d21, 0x59fcc556, 0x6d1b6dcf, 0x7eb9f5b8, + 0x99c0ff45, 0x8a626732, 0xbe85cfab, 0xad2757dc, + 0xd74a9e99, 0xc4e806ee, 0xf00fae77, 0xe3ad3600, + 0x3b11cd7c, 0x28b3550b, 0x1c54fd92, 0x0ff665e5, + 0x759baca0, 0x663934d7, 0x52de9c4e, 0x417c0439, + 0xa6050ec4, 0xb5a796b3, 0x81403e2a, 0x92e2a65d, + 0xe88f6f18, 0xfb2df76f, 0xcfca5ff6, 0xdc68c781, + 0x7b5fdfff, 0x68fd4788, 0x5c1aef11, 0x4fb87766, + 
0x35d5be23, 0x26772654, 0x12908ecd, 0x013216ba, + 0xe64b1c47, 0xf5e98430, 0xc10e2ca9, 0xd2acb4de, + 0xa8c17d9b, 0xbb63e5ec, 0x8f844d75, 0x9c26d502, + 0x449a2e7e, 0x5738b609, 0x63df1e90, 0x707d86e7, + 0x0a104fa2, 0x19b2d7d5, 0x2d557f4c, 0x3ef7e73b, + 0xd98eedc6, 0xca2c75b1, 0xfecbdd28, 0xed69455f, + 0x97048c1a, 0x84a6146d, 0xb041bcf4, 0xa3e32483 +}; +static const uint32_t table2_[256] = { + 0x00000000, 0xa541927e, 0x4f6f520d, 0xea2ec073, + 0x9edea41a, 0x3b9f3664, 0xd1b1f617, 0x74f06469, + 0x38513ec5, 0x9d10acbb, 0x773e6cc8, 0xd27ffeb6, + 0xa68f9adf, 0x03ce08a1, 0xe9e0c8d2, 0x4ca15aac, + 0x70a27d8a, 0xd5e3eff4, 0x3fcd2f87, 0x9a8cbdf9, + 0xee7cd990, 0x4b3d4bee, 0xa1138b9d, 0x045219e3, + 0x48f3434f, 0xedb2d131, 0x079c1142, 0xa2dd833c, + 0xd62de755, 0x736c752b, 0x9942b558, 0x3c032726, + 0xe144fb14, 0x4405696a, 0xae2ba919, 0x0b6a3b67, + 0x7f9a5f0e, 0xdadbcd70, 0x30f50d03, 0x95b49f7d, + 0xd915c5d1, 0x7c5457af, 0x967a97dc, 0x333b05a2, + 0x47cb61cb, 0xe28af3b5, 0x08a433c6, 0xade5a1b8, + 0x91e6869e, 0x34a714e0, 0xde89d493, 0x7bc846ed, + 0x0f382284, 0xaa79b0fa, 0x40577089, 0xe516e2f7, + 0xa9b7b85b, 0x0cf62a25, 0xe6d8ea56, 0x43997828, + 0x37691c41, 0x92288e3f, 0x78064e4c, 0xdd47dc32, + 0xc76580d9, 0x622412a7, 0x880ad2d4, 0x2d4b40aa, + 0x59bb24c3, 0xfcfab6bd, 0x16d476ce, 0xb395e4b0, + 0xff34be1c, 0x5a752c62, 0xb05bec11, 0x151a7e6f, + 0x61ea1a06, 0xc4ab8878, 0x2e85480b, 0x8bc4da75, + 0xb7c7fd53, 0x12866f2d, 0xf8a8af5e, 0x5de93d20, + 0x29195949, 0x8c58cb37, 0x66760b44, 0xc337993a, + 0x8f96c396, 0x2ad751e8, 0xc0f9919b, 0x65b803e5, + 0x1148678c, 0xb409f5f2, 0x5e273581, 0xfb66a7ff, + 0x26217bcd, 0x8360e9b3, 0x694e29c0, 0xcc0fbbbe, + 0xb8ffdfd7, 0x1dbe4da9, 0xf7908dda, 0x52d11fa4, + 0x1e704508, 0xbb31d776, 0x511f1705, 0xf45e857b, + 0x80aee112, 0x25ef736c, 0xcfc1b31f, 0x6a802161, + 0x56830647, 0xf3c29439, 0x19ec544a, 0xbcadc634, + 0xc85da25d, 0x6d1c3023, 0x8732f050, 0x2273622e, + 0x6ed23882, 0xcb93aafc, 0x21bd6a8f, 0x84fcf8f1, + 0xf00c9c98, 0x554d0ee6, 0xbf63ce95, 0x1a225ceb, + 
0x8b277743, 0x2e66e53d, 0xc448254e, 0x6109b730, + 0x15f9d359, 0xb0b84127, 0x5a968154, 0xffd7132a, + 0xb3764986, 0x1637dbf8, 0xfc191b8b, 0x595889f5, + 0x2da8ed9c, 0x88e97fe2, 0x62c7bf91, 0xc7862def, + 0xfb850ac9, 0x5ec498b7, 0xb4ea58c4, 0x11abcaba, + 0x655baed3, 0xc01a3cad, 0x2a34fcde, 0x8f756ea0, + 0xc3d4340c, 0x6695a672, 0x8cbb6601, 0x29faf47f, + 0x5d0a9016, 0xf84b0268, 0x1265c21b, 0xb7245065, + 0x6a638c57, 0xcf221e29, 0x250cde5a, 0x804d4c24, + 0xf4bd284d, 0x51fcba33, 0xbbd27a40, 0x1e93e83e, + 0x5232b292, 0xf77320ec, 0x1d5de09f, 0xb81c72e1, + 0xccec1688, 0x69ad84f6, 0x83834485, 0x26c2d6fb, + 0x1ac1f1dd, 0xbf8063a3, 0x55aea3d0, 0xf0ef31ae, + 0x841f55c7, 0x215ec7b9, 0xcb7007ca, 0x6e3195b4, + 0x2290cf18, 0x87d15d66, 0x6dff9d15, 0xc8be0f6b, + 0xbc4e6b02, 0x190ff97c, 0xf321390f, 0x5660ab71, + 0x4c42f79a, 0xe90365e4, 0x032da597, 0xa66c37e9, + 0xd29c5380, 0x77ddc1fe, 0x9df3018d, 0x38b293f3, + 0x7413c95f, 0xd1525b21, 0x3b7c9b52, 0x9e3d092c, + 0xeacd6d45, 0x4f8cff3b, 0xa5a23f48, 0x00e3ad36, + 0x3ce08a10, 0x99a1186e, 0x738fd81d, 0xd6ce4a63, + 0xa23e2e0a, 0x077fbc74, 0xed517c07, 0x4810ee79, + 0x04b1b4d5, 0xa1f026ab, 0x4bdee6d8, 0xee9f74a6, + 0x9a6f10cf, 0x3f2e82b1, 0xd50042c2, 0x7041d0bc, + 0xad060c8e, 0x08479ef0, 0xe2695e83, 0x4728ccfd, + 0x33d8a894, 0x96993aea, 0x7cb7fa99, 0xd9f668e7, + 0x9557324b, 0x3016a035, 0xda386046, 0x7f79f238, + 0x0b899651, 0xaec8042f, 0x44e6c45c, 0xe1a75622, + 0xdda47104, 0x78e5e37a, 0x92cb2309, 0x378ab177, + 0x437ad51e, 0xe63b4760, 0x0c158713, 0xa954156d, + 0xe5f54fc1, 0x40b4ddbf, 0xaa9a1dcc, 0x0fdb8fb2, + 0x7b2bebdb, 0xde6a79a5, 0x3444b9d6, 0x91052ba8 +}; +static const uint32_t table3_[256] = { + 0x00000000, 0xdd45aab8, 0xbf672381, 0x62228939, + 0x7b2231f3, 0xa6679b4b, 0xc4451272, 0x1900b8ca, + 0xf64463e6, 0x2b01c95e, 0x49234067, 0x9466eadf, + 0x8d665215, 0x5023f8ad, 0x32017194, 0xef44db2c, + 0xe964b13d, 0x34211b85, 0x560392bc, 0x8b463804, + 0x924680ce, 0x4f032a76, 0x2d21a34f, 0xf06409f7, + 0x1f20d2db, 0xc2657863, 0xa047f15a, 0x7d025be2, + 
0x6402e328, 0xb9474990, 0xdb65c0a9, 0x06206a11, + 0xd725148b, 0x0a60be33, 0x6842370a, 0xb5079db2, + 0xac072578, 0x71428fc0, 0x136006f9, 0xce25ac41, + 0x2161776d, 0xfc24ddd5, 0x9e0654ec, 0x4343fe54, + 0x5a43469e, 0x8706ec26, 0xe524651f, 0x3861cfa7, + 0x3e41a5b6, 0xe3040f0e, 0x81268637, 0x5c632c8f, + 0x45639445, 0x98263efd, 0xfa04b7c4, 0x27411d7c, + 0xc805c650, 0x15406ce8, 0x7762e5d1, 0xaa274f69, + 0xb327f7a3, 0x6e625d1b, 0x0c40d422, 0xd1057e9a, + 0xaba65fe7, 0x76e3f55f, 0x14c17c66, 0xc984d6de, + 0xd0846e14, 0x0dc1c4ac, 0x6fe34d95, 0xb2a6e72d, + 0x5de23c01, 0x80a796b9, 0xe2851f80, 0x3fc0b538, + 0x26c00df2, 0xfb85a74a, 0x99a72e73, 0x44e284cb, + 0x42c2eeda, 0x9f874462, 0xfda5cd5b, 0x20e067e3, + 0x39e0df29, 0xe4a57591, 0x8687fca8, 0x5bc25610, + 0xb4868d3c, 0x69c32784, 0x0be1aebd, 0xd6a40405, + 0xcfa4bccf, 0x12e11677, 0x70c39f4e, 0xad8635f6, + 0x7c834b6c, 0xa1c6e1d4, 0xc3e468ed, 0x1ea1c255, + 0x07a17a9f, 0xdae4d027, 0xb8c6591e, 0x6583f3a6, + 0x8ac7288a, 0x57828232, 0x35a00b0b, 0xe8e5a1b3, + 0xf1e51979, 0x2ca0b3c1, 0x4e823af8, 0x93c79040, + 0x95e7fa51, 0x48a250e9, 0x2a80d9d0, 0xf7c57368, + 0xeec5cba2, 0x3380611a, 0x51a2e823, 0x8ce7429b, + 0x63a399b7, 0xbee6330f, 0xdcc4ba36, 0x0181108e, + 0x1881a844, 0xc5c402fc, 0xa7e68bc5, 0x7aa3217d, + 0x52a0c93f, 0x8fe56387, 0xedc7eabe, 0x30824006, + 0x2982f8cc, 0xf4c75274, 0x96e5db4d, 0x4ba071f5, + 0xa4e4aad9, 0x79a10061, 0x1b838958, 0xc6c623e0, + 0xdfc69b2a, 0x02833192, 0x60a1b8ab, 0xbde41213, + 0xbbc47802, 0x6681d2ba, 0x04a35b83, 0xd9e6f13b, + 0xc0e649f1, 0x1da3e349, 0x7f816a70, 0xa2c4c0c8, + 0x4d801be4, 0x90c5b15c, 0xf2e73865, 0x2fa292dd, + 0x36a22a17, 0xebe780af, 0x89c50996, 0x5480a32e, + 0x8585ddb4, 0x58c0770c, 0x3ae2fe35, 0xe7a7548d, + 0xfea7ec47, 0x23e246ff, 0x41c0cfc6, 0x9c85657e, + 0x73c1be52, 0xae8414ea, 0xcca69dd3, 0x11e3376b, + 0x08e38fa1, 0xd5a62519, 0xb784ac20, 0x6ac10698, + 0x6ce16c89, 0xb1a4c631, 0xd3864f08, 0x0ec3e5b0, + 0x17c35d7a, 0xca86f7c2, 0xa8a47efb, 0x75e1d443, + 0x9aa50f6f, 0x47e0a5d7, 0x25c22cee, 0xf8878656, + 
0xe1873e9c, 0x3cc29424, 0x5ee01d1d, 0x83a5b7a5, + 0xf90696d8, 0x24433c60, 0x4661b559, 0x9b241fe1, + 0x8224a72b, 0x5f610d93, 0x3d4384aa, 0xe0062e12, + 0x0f42f53e, 0xd2075f86, 0xb025d6bf, 0x6d607c07, + 0x7460c4cd, 0xa9256e75, 0xcb07e74c, 0x16424df4, + 0x106227e5, 0xcd278d5d, 0xaf050464, 0x7240aedc, + 0x6b401616, 0xb605bcae, 0xd4273597, 0x09629f2f, + 0xe6264403, 0x3b63eebb, 0x59416782, 0x8404cd3a, + 0x9d0475f0, 0x4041df48, 0x22635671, 0xff26fcc9, + 0x2e238253, 0xf36628eb, 0x9144a1d2, 0x4c010b6a, + 0x5501b3a0, 0x88441918, 0xea669021, 0x37233a99, + 0xd867e1b5, 0x05224b0d, 0x6700c234, 0xba45688c, + 0xa345d046, 0x7e007afe, 0x1c22f3c7, 0xc167597f, + 0xc747336e, 0x1a0299d6, 0x782010ef, 0xa565ba57, + 0xbc65029d, 0x6120a825, 0x0302211c, 0xde478ba4, + 0x31035088, 0xec46fa30, 0x8e647309, 0x5321d9b1, + 0x4a21617b, 0x9764cbc3, 0xf54642fa, 0x2803e842 +}; + +// Used to fetch a naturally-aligned 32-bit word in little endian byte-order +static inline uint32_t LE_LOAD32(const uint8_t *p) { + return DecodeFixed32(reinterpret_cast<const char *>(p)); +} + +// Table-driven update of the CRC state *l with the 8 bytes at *p +// (two 4-byte steps); advances *p by 8. +static inline void Slow_CRC32(uint64_t* l, uint8_t const **p) +{ + uint32_t c = static_cast<uint32_t>(*l ^ LE_LOAD32(*p)); + *p += 4; + *l = table3_[c & 0xff] ^ + table2_[(c >> 8) & 0xff] ^ + table1_[(c >> 16) & 0xff] ^ + table0_[c >> 24]; + // DO it twice. + c = static_cast<uint32_t>(*l ^ LE_LOAD32(*p)); + *p += 4; + *l = table3_[c & 0xff] ^ + table2_[(c >> 8) & 0xff] ^ + table1_[(c >> 16) & 0xff] ^ + table0_[c >> 24]; +} + +#ifdef ALIGN +#undef ALIGN +#endif + +// Align n to (1 << m) byte boundary +#define ALIGN(n, m) (((n) + ((1 << (m)) - 1)) & ~((1 << (m)) - 1)) + +// Consume one byte; expects locals named l (CRC state) and p (cursor). +#define STEP1 do { \ + int c = (l & 0xff) ^ *p++; \ + l = table0_[c] ^ (l >> 8); \ +} while (0) + +// Portable table-driven CRC-32C: extends crc over size bytes at buf. +// The state is XORed with 0xffffffff on entry and on return. +static uint32_t crc32c_slow(uint32_t crc, const char* buf, size_t size) +{ + const uint8_t *p = reinterpret_cast<const uint8_t *>(buf); + const uint8_t *e = p + size; + uint64_t l = crc ^ 0xffffffffu; + + // Point x at first 16-byte aligned byte in string. This might be + // just past the end of the string. 
+ const uintptr_t pval = reinterpret_cast<uintptr_t>(p); + const uint8_t* x = reinterpret_cast<const uint8_t *>(ALIGN(pval, 4)); + if (x <= e) + // Process bytes until finished or p is 16-byte aligned + while (p != x) + STEP1; + // Process bytes 16 at a time + while ((e-p) >= 16) + { + Slow_CRC32(&l, &p); + Slow_CRC32(&l, &p); + } + // Process bytes 8 at a time + while ((e-p) >= 8) + Slow_CRC32(&l, &p); + // Process the last few bytes + while (p != e) + STEP1; + return static_cast<uint32_t>(l ^ 0xffffffffu); +} + +#if defined HAVE_POWER8 +#elif defined HAVE_ARMV8_CRC +#elif defined HAVE_SSE42 +constexpr uint32_t cpuid_ecx_SSE42= 1U << 20; +constexpr uint32_t cpuid_ecx_SSE42_AND_PCLMUL= cpuid_ecx_SSE42 | 1U<<1; + +// Returns ECX of CPUID leaf 1 (the feature bits tested above). +static uint32_t cpuid_ecx() +{ +#ifdef __GNUC__ + uint32_t reax= 0, rebx= 0, recx= 0, redx= 0; + __cpuid(1, reax, rebx, recx, redx); + return recx; +#elif defined _MSC_VER + int regs[4]; + __cpuid(regs, 1); + return regs[2]; +#else +# error "unknown compiler" +#endif +} + +// Non-zero only when both the SSE4.2 and the PCLMUL bits are set. +extern "C" int crc32_pclmul_enabled(void) +{ + return !(~cpuid_ecx() & cpuid_ecx_SSE42_AND_PCLMUL); +} + +#if SIZEOF_SIZE_T == 8 +extern "C" uint32_t crc32c_3way(uint32_t crc, const char *buf, size_t len); + +USE_SSE42 +static inline uint64_t LE_LOAD64(const uint8_t *ptr) +{ + return uint8korr(reinterpret_cast<const char *>(ptr)); +} +#endif + +// Hardware update of the CRC state *l with the 8 bytes at *p; advances *p by 8. +USE_SSE42 +static inline void Fast_CRC32(uint64_t* l, uint8_t const **p) +{ +# if (SIZEOF_SIZE_T == 8) + *l = _mm_crc32_u64(*l, LE_LOAD64(*p)); + *p += 8; +# else + *l = _mm_crc32_u32(static_cast<uint32_t>(*l), LE_LOAD32(*p)); + *p += 4; + *l = _mm_crc32_u32(static_cast<uint32_t>(*l), LE_LOAD32(*p)); + *p += 4; +# endif +} + +// Same walk as crc32c_slow(), with Fast_CRC32 for the 8-byte steps. +USE_SSE42 +static uint32_t crc32c_sse42(uint32_t crc, const char* buf, size_t size) +{ + const uint8_t *p = reinterpret_cast<const uint8_t *>(buf); + const uint8_t *e = p + size; + uint64_t l = crc ^ 0xffffffffu; + + // Point x at first 16-byte aligned byte in string. This might be + // just past the end of the string. 
+ const uintptr_t pval = reinterpret_cast<uintptr_t>(p); + const uint8_t* x = reinterpret_cast<const uint8_t *>(ALIGN(pval, 4)); + if (x <= e) + // Process bytes until finished or p is 16-byte aligned + while (p != x) + STEP1; + // Process bytes 16 at a time + while ((e-p) >= 16) + { + Fast_CRC32(&l, &p); + Fast_CRC32(&l, &p); + } + // Process bytes 8 at a time + while ((e-p) >= 8) + Fast_CRC32(&l, &p); + // Process the last few bytes + while (p != e) + STEP1; + return static_cast<uint32_t>(l ^ 0xffffffffu); +} +#endif + +typedef uint32_t (*Function)(uint32_t, const char*, size_t); + +#if defined(HAVE_POWER8) && defined(HAS_ALTIVEC) +uint32_t ExtendPPCImpl(uint32_t crc, const char *buf, size_t size) { + return crc32c_ppc(crc, (const unsigned char *)buf, size); +} + +#if __linux__ +static int arch_ppc_probe(void) { + arch_ppc_crc32 = 0; + +#if defined(__powerpc64__) + if (getauxval(AT_HWCAP2) & PPC_FEATURE2_VEC_CRYPTO) arch_ppc_crc32 = 1; +#endif /* __powerpc64__ */ + + return arch_ppc_crc32; +} +#elif __FreeBSD_version >= 1200000 +#include +#include +#include +static int arch_ppc_probe(void) { + // Zero-initialized so a failing elf_aux_info() reads as "no feature" + unsigned long cpufeatures = 0; + arch_ppc_crc32 = 0; + +#if defined(__powerpc64__) + elf_aux_info(AT_HWCAP2, &cpufeatures, sizeof(cpufeatures)); + if (cpufeatures & PPC_FEATURE2_HAS_VEC_CRYPTO) arch_ppc_crc32 = 1; +#endif /* __powerpc64__ */ + + return arch_ppc_crc32; +} +#elif defined(_AIX) || defined(__OpenBSD__) +static int arch_ppc_probe(void) { + arch_ppc_crc32 = 0; + +#if defined(__powerpc64__) + // AIX 7.1+/OpenBSD has vector crypto features on all POWER 8+ + arch_ppc_crc32 = 1; +#endif /* __powerpc64__ */ + + return arch_ppc_crc32; +} +#endif // __linux__ +#endif + +#if defined(HAVE_ARMV8_CRC) +extern "C" const char *crc32c_aarch64_available(void); +extern "C" uint32_t crc32c_aarch64(uint32_t crc, const unsigned char *buffer, uint64_t len); + +static uint32_t ExtendARMImpl(uint32_t crc, const char *buf, size_t size) { + return crc32c_aarch64(crc, (const unsigned char *)buf, (size_t) size); +} +#endif + 
+/* Picks the CRC-32C routine for this build and CPU. Each platform branch returns its accelerated routine only when its runtime probe succeeds; every other path falls through to crc32c_slow. */ +static inline Function Choose_Extend() +{ +#if defined HAVE_POWER8 && defined HAS_ALTIVEC + if (arch_ppc_probe()) + return ExtendPPCImpl; +#elif defined(HAVE_ARMV8_CRC) + if (crc32c_aarch64_available()) + return ExtendARMImpl; +#elif HAVE_SSE42 +# if defined HAVE_PCLMUL && SIZEOF_SIZE_T == 8 + /* Both bits set: crc32c_3way; SSE4.2 bit only: crc32c_sse42; anything else leaves the switch */ + switch (cpuid_ecx() & cpuid_ecx_SSE42_AND_PCLMUL) { + case cpuid_ecx_SSE42_AND_PCLMUL: + return crc32c_3way; + case cpuid_ecx_SSE42: + return crc32c_sse42; + } +# else + if (cpuid_ecx() & cpuid_ecx_SSE42) + return crc32c_sse42; +# endif +#endif + return crc32c_slow; +} + +/* Selected once, when this static object is initialized */ +static const Function ChosenExtend= Choose_Extend(); + +/* Forwards to the routine selected in ChosenExtend */ +static inline uint32_t Extend(uint32_t crc, const char* buf, size_t size) +{ + return ChosenExtend(crc, buf, size); +} + +/* Returns a static, human-readable description of the routine in use. On the ARM branch the string comes from crc32c_aarch64_available() when that returns non-null. */ +extern "C" const char *my_crc32c_implementation() +{ +#if defined(HAVE_POWER8) && defined(HAS_ALTIVEC) + if (ChosenExtend == ExtendPPCImpl) + return "Using POWER8 crc32 instructions"; +#elif defined(HAVE_ARMV8_CRC) + if (const char *ret= crc32c_aarch64_available()) + return ret; +#elif HAVE_SSE42 +# if defined HAVE_PCLMUL && SIZEOF_SIZE_T == 8 + if (ChosenExtend == crc32c_3way) + return "Using crc32 + pclmulqdq instructions"; +# endif + if (ChosenExtend == crc32c_sse42) + return "Using SSE4.2 crc32 instructions"; +#endif + return "Using generic crc32 instructions"; +} +} // namespace crc32c +} // namespace mysys_namespace + +/* C entry point: extends crc over size bytes at buf using the selected routine */ +extern "C" unsigned my_crc32c(unsigned int crc, const char *buf, size_t size) +{ + return mysys_namespace::crc32c::Extend(crc,buf, size); +} diff --git a/mysys/crc32/crc32c_amd64.cc b/mysys/crc32/crc32c_amd64.cc new file mode 100644 index 00000000..22c492b4 --- /dev/null +++ b/mysys/crc32/crc32c_amd64.cc @@ -0,0 +1,711 @@ +/* Copyright (c) 2020, 2021, MariaDB + + This program is free software; you can redistribute it and/or modify + it under the terms of the GNU General Public License as published by + the Free Software Foundation; version 2 of the License. 
+ + This program is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + GNU General Public License for more details. + + You should have received a copy of the GNU General Public License + along with this program; if not, write to the Free Software + Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1335 USA */ + +/* + * Copyright 2016 Ferry Toth, Exalon Delft BV, The Netherlands + * This software is provided 'as-is', without any express or implied + * warranty. In no event will the author be held liable for any damages + * arising from the use of this software. + * Permission is granted to anyone to use this software for any purpose, + * including commercial applications, and to alter it and redistribute it + * freely, subject to the following restrictions: + * 1. The origin of this software must not be misrepresented; you must not + * claim that you wrote the original software. If you use this software + * in a product, an acknowledgment in the product documentation would be + * appreciated but is not required. + * 2. Altered source versions must be plainly marked as such, and must not be + * misrepresented as being the original software. + * 3. This notice may not be removed or altered from any source distribution. 
+ * Ferry Toth
+ * ftoth@exalondelft.nl
+ *
+ * https://github.com/htot/crc32c
+ *
+ * Modified by Facebook
+ *
+ * Original intel whitepaper:
+ * "Fast CRC Computation for iSCSI Polynomial Using CRC32 Instruction"
+ * https://www.intel.com/content/dam/www/public/us/en/documents/white-papers/crc-iscsi-polynomial-crc32-instruction-paper.pdf
+ *
+ * This version is from the folly library, created by Dave Watson
+ *
+*/
+
+// NOTE(review): the header names were missing after "#include" in this copy
+// of the file. They are reconstructed from what the code below uses
+// (_mm_crc32_* need <nmmintrin.h>, _mm_clmulepi64_si128 needs <wmmintrin.h>,
+// memcpy needs <string.h>) -- confirm against upstream.
+#include <stdint.h>
+#include <string.h>
+#include <nmmintrin.h>
+#include <wmmintrin.h>
+
+
+// Loads that are valid at any address. The pointer-cast loads they replace
+// dereferenced misaligned pointers and discarded const.
+static inline uint16_t crc32c_load_u16(const void* p) {
+  uint16_t v;
+  memcpy(&v, p, sizeof v);
+  return v;
+}
+
+static inline uint32_t crc32c_load_u32(const void* p) {
+  uint32_t v;
+  memcpy(&v, p, sizeof v);
+  return v;
+}
+
+static inline uint64_t crc32c_load_u64(const void* p) {
+  uint64_t v;
+  memcpy(&v, p, sizeof v);
+  return v;
+}
+
+// Fold one 8-byte word from each of three streams into crc0/crc1/crc2.
+// buf##0..buf##2 are 8-byte-word pointers; offset counts words.
+#define CRCtriplet(crc, buf, offset) \
+  crc##0 = _mm_crc32_u64(crc##0, *(buf##0 + offset)); \
+  crc##1 = _mm_crc32_u64(crc##1, *(buf##1 + offset)); \
+  crc##2 = _mm_crc32_u64(crc##2, *(buf##2 + offset));
+
+// Same as CRCtriplet, but leaves the third stream untouched.
+#define CRCduplet(crc, buf, offset) \
+  crc##0 = _mm_crc32_u64(crc##0, *(buf##0 + offset)); \
+  crc##1 = _mm_crc32_u64(crc##1, *(buf##1 + offset));
+
+// Fold the 8 bytes at byte pointer buf + offset into crc. The address need
+// not be 8-byte aligned.
+#define CRCsinglet(crc, buf, offset) \
+  crc = _mm_crc32_u64(crc, crc32c_load_u64((buf) + (offset)));
+
+
+// Numbers taken directly from intel whitepaper.
+// clang-format off
+static const uint64_t clmul_constants alignas(16) [] = {
+    0x14cd00bd6, 0x105ec76f0, 0x0ba4fc28e, 0x14cd00bd6,
+    0x1d82c63da, 0x0f20c0dfe, 0x09e4addf8, 0x0ba4fc28e,
+    0x039d3b296, 0x1384aa63a, 0x102f9b8a2, 0x1d82c63da,
+    0x14237f5e6, 0x01c291d04, 0x00d3b6092, 0x09e4addf8,
+    0x0c96cfdc0, 0x0740eef02, 0x18266e456, 0x039d3b296,
+    0x0daece73e, 0x0083a6eec, 0x0ab7aff2a, 0x102f9b8a2,
+    0x1248ea574, 0x1c1733996, 0x083348832, 0x14237f5e6,
+    0x12c743124, 0x02ad91c30, 0x0b9e02b86, 0x00d3b6092,
+    0x018b33a4e, 0x06992cea2, 0x1b331e26a, 0x0c96cfdc0,
+    0x17d35ba46, 0x07e908048, 0x1bf2e8b8a, 0x18266e456,
+    0x1a3e0968a, 0x11ed1f9d8, 0x0ce7f39f4, 0x0daece73e,
+    0x061d82e56, 0x0f1d0f55e, 0x0d270f1a2, 0x0ab7aff2a,
+    0x1c3f5f66c, 0x0a87ab8a8, 0x12ed0daac, 0x1248ea574,
+    0x065863b64, 0x08462d800, 0x11eef4f8e, 0x083348832,
+    0x1ee54f54c, 0x071d111a8, 0x0b3e32c28, 0x12c743124,
+    0x0064f7f26, 0x0ffd852c6, 0x0dd7e3b0c, 0x0b9e02b86,
+    0x0f285651c, 0x0dcb17aa4, 0x010746f3c, 0x018b33a4e,
+    0x1c24afea4, 0x0f37c5aee, 0x0271d9844, 0x1b331e26a,
+    0x08e766a0c, 0x06051d5a2, 0x093a5f730, 0x17d35ba46,
+    0x06cb08e5c, 0x11d5ca20e, 0x06b749fb2, 0x1bf2e8b8a,
+    0x1167f94f2, 0x021f3d99c, 0x0cec3662e, 0x1a3e0968a,
+    0x19329634a, 0x08f158014, 0x0e6fc4e6a, 0x0ce7f39f4,
+    0x08227bb8a, 0x1a5e82106, 0x0b0cd4768, 0x061d82e56,
+    0x13c2b89c4, 0x188815ab2, 0x0d7a4825c, 0x0d270f1a2,
+    0x10f5ff2ba, 0x105405f3e, 0x00167d312, 0x1c3f5f66c,
+    0x0f6076544, 0x0e9adf796, 0x026f6a60a, 0x12ed0daac,
+    0x1a2adb74e, 0x096638b34, 0x19d34af3a, 0x065863b64,
+    0x049c3cc9c, 0x1e50585a0, 0x068bce87a, 0x11eef4f8e,
+    0x1524fa6c6, 0x19f1c69dc, 0x16cba8aca, 0x1ee54f54c,
+    0x042d98888, 0x12913343e, 0x1329d9f7e, 0x0b3e32c28,
+    0x1b1c69528, 0x088f25a3a, 0x02178513a, 0x0064f7f26,
+    0x0e0ac139e, 0x04e36f0b0, 0x0170076fa, 0x0dd7e3b0c,
+    0x141a1a2e2, 0x0bd6f81f8, 0x16ad828b4, 0x0f285651c,
+    0x041d17b64, 0x19425cbba, 0x1fae1cc66, 0x010746f3c,
+    0x1a75b4b00, 0x18db37e8a, 0x0f872e54c, 0x1c24afea4,
+    0x01e41e9fc, 0x04c144932, 0x086d8e4d2, 0x0271d9844,
+    0x160f7af7a, 0x052148f02, 0x05bb8f1bc, 0x08e766a0c,
+    0x0a90fd27a, 0x0a3c6f37a, 0x0b3af077a, 0x093a5f730,
+    0x04984d782, 0x1d22c238e, 0x0ca6ef3ac, 0x06cb08e5c,
+    0x0234e0b26, 0x063ded06a, 0x1d88abd4a, 0x06b749fb2,
+    0x04597456a, 0x04d56973c, 0x0e9e28eb4, 0x1167f94f2,
+    0x07b3ff57a, 0x19385bf2e, 0x0c9c8b782, 0x0cec3662e,
+    0x13a9cba9e, 0x0e417f38a, 0x093e106a4, 0x19329634a,
+    0x167001a9c, 0x14e727980, 0x1ddffc5d4, 0x0e6fc4e6a,
+    0x00df04680, 0x0d104b8fc, 0x02342001e, 0x08227bb8a,
+    0x00a2a8d7e, 0x05b397730, 0x168763fa6, 0x0b0cd4768,
+    0x1ed5a407a, 0x0e78eb416, 0x0d2c3ed1a, 0x13c2b89c4,
+    0x0995a5724, 0x1641378f0, 0x19b1afbc4, 0x0d7a4825c,
+    0x109ffedc0, 0x08d96551c, 0x0f2271e60, 0x10f5ff2ba,
+    0x00b0bf8ca, 0x00bf80dd2, 0x123888b7a, 0x00167d312,
+    0x1e888f7dc, 0x18dcddd1c, 0x002ee03b2, 0x0f6076544,
+    0x183e8d8fe, 0x06a45d2b2, 0x133d7a042, 0x026f6a60a,
+    0x116b0f50c, 0x1dd3e10e8, 0x05fabe670, 0x1a2adb74e,
+    0x130004488, 0x0de87806c, 0x000bcf5f6, 0x19d34af3a,
+    0x18f0c7078, 0x014338754, 0x017f27698, 0x049c3cc9c,
+    0x058ca5f00, 0x15e3e77ee, 0x1af900c24, 0x068bce87a,
+    0x0b5cfca28, 0x0dd07448e, 0x0ded288f8, 0x1524fa6c6,
+    0x059f229bc, 0x1d8048348, 0x06d390dec, 0x16cba8aca,
+    0x037170390, 0x0a3e3e02c, 0x06353c1cc, 0x042d98888,
+    0x0c4584f5c, 0x0d73c7bea, 0x1f16a3418, 0x1329d9f7e,
+    0x0531377e2, 0x185137662, 0x1d8d9ca7c, 0x1b1c69528,
+    0x0b25b29f2, 0x18a08b5bc, 0x19fb2a8b0, 0x02178513a,
+    0x1a08fe6ac, 0x1da758ae0, 0x045cddf4e, 0x0e0ac139e,
+    0x1a91647f2, 0x169cf9eb0, 0x1a0f717c4, 0x0170076fa,
+};
+
+// Fold the low three bits of len worth of bytes (at most 7) into the CRC:
+// 4 bytes if bit 2 is set, then 2 bytes if bit 1 is set, then 1 byte if
+// bit 0 is set. Higher bits of len are ignored. Only the low 32 bits of
+// crc0 are used; the result is written back zero-extended. next is advanced
+// past the consumed bytes and may point to any address.
+static inline void align_to_8(
+    size_t len,
+    uint64_t& crc0, // crc so far, updated on return
+    const unsigned char*& next) { // next data pointer, updated on return
+  uint32_t crc32bit = static_cast<uint32_t>(crc0);
+  if (len & 0x04) {
+    crc32bit = _mm_crc32_u32(crc32bit, crc32c_load_u32(next));
+    next += sizeof(uint32_t);
+  }
+  if (len & 0x02) {
+    crc32bit = _mm_crc32_u16(crc32bit, crc32c_load_u16(next));
+    next += sizeof(uint16_t);
+  }
+  if (len & 0x01) {
+    crc32bit = _mm_crc32_u8(crc32bit, *(next));
+    next++;
+  }
+  crc0 = crc32bit;
+}
+
+//
+// CombineCRC performs pclmulqdq multiplication of 2 partial CRC's and a well
+// chosen constant and xor's these with the remaining CRC.
+//
+// block_size selects the 16-byte entry (block_size - 1) of clmul_constants,
+// so it must be in 1..128 (the table holds 128 such entries). The low half
+// of the entry multiplies crc0 and the high half multiplies crc1. The low
+// 64 bits of the xor of both products are xor-ed with the 8-byte word just
+// before next2 and folded into crc2, which is returned.
+//
+static inline uint64_t CombineCRC(
+    size_t block_size,
+    uint64_t crc0,
+    uint64_t crc1,
+    uint64_t crc2,
+    const uint64_t* next2) {
+  const auto multiplier =
+      *(reinterpret_cast<const __m128i*>(clmul_constants) + block_size - 1);
+  const auto crc0_xmm = _mm_set_epi64x(0, crc0);
+  const auto res0 = _mm_clmulepi64_si128(crc0_xmm, multiplier, 0x00);
+  const auto crc1_xmm = _mm_set_epi64x(0, crc1);
+  const auto res1 = _mm_clmulepi64_si128(crc1_xmm, multiplier, 0x10);
+  const auto res = _mm_xor_si128(res0, res1);
+  crc0 = _mm_cvtsi128_si64(res);
+  crc0 = crc0 ^ *(next2 - 1);
+  crc2 = _mm_crc32_u64(crc2, crc0);
+  return crc2;
+}
+
+// Compute CRC-32C using the Intel hardware instruction.
+// Extend the CRC-32C value crc over len bytes at buf.
+//
+// crc is bit-inverted on entry and the result is bit-inverted again on
+// return. Inputs longer than 216 bytes are first advanced to an 8-byte
+// boundary and then processed as three parallel streams of 8-byte words, in
+// blocks of up to 128 words per stream whose partial CRCs are merged with
+// CombineCRC. Whatever remains (at most 27 words) is folded one word at a
+// time, and the last 0..7 bytes are handled by align_to_8.
+extern "C"
+uint32_t crc32c_3way(uint32_t crc, const char *buf, size_t len)
+{
+  const unsigned char* next = (const unsigned char*)buf;
+  uint64_t count;
+  uint64_t crc0, crc1, crc2;
+  crc0 = crc ^ 0xffffffffu;
+
+
+  if (len >= 8) {
+    // if len > 216 then align and use triplets
+    if (len > 216) {
+      {
+        // Work on the bytes (< 8) before the first 8-byte alignment addr starts
+        auto align_bytes = (8 - (uintptr_t)next) & 7;
+        len -= align_bytes;
+        align_to_8(align_bytes, crc0, next);
+      }
+
+      // Now work on the remaining blocks
+      count = len / 24; // number of triplets
+      len %= 24; // bytes remaining
+      uint64_t n = count >> 7; // #blocks = first block + full blocks
+      uint64_t block_size = count & 127;
+      if (block_size == 0) {
+        block_size = 128;
+      } else {
+        n++;
+      }
+      // points to the first byte of the next block
+      const uint64_t* next0 = (uint64_t*)next + block_size;
+      const uint64_t* next1 = next0 + block_size;
+      const uint64_t* next2 = next1 + block_size;
+
+      crc1 = crc2 = 0;
+      // Use Duff's device, a for() loop inside a switch()
+      // statement. This needs to execute at least once, round len
+      // down to nearest triplet multiple
+      switch (block_size) {
+        case 128:
+          do {
+            // jumps here for a full block of len 128
+            CRCtriplet(crc, next, -128);
+            /* fallthrough */
+            case 127:
+              // jumps here or below for the first block smaller
+              CRCtriplet(crc, next, -127);
+              /* fallthrough */
+            case 126:
+              CRCtriplet(crc, next, -126); // than 128
+              /* fallthrough */
+            case 125:
+              CRCtriplet(crc, next, -125);
+              /* fallthrough */
+            case 124:
+              CRCtriplet(crc, next, -124);
+              /* fallthrough */
+            case 123:
+              CRCtriplet(crc, next, -123);
+              /* fallthrough */
+            case 122:
+              CRCtriplet(crc, next, -122);
+              /* fallthrough */
+            case 121:
+              CRCtriplet(crc, next, -121);
+              /* fallthrough */
+            case 120:
+              CRCtriplet(crc, next, -120);
+              /* fallthrough */
+            case 119:
+              CRCtriplet(crc, next, -119);
+              /* fallthrough */
+            case 118:
+              CRCtriplet(crc, next, -118);
+              /* fallthrough */
+            case 117:
+              CRCtriplet(crc, next, -117);
+              /* fallthrough */
+            case 116:
+              CRCtriplet(crc, next, -116);
+              /* fallthrough */
+            case 115:
+              CRCtriplet(crc, next, -115);
+              /* fallthrough */
+            case 114:
+              CRCtriplet(crc, next, -114);
+              /* fallthrough */
+            case 113:
+              CRCtriplet(crc, next, -113);
+              /* fallthrough */
+            case 112:
+              CRCtriplet(crc, next, -112);
+              /* fallthrough */
+            case 111:
+              CRCtriplet(crc, next, -111);
+              /* fallthrough */
+            case 110:
+              CRCtriplet(crc, next, -110);
+              /* fallthrough */
+            case 109:
+              CRCtriplet(crc, next, -109);
+              /* fallthrough */
+            case 108:
+              CRCtriplet(crc, next, -108);
+              /* fallthrough */
+            case 107:
+              CRCtriplet(crc, next, -107);
+              /* fallthrough */
+            case 106:
+              CRCtriplet(crc, next, -106);
+              /* fallthrough */
+            case 105:
+              CRCtriplet(crc, next, -105);
+              /* fallthrough */
+            case 104:
+              CRCtriplet(crc, next, -104);
+              /* fallthrough */
+            case 103:
+              CRCtriplet(crc, next, -103);
+              /* fallthrough */
+            case 102:
+              CRCtriplet(crc, next, -102);
+              /* fallthrough */
+            case 101:
+              CRCtriplet(crc, next, -101);
+              /* fallthrough */
+            case 100:
+              CRCtriplet(crc, next, -100);
+              /* fallthrough */
+            case 99:
+              CRCtriplet(crc, next, -99);
+              /* fallthrough */
+            case 98:
+              CRCtriplet(crc, next, -98);
+              /* fallthrough */
+            case 97:
+              CRCtriplet(crc, next, -97);
+              /* fallthrough */
+            case 96:
+              CRCtriplet(crc, next, -96);
+              /* fallthrough */
+            case 95:
+              CRCtriplet(crc, next, -95);
+              /* fallthrough */
+            case 94:
+              CRCtriplet(crc, next, -94);
+              /* fallthrough */
+            case 93:
+              CRCtriplet(crc, next, -93);
+              /* fallthrough */
+            case 92:
+              CRCtriplet(crc, next, -92);
+              /* fallthrough */
+            case 91:
+              CRCtriplet(crc, next, -91);
+              /* fallthrough */
+            case 90:
+              CRCtriplet(crc, next, -90);
+              /* fallthrough */
+            case 89:
+              CRCtriplet(crc, next, -89);
+              /* fallthrough */
+            case 88:
+              CRCtriplet(crc, next, -88);
+              /* fallthrough */
+            case 87:
+              CRCtriplet(crc, next, -87);
+              /* fallthrough */
+            case 86:
+              CRCtriplet(crc, next, -86);
+              /* fallthrough */
+            case 85:
+              CRCtriplet(crc, next, -85);
+              /* fallthrough */
+            case 84:
+              CRCtriplet(crc, next, -84);
+              /* fallthrough */
+            case 83:
+              CRCtriplet(crc, next, -83);
+              /* fallthrough */
+            case 82:
+              CRCtriplet(crc, next, -82);
+              /* fallthrough */
+            case 81:
+              CRCtriplet(crc, next, -81);
+              /* fallthrough */
+            case 80:
+              CRCtriplet(crc, next, -80);
+              /* fallthrough */
+            case 79:
+              CRCtriplet(crc, next, -79);
+              /* fallthrough */
+            case 78:
+              CRCtriplet(crc, next, -78);
+              /* fallthrough */
+            case 77:
+              CRCtriplet(crc, next, -77);
+              /* fallthrough */
+            case 76:
+              CRCtriplet(crc, next, -76);
+              /* fallthrough */
+            case 75:
+              CRCtriplet(crc, next, -75);
+              /* fallthrough */
+            case 74:
+              CRCtriplet(crc, next, -74);
+              /* fallthrough */
+            case 73:
+              CRCtriplet(crc, next, -73);
+              /* fallthrough */
+            case 72:
+              CRCtriplet(crc, next, -72);
+              /* fallthrough */
+            case 71:
+              CRCtriplet(crc, next, -71);
+              /* fallthrough */
+            case 70:
+              CRCtriplet(crc, next, -70);
+              /* fallthrough */
+            case 69:
+              CRCtriplet(crc, next, -69);
+              /* fallthrough */
+            case 68:
+              CRCtriplet(crc, next, -68);
+              /* fallthrough */
+            case 67:
+              CRCtriplet(crc, next, -67);
+              /* fallthrough */
+            case 66:
+              CRCtriplet(crc, next, -66);
+              /* fallthrough */
+            case 65:
+              CRCtriplet(crc, next, -65);
+              /* fallthrough */
+            case 64:
+              CRCtriplet(crc, next, -64);
+              /* fallthrough */
+            case 63:
+              CRCtriplet(crc, next, -63);
+              /* fallthrough */
+            case 62:
+              CRCtriplet(crc, next, -62);
+              /* fallthrough */
+            case 61:
+              CRCtriplet(crc, next, -61);
+              /* fallthrough */
+            case 60:
+              CRCtriplet(crc, next, -60);
+              /* fallthrough */
+            case 59:
+              CRCtriplet(crc, next, -59);
+              /* fallthrough */
+            case 58:
+              CRCtriplet(crc, next, -58);
+              /* fallthrough */
+            case 57:
+              CRCtriplet(crc, next, -57);
+              /* fallthrough */
+            case 56:
+              CRCtriplet(crc, next, -56);
+              /* fallthrough */
+            case 55:
+              CRCtriplet(crc, next, -55);
+              /* fallthrough */
+            case 54:
+              CRCtriplet(crc, next, -54);
+              /* fallthrough */
+            case 53:
+              CRCtriplet(crc, next, -53);
+              /* fallthrough */
+            case 52:
+              CRCtriplet(crc, next, -52);
+              /* fallthrough */
+            case 51:
+              CRCtriplet(crc, next, -51);
+              /* fallthrough */
+            case 50:
+              CRCtriplet(crc, next, -50);
+              /* fallthrough */
+            case 49:
+              CRCtriplet(crc, next, -49);
+              /* fallthrough */
+            case 48:
+              CRCtriplet(crc, next, -48);
+              /* fallthrough */
+            case 47:
+              CRCtriplet(crc, next, -47);
+              /* fallthrough */
+            case 46:
+              CRCtriplet(crc, next, -46);
+              /* fallthrough */
+            case 45:
+              CRCtriplet(crc, next, -45);
+              /* fallthrough */
+            case 44:
+              CRCtriplet(crc, next, -44);
+              /* fallthrough */
+            case 43:
+              CRCtriplet(crc, next, -43);
+              /* fallthrough */
+            case 42:
+              CRCtriplet(crc, next, -42);
+              /* fallthrough */
+            case 41:
+              CRCtriplet(crc, next, -41);
+              /* fallthrough */
+            case 40:
+              CRCtriplet(crc, next, -40);
+              /* fallthrough */
+            case 39:
+              CRCtriplet(crc, next, -39);
+              /* fallthrough */
+            case 38:
+              CRCtriplet(crc, next, -38);
+              /* fallthrough */
+            case 37:
+              CRCtriplet(crc, next, -37);
+              /* fallthrough */
+            case 36:
+              CRCtriplet(crc, next, -36);
+              /* fallthrough */
+            case 35:
+              CRCtriplet(crc, next, -35);
+              /* fallthrough */
+            case 34:
+              CRCtriplet(crc, next, -34);
+              /* fallthrough */
+            case 33:
+              CRCtriplet(crc, next, -33);
+              /* fallthrough */
+            case 32:
+              CRCtriplet(crc, next, -32);
+              /* fallthrough */
+            case 31:
+              CRCtriplet(crc, next, -31);
+              /* fallthrough */
+            case 30:
+              CRCtriplet(crc, next, -30);
+              /* fallthrough */
+            case 29:
+              CRCtriplet(crc, next, -29);
+              /* fallthrough */
+            case 28:
+              CRCtriplet(crc, next, -28);
+              /* fallthrough */
+            case 27:
+              CRCtriplet(crc, next, -27);
+              /* fallthrough */
+            case 26:
+              CRCtriplet(crc, next, -26);
+              /* fallthrough */
+            case 25:
+              CRCtriplet(crc, next, -25);
+              /* fallthrough */
+            case 24:
+              CRCtriplet(crc, next, -24);
+              /* fallthrough */
+            case 23:
+              CRCtriplet(crc, next, -23);
+              /* fallthrough */
+            case 22:
+              CRCtriplet(crc, next, -22);
+              /* fallthrough */
+            case 21:
+              CRCtriplet(crc, next, -21);
+              /* fallthrough */
+            case 20:
+              CRCtriplet(crc, next, -20);
+              /* fallthrough */
+            case 19:
+              CRCtriplet(crc, next, -19);
+              /* fallthrough */
+            case 18:
+              CRCtriplet(crc, next, -18);
+              /* fallthrough */
+            case 17:
+              CRCtriplet(crc, next, -17);
+              /* fallthrough */
+            case 16:
+              CRCtriplet(crc, next, -16);
+              /* fallthrough */
+            case 15:
+              CRCtriplet(crc, next, -15);
+              /* fallthrough */
+            case 14:
+              CRCtriplet(crc, next, -14);
+              /* fallthrough */
+            case 13:
+              CRCtriplet(crc, next, -13);
+              /* fallthrough */
+            case 12:
+              CRCtriplet(crc, next, -12);
+              /* fallthrough */
+            case 11:
+              CRCtriplet(crc, next, -11);
+              /* fallthrough */
+            case 10:
+              CRCtriplet(crc, next, -10);
+              /* fallthrough */
+            case 9:
+              CRCtriplet(crc, next, -9);
+              /* fallthrough */
+            case 8:
+              CRCtriplet(crc, next, -8);
+              /* fallthrough */
+            case 7:
+              CRCtriplet(crc, next, -7);
+              /* fallthrough */
+            case 6:
+              CRCtriplet(crc, next, -6);
+              /* fallthrough */
+            case 5:
+              CRCtriplet(crc, next, -5);
+              /* fallthrough */
+            case 4:
+              CRCtriplet(crc, next, -4);
+              /* fallthrough */
+            case 3:
+              CRCtriplet(crc, next, -3);
+              /* fallthrough */
+            case 2:
+              CRCtriplet(crc, next, -2);
+              /* fallthrough */
+            case 1:
+              CRCduplet(crc, next, -1); // the final triplet is actually only 2
+              //{ CombineCRC(); }
+              // The last word of the third stream is folded in by CombineCRC
+              // itself, together with the merged crc0/crc1.
+              crc0 = CombineCRC(block_size, crc0, crc1, crc2, next2);
+              if (--n > 0) {
+                crc1 = crc2 = 0;
+                block_size = 128;
+                // points to the first byte of the next block
+                next0 = next2 + 128;
+                next1 = next0 + 128; // from here on all blocks are 128 long
+                next2 = next1 + 128;
+              }
+              /* fallthrough */
+            case 0:;
+          } while (n > 0);
+      }
+      next = (const unsigned char*)next2;
+    }
+    uint64_t count2 = len >> 3; // 216 or fewer bytes is 27 or fewer singlets
+    len = len & 7;
+    next += (count2 * 8);
+    switch (count2) {
+      case 27:
+        CRCsinglet(crc0, next, -27 * 8);
+        /* fallthrough */
+      case 26:
+        CRCsinglet(crc0, next, -26 * 8);
+        /* fallthrough */
+      case 25:
+        CRCsinglet(crc0, next, -25 * 8);
+        /* fallthrough */
+      case 24:
+        CRCsinglet(crc0, next, -24 * 8);
+        /* fallthrough */
+      case 23:
+        CRCsinglet(crc0, next, -23 * 8);
+        /* fallthrough */
+      case 22:
+        CRCsinglet(crc0, next, -22 * 8);
+        /* fallthrough */
+      case 21:
+        CRCsinglet(crc0, next, -21 * 8);
+        /* fallthrough */
+      case 20:
+        CRCsinglet(crc0, next, -20 * 8);
+        /* fallthrough */
+      case 19:
+        CRCsinglet(crc0, next, -19 * 8);
+        /* fallthrough */
+      case 18:
+        CRCsinglet(crc0, next, -18 * 8);
+        /* fallthrough */
+      case 17:
+        CRCsinglet(crc0, next, -17 * 8);
+        /* fallthrough */
+      case 16:
+        CRCsinglet(crc0, next, -16 * 8);
+        /* fallthrough */
+      case 15:
+        CRCsinglet(crc0, next, -15 * 8);
+        /* fallthrough */
+      case 14:
+        CRCsinglet(crc0, next, -14 * 8);
+        /* fallthrough */
+      case 13:
+        CRCsinglet(crc0, next, -13 * 8);
+        /* fallthrough */
+      case 12:
+        CRCsinglet(crc0, next, -12 * 8);
+        /* fallthrough */
+      case 11:
+        CRCsinglet(crc0, next, -11 * 8);
+        /* fallthrough */
+      case 10:
+        CRCsinglet(crc0, next, -10 * 8);
+        /* fallthrough */
+      case 9:
+        CRCsinglet(crc0, next, -9 * 8);
+        /* fallthrough */
+      case 8:
+        CRCsinglet(crc0, next, -8 * 8);
+        /* fallthrough */
+      case 7:
+        CRCsinglet(crc0, next, -7 * 8);
+        /* fallthrough */
+      case 6:
+        CRCsinglet(crc0, next, -6 * 8);
+        /* fallthrough */
+      case 5:
+        CRCsinglet(crc0, next, -5 * 8);
+        /* fallthrough */
+      case 4:
+        CRCsinglet(crc0, next, -4 * 8);
+        /* fallthrough */
+      case 3:
+        CRCsinglet(crc0, next, -3 * 8);
+        /* fallthrough */
+      case 2:
+        CRCsinglet(crc0, next, -2 * 8);
+        /* fallthrough */
+      case 1:
+        CRCsinglet(crc0, next, -1 * 8);
+        /* fallthrough */
+      case 0:;
+    }
+  }
+  {
+    // Fold the last 0..7 bytes and undo the initial inversion.
+    align_to_8(len, crc0, next);
+    return (uint32_t)crc0 ^ 0xffffffffu;
+  }
+}
diff --git a/mysys/crc32/crc32c_ppc.c b/mysys/crc32/crc32c_ppc.c
new file mode 100644
index 00000000..72f24283
--- /dev/null
+++ b/mysys/crc32/crc32c_ppc.c
@@ -0,0 +1,5 @@
+#define CRC32_FUNCTION crc32c_ppc
+#define CRC_TABLE
+#define POWER8_INTRINSICS
+#include "pcc_crc32c_constants.h"
+#include "crc_ppc64.h"
diff --git a/mysys/crc32/crc32c_ppc.h b/mysys/crc32/crc32c_ppc.h
new file mode 100644
index 00000000..c359061c
--- /dev/null
+++ b/mysys/crc32/crc32c_ppc.h
@@ -0,0 +1,19 @@
+// Copyright (c) Facebook, Inc. and its affiliates. All Rights Reserved.
+// Copyright (c) 2017 International Business Machines Corp.
+// All rights reserved.
+// This source code is licensed under both the GPLv2 (found in the
+// COPYING file in the root directory) and Apache 2.0 License
+// (found in the LICENSE.Apache file in the root directory).
+
+#pragma once
+
+#ifdef __cplusplus
+extern "C" {
+#endif
+
+extern uint32_t crc32c_ppc(uint32_t crc, unsigned char const *buffer,
+                           unsigned long len); /* matches the definition generated by CRC32_FUNCTION in crc_ppc64.h */
+
+#ifdef __cplusplus
+}
+#endif
diff --git a/mysys/crc32/crc_ppc64.h b/mysys/crc32/crc_ppc64.h
new file mode 100644
index 00000000..eb9379ab
--- /dev/null
+++ b/mysys/crc32/crc_ppc64.h
@@ -0,0 +1,664 @@
+/*
+ * Calculate the checksum of data that is 16 byte aligned and a multiple of
+ * 16 bytes.
+ *
+ * The first step is to reduce it to 1024 bits. We do this in 8 parallel
+ * chunks in order to mask the latency of the vpmsum instructions.
If we
+ * have more than 32 kB of data to checksum we repeat this step multiple
+ * times, passing in the previous 1024 bits.
+ *
+ * The next step is to reduce the 1024 bits to 64 bits. This step adds
+ * 32 bits of 0s to the end - this matches what a CRC does. We just
+ * calculate constants that land the data in this 32 bits.
+ *
+ * We then use fixed point Barrett reduction to compute a mod n over GF(2)
+ * for n = CRC using POWER8 instructions. We use x = 32.
+ *
+ * http://en.wikipedia.org/wiki/Barrett_reduction
+ *
+ * This code uses gcc vector builtins instead of using assembly directly.
+ *
+ * Copyright (C) 2017 Rogerio Alves , IBM
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of either:
+ *
+ * a) the GNU General Public License as published by the Free Software
+ * Foundation; either version 2 of the License, or (at your option)
+ * any later version, or
+ * b) the Apache License, Version 2.0
+ */
+
+/* NOTE(review): the header name was missing after "#include" in this copy of
+ * the file; <altivec.h> is reconstructed from the vec_* builtins used
+ * below -- confirm against upstream.
+ */
+#include <altivec.h>
+
+
+#define VMX_ALIGN 16
+#define VMX_ALIGN_MASK (VMX_ALIGN-1)
+
+/* Byte-at-a-time, table-driven CRC over len bytes at p, starting from crc.
+ * Used for the parts of the buffer the vector routine does not take: short
+ * inputs, the unaligned head and the tail. The REFLECT build shifts the
+ * running value right, the other build shifts it left. crc_table is
+ * expected to be provided by the including file.
+ */
+#ifdef REFLECT
+static unsigned int crc32_align(unsigned int crc, const unsigned char *p,
+                                unsigned long len)
+{
+  while (len--)
+    crc = crc_table[(crc ^ *p++) & 0xff] ^ (crc >> 8);
+  return crc;
+}
+#else
+static unsigned int crc32_align(unsigned int crc, const unsigned char *p,
+                                unsigned long len)
+{
+  while (len--)
+    crc = crc_table[((crc >> 24) ^ *p++) & 0xff] ^ (crc << 8);
+  return crc;
+}
+#endif
+
+static unsigned int __attribute__ ((aligned (32)))
+__crc32_vpmsum(unsigned int crc, const void* p, unsigned long len);
+
+
+/* Public entry point; its name is supplied by the including file through
+ * the CRC32_FUNCTION macro.
+ *
+ * crc is the running checksum, p the data and len its length in bytes; the
+ * updated checksum is returned. When CRC_XOR is defined the value is
+ * bit-inverted on entry and again on exit.
+ *
+ * Inputs shorter than VMX_ALIGN + VMX_ALIGN_MASK bytes are handled entirely
+ * by crc32_align. Otherwise the bytes before the first VMX_ALIGN boundary
+ * go through crc32_align, the following multiple of VMX_ALIGN bytes goes
+ * through __crc32_vpmsum, and the remaining tail goes through crc32_align.
+ */
+unsigned int CRC32_FUNCTION(unsigned int crc, const unsigned char *p,
+                            unsigned long len)
+{
+  unsigned int prealign;
+  unsigned int tail;
+
+#ifdef CRC_XOR
+  crc ^= 0xffffffff;
+#endif
+
+  if (len < VMX_ALIGN + VMX_ALIGN_MASK) {
+    crc = crc32_align(crc, p, len);
+    goto out;
+  }
+
+  if ((unsigned long)p & VMX_ALIGN_MASK) {
+    prealign = VMX_ALIGN - ((unsigned long)p & VMX_ALIGN_MASK);
+    crc = crc32_align(crc, p, prealign);
+    len -= prealign;
+    p += prealign;
+  }
+
+  crc = __crc32_vpmsum(crc, p, len & ~VMX_ALIGN_MASK);
+
+  tail = len & VMX_ALIGN_MASK;
+  if (tail) {
+    p += len & ~VMX_ALIGN_MASK;
+    crc = crc32_align(crc, p, tail);
+  }
+
+out:
+#ifdef CRC_XOR
+  crc ^= 0xffffffff;
+#endif
+
+  return crc;
+}
+
+#if defined (__clang__)
+#include "clang_workaround.h"
+#else
+#define __builtin_pack_vector(a, b) __builtin_pack_vector_int128 ((a), (b))
+#define __builtin_unpack_vector_0(a) __builtin_unpack_vector_int128 ((vector __int128_t)(a), 0)
+#define __builtin_unpack_vector_1(a) __builtin_unpack_vector_int128 ((vector __int128_t)(a), 1)
+#endif
+
+/* When we have a load-store in a single-dispatch group and address overlap
+ * such that forward is not allowed (load-hit-store) the group must be flushed.
+ * A group ending NOP prevents the flush.
+ */
+#define GROUP_ENDING_NOP asm("ori 2,2,0" ::: "memory")
+
+/* Input bytes are swapped when the host byte order and the REFLECT setting
+ * fall into one of the two combinations tested below.
+ */
+#if defined(__BIG_ENDIAN__) && defined (REFLECT)
+#define BYTESWAP_DATA
+#elif defined(__LITTLE_ENDIAN__) && !defined(REFLECT)
+#define BYTESWAP_DATA
+#endif
+
+#ifdef BYTESWAP_DATA
+#define VEC_PERM(vr, va, vb, vc) vr = vec_perm(va, vb,\
+                                               (__vector unsigned char) vc)
+#if defined(__LITTLE_ENDIAN__)
+/* Byte reverse permute constant LE.
*/ +static const __vector unsigned long long vperm_const + __attribute__ ((aligned(16))) = { 0x08090A0B0C0D0E0FUL, + 0x0001020304050607UL }; +#else +static const __vector unsigned long long vperm_const + __attribute__ ((aligned(16))) = { 0x0F0E0D0C0B0A0908UL, + 0X0706050403020100UL }; +#endif +#else +#define VEC_PERM(vr, va, vb, vc) +#endif + +static unsigned int __attribute__ ((aligned (32))) +__crc32_vpmsum(unsigned int crc, const void* p, unsigned long len) { + + const __vector unsigned long long vzero = {0,0}; + const __vector unsigned long long vones = {0xffffffffffffffffUL, + 0xffffffffffffffffUL}; + +#ifdef REFLECT + __vector unsigned char vsht_splat; + const __vector unsigned long long vmask_32bit = + (__vector unsigned long long)vec_sld((__vector unsigned char)vzero, + (__vector unsigned char)vones, 4); +#endif + + const __vector unsigned long long vmask_64bit = + (__vector unsigned long long)vec_sld((__vector unsigned char)vzero, + (__vector unsigned char)vones, 8); + + __vector unsigned long long vcrc; + + __vector unsigned long long vconst1, vconst2; + + /* vdata0-vdata7 will contain our data (p). */ + __vector unsigned long long vdata0, vdata1, vdata2, vdata3, vdata4, + vdata5, vdata6, vdata7; + + /* v0-v7 will contain our checksums */ + __vector unsigned long long v0 = {0,0}; + __vector unsigned long long v1 = {0,0}; + __vector unsigned long long v2 = {0,0}; + __vector unsigned long long v3 = {0,0}; + __vector unsigned long long v4 = {0,0}; + __vector unsigned long long v5 = {0,0}; + __vector unsigned long long v6 = {0,0}; + __vector unsigned long long v7 = {0,0}; + + + /* Vector auxiliary variables. */ + __vector unsigned long long va0, va1, va2, va3, va4, va5, va6, va7; + + unsigned int result = 0; + unsigned int offset; /* Constant table offset. */ + + unsigned long i; /* Counter. */ + unsigned long chunks; + + unsigned long block_size; + int next_block = 0; + + /* Align by 128 bits. The last 128 bit block will be processed at end. 
*/ + unsigned long length = len & 0xFFFFFFFFFFFFFF80UL; + +#ifdef REFLECT + vcrc = (__vector unsigned long long)__builtin_pack_vector(0UL, crc); +#else + vcrc = (__vector unsigned long long)__builtin_pack_vector(crc, 0UL); + + /* Shift into top 32 bits */ + vcrc = (__vector unsigned long long)vec_sld((__vector unsigned char)vcrc, + (__vector unsigned char)vzero, 4); +#endif + + /* Short version. */ + if (len < 256) { + /* Calculate where in the constant table we need to start. */ + offset = 256 - len; + + vconst1 = vec_ld(offset, vcrc_short_const); + vdata0 = vec_ld(0, (__vector unsigned long long*) p); + VEC_PERM(vdata0, vdata0, vconst1, vperm_const); + + /* xor initial value*/ + vdata0 = vec_xor(vdata0, vcrc); + + vdata0 = (__vector unsigned long long) __builtin_crypto_vpmsumw + ((__vector unsigned int)vdata0, (__vector unsigned int)vconst1); + v0 = vec_xor(v0, vdata0); + + for (i = 16; i < len; i += 16) { + vconst1 = vec_ld(offset + i, vcrc_short_const); + vdata0 = vec_ld(i, (__vector unsigned long long*) p); + VEC_PERM(vdata0, vdata0, vconst1, vperm_const); + vdata0 = (__vector unsigned long long) __builtin_crypto_vpmsumw + ((__vector unsigned int)vdata0, (__vector unsigned int)vconst1); + v0 = vec_xor(v0, vdata0); + } + } else { + + /* Load initial values. 
*/ + vdata0 = vec_ld(0, (__vector unsigned long long*) p); + vdata1 = vec_ld(16, (__vector unsigned long long*) p); + + VEC_PERM(vdata0, vdata0, vdata0, vperm_const); + VEC_PERM(vdata1, vdata1, vdata1, vperm_const); + + vdata2 = vec_ld(32, (__vector unsigned long long*) p); + vdata3 = vec_ld(48, (__vector unsigned long long*) p); + + VEC_PERM(vdata2, vdata2, vdata2, vperm_const); + VEC_PERM(vdata3, vdata3, vdata3, vperm_const); + + vdata4 = vec_ld(64, (__vector unsigned long long*) p); + vdata5 = vec_ld(80, (__vector unsigned long long*) p); + + VEC_PERM(vdata4, vdata4, vdata4, vperm_const); + VEC_PERM(vdata5, vdata5, vdata5, vperm_const); + + vdata6 = vec_ld(96, (__vector unsigned long long*) p); + vdata7 = vec_ld(112, (__vector unsigned long long*) p); + + VEC_PERM(vdata6, vdata6, vdata6, vperm_const); + VEC_PERM(vdata7, vdata7, vdata7, vperm_const); + + /* xor in initial value */ + vdata0 = vec_xor(vdata0, vcrc); + + p = (char *)p + 128; + + do { + /* Checksum in blocks of MAX_SIZE. */ + block_size = length; + if (block_size > MAX_SIZE) { + block_size = MAX_SIZE; + } + + length = length - block_size; + + /* + * Work out the offset into the constants table to start at. 
Each + * constant is 16 bytes, and it is used against 128 bytes of input + * data - 128 / 16 = 8 + */ + offset = (MAX_SIZE/8) - (block_size/8); + /* We reduce our final 128 bytes in a separate step */ + chunks = (block_size/128)-1; + + vconst1 = vec_ld(offset, vcrc_const); + + va0 = __builtin_crypto_vpmsumd ((__vector unsigned long long)vdata0, + (__vector unsigned long long)vconst1); + va1 = __builtin_crypto_vpmsumd ((__vector unsigned long long)vdata1, + (__vector unsigned long long)vconst1); + va2 = __builtin_crypto_vpmsumd ((__vector unsigned long long)vdata2, + (__vector unsigned long long)vconst1); + va3 = __builtin_crypto_vpmsumd ((__vector unsigned long long)vdata3, + (__vector unsigned long long)vconst1); + va4 = __builtin_crypto_vpmsumd ((__vector unsigned long long)vdata4, + (__vector unsigned long long)vconst1); + va5 = __builtin_crypto_vpmsumd ((__vector unsigned long long)vdata5, + (__vector unsigned long long)vconst1); + va6 = __builtin_crypto_vpmsumd ((__vector unsigned long long)vdata6, + (__vector unsigned long long)vconst1); + va7 = __builtin_crypto_vpmsumd ((__vector unsigned long long)vdata7, + (__vector unsigned long long)vconst1); + + if (chunks > 1) { + offset += 16; + vconst2 = vec_ld(offset, vcrc_const); + GROUP_ENDING_NOP; + + vdata0 = vec_ld(0, (__vector unsigned long long*) p); + VEC_PERM(vdata0, vdata0, vdata0, vperm_const); + + vdata1 = vec_ld(16, (__vector unsigned long long*) p); + VEC_PERM(vdata1, vdata1, vdata1, vperm_const); + + vdata2 = vec_ld(32, (__vector unsigned long long*) p); + VEC_PERM(vdata2, vdata2, vdata2, vperm_const); + + vdata3 = vec_ld(48, (__vector unsigned long long*) p); + VEC_PERM(vdata3, vdata3, vdata3, vperm_const); + + vdata4 = vec_ld(64, (__vector unsigned long long*) p); + VEC_PERM(vdata4, vdata4, vdata4, vperm_const); + + vdata5 = vec_ld(80, (__vector unsigned long long*) p); + VEC_PERM(vdata5, vdata5, vdata5, vperm_const); + + vdata6 = vec_ld(96, (__vector unsigned long long*) p); + VEC_PERM(vdata6, 
vdata6, vdata6, vperm_const); + + vdata7 = vec_ld(112, (__vector unsigned long long*) p); + VEC_PERM(vdata7, vdata7, vdata7, vperm_const); + + p = (char *)p + 128; + + /* + * main loop. We modulo schedule it such that it takes three + * iterations to complete - first iteration load, second + * iteration vpmsum, third iteration xor. + */ + for (i = 0; i < chunks-2; i++) { + vconst1 = vec_ld(offset, vcrc_const); + offset += 16; + GROUP_ENDING_NOP; + + v0 = vec_xor(v0, va0); + va0 = __builtin_crypto_vpmsumd ((__vector unsigned long + long)vdata0, (__vector unsigned long long)vconst2); + vdata0 = vec_ld(0, (__vector unsigned long long*) p); + VEC_PERM(vdata0, vdata0, vdata0, vperm_const); + GROUP_ENDING_NOP; + + v1 = vec_xor(v1, va1); + va1 = __builtin_crypto_vpmsumd ((__vector unsigned long + long)vdata1, (__vector unsigned long long)vconst2); + vdata1 = vec_ld(16, (__vector unsigned long long*) p); + VEC_PERM(vdata1, vdata1, vdata1, vperm_const); + GROUP_ENDING_NOP; + + v2 = vec_xor(v2, va2); + va2 = __builtin_crypto_vpmsumd ((__vector unsigned long + long)vdata2, (__vector unsigned long long)vconst2); + vdata2 = vec_ld(32, (__vector unsigned long long*) p); + VEC_PERM(vdata2, vdata2, vdata2, vperm_const); + GROUP_ENDING_NOP; + + v3 = vec_xor(v3, va3); + va3 = __builtin_crypto_vpmsumd ((__vector unsigned long + long)vdata3, (__vector unsigned long long)vconst2); + vdata3 = vec_ld(48, (__vector unsigned long long*) p); + VEC_PERM(vdata3, vdata3, vdata3, vperm_const); + + vconst2 = vec_ld(offset, vcrc_const); + GROUP_ENDING_NOP; + + v4 = vec_xor(v4, va4); + va4 = __builtin_crypto_vpmsumd ((__vector unsigned long + long)vdata4, (__vector unsigned long long)vconst1); + vdata4 = vec_ld(64, (__vector unsigned long long*) p); + VEC_PERM(vdata4, vdata4, vdata4, vperm_const); + GROUP_ENDING_NOP; + + v5 = vec_xor(v5, va5); + va5 = __builtin_crypto_vpmsumd ((__vector unsigned long + long)vdata5, (__vector unsigned long long)vconst1); + vdata5 = vec_ld(80, (__vector unsigned 
long long*) p); + VEC_PERM(vdata5, vdata5, vdata5, vperm_const); + GROUP_ENDING_NOP; + + v6 = vec_xor(v6, va6); + va6 = __builtin_crypto_vpmsumd ((__vector unsigned long + long)vdata6, (__vector unsigned long long)vconst1); + vdata6 = vec_ld(96, (__vector unsigned long long*) p); + VEC_PERM(vdata6, vdata6, vdata6, vperm_const); + GROUP_ENDING_NOP; + + v7 = vec_xor(v7, va7); + va7 = __builtin_crypto_vpmsumd ((__vector unsigned long + long)vdata7, (__vector unsigned long long)vconst1); + vdata7 = vec_ld(112, (__vector unsigned long long*) p); + VEC_PERM(vdata7, vdata7, vdata7, vperm_const); + + p = (char *)p + 128; + } + + /* First cool down*/ + vconst1 = vec_ld(offset, vcrc_const); + offset += 16; + + v0 = vec_xor(v0, va0); + va0 = __builtin_crypto_vpmsumd ((__vector unsigned long + long)vdata0, (__vector unsigned long long)vconst1); + GROUP_ENDING_NOP; + + v1 = vec_xor(v1, va1); + va1 = __builtin_crypto_vpmsumd ((__vector unsigned long + long)vdata1, (__vector unsigned long long)vconst1); + GROUP_ENDING_NOP; + + v2 = vec_xor(v2, va2); + va2 = __builtin_crypto_vpmsumd ((__vector unsigned long + long)vdata2, (__vector unsigned long long)vconst1); + GROUP_ENDING_NOP; + + v3 = vec_xor(v3, va3); + va3 = __builtin_crypto_vpmsumd ((__vector unsigned long + long)vdata3, (__vector unsigned long long)vconst1); + GROUP_ENDING_NOP; + + v4 = vec_xor(v4, va4); + va4 = __builtin_crypto_vpmsumd ((__vector unsigned long + long)vdata4, (__vector unsigned long long)vconst1); + GROUP_ENDING_NOP; + + v5 = vec_xor(v5, va5); + va5 = __builtin_crypto_vpmsumd ((__vector unsigned long + long)vdata5, (__vector unsigned long long)vconst1); + GROUP_ENDING_NOP; + + v6 = vec_xor(v6, va6); + va6 = __builtin_crypto_vpmsumd ((__vector unsigned long + long)vdata6, (__vector unsigned long long)vconst1); + GROUP_ENDING_NOP; + + v7 = vec_xor(v7, va7); + va7 = __builtin_crypto_vpmsumd ((__vector unsigned long + long)vdata7, (__vector unsigned long long)vconst1); + }/* else */ + + /* Second cool down. 
*/ + v0 = vec_xor(v0, va0); + v1 = vec_xor(v1, va1); + v2 = vec_xor(v2, va2); + v3 = vec_xor(v3, va3); + v4 = vec_xor(v4, va4); + v5 = vec_xor(v5, va5); + v6 = vec_xor(v6, va6); + v7 = vec_xor(v7, va7); + +#ifdef REFLECT + /* + * vpmsumd produces a 96 bit result in the least significant bits + * of the register. Since we are bit reflected we have to shift it + * left 32 bits so it occupies the least significant bits in the + * bit reflected domain. + */ + v0 = (__vector unsigned long long)vec_sld((__vector unsigned char)v0, + (__vector unsigned char)vzero, 4); + v1 = (__vector unsigned long long)vec_sld((__vector unsigned char)v1, + (__vector unsigned char)vzero, 4); + v2 = (__vector unsigned long long)vec_sld((__vector unsigned char)v2, + (__vector unsigned char)vzero, 4); + v3 = (__vector unsigned long long)vec_sld((__vector unsigned char)v3, + (__vector unsigned char)vzero, 4); + v4 = (__vector unsigned long long)vec_sld((__vector unsigned char)v4, + (__vector unsigned char)vzero, 4); + v5 = (__vector unsigned long long)vec_sld((__vector unsigned char)v5, + (__vector unsigned char)vzero, 4); + v6 = (__vector unsigned long long)vec_sld((__vector unsigned char)v6, + (__vector unsigned char)vzero, 4); + v7 = (__vector unsigned long long)vec_sld((__vector unsigned char)v7, + (__vector unsigned char)vzero, 4); +#endif + + /* xor with the last 1024 bits. 
*/ + va0 = vec_ld(0, (__vector unsigned long long*) p); + VEC_PERM(va0, va0, va0, vperm_const); + + va1 = vec_ld(16, (__vector unsigned long long*) p); + VEC_PERM(va1, va1, va1, vperm_const); + + va2 = vec_ld(32, (__vector unsigned long long*) p); + VEC_PERM(va2, va2, va2, vperm_const); + + va3 = vec_ld(48, (__vector unsigned long long*) p); + VEC_PERM(va3, va3, va3, vperm_const); + + va4 = vec_ld(64, (__vector unsigned long long*) p); + VEC_PERM(va4, va4, va4, vperm_const); + + va5 = vec_ld(80, (__vector unsigned long long*) p); + VEC_PERM(va5, va5, va5, vperm_const); + + va6 = vec_ld(96, (__vector unsigned long long*) p); + VEC_PERM(va6, va6, va6, vperm_const); + + va7 = vec_ld(112, (__vector unsigned long long*) p); + VEC_PERM(va7, va7, va7, vperm_const); + + p = (char *)p + 128; + + vdata0 = vec_xor(v0, va0); + vdata1 = vec_xor(v1, va1); + vdata2 = vec_xor(v2, va2); + vdata3 = vec_xor(v3, va3); + vdata4 = vec_xor(v4, va4); + vdata5 = vec_xor(v5, va5); + vdata6 = vec_xor(v6, va6); + vdata7 = vec_xor(v7, va7); + + /* Check if we have more blocks to process */ + next_block = 0; + if (length != 0) { + next_block = 1; + + /* zero v0-v7 */ + v0 = vec_xor(v0, v0); + v1 = vec_xor(v1, v1); + v2 = vec_xor(v2, v2); + v3 = vec_xor(v3, v3); + v4 = vec_xor(v4, v4); + v5 = vec_xor(v5, v5); + v6 = vec_xor(v6, v6); + v7 = vec_xor(v7, v7); + } + length = length + 128; + + } while (next_block); + + /* Calculate how many bytes we have left. */ + length = (len & 127); + + /* Calculate where in (short) constant table we need to start. 
*/ + offset = 128 - length; + + v0 = vec_ld(offset, vcrc_short_const); + v1 = vec_ld(offset + 16, vcrc_short_const); + v2 = vec_ld(offset + 32, vcrc_short_const); + v3 = vec_ld(offset + 48, vcrc_short_const); + v4 = vec_ld(offset + 64, vcrc_short_const); + v5 = vec_ld(offset + 80, vcrc_short_const); + v6 = vec_ld(offset + 96, vcrc_short_const); + v7 = vec_ld(offset + 112, vcrc_short_const); + + offset += 128; + + v0 = (__vector unsigned long long)__builtin_crypto_vpmsumw ( + (__vector unsigned int)vdata0,(__vector unsigned int)v0); + v1 = (__vector unsigned long long)__builtin_crypto_vpmsumw ( + (__vector unsigned int)vdata1,(__vector unsigned int)v1); + v2 = (__vector unsigned long long)__builtin_crypto_vpmsumw ( + (__vector unsigned int)vdata2,(__vector unsigned int)v2); + v3 = (__vector unsigned long long)__builtin_crypto_vpmsumw ( + (__vector unsigned int)vdata3,(__vector unsigned int)v3); + v4 = (__vector unsigned long long)__builtin_crypto_vpmsumw ( + (__vector unsigned int)vdata4,(__vector unsigned int)v4); + v5 = (__vector unsigned long long)__builtin_crypto_vpmsumw ( + (__vector unsigned int)vdata5,(__vector unsigned int)v5); + v6 = (__vector unsigned long long)__builtin_crypto_vpmsumw ( + (__vector unsigned int)vdata6,(__vector unsigned int)v6); + v7 = (__vector unsigned long long)__builtin_crypto_vpmsumw ( + (__vector unsigned int)vdata7,(__vector unsigned int)v7); + + /* Now reduce the tail (0-112 bytes). */ + for (i = 0; i < length; i+=16) { + vdata0 = vec_ld(i,(__vector unsigned long long*)p); + VEC_PERM(vdata0, vdata0, vdata0, vperm_const); + va0 = vec_ld(offset + i,vcrc_short_const); + va0 = (__vector unsigned long long)__builtin_crypto_vpmsumw ( + (__vector unsigned int)vdata0,(__vector unsigned int)va0); + v0 = vec_xor(v0, va0); + } + + /* xor all parallel chunks together. 
*/ + v0 = vec_xor(v0, v1); + v2 = vec_xor(v2, v3); + v4 = vec_xor(v4, v5); + v6 = vec_xor(v6, v7); + + v0 = vec_xor(v0, v2); + v4 = vec_xor(v4, v6); + + v0 = vec_xor(v0, v4); + } + + /* Barrett Reduction */ + vconst1 = vec_ld(0, v_Barrett_const); + vconst2 = vec_ld(16, v_Barrett_const); + + v1 = (__vector unsigned long long)vec_sld((__vector unsigned char)v0, + (__vector unsigned char)v0, 8); + v0 = vec_xor(v1,v0); + +#ifdef REFLECT + /* shift left one bit */ + vsht_splat = vec_splat_u8 (1); + v0 = (__vector unsigned long long)vec_sll ((__vector unsigned char)v0, + vsht_splat); +#endif + + v0 = vec_and(v0, vmask_64bit); + +#ifndef REFLECT + + /* + * Now for the actual algorithm. The idea is to calculate q, + * the multiple of our polynomial that we need to subtract. By + * doing the computation 2x bits higher (ie 64 bits) and shifting the + * result back down 2x bits, we round down to the nearest multiple. + */ + + /* ma */ + v1 = __builtin_crypto_vpmsumd ((__vector unsigned long long)v0, + (__vector unsigned long long)vconst1); + /* q = floor(ma/(2^64)) */ + v1 = (__vector unsigned long long)vec_sld ((__vector unsigned char)vzero, + (__vector unsigned char)v1, 8); + /* qn */ + v1 = __builtin_crypto_vpmsumd ((__vector unsigned long long)v1, + (__vector unsigned long long)vconst2); + /* a - qn, subtraction is xor in GF(2) */ + v0 = vec_xor (v0, v1); + /* + * Get the result into r3. We need to shift it left 8 bytes: + * V0 [ 0 1 2 X ] + * V0 [ 0 X 2 3 ] + */ + result = __builtin_unpack_vector_1 (v0); +#else + + /* + * The reflected version of Barrett reduction. Instead of bit + * reflecting our data (which is expensive to do), we bit reflect our + * constants and our algorithm, which means the intermediate data in + * our vector registers goes from 0-63 instead of 63-0. We can reflect + * the algorithm because we don't carry in mod 2 arithmetic. 
+ */ + + /* bottom 32 bits of a */ + v1 = vec_and(v0, vmask_32bit); + + /* ma */ + v1 = __builtin_crypto_vpmsumd ((__vector unsigned long long)v1, + (__vector unsigned long long)vconst1); + + /* bottom 32bits of ma */ + v1 = vec_and(v1, vmask_32bit); + /* qn */ + v1 = __builtin_crypto_vpmsumd ((__vector unsigned long long)v1, + (__vector unsigned long long)vconst2); + /* a - qn, subtraction is xor in GF(2) */ + v0 = vec_xor (v0, v1); + + /* + * Since we are bit reflected, the result (ie the low 32 bits) is in + * the high 32 bits. We just need to shift it left 4 bytes + * V0 [ 0 1 X 3 ] + * V0 [ 0 X 2 3 ] + */ + + /* shift result into top 64 bits of */ + v0 = (__vector unsigned long long)vec_sld((__vector unsigned char)v0, + (__vector unsigned char)vzero, 4); + + result = __builtin_unpack_vector_0 (v0); +#endif + + return result; +} diff --git a/mysys/crc32/pcc_crc32_constants.h b/mysys/crc32/pcc_crc32_constants.h new file mode 100644 index 00000000..2e07d257 --- /dev/null +++ b/mysys/crc32/pcc_crc32_constants.h @@ -0,0 +1,1206 @@ +/* +* +* THIS FILE IS GENERATED WITH +./crc32_constants -c -x -r 0x4c11db7 + +* This is from https://github.com/antonblanchard/crc32-vpmsum/ +* DO NOT MODIFY IT MANUALLY! 
+* +*/ + +#define CRC 0x4c11db7 +#define CRC_XOR +#define REFLECT +#define MAX_SIZE 32768 + +#ifndef __ASSEMBLER__ +#ifdef CRC_TABLE +static const unsigned int crc_table[] = { + 0x00000000, 0x77073096, 0xee0e612c, 0x990951ba, + 0x076dc419, 0x706af48f, 0xe963a535, 0x9e6495a3, + 0x0edb8832, 0x79dcb8a4, 0xe0d5e91e, 0x97d2d988, + 0x09b64c2b, 0x7eb17cbd, 0xe7b82d07, 0x90bf1d91, + 0x1db71064, 0x6ab020f2, 0xf3b97148, 0x84be41de, + 0x1adad47d, 0x6ddde4eb, 0xf4d4b551, 0x83d385c7, + 0x136c9856, 0x646ba8c0, 0xfd62f97a, 0x8a65c9ec, + 0x14015c4f, 0x63066cd9, 0xfa0f3d63, 0x8d080df5, + 0x3b6e20c8, 0x4c69105e, 0xd56041e4, 0xa2677172, + 0x3c03e4d1, 0x4b04d447, 0xd20d85fd, 0xa50ab56b, + 0x35b5a8fa, 0x42b2986c, 0xdbbbc9d6, 0xacbcf940, + 0x32d86ce3, 0x45df5c75, 0xdcd60dcf, 0xabd13d59, + 0x26d930ac, 0x51de003a, 0xc8d75180, 0xbfd06116, + 0x21b4f4b5, 0x56b3c423, 0xcfba9599, 0xb8bda50f, + 0x2802b89e, 0x5f058808, 0xc60cd9b2, 0xb10be924, + 0x2f6f7c87, 0x58684c11, 0xc1611dab, 0xb6662d3d, + 0x76dc4190, 0x01db7106, 0x98d220bc, 0xefd5102a, + 0x71b18589, 0x06b6b51f, 0x9fbfe4a5, 0xe8b8d433, + 0x7807c9a2, 0x0f00f934, 0x9609a88e, 0xe10e9818, + 0x7f6a0dbb, 0x086d3d2d, 0x91646c97, 0xe6635c01, + 0x6b6b51f4, 0x1c6c6162, 0x856530d8, 0xf262004e, + 0x6c0695ed, 0x1b01a57b, 0x8208f4c1, 0xf50fc457, + 0x65b0d9c6, 0x12b7e950, 0x8bbeb8ea, 0xfcb9887c, + 0x62dd1ddf, 0x15da2d49, 0x8cd37cf3, 0xfbd44c65, + 0x4db26158, 0x3ab551ce, 0xa3bc0074, 0xd4bb30e2, + 0x4adfa541, 0x3dd895d7, 0xa4d1c46d, 0xd3d6f4fb, + 0x4369e96a, 0x346ed9fc, 0xad678846, 0xda60b8d0, + 0x44042d73, 0x33031de5, 0xaa0a4c5f, 0xdd0d7cc9, + 0x5005713c, 0x270241aa, 0xbe0b1010, 0xc90c2086, + 0x5768b525, 0x206f85b3, 0xb966d409, 0xce61e49f, + 0x5edef90e, 0x29d9c998, 0xb0d09822, 0xc7d7a8b4, + 0x59b33d17, 0x2eb40d81, 0xb7bd5c3b, 0xc0ba6cad, + 0xedb88320, 0x9abfb3b6, 0x03b6e20c, 0x74b1d29a, + 0xead54739, 0x9dd277af, 0x04db2615, 0x73dc1683, + 0xe3630b12, 0x94643b84, 0x0d6d6a3e, 0x7a6a5aa8, + 0xe40ecf0b, 0x9309ff9d, 0x0a00ae27, 0x7d079eb1, + 0xf00f9344, 
0x8708a3d2, 0x1e01f268, 0x6906c2fe, + 0xf762575d, 0x806567cb, 0x196c3671, 0x6e6b06e7, + 0xfed41b76, 0x89d32be0, 0x10da7a5a, 0x67dd4acc, + 0xf9b9df6f, 0x8ebeeff9, 0x17b7be43, 0x60b08ed5, + 0xd6d6a3e8, 0xa1d1937e, 0x38d8c2c4, 0x4fdff252, + 0xd1bb67f1, 0xa6bc5767, 0x3fb506dd, 0x48b2364b, + 0xd80d2bda, 0xaf0a1b4c, 0x36034af6, 0x41047a60, + 0xdf60efc3, 0xa867df55, 0x316e8eef, 0x4669be79, + 0xcb61b38c, 0xbc66831a, 0x256fd2a0, 0x5268e236, + 0xcc0c7795, 0xbb0b4703, 0x220216b9, 0x5505262f, + 0xc5ba3bbe, 0xb2bd0b28, 0x2bb45a92, 0x5cb36a04, + 0xc2d7ffa7, 0xb5d0cf31, 0x2cd99e8b, 0x5bdeae1d, + 0x9b64c2b0, 0xec63f226, 0x756aa39c, 0x026d930a, + 0x9c0906a9, 0xeb0e363f, 0x72076785, 0x05005713, + 0x95bf4a82, 0xe2b87a14, 0x7bb12bae, 0x0cb61b38, + 0x92d28e9b, 0xe5d5be0d, 0x7cdcefb7, 0x0bdbdf21, + 0x86d3d2d4, 0xf1d4e242, 0x68ddb3f8, 0x1fda836e, + 0x81be16cd, 0xf6b9265b, 0x6fb077e1, 0x18b74777, + 0x88085ae6, 0xff0f6a70, 0x66063bca, 0x11010b5c, + 0x8f659eff, 0xf862ae69, 0x616bffd3, 0x166ccf45, + 0xa00ae278, 0xd70dd2ee, 0x4e048354, 0x3903b3c2, + 0xa7672661, 0xd06016f7, 0x4969474d, 0x3e6e77db, + 0xaed16a4a, 0xd9d65adc, 0x40df0b66, 0x37d83bf0, + 0xa9bcae53, 0xdebb9ec5, 0x47b2cf7f, 0x30b5ffe9, + 0xbdbdf21c, 0xcabac28a, 0x53b39330, 0x24b4a3a6, + 0xbad03605, 0xcdd70693, 0x54de5729, 0x23d967bf, + 0xb3667a2e, 0xc4614ab8, 0x5d681b02, 0x2a6f2b94, + 0xb40bbe37, 0xc30c8ea1, 0x5a05df1b, 0x2d02ef8d,}; + +#endif /* CRC_TABLE */ +#ifdef POWER8_INTRINSICS + +/* Constants */ + +/* Reduce 262144 kbits to 1024 bits */ +static const __vector unsigned long long vcrc_const[255] + __attribute__((aligned (16))) = { +#ifdef __LITTLE_ENDIAN__ + /* x^261120 mod p(x)` << 1, x^261184 mod p(x)` << 1 */ + { 0x0000000099ea94a8, 0x00000001651797d2 }, + /* x^260096 mod p(x)` << 1, x^260160 mod p(x)` << 1 */ + { 0x00000000945a8420, 0x0000000021e0d56c }, + /* x^259072 mod p(x)` << 1, x^259136 mod p(x)` << 1 */ + { 0x0000000030762706, 0x000000000f95ecaa }, + /* x^258048 mod p(x)` << 1, x^258112 mod p(x)` << 1 */ + { 
0x00000001a52fc582, 0x00000001ebd224ac }, + /* x^257024 mod p(x)` << 1, x^257088 mod p(x)` << 1 */ + { 0x00000001a4a7167a, 0x000000000ccb97ca }, + /* x^256000 mod p(x)` << 1, x^256064 mod p(x)` << 1 */ + { 0x000000000c18249a, 0x00000001006ec8a8 }, + /* x^254976 mod p(x)` << 1, x^255040 mod p(x)` << 1 */ + { 0x00000000a924ae7c, 0x000000014f58f196 }, + /* x^253952 mod p(x)` << 1, x^254016 mod p(x)` << 1 */ + { 0x00000001e12ccc12, 0x00000001a7192ca6 }, + /* x^252928 mod p(x)` << 1, x^252992 mod p(x)` << 1 */ + { 0x00000000a0b9d4ac, 0x000000019a64bab2 }, + /* x^251904 mod p(x)` << 1, x^251968 mod p(x)` << 1 */ + { 0x0000000095e8ddfe, 0x0000000014f4ed2e }, + /* x^250880 mod p(x)` << 1, x^250944 mod p(x)` << 1 */ + { 0x00000000233fddc4, 0x000000011092b6a2 }, + /* x^249856 mod p(x)` << 1, x^249920 mod p(x)` << 1 */ + { 0x00000001b4529b62, 0x00000000c8a1629c }, + /* x^248832 mod p(x)` << 1, x^248896 mod p(x)` << 1 */ + { 0x00000001a7fa0e64, 0x000000017bf32e8e }, + /* x^247808 mod p(x)` << 1, x^247872 mod p(x)` << 1 */ + { 0x00000001b5334592, 0x00000001f8cc6582 }, + /* x^246784 mod p(x)` << 1, x^246848 mod p(x)` << 1 */ + { 0x000000011f8ee1b4, 0x000000008631ddf0 }, + /* x^245760 mod p(x)` << 1, x^245824 mod p(x)` << 1 */ + { 0x000000006252e632, 0x000000007e5a76d0 }, + /* x^244736 mod p(x)` << 1, x^244800 mod p(x)` << 1 */ + { 0x00000000ab973e84, 0x000000002b09b31c }, + /* x^243712 mod p(x)` << 1, x^243776 mod p(x)` << 1 */ + { 0x000000007734f5ec, 0x00000001b2df1f84 }, + /* x^242688 mod p(x)` << 1, x^242752 mod p(x)` << 1 */ + { 0x000000007c547798, 0x00000001d6f56afc }, + /* x^241664 mod p(x)` << 1, x^241728 mod p(x)` << 1 */ + { 0x000000007ec40210, 0x00000001b9b5e70c }, + /* x^240640 mod p(x)` << 1, x^240704 mod p(x)` << 1 */ + { 0x00000001ab1695a8, 0x0000000034b626d2 }, + /* x^239616 mod p(x)` << 1, x^239680 mod p(x)` << 1 */ + { 0x0000000090494bba, 0x000000014c53479a }, + /* x^238592 mod p(x)` << 1, x^238656 mod p(x)` << 1 */ + { 0x00000001123fb816, 0x00000001a6d179a4 }, 
+ /* x^237568 mod p(x)` << 1, x^237632 mod p(x)` << 1 */ + { 0x00000001e188c74c, 0x000000015abd16b4 }, + /* x^236544 mod p(x)` << 1, x^236608 mod p(x)` << 1 */ + { 0x00000001c2d3451c, 0x00000000018f9852 }, + /* x^235520 mod p(x)` << 1, x^235584 mod p(x)` << 1 */ + { 0x00000000f55cf1ca, 0x000000001fb3084a }, + /* x^234496 mod p(x)` << 1, x^234560 mod p(x)` << 1 */ + { 0x00000001a0531540, 0x00000000c53dfb04 }, + /* x^233472 mod p(x)` << 1, x^233536 mod p(x)` << 1 */ + { 0x0000000132cd7ebc, 0x00000000e10c9ad6 }, + /* x^232448 mod p(x)` << 1, x^232512 mod p(x)` << 1 */ + { 0x0000000073ab7f36, 0x0000000025aa994a }, + /* x^231424 mod p(x)` << 1, x^231488 mod p(x)` << 1 */ + { 0x0000000041aed1c2, 0x00000000fa3a74c4 }, + /* x^230400 mod p(x)` << 1, x^230464 mod p(x)` << 1 */ + { 0x0000000136c53800, 0x0000000033eb3f40 }, + /* x^229376 mod p(x)` << 1, x^229440 mod p(x)` << 1 */ + { 0x0000000126835a30, 0x000000017193f296 }, + /* x^228352 mod p(x)` << 1, x^228416 mod p(x)` << 1 */ + { 0x000000006241b502, 0x0000000043f6c86a }, + /* x^227328 mod p(x)` << 1, x^227392 mod p(x)` << 1 */ + { 0x00000000d5196ad4, 0x000000016b513ec6 }, + /* x^226304 mod p(x)` << 1, x^226368 mod p(x)` << 1 */ + { 0x000000009cfa769a, 0x00000000c8f25b4e }, + /* x^225280 mod p(x)` << 1, x^225344 mod p(x)` << 1 */ + { 0x00000000920e5df4, 0x00000001a45048ec }, + /* x^224256 mod p(x)` << 1, x^224320 mod p(x)` << 1 */ + { 0x0000000169dc310e, 0x000000000c441004 }, + /* x^223232 mod p(x)` << 1, x^223296 mod p(x)` << 1 */ + { 0x0000000009fc331c, 0x000000000e17cad6 }, + /* x^222208 mod p(x)` << 1, x^222272 mod p(x)` << 1 */ + { 0x000000010d94a81e, 0x00000001253ae964 }, + /* x^221184 mod p(x)` << 1, x^221248 mod p(x)` << 1 */ + { 0x0000000027a20ab2, 0x00000001d7c88ebc }, + /* x^220160 mod p(x)` << 1, x^220224 mod p(x)` << 1 */ + { 0x0000000114f87504, 0x00000001e7ca913a }, + /* x^219136 mod p(x)` << 1, x^219200 mod p(x)` << 1 */ + { 0x000000004b076d96, 0x0000000033ed078a }, + /* x^218112 mod p(x)` << 1, x^218176 mod 
p(x)` << 1 */ + { 0x00000000da4d1e74, 0x00000000e1839c78 }, + /* x^217088 mod p(x)` << 1, x^217152 mod p(x)` << 1 */ + { 0x000000001b81f672, 0x00000001322b267e }, + /* x^216064 mod p(x)` << 1, x^216128 mod p(x)` << 1 */ + { 0x000000009367c988, 0x00000000638231b6 }, + /* x^215040 mod p(x)` << 1, x^215104 mod p(x)` << 1 */ + { 0x00000001717214ca, 0x00000001ee7f16f4 }, + /* x^214016 mod p(x)` << 1, x^214080 mod p(x)` << 1 */ + { 0x000000009f47d820, 0x0000000117d9924a }, + /* x^212992 mod p(x)` << 1, x^213056 mod p(x)` << 1 */ + { 0x000000010d9a47d2, 0x00000000e1a9e0c4 }, + /* x^211968 mod p(x)` << 1, x^212032 mod p(x)` << 1 */ + { 0x00000000a696c58c, 0x00000001403731dc }, + /* x^210944 mod p(x)` << 1, x^211008 mod p(x)` << 1 */ + { 0x000000002aa28ec6, 0x00000001a5ea9682 }, + /* x^209920 mod p(x)` << 1, x^209984 mod p(x)` << 1 */ + { 0x00000001fe18fd9a, 0x0000000101c5c578 }, + /* x^208896 mod p(x)` << 1, x^208960 mod p(x)` << 1 */ + { 0x000000019d4fc1ae, 0x00000000dddf6494 }, + /* x^207872 mod p(x)` << 1, x^207936 mod p(x)` << 1 */ + { 0x00000001ba0e3dea, 0x00000000f1c3db28 }, + /* x^206848 mod p(x)` << 1, x^206912 mod p(x)` << 1 */ + { 0x0000000074b59a5e, 0x000000013112fb9c }, + /* x^205824 mod p(x)` << 1, x^205888 mod p(x)` << 1 */ + { 0x00000000f2b5ea98, 0x00000000b680b906 }, + /* x^204800 mod p(x)` << 1, x^204864 mod p(x)` << 1 */ + { 0x0000000187132676, 0x000000001a282932 }, + /* x^203776 mod p(x)` << 1, x^203840 mod p(x)` << 1 */ + { 0x000000010a8c6ad4, 0x0000000089406e7e }, + /* x^202752 mod p(x)` << 1, x^202816 mod p(x)` << 1 */ + { 0x00000001e21dfe70, 0x00000001def6be8c }, + /* x^201728 mod p(x)` << 1, x^201792 mod p(x)` << 1 */ + { 0x00000001da0050e4, 0x0000000075258728 }, + /* x^200704 mod p(x)` << 1, x^200768 mod p(x)` << 1 */ + { 0x00000000772172ae, 0x000000019536090a }, + /* x^199680 mod p(x)` << 1, x^199744 mod p(x)` << 1 */ + { 0x00000000e47724aa, 0x00000000f2455bfc }, + /* x^198656 mod p(x)` << 1, x^198720 mod p(x)` << 1 */ + { 0x000000003cd63ac4, 
0x000000018c40baf4 }, + /* x^197632 mod p(x)` << 1, x^197696 mod p(x)` << 1 */ + { 0x00000001bf47d352, 0x000000004cd390d4 }, + /* x^196608 mod p(x)` << 1, x^196672 mod p(x)` << 1 */ + { 0x000000018dc1d708, 0x00000001e4ece95a }, + /* x^195584 mod p(x)` << 1, x^195648 mod p(x)` << 1 */ + { 0x000000002d4620a4, 0x000000001a3ee918 }, + /* x^194560 mod p(x)` << 1, x^194624 mod p(x)` << 1 */ + { 0x0000000058fd1740, 0x000000007c652fb8 }, + /* x^193536 mod p(x)` << 1, x^193600 mod p(x)` << 1 */ + { 0x00000000dadd9bfc, 0x000000011c67842c }, + /* x^192512 mod p(x)` << 1, x^192576 mod p(x)` << 1 */ + { 0x00000001ea2140be, 0x00000000254f759c }, + /* x^191488 mod p(x)` << 1, x^191552 mod p(x)` << 1 */ + { 0x000000009de128ba, 0x000000007ece94ca }, + /* x^190464 mod p(x)` << 1, x^190528 mod p(x)` << 1 */ + { 0x000000013ac3aa8e, 0x0000000038f258c2 }, + /* x^189440 mod p(x)` << 1, x^189504 mod p(x)` << 1 */ + { 0x0000000099980562, 0x00000001cdf17b00 }, + /* x^188416 mod p(x)` << 1, x^188480 mod p(x)` << 1 */ + { 0x00000001c1579c86, 0x000000011f882c16 }, + /* x^187392 mod p(x)` << 1, x^187456 mod p(x)` << 1 */ + { 0x0000000068dbbf94, 0x0000000100093fc8 }, + /* x^186368 mod p(x)` << 1, x^186432 mod p(x)` << 1 */ + { 0x000000004509fb04, 0x00000001cd684f16 }, + /* x^185344 mod p(x)` << 1, x^185408 mod p(x)` << 1 */ + { 0x00000001202f6398, 0x000000004bc6a70a }, + /* x^184320 mod p(x)` << 1, x^184384 mod p(x)` << 1 */ + { 0x000000013aea243e, 0x000000004fc7e8e4 }, + /* x^183296 mod p(x)` << 1, x^183360 mod p(x)` << 1 */ + { 0x00000001b4052ae6, 0x0000000130103f1c }, + /* x^182272 mod p(x)` << 1, x^182336 mod p(x)` << 1 */ + { 0x00000001cd2a0ae8, 0x0000000111b0024c }, + /* x^181248 mod p(x)` << 1, x^181312 mod p(x)` << 1 */ + { 0x00000001fe4aa8b4, 0x000000010b3079da }, + /* x^180224 mod p(x)` << 1, x^180288 mod p(x)` << 1 */ + { 0x00000001d1559a42, 0x000000010192bcc2 }, + /* x^179200 mod p(x)` << 1, x^179264 mod p(x)` << 1 */ + { 0x00000001f3e05ecc, 0x0000000074838d50 }, + /* x^178176 mod 
p(x)` << 1, x^178240 mod p(x)` << 1 */ + { 0x0000000104ddd2cc, 0x000000001b20f520 }, + /* x^177152 mod p(x)` << 1, x^177216 mod p(x)` << 1 */ + { 0x000000015393153c, 0x0000000050c3590a }, + /* x^176128 mod p(x)` << 1, x^176192 mod p(x)` << 1 */ + { 0x0000000057e942c6, 0x00000000b41cac8e }, + /* x^175104 mod p(x)` << 1, x^175168 mod p(x)` << 1 */ + { 0x000000012c633850, 0x000000000c72cc78 }, + /* x^174080 mod p(x)` << 1, x^174144 mod p(x)` << 1 */ + { 0x00000000ebcaae4c, 0x0000000030cdb032 }, + /* x^173056 mod p(x)` << 1, x^173120 mod p(x)` << 1 */ + { 0x000000013ee532a6, 0x000000013e09fc32 }, + /* x^172032 mod p(x)` << 1, x^172096 mod p(x)` << 1 */ + { 0x00000001bf0cbc7e, 0x000000001ed624d2 }, + /* x^171008 mod p(x)` << 1, x^171072 mod p(x)` << 1 */ + { 0x00000000d50b7a5a, 0x00000000781aee1a }, + /* x^169984 mod p(x)` << 1, x^170048 mod p(x)` << 1 */ + { 0x0000000002fca6e8, 0x00000001c4d8348c }, + /* x^168960 mod p(x)` << 1, x^169024 mod p(x)` << 1 */ + { 0x000000007af40044, 0x0000000057a40336 }, + /* x^167936 mod p(x)` << 1, x^168000 mod p(x)` << 1 */ + { 0x0000000016178744, 0x0000000085544940 }, + /* x^166912 mod p(x)` << 1, x^166976 mod p(x)` << 1 */ + { 0x000000014c177458, 0x000000019cd21e80 }, + /* x^165888 mod p(x)` << 1, x^165952 mod p(x)` << 1 */ + { 0x000000011b6ddf04, 0x000000013eb95bc0 }, + /* x^164864 mod p(x)` << 1, x^164928 mod p(x)` << 1 */ + { 0x00000001f3e29ccc, 0x00000001dfc9fdfc }, + /* x^163840 mod p(x)` << 1, x^163904 mod p(x)` << 1 */ + { 0x0000000135ae7562, 0x00000000cd028bc2 }, + /* x^162816 mod p(x)` << 1, x^162880 mod p(x)` << 1 */ + { 0x0000000190ef812c, 0x0000000090db8c44 }, + /* x^161792 mod p(x)` << 1, x^161856 mod p(x)` << 1 */ + { 0x0000000067a2c786, 0x000000010010a4ce }, + /* x^160768 mod p(x)` << 1, x^160832 mod p(x)` << 1 */ + { 0x0000000048b9496c, 0x00000001c8f4c72c }, + /* x^159744 mod p(x)` << 1, x^159808 mod p(x)` << 1 */ + { 0x000000015a422de6, 0x000000001c26170c }, + /* x^158720 mod p(x)` << 1, x^158784 mod p(x)` << 1 */ + { 
0x00000001ef0e3640, 0x00000000e3fccf68 }, + /* x^157696 mod p(x)` << 1, x^157760 mod p(x)` << 1 */ + { 0x00000001006d2d26, 0x00000000d513ed24 }, + /* x^156672 mod p(x)` << 1, x^156736 mod p(x)` << 1 */ + { 0x00000001170d56d6, 0x00000000141beada }, + /* x^155648 mod p(x)` << 1, x^155712 mod p(x)` << 1 */ + { 0x00000000a5fb613c, 0x000000011071aea0 }, + /* x^154624 mod p(x)` << 1, x^154688 mod p(x)` << 1 */ + { 0x0000000040bbf7fc, 0x000000012e19080a }, + /* x^153600 mod p(x)` << 1, x^153664 mod p(x)` << 1 */ + { 0x000000016ac3a5b2, 0x0000000100ecf826 }, + /* x^152576 mod p(x)` << 1, x^152640 mod p(x)` << 1 */ + { 0x00000000abf16230, 0x0000000069b09412 }, + /* x^151552 mod p(x)` << 1, x^151616 mod p(x)` << 1 */ + { 0x00000001ebe23fac, 0x0000000122297bac }, + /* x^150528 mod p(x)` << 1, x^150592 mod p(x)` << 1 */ + { 0x000000008b6a0894, 0x00000000e9e4b068 }, + /* x^149504 mod p(x)` << 1, x^149568 mod p(x)` << 1 */ + { 0x00000001288ea478, 0x000000004b38651a }, + /* x^148480 mod p(x)` << 1, x^148544 mod p(x)` << 1 */ + { 0x000000016619c442, 0x00000001468360e2 }, + /* x^147456 mod p(x)` << 1, x^147520 mod p(x)` << 1 */ + { 0x0000000086230038, 0x00000000121c2408 }, + /* x^146432 mod p(x)` << 1, x^146496 mod p(x)` << 1 */ + { 0x000000017746a756, 0x00000000da7e7d08 }, + /* x^145408 mod p(x)` << 1, x^145472 mod p(x)` << 1 */ + { 0x0000000191b8f8f8, 0x00000001058d7652 }, + /* x^144384 mod p(x)` << 1, x^144448 mod p(x)` << 1 */ + { 0x000000008e167708, 0x000000014a098a90 }, + /* x^143360 mod p(x)` << 1, x^143424 mod p(x)` << 1 */ + { 0x0000000148b22d54, 0x0000000020dbe72e }, + /* x^142336 mod p(x)` << 1, x^142400 mod p(x)` << 1 */ + { 0x0000000044ba2c3c, 0x000000011e7323e8 }, + /* x^141312 mod p(x)` << 1, x^141376 mod p(x)` << 1 */ + { 0x00000000b54d2b52, 0x00000000d5d4bf94 }, + /* x^140288 mod p(x)` << 1, x^140352 mod p(x)` << 1 */ + { 0x0000000005a4fd8a, 0x0000000199d8746c }, + /* x^139264 mod p(x)` << 1, x^139328 mod p(x)` << 1 */ + { 0x0000000139f9fc46, 0x00000000ce9ca8a0 }, 
+ /* x^138240 mod p(x)` << 1, x^138304 mod p(x)` << 1 */ + { 0x000000015a1fa824, 0x00000000136edece }, + /* x^137216 mod p(x)` << 1, x^137280 mod p(x)` << 1 */ + { 0x000000000a61ae4c, 0x000000019b92a068 }, + /* x^136192 mod p(x)` << 1, x^136256 mod p(x)` << 1 */ + { 0x0000000145e9113e, 0x0000000071d62206 }, + /* x^135168 mod p(x)` << 1, x^135232 mod p(x)` << 1 */ + { 0x000000006a348448, 0x00000000dfc50158 }, + /* x^134144 mod p(x)` << 1, x^134208 mod p(x)` << 1 */ + { 0x000000004d80a08c, 0x00000001517626bc }, + /* x^133120 mod p(x)` << 1, x^133184 mod p(x)` << 1 */ + { 0x000000014b6837a0, 0x0000000148d1e4fa }, + /* x^132096 mod p(x)` << 1, x^132160 mod p(x)` << 1 */ + { 0x000000016896a7fc, 0x0000000094d8266e }, + /* x^131072 mod p(x)` << 1, x^131136 mod p(x)` << 1 */ + { 0x000000014f187140, 0x00000000606c5e34 }, + /* x^130048 mod p(x)` << 1, x^130112 mod p(x)` << 1 */ + { 0x000000019581b9da, 0x000000019766beaa }, + /* x^129024 mod p(x)` << 1, x^129088 mod p(x)` << 1 */ + { 0x00000001091bc984, 0x00000001d80c506c }, + /* x^128000 mod p(x)` << 1, x^128064 mod p(x)` << 1 */ + { 0x000000001067223c, 0x000000001e73837c }, + /* x^126976 mod p(x)` << 1, x^127040 mod p(x)` << 1 */ + { 0x00000001ab16ea02, 0x0000000064d587de }, + /* x^125952 mod p(x)` << 1, x^126016 mod p(x)` << 1 */ + { 0x000000013c4598a8, 0x00000000f4a507b0 }, + /* x^124928 mod p(x)` << 1, x^124992 mod p(x)` << 1 */ + { 0x00000000b3735430, 0x0000000040e342fc }, + /* x^123904 mod p(x)` << 1, x^123968 mod p(x)` << 1 */ + { 0x00000001bb3fc0c0, 0x00000001d5ad9c3a }, + /* x^122880 mod p(x)` << 1, x^122944 mod p(x)` << 1 */ + { 0x00000001570ae19c, 0x0000000094a691a4 }, + /* x^121856 mod p(x)` << 1, x^121920 mod p(x)` << 1 */ + { 0x00000001ea910712, 0x00000001271ecdfa }, + /* x^120832 mod p(x)` << 1, x^120896 mod p(x)` << 1 */ + { 0x0000000167127128, 0x000000009e54475a }, + /* x^119808 mod p(x)` << 1, x^119872 mod p(x)` << 1 */ + { 0x0000000019e790a2, 0x00000000c9c099ee }, + /* x^118784 mod p(x)` << 1, x^118848 mod 
p(x)` << 1 */ + { 0x000000003788f710, 0x000000009a2f736c }, + /* x^117760 mod p(x)` << 1, x^117824 mod p(x)` << 1 */ + { 0x00000001682a160e, 0x00000000bb9f4996 }, + /* x^116736 mod p(x)` << 1, x^116800 mod p(x)` << 1 */ + { 0x000000007f0ebd2e, 0x00000001db688050 }, + /* x^115712 mod p(x)` << 1, x^115776 mod p(x)` << 1 */ + { 0x000000002b032080, 0x00000000e9b10af4 }, + /* x^114688 mod p(x)` << 1, x^114752 mod p(x)` << 1 */ + { 0x00000000cfd1664a, 0x000000012d4545e4 }, + /* x^113664 mod p(x)` << 1, x^113728 mod p(x)` << 1 */ + { 0x00000000aa1181c2, 0x000000000361139c }, + /* x^112640 mod p(x)` << 1, x^112704 mod p(x)` << 1 */ + { 0x00000000ddd08002, 0x00000001a5a1a3a8 }, + /* x^111616 mod p(x)` << 1, x^111680 mod p(x)` << 1 */ + { 0x00000000e8dd0446, 0x000000006844e0b0 }, + /* x^110592 mod p(x)` << 1, x^110656 mod p(x)` << 1 */ + { 0x00000001bbd94a00, 0x00000000c3762f28 }, + /* x^109568 mod p(x)` << 1, x^109632 mod p(x)` << 1 */ + { 0x00000000ab6cd180, 0x00000001d26287a2 }, + /* x^108544 mod p(x)` << 1, x^108608 mod p(x)` << 1 */ + { 0x0000000031803ce2, 0x00000001f6f0bba8 }, + /* x^107520 mod p(x)` << 1, x^107584 mod p(x)` << 1 */ + { 0x0000000024f40b0c, 0x000000002ffabd62 }, + /* x^106496 mod p(x)` << 1, x^106560 mod p(x)` << 1 */ + { 0x00000001ba1d9834, 0x00000000fb4516b8 }, + /* x^105472 mod p(x)` << 1, x^105536 mod p(x)` << 1 */ + { 0x0000000104de61aa, 0x000000018cfa961c }, + /* x^104448 mod p(x)` << 1, x^104512 mod p(x)` << 1 */ + { 0x0000000113e40d46, 0x000000019e588d52 }, + /* x^103424 mod p(x)` << 1, x^103488 mod p(x)` << 1 */ + { 0x00000001415598a0, 0x00000001180f0bbc }, + /* x^102400 mod p(x)` << 1, x^102464 mod p(x)` << 1 */ + { 0x00000000bf6c8c90, 0x00000000e1d9177a }, + /* x^101376 mod p(x)` << 1, x^101440 mod p(x)` << 1 */ + { 0x00000001788b0504, 0x0000000105abc27c }, + /* x^100352 mod p(x)` << 1, x^100416 mod p(x)` << 1 */ + { 0x0000000038385d02, 0x00000000972e4a58 }, + /* x^99328 mod p(x)` << 1, x^99392 mod p(x)` << 1 */ + { 0x00000001b6c83844, 
0x0000000183499a5e }, + /* x^98304 mod p(x)` << 1, x^98368 mod p(x)` << 1 */ + { 0x0000000051061a8a, 0x00000001c96a8cca }, + /* x^97280 mod p(x)` << 1, x^97344 mod p(x)` << 1 */ + { 0x000000017351388a, 0x00000001a1a5b60c }, + /* x^96256 mod p(x)` << 1, x^96320 mod p(x)` << 1 */ + { 0x0000000132928f92, 0x00000000e4b6ac9c }, + /* x^95232 mod p(x)` << 1, x^95296 mod p(x)` << 1 */ + { 0x00000000e6b4f48a, 0x00000001807e7f5a }, + /* x^94208 mod p(x)` << 1, x^94272 mod p(x)` << 1 */ + { 0x0000000039d15e90, 0x000000017a7e3bc8 }, + /* x^93184 mod p(x)` << 1, x^93248 mod p(x)` << 1 */ + { 0x00000000312d6074, 0x00000000d73975da }, + /* x^92160 mod p(x)` << 1, x^92224 mod p(x)` << 1 */ + { 0x000000017bbb2cc4, 0x000000017375d038 }, + /* x^91136 mod p(x)` << 1, x^91200 mod p(x)` << 1 */ + { 0x000000016ded3e18, 0x00000000193680bc }, + /* x^90112 mod p(x)` << 1, x^90176 mod p(x)` << 1 */ + { 0x00000000f1638b16, 0x00000000999b06f6 }, + /* x^89088 mod p(x)` << 1, x^89152 mod p(x)` << 1 */ + { 0x00000001d38b9ecc, 0x00000001f685d2b8 }, + /* x^88064 mod p(x)` << 1, x^88128 mod p(x)` << 1 */ + { 0x000000018b8d09dc, 0x00000001f4ecbed2 }, + /* x^87040 mod p(x)` << 1, x^87104 mod p(x)` << 1 */ + { 0x00000000e7bc27d2, 0x00000000ba16f1a0 }, + /* x^86016 mod p(x)` << 1, x^86080 mod p(x)` << 1 */ + { 0x00000000275e1e96, 0x0000000115aceac4 }, + /* x^84992 mod p(x)` << 1, x^85056 mod p(x)` << 1 */ + { 0x00000000e2e3031e, 0x00000001aeff6292 }, + /* x^83968 mod p(x)` << 1, x^84032 mod p(x)` << 1 */ + { 0x00000001041c84d8, 0x000000009640124c }, + /* x^82944 mod p(x)` << 1, x^83008 mod p(x)` << 1 */ + { 0x00000000706ce672, 0x0000000114f41f02 }, + /* x^81920 mod p(x)` << 1, x^81984 mod p(x)` << 1 */ + { 0x000000015d5070da, 0x000000009c5f3586 }, + /* x^80896 mod p(x)` << 1, x^80960 mod p(x)` << 1 */ + { 0x0000000038f9493a, 0x00000001878275fa }, + /* x^79872 mod p(x)` << 1, x^79936 mod p(x)` << 1 */ + { 0x00000000a3348a76, 0x00000000ddc42ce8 }, + /* x^78848 mod p(x)` << 1, x^78912 mod p(x)` << 1 */ + { 
0x00000001ad0aab92, 0x0000000181d2c73a }, + /* x^77824 mod p(x)` << 1, x^77888 mod p(x)` << 1 */ + { 0x000000019e85f712, 0x0000000141c9320a }, + /* x^76800 mod p(x)` << 1, x^76864 mod p(x)` << 1 */ + { 0x000000005a871e76, 0x000000015235719a }, + /* x^75776 mod p(x)` << 1, x^75840 mod p(x)` << 1 */ + { 0x000000017249c662, 0x00000000be27d804 }, + /* x^74752 mod p(x)` << 1, x^74816 mod p(x)` << 1 */ + { 0x000000003a084712, 0x000000006242d45a }, + /* x^73728 mod p(x)` << 1, x^73792 mod p(x)` << 1 */ + { 0x00000000ed438478, 0x000000009a53638e }, + /* x^72704 mod p(x)` << 1, x^72768 mod p(x)` << 1 */ + { 0x00000000abac34cc, 0x00000001001ecfb6 }, + /* x^71680 mod p(x)` << 1, x^71744 mod p(x)` << 1 */ + { 0x000000005f35ef3e, 0x000000016d7c2d64 }, + /* x^70656 mod p(x)` << 1, x^70720 mod p(x)` << 1 */ + { 0x0000000047d6608c, 0x00000001d0ce46c0 }, + /* x^69632 mod p(x)` << 1, x^69696 mod p(x)` << 1 */ + { 0x000000002d01470e, 0x0000000124c907b4 }, + /* x^68608 mod p(x)` << 1, x^68672 mod p(x)` << 1 */ + { 0x0000000158bbc7b0, 0x0000000018a555ca }, + /* x^67584 mod p(x)` << 1, x^67648 mod p(x)` << 1 */ + { 0x00000000c0a23e8e, 0x000000006b0980bc }, + /* x^66560 mod p(x)` << 1, x^66624 mod p(x)` << 1 */ + { 0x00000001ebd85c88, 0x000000008bbba964 }, + /* x^65536 mod p(x)` << 1, x^65600 mod p(x)` << 1 */ + { 0x000000019ee20bb2, 0x00000001070a5a1e }, + /* x^64512 mod p(x)` << 1, x^64576 mod p(x)` << 1 */ + { 0x00000001acabf2d6, 0x000000002204322a }, + /* x^63488 mod p(x)` << 1, x^63552 mod p(x)` << 1 */ + { 0x00000001b7963d56, 0x00000000a27524d0 }, + /* x^62464 mod p(x)` << 1, x^62528 mod p(x)` << 1 */ + { 0x000000017bffa1fe, 0x0000000020b1e4ba }, + /* x^61440 mod p(x)` << 1, x^61504 mod p(x)` << 1 */ + { 0x000000001f15333e, 0x0000000032cc27fc }, + /* x^60416 mod p(x)` << 1, x^60480 mod p(x)` << 1 */ + { 0x000000018593129e, 0x0000000044dd22b8 }, + /* x^59392 mod p(x)` << 1, x^59456 mod p(x)` << 1 */ + { 0x000000019cb32602, 0x00000000dffc9e0a }, + /* x^58368 mod p(x)` << 1, x^58432 
mod p(x)` << 1 */ + { 0x0000000142b05cc8, 0x00000001b7a0ed14 }, + /* x^57344 mod p(x)` << 1, x^57408 mod p(x)` << 1 */ + { 0x00000001be49e7a4, 0x00000000c7842488 }, + /* x^56320 mod p(x)` << 1, x^56384 mod p(x)` << 1 */ + { 0x0000000108f69d6c, 0x00000001c02a4fee }, + /* x^55296 mod p(x)` << 1, x^55360 mod p(x)` << 1 */ + { 0x000000006c0971f0, 0x000000003c273778 }, + /* x^54272 mod p(x)` << 1, x^54336 mod p(x)` << 1 */ + { 0x000000005b16467a, 0x00000001d63f8894 }, + /* x^53248 mod p(x)` << 1, x^53312 mod p(x)` << 1 */ + { 0x00000001551a628e, 0x000000006be557d6 }, + /* x^52224 mod p(x)` << 1, x^52288 mod p(x)` << 1 */ + { 0x000000019e42ea92, 0x000000006a7806ea }, + /* x^51200 mod p(x)` << 1, x^51264 mod p(x)` << 1 */ + { 0x000000012fa83ff2, 0x000000016155aa0c }, + /* x^50176 mod p(x)` << 1, x^50240 mod p(x)` << 1 */ + { 0x000000011ca9cde0, 0x00000000908650ac }, + /* x^49152 mod p(x)` << 1, x^49216 mod p(x)` << 1 */ + { 0x00000000c8e5cd74, 0x00000000aa5a8084 }, + /* x^48128 mod p(x)` << 1, x^48192 mod p(x)` << 1 */ + { 0x0000000096c27f0c, 0x0000000191bb500a }, + /* x^47104 mod p(x)` << 1, x^47168 mod p(x)` << 1 */ + { 0x000000002baed926, 0x0000000064e9bed0 }, + /* x^46080 mod p(x)` << 1, x^46144 mod p(x)` << 1 */ + { 0x000000017c8de8d2, 0x000000009444f302 }, + /* x^45056 mod p(x)` << 1, x^45120 mod p(x)` << 1 */ + { 0x00000000d43d6068, 0x000000019db07d3c }, + /* x^44032 mod p(x)` << 1, x^44096 mod p(x)` << 1 */ + { 0x00000000cb2c4b26, 0x00000001359e3e6e }, + /* x^43008 mod p(x)` << 1, x^43072 mod p(x)` << 1 */ + { 0x0000000145b8da26, 0x00000001e4f10dd2 }, + /* x^41984 mod p(x)` << 1, x^42048 mod p(x)` << 1 */ + { 0x000000018fff4b08, 0x0000000124f5735e }, + /* x^40960 mod p(x)` << 1, x^41024 mod p(x)` << 1 */ + { 0x0000000150b58ed0, 0x0000000124760a4c }, + /* x^39936 mod p(x)` << 1, x^40000 mod p(x)` << 1 */ + { 0x00000001549f39bc, 0x000000000f1fc186 }, + /* x^38912 mod p(x)` << 1, x^38976 mod p(x)` << 1 */ + { 0x00000000ef4d2f42, 0x00000000150e4cc4 }, + /* x^37888 mod 
p(x)` << 1, x^37952 mod p(x)` << 1 */ + { 0x00000001b1468572, 0x000000002a6204e8 }, + /* x^36864 mod p(x)` << 1, x^36928 mod p(x)` << 1 */ + { 0x000000013d7403b2, 0x00000000beb1d432 }, + /* x^35840 mod p(x)` << 1, x^35904 mod p(x)` << 1 */ + { 0x00000001a4681842, 0x0000000135f3f1f0 }, + /* x^34816 mod p(x)` << 1, x^34880 mod p(x)` << 1 */ + { 0x0000000167714492, 0x0000000074fe2232 }, + /* x^33792 mod p(x)` << 1, x^33856 mod p(x)` << 1 */ + { 0x00000001e599099a, 0x000000001ac6e2ba }, + /* x^32768 mod p(x)` << 1, x^32832 mod p(x)` << 1 */ + { 0x00000000fe128194, 0x0000000013fca91e }, + /* x^31744 mod p(x)` << 1, x^31808 mod p(x)` << 1 */ + { 0x0000000077e8b990, 0x0000000183f4931e }, + /* x^30720 mod p(x)` << 1, x^30784 mod p(x)` << 1 */ + { 0x00000001a267f63a, 0x00000000b6d9b4e4 }, + /* x^29696 mod p(x)` << 1, x^29760 mod p(x)` << 1 */ + { 0x00000001945c245a, 0x00000000b5188656 }, + /* x^28672 mod p(x)` << 1, x^28736 mod p(x)` << 1 */ + { 0x0000000149002e76, 0x0000000027a81a84 }, + /* x^27648 mod p(x)` << 1, x^27712 mod p(x)` << 1 */ + { 0x00000001bb8310a4, 0x0000000125699258 }, + /* x^26624 mod p(x)` << 1, x^26688 mod p(x)` << 1 */ + { 0x000000019ec60bcc, 0x00000001b23de796 }, + /* x^25600 mod p(x)` << 1, x^25664 mod p(x)` << 1 */ + { 0x000000012d8590ae, 0x00000000fe4365dc }, + /* x^24576 mod p(x)` << 1, x^24640 mod p(x)` << 1 */ + { 0x0000000065b00684, 0x00000000c68f497a }, + /* x^23552 mod p(x)` << 1, x^23616 mod p(x)` << 1 */ + { 0x000000015e5aeadc, 0x00000000fbf521ee }, + /* x^22528 mod p(x)` << 1, x^22592 mod p(x)` << 1 */ + { 0x00000000b77ff2b0, 0x000000015eac3378 }, + /* x^21504 mod p(x)` << 1, x^21568 mod p(x)` << 1 */ + { 0x0000000188da2ff6, 0x0000000134914b90 }, + /* x^20480 mod p(x)` << 1, x^20544 mod p(x)` << 1 */ + { 0x0000000063da929a, 0x0000000016335cfe }, + /* x^19456 mod p(x)` << 1, x^19520 mod p(x)` << 1 */ + { 0x00000001389caa80, 0x000000010372d10c }, + /* x^18432 mod p(x)` << 1, x^18496 mod p(x)` << 1 */ + { 0x000000013db599d2, 0x000000015097b908 
}, + /* x^17408 mod p(x)` << 1, x^17472 mod p(x)` << 1 */ + { 0x0000000122505a86, 0x00000001227a7572 }, + /* x^16384 mod p(x)` << 1, x^16448 mod p(x)` << 1 */ + { 0x000000016bd72746, 0x000000009a8f75c0 }, + /* x^15360 mod p(x)` << 1, x^15424 mod p(x)` << 1 */ + { 0x00000001c3faf1d4, 0x00000000682c77a2 }, + /* x^14336 mod p(x)` << 1, x^14400 mod p(x)` << 1 */ + { 0x00000001111c826c, 0x00000000231f091c }, + /* x^13312 mod p(x)` << 1, x^13376 mod p(x)` << 1 */ + { 0x00000000153e9fb2, 0x000000007d4439f2 }, + /* x^12288 mod p(x)` << 1, x^12352 mod p(x)` << 1 */ + { 0x000000002b1f7b60, 0x000000017e221efc }, + /* x^11264 mod p(x)` << 1, x^11328 mod p(x)` << 1 */ + { 0x00000000b1dba570, 0x0000000167457c38 }, + /* x^10240 mod p(x)` << 1, x^10304 mod p(x)` << 1 */ + { 0x00000001f6397b76, 0x00000000bdf081c4 }, + /* x^9216 mod p(x)` << 1, x^9280 mod p(x)` << 1 */ + { 0x0000000156335214, 0x000000016286d6b0 }, + /* x^8192 mod p(x)` << 1, x^8256 mod p(x)` << 1 */ + { 0x00000001d70e3986, 0x00000000c84f001c }, + /* x^7168 mod p(x)` << 1, x^7232 mod p(x)` << 1 */ + { 0x000000003701a774, 0x0000000064efe7c0 }, + /* x^6144 mod p(x)` << 1, x^6208 mod p(x)` << 1 */ + { 0x00000000ac81ef72, 0x000000000ac2d904 }, + /* x^5120 mod p(x)` << 1, x^5184 mod p(x)` << 1 */ + { 0x0000000133212464, 0x00000000fd226d14 }, + /* x^4096 mod p(x)` << 1, x^4160 mod p(x)` << 1 */ + { 0x00000000e4e45610, 0x000000011cfd42e0 }, + /* x^3072 mod p(x)` << 1, x^3136 mod p(x)` << 1 */ + { 0x000000000c1bd370, 0x000000016e5a5678 }, + /* x^2048 mod p(x)` << 1, x^2112 mod p(x)` << 1 */ + { 0x00000001a7b9e7a6, 0x00000001d888fe22 }, + /* x^1024 mod p(x)` << 1, x^1088 mod p(x)` << 1 */ + { 0x000000007d657a10, 0x00000001af77fcd4 } +#else /* __LITTLE_ENDIAN__ */ + /* x^261120 mod p(x)` << 1, x^261184 mod p(x)` << 1 */ + { 0x00000001651797d2, 0x0000000099ea94a8 }, + /* x^260096 mod p(x)` << 1, x^260160 mod p(x)` << 1 */ + { 0x0000000021e0d56c, 0x00000000945a8420 }, + /* x^259072 mod p(x)` << 1, x^259136 mod p(x)` << 1 */ + { 
0x000000000f95ecaa, 0x0000000030762706 }, + /* x^258048 mod p(x)` << 1, x^258112 mod p(x)` << 1 */ + { 0x00000001ebd224ac, 0x00000001a52fc582 }, + /* x^257024 mod p(x)` << 1, x^257088 mod p(x)` << 1 */ + { 0x000000000ccb97ca, 0x00000001a4a7167a }, + /* x^256000 mod p(x)` << 1, x^256064 mod p(x)` << 1 */ + { 0x00000001006ec8a8, 0x000000000c18249a }, + /* x^254976 mod p(x)` << 1, x^255040 mod p(x)` << 1 */ + { 0x000000014f58f196, 0x00000000a924ae7c }, + /* x^253952 mod p(x)` << 1, x^254016 mod p(x)` << 1 */ + { 0x00000001a7192ca6, 0x00000001e12ccc12 }, + /* x^252928 mod p(x)` << 1, x^252992 mod p(x)` << 1 */ + { 0x000000019a64bab2, 0x00000000a0b9d4ac }, + /* x^251904 mod p(x)` << 1, x^251968 mod p(x)` << 1 */ + { 0x0000000014f4ed2e, 0x0000000095e8ddfe }, + /* x^250880 mod p(x)` << 1, x^250944 mod p(x)` << 1 */ + { 0x000000011092b6a2, 0x00000000233fddc4 }, + /* x^249856 mod p(x)` << 1, x^249920 mod p(x)` << 1 */ + { 0x00000000c8a1629c, 0x00000001b4529b62 }, + /* x^248832 mod p(x)` << 1, x^248896 mod p(x)` << 1 */ + { 0x000000017bf32e8e, 0x00000001a7fa0e64 }, + /* x^247808 mod p(x)` << 1, x^247872 mod p(x)` << 1 */ + { 0x00000001f8cc6582, 0x00000001b5334592 }, + /* x^246784 mod p(x)` << 1, x^246848 mod p(x)` << 1 */ + { 0x000000008631ddf0, 0x000000011f8ee1b4 }, + /* x^245760 mod p(x)` << 1, x^245824 mod p(x)` << 1 */ + { 0x000000007e5a76d0, 0x000000006252e632 }, + /* x^244736 mod p(x)` << 1, x^244800 mod p(x)` << 1 */ + { 0x000000002b09b31c, 0x00000000ab973e84 }, + /* x^243712 mod p(x)` << 1, x^243776 mod p(x)` << 1 */ + { 0x00000001b2df1f84, 0x000000007734f5ec }, + /* x^242688 mod p(x)` << 1, x^242752 mod p(x)` << 1 */ + { 0x00000001d6f56afc, 0x000000007c547798 }, + /* x^241664 mod p(x)` << 1, x^241728 mod p(x)` << 1 */ + { 0x00000001b9b5e70c, 0x000000007ec40210 }, + /* x^240640 mod p(x)` << 1, x^240704 mod p(x)` << 1 */ + { 0x0000000034b626d2, 0x00000001ab1695a8 }, + /* x^239616 mod p(x)` << 1, x^239680 mod p(x)` << 1 */ + { 0x000000014c53479a, 0x0000000090494bba }, 
+ /* x^238592 mod p(x)` << 1, x^238656 mod p(x)` << 1 */ + { 0x00000001a6d179a4, 0x00000001123fb816 }, + /* x^237568 mod p(x)` << 1, x^237632 mod p(x)` << 1 */ + { 0x000000015abd16b4, 0x00000001e188c74c }, + /* x^236544 mod p(x)` << 1, x^236608 mod p(x)` << 1 */ + { 0x00000000018f9852, 0x00000001c2d3451c }, + /* x^235520 mod p(x)` << 1, x^235584 mod p(x)` << 1 */ + { 0x000000001fb3084a, 0x00000000f55cf1ca }, + /* x^234496 mod p(x)` << 1, x^234560 mod p(x)` << 1 */ + { 0x00000000c53dfb04, 0x00000001a0531540 }, + /* x^233472 mod p(x)` << 1, x^233536 mod p(x)` << 1 */ + { 0x00000000e10c9ad6, 0x0000000132cd7ebc }, + /* x^232448 mod p(x)` << 1, x^232512 mod p(x)` << 1 */ + { 0x0000000025aa994a, 0x0000000073ab7f36 }, + /* x^231424 mod p(x)` << 1, x^231488 mod p(x)` << 1 */ + { 0x00000000fa3a74c4, 0x0000000041aed1c2 }, + /* x^230400 mod p(x)` << 1, x^230464 mod p(x)` << 1 */ + { 0x0000000033eb3f40, 0x0000000136c53800 }, + /* x^229376 mod p(x)` << 1, x^229440 mod p(x)` << 1 */ + { 0x000000017193f296, 0x0000000126835a30 }, + /* x^228352 mod p(x)` << 1, x^228416 mod p(x)` << 1 */ + { 0x0000000043f6c86a, 0x000000006241b502 }, + /* x^227328 mod p(x)` << 1, x^227392 mod p(x)` << 1 */ + { 0x000000016b513ec6, 0x00000000d5196ad4 }, + /* x^226304 mod p(x)` << 1, x^226368 mod p(x)` << 1 */ + { 0x00000000c8f25b4e, 0x000000009cfa769a }, + /* x^225280 mod p(x)` << 1, x^225344 mod p(x)` << 1 */ + { 0x00000001a45048ec, 0x00000000920e5df4 }, + /* x^224256 mod p(x)` << 1, x^224320 mod p(x)` << 1 */ + { 0x000000000c441004, 0x0000000169dc310e }, + /* x^223232 mod p(x)` << 1, x^223296 mod p(x)` << 1 */ + { 0x000000000e17cad6, 0x0000000009fc331c }, + /* x^222208 mod p(x)` << 1, x^222272 mod p(x)` << 1 */ + { 0x00000001253ae964, 0x000000010d94a81e }, + /* x^221184 mod p(x)` << 1, x^221248 mod p(x)` << 1 */ + { 0x00000001d7c88ebc, 0x0000000027a20ab2 }, + /* x^220160 mod p(x)` << 1, x^220224 mod p(x)` << 1 */ + { 0x00000001e7ca913a, 0x0000000114f87504 }, + /* x^219136 mod p(x)` << 1, x^219200 mod 
p(x)` << 1 */ + { 0x0000000033ed078a, 0x000000004b076d96 }, + /* x^218112 mod p(x)` << 1, x^218176 mod p(x)` << 1 */ + { 0x00000000e1839c78, 0x00000000da4d1e74 }, + /* x^217088 mod p(x)` << 1, x^217152 mod p(x)` << 1 */ + { 0x00000001322b267e, 0x000000001b81f672 }, + /* x^216064 mod p(x)` << 1, x^216128 mod p(x)` << 1 */ + { 0x00000000638231b6, 0x000000009367c988 }, + /* x^215040 mod p(x)` << 1, x^215104 mod p(x)` << 1 */ + { 0x00000001ee7f16f4, 0x00000001717214ca }, + /* x^214016 mod p(x)` << 1, x^214080 mod p(x)` << 1 */ + { 0x0000000117d9924a, 0x000000009f47d820 }, + /* x^212992 mod p(x)` << 1, x^213056 mod p(x)` << 1 */ + { 0x00000000e1a9e0c4, 0x000000010d9a47d2 }, + /* x^211968 mod p(x)` << 1, x^212032 mod p(x)` << 1 */ + { 0x00000001403731dc, 0x00000000a696c58c }, + /* x^210944 mod p(x)` << 1, x^211008 mod p(x)` << 1 */ + { 0x00000001a5ea9682, 0x000000002aa28ec6 }, + /* x^209920 mod p(x)` << 1, x^209984 mod p(x)` << 1 */ + { 0x0000000101c5c578, 0x00000001fe18fd9a }, + /* x^208896 mod p(x)` << 1, x^208960 mod p(x)` << 1 */ + { 0x00000000dddf6494, 0x000000019d4fc1ae }, + /* x^207872 mod p(x)` << 1, x^207936 mod p(x)` << 1 */ + { 0x00000000f1c3db28, 0x00000001ba0e3dea }, + /* x^206848 mod p(x)` << 1, x^206912 mod p(x)` << 1 */ + { 0x000000013112fb9c, 0x0000000074b59a5e }, + /* x^205824 mod p(x)` << 1, x^205888 mod p(x)` << 1 */ + { 0x00000000b680b906, 0x00000000f2b5ea98 }, + /* x^204800 mod p(x)` << 1, x^204864 mod p(x)` << 1 */ + { 0x000000001a282932, 0x0000000187132676 }, + /* x^203776 mod p(x)` << 1, x^203840 mod p(x)` << 1 */ + { 0x0000000089406e7e, 0x000000010a8c6ad4 }, + /* x^202752 mod p(x)` << 1, x^202816 mod p(x)` << 1 */ + { 0x00000001def6be8c, 0x00000001e21dfe70 }, + /* x^201728 mod p(x)` << 1, x^201792 mod p(x)` << 1 */ + { 0x0000000075258728, 0x00000001da0050e4 }, + /* x^200704 mod p(x)` << 1, x^200768 mod p(x)` << 1 */ + { 0x000000019536090a, 0x00000000772172ae }, + /* x^199680 mod p(x)` << 1, x^199744 mod p(x)` << 1 */ + { 0x00000000f2455bfc, 
0x00000000e47724aa }, + /* x^198656 mod p(x)` << 1, x^198720 mod p(x)` << 1 */ + { 0x000000018c40baf4, 0x000000003cd63ac4 }, + /* x^197632 mod p(x)` << 1, x^197696 mod p(x)` << 1 */ + { 0x000000004cd390d4, 0x00000001bf47d352 }, + /* x^196608 mod p(x)` << 1, x^196672 mod p(x)` << 1 */ + { 0x00000001e4ece95a, 0x000000018dc1d708 }, + /* x^195584 mod p(x)` << 1, x^195648 mod p(x)` << 1 */ + { 0x000000001a3ee918, 0x000000002d4620a4 }, + /* x^194560 mod p(x)` << 1, x^194624 mod p(x)` << 1 */ + { 0x000000007c652fb8, 0x0000000058fd1740 }, + /* x^193536 mod p(x)` << 1, x^193600 mod p(x)` << 1 */ + { 0x000000011c67842c, 0x00000000dadd9bfc }, + /* x^192512 mod p(x)` << 1, x^192576 mod p(x)` << 1 */ + { 0x00000000254f759c, 0x00000001ea2140be }, + /* x^191488 mod p(x)` << 1, x^191552 mod p(x)` << 1 */ + { 0x000000007ece94ca, 0x000000009de128ba }, + /* x^190464 mod p(x)` << 1, x^190528 mod p(x)` << 1 */ + { 0x0000000038f258c2, 0x000000013ac3aa8e }, + /* x^189440 mod p(x)` << 1, x^189504 mod p(x)` << 1 */ + { 0x00000001cdf17b00, 0x0000000099980562 }, + /* x^188416 mod p(x)` << 1, x^188480 mod p(x)` << 1 */ + { 0x000000011f882c16, 0x00000001c1579c86 }, + /* x^187392 mod p(x)` << 1, x^187456 mod p(x)` << 1 */ + { 0x0000000100093fc8, 0x0000000068dbbf94 }, + /* x^186368 mod p(x)` << 1, x^186432 mod p(x)` << 1 */ + { 0x00000001cd684f16, 0x000000004509fb04 }, + /* x^185344 mod p(x)` << 1, x^185408 mod p(x)` << 1 */ + { 0x000000004bc6a70a, 0x00000001202f6398 }, + /* x^184320 mod p(x)` << 1, x^184384 mod p(x)` << 1 */ + { 0x000000004fc7e8e4, 0x000000013aea243e }, + /* x^183296 mod p(x)` << 1, x^183360 mod p(x)` << 1 */ + { 0x0000000130103f1c, 0x00000001b4052ae6 }, + /* x^182272 mod p(x)` << 1, x^182336 mod p(x)` << 1 */ + { 0x0000000111b0024c, 0x00000001cd2a0ae8 }, + /* x^181248 mod p(x)` << 1, x^181312 mod p(x)` << 1 */ + { 0x000000010b3079da, 0x00000001fe4aa8b4 }, + /* x^180224 mod p(x)` << 1, x^180288 mod p(x)` << 1 */ + { 0x000000010192bcc2, 0x00000001d1559a42 }, + /* x^179200 mod 
p(x)` << 1, x^179264 mod p(x)` << 1 */ + { 0x0000000074838d50, 0x00000001f3e05ecc }, + /* x^178176 mod p(x)` << 1, x^178240 mod p(x)` << 1 */ + { 0x000000001b20f520, 0x0000000104ddd2cc }, + /* x^177152 mod p(x)` << 1, x^177216 mod p(x)` << 1 */ + { 0x0000000050c3590a, 0x000000015393153c }, + /* x^176128 mod p(x)` << 1, x^176192 mod p(x)` << 1 */ + { 0x00000000b41cac8e, 0x0000000057e942c6 }, + /* x^175104 mod p(x)` << 1, x^175168 mod p(x)` << 1 */ + { 0x000000000c72cc78, 0x000000012c633850 }, + /* x^174080 mod p(x)` << 1, x^174144 mod p(x)` << 1 */ + { 0x0000000030cdb032, 0x00000000ebcaae4c }, + /* x^173056 mod p(x)` << 1, x^173120 mod p(x)` << 1 */ + { 0x000000013e09fc32, 0x000000013ee532a6 }, + /* x^172032 mod p(x)` << 1, x^172096 mod p(x)` << 1 */ + { 0x000000001ed624d2, 0x00000001bf0cbc7e }, + /* x^171008 mod p(x)` << 1, x^171072 mod p(x)` << 1 */ + { 0x00000000781aee1a, 0x00000000d50b7a5a }, + /* x^169984 mod p(x)` << 1, x^170048 mod p(x)` << 1 */ + { 0x00000001c4d8348c, 0x0000000002fca6e8 }, + /* x^168960 mod p(x)` << 1, x^169024 mod p(x)` << 1 */ + { 0x0000000057a40336, 0x000000007af40044 }, + /* x^167936 mod p(x)` << 1, x^168000 mod p(x)` << 1 */ + { 0x0000000085544940, 0x0000000016178744 }, + /* x^166912 mod p(x)` << 1, x^166976 mod p(x)` << 1 */ + { 0x000000019cd21e80, 0x000000014c177458 }, + /* x^165888 mod p(x)` << 1, x^165952 mod p(x)` << 1 */ + { 0x000000013eb95bc0, 0x000000011b6ddf04 }, + /* x^164864 mod p(x)` << 1, x^164928 mod p(x)` << 1 */ + { 0x00000001dfc9fdfc, 0x00000001f3e29ccc }, + /* x^163840 mod p(x)` << 1, x^163904 mod p(x)` << 1 */ + { 0x00000000cd028bc2, 0x0000000135ae7562 }, + /* x^162816 mod p(x)` << 1, x^162880 mod p(x)` << 1 */ + { 0x0000000090db8c44, 0x0000000190ef812c }, + /* x^161792 mod p(x)` << 1, x^161856 mod p(x)` << 1 */ + { 0x000000010010a4ce, 0x0000000067a2c786 }, + /* x^160768 mod p(x)` << 1, x^160832 mod p(x)` << 1 */ + { 0x00000001c8f4c72c, 0x0000000048b9496c }, + /* x^159744 mod p(x)` << 1, x^159808 mod p(x)` << 1 */ + { 
0x000000001c26170c, 0x000000015a422de6 }, + /* x^158720 mod p(x)` << 1, x^158784 mod p(x)` << 1 */ + { 0x00000000e3fccf68, 0x00000001ef0e3640 }, + /* x^157696 mod p(x)` << 1, x^157760 mod p(x)` << 1 */ + { 0x00000000d513ed24, 0x00000001006d2d26 }, + /* x^156672 mod p(x)` << 1, x^156736 mod p(x)` << 1 */ + { 0x00000000141beada, 0x00000001170d56d6 }, + /* x^155648 mod p(x)` << 1, x^155712 mod p(x)` << 1 */ + { 0x000000011071aea0, 0x00000000a5fb613c }, + /* x^154624 mod p(x)` << 1, x^154688 mod p(x)` << 1 */ + { 0x000000012e19080a, 0x0000000040bbf7fc }, + /* x^153600 mod p(x)` << 1, x^153664 mod p(x)` << 1 */ + { 0x0000000100ecf826, 0x000000016ac3a5b2 }, + /* x^152576 mod p(x)` << 1, x^152640 mod p(x)` << 1 */ + { 0x0000000069b09412, 0x00000000abf16230 }, + /* x^151552 mod p(x)` << 1, x^151616 mod p(x)` << 1 */ + { 0x0000000122297bac, 0x00000001ebe23fac }, + /* x^150528 mod p(x)` << 1, x^150592 mod p(x)` << 1 */ + { 0x00000000e9e4b068, 0x000000008b6a0894 }, + /* x^149504 mod p(x)` << 1, x^149568 mod p(x)` << 1 */ + { 0x000000004b38651a, 0x00000001288ea478 }, + /* x^148480 mod p(x)` << 1, x^148544 mod p(x)` << 1 */ + { 0x00000001468360e2, 0x000000016619c442 }, + /* x^147456 mod p(x)` << 1, x^147520 mod p(x)` << 1 */ + { 0x00000000121c2408, 0x0000000086230038 }, + /* x^146432 mod p(x)` << 1, x^146496 mod p(x)` << 1 */ + { 0x00000000da7e7d08, 0x000000017746a756 }, + /* x^145408 mod p(x)` << 1, x^145472 mod p(x)` << 1 */ + { 0x00000001058d7652, 0x0000000191b8f8f8 }, + /* x^144384 mod p(x)` << 1, x^144448 mod p(x)` << 1 */ + { 0x000000014a098a90, 0x000000008e167708 }, + /* x^143360 mod p(x)` << 1, x^143424 mod p(x)` << 1 */ + { 0x0000000020dbe72e, 0x0000000148b22d54 }, + /* x^142336 mod p(x)` << 1, x^142400 mod p(x)` << 1 */ + { 0x000000011e7323e8, 0x0000000044ba2c3c }, + /* x^141312 mod p(x)` << 1, x^141376 mod p(x)` << 1 */ + { 0x00000000d5d4bf94, 0x00000000b54d2b52 }, + /* x^140288 mod p(x)` << 1, x^140352 mod p(x)` << 1 */ + { 0x0000000199d8746c, 0x0000000005a4fd8a }, 
+ /* x^139264 mod p(x)` << 1, x^139328 mod p(x)` << 1 */ + { 0x00000000ce9ca8a0, 0x0000000139f9fc46 }, + /* x^138240 mod p(x)` << 1, x^138304 mod p(x)` << 1 */ + { 0x00000000136edece, 0x000000015a1fa824 }, + /* x^137216 mod p(x)` << 1, x^137280 mod p(x)` << 1 */ + { 0x000000019b92a068, 0x000000000a61ae4c }, + /* x^136192 mod p(x)` << 1, x^136256 mod p(x)` << 1 */ + { 0x0000000071d62206, 0x0000000145e9113e }, + /* x^135168 mod p(x)` << 1, x^135232 mod p(x)` << 1 */ + { 0x00000000dfc50158, 0x000000006a348448 }, + /* x^134144 mod p(x)` << 1, x^134208 mod p(x)` << 1 */ + { 0x00000001517626bc, 0x000000004d80a08c }, + /* x^133120 mod p(x)` << 1, x^133184 mod p(x)` << 1 */ + { 0x0000000148d1e4fa, 0x000000014b6837a0 }, + /* x^132096 mod p(x)` << 1, x^132160 mod p(x)` << 1 */ + { 0x0000000094d8266e, 0x000000016896a7fc }, + /* x^131072 mod p(x)` << 1, x^131136 mod p(x)` << 1 */ + { 0x00000000606c5e34, 0x000000014f187140 }, + /* x^130048 mod p(x)` << 1, x^130112 mod p(x)` << 1 */ + { 0x000000019766beaa, 0x000000019581b9da }, + /* x^129024 mod p(x)` << 1, x^129088 mod p(x)` << 1 */ + { 0x00000001d80c506c, 0x00000001091bc984 }, + /* x^128000 mod p(x)` << 1, x^128064 mod p(x)` << 1 */ + { 0x000000001e73837c, 0x000000001067223c }, + /* x^126976 mod p(x)` << 1, x^127040 mod p(x)` << 1 */ + { 0x0000000064d587de, 0x00000001ab16ea02 }, + /* x^125952 mod p(x)` << 1, x^126016 mod p(x)` << 1 */ + { 0x00000000f4a507b0, 0x000000013c4598a8 }, + /* x^124928 mod p(x)` << 1, x^124992 mod p(x)` << 1 */ + { 0x0000000040e342fc, 0x00000000b3735430 }, + /* x^123904 mod p(x)` << 1, x^123968 mod p(x)` << 1 */ + { 0x00000001d5ad9c3a, 0x00000001bb3fc0c0 }, + /* x^122880 mod p(x)` << 1, x^122944 mod p(x)` << 1 */ + { 0x0000000094a691a4, 0x00000001570ae19c }, + /* x^121856 mod p(x)` << 1, x^121920 mod p(x)` << 1 */ + { 0x00000001271ecdfa, 0x00000001ea910712 }, + /* x^120832 mod p(x)` << 1, x^120896 mod p(x)` << 1 */ + { 0x000000009e54475a, 0x0000000167127128 }, + /* x^119808 mod p(x)` << 1, x^119872 mod 
p(x)` << 1 */ + { 0x00000000c9c099ee, 0x0000000019e790a2 }, + /* x^118784 mod p(x)` << 1, x^118848 mod p(x)` << 1 */ + { 0x000000009a2f736c, 0x000000003788f710 }, + /* x^117760 mod p(x)` << 1, x^117824 mod p(x)` << 1 */ + { 0x00000000bb9f4996, 0x00000001682a160e }, + /* x^116736 mod p(x)` << 1, x^116800 mod p(x)` << 1 */ + { 0x00000001db688050, 0x000000007f0ebd2e }, + /* x^115712 mod p(x)` << 1, x^115776 mod p(x)` << 1 */ + { 0x00000000e9b10af4, 0x000000002b032080 }, + /* x^114688 mod p(x)` << 1, x^114752 mod p(x)` << 1 */ + { 0x000000012d4545e4, 0x00000000cfd1664a }, + /* x^113664 mod p(x)` << 1, x^113728 mod p(x)` << 1 */ + { 0x000000000361139c, 0x00000000aa1181c2 }, + /* x^112640 mod p(x)` << 1, x^112704 mod p(x)` << 1 */ + { 0x00000001a5a1a3a8, 0x00000000ddd08002 }, + /* x^111616 mod p(x)` << 1, x^111680 mod p(x)` << 1 */ + { 0x000000006844e0b0, 0x00000000e8dd0446 }, + /* x^110592 mod p(x)` << 1, x^110656 mod p(x)` << 1 */ + { 0x00000000c3762f28, 0x00000001bbd94a00 }, + /* x^109568 mod p(x)` << 1, x^109632 mod p(x)` << 1 */ + { 0x00000001d26287a2, 0x00000000ab6cd180 }, + /* x^108544 mod p(x)` << 1, x^108608 mod p(x)` << 1 */ + { 0x00000001f6f0bba8, 0x0000000031803ce2 }, + /* x^107520 mod p(x)` << 1, x^107584 mod p(x)` << 1 */ + { 0x000000002ffabd62, 0x0000000024f40b0c }, + /* x^106496 mod p(x)` << 1, x^106560 mod p(x)` << 1 */ + { 0x00000000fb4516b8, 0x00000001ba1d9834 }, + /* x^105472 mod p(x)` << 1, x^105536 mod p(x)` << 1 */ + { 0x000000018cfa961c, 0x0000000104de61aa }, + /* x^104448 mod p(x)` << 1, x^104512 mod p(x)` << 1 */ + { 0x000000019e588d52, 0x0000000113e40d46 }, + /* x^103424 mod p(x)` << 1, x^103488 mod p(x)` << 1 */ + { 0x00000001180f0bbc, 0x00000001415598a0 }, + /* x^102400 mod p(x)` << 1, x^102464 mod p(x)` << 1 */ + { 0x00000000e1d9177a, 0x00000000bf6c8c90 }, + /* x^101376 mod p(x)` << 1, x^101440 mod p(x)` << 1 */ + { 0x0000000105abc27c, 0x00000001788b0504 }, + /* x^100352 mod p(x)` << 1, x^100416 mod p(x)` << 1 */ + { 0x00000000972e4a58, 
0x0000000038385d02 }, + /* x^99328 mod p(x)` << 1, x^99392 mod p(x)` << 1 */ + { 0x0000000183499a5e, 0x00000001b6c83844 }, + /* x^98304 mod p(x)` << 1, x^98368 mod p(x)` << 1 */ + { 0x00000001c96a8cca, 0x0000000051061a8a }, + /* x^97280 mod p(x)` << 1, x^97344 mod p(x)` << 1 */ + { 0x00000001a1a5b60c, 0x000000017351388a }, + /* x^96256 mod p(x)` << 1, x^96320 mod p(x)` << 1 */ + { 0x00000000e4b6ac9c, 0x0000000132928f92 }, + /* x^95232 mod p(x)` << 1, x^95296 mod p(x)` << 1 */ + { 0x00000001807e7f5a, 0x00000000e6b4f48a }, + /* x^94208 mod p(x)` << 1, x^94272 mod p(x)` << 1 */ + { 0x000000017a7e3bc8, 0x0000000039d15e90 }, + /* x^93184 mod p(x)` << 1, x^93248 mod p(x)` << 1 */ + { 0x00000000d73975da, 0x00000000312d6074 }, + /* x^92160 mod p(x)` << 1, x^92224 mod p(x)` << 1 */ + { 0x000000017375d038, 0x000000017bbb2cc4 }, + /* x^91136 mod p(x)` << 1, x^91200 mod p(x)` << 1 */ + { 0x00000000193680bc, 0x000000016ded3e18 }, + /* x^90112 mod p(x)` << 1, x^90176 mod p(x)` << 1 */ + { 0x00000000999b06f6, 0x00000000f1638b16 }, + /* x^89088 mod p(x)` << 1, x^89152 mod p(x)` << 1 */ + { 0x00000001f685d2b8, 0x00000001d38b9ecc }, + /* x^88064 mod p(x)` << 1, x^88128 mod p(x)` << 1 */ + { 0x00000001f4ecbed2, 0x000000018b8d09dc }, + /* x^87040 mod p(x)` << 1, x^87104 mod p(x)` << 1 */ + { 0x00000000ba16f1a0, 0x00000000e7bc27d2 }, + /* x^86016 mod p(x)` << 1, x^86080 mod p(x)` << 1 */ + { 0x0000000115aceac4, 0x00000000275e1e96 }, + /* x^84992 mod p(x)` << 1, x^85056 mod p(x)` << 1 */ + { 0x00000001aeff6292, 0x00000000e2e3031e }, + /* x^83968 mod p(x)` << 1, x^84032 mod p(x)` << 1 */ + { 0x000000009640124c, 0x00000001041c84d8 }, + /* x^82944 mod p(x)` << 1, x^83008 mod p(x)` << 1 */ + { 0x0000000114f41f02, 0x00000000706ce672 }, + /* x^81920 mod p(x)` << 1, x^81984 mod p(x)` << 1 */ + { 0x000000009c5f3586, 0x000000015d5070da }, + /* x^80896 mod p(x)` << 1, x^80960 mod p(x)` << 1 */ + { 0x00000001878275fa, 0x0000000038f9493a }, + /* x^79872 mod p(x)` << 1, x^79936 mod p(x)` << 1 */ + { 
0x00000000ddc42ce8, 0x00000000a3348a76 }, + /* x^78848 mod p(x)` << 1, x^78912 mod p(x)` << 1 */ + { 0x0000000181d2c73a, 0x00000001ad0aab92 }, + /* x^77824 mod p(x)` << 1, x^77888 mod p(x)` << 1 */ + { 0x0000000141c9320a, 0x000000019e85f712 }, + /* x^76800 mod p(x)` << 1, x^76864 mod p(x)` << 1 */ + { 0x000000015235719a, 0x000000005a871e76 }, + /* x^75776 mod p(x)` << 1, x^75840 mod p(x)` << 1 */ + { 0x00000000be27d804, 0x000000017249c662 }, + /* x^74752 mod p(x)` << 1, x^74816 mod p(x)` << 1 */ + { 0x000000006242d45a, 0x000000003a084712 }, + /* x^73728 mod p(x)` << 1, x^73792 mod p(x)` << 1 */ + { 0x000000009a53638e, 0x00000000ed438478 }, + /* x^72704 mod p(x)` << 1, x^72768 mod p(x)` << 1 */ + { 0x00000001001ecfb6, 0x00000000abac34cc }, + /* x^71680 mod p(x)` << 1, x^71744 mod p(x)` << 1 */ + { 0x000000016d7c2d64, 0x000000005f35ef3e }, + /* x^70656 mod p(x)` << 1, x^70720 mod p(x)` << 1 */ + { 0x00000001d0ce46c0, 0x0000000047d6608c }, + /* x^69632 mod p(x)` << 1, x^69696 mod p(x)` << 1 */ + { 0x0000000124c907b4, 0x000000002d01470e }, + /* x^68608 mod p(x)` << 1, x^68672 mod p(x)` << 1 */ + { 0x0000000018a555ca, 0x0000000158bbc7b0 }, + /* x^67584 mod p(x)` << 1, x^67648 mod p(x)` << 1 */ + { 0x000000006b0980bc, 0x00000000c0a23e8e }, + /* x^66560 mod p(x)` << 1, x^66624 mod p(x)` << 1 */ + { 0x000000008bbba964, 0x00000001ebd85c88 }, + /* x^65536 mod p(x)` << 1, x^65600 mod p(x)` << 1 */ + { 0x00000001070a5a1e, 0x000000019ee20bb2 }, + /* x^64512 mod p(x)` << 1, x^64576 mod p(x)` << 1 */ + { 0x000000002204322a, 0x00000001acabf2d6 }, + /* x^63488 mod p(x)` << 1, x^63552 mod p(x)` << 1 */ + { 0x00000000a27524d0, 0x00000001b7963d56 }, + /* x^62464 mod p(x)` << 1, x^62528 mod p(x)` << 1 */ + { 0x0000000020b1e4ba, 0x000000017bffa1fe }, + /* x^61440 mod p(x)` << 1, x^61504 mod p(x)` << 1 */ + { 0x0000000032cc27fc, 0x000000001f15333e }, + /* x^60416 mod p(x)` << 1, x^60480 mod p(x)` << 1 */ + { 0x0000000044dd22b8, 0x000000018593129e }, + /* x^59392 mod p(x)` << 1, x^59456 
mod p(x)` << 1 */ + { 0x00000000dffc9e0a, 0x000000019cb32602 }, + /* x^58368 mod p(x)` << 1, x^58432 mod p(x)` << 1 */ + { 0x00000001b7a0ed14, 0x0000000142b05cc8 }, + /* x^57344 mod p(x)` << 1, x^57408 mod p(x)` << 1 */ + { 0x00000000c7842488, 0x00000001be49e7a4 }, + /* x^56320 mod p(x)` << 1, x^56384 mod p(x)` << 1 */ + { 0x00000001c02a4fee, 0x0000000108f69d6c }, + /* x^55296 mod p(x)` << 1, x^55360 mod p(x)` << 1 */ + { 0x000000003c273778, 0x000000006c0971f0 }, + /* x^54272 mod p(x)` << 1, x^54336 mod p(x)` << 1 */ + { 0x00000001d63f8894, 0x000000005b16467a }, + /* x^53248 mod p(x)` << 1, x^53312 mod p(x)` << 1 */ + { 0x000000006be557d6, 0x00000001551a628e }, + /* x^52224 mod p(x)` << 1, x^52288 mod p(x)` << 1 */ + { 0x000000006a7806ea, 0x000000019e42ea92 }, + /* x^51200 mod p(x)` << 1, x^51264 mod p(x)` << 1 */ + { 0x000000016155aa0c, 0x000000012fa83ff2 }, + /* x^50176 mod p(x)` << 1, x^50240 mod p(x)` << 1 */ + { 0x00000000908650ac, 0x000000011ca9cde0 }, + /* x^49152 mod p(x)` << 1, x^49216 mod p(x)` << 1 */ + { 0x00000000aa5a8084, 0x00000000c8e5cd74 }, + /* x^48128 mod p(x)` << 1, x^48192 mod p(x)` << 1 */ + { 0x0000000191bb500a, 0x0000000096c27f0c }, + /* x^47104 mod p(x)` << 1, x^47168 mod p(x)` << 1 */ + { 0x0000000064e9bed0, 0x000000002baed926 }, + /* x^46080 mod p(x)` << 1, x^46144 mod p(x)` << 1 */ + { 0x000000009444f302, 0x000000017c8de8d2 }, + /* x^45056 mod p(x)` << 1, x^45120 mod p(x)` << 1 */ + { 0x000000019db07d3c, 0x00000000d43d6068 }, + /* x^44032 mod p(x)` << 1, x^44096 mod p(x)` << 1 */ + { 0x00000001359e3e6e, 0x00000000cb2c4b26 }, + /* x^43008 mod p(x)` << 1, x^43072 mod p(x)` << 1 */ + { 0x00000001e4f10dd2, 0x0000000145b8da26 }, + /* x^41984 mod p(x)` << 1, x^42048 mod p(x)` << 1 */ + { 0x0000000124f5735e, 0x000000018fff4b08 }, + /* x^40960 mod p(x)` << 1, x^41024 mod p(x)` << 1 */ + { 0x0000000124760a4c, 0x0000000150b58ed0 }, + /* x^39936 mod p(x)` << 1, x^40000 mod p(x)` << 1 */ + { 0x000000000f1fc186, 0x00000001549f39bc }, + /* x^38912 mod 
p(x)` << 1, x^38976 mod p(x)` << 1 */ + { 0x00000000150e4cc4, 0x00000000ef4d2f42 }, + /* x^37888 mod p(x)` << 1, x^37952 mod p(x)` << 1 */ + { 0x000000002a6204e8, 0x00000001b1468572 }, + /* x^36864 mod p(x)` << 1, x^36928 mod p(x)` << 1 */ + { 0x00000000beb1d432, 0x000000013d7403b2 }, + /* x^35840 mod p(x)` << 1, x^35904 mod p(x)` << 1 */ + { 0x0000000135f3f1f0, 0x00000001a4681842 }, + /* x^34816 mod p(x)` << 1, x^34880 mod p(x)` << 1 */ + { 0x0000000074fe2232, 0x0000000167714492 }, + /* x^33792 mod p(x)` << 1, x^33856 mod p(x)` << 1 */ + { 0x000000001ac6e2ba, 0x00000001e599099a }, + /* x^32768 mod p(x)` << 1, x^32832 mod p(x)` << 1 */ + { 0x0000000013fca91e, 0x00000000fe128194 }, + /* x^31744 mod p(x)` << 1, x^31808 mod p(x)` << 1 */ + { 0x0000000183f4931e, 0x0000000077e8b990 }, + /* x^30720 mod p(x)` << 1, x^30784 mod p(x)` << 1 */ + { 0x00000000b6d9b4e4, 0x00000001a267f63a }, + /* x^29696 mod p(x)` << 1, x^29760 mod p(x)` << 1 */ + { 0x00000000b5188656, 0x00000001945c245a }, + /* x^28672 mod p(x)` << 1, x^28736 mod p(x)` << 1 */ + { 0x0000000027a81a84, 0x0000000149002e76 }, + /* x^27648 mod p(x)` << 1, x^27712 mod p(x)` << 1 */ + { 0x0000000125699258, 0x00000001bb8310a4 }, + /* x^26624 mod p(x)` << 1, x^26688 mod p(x)` << 1 */ + { 0x00000001b23de796, 0x000000019ec60bcc }, + /* x^25600 mod p(x)` << 1, x^25664 mod p(x)` << 1 */ + { 0x00000000fe4365dc, 0x000000012d8590ae }, + /* x^24576 mod p(x)` << 1, x^24640 mod p(x)` << 1 */ + { 0x00000000c68f497a, 0x0000000065b00684 }, + /* x^23552 mod p(x)` << 1, x^23616 mod p(x)` << 1 */ + { 0x00000000fbf521ee, 0x000000015e5aeadc }, + /* x^22528 mod p(x)` << 1, x^22592 mod p(x)` << 1 */ + { 0x000000015eac3378, 0x00000000b77ff2b0 }, + /* x^21504 mod p(x)` << 1, x^21568 mod p(x)` << 1 */ + { 0x0000000134914b90, 0x0000000188da2ff6 }, + /* x^20480 mod p(x)` << 1, x^20544 mod p(x)` << 1 */ + { 0x0000000016335cfe, 0x0000000063da929a }, + /* x^19456 mod p(x)` << 1, x^19520 mod p(x)` << 1 */ + { 0x000000010372d10c, 0x00000001389caa80 
}, + /* x^18432 mod p(x)` << 1, x^18496 mod p(x)` << 1 */ + { 0x000000015097b908, 0x000000013db599d2 }, + /* x^17408 mod p(x)` << 1, x^17472 mod p(x)` << 1 */ + { 0x00000001227a7572, 0x0000000122505a86 }, + /* x^16384 mod p(x)` << 1, x^16448 mod p(x)` << 1 */ + { 0x000000009a8f75c0, 0x000000016bd72746 }, + /* x^15360 mod p(x)` << 1, x^15424 mod p(x)` << 1 */ + { 0x00000000682c77a2, 0x00000001c3faf1d4 }, + /* x^14336 mod p(x)` << 1, x^14400 mod p(x)` << 1 */ + { 0x00000000231f091c, 0x00000001111c826c }, + /* x^13312 mod p(x)` << 1, x^13376 mod p(x)` << 1 */ + { 0x000000007d4439f2, 0x00000000153e9fb2 }, + /* x^12288 mod p(x)` << 1, x^12352 mod p(x)` << 1 */ + { 0x000000017e221efc, 0x000000002b1f7b60 }, + /* x^11264 mod p(x)` << 1, x^11328 mod p(x)` << 1 */ + { 0x0000000167457c38, 0x00000000b1dba570 }, + /* x^10240 mod p(x)` << 1, x^10304 mod p(x)` << 1 */ + { 0x00000000bdf081c4, 0x00000001f6397b76 }, + /* x^9216 mod p(x)` << 1, x^9280 mod p(x)` << 1 */ + { 0x000000016286d6b0, 0x0000000156335214 }, + /* x^8192 mod p(x)` << 1, x^8256 mod p(x)` << 1 */ + { 0x00000000c84f001c, 0x00000001d70e3986 }, + /* x^7168 mod p(x)` << 1, x^7232 mod p(x)` << 1 */ + { 0x0000000064efe7c0, 0x000000003701a774 }, + /* x^6144 mod p(x)` << 1, x^6208 mod p(x)` << 1 */ + { 0x000000000ac2d904, 0x00000000ac81ef72 }, + /* x^5120 mod p(x)` << 1, x^5184 mod p(x)` << 1 */ + { 0x00000000fd226d14, 0x0000000133212464 }, + /* x^4096 mod p(x)` << 1, x^4160 mod p(x)` << 1 */ + { 0x000000011cfd42e0, 0x00000000e4e45610 }, + /* x^3072 mod p(x)` << 1, x^3136 mod p(x)` << 1 */ + { 0x000000016e5a5678, 0x000000000c1bd370 }, + /* x^2048 mod p(x)` << 1, x^2112 mod p(x)` << 1 */ + { 0x00000001d888fe22, 0x00000001a7b9e7a6 }, + /* x^1024 mod p(x)` << 1, x^1088 mod p(x)` << 1 */ + { 0x00000001af77fcd4, 0x000000007d657a10 } +#endif /* __LITTLE_ENDIAN__ */ + }; + +/* Reduce final 1024-2048 bits to 64 bits, shifting 32 bits to include the trailing 32 bits of zeros */ + +static const __vector unsigned long long 
vcrc_short_const[16] + __attribute__((aligned (16))) = { +#ifdef __LITTLE_ENDIAN__ + /* x^1952 mod p(x) , x^1984 mod p(x) , x^2016 mod p(x) , x^2048 mod p(x) */ + { 0x99168a18ec447f11, 0xed837b2613e8221e }, + /* x^1824 mod p(x) , x^1856 mod p(x) , x^1888 mod p(x) , x^1920 mod p(x) */ + { 0xe23e954e8fd2cd3c, 0xc8acdd8147b9ce5a }, + /* x^1696 mod p(x) , x^1728 mod p(x) , x^1760 mod p(x) , x^1792 mod p(x) */ + { 0x92f8befe6b1d2b53, 0xd9ad6d87d4277e25 }, + /* x^1568 mod p(x) , x^1600 mod p(x) , x^1632 mod p(x) , x^1664 mod p(x) */ + { 0xf38a3556291ea462, 0xc10ec5e033fbca3b }, + /* x^1440 mod p(x) , x^1472 mod p(x) , x^1504 mod p(x) , x^1536 mod p(x) */ + { 0x974ac56262b6ca4b, 0xc0b55b0e82e02e2f }, + /* x^1312 mod p(x) , x^1344 mod p(x) , x^1376 mod p(x) , x^1408 mod p(x) */ + { 0x855712b3784d2a56, 0x71aa1df0e172334d }, + /* x^1184 mod p(x) , x^1216 mod p(x) , x^1248 mod p(x) , x^1280 mod p(x) */ + { 0xa5abe9f80eaee722, 0xfee3053e3969324d }, + /* x^1056 mod p(x) , x^1088 mod p(x) , x^1120 mod p(x) , x^1152 mod p(x) */ + { 0x1fa0943ddb54814c, 0xf44779b93eb2bd08 }, + /* x^928 mod p(x) , x^960 mod p(x) , x^992 mod p(x) , x^1024 mod p(x) */ + { 0xa53ff440d7bbfe6a, 0xf5449b3f00cc3374 }, + /* x^800 mod p(x) , x^832 mod p(x) , x^864 mod p(x) , x^896 mod p(x) */ + { 0xebe7e3566325605c, 0x6f8346e1d777606e }, + /* x^672 mod p(x) , x^704 mod p(x) , x^736 mod p(x) , x^768 mod p(x) */ + { 0xc65a272ce5b592b8, 0xe3ab4f2ac0b95347 }, + /* x^544 mod p(x) , x^576 mod p(x) , x^608 mod p(x) , x^640 mod p(x) */ + { 0x5705a9ca4721589f, 0xaa2215ea329ecc11 }, + /* x^416 mod p(x) , x^448 mod p(x) , x^480 mod p(x) , x^512 mod p(x) */ + { 0xe3720acb88d14467, 0x1ed8f66ed95efd26 }, + /* x^288 mod p(x) , x^320 mod p(x) , x^352 mod p(x) , x^384 mod p(x) */ + { 0xba1aca0315141c31, 0x78ed02d5a700e96a }, + /* x^160 mod p(x) , x^192 mod p(x) , x^224 mod p(x) , x^256 mod p(x) */ + { 0xad2a31b3ed627dae, 0xba8ccbe832b39da3 }, + /* x^32 mod p(x) , x^64 mod p(x) , x^96 mod p(x) , x^128 mod p(x) */ + { 
0x6655004fa06a2517, 0xedb88320b1e6b092 } +#else /* __LITTLE_ENDIAN__ */ + /* x^1952 mod p(x) , x^1984 mod p(x) , x^2016 mod p(x) , x^2048 mod p(x) */ + { 0xed837b2613e8221e, 0x99168a18ec447f11 }, + /* x^1824 mod p(x) , x^1856 mod p(x) , x^1888 mod p(x) , x^1920 mod p(x) */ + { 0xc8acdd8147b9ce5a, 0xe23e954e8fd2cd3c }, + /* x^1696 mod p(x) , x^1728 mod p(x) , x^1760 mod p(x) , x^1792 mod p(x) */ + { 0xd9ad6d87d4277e25, 0x92f8befe6b1d2b53 }, + /* x^1568 mod p(x) , x^1600 mod p(x) , x^1632 mod p(x) , x^1664 mod p(x) */ + { 0xc10ec5e033fbca3b, 0xf38a3556291ea462 }, + /* x^1440 mod p(x) , x^1472 mod p(x) , x^1504 mod p(x) , x^1536 mod p(x) */ + { 0xc0b55b0e82e02e2f, 0x974ac56262b6ca4b }, + /* x^1312 mod p(x) , x^1344 mod p(x) , x^1376 mod p(x) , x^1408 mod p(x) */ + { 0x71aa1df0e172334d, 0x855712b3784d2a56 }, + /* x^1184 mod p(x) , x^1216 mod p(x) , x^1248 mod p(x) , x^1280 mod p(x) */ + { 0xfee3053e3969324d, 0xa5abe9f80eaee722 }, + /* x^1056 mod p(x) , x^1088 mod p(x) , x^1120 mod p(x) , x^1152 mod p(x) */ + { 0xf44779b93eb2bd08, 0x1fa0943ddb54814c }, + /* x^928 mod p(x) , x^960 mod p(x) , x^992 mod p(x) , x^1024 mod p(x) */ + { 0xf5449b3f00cc3374, 0xa53ff440d7bbfe6a }, + /* x^800 mod p(x) , x^832 mod p(x) , x^864 mod p(x) , x^896 mod p(x) */ + { 0x6f8346e1d777606e, 0xebe7e3566325605c }, + /* x^672 mod p(x) , x^704 mod p(x) , x^736 mod p(x) , x^768 mod p(x) */ + { 0xe3ab4f2ac0b95347, 0xc65a272ce5b592b8 }, + /* x^544 mod p(x) , x^576 mod p(x) , x^608 mod p(x) , x^640 mod p(x) */ + { 0xaa2215ea329ecc11, 0x5705a9ca4721589f }, + /* x^416 mod p(x) , x^448 mod p(x) , x^480 mod p(x) , x^512 mod p(x) */ + { 0x1ed8f66ed95efd26, 0xe3720acb88d14467 }, + /* x^288 mod p(x) , x^320 mod p(x) , x^352 mod p(x) , x^384 mod p(x) */ + { 0x78ed02d5a700e96a, 0xba1aca0315141c31 }, + /* x^160 mod p(x) , x^192 mod p(x) , x^224 mod p(x) , x^256 mod p(x) */ + { 0xba8ccbe832b39da3, 0xad2a31b3ed627dae }, + /* x^32 mod p(x) , x^64 mod p(x) , x^96 mod p(x) , x^128 mod p(x) */ + { 0xedb88320b1e6b092, 
0x6655004fa06a2517 } +#endif /* __LITTLE_ENDIAN__ */ + }; + +/* Barrett constants */ +/* 33 bit reflected Barrett constant m - (4^32)/n */ + +static const __vector unsigned long long v_Barrett_const[2] + __attribute__((aligned (16))) = { + /* x^64 div p(x) */ +#ifdef __LITTLE_ENDIAN__ + { 0x00000001f7011641, 0x0000000000000000 }, + { 0x00000001db710641, 0x0000000000000000 } +#else /* __LITTLE_ENDIAN__ */ + { 0x0000000000000000, 0x00000001f7011641 }, + { 0x0000000000000000, 0x00000001db710641 } +#endif /* __LITTLE_ENDIAN__ */ + }; +#endif /* POWER8_INTRINSICS */ + +#endif /* __ASSEMBLER__ */ diff --git a/mysys/crc32/pcc_crc32c_constants.h b/mysys/crc32/pcc_crc32c_constants.h new file mode 100644 index 00000000..40b216b6 --- /dev/null +++ b/mysys/crc32/pcc_crc32c_constants.h @@ -0,0 +1,1206 @@ +/* +* +* THIS FILE IS GENERATED WITH +./crc32_constants -c -x -r 0x1edc6f41 + +* This is from https://github.com/antonblanchard/crc32-vpmsum/ +* DO NOT MODIFY IT MANUALLY! +* +*/ + +#define CRC 0x1edc6f41 +#define CRC_XOR +#define REFLECT +#define MAX_SIZE 32768 + +#ifndef __ASSEMBLER__ +#ifdef CRC_TABLE +static const unsigned int crc_table[] = { + 0x00000000, 0xf26b8303, 0xe13b70f7, 0x1350f3f4, + 0xc79a971f, 0x35f1141c, 0x26a1e7e8, 0xd4ca64eb, + 0x8ad958cf, 0x78b2dbcc, 0x6be22838, 0x9989ab3b, + 0x4d43cfd0, 0xbf284cd3, 0xac78bf27, 0x5e133c24, + 0x105ec76f, 0xe235446c, 0xf165b798, 0x030e349b, + 0xd7c45070, 0x25afd373, 0x36ff2087, 0xc494a384, + 0x9a879fa0, 0x68ec1ca3, 0x7bbcef57, 0x89d76c54, + 0x5d1d08bf, 0xaf768bbc, 0xbc267848, 0x4e4dfb4b, + 0x20bd8ede, 0xd2d60ddd, 0xc186fe29, 0x33ed7d2a, + 0xe72719c1, 0x154c9ac2, 0x061c6936, 0xf477ea35, + 0xaa64d611, 0x580f5512, 0x4b5fa6e6, 0xb93425e5, + 0x6dfe410e, 0x9f95c20d, 0x8cc531f9, 0x7eaeb2fa, + 0x30e349b1, 0xc288cab2, 0xd1d83946, 0x23b3ba45, + 0xf779deae, 0x05125dad, 0x1642ae59, 0xe4292d5a, + 0xba3a117e, 0x4851927d, 0x5b016189, 0xa96ae28a, + 0x7da08661, 0x8fcb0562, 0x9c9bf696, 0x6ef07595, + 0x417b1dbc, 0xb3109ebf, 0xa0406d4b, 
0x522bee48, + 0x86e18aa3, 0x748a09a0, 0x67dafa54, 0x95b17957, + 0xcba24573, 0x39c9c670, 0x2a993584, 0xd8f2b687, + 0x0c38d26c, 0xfe53516f, 0xed03a29b, 0x1f682198, + 0x5125dad3, 0xa34e59d0, 0xb01eaa24, 0x42752927, + 0x96bf4dcc, 0x64d4cecf, 0x77843d3b, 0x85efbe38, + 0xdbfc821c, 0x2997011f, 0x3ac7f2eb, 0xc8ac71e8, + 0x1c661503, 0xee0d9600, 0xfd5d65f4, 0x0f36e6f7, + 0x61c69362, 0x93ad1061, 0x80fde395, 0x72966096, + 0xa65c047d, 0x5437877e, 0x4767748a, 0xb50cf789, + 0xeb1fcbad, 0x197448ae, 0x0a24bb5a, 0xf84f3859, + 0x2c855cb2, 0xdeeedfb1, 0xcdbe2c45, 0x3fd5af46, + 0x7198540d, 0x83f3d70e, 0x90a324fa, 0x62c8a7f9, + 0xb602c312, 0x44694011, 0x5739b3e5, 0xa55230e6, + 0xfb410cc2, 0x092a8fc1, 0x1a7a7c35, 0xe811ff36, + 0x3cdb9bdd, 0xceb018de, 0xdde0eb2a, 0x2f8b6829, + 0x82f63b78, 0x709db87b, 0x63cd4b8f, 0x91a6c88c, + 0x456cac67, 0xb7072f64, 0xa457dc90, 0x563c5f93, + 0x082f63b7, 0xfa44e0b4, 0xe9141340, 0x1b7f9043, + 0xcfb5f4a8, 0x3dde77ab, 0x2e8e845f, 0xdce5075c, + 0x92a8fc17, 0x60c37f14, 0x73938ce0, 0x81f80fe3, + 0x55326b08, 0xa759e80b, 0xb4091bff, 0x466298fc, + 0x1871a4d8, 0xea1a27db, 0xf94ad42f, 0x0b21572c, + 0xdfeb33c7, 0x2d80b0c4, 0x3ed04330, 0xccbbc033, + 0xa24bb5a6, 0x502036a5, 0x4370c551, 0xb11b4652, + 0x65d122b9, 0x97baa1ba, 0x84ea524e, 0x7681d14d, + 0x2892ed69, 0xdaf96e6a, 0xc9a99d9e, 0x3bc21e9d, + 0xef087a76, 0x1d63f975, 0x0e330a81, 0xfc588982, + 0xb21572c9, 0x407ef1ca, 0x532e023e, 0xa145813d, + 0x758fe5d6, 0x87e466d5, 0x94b49521, 0x66df1622, + 0x38cc2a06, 0xcaa7a905, 0xd9f75af1, 0x2b9cd9f2, + 0xff56bd19, 0x0d3d3e1a, 0x1e6dcdee, 0xec064eed, + 0xc38d26c4, 0x31e6a5c7, 0x22b65633, 0xd0ddd530, + 0x0417b1db, 0xf67c32d8, 0xe52cc12c, 0x1747422f, + 0x49547e0b, 0xbb3ffd08, 0xa86f0efc, 0x5a048dff, + 0x8ecee914, 0x7ca56a17, 0x6ff599e3, 0x9d9e1ae0, + 0xd3d3e1ab, 0x21b862a8, 0x32e8915c, 0xc083125f, + 0x144976b4, 0xe622f5b7, 0xf5720643, 0x07198540, + 0x590ab964, 0xab613a67, 0xb831c993, 0x4a5a4a90, + 0x9e902e7b, 0x6cfbad78, 0x7fab5e8c, 0x8dc0dd8f, + 0xe330a81a, 0x115b2b19, 0x020bd8ed, 
0xf0605bee, + 0x24aa3f05, 0xd6c1bc06, 0xc5914ff2, 0x37faccf1, + 0x69e9f0d5, 0x9b8273d6, 0x88d28022, 0x7ab90321, + 0xae7367ca, 0x5c18e4c9, 0x4f48173d, 0xbd23943e, + 0xf36e6f75, 0x0105ec76, 0x12551f82, 0xe03e9c81, + 0x34f4f86a, 0xc69f7b69, 0xd5cf889d, 0x27a40b9e, + 0x79b737ba, 0x8bdcb4b9, 0x988c474d, 0x6ae7c44e, + 0xbe2da0a5, 0x4c4623a6, 0x5f16d052, 0xad7d5351,}; + +#endif /* CRC_TABLE */ +#ifdef POWER8_INTRINSICS + +/* Constants */ + +/* Reduce 262144 kbits to 1024 bits */ +static const __vector unsigned long long vcrc_const[255] + __attribute__((aligned (16))) = { +#ifdef __LITTLE_ENDIAN__ + /* x^261120 mod p(x)` << 1, x^261184 mod p(x)` << 1 */ + { 0x000000009c37c408, 0x00000000b6ca9e20 }, + /* x^260096 mod p(x)` << 1, x^260160 mod p(x)` << 1 */ + { 0x00000001b51df26c, 0x00000000350249a8 }, + /* x^259072 mod p(x)` << 1, x^259136 mod p(x)` << 1 */ + { 0x000000000724b9d0, 0x00000001862dac54 }, + /* x^258048 mod p(x)` << 1, x^258112 mod p(x)` << 1 */ + { 0x00000001c00532fe, 0x00000001d87fb48c }, + /* x^257024 mod p(x)` << 1, x^257088 mod p(x)` << 1 */ + { 0x00000000f05a9362, 0x00000001f39b699e }, + /* x^256000 mod p(x)` << 1, x^256064 mod p(x)` << 1 */ + { 0x00000001e1007970, 0x0000000101da11b4 }, + /* x^254976 mod p(x)` << 1, x^255040 mod p(x)` << 1 */ + { 0x00000000a57366ee, 0x00000001cab571e0 }, + /* x^253952 mod p(x)` << 1, x^254016 mod p(x)` << 1 */ + { 0x0000000192011284, 0x00000000c7020cfe }, + /* x^252928 mod p(x)` << 1, x^252992 mod p(x)` << 1 */ + { 0x0000000162716d9a, 0x00000000cdaed1ae }, + /* x^251904 mod p(x)` << 1, x^251968 mod p(x)` << 1 */ + { 0x00000000cd97ecde, 0x00000001e804effc }, + /* x^250880 mod p(x)` << 1, x^250944 mod p(x)` << 1 */ + { 0x0000000058812bc0, 0x0000000077c3ea3a }, + /* x^249856 mod p(x)` << 1, x^249920 mod p(x)` << 1 */ + { 0x0000000088b8c12e, 0x0000000068df31b4 }, + /* x^248832 mod p(x)` << 1, x^248896 mod p(x)` << 1 */ + { 0x00000001230b234c, 0x00000000b059b6c2 }, + /* x^247808 mod p(x)` << 1, x^247872 mod p(x)` << 1 */ + { 
0x00000001120b416e, 0x0000000145fb8ed8 }, + /* x^246784 mod p(x)` << 1, x^246848 mod p(x)` << 1 */ + { 0x00000001974aecb0, 0x00000000cbc09168 }, + /* x^245760 mod p(x)` << 1, x^245824 mod p(x)` << 1 */ + { 0x000000008ee3f226, 0x000000005ceeedc2 }, + /* x^244736 mod p(x)` << 1, x^244800 mod p(x)` << 1 */ + { 0x00000001089aba9a, 0x0000000047d74e86 }, + /* x^243712 mod p(x)` << 1, x^243776 mod p(x)` << 1 */ + { 0x0000000065113872, 0x00000001407e9e22 }, + /* x^242688 mod p(x)` << 1, x^242752 mod p(x)` << 1 */ + { 0x000000005c07ec10, 0x00000001da967bda }, + /* x^241664 mod p(x)` << 1, x^241728 mod p(x)` << 1 */ + { 0x0000000187590924, 0x000000006c898368 }, + /* x^240640 mod p(x)` << 1, x^240704 mod p(x)` << 1 */ + { 0x00000000e35da7c6, 0x00000000f2d14c98 }, + /* x^239616 mod p(x)` << 1, x^239680 mod p(x)` << 1 */ + { 0x000000000415855a, 0x00000001993c6ad4 }, + /* x^238592 mod p(x)` << 1, x^238656 mod p(x)` << 1 */ + { 0x0000000073617758, 0x000000014683d1ac }, + /* x^237568 mod p(x)` << 1, x^237632 mod p(x)` << 1 */ + { 0x0000000176021d28, 0x00000001a7c93e6c }, + /* x^236544 mod p(x)` << 1, x^236608 mod p(x)` << 1 */ + { 0x00000001c358fd0a, 0x000000010211e90a }, + /* x^235520 mod p(x)` << 1, x^235584 mod p(x)` << 1 */ + { 0x00000001ff7a2c18, 0x000000001119403e }, + /* x^234496 mod p(x)` << 1, x^234560 mod p(x)` << 1 */ + { 0x00000000f2d9f7e4, 0x000000001c3261aa }, + /* x^233472 mod p(x)` << 1, x^233536 mod p(x)` << 1 */ + { 0x000000016cf1f9c8, 0x000000014e37a634 }, + /* x^232448 mod p(x)` << 1, x^232512 mod p(x)` << 1 */ + { 0x000000010af9279a, 0x0000000073786c0c }, + /* x^231424 mod p(x)` << 1, x^231488 mod p(x)` << 1 */ + { 0x0000000004f101e8, 0x000000011dc037f8 }, + /* x^230400 mod p(x)` << 1, x^230464 mod p(x)` << 1 */ + { 0x0000000070bcf184, 0x0000000031433dfc }, + /* x^229376 mod p(x)` << 1, x^229440 mod p(x)` << 1 */ + { 0x000000000a8de642, 0x000000009cde8348 }, + /* x^228352 mod p(x)` << 1, x^228416 mod p(x)` << 1 */ + { 0x0000000062ea130c, 0x0000000038d3c2a6 }, 
+ /* x^227328 mod p(x)` << 1, x^227392 mod p(x)` << 1 */ + { 0x00000001eb31cbb2, 0x000000011b25f260 }, + /* x^226304 mod p(x)` << 1, x^226368 mod p(x)` << 1 */ + { 0x0000000170783448, 0x000000001629e6f0 }, + /* x^225280 mod p(x)` << 1, x^225344 mod p(x)` << 1 */ + { 0x00000001a684b4c6, 0x0000000160838b4c }, + /* x^224256 mod p(x)` << 1, x^224320 mod p(x)` << 1 */ + { 0x00000000253ca5b4, 0x000000007a44011c }, + /* x^223232 mod p(x)` << 1, x^223296 mod p(x)` << 1 */ + { 0x0000000057b4b1e2, 0x00000000226f417a }, + /* x^222208 mod p(x)` << 1, x^222272 mod p(x)` << 1 */ + { 0x00000000b6bd084c, 0x0000000045eb2eb4 }, + /* x^221184 mod p(x)` << 1, x^221248 mod p(x)` << 1 */ + { 0x0000000123c2d592, 0x000000014459d70c }, + /* x^220160 mod p(x)` << 1, x^220224 mod p(x)` << 1 */ + { 0x00000000159dafce, 0x00000001d406ed82 }, + /* x^219136 mod p(x)` << 1, x^219200 mod p(x)` << 1 */ + { 0x0000000127e1a64e, 0x0000000160c8e1a8 }, + /* x^218112 mod p(x)` << 1, x^218176 mod p(x)` << 1 */ + { 0x0000000056860754, 0x0000000027ba8098 }, + /* x^217088 mod p(x)` << 1, x^217152 mod p(x)` << 1 */ + { 0x00000001e661aae8, 0x000000006d92d018 }, + /* x^216064 mod p(x)` << 1, x^216128 mod p(x)` << 1 */ + { 0x00000000f82c6166, 0x000000012ed7e3f2 }, + /* x^215040 mod p(x)` << 1, x^215104 mod p(x)` << 1 */ + { 0x00000000c4f9c7ae, 0x000000002dc87788 }, + /* x^214016 mod p(x)` << 1, x^214080 mod p(x)` << 1 */ + { 0x0000000074203d20, 0x0000000018240bb8 }, + /* x^212992 mod p(x)` << 1, x^213056 mod p(x)` << 1 */ + { 0x0000000198173052, 0x000000001ad38158 }, + /* x^211968 mod p(x)` << 1, x^212032 mod p(x)` << 1 */ + { 0x00000001ce8aba54, 0x00000001396b78f2 }, + /* x^210944 mod p(x)` << 1, x^211008 mod p(x)` << 1 */ + { 0x00000001850d5d94, 0x000000011a681334 }, + /* x^209920 mod p(x)` << 1, x^209984 mod p(x)` << 1 */ + { 0x00000001d609239c, 0x000000012104732e }, + /* x^208896 mod p(x)` << 1, x^208960 mod p(x)` << 1 */ + { 0x000000001595f048, 0x00000000a140d90c }, + /* x^207872 mod p(x)` << 1, x^207936 mod 
p(x)` << 1 */ + { 0x0000000042ccee08, 0x00000001b7215eda }, + /* x^206848 mod p(x)` << 1, x^206912 mod p(x)` << 1 */ + { 0x000000010a389d74, 0x00000001aaf1df3c }, + /* x^205824 mod p(x)` << 1, x^205888 mod p(x)` << 1 */ + { 0x000000012a840da6, 0x0000000029d15b8a }, + /* x^204800 mod p(x)` << 1, x^204864 mod p(x)` << 1 */ + { 0x000000001d181c0c, 0x00000000f1a96922 }, + /* x^203776 mod p(x)` << 1, x^203840 mod p(x)` << 1 */ + { 0x0000000068b7d1f6, 0x00000001ac80d03c }, + /* x^202752 mod p(x)` << 1, x^202816 mod p(x)` << 1 */ + { 0x000000005b0f14fc, 0x000000000f11d56a }, + /* x^201728 mod p(x)` << 1, x^201792 mod p(x)` << 1 */ + { 0x0000000179e9e730, 0x00000001f1c022a2 }, + /* x^200704 mod p(x)` << 1, x^200768 mod p(x)` << 1 */ + { 0x00000001ce1368d6, 0x0000000173d00ae2 }, + /* x^199680 mod p(x)` << 1, x^199744 mod p(x)` << 1 */ + { 0x0000000112c3a84c, 0x00000001d4ffe4ac }, + /* x^198656 mod p(x)` << 1, x^198720 mod p(x)` << 1 */ + { 0x00000000de940fee, 0x000000016edc5ae4 }, + /* x^197632 mod p(x)` << 1, x^197696 mod p(x)` << 1 */ + { 0x00000000fe896b7e, 0x00000001f1a02140 }, + /* x^196608 mod p(x)` << 1, x^196672 mod p(x)` << 1 */ + { 0x00000001f797431c, 0x00000000ca0b28a0 }, + /* x^195584 mod p(x)` << 1, x^195648 mod p(x)` << 1 */ + { 0x0000000053e989ba, 0x00000001928e30a2 }, + /* x^194560 mod p(x)` << 1, x^194624 mod p(x)` << 1 */ + { 0x000000003920cd16, 0x0000000097b1b002 }, + /* x^193536 mod p(x)` << 1, x^193600 mod p(x)` << 1 */ + { 0x00000001e6f579b8, 0x00000000b15bf906 }, + /* x^192512 mod p(x)` << 1, x^192576 mod p(x)` << 1 */ + { 0x000000007493cb0a, 0x00000000411c5d52 }, + /* x^191488 mod p(x)` << 1, x^191552 mod p(x)` << 1 */ + { 0x00000001bdd376d8, 0x00000001c36f3300 }, + /* x^190464 mod p(x)` << 1, x^190528 mod p(x)` << 1 */ + { 0x000000016badfee6, 0x00000001119227e0 }, + /* x^189440 mod p(x)` << 1, x^189504 mod p(x)` << 1 */ + { 0x0000000071de5c58, 0x00000000114d4702 }, + /* x^188416 mod p(x)` << 1, x^188480 mod p(x)` << 1 */ + { 0x00000000453f317c, 
0x00000000458b5b98 }, + /* x^187392 mod p(x)` << 1, x^187456 mod p(x)` << 1 */ + { 0x0000000121675cce, 0x000000012e31fb8e }, + /* x^186368 mod p(x)` << 1, x^186432 mod p(x)` << 1 */ + { 0x00000001f409ee92, 0x000000005cf619d8 }, + /* x^185344 mod p(x)` << 1, x^185408 mod p(x)` << 1 */ + { 0x00000000f36b9c88, 0x0000000063f4d8b2 }, + /* x^184320 mod p(x)` << 1, x^184384 mod p(x)` << 1 */ + { 0x0000000036b398f4, 0x000000004138dc8a }, + /* x^183296 mod p(x)` << 1, x^183360 mod p(x)` << 1 */ + { 0x00000001748f9adc, 0x00000001d29ee8e0 }, + /* x^182272 mod p(x)` << 1, x^182336 mod p(x)` << 1 */ + { 0x00000001be94ec00, 0x000000006a08ace8 }, + /* x^181248 mod p(x)` << 1, x^181312 mod p(x)` << 1 */ + { 0x00000000b74370d6, 0x0000000127d42010 }, + /* x^180224 mod p(x)` << 1, x^180288 mod p(x)` << 1 */ + { 0x00000001174d0b98, 0x0000000019d76b62 }, + /* x^179200 mod p(x)` << 1, x^179264 mod p(x)` << 1 */ + { 0x00000000befc06a4, 0x00000001b1471f6e }, + /* x^178176 mod p(x)` << 1, x^178240 mod p(x)` << 1 */ + { 0x00000001ae125288, 0x00000001f64c19cc }, + /* x^177152 mod p(x)` << 1, x^177216 mod p(x)` << 1 */ + { 0x0000000095c19b34, 0x00000000003c0ea0 }, + /* x^176128 mod p(x)` << 1, x^176192 mod p(x)` << 1 */ + { 0x00000001a78496f2, 0x000000014d73abf6 }, + /* x^175104 mod p(x)` << 1, x^175168 mod p(x)` << 1 */ + { 0x00000001ac5390a0, 0x00000001620eb844 }, + /* x^174080 mod p(x)` << 1, x^174144 mod p(x)` << 1 */ + { 0x000000002a80ed6e, 0x0000000147655048 }, + /* x^173056 mod p(x)` << 1, x^173120 mod p(x)` << 1 */ + { 0x00000001fa9b0128, 0x0000000067b5077e }, + /* x^172032 mod p(x)` << 1, x^172096 mod p(x)` << 1 */ + { 0x00000001ea94929e, 0x0000000010ffe206 }, + /* x^171008 mod p(x)` << 1, x^171072 mod p(x)` << 1 */ + { 0x0000000125f4305c, 0x000000000fee8f1e }, + /* x^169984 mod p(x)` << 1, x^170048 mod p(x)` << 1 */ + { 0x00000001471e2002, 0x00000001da26fbae }, + /* x^168960 mod p(x)` << 1, x^169024 mod p(x)` << 1 */ + { 0x0000000132d2253a, 0x00000001b3a8bd88 }, + /* x^167936 mod 
p(x)` << 1, x^168000 mod p(x)` << 1 */ + { 0x00000000f26b3592, 0x00000000e8f3898e }, + /* x^166912 mod p(x)` << 1, x^166976 mod p(x)` << 1 */ + { 0x00000000bc8b67b0, 0x00000000b0d0d28c }, + /* x^165888 mod p(x)` << 1, x^165952 mod p(x)` << 1 */ + { 0x000000013a826ef2, 0x0000000030f2a798 }, + /* x^164864 mod p(x)` << 1, x^164928 mod p(x)` << 1 */ + { 0x0000000081482c84, 0x000000000fba1002 }, + /* x^163840 mod p(x)` << 1, x^163904 mod p(x)` << 1 */ + { 0x00000000e77307c2, 0x00000000bdb9bd72 }, + /* x^162816 mod p(x)` << 1, x^162880 mod p(x)` << 1 */ + { 0x00000000d4a07ec8, 0x0000000075d3bf5a }, + /* x^161792 mod p(x)` << 1, x^161856 mod p(x)` << 1 */ + { 0x0000000017102100, 0x00000000ef1f98a0 }, + /* x^160768 mod p(x)` << 1, x^160832 mod p(x)` << 1 */ + { 0x00000000db406486, 0x00000000689c7602 }, + /* x^159744 mod p(x)` << 1, x^159808 mod p(x)` << 1 */ + { 0x0000000192db7f88, 0x000000016d5fa5fe }, + /* x^158720 mod p(x)` << 1, x^158784 mod p(x)` << 1 */ + { 0x000000018bf67b1e, 0x00000001d0d2b9ca }, + /* x^157696 mod p(x)` << 1, x^157760 mod p(x)` << 1 */ + { 0x000000007c09163e, 0x0000000041e7b470 }, + /* x^156672 mod p(x)` << 1, x^156736 mod p(x)` << 1 */ + { 0x000000000adac060, 0x00000001cbb6495e }, + /* x^155648 mod p(x)` << 1, x^155712 mod p(x)` << 1 */ + { 0x00000000bd8316ae, 0x000000010052a0b0 }, + /* x^154624 mod p(x)` << 1, x^154688 mod p(x)` << 1 */ + { 0x000000019f09ab54, 0x00000001d8effb5c }, + /* x^153600 mod p(x)` << 1, x^153664 mod p(x)` << 1 */ + { 0x0000000125155542, 0x00000001d969853c }, + /* x^152576 mod p(x)` << 1, x^152640 mod p(x)` << 1 */ + { 0x000000018fdb5882, 0x00000000523ccce2 }, + /* x^151552 mod p(x)` << 1, x^151616 mod p(x)` << 1 */ + { 0x00000000e794b3f4, 0x000000001e2436bc }, + /* x^150528 mod p(x)` << 1, x^150592 mod p(x)` << 1 */ + { 0x000000016f9bb022, 0x00000000ddd1c3a2 }, + /* x^149504 mod p(x)` << 1, x^149568 mod p(x)` << 1 */ + { 0x00000000290c9978, 0x0000000019fcfe38 }, + /* x^148480 mod p(x)` << 1, x^148544 mod p(x)` << 1 */ + { 
0x0000000083c0f350, 0x00000001ce95db64 }, + /* x^147456 mod p(x)` << 1, x^147520 mod p(x)` << 1 */ + { 0x0000000173ea6628, 0x00000000af582806 }, + /* x^146432 mod p(x)` << 1, x^146496 mod p(x)` << 1 */ + { 0x00000001c8b4e00a, 0x00000001006388f6 }, + /* x^145408 mod p(x)` << 1, x^145472 mod p(x)` << 1 */ + { 0x00000000de95d6aa, 0x0000000179eca00a }, + /* x^144384 mod p(x)` << 1, x^144448 mod p(x)` << 1 */ + { 0x000000010b7f7248, 0x0000000122410a6a }, + /* x^143360 mod p(x)` << 1, x^143424 mod p(x)` << 1 */ + { 0x00000001326e3a06, 0x000000004288e87c }, + /* x^142336 mod p(x)` << 1, x^142400 mod p(x)` << 1 */ + { 0x00000000bb62c2e6, 0x000000016c5490da }, + /* x^141312 mod p(x)` << 1, x^141376 mod p(x)` << 1 */ + { 0x0000000156a4b2c2, 0x00000000d1c71f6e }, + /* x^140288 mod p(x)` << 1, x^140352 mod p(x)` << 1 */ + { 0x000000011dfe763a, 0x00000001b4ce08a6 }, + /* x^139264 mod p(x)` << 1, x^139328 mod p(x)` << 1 */ + { 0x000000007bcca8e2, 0x00000001466ba60c }, + /* x^138240 mod p(x)` << 1, x^138304 mod p(x)` << 1 */ + { 0x0000000186118faa, 0x00000001f6c488a4 }, + /* x^137216 mod p(x)` << 1, x^137280 mod p(x)` << 1 */ + { 0x0000000111a65a88, 0x000000013bfb0682 }, + /* x^136192 mod p(x)` << 1, x^136256 mod p(x)` << 1 */ + { 0x000000003565e1c4, 0x00000000690e9e54 }, + /* x^135168 mod p(x)` << 1, x^135232 mod p(x)` << 1 */ + { 0x000000012ed02a82, 0x00000000281346b6 }, + /* x^134144 mod p(x)` << 1, x^134208 mod p(x)` << 1 */ + { 0x00000000c486ecfc, 0x0000000156464024 }, + /* x^133120 mod p(x)` << 1, x^133184 mod p(x)` << 1 */ + { 0x0000000001b951b2, 0x000000016063a8dc }, + /* x^132096 mod p(x)` << 1, x^132160 mod p(x)` << 1 */ + { 0x0000000048143916, 0x0000000116a66362 }, + /* x^131072 mod p(x)` << 1, x^131136 mod p(x)` << 1 */ + { 0x00000001dc2ae124, 0x000000017e8aa4d2 }, + /* x^130048 mod p(x)` << 1, x^130112 mod p(x)` << 1 */ + { 0x00000001416c58d6, 0x00000001728eb10c }, + /* x^129024 mod p(x)` << 1, x^129088 mod p(x)` << 1 */ + { 0x00000000a479744a, 0x00000001b08fd7fa }, 
+ /* x^128000 mod p(x)` << 1, x^128064 mod p(x)` << 1 */ + { 0x0000000096ca3a26, 0x00000001092a16e8 }, + /* x^126976 mod p(x)` << 1, x^127040 mod p(x)` << 1 */ + { 0x00000000ff223d4e, 0x00000000a505637c }, + /* x^125952 mod p(x)` << 1, x^126016 mod p(x)` << 1 */ + { 0x000000010e84da42, 0x00000000d94869b2 }, + /* x^124928 mod p(x)` << 1, x^124992 mod p(x)` << 1 */ + { 0x00000001b61ba3d0, 0x00000001c8b203ae }, + /* x^123904 mod p(x)` << 1, x^123968 mod p(x)` << 1 */ + { 0x00000000680f2de8, 0x000000005704aea0 }, + /* x^122880 mod p(x)` << 1, x^122944 mod p(x)` << 1 */ + { 0x000000008772a9a8, 0x000000012e295fa2 }, + /* x^121856 mod p(x)` << 1, x^121920 mod p(x)` << 1 */ + { 0x0000000155f295bc, 0x000000011d0908bc }, + /* x^120832 mod p(x)` << 1, x^120896 mod p(x)` << 1 */ + { 0x00000000595f9282, 0x0000000193ed97ea }, + /* x^119808 mod p(x)` << 1, x^119872 mod p(x)` << 1 */ + { 0x0000000164b1c25a, 0x000000013a0f1c52 }, + /* x^118784 mod p(x)` << 1, x^118848 mod p(x)` << 1 */ + { 0x00000000fbd67c50, 0x000000010c2c40c0 }, + /* x^117760 mod p(x)` << 1, x^117824 mod p(x)` << 1 */ + { 0x0000000096076268, 0x00000000ff6fac3e }, + /* x^116736 mod p(x)` << 1, x^116800 mod p(x)` << 1 */ + { 0x00000001d288e4cc, 0x000000017b3609c0 }, + /* x^115712 mod p(x)` << 1, x^115776 mod p(x)` << 1 */ + { 0x00000001eaac1bdc, 0x0000000088c8c922 }, + /* x^114688 mod p(x)` << 1, x^114752 mod p(x)` << 1 */ + { 0x00000001f1ea39e2, 0x00000001751baae6 }, + /* x^113664 mod p(x)` << 1, x^113728 mod p(x)` << 1 */ + { 0x00000001eb6506fc, 0x0000000107952972 }, + /* x^112640 mod p(x)` << 1, x^112704 mod p(x)` << 1 */ + { 0x000000010f806ffe, 0x0000000162b00abe }, + /* x^111616 mod p(x)` << 1, x^111680 mod p(x)` << 1 */ + { 0x000000010408481e, 0x000000000d7b404c }, + /* x^110592 mod p(x)` << 1, x^110656 mod p(x)` << 1 */ + { 0x0000000188260534, 0x00000000763b13d4 }, + /* x^109568 mod p(x)` << 1, x^109632 mod p(x)` << 1 */ + { 0x0000000058fc73e0, 0x00000000f6dc22d8 }, + /* x^108544 mod p(x)` << 1, x^108608 mod 
p(x)` << 1 */ + { 0x00000000391c59b8, 0x000000007daae060 }, + /* x^107520 mod p(x)` << 1, x^107584 mod p(x)` << 1 */ + { 0x000000018b638400, 0x000000013359ab7c }, + /* x^106496 mod p(x)` << 1, x^106560 mod p(x)` << 1 */ + { 0x000000011738f5c4, 0x000000008add438a }, + /* x^105472 mod p(x)` << 1, x^105536 mod p(x)` << 1 */ + { 0x000000008cf7c6da, 0x00000001edbefdea }, + /* x^104448 mod p(x)` << 1, x^104512 mod p(x)` << 1 */ + { 0x00000001ef97fb16, 0x000000004104e0f8 }, + /* x^103424 mod p(x)` << 1, x^103488 mod p(x)` << 1 */ + { 0x0000000102130e20, 0x00000000b48a8222 }, + /* x^102400 mod p(x)` << 1, x^102464 mod p(x)` << 1 */ + { 0x00000000db968898, 0x00000001bcb46844 }, + /* x^101376 mod p(x)` << 1, x^101440 mod p(x)` << 1 */ + { 0x00000000b5047b5e, 0x000000013293ce0a }, + /* x^100352 mod p(x)` << 1, x^100416 mod p(x)` << 1 */ + { 0x000000010b90fdb2, 0x00000001710d0844 }, + /* x^99328 mod p(x)` << 1, x^99392 mod p(x)` << 1 */ + { 0x000000004834a32e, 0x0000000117907f6e }, + /* x^98304 mod p(x)` << 1, x^98368 mod p(x)` << 1 */ + { 0x0000000059c8f2b0, 0x0000000087ddf93e }, + /* x^97280 mod p(x)` << 1, x^97344 mod p(x)` << 1 */ + { 0x0000000122cec508, 0x000000005970e9b0 }, + /* x^96256 mod p(x)` << 1, x^96320 mod p(x)` << 1 */ + { 0x000000000a330cda, 0x0000000185b2b7d0 }, + /* x^95232 mod p(x)` << 1, x^95296 mod p(x)` << 1 */ + { 0x000000014a47148c, 0x00000001dcee0efc }, + /* x^94208 mod p(x)` << 1, x^94272 mod p(x)` << 1 */ + { 0x0000000042c61cb8, 0x0000000030da2722 }, + /* x^93184 mod p(x)` << 1, x^93248 mod p(x)` << 1 */ + { 0x0000000012fe6960, 0x000000012f925a18 }, + /* x^92160 mod p(x)` << 1, x^92224 mod p(x)` << 1 */ + { 0x00000000dbda2c20, 0x00000000dd2e357c }, + /* x^91136 mod p(x)` << 1, x^91200 mod p(x)` << 1 */ + { 0x000000011122410c, 0x00000000071c80de }, + /* x^90112 mod p(x)` << 1, x^90176 mod p(x)` << 1 */ + { 0x00000000977b2070, 0x000000011513140a }, + /* x^89088 mod p(x)` << 1, x^89152 mod p(x)` << 1 */ + { 0x000000014050438e, 0x00000001df876e8e }, + /* 
x^88064 mod p(x)` << 1, x^88128 mod p(x)` << 1 */ + { 0x0000000147c840e8, 0x000000015f81d6ce }, + /* x^87040 mod p(x)` << 1, x^87104 mod p(x)` << 1 */ + { 0x00000001cc7c88ce, 0x000000019dd94dbe }, + /* x^86016 mod p(x)` << 1, x^86080 mod p(x)` << 1 */ + { 0x00000001476b35a4, 0x00000001373d206e }, + /* x^84992 mod p(x)` << 1, x^85056 mod p(x)` << 1 */ + { 0x000000013d52d508, 0x00000000668ccade }, + /* x^83968 mod p(x)` << 1, x^84032 mod p(x)` << 1 */ + { 0x000000008e4be32e, 0x00000001b192d268 }, + /* x^82944 mod p(x)` << 1, x^83008 mod p(x)` << 1 */ + { 0x00000000024120fe, 0x00000000e30f3a78 }, + /* x^81920 mod p(x)` << 1, x^81984 mod p(x)` << 1 */ + { 0x00000000ddecddb4, 0x000000010ef1f7bc }, + /* x^80896 mod p(x)` << 1, x^80960 mod p(x)` << 1 */ + { 0x00000000d4d403bc, 0x00000001f5ac7380 }, + /* x^79872 mod p(x)` << 1, x^79936 mod p(x)` << 1 */ + { 0x00000001734b89aa, 0x000000011822ea70 }, + /* x^78848 mod p(x)` << 1, x^78912 mod p(x)` << 1 */ + { 0x000000010e7a58d6, 0x00000000c3a33848 }, + /* x^77824 mod p(x)` << 1, x^77888 mod p(x)` << 1 */ + { 0x00000001f9f04e9c, 0x00000001bd151c24 }, + /* x^76800 mod p(x)` << 1, x^76864 mod p(x)` << 1 */ + { 0x00000000b692225e, 0x0000000056002d76 }, + /* x^75776 mod p(x)` << 1, x^75840 mod p(x)` << 1 */ + { 0x000000019b8d3f3e, 0x000000014657c4f4 }, + /* x^74752 mod p(x)` << 1, x^74816 mod p(x)` << 1 */ + { 0x00000001a874f11e, 0x0000000113742d7c }, + /* x^73728 mod p(x)` << 1, x^73792 mod p(x)` << 1 */ + { 0x000000010d5a4254, 0x000000019c5920ba }, + /* x^72704 mod p(x)` << 1, x^72768 mod p(x)` << 1 */ + { 0x00000000bbb2f5d6, 0x000000005216d2d6 }, + /* x^71680 mod p(x)` << 1, x^71744 mod p(x)` << 1 */ + { 0x0000000179cc0e36, 0x0000000136f5ad8a }, + /* x^70656 mod p(x)` << 1, x^70720 mod p(x)` << 1 */ + { 0x00000001dca1da4a, 0x000000018b07beb6 }, + /* x^69632 mod p(x)` << 1, x^69696 mod p(x)` << 1 */ + { 0x00000000feb1a192, 0x00000000db1e93b0 }, + /* x^68608 mod p(x)` << 1, x^68672 mod p(x)` << 1 */ + { 0x00000000d1eeedd6, 
0x000000000b96fa3a }, + /* x^67584 mod p(x)` << 1, x^67648 mod p(x)` << 1 */ + { 0x000000008fad9bb4, 0x00000001d9968af0 }, + /* x^66560 mod p(x)` << 1, x^66624 mod p(x)` << 1 */ + { 0x00000001884938e4, 0x000000000e4a77a2 }, + /* x^65536 mod p(x)` << 1, x^65600 mod p(x)` << 1 */ + { 0x00000001bc2e9bc0, 0x00000000508c2ac8 }, + /* x^64512 mod p(x)` << 1, x^64576 mod p(x)` << 1 */ + { 0x00000001f9658a68, 0x0000000021572a80 }, + /* x^63488 mod p(x)` << 1, x^63552 mod p(x)` << 1 */ + { 0x000000001b9224fc, 0x00000001b859daf2 }, + /* x^62464 mod p(x)` << 1, x^62528 mod p(x)` << 1 */ + { 0x0000000055b2fb84, 0x000000016f788474 }, + /* x^61440 mod p(x)` << 1, x^61504 mod p(x)` << 1 */ + { 0x000000018b090348, 0x00000001b438810e }, + /* x^60416 mod p(x)` << 1, x^60480 mod p(x)` << 1 */ + { 0x000000011ccbd5ea, 0x0000000095ddc6f2 }, + /* x^59392 mod p(x)` << 1, x^59456 mod p(x)` << 1 */ + { 0x0000000007ae47f8, 0x00000001d977c20c }, + /* x^58368 mod p(x)` << 1, x^58432 mod p(x)` << 1 */ + { 0x0000000172acbec0, 0x00000000ebedb99a }, + /* x^57344 mod p(x)` << 1, x^57408 mod p(x)` << 1 */ + { 0x00000001c6e3ff20, 0x00000001df9e9e92 }, + /* x^56320 mod p(x)` << 1, x^56384 mod p(x)` << 1 */ + { 0x00000000e1b38744, 0x00000001a4a3f952 }, + /* x^55296 mod p(x)` << 1, x^55360 mod p(x)` << 1 */ + { 0x00000000791585b2, 0x00000000e2f51220 }, + /* x^54272 mod p(x)` << 1, x^54336 mod p(x)` << 1 */ + { 0x00000000ac53b894, 0x000000004aa01f3e }, + /* x^53248 mod p(x)` << 1, x^53312 mod p(x)` << 1 */ + { 0x00000001ed5f2cf4, 0x00000000b3e90a58 }, + /* x^52224 mod p(x)` << 1, x^52288 mod p(x)` << 1 */ + { 0x00000001df48b2e0, 0x000000000c9ca2aa }, + /* x^51200 mod p(x)` << 1, x^51264 mod p(x)` << 1 */ + { 0x00000000049c1c62, 0x0000000151682316 }, + /* x^50176 mod p(x)` << 1, x^50240 mod p(x)` << 1 */ + { 0x000000017c460c12, 0x0000000036fce78c }, + /* x^49152 mod p(x)` << 1, x^49216 mod p(x)` << 1 */ + { 0x000000015be4da7e, 0x000000009037dc10 }, + /* x^48128 mod p(x)` << 1, x^48192 mod p(x)` << 1 */ + { 
0x000000010f38f668, 0x00000000d3298582 }, + /* x^47104 mod p(x)` << 1, x^47168 mod p(x)` << 1 */ + { 0x0000000039f40a00, 0x00000001b42e8ad6 }, + /* x^46080 mod p(x)` << 1, x^46144 mod p(x)` << 1 */ + { 0x00000000bd4c10c4, 0x00000000142a9838 }, + /* x^45056 mod p(x)` << 1, x^45120 mod p(x)` << 1 */ + { 0x0000000042db1d98, 0x0000000109c7f190 }, + /* x^44032 mod p(x)` << 1, x^44096 mod p(x)` << 1 */ + { 0x00000001c905bae6, 0x0000000056ff9310 }, + /* x^43008 mod p(x)` << 1, x^43072 mod p(x)` << 1 */ + { 0x00000000069d40ea, 0x00000001594513aa }, + /* x^41984 mod p(x)` << 1, x^42048 mod p(x)` << 1 */ + { 0x000000008e4fbad0, 0x00000001e3b5b1e8 }, + /* x^40960 mod p(x)` << 1, x^41024 mod p(x)` << 1 */ + { 0x0000000047bedd46, 0x000000011dd5fc08 }, + /* x^39936 mod p(x)` << 1, x^40000 mod p(x)` << 1 */ + { 0x0000000026396bf8, 0x00000001675f0cc2 }, + /* x^38912 mod p(x)` << 1, x^38976 mod p(x)` << 1 */ + { 0x00000000379beb92, 0x00000000d1c8dd44 }, + /* x^37888 mod p(x)` << 1, x^37952 mod p(x)` << 1 */ + { 0x000000000abae54a, 0x0000000115ebd3d8 }, + /* x^36864 mod p(x)` << 1, x^36928 mod p(x)` << 1 */ + { 0x0000000007e6a128, 0x00000001ecbd0dac }, + /* x^35840 mod p(x)` << 1, x^35904 mod p(x)` << 1 */ + { 0x000000000ade29d2, 0x00000000cdf67af2 }, + /* x^34816 mod p(x)` << 1, x^34880 mod p(x)` << 1 */ + { 0x00000000f974c45c, 0x000000004c01ff4c }, + /* x^33792 mod p(x)` << 1, x^33856 mod p(x)` << 1 */ + { 0x00000000e77ac60a, 0x00000000f2d8657e }, + /* x^32768 mod p(x)` << 1, x^32832 mod p(x)` << 1 */ + { 0x0000000145895816, 0x000000006bae74c4 }, + /* x^31744 mod p(x)` << 1, x^31808 mod p(x)` << 1 */ + { 0x0000000038e362be, 0x0000000152af8aa0 }, + /* x^30720 mod p(x)` << 1, x^30784 mod p(x)` << 1 */ + { 0x000000007f991a64, 0x0000000004663802 }, + /* x^29696 mod p(x)` << 1, x^29760 mod p(x)` << 1 */ + { 0x00000000fa366d3a, 0x00000001ab2f5afc }, + /* x^28672 mod p(x)` << 1, x^28736 mod p(x)` << 1 */ + { 0x00000001a2bb34f0, 0x0000000074a4ebd4 }, + /* x^27648 mod p(x)` << 1, x^27712 
mod p(x)` << 1 */ + { 0x0000000028a9981e, 0x00000001d7ab3a4c }, + /* x^26624 mod p(x)` << 1, x^26688 mod p(x)` << 1 */ + { 0x00000001dbc672be, 0x00000001a8da60c6 }, + /* x^25600 mod p(x)` << 1, x^25664 mod p(x)` << 1 */ + { 0x00000000b04d77f6, 0x000000013cf63820 }, + /* x^24576 mod p(x)` << 1, x^24640 mod p(x)` << 1 */ + { 0x0000000124400d96, 0x00000000bec12e1e }, + /* x^23552 mod p(x)` << 1, x^23616 mod p(x)` << 1 */ + { 0x000000014ca4b414, 0x00000001c6368010 }, + /* x^22528 mod p(x)` << 1, x^22592 mod p(x)` << 1 */ + { 0x000000012fe2c938, 0x00000001e6e78758 }, + /* x^21504 mod p(x)` << 1, x^21568 mod p(x)` << 1 */ + { 0x00000001faed01e6, 0x000000008d7f2b3c }, + /* x^20480 mod p(x)` << 1, x^20544 mod p(x)` << 1 */ + { 0x000000007e80ecfe, 0x000000016b4a156e }, + /* x^19456 mod p(x)` << 1, x^19520 mod p(x)` << 1 */ + { 0x0000000098daee94, 0x00000001c63cfeb6 }, + /* x^18432 mod p(x)` << 1, x^18496 mod p(x)` << 1 */ + { 0x000000010a04edea, 0x000000015f902670 }, + /* x^17408 mod p(x)` << 1, x^17472 mod p(x)` << 1 */ + { 0x00000001c00b4524, 0x00000001cd5de11e }, + /* x^16384 mod p(x)` << 1, x^16448 mod p(x)` << 1 */ + { 0x0000000170296550, 0x000000001acaec54 }, + /* x^15360 mod p(x)` << 1, x^15424 mod p(x)` << 1 */ + { 0x0000000181afaa48, 0x000000002bd0ca78 }, + /* x^14336 mod p(x)` << 1, x^14400 mod p(x)` << 1 */ + { 0x0000000185a31ffa, 0x0000000032d63d5c }, + /* x^13312 mod p(x)` << 1, x^13376 mod p(x)` << 1 */ + { 0x000000002469f608, 0x000000001c6d4e4c }, + /* x^12288 mod p(x)` << 1, x^12352 mod p(x)` << 1 */ + { 0x000000006980102a, 0x0000000106a60b92 }, + /* x^11264 mod p(x)` << 1, x^11328 mod p(x)` << 1 */ + { 0x0000000111ea9ca8, 0x00000000d3855e12 }, + /* x^10240 mod p(x)` << 1, x^10304 mod p(x)` << 1 */ + { 0x00000001bd1d29ce, 0x00000000e3125636 }, + /* x^9216 mod p(x)` << 1, x^9280 mod p(x)` << 1 */ + { 0x00000001b34b9580, 0x000000009e8f7ea4 }, + /* x^8192 mod p(x)` << 1, x^8256 mod p(x)` << 1 */ + { 0x000000003076054e, 0x00000001c82e562c }, + /* x^7168 mod 
p(x)` << 1, x^7232 mod p(x)` << 1 */ + { 0x000000012a608ea4, 0x00000000ca9f09ce }, + /* x^6144 mod p(x)` << 1, x^6208 mod p(x)` << 1 */ + { 0x00000000784d05fe, 0x00000000c63764e6 }, + /* x^5120 mod p(x)` << 1, x^5184 mod p(x)` << 1 */ + { 0x000000016ef0d82a, 0x0000000168d2e49e }, + /* x^4096 mod p(x)` << 1, x^4160 mod p(x)` << 1 */ + { 0x0000000075bda454, 0x00000000e986c148 }, + /* x^3072 mod p(x)` << 1, x^3136 mod p(x)` << 1 */ + { 0x000000003dc0a1c4, 0x00000000cfb65894 }, + /* x^2048 mod p(x)` << 1, x^2112 mod p(x)` << 1 */ + { 0x00000000e9a5d8be, 0x0000000111cadee4 }, + /* x^1024 mod p(x)` << 1, x^1088 mod p(x)` << 1 */ + { 0x00000001609bc4b4, 0x0000000171fb63ce } +#else /* __LITTLE_ENDIAN__ */ + /* x^261120 mod p(x)` << 1, x^261184 mod p(x)` << 1 */ + { 0x00000000b6ca9e20, 0x000000009c37c408 }, + /* x^260096 mod p(x)` << 1, x^260160 mod p(x)` << 1 */ + { 0x00000000350249a8, 0x00000001b51df26c }, + /* x^259072 mod p(x)` << 1, x^259136 mod p(x)` << 1 */ + { 0x00000001862dac54, 0x000000000724b9d0 }, + /* x^258048 mod p(x)` << 1, x^258112 mod p(x)` << 1 */ + { 0x00000001d87fb48c, 0x00000001c00532fe }, + /* x^257024 mod p(x)` << 1, x^257088 mod p(x)` << 1 */ + { 0x00000001f39b699e, 0x00000000f05a9362 }, + /* x^256000 mod p(x)` << 1, x^256064 mod p(x)` << 1 */ + { 0x0000000101da11b4, 0x00000001e1007970 }, + /* x^254976 mod p(x)` << 1, x^255040 mod p(x)` << 1 */ + { 0x00000001cab571e0, 0x00000000a57366ee }, + /* x^253952 mod p(x)` << 1, x^254016 mod p(x)` << 1 */ + { 0x00000000c7020cfe, 0x0000000192011284 }, + /* x^252928 mod p(x)` << 1, x^252992 mod p(x)` << 1 */ + { 0x00000000cdaed1ae, 0x0000000162716d9a }, + /* x^251904 mod p(x)` << 1, x^251968 mod p(x)` << 1 */ + { 0x00000001e804effc, 0x00000000cd97ecde }, + /* x^250880 mod p(x)` << 1, x^250944 mod p(x)` << 1 */ + { 0x0000000077c3ea3a, 0x0000000058812bc0 }, + /* x^249856 mod p(x)` << 1, x^249920 mod p(x)` << 1 */ + { 0x0000000068df31b4, 0x0000000088b8c12e }, + /* x^248832 mod p(x)` << 1, x^248896 mod p(x)` << 1 */ 
+ { 0x00000000b059b6c2, 0x00000001230b234c }, + /* x^247808 mod p(x)` << 1, x^247872 mod p(x)` << 1 */ + { 0x0000000145fb8ed8, 0x00000001120b416e }, + /* x^246784 mod p(x)` << 1, x^246848 mod p(x)` << 1 */ + { 0x00000000cbc09168, 0x00000001974aecb0 }, + /* x^245760 mod p(x)` << 1, x^245824 mod p(x)` << 1 */ + { 0x000000005ceeedc2, 0x000000008ee3f226 }, + /* x^244736 mod p(x)` << 1, x^244800 mod p(x)` << 1 */ + { 0x0000000047d74e86, 0x00000001089aba9a }, + /* x^243712 mod p(x)` << 1, x^243776 mod p(x)` << 1 */ + { 0x00000001407e9e22, 0x0000000065113872 }, + /* x^242688 mod p(x)` << 1, x^242752 mod p(x)` << 1 */ + { 0x00000001da967bda, 0x000000005c07ec10 }, + /* x^241664 mod p(x)` << 1, x^241728 mod p(x)` << 1 */ + { 0x000000006c898368, 0x0000000187590924 }, + /* x^240640 mod p(x)` << 1, x^240704 mod p(x)` << 1 */ + { 0x00000000f2d14c98, 0x00000000e35da7c6 }, + /* x^239616 mod p(x)` << 1, x^239680 mod p(x)` << 1 */ + { 0x00000001993c6ad4, 0x000000000415855a }, + /* x^238592 mod p(x)` << 1, x^238656 mod p(x)` << 1 */ + { 0x000000014683d1ac, 0x0000000073617758 }, + /* x^237568 mod p(x)` << 1, x^237632 mod p(x)` << 1 */ + { 0x00000001a7c93e6c, 0x0000000176021d28 }, + /* x^236544 mod p(x)` << 1, x^236608 mod p(x)` << 1 */ + { 0x000000010211e90a, 0x00000001c358fd0a }, + /* x^235520 mod p(x)` << 1, x^235584 mod p(x)` << 1 */ + { 0x000000001119403e, 0x00000001ff7a2c18 }, + /* x^234496 mod p(x)` << 1, x^234560 mod p(x)` << 1 */ + { 0x000000001c3261aa, 0x00000000f2d9f7e4 }, + /* x^233472 mod p(x)` << 1, x^233536 mod p(x)` << 1 */ + { 0x000000014e37a634, 0x000000016cf1f9c8 }, + /* x^232448 mod p(x)` << 1, x^232512 mod p(x)` << 1 */ + { 0x0000000073786c0c, 0x000000010af9279a }, + /* x^231424 mod p(x)` << 1, x^231488 mod p(x)` << 1 */ + { 0x000000011dc037f8, 0x0000000004f101e8 }, + /* x^230400 mod p(x)` << 1, x^230464 mod p(x)` << 1 */ + { 0x0000000031433dfc, 0x0000000070bcf184 }, + /* x^229376 mod p(x)` << 1, x^229440 mod p(x)` << 1 */ + { 0x000000009cde8348, 0x000000000a8de642 
}, + /* x^228352 mod p(x)` << 1, x^228416 mod p(x)` << 1 */ + { 0x0000000038d3c2a6, 0x0000000062ea130c }, + /* x^227328 mod p(x)` << 1, x^227392 mod p(x)` << 1 */ + { 0x000000011b25f260, 0x00000001eb31cbb2 }, + /* x^226304 mod p(x)` << 1, x^226368 mod p(x)` << 1 */ + { 0x000000001629e6f0, 0x0000000170783448 }, + /* x^225280 mod p(x)` << 1, x^225344 mod p(x)` << 1 */ + { 0x0000000160838b4c, 0x00000001a684b4c6 }, + /* x^224256 mod p(x)` << 1, x^224320 mod p(x)` << 1 */ + { 0x000000007a44011c, 0x00000000253ca5b4 }, + /* x^223232 mod p(x)` << 1, x^223296 mod p(x)` << 1 */ + { 0x00000000226f417a, 0x0000000057b4b1e2 }, + /* x^222208 mod p(x)` << 1, x^222272 mod p(x)` << 1 */ + { 0x0000000045eb2eb4, 0x00000000b6bd084c }, + /* x^221184 mod p(x)` << 1, x^221248 mod p(x)` << 1 */ + { 0x000000014459d70c, 0x0000000123c2d592 }, + /* x^220160 mod p(x)` << 1, x^220224 mod p(x)` << 1 */ + { 0x00000001d406ed82, 0x00000000159dafce }, + /* x^219136 mod p(x)` << 1, x^219200 mod p(x)` << 1 */ + { 0x0000000160c8e1a8, 0x0000000127e1a64e }, + /* x^218112 mod p(x)` << 1, x^218176 mod p(x)` << 1 */ + { 0x0000000027ba8098, 0x0000000056860754 }, + /* x^217088 mod p(x)` << 1, x^217152 mod p(x)` << 1 */ + { 0x000000006d92d018, 0x00000001e661aae8 }, + /* x^216064 mod p(x)` << 1, x^216128 mod p(x)` << 1 */ + { 0x000000012ed7e3f2, 0x00000000f82c6166 }, + /* x^215040 mod p(x)` << 1, x^215104 mod p(x)` << 1 */ + { 0x000000002dc87788, 0x00000000c4f9c7ae }, + /* x^214016 mod p(x)` << 1, x^214080 mod p(x)` << 1 */ + { 0x0000000018240bb8, 0x0000000074203d20 }, + /* x^212992 mod p(x)` << 1, x^213056 mod p(x)` << 1 */ + { 0x000000001ad38158, 0x0000000198173052 }, + /* x^211968 mod p(x)` << 1, x^212032 mod p(x)` << 1 */ + { 0x00000001396b78f2, 0x00000001ce8aba54 }, + /* x^210944 mod p(x)` << 1, x^211008 mod p(x)` << 1 */ + { 0x000000011a681334, 0x00000001850d5d94 }, + /* x^209920 mod p(x)` << 1, x^209984 mod p(x)` << 1 */ + { 0x000000012104732e, 0x00000001d609239c }, + /* x^208896 mod p(x)` << 1, x^208960 
mod p(x)` << 1 */ + { 0x00000000a140d90c, 0x000000001595f048 }, + /* x^207872 mod p(x)` << 1, x^207936 mod p(x)` << 1 */ + { 0x00000001b7215eda, 0x0000000042ccee08 }, + /* x^206848 mod p(x)` << 1, x^206912 mod p(x)` << 1 */ + { 0x00000001aaf1df3c, 0x000000010a389d74 }, + /* x^205824 mod p(x)` << 1, x^205888 mod p(x)` << 1 */ + { 0x0000000029d15b8a, 0x000000012a840da6 }, + /* x^204800 mod p(x)` << 1, x^204864 mod p(x)` << 1 */ + { 0x00000000f1a96922, 0x000000001d181c0c }, + /* x^203776 mod p(x)` << 1, x^203840 mod p(x)` << 1 */ + { 0x00000001ac80d03c, 0x0000000068b7d1f6 }, + /* x^202752 mod p(x)` << 1, x^202816 mod p(x)` << 1 */ + { 0x000000000f11d56a, 0x000000005b0f14fc }, + /* x^201728 mod p(x)` << 1, x^201792 mod p(x)` << 1 */ + { 0x00000001f1c022a2, 0x0000000179e9e730 }, + /* x^200704 mod p(x)` << 1, x^200768 mod p(x)` << 1 */ + { 0x0000000173d00ae2, 0x00000001ce1368d6 }, + /* x^199680 mod p(x)` << 1, x^199744 mod p(x)` << 1 */ + { 0x00000001d4ffe4ac, 0x0000000112c3a84c }, + /* x^198656 mod p(x)` << 1, x^198720 mod p(x)` << 1 */ + { 0x000000016edc5ae4, 0x00000000de940fee }, + /* x^197632 mod p(x)` << 1, x^197696 mod p(x)` << 1 */ + { 0x00000001f1a02140, 0x00000000fe896b7e }, + /* x^196608 mod p(x)` << 1, x^196672 mod p(x)` << 1 */ + { 0x00000000ca0b28a0, 0x00000001f797431c }, + /* x^195584 mod p(x)` << 1, x^195648 mod p(x)` << 1 */ + { 0x00000001928e30a2, 0x0000000053e989ba }, + /* x^194560 mod p(x)` << 1, x^194624 mod p(x)` << 1 */ + { 0x0000000097b1b002, 0x000000003920cd16 }, + /* x^193536 mod p(x)` << 1, x^193600 mod p(x)` << 1 */ + { 0x00000000b15bf906, 0x00000001e6f579b8 }, + /* x^192512 mod p(x)` << 1, x^192576 mod p(x)` << 1 */ + { 0x00000000411c5d52, 0x000000007493cb0a }, + /* x^191488 mod p(x)` << 1, x^191552 mod p(x)` << 1 */ + { 0x00000001c36f3300, 0x00000001bdd376d8 }, + /* x^190464 mod p(x)` << 1, x^190528 mod p(x)` << 1 */ + { 0x00000001119227e0, 0x000000016badfee6 }, + /* x^189440 mod p(x)` << 1, x^189504 mod p(x)` << 1 */ + { 0x00000000114d4702, 
0x0000000071de5c58 }, + /* x^188416 mod p(x)` << 1, x^188480 mod p(x)` << 1 */ + { 0x00000000458b5b98, 0x00000000453f317c }, + /* x^187392 mod p(x)` << 1, x^187456 mod p(x)` << 1 */ + { 0x000000012e31fb8e, 0x0000000121675cce }, + /* x^186368 mod p(x)` << 1, x^186432 mod p(x)` << 1 */ + { 0x000000005cf619d8, 0x00000001f409ee92 }, + /* x^185344 mod p(x)` << 1, x^185408 mod p(x)` << 1 */ + { 0x0000000063f4d8b2, 0x00000000f36b9c88 }, + /* x^184320 mod p(x)` << 1, x^184384 mod p(x)` << 1 */ + { 0x000000004138dc8a, 0x0000000036b398f4 }, + /* x^183296 mod p(x)` << 1, x^183360 mod p(x)` << 1 */ + { 0x00000001d29ee8e0, 0x00000001748f9adc }, + /* x^182272 mod p(x)` << 1, x^182336 mod p(x)` << 1 */ + { 0x000000006a08ace8, 0x00000001be94ec00 }, + /* x^181248 mod p(x)` << 1, x^181312 mod p(x)` << 1 */ + { 0x0000000127d42010, 0x00000000b74370d6 }, + /* x^180224 mod p(x)` << 1, x^180288 mod p(x)` << 1 */ + { 0x0000000019d76b62, 0x00000001174d0b98 }, + /* x^179200 mod p(x)` << 1, x^179264 mod p(x)` << 1 */ + { 0x00000001b1471f6e, 0x00000000befc06a4 }, + /* x^178176 mod p(x)` << 1, x^178240 mod p(x)` << 1 */ + { 0x00000001f64c19cc, 0x00000001ae125288 }, + /* x^177152 mod p(x)` << 1, x^177216 mod p(x)` << 1 */ + { 0x00000000003c0ea0, 0x0000000095c19b34 }, + /* x^176128 mod p(x)` << 1, x^176192 mod p(x)` << 1 */ + { 0x000000014d73abf6, 0x00000001a78496f2 }, + /* x^175104 mod p(x)` << 1, x^175168 mod p(x)` << 1 */ + { 0x00000001620eb844, 0x00000001ac5390a0 }, + /* x^174080 mod p(x)` << 1, x^174144 mod p(x)` << 1 */ + { 0x0000000147655048, 0x000000002a80ed6e }, + /* x^173056 mod p(x)` << 1, x^173120 mod p(x)` << 1 */ + { 0x0000000067b5077e, 0x00000001fa9b0128 }, + /* x^172032 mod p(x)` << 1, x^172096 mod p(x)` << 1 */ + { 0x0000000010ffe206, 0x00000001ea94929e }, + /* x^171008 mod p(x)` << 1, x^171072 mod p(x)` << 1 */ + { 0x000000000fee8f1e, 0x0000000125f4305c }, + /* x^169984 mod p(x)` << 1, x^170048 mod p(x)` << 1 */ + { 0x00000001da26fbae, 0x00000001471e2002 }, + /* x^168960 mod 
p(x)` << 1, x^169024 mod p(x)` << 1 */ + { 0x00000001b3a8bd88, 0x0000000132d2253a }, + /* x^167936 mod p(x)` << 1, x^168000 mod p(x)` << 1 */ + { 0x00000000e8f3898e, 0x00000000f26b3592 }, + /* x^166912 mod p(x)` << 1, x^166976 mod p(x)` << 1 */ + { 0x00000000b0d0d28c, 0x00000000bc8b67b0 }, + /* x^165888 mod p(x)` << 1, x^165952 mod p(x)` << 1 */ + { 0x0000000030f2a798, 0x000000013a826ef2 }, + /* x^164864 mod p(x)` << 1, x^164928 mod p(x)` << 1 */ + { 0x000000000fba1002, 0x0000000081482c84 }, + /* x^163840 mod p(x)` << 1, x^163904 mod p(x)` << 1 */ + { 0x00000000bdb9bd72, 0x00000000e77307c2 }, + /* x^162816 mod p(x)` << 1, x^162880 mod p(x)` << 1 */ + { 0x0000000075d3bf5a, 0x00000000d4a07ec8 }, + /* x^161792 mod p(x)` << 1, x^161856 mod p(x)` << 1 */ + { 0x00000000ef1f98a0, 0x0000000017102100 }, + /* x^160768 mod p(x)` << 1, x^160832 mod p(x)` << 1 */ + { 0x00000000689c7602, 0x00000000db406486 }, + /* x^159744 mod p(x)` << 1, x^159808 mod p(x)` << 1 */ + { 0x000000016d5fa5fe, 0x0000000192db7f88 }, + /* x^158720 mod p(x)` << 1, x^158784 mod p(x)` << 1 */ + { 0x00000001d0d2b9ca, 0x000000018bf67b1e }, + /* x^157696 mod p(x)` << 1, x^157760 mod p(x)` << 1 */ + { 0x0000000041e7b470, 0x000000007c09163e }, + /* x^156672 mod p(x)` << 1, x^156736 mod p(x)` << 1 */ + { 0x00000001cbb6495e, 0x000000000adac060 }, + /* x^155648 mod p(x)` << 1, x^155712 mod p(x)` << 1 */ + { 0x000000010052a0b0, 0x00000000bd8316ae }, + /* x^154624 mod p(x)` << 1, x^154688 mod p(x)` << 1 */ + { 0x00000001d8effb5c, 0x000000019f09ab54 }, + /* x^153600 mod p(x)` << 1, x^153664 mod p(x)` << 1 */ + { 0x00000001d969853c, 0x0000000125155542 }, + /* x^152576 mod p(x)` << 1, x^152640 mod p(x)` << 1 */ + { 0x00000000523ccce2, 0x000000018fdb5882 }, + /* x^151552 mod p(x)` << 1, x^151616 mod p(x)` << 1 */ + { 0x000000001e2436bc, 0x00000000e794b3f4 }, + /* x^150528 mod p(x)` << 1, x^150592 mod p(x)` << 1 */ + { 0x00000000ddd1c3a2, 0x000000016f9bb022 }, + /* x^149504 mod p(x)` << 1, x^149568 mod p(x)` << 1 */ + { 
0x0000000019fcfe38, 0x00000000290c9978 }, + /* x^148480 mod p(x)` << 1, x^148544 mod p(x)` << 1 */ + { 0x00000001ce95db64, 0x0000000083c0f350 }, + /* x^147456 mod p(x)` << 1, x^147520 mod p(x)` << 1 */ + { 0x00000000af582806, 0x0000000173ea6628 }, + /* x^146432 mod p(x)` << 1, x^146496 mod p(x)` << 1 */ + { 0x00000001006388f6, 0x00000001c8b4e00a }, + /* x^145408 mod p(x)` << 1, x^145472 mod p(x)` << 1 */ + { 0x0000000179eca00a, 0x00000000de95d6aa }, + /* x^144384 mod p(x)` << 1, x^144448 mod p(x)` << 1 */ + { 0x0000000122410a6a, 0x000000010b7f7248 }, + /* x^143360 mod p(x)` << 1, x^143424 mod p(x)` << 1 */ + { 0x000000004288e87c, 0x00000001326e3a06 }, + /* x^142336 mod p(x)` << 1, x^142400 mod p(x)` << 1 */ + { 0x000000016c5490da, 0x00000000bb62c2e6 }, + /* x^141312 mod p(x)` << 1, x^141376 mod p(x)` << 1 */ + { 0x00000000d1c71f6e, 0x0000000156a4b2c2 }, + /* x^140288 mod p(x)` << 1, x^140352 mod p(x)` << 1 */ + { 0x00000001b4ce08a6, 0x000000011dfe763a }, + /* x^139264 mod p(x)` << 1, x^139328 mod p(x)` << 1 */ + { 0x00000001466ba60c, 0x000000007bcca8e2 }, + /* x^138240 mod p(x)` << 1, x^138304 mod p(x)` << 1 */ + { 0x00000001f6c488a4, 0x0000000186118faa }, + /* x^137216 mod p(x)` << 1, x^137280 mod p(x)` << 1 */ + { 0x000000013bfb0682, 0x0000000111a65a88 }, + /* x^136192 mod p(x)` << 1, x^136256 mod p(x)` << 1 */ + { 0x00000000690e9e54, 0x000000003565e1c4 }, + /* x^135168 mod p(x)` << 1, x^135232 mod p(x)` << 1 */ + { 0x00000000281346b6, 0x000000012ed02a82 }, + /* x^134144 mod p(x)` << 1, x^134208 mod p(x)` << 1 */ + { 0x0000000156464024, 0x00000000c486ecfc }, + /* x^133120 mod p(x)` << 1, x^133184 mod p(x)` << 1 */ + { 0x000000016063a8dc, 0x0000000001b951b2 }, + /* x^132096 mod p(x)` << 1, x^132160 mod p(x)` << 1 */ + { 0x0000000116a66362, 0x0000000048143916 }, + /* x^131072 mod p(x)` << 1, x^131136 mod p(x)` << 1 */ + { 0x000000017e8aa4d2, 0x00000001dc2ae124 }, + /* x^130048 mod p(x)` << 1, x^130112 mod p(x)` << 1 */ + { 0x00000001728eb10c, 0x00000001416c58d6 }, 
+ /* x^129024 mod p(x)` << 1, x^129088 mod p(x)` << 1 */ + { 0x00000001b08fd7fa, 0x00000000a479744a }, + /* x^128000 mod p(x)` << 1, x^128064 mod p(x)` << 1 */ + { 0x00000001092a16e8, 0x0000000096ca3a26 }, + /* x^126976 mod p(x)` << 1, x^127040 mod p(x)` << 1 */ + { 0x00000000a505637c, 0x00000000ff223d4e }, + /* x^125952 mod p(x)` << 1, x^126016 mod p(x)` << 1 */ + { 0x00000000d94869b2, 0x000000010e84da42 }, + /* x^124928 mod p(x)` << 1, x^124992 mod p(x)` << 1 */ + { 0x00000001c8b203ae, 0x00000001b61ba3d0 }, + /* x^123904 mod p(x)` << 1, x^123968 mod p(x)` << 1 */ + { 0x000000005704aea0, 0x00000000680f2de8 }, + /* x^122880 mod p(x)` << 1, x^122944 mod p(x)` << 1 */ + { 0x000000012e295fa2, 0x000000008772a9a8 }, + /* x^121856 mod p(x)` << 1, x^121920 mod p(x)` << 1 */ + { 0x000000011d0908bc, 0x0000000155f295bc }, + /* x^120832 mod p(x)` << 1, x^120896 mod p(x)` << 1 */ + { 0x0000000193ed97ea, 0x00000000595f9282 }, + /* x^119808 mod p(x)` << 1, x^119872 mod p(x)` << 1 */ + { 0x000000013a0f1c52, 0x0000000164b1c25a }, + /* x^118784 mod p(x)` << 1, x^118848 mod p(x)` << 1 */ + { 0x000000010c2c40c0, 0x00000000fbd67c50 }, + /* x^117760 mod p(x)` << 1, x^117824 mod p(x)` << 1 */ + { 0x00000000ff6fac3e, 0x0000000096076268 }, + /* x^116736 mod p(x)` << 1, x^116800 mod p(x)` << 1 */ + { 0x000000017b3609c0, 0x00000001d288e4cc }, + /* x^115712 mod p(x)` << 1, x^115776 mod p(x)` << 1 */ + { 0x0000000088c8c922, 0x00000001eaac1bdc }, + /* x^114688 mod p(x)` << 1, x^114752 mod p(x)` << 1 */ + { 0x00000001751baae6, 0x00000001f1ea39e2 }, + /* x^113664 mod p(x)` << 1, x^113728 mod p(x)` << 1 */ + { 0x0000000107952972, 0x00000001eb6506fc }, + /* x^112640 mod p(x)` << 1, x^112704 mod p(x)` << 1 */ + { 0x0000000162b00abe, 0x000000010f806ffe }, + /* x^111616 mod p(x)` << 1, x^111680 mod p(x)` << 1 */ + { 0x000000000d7b404c, 0x000000010408481e }, + /* x^110592 mod p(x)` << 1, x^110656 mod p(x)` << 1 */ + { 0x00000000763b13d4, 0x0000000188260534 }, + /* x^109568 mod p(x)` << 1, x^109632 mod 
p(x)` << 1 */ + { 0x00000000f6dc22d8, 0x0000000058fc73e0 }, + /* x^108544 mod p(x)` << 1, x^108608 mod p(x)` << 1 */ + { 0x000000007daae060, 0x00000000391c59b8 }, + /* x^107520 mod p(x)` << 1, x^107584 mod p(x)` << 1 */ + { 0x000000013359ab7c, 0x000000018b638400 }, + /* x^106496 mod p(x)` << 1, x^106560 mod p(x)` << 1 */ + { 0x000000008add438a, 0x000000011738f5c4 }, + /* x^105472 mod p(x)` << 1, x^105536 mod p(x)` << 1 */ + { 0x00000001edbefdea, 0x000000008cf7c6da }, + /* x^104448 mod p(x)` << 1, x^104512 mod p(x)` << 1 */ + { 0x000000004104e0f8, 0x00000001ef97fb16 }, + /* x^103424 mod p(x)` << 1, x^103488 mod p(x)` << 1 */ + { 0x00000000b48a8222, 0x0000000102130e20 }, + /* x^102400 mod p(x)` << 1, x^102464 mod p(x)` << 1 */ + { 0x00000001bcb46844, 0x00000000db968898 }, + /* x^101376 mod p(x)` << 1, x^101440 mod p(x)` << 1 */ + { 0x000000013293ce0a, 0x00000000b5047b5e }, + /* x^100352 mod p(x)` << 1, x^100416 mod p(x)` << 1 */ + { 0x00000001710d0844, 0x000000010b90fdb2 }, + /* x^99328 mod p(x)` << 1, x^99392 mod p(x)` << 1 */ + { 0x0000000117907f6e, 0x000000004834a32e }, + /* x^98304 mod p(x)` << 1, x^98368 mod p(x)` << 1 */ + { 0x0000000087ddf93e, 0x0000000059c8f2b0 }, + /* x^97280 mod p(x)` << 1, x^97344 mod p(x)` << 1 */ + { 0x000000005970e9b0, 0x0000000122cec508 }, + /* x^96256 mod p(x)` << 1, x^96320 mod p(x)` << 1 */ + { 0x0000000185b2b7d0, 0x000000000a330cda }, + /* x^95232 mod p(x)` << 1, x^95296 mod p(x)` << 1 */ + { 0x00000001dcee0efc, 0x000000014a47148c }, + /* x^94208 mod p(x)` << 1, x^94272 mod p(x)` << 1 */ + { 0x0000000030da2722, 0x0000000042c61cb8 }, + /* x^93184 mod p(x)` << 1, x^93248 mod p(x)` << 1 */ + { 0x000000012f925a18, 0x0000000012fe6960 }, + /* x^92160 mod p(x)` << 1, x^92224 mod p(x)` << 1 */ + { 0x00000000dd2e357c, 0x00000000dbda2c20 }, + /* x^91136 mod p(x)` << 1, x^91200 mod p(x)` << 1 */ + { 0x00000000071c80de, 0x000000011122410c }, + /* x^90112 mod p(x)` << 1, x^90176 mod p(x)` << 1 */ + { 0x000000011513140a, 0x00000000977b2070 }, + 
/* x^89088 mod p(x)` << 1, x^89152 mod p(x)` << 1 */ + { 0x00000001df876e8e, 0x000000014050438e }, + /* x^88064 mod p(x)` << 1, x^88128 mod p(x)` << 1 */ + { 0x000000015f81d6ce, 0x0000000147c840e8 }, + /* x^87040 mod p(x)` << 1, x^87104 mod p(x)` << 1 */ + { 0x000000019dd94dbe, 0x00000001cc7c88ce }, + /* x^86016 mod p(x)` << 1, x^86080 mod p(x)` << 1 */ + { 0x00000001373d206e, 0x00000001476b35a4 }, + /* x^84992 mod p(x)` << 1, x^85056 mod p(x)` << 1 */ + { 0x00000000668ccade, 0x000000013d52d508 }, + /* x^83968 mod p(x)` << 1, x^84032 mod p(x)` << 1 */ + { 0x00000001b192d268, 0x000000008e4be32e }, + /* x^82944 mod p(x)` << 1, x^83008 mod p(x)` << 1 */ + { 0x00000000e30f3a78, 0x00000000024120fe }, + /* x^81920 mod p(x)` << 1, x^81984 mod p(x)` << 1 */ + { 0x000000010ef1f7bc, 0x00000000ddecddb4 }, + /* x^80896 mod p(x)` << 1, x^80960 mod p(x)` << 1 */ + { 0x00000001f5ac7380, 0x00000000d4d403bc }, + /* x^79872 mod p(x)` << 1, x^79936 mod p(x)` << 1 */ + { 0x000000011822ea70, 0x00000001734b89aa }, + /* x^78848 mod p(x)` << 1, x^78912 mod p(x)` << 1 */ + { 0x00000000c3a33848, 0x000000010e7a58d6 }, + /* x^77824 mod p(x)` << 1, x^77888 mod p(x)` << 1 */ + { 0x00000001bd151c24, 0x00000001f9f04e9c }, + /* x^76800 mod p(x)` << 1, x^76864 mod p(x)` << 1 */ + { 0x0000000056002d76, 0x00000000b692225e }, + /* x^75776 mod p(x)` << 1, x^75840 mod p(x)` << 1 */ + { 0x000000014657c4f4, 0x000000019b8d3f3e }, + /* x^74752 mod p(x)` << 1, x^74816 mod p(x)` << 1 */ + { 0x0000000113742d7c, 0x00000001a874f11e }, + /* x^73728 mod p(x)` << 1, x^73792 mod p(x)` << 1 */ + { 0x000000019c5920ba, 0x000000010d5a4254 }, + /* x^72704 mod p(x)` << 1, x^72768 mod p(x)` << 1 */ + { 0x000000005216d2d6, 0x00000000bbb2f5d6 }, + /* x^71680 mod p(x)` << 1, x^71744 mod p(x)` << 1 */ + { 0x0000000136f5ad8a, 0x0000000179cc0e36 }, + /* x^70656 mod p(x)` << 1, x^70720 mod p(x)` << 1 */ + { 0x000000018b07beb6, 0x00000001dca1da4a }, + /* x^69632 mod p(x)` << 1, x^69696 mod p(x)` << 1 */ + { 0x00000000db1e93b0, 
0x00000000feb1a192 }, + /* x^68608 mod p(x)` << 1, x^68672 mod p(x)` << 1 */ + { 0x000000000b96fa3a, 0x00000000d1eeedd6 }, + /* x^67584 mod p(x)` << 1, x^67648 mod p(x)` << 1 */ + { 0x00000001d9968af0, 0x000000008fad9bb4 }, + /* x^66560 mod p(x)` << 1, x^66624 mod p(x)` << 1 */ + { 0x000000000e4a77a2, 0x00000001884938e4 }, + /* x^65536 mod p(x)` << 1, x^65600 mod p(x)` << 1 */ + { 0x00000000508c2ac8, 0x00000001bc2e9bc0 }, + /* x^64512 mod p(x)` << 1, x^64576 mod p(x)` << 1 */ + { 0x0000000021572a80, 0x00000001f9658a68 }, + /* x^63488 mod p(x)` << 1, x^63552 mod p(x)` << 1 */ + { 0x00000001b859daf2, 0x000000001b9224fc }, + /* x^62464 mod p(x)` << 1, x^62528 mod p(x)` << 1 */ + { 0x000000016f788474, 0x0000000055b2fb84 }, + /* x^61440 mod p(x)` << 1, x^61504 mod p(x)` << 1 */ + { 0x00000001b438810e, 0x000000018b090348 }, + /* x^60416 mod p(x)` << 1, x^60480 mod p(x)` << 1 */ + { 0x0000000095ddc6f2, 0x000000011ccbd5ea }, + /* x^59392 mod p(x)` << 1, x^59456 mod p(x)` << 1 */ + { 0x00000001d977c20c, 0x0000000007ae47f8 }, + /* x^58368 mod p(x)` << 1, x^58432 mod p(x)` << 1 */ + { 0x00000000ebedb99a, 0x0000000172acbec0 }, + /* x^57344 mod p(x)` << 1, x^57408 mod p(x)` << 1 */ + { 0x00000001df9e9e92, 0x00000001c6e3ff20 }, + /* x^56320 mod p(x)` << 1, x^56384 mod p(x)` << 1 */ + { 0x00000001a4a3f952, 0x00000000e1b38744 }, + /* x^55296 mod p(x)` << 1, x^55360 mod p(x)` << 1 */ + { 0x00000000e2f51220, 0x00000000791585b2 }, + /* x^54272 mod p(x)` << 1, x^54336 mod p(x)` << 1 */ + { 0x000000004aa01f3e, 0x00000000ac53b894 }, + /* x^53248 mod p(x)` << 1, x^53312 mod p(x)` << 1 */ + { 0x00000000b3e90a58, 0x00000001ed5f2cf4 }, + /* x^52224 mod p(x)` << 1, x^52288 mod p(x)` << 1 */ + { 0x000000000c9ca2aa, 0x00000001df48b2e0 }, + /* x^51200 mod p(x)` << 1, x^51264 mod p(x)` << 1 */ + { 0x0000000151682316, 0x00000000049c1c62 }, + /* x^50176 mod p(x)` << 1, x^50240 mod p(x)` << 1 */ + { 0x0000000036fce78c, 0x000000017c460c12 }, + /* x^49152 mod p(x)` << 1, x^49216 mod p(x)` << 1 */ + { 
0x000000009037dc10, 0x000000015be4da7e }, + /* x^48128 mod p(x)` << 1, x^48192 mod p(x)` << 1 */ + { 0x00000000d3298582, 0x000000010f38f668 }, + /* x^47104 mod p(x)` << 1, x^47168 mod p(x)` << 1 */ + { 0x00000001b42e8ad6, 0x0000000039f40a00 }, + /* x^46080 mod p(x)` << 1, x^46144 mod p(x)` << 1 */ + { 0x00000000142a9838, 0x00000000bd4c10c4 }, + /* x^45056 mod p(x)` << 1, x^45120 mod p(x)` << 1 */ + { 0x0000000109c7f190, 0x0000000042db1d98 }, + /* x^44032 mod p(x)` << 1, x^44096 mod p(x)` << 1 */ + { 0x0000000056ff9310, 0x00000001c905bae6 }, + /* x^43008 mod p(x)` << 1, x^43072 mod p(x)` << 1 */ + { 0x00000001594513aa, 0x00000000069d40ea }, + /* x^41984 mod p(x)` << 1, x^42048 mod p(x)` << 1 */ + { 0x00000001e3b5b1e8, 0x000000008e4fbad0 }, + /* x^40960 mod p(x)` << 1, x^41024 mod p(x)` << 1 */ + { 0x000000011dd5fc08, 0x0000000047bedd46 }, + /* x^39936 mod p(x)` << 1, x^40000 mod p(x)` << 1 */ + { 0x00000001675f0cc2, 0x0000000026396bf8 }, + /* x^38912 mod p(x)` << 1, x^38976 mod p(x)` << 1 */ + { 0x00000000d1c8dd44, 0x00000000379beb92 }, + /* x^37888 mod p(x)` << 1, x^37952 mod p(x)` << 1 */ + { 0x0000000115ebd3d8, 0x000000000abae54a }, + /* x^36864 mod p(x)` << 1, x^36928 mod p(x)` << 1 */ + { 0x00000001ecbd0dac, 0x0000000007e6a128 }, + /* x^35840 mod p(x)` << 1, x^35904 mod p(x)` << 1 */ + { 0x00000000cdf67af2, 0x000000000ade29d2 }, + /* x^34816 mod p(x)` << 1, x^34880 mod p(x)` << 1 */ + { 0x000000004c01ff4c, 0x00000000f974c45c }, + /* x^33792 mod p(x)` << 1, x^33856 mod p(x)` << 1 */ + { 0x00000000f2d8657e, 0x00000000e77ac60a }, + /* x^32768 mod p(x)` << 1, x^32832 mod p(x)` << 1 */ + { 0x000000006bae74c4, 0x0000000145895816 }, + /* x^31744 mod p(x)` << 1, x^31808 mod p(x)` << 1 */ + { 0x0000000152af8aa0, 0x0000000038e362be }, + /* x^30720 mod p(x)` << 1, x^30784 mod p(x)` << 1 */ + { 0x0000000004663802, 0x000000007f991a64 }, + /* x^29696 mod p(x)` << 1, x^29760 mod p(x)` << 1 */ + { 0x00000001ab2f5afc, 0x00000000fa366d3a }, + /* x^28672 mod p(x)` << 1, x^28736 
mod p(x)` << 1 */ + { 0x0000000074a4ebd4, 0x00000001a2bb34f0 }, + /* x^27648 mod p(x)` << 1, x^27712 mod p(x)` << 1 */ + { 0x00000001d7ab3a4c, 0x0000000028a9981e }, + /* x^26624 mod p(x)` << 1, x^26688 mod p(x)` << 1 */ + { 0x00000001a8da60c6, 0x00000001dbc672be }, + /* x^25600 mod p(x)` << 1, x^25664 mod p(x)` << 1 */ + { 0x000000013cf63820, 0x00000000b04d77f6 }, + /* x^24576 mod p(x)` << 1, x^24640 mod p(x)` << 1 */ + { 0x00000000bec12e1e, 0x0000000124400d96 }, + /* x^23552 mod p(x)` << 1, x^23616 mod p(x)` << 1 */ + { 0x00000001c6368010, 0x000000014ca4b414 }, + /* x^22528 mod p(x)` << 1, x^22592 mod p(x)` << 1 */ + { 0x00000001e6e78758, 0x000000012fe2c938 }, + /* x^21504 mod p(x)` << 1, x^21568 mod p(x)` << 1 */ + { 0x000000008d7f2b3c, 0x00000001faed01e6 }, + /* x^20480 mod p(x)` << 1, x^20544 mod p(x)` << 1 */ + { 0x000000016b4a156e, 0x000000007e80ecfe }, + /* x^19456 mod p(x)` << 1, x^19520 mod p(x)` << 1 */ + { 0x00000001c63cfeb6, 0x0000000098daee94 }, + /* x^18432 mod p(x)` << 1, x^18496 mod p(x)` << 1 */ + { 0x000000015f902670, 0x000000010a04edea }, + /* x^17408 mod p(x)` << 1, x^17472 mod p(x)` << 1 */ + { 0x00000001cd5de11e, 0x00000001c00b4524 }, + /* x^16384 mod p(x)` << 1, x^16448 mod p(x)` << 1 */ + { 0x000000001acaec54, 0x0000000170296550 }, + /* x^15360 mod p(x)` << 1, x^15424 mod p(x)` << 1 */ + { 0x000000002bd0ca78, 0x0000000181afaa48 }, + /* x^14336 mod p(x)` << 1, x^14400 mod p(x)` << 1 */ + { 0x0000000032d63d5c, 0x0000000185a31ffa }, + /* x^13312 mod p(x)` << 1, x^13376 mod p(x)` << 1 */ + { 0x000000001c6d4e4c, 0x000000002469f608 }, + /* x^12288 mod p(x)` << 1, x^12352 mod p(x)` << 1 */ + { 0x0000000106a60b92, 0x000000006980102a }, + /* x^11264 mod p(x)` << 1, x^11328 mod p(x)` << 1 */ + { 0x00000000d3855e12, 0x0000000111ea9ca8 }, + /* x^10240 mod p(x)` << 1, x^10304 mod p(x)` << 1 */ + { 0x00000000e3125636, 0x00000001bd1d29ce }, + /* x^9216 mod p(x)` << 1, x^9280 mod p(x)` << 1 */ + { 0x000000009e8f7ea4, 0x00000001b34b9580 }, + /* x^8192 mod 
p(x)` << 1, x^8256 mod p(x)` << 1 */ + { 0x00000001c82e562c, 0x000000003076054e }, + /* x^7168 mod p(x)` << 1, x^7232 mod p(x)` << 1 */ + { 0x00000000ca9f09ce, 0x000000012a608ea4 }, + /* x^6144 mod p(x)` << 1, x^6208 mod p(x)` << 1 */ + { 0x00000000c63764e6, 0x00000000784d05fe }, + /* x^5120 mod p(x)` << 1, x^5184 mod p(x)` << 1 */ + { 0x0000000168d2e49e, 0x000000016ef0d82a }, + /* x^4096 mod p(x)` << 1, x^4160 mod p(x)` << 1 */ + { 0x00000000e986c148, 0x0000000075bda454 }, + /* x^3072 mod p(x)` << 1, x^3136 mod p(x)` << 1 */ + { 0x00000000cfb65894, 0x000000003dc0a1c4 }, + /* x^2048 mod p(x)` << 1, x^2112 mod p(x)` << 1 */ + { 0x0000000111cadee4, 0x00000000e9a5d8be }, + /* x^1024 mod p(x)` << 1, x^1088 mod p(x)` << 1 */ + { 0x0000000171fb63ce, 0x00000001609bc4b4 } +#endif /* __LITTLE_ENDIAN__ */ + }; + +/* Reduce final 1024-2048 bits to 64 bits, shifting 32 bits to include the trailing 32 bits of zeros. The table has 16 entries of two 64-bit words each; the comment above every entry names the four remainders packed into it. The big-endian branch holds the same 16 entries with the two 64-bit words of each entry swapped. */ + +static const __vector unsigned long long vcrc_short_const[16] + __attribute__((aligned (16))) = { +#ifdef __LITTLE_ENDIAN__ + /* x^1952 mod p(x) , x^1984 mod p(x) , x^2016 mod p(x) , x^2048 mod p(x) */ + { 0x5cf015c388e56f72, 0x7fec2963e5bf8048 }, + /* x^1824 mod p(x) , x^1856 mod p(x) , x^1888 mod p(x) , x^1920 mod p(x) */ + { 0x963a18920246e2e6, 0x38e888d4844752a9 }, + /* x^1696 mod p(x) , x^1728 mod p(x) , x^1760 mod p(x) , x^1792 mod p(x) */ + { 0x419a441956993a31, 0x42316c00730206ad }, + /* x^1568 mod p(x) , x^1600 mod p(x) , x^1632 mod p(x) , x^1664 mod p(x) */ + { 0x924752ba2b830011, 0x543d5c543e65ddf9 }, + /* x^1440 mod p(x) , x^1472 mod p(x) , x^1504 mod p(x) , x^1536 mod p(x) */ + { 0x55bd7f9518e4a304, 0x78e87aaf56767c92 }, + /* x^1312 mod p(x) , x^1344 mod p(x) , x^1376 mod p(x) , x^1408 mod p(x) */ + { 0x6d76739fe0553f1e, 0x8f68fcec1903da7f }, + /* x^1184 mod p(x) , x^1216 mod p(x) , x^1248 mod p(x) , x^1280 mod p(x) */ + { 0xc133722b1fe0b5c3, 0x3f4840246791d588 }, + /* x^1056 mod p(x) , x^1088 mod p(x) , x^1120 mod p(x) , x^1152 mod p(x) */ + { 
0x64b67ee0e55ef1f3, 0x34c96751b04de25a }, + /* x^928 mod p(x) , x^960 mod p(x) , x^992 mod p(x) , x^1024 mod p(x) */ + { 0x069db049b8fdb1e7, 0x156c8e180b4a395b }, + /* x^800 mod p(x) , x^832 mod p(x) , x^864 mod p(x) , x^896 mod p(x) */ + { 0xa11bfaf3c9e90b9e, 0xe0b99ccbe661f7be }, + /* x^672 mod p(x) , x^704 mod p(x) , x^736 mod p(x) , x^768 mod p(x) */ + { 0x817cdc5119b29a35, 0x041d37768cd75659 }, + /* x^544 mod p(x) , x^576 mod p(x) , x^608 mod p(x) , x^640 mod p(x) */ + { 0x1ce9d94b36c41f1c, 0x3a0777818cfaa965 }, + /* x^416 mod p(x) , x^448 mod p(x) , x^480 mod p(x) , x^512 mod p(x) */ + { 0x4f256efcb82be955, 0x0e148e8252377a55 }, + /* x^288 mod p(x) , x^320 mod p(x) , x^352 mod p(x) , x^384 mod p(x) */ + { 0xec1631edb2dea967, 0x9c25531d19e65dde }, + /* x^160 mod p(x) , x^192 mod p(x) , x^224 mod p(x) , x^256 mod p(x) */ + { 0x5d27e147510ac59a, 0x790606ff9957c0a6 }, + /* x^32 mod p(x) , x^64 mod p(x) , x^96 mod p(x) , x^128 mod p(x) */ + { 0xa66805eb18b8ea18, 0x82f63b786ea2d55c } +#else /* __LITTLE_ENDIAN__ */ + /* x^1952 mod p(x) , x^1984 mod p(x) , x^2016 mod p(x) , x^2048 mod p(x) */ + { 0x7fec2963e5bf8048, 0x5cf015c388e56f72 }, + /* x^1824 mod p(x) , x^1856 mod p(x) , x^1888 mod p(x) , x^1920 mod p(x) */ + { 0x38e888d4844752a9, 0x963a18920246e2e6 }, + /* x^1696 mod p(x) , x^1728 mod p(x) , x^1760 mod p(x) , x^1792 mod p(x) */ + { 0x42316c00730206ad, 0x419a441956993a31 }, + /* x^1568 mod p(x) , x^1600 mod p(x) , x^1632 mod p(x) , x^1664 mod p(x) */ + { 0x543d5c543e65ddf9, 0x924752ba2b830011 }, + /* x^1440 mod p(x) , x^1472 mod p(x) , x^1504 mod p(x) , x^1536 mod p(x) */ + { 0x78e87aaf56767c92, 0x55bd7f9518e4a304 }, + /* x^1312 mod p(x) , x^1344 mod p(x) , x^1376 mod p(x) , x^1408 mod p(x) */ + { 0x8f68fcec1903da7f, 0x6d76739fe0553f1e }, + /* x^1184 mod p(x) , x^1216 mod p(x) , x^1248 mod p(x) , x^1280 mod p(x) */ + { 0x3f4840246791d588, 0xc133722b1fe0b5c3 }, + /* x^1056 mod p(x) , x^1088 mod p(x) , x^1120 mod p(x) , x^1152 mod p(x) */ + { 0x34c96751b04de25a, 
0x64b67ee0e55ef1f3 }, + /* x^928 mod p(x) , x^960 mod p(x) , x^992 mod p(x) , x^1024 mod p(x) */ + { 0x156c8e180b4a395b, 0x069db049b8fdb1e7 }, + /* x^800 mod p(x) , x^832 mod p(x) , x^864 mod p(x) , x^896 mod p(x) */ + { 0xe0b99ccbe661f7be, 0xa11bfaf3c9e90b9e }, + /* x^672 mod p(x) , x^704 mod p(x) , x^736 mod p(x) , x^768 mod p(x) */ + { 0x041d37768cd75659, 0x817cdc5119b29a35 }, + /* x^544 mod p(x) , x^576 mod p(x) , x^608 mod p(x) , x^640 mod p(x) */ + { 0x3a0777818cfaa965, 0x1ce9d94b36c41f1c }, + /* x^416 mod p(x) , x^448 mod p(x) , x^480 mod p(x) , x^512 mod p(x) */ + { 0x0e148e8252377a55, 0x4f256efcb82be955 }, + /* x^288 mod p(x) , x^320 mod p(x) , x^352 mod p(x) , x^384 mod p(x) */ + { 0x9c25531d19e65dde, 0xec1631edb2dea967 }, + /* x^160 mod p(x) , x^192 mod p(x) , x^224 mod p(x) , x^256 mod p(x) */ + { 0x790606ff9957c0a6, 0x5d27e147510ac59a }, + /* x^32 mod p(x) , x^64 mod p(x) , x^96 mod p(x) , x^128 mod p(x) */ + { 0x82f63b786ea2d55c, 0xa66805eb18b8ea18 } +#endif /* __LITTLE_ENDIAN__ */ + }; + +/* Barrett constants */ +/* 33 bit reflected Barrett constant m - (4^32)/n. The table has two entries; in each, only one 64-bit word is non-zero, and the endianness branch decides which word that is. NOTE(review): the second entry (0x105ec76f1) looks like the 33-bit reflected polynomial n itself rather than a quotient - confirm against the generator of this table. */ + +static const __vector unsigned long long v_Barrett_const[2] + __attribute__((aligned (16))) = { + /* x^64 div p(x) */ +#ifdef __LITTLE_ENDIAN__ + { 0x00000000dea713f1, 0x0000000000000000 }, + { 0x0000000105ec76f1, 0x0000000000000000 } +#else /* __LITTLE_ENDIAN__ */ + { 0x0000000000000000, 0x00000000dea713f1 }, + { 0x0000000000000000, 0x0000000105ec76f1 } +#endif /* __LITTLE_ENDIAN__ */ + }; +#endif /* POWER8_INTRINSICS */ + +#endif /* __ASSEMBLER__ */ diff --git a/mysys/crc32ieee.cc b/mysys/crc32ieee.cc new file mode 100644 index 00000000..14e8017d --- /dev/null +++ b/mysys/crc32ieee.cc @@ -0,0 +1,61 @@ +/* Copyright (c) 2020, 2022, MariaDB + + This program is free software; you can redistribute it and/or modify + it under the terms of the GNU General Public License as published by + the Free Software Foundation; version 2 of the License. 
+ + This program is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + GNU General Public License for more details. + + You should have received a copy of the GNU General Public License + along with this program; if not, write to the Free Software + Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1335 USA */ + + +#include +#include +#include + +/* TODO: remove this once zlib adds inherent support for hardware accelerated +crc32 for all architectures. */ +static unsigned int my_crc32_zlib(unsigned int crc, const void *data, + size_t len) +{ + return (unsigned int) crc32(crc, (const Bytef *)data, (unsigned int) len); +} + +#ifdef HAVE_PCLMUL +extern "C" int crc32_pclmul_enabled(); +extern "C" unsigned int crc32_pclmul(unsigned int, const void *, size_t); +#elif defined(__GNUC__) && defined(HAVE_ARMV8_CRC) +extern "C" int crc32_aarch64_available(); +extern "C" unsigned int crc32_aarch64(unsigned int, const void *, size_t); +#endif + + +typedef unsigned int (*my_crc32_t)(unsigned int, const void *, size_t); + +static my_crc32_t init_crc32() +{ +#ifdef HAVE_PCLMUL + if (crc32_pclmul_enabled()) + return crc32_pclmul; +#elif defined(__GNUC__) && defined(HAVE_ARMV8_CRC) + if (crc32_aarch64_available()) + return crc32_aarch64; +#endif + return my_crc32_zlib; +} + +static const my_crc32_t my_checksum_func= init_crc32(); + +#ifdef __powerpc64__ +# error "my_checksum() is defined in mysys/crc32/crc32_ppc64.c" +#endif +extern "C" +uint32 my_checksum(uint32 crc, const void *data, size_t len) +{ + return my_checksum_func(crc, data, len); +} diff --git a/mysys/errors.c b/mysys/errors.c new file mode 100644 index 00000000..d88540fe --- /dev/null +++ b/mysys/errors.c @@ -0,0 +1,132 @@ +/* Copyright (c) 2000, 2016, Oracle and/or its affiliates. 
+ Copyright (c) 2009, 2016, MariaDB + + This program is free software; you can redistribute it and/or modify + it under the terms of the GNU General Public License as published by + the Free Software Foundation; version 2 of the License. + + This program is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + GNU General Public License for more details. + + You should have received a copy of the GNU General Public License + along with this program; if not, write to the Free Software + Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1335 USA */ + +#include "mysys_priv.h" +#include "mysys_err.h" + +#ifndef SHARED_LIBRARY +/* Format strings for the mysys error messages, one per slot of globerrs[]. This initializer is used when SHARED_LIBRARY is not defined; the #else branch further down assigns the messages through EE() at run time instead. The table is positional: the empty string occupies a slot without a message so that the entries after it keep their index. NOTE(review): %M is not a standard printf conversion - presumably it is expanded by the project's own formatter; confirm. */ +const char *globerrs[GLOBERRS]= +{ + "Can't create/write to file '%s' (Errcode: %M)", + "Error reading file '%s' (Errcode: %M)", + "Error writing file '%s' (Errcode: %M)", + "Error on close of '%s' (Errcode: %M)", + "Out of memory (Needed %u bytes)", + "Error on delete of '%s' (Errcode: %M)", + "Error on rename of '%s' to '%s' (Errcode: %M)", + "", + "Unexpected end-of-file found when reading file '%s' (Errcode: %M)", + "Can't lock file (Errcode: %M)", + "Can't unlock file (Errcode: %M)", + "Can't read dir of '%s' (Errcode: %M)", + "Can't get stat of '%s' (Errcode: %M)", + "Can't change size of file (Errcode: %M)", + "Can't open stream from handle (Errcode: %M)", + "Can't get working directory (Errcode: %M)", + "Can't change dir to '%s' (Errcode: %M)", + "Warning: '%s' had %d links", + "Warning: %d files and %d streams is left open\n", + "Disk is full writing '%s' (Errcode: %M). Waiting for someone to free space... 
(Expect up to %d secs delay for server to continue after freeing disk space)", + "Can't create directory '%s' (Errcode: %M)", + "Character set '%s' is not a compiled character set and is not specified in the '%s' file", + "Out of resources when opening file '%s' (Errcode: %M)", + "Can't read value for symlink '%s' (Errcode: %M)", + "Can't create symlink '%s' pointing at '%s' (Errcode: %M)", + "Error on realpath() on '%s' (Errcode: %M)", + "Can't sync file '%s' to disk (Errcode: %M)", + "Collation '%s' is not a compiled collation and is not specified in the '%s' file", + "File '%s' not found (Errcode: %M)", + "File '%s' (fileno: %d) was not closed", + "Can't change ownership of the file '%s' (Errcode: %M)", + "Can't change permissions of the file '%s' (Errcode: %M)", + "Can't seek in file '%s' (Errcode: %M)", + "Can't change mode for file '%s' to 0x%lx (Errcode: %M)", + "Warning: Can't copy ownership for file '%s' (Errcode: %M)", + "Failed to release memory pointer %p, %zu bytes (Errcode: %M)", + "Lock Pages in memory access rights required", + "Memcntl %s cmd %s error", + "Warning: Charset id '%d' csname '%s' trying to replace existing csname '%s'", +}; + +void init_glob_errs(void) +{ + /* This is now done statically. The globerrs[] initializer above already holds every message, so this function intentionally does nothing.
*/ +} + +#else + +void init_glob_errs() +{ + EE(EE_CANTCREATEFILE) = "Can't create/write to file '%s' (Errcode: %M)"; + EE(EE_READ) = "Error reading file '%s' (Errcode: %M)"; + EE(EE_WRITE) = "Error writing file '%s' (Errcode: %M)"; + EE(EE_BADCLOSE) = "Error on close of '%'s (Errcode: %M)"; + EE(EE_OUTOFMEMORY) = "Out of memory (Needed %u bytes)"; + EE(EE_DELETE) = "Error on delete of '%s' (Errcode: %M)"; + EE(EE_LINK) = "Error on rename of '%s' to '%s' (Errcode: %M)"; + EE(EE_EOFERR) = "Unexpected eof found when reading file '%s' (Errcode: %M)"; + EE(EE_CANTLOCK) = "Can't lock file (Errcode: %M)"; + EE(EE_CANTUNLOCK) = "Can't unlock file (Errcode: %M)"; + EE(EE_DIR) = "Can't read dir of '%s' (Errcode: %M)"; + EE(EE_STAT) = "Can't get stat of '%s' (Errcode: %M)"; + EE(EE_CANT_CHSIZE) = "Can't change size of file (Errcode: %M)"; + EE(EE_CANT_OPEN_STREAM)= "Can't open stream from handle (Errcode: %M)"; + EE(EE_GETWD) = "Can't get working directory (Errcode: %M)"; + EE(EE_SETWD) = "Can't change dir to '%s' (Errcode: %M)"; + EE(EE_LINK_WARNING) = "Warning: '%s' had %d links"; + EE(EE_OPEN_WARNING) = "Warning: %d files and %d streams is left open\n"; + EE(EE_DISK_FULL) = "Disk is full writing '%s' (Errcode: %M). Waiting for someone to free space... 
(Expect up to %d secs delay for server to continue after freeing disk space)", + EE(EE_CANT_MKDIR) ="Can't create directory '%s' (Errcode: %M)"; + EE(EE_UNKNOWN_CHARSET)= "Character set '%s' is not a compiled character set and is not specified in the %s file"; + EE(EE_OUT_OF_FILERESOURCES)="Out of resources when opening file '%s' (Errcode: %M)"; + EE(EE_CANT_READLINK)= "Can't read value for symlink '%s' (Errcode: %M)"; + EE(EE_CANT_SYMLINK)= "Can't create symlink '%s' pointing at '%s' (Errcode: %M)"; + EE(EE_REALPATH)= "Error on realpath() on '%s' (Errcode: %M)"; + EE(EE_SYNC)= "Can't sync file '%s' to disk (Errcode: %M)"; + EE(EE_UNKNOWN_COLLATION)= "Collation '%s' is not a compiled collation and is not specified in the %s file"; + EE(EE_FILENOTFOUND) = "File '%s' not found (Errcode: %M)"; + EE(EE_FILE_NOT_CLOSED) = "File '%s' (fileno: %d) was not closed"; + EE(EE_CHANGE_OWNERSHIP) = "Can't change ownership of the file '%s' (Errcode: %M)"; + EE(EE_CHANGE_PERMISSIONS) = "Can't change permissions of the file '%s' (Errcode: %M)"; + EE(EE_CANT_SEEK) = "Can't seek in file '%s' (Errcode: %M)"; + EE(EE_CANT_CHMOD) = "Can't change mode for file '%s' to 0x%lx (Errcode: %M)"; + EE(EE_CANT_COPY_OWNERSHIP)= "Warning: Can't copy ownership for file '%s' (Errcode: %M)"; + EE(EE_BADMEMORYRELEASE)= "Failed to release memory pointer %p, %zu bytes (Errcode: %M)"; + EE(EE_PERM_LOCK_MEMORY)= "Lock Pages in memory access rights required"; + EE(EE_MEMCNTL) = "Memcntl %s cmd %s error"; + EE(EE_DUPLICATE_CHARSET)= "Warning: Charset id %d trying to replace csname %s with %s"; +} +#endif + +void wait_for_free_space(const char *filename, int errors) +{ + if (errors == 0) + my_error(EE_DISK_FULL,MYF(ME_BELL | ME_ERROR_LOG | ME_WARNING), + filename,my_errno,MY_WAIT_FOR_USER_TO_FIX_PANIC); + if (!(errors % MY_WAIT_GIVE_USER_A_MESSAGE)) + my_printf_error(EE_DISK_FULL, + "Retry in %d secs. 
Message reprinted in %d secs", + MYF(ME_BELL | ME_ERROR_LOG | ME_WARNING), + MY_WAIT_FOR_USER_TO_FIX_PANIC, + MY_WAIT_GIVE_USER_A_MESSAGE * MY_WAIT_FOR_USER_TO_FIX_PANIC ); + (void) sleep(MY_WAIT_FOR_USER_TO_FIX_PANIC); +} + +const char **get_global_errmsgs(int nr __attribute__((unused))) +{ + return globerrs; +} diff --git a/mysys/file_logger.c b/mysys/file_logger.c new file mode 100644 index 00000000..a753c049 --- /dev/null +++ b/mysys/file_logger.c @@ -0,0 +1,248 @@ +/* Copyright (C) 2012 Monty Program Ab + + This program is free software; you can redistribute it and/or modify + it under the terms of the GNU General Public License as published by + the Free Software Foundation; version 2 of the License. + + This program is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + GNU General Public License for more details. + + You should have received a copy of the GNU General Public License + along with this program; if not, write to the Free Software + Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1335 USA */ + + +#ifndef FLOGGER_SKIP_INCLUDES +#include "my_global.h" +#include +#include +#include +#include +#endif /*FLOGGER_SKIP_INCLUDES*/ + +#ifndef flogger_mutex_init +#define flogger_mutex_init(A,B,C) mysql_mutex_init(A,B,C) +#define flogger_mutex_destroy(A) mysql_mutex_destroy(A) +#define flogger_mutex_lock(A) mysql_mutex_lock(A) +#define flogger_mutex_unlock(A) mysql_mutex_unlock(A) +#endif /*flogger_mutex_init*/ + +#ifdef HAVE_PSI_INTERFACE +/* These belong to the service initialization */ +static PSI_mutex_key key_LOCK_logger_service; +static PSI_mutex_info mutex_list[]= +{{ &key_LOCK_logger_service, "logger_service_file_st::lock", PSI_FLAG_GLOBAL}}; +#endif + +typedef struct logger_handle_st { + File file; + char path[FN_REFLEN]; + unsigned long long size_limit; + unsigned int rotations; + size_t path_len; + 
mysql_mutex_t lock; +} LSFS; + + +#define LOG_FLAGS (O_APPEND | O_CREAT | O_WRONLY) + +static unsigned int n_dig(unsigned int i) +{ + return (i == 0) ? 0 : ((i < 10) ? 1 : ((i < 100) ? 2 : 3)); +} + + +LOGGER_HANDLE *logger_open(const char *path, + unsigned long long size_limit, + unsigned int rotations) +{ + LOGGER_HANDLE new_log, *l_perm; + /* + I don't think we ever need more rotations, + but if it's so, the rotation procedure should be adapted to it. + */ + if (rotations > 999) + return 0; + + new_log.rotations= rotations; + new_log.size_limit= size_limit; + new_log.path_len= strlen(fn_format(new_log.path, path, + mysql_data_home, "", MY_UNPACK_FILENAME)); + + if (new_log.path_len+n_dig(rotations)+1 > FN_REFLEN) + { + errno= ENAMETOOLONG; + /* File path too long */ + return 0; + } + if ((new_log.file= my_open(new_log.path, LOG_FLAGS, MYF(0))) < 0) + { + errno= my_errno; + /* Check errno for the cause */ + return 0; + } + + if (!(l_perm= (LOGGER_HANDLE *) my_malloc(PSI_INSTRUMENT_ME, + sizeof(LOGGER_HANDLE), MYF(0)))) + { + my_close(new_log.file, MYF(0)); + new_log.file= -1; + return 0; /* End of memory */ + } + *l_perm= new_log; + flogger_mutex_init(key_LOCK_logger_service, &l_perm->lock, + MY_MUTEX_INIT_FAST); + return l_perm; +} + +int logger_close(LOGGER_HANDLE *log) +{ + int result; + File file= log->file; + flogger_mutex_destroy(&log->lock); + my_free(log); + if ((result= my_close(file, MYF(0)))) + errno= my_errno; + return result; +} + + +static char *logname(LOGGER_HANDLE *log, char *buf, unsigned int n_log) +{ + sprintf(buf+log->path_len, ".%0*u", n_dig(log->rotations), n_log); + return buf; +} + + +static int do_rotate(LOGGER_HANDLE *log) +{ + char namebuf[FN_REFLEN]; + int result; + unsigned int i; + char *buf_old, *buf_new, *tmp; + + if (log->rotations == 0) + return 0; + + memcpy(namebuf, log->path, log->path_len); + + buf_new= logname(log, namebuf, log->rotations); + buf_old= log->path; + for (i=log->rotations-1; i>0; i--) + { + logname(log, 
buf_old, i); + if (!access(buf_old, F_OK) && + (result= my_rename(buf_old, buf_new, MYF(0)))) + goto exit; + tmp= buf_old; + buf_old= buf_new; + buf_new= tmp; + } + if ((result= my_close(log->file, MYF(0)))) + goto exit; + namebuf[log->path_len]= 0; + result= my_rename(namebuf, logname(log, log->path, 1), MYF(0)); + log->file= my_open(namebuf, LOG_FLAGS, MYF(0)); +exit: + errno= my_errno; + return log->file < 0 || result; +} + + +/* + Return 1 if we should rotate the log +*/ + +my_bool logger_time_to_rotate(LOGGER_HANDLE *log) +{ + my_off_t filesize; + if (log->rotations > 0 && + (filesize= my_tell(log->file, MYF(0))) != (my_off_t) -1 && + ((ulonglong) filesize >= log->size_limit)) + return 1; + return 0; +} + + +int logger_vprintf(LOGGER_HANDLE *log, const char* fmt, va_list ap) +{ + int result; + char cvtbuf[1024]; + size_t n_bytes; + + flogger_mutex_lock(&log->lock); + if (logger_time_to_rotate(log) && do_rotate(log)) + { + result= -1; + errno= my_errno; + goto exit; /* Log rotation needed but failed */ + } + + n_bytes= my_vsnprintf(cvtbuf, sizeof(cvtbuf), fmt, ap); + if (n_bytes >= sizeof(cvtbuf)) + n_bytes= sizeof(cvtbuf) - 1; + + result= (int)my_write(log->file, (uchar *) cvtbuf, n_bytes, MYF(0)); + +exit: + flogger_mutex_unlock(&log->lock); + return result; +} + + +static int logger_write_r(LOGGER_HANDLE *log, my_bool allow_rotations, + const char *buffer, size_t size) +{ + int result; + + flogger_mutex_lock(&log->lock); + if (allow_rotations && logger_time_to_rotate(log) && do_rotate(log)) + { + result= -1; + errno= my_errno; + goto exit; /* Log rotation needed but failed */ + } + + result= (int)my_write(log->file, (uchar *) buffer, size, MYF(0)); + +exit: + flogger_mutex_unlock(&log->lock); + return result; +} + + +int logger_write(LOGGER_HANDLE *log, const char *buffer, size_t size) +{ + return logger_write_r(log, TRUE, buffer, size); +} + +int logger_rotate(LOGGER_HANDLE *log) +{ + int result; + flogger_mutex_lock(&log->lock); + result= do_rotate(log); + 
flogger_mutex_unlock(&log->lock); + return result; +} + + +int logger_printf(LOGGER_HANDLE *log, const char *fmt, ...) +{ + int result; + va_list args; + va_start(args,fmt); + result= logger_vprintf(log, fmt, args); + va_end(args); + return result; +} + +void logger_init_mutexes() +{ +#ifdef HAVE_PSI_INTERFACE + if (unlikely(PSI_server)) + PSI_server->register_mutex("sql_logger", mutex_list, 1); +#endif +} + diff --git a/mysys/get_password.c b/mysys/get_password.c new file mode 100644 index 00000000..bf474912 --- /dev/null +++ b/mysys/get_password.c @@ -0,0 +1,230 @@ +/* Copyright (c) 2000, 2001, 2003, 2006, 2008 MySQL AB + Use is subject to license terms + + This program is free software; you can redistribute it and/or modify + it under the terms of the GNU General Public License as published by + the Free Software Foundation; version 2 of the License. + + This program is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + GNU General Public License for more details. + + You should have received a copy of the GNU General Public License + along with this program; if not, write to the Free Software + Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1335 USA */ + +/* +** Ask for a password from tty +** This is an own file to avoid conflicts with curses +*/ +#include +#include +#include "mysql.h" +#include +#include + +#ifdef HAVE_GETPASS +#ifdef HAVE_PWD_H +#include +#endif /* HAVE_PWD_H */ +#else /* ! 
HAVE_GETPASS */
+#ifndef _WIN32
+/* NOTE(review): the bare #include lines in this ladder have no operand in
+   this copy of the patch - the header names appear to have been lost;
+   restore them from the upstream file. */
+#include
+#ifdef HAVE_TERMIOS_H /* For tty-password */
+#include
+#define TERMIO struct termios
+#else
+#ifdef HAVE_TERMIO_H /* For tty-password */
+#include
+#define TERMIO struct termio
+#else
+#include
+#define TERMIO struct sgttyb
+#endif
+#endif
+#ifdef alpha_linux_port
+#include /* QQ; Fix this in configure */
+#include
+#endif
+#else
+#include
+#endif /* _WIN32 */
+#endif /* HAVE_GETPASS */
+
+#ifdef HAVE_GETPASSPHRASE /* For Solaris */
+#define getpass(A) getpassphrase(A)
+#endif
+
+#ifdef _WIN32
+/* were just going to fake it here and get input from
+   the keyboard */
+
+/*
+  Windows variant: prompt on the console and read a password with _getwch().
+
+  opt_message is the prompt; "Enter password: " is used when it is NULL.
+  Input stops at newline, carriage return, character code 3, or when the
+  wide buffer is full. The collected UTF-16 text is converted with
+  WideCharToMultiByte() into a buffer obtained from my_malloc().
+
+  Returns the converted password, or NULL if the conversion or the
+  allocation fails. Presumably the caller releases it with my_free() -
+  confirm against the callers.
+*/
+char *my_get_tty_password(const char *opt_message)
+{
+  wchar_t wbuf[80];
+  char *to;
+  int to_len;
+  UINT cp;
+  /* end leaves room for the terminating 0 stored after the loop */
+  wchar_t *pos=wbuf,*end=wbuf + array_elements(wbuf)-1;
+  DBUG_ENTER("my_get_tty_password");
+  _cputs(opt_message ? opt_message : "Enter password: ");
+  for (;;)
+  {
+    int wc;
+    wc=_getwch();
+    if (wc == '\b' || wc == 127)
+    {
+      /* Backspace/DEL: drop the last stored unit and erase its echo */
+      if (pos != wbuf)
+      {
+        _cputs("\b \b");
+        pos--;
+        continue;
+      }
+      /* With an empty buffer we fall through to the checks below */
+    }
+    if (wc == '\n' || wc == '\r' || wc == 3 || pos == end)
+      break;
+    if (iswcntrl(wc))
+      continue;
+
+    /* Do not print '*' for half-unicode char(high surrogate)*/
+    if (wc < 0xD800 || wc > 0xDBFF)
+    {
+      _cputs("*");
+    }
+    *(pos++)= (wchar_t)wc;
+  }
+  *pos=0;
+  _cputs("\n");
+
+  /*
+    Allocate output string, and convert UTF16 password to output codepage.
+  */
+  cp= GetACP() == CP_UTF8 ? CP_UTF8 : GetConsoleCP();
+
+  /* First call only asks for the size needed, including the terminator */
+  if (!(to_len= WideCharToMultiByte(cp, 0, wbuf, -1, NULL, 0, NULL, NULL)))
+    DBUG_RETURN(NULL);
+
+  if (!(to= my_malloc(PSI_INSTRUMENT_ME, to_len, MYF(MY_FAE))))
+    DBUG_RETURN(NULL);
+
+  if (!WideCharToMultiByte(cp, 0, wbuf, -1, to, to_len, NULL, NULL))
+  {
+    my_free(to);
+    DBUG_RETURN(NULL);
+  }
+  DBUG_RETURN(to);
+}
+
+#else
+
+
+#ifndef HAVE_GETPASS
+/*
+** Can't use fgets, because readline will get confused
+** length is max number of chars in to, not counting \0
+* to will not include the eol characters.
+*/ + +static void get_password(char *to,uint length,int fd, my_bool echo) +{ + char *pos=to,*end=to+length; + + for (;;) + { + uchar tmp; + if (my_read(fd,&tmp,1,MYF(0)) != 1) + break; + if (tmp == '\b' || (int) tmp == 127) + { + if (pos != to) + { + if (echo) + { + fputs("\b \b",stderr); + fflush(stderr); + } + pos--; + continue; + } + } + if (tmp == '\n' || tmp == '\r' || tmp == 3) + break; + if (iscntrl(tmp) || pos == end) + continue; + if (echo) + { + fputc('*',stderr); + fflush(stderr); + } + *(pos++)= (char) tmp; + } + while (pos != to && isspace(pos[-1]) == ' ') + pos--; /* Allow dummy space at end */ + *pos=0; + return; +} + +#endif /* ! HAVE_GETPASS */ + + +char *my_get_tty_password(const char *opt_message) +{ +#ifdef HAVE_GETPASS + char *passbuff; +#else /* ! HAVE_GETPASS */ + TERMIO org,tmp; +#endif /* HAVE_GETPASS */ + char buff[80]; + + DBUG_ENTER("my_get_tty_password"); + +#ifdef HAVE_GETPASS + passbuff = getpass(opt_message ? opt_message : "Enter password: "); + + /* copy the password to buff and clear original (static) buffer */ + strnmov(buff, passbuff, sizeof(buff) - 1); +#ifdef _PASSWORD_LEN + memset(passbuff, 0, _PASSWORD_LEN); +#endif +#else + if (isatty(fileno(stderr))) + { + fputs(opt_message ? 
opt_message : "Enter password: ",stderr); + fflush(stderr); + } +#if defined(HAVE_TERMIOS_H) + tcgetattr(fileno(stdin), &org); + tmp = org; + tmp.c_lflag &= ~(ECHO | ISIG | ICANON); + tmp.c_cc[VMIN] = 1; + tmp.c_cc[VTIME] = 0; + tcsetattr(fileno(stdin), TCSADRAIN, &tmp); + get_password(buff, sizeof(buff)-1, fileno(stdin), isatty(fileno(stderr))); + tcsetattr(fileno(stdin), TCSADRAIN, &org); +#elif defined(HAVE_TERMIO_H) + ioctl(fileno(stdin), (int) TCGETA, &org); + tmp=org; + tmp.c_lflag &= ~(ECHO | ISIG | ICANON); + tmp.c_cc[VMIN] = 1; + tmp.c_cc[VTIME]= 0; + ioctl(fileno(stdin),(int) TCSETA, &tmp); + get_password(buff,sizeof(buff)-1,fileno(stdin),isatty(fileno(stderr))); + ioctl(fileno(stdin),(int) TCSETA, &org); +#else + gtty(fileno(stdin), &org); + tmp=org; + tmp.sg_flags &= ~ECHO; + tmp.sg_flags |= RAW; + stty(fileno(stdin), &tmp); + get_password(buff,sizeof(buff)-1,fileno(stdin),isatty(fileno(stderr))); + stty(fileno(stdin), &org); +#endif + if (isatty(fileno(stderr))) + fputc('\n',stderr); +#endif /* HAVE_GETPASS */ + + DBUG_RETURN(my_strdup(PSI_INSTRUMENT_ME, buff, MYF(MY_FAE))); +} + +#endif /*_WIN32*/ diff --git a/mysys/guess_malloc_library.c b/mysys/guess_malloc_library.c new file mode 100644 index 00000000..ed86ae06 --- /dev/null +++ b/mysys/guess_malloc_library.c @@ -0,0 +1,65 @@ +/* Copyright (c) 2002, 2015, Oracle and/or its affiliates. + Copyright (c) 2012, 2017, MariaDB Corporation. + + This program is free software; you can redistribute it and/or modify + it under the terms of the GNU General Public License as published by + the Free Software Foundation; version 2 of the License. + + This program is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + GNU General Public License for more details. 
+ + You should have received a copy of the GNU General Public License + along with this program; if not, write to the Free Software + Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA */ + +/* guess_malloc_library() deduces, to the best of its ability, + the currently used malloc library and its version */ + +#include +#include "my_global.h" +#include + +typedef const char* (*tc_version_type)(int*, int*, const char**); +typedef int (*mallctl_type)(const char*, void*, size_t*, void*, size_t); + +char *guess_malloc_library() +{ + tc_version_type tc_version_func; + mallctl_type mallctl_func; +#ifndef HAVE_DLOPEN + return (char*) MALLOC_LIBRARY; +#else + static char buf[128]; + + if (strcmp(MALLOC_LIBRARY, "system") != 0) + { + return (char*) MALLOC_LIBRARY; + } + + /* tcmalloc */ + tc_version_func= (tc_version_type) dlsym(RTLD_DEFAULT, "tc_version"); + if (tc_version_func) + { + int major, minor; + const char* ver_str = tc_version_func(&major, &minor, NULL); + strxnmov(buf, sizeof(buf)-1, "tcmalloc ", ver_str, NULL); + return buf; + } + + /* jemalloc */ + mallctl_func= (mallctl_type) dlsym(RTLD_DEFAULT, "mallctl"); + if (mallctl_func) + { + char *ver; + size_t len = sizeof(ver); + mallctl_func("version", &ver, &len, NULL, 0); + strxnmov(buf, sizeof(buf)-1, "jemalloc ", ver, NULL); + return buf; + } + + return (char*) MALLOC_LIBRARY; +#endif +} + diff --git a/mysys/hash.c b/mysys/hash.c new file mode 100644 index 00000000..fccd4a24 --- /dev/null +++ b/mysys/hash.c @@ -0,0 +1,947 @@ +/* Copyright (c) 2000, 2010, Oracle and/or its affiliates. + Copyright (c) 2011, 2020, MariaDB Corporation. + + This program is free software; you can redistribute it and/or modify + it under the terms of the GNU General Public License as published by + the Free Software Foundation; version 2 of the License. 
+ + This program is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + GNU General Public License for more details. + + You should have received a copy of the GNU General Public License + along with this program; if not, write to the Free Software + Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1335 USA */ + +/* The hash functions used for saveing keys */ +/* One of key_length or key_length_offset must be given */ +/* Key length of 0 isn't allowed */ + +#include "mysys_priv.h" +#include +#include +#include "hash.h" + +#define NO_RECORD ~((my_hash_value_type) 0) +#define LOWFIND 1 +#define LOWUSED 2 +#define HIGHFIND 4 +#define HIGHUSED 8 + +typedef struct st_hash_info { + uint32 next; /* index to next key */ + my_hash_value_type hash_nr; + uchar *data; /* data for current entry */ +} HASH_LINK; + +static uint my_hash_mask(my_hash_value_type hashnr, + size_t buffmax, size_t maxlength); +static void movelink(HASH_LINK *array,uint pos,uint next_link,uint newlink); +static int hashcmp(const HASH *hash, HASH_LINK *pos, const uchar *key, + size_t length); + +my_hash_value_type my_hash_sort(CHARSET_INFO *cs, const uchar *key, + size_t length) +{ + ulong nr1= 1, nr2= 4; + my_ci_hash_sort(cs, (uchar*) key, length, &nr1, &nr2); + return (my_hash_value_type) nr1; +} + +/** + @brief Initialize the hash + + @details + + Initialize the hash, by defining and giving valid values for + its elements. The failure to allocate memory for the + hash->array element will not result in a fatal failure. The + dynamic array that is part of the hash will allocate memory + as required during insertion. 
+ + @param[in] psi_key The key to register instrumented memory + @param[in,out] hash The hash that is initialized + @param[in] growth_size size incrememnt for the underlying dynarray + @param[in] charset The character set information + @param[in] size The hash size + @param[in] key_offest The key offset for the hash + @param[in] key_length The length of the key used in + the hash + @param[in] get_key get the key for the hash + @param[in] free_element pointer to the function that + does cleanup + @param[in] flags flags set in the hash + @return indicates success or failure of initialization + @retval 0 success + @retval 1 failure +*/ +my_bool +my_hash_init2(PSI_memory_key psi_key, HASH *hash, size_t growth_size, + CHARSET_INFO *charset, size_t size, size_t key_offset, + size_t key_length, my_hash_get_key get_key, + my_hash_function hash_function, + void (*free_element)(void*), uint flags) +{ + my_bool res; + DBUG_ENTER("my_hash_init2"); + DBUG_PRINT("enter",("hash:%p size: %u", hash, (uint) size)); + + hash->records=0; + hash->key_offset=key_offset; + hash->key_length=key_length; + hash->blength=1; + hash->get_key=get_key; + hash->hash_function= hash_function ? hash_function : my_hash_sort; + hash->free=free_element; + hash->flags=flags; + hash->charset=charset; + res= init_dynamic_array2(psi_key, &hash->array, sizeof(HASH_LINK), NULL, size, + growth_size, MYF((flags & HASH_THREAD_SPECIFIC ? + MY_THREAD_SPECIFIC : 0))); + DBUG_RETURN(res); +} + + +/* + Call hash->free on all elements in hash. 
+
+  SYNOPSIS
+    my_hash_free_elements()
+    hash   hash table
+
+  NOTES:
+    Sets records to 0
+*/
+
+static inline void my_hash_free_elements(HASH *hash)
+{
+  HASH_LINK *link, *last;
+  uint n_records= hash->records;
+
+  if (!n_records)
+    return;
+
+  /*
+    Clear the counter before anything is released, so that nobody
+    inspecting the structure meanwhile sees entries that are being freed
+  */
+  hash->records= 0;
+
+  if (!hash->free)
+    return;
+
+  link= dynamic_element(&hash->array,0,HASH_LINK*);
+  for (last= link + n_records; link < last; link++)
+    (*hash->free)(link->data);
+}
+
+
+/*
+  Release everything owned by the hash.
+
+  SYNOPSIS
+    my_hash_free()
+    hash   the hash to delete elements of
+
+  NOTES: my_hash_init must be called again before the hash can be reused.
+*/
+
+void my_hash_free(HASH *hash)
+{
+  DBUG_ENTER("my_hash_free");
+  DBUG_PRINT("enter",("hash:%p elements: %ld",
+                      hash, hash->records));
+
+  my_hash_free_elements(hash);
+  hash->free= 0;
+  delete_dynamic(&hash->array);
+  hash->blength= 0;
+  DBUG_VOID_RETURN;
+}
+
+
+/*
+  Delete all elements from the hash (the hash itself is to be reused).
+ + SYNOPSIS + my_hash_reset() + hash the hash to delete elements of +*/ + +void my_hash_reset(HASH *hash) +{ + DBUG_ENTER("my_hash_reset"); + DBUG_PRINT("enter",("hash:%p", hash)); + + my_hash_free_elements(hash); + reset_dynamic(&hash->array); + /* Set row pointers so that the hash can be reused at once */ + hash->blength= 1; + DBUG_VOID_RETURN; +} + +/* some helper functions */ + +/* + This function is char* instead of uchar* as HPUX11 compiler can't + handle inline functions that are not defined as native types +*/ + +static inline char* +my_hash_key(const HASH *hash, const uchar *record, size_t *length, + my_bool first) +{ + if (hash->get_key) + return (char*) (*hash->get_key)(record,length,first); + *length=hash->key_length; + return (char*) record+hash->key_offset; +} + + /* Calculate pos according to keys */ + +static uint my_hash_mask(my_hash_value_type hashnr, size_t buffmax, + size_t maxlength) +{ + if ((hashnr & (buffmax-1)) < maxlength) + return (uint) (hashnr & (buffmax-1)); + return (uint) (hashnr & ((buffmax >> 1) -1)); +} + +static inline uint my_hash_rec_mask(HASH_LINK *pos, + size_t buffmax, size_t maxlength) +{ + return my_hash_mask(pos->hash_nr, buffmax, maxlength); +} + + + +/* for compilers which can not handle inline */ +static +#if !defined(__USLC__) && !defined(__sgi) +inline +#endif +my_hash_value_type rec_hashnr(HASH *hash,const uchar *record) +{ + size_t length; + uchar *key= (uchar*) my_hash_key(hash, record, &length, 0); + return hash->hash_function(hash->charset, key, length); +} + + +uchar* my_hash_search(const HASH *hash, const uchar *key, size_t length) +{ + HASH_SEARCH_STATE state; + return my_hash_first(hash, key, length, &state); +} + +uchar* my_hash_search_using_hash_value(const HASH *hash, + my_hash_value_type hash_value, + const uchar *key, + size_t length) +{ + HASH_SEARCH_STATE state; + return my_hash_first_from_hash_value(hash, hash_value, + key, length, &state); +} + + +/* + Search after a record based on a key + + NOTE 
+ Assigns the number of the found record to HASH_SEARCH_STATE state +*/ + +uchar* my_hash_first(const HASH *hash, const uchar *key, size_t length, + HASH_SEARCH_STATE *current_record) +{ + uchar *res; + DBUG_ASSERT(my_hash_inited(hash)); + + res= my_hash_first_from_hash_value(hash, + hash->hash_function(hash->charset, key, + length ? length : + hash->key_length), + key, length, current_record); + return res; +} + + +uchar* my_hash_first_from_hash_value(const HASH *hash, + my_hash_value_type hash_value, + const uchar *key, + size_t length, + HASH_SEARCH_STATE *current_record) +{ + HASH_LINK *pos; + DBUG_ENTER("my_hash_first_from_hash_value"); + + if (hash->records) + { + uint flag= 1; + uint idx= my_hash_mask(hash_value, + hash->blength, hash->records); + if (!length) + length= hash->key_length; // length for fixed length keys or 0 + do + { + pos= dynamic_element(&hash->array,idx,HASH_LINK*); + if (!hashcmp(hash,pos,key,length)) + { + DBUG_PRINT("exit",("found key at %d",idx)); + *current_record= idx; + DBUG_RETURN (pos->data); + } + if (flag) + { + flag=0; /* Reset flag */ + if (my_hash_rec_mask(pos, hash->blength, hash->records) != idx) + break; /* Wrong link */ + } + } + while ((idx=pos->next) != NO_RECORD); + } + *current_record= NO_RECORD; + DBUG_RETURN(0); +} + + /* Get next record with identical key */ + /* Can only be called if previous calls was my_hash_search */ + +uchar* my_hash_next(const HASH *hash, const uchar *key, size_t length, + HASH_SEARCH_STATE *current_record) +{ + HASH_LINK *pos; + uint idx; + + if (*current_record != NO_RECORD) + { + HASH_LINK *data=dynamic_element(&hash->array,0,HASH_LINK*); + if (!length) + length= hash->key_length; // length for fixed length keys or 0 + for (idx=data[*current_record].next; idx != NO_RECORD ; idx=pos->next) + { + pos=data+idx; + if (!hashcmp(hash,pos,key,length)) + { + *current_record= idx; + return pos->data; + } + } + *current_record= NO_RECORD; + } + return 0; +} + + + /* Change link from pos to new_link 
*/
+
+/*
+  Starting at index next_link, follow the next pointers until the link
+  whose next equals find, and make that link point at newlink instead.
+*/
+static void movelink(HASH_LINK *array,uint find,uint next_link,uint newlink)
+{
+  HASH_LINK *old_link;
+  do
+  {
+    old_link=array+next_link;
+  }
+  while ((next_link=old_link->next) != find);
+  old_link->next= newlink;
+  return;
+}
+
+/*
+  Compare a key in a record to a whole key. Return 0 if identical
+
+  SYNOPSIS
+    hashcmp()
+    hash   hash table
+    pos    position of hash record to use in comparison
+    key    key for comparison
+    length length of key
+
+  NOTES:
+    length equal 0 can mean 2 things:
+      1) it is fixed key length hash (HASH::key_length != 0) and
+         default length should be taken in this case
+      2) it is really 0 length key for variable key length hash
+         (HASH::key_length == 0)
+
+  RETURN
+    = 0  key of record == key
+    != 0 key of record != key
+ */
+
+static int hashcmp(const HASH *hash, HASH_LINK *pos, const uchar *key,
+                   size_t length)
+{
+  size_t rec_keylength;
+  uchar *rec_key;
+  rec_key= (uchar*) my_hash_key(hash, pos->data, &rec_keylength, 1);
+  /* Keys of different length never match; otherwise let the collation
+     decide */
+  return (length != rec_keylength) ||
+         my_strnncoll(hash->charset, (uchar*) rec_key, rec_keylength,
+                      (uchar*) key, rec_keylength);
+}
+
+
+/**
+   Write a hash-key to the hash-index
+
+   The function first walks the chain that starts at position
+   records - (blength >> 1) and separates its entries by the halfbuff bit
+   of their stored hash value, then places the new record at the position
+   computed for records + 1 entries.
+
+   @return
+   @retval  0  ok
+   @retval  1  Duplicate key or out of memory
+*/
+
+my_bool my_hash_insert(HASH *info, const uchar *record)
+{
+  int flag;                          /* LOWFIND/LOWUSED/HIGHFIND/HIGHUSED */
+  size_t idx, halfbuff, first_index;
+  size_t length;
+  my_hash_value_type current_hash_nr, UNINIT_VAR(rec_hash_nr),
+    UNINIT_VAR(rec2_hash_nr);
+  uchar *UNINIT_VAR(rec_data),*UNINIT_VAR(rec2_data), *key;
+  HASH_LINK *data,*empty,*UNINIT_VAR(gpos),*UNINIT_VAR(gpos2),*pos;
+
+  key= (uchar*) my_hash_key(info, record, &length, 1);
+  current_hash_nr= info->hash_function(info->charset, key, length);
+
+  if (info->flags & HASH_UNIQUE)
+  {
+    if (my_hash_search_using_hash_value(info, current_hash_nr, key, length))
+      return(TRUE); /* Duplicate entry */
+  }
+
+  flag=0;
+  /* Reserve the slot for the new entry; empty tracks the free slot */
+  if (!(empty=(HASH_LINK*) alloc_dynamic(&info->array)))
+    return(TRUE); /* No more memory */
+
+  data=dynamic_element(&info->array,0,HASH_LINK*);
+  halfbuff= info->blength >> 1;
+
+  idx=first_index=info->records-halfbuff;
+  if (idx != info->records) /* If some records */
+  {
+    /*
+      Walk the chain starting at first_index. Entries without the halfbuff
+      bit are tracked in gpos/rec_data/rec_hash_nr, entries with it in
+      gpos2/rec2_data/rec2_hash_nr.
+    */
+    do
+    {
+      my_hash_value_type hash_nr;
+      pos=data+idx;
+      hash_nr= pos->hash_nr;
+      if (flag == 0) /* First loop; Check if ok */
+        if (my_hash_mask(hash_nr, info->blength, info->records) != first_index)
+          break;
+      if (!(hash_nr & halfbuff))
+      { /* Key will not move */
+        if (!(flag & LOWFIND))
+        {
+          if (flag & HIGHFIND)
+          {
+            flag= LOWFIND | HIGHFIND;
+            /* key shall be moved to the current empty position */
+            gpos= empty;
+            rec_data= pos->data;
+            rec_hash_nr= pos->hash_nr;
+            empty=pos; /* This place is now free */
+          }
+          else
+          {
+            flag= LOWFIND | LOWUSED; /* key isn't changed */
+            gpos= pos;
+            rec_data= pos->data;
+            rec_hash_nr= pos->hash_nr;
+          }
+        }
+        else
+        {
+          if (!(flag & LOWUSED))
+          {
+            /* Change link of previous LOW-key */
+            gpos->data= rec_data;
+            gpos->hash_nr= rec_hash_nr;
+            gpos->next= (uint) (pos-data);
+            flag= (flag & HIGHFIND) | (LOWFIND | LOWUSED);
+          }
+          gpos= pos;
+          rec_data= pos->data;
+          rec_hash_nr= pos->hash_nr;
+        }
+      }
+      else
+      { /* key will be moved */
+        if (!(flag & HIGHFIND))
+        {
+          flag= (flag & LOWFIND) | HIGHFIND;
+          /* key shall be moved to the last (empty) position */
+          gpos2= empty;
+          empty= pos;
+          rec2_data= pos->data;
+          rec2_hash_nr= pos->hash_nr;
+        }
+        else
+        {
+          if (!(flag & HIGHUSED))
+          {
+            /* Change link of previous hash-key and save */
+            gpos2->data= rec2_data;
+            gpos2->hash_nr= rec2_hash_nr;
+            gpos2->next= (uint) (pos-data);
+            flag= (flag & LOWFIND) | (HIGHFIND | HIGHUSED);
+          }
+          gpos2= pos;
+          rec2_data= pos->data;
+          rec2_hash_nr= pos->hash_nr;
+        }
+      }
+    }
+    while ((idx=pos->next) != NO_RECORD);
+
+    /* Write back a pending last entry of each group and end its chain */
+    if ((flag & (LOWFIND | LOWUSED)) == LOWFIND)
+    {
+      gpos->data= rec_data;
+      gpos->hash_nr= rec_hash_nr;
+      gpos->next= NO_RECORD;
+    }
+    if ((flag & (HIGHFIND | HIGHUSED)) == HIGHFIND)
+    {
+      gpos2->data= rec2_data;
+      gpos2->hash_nr= rec2_hash_nr;
+      gpos2->next= NO_RECORD;
+    }
+  }
+
+  /* Position of the new record once it is counted (records + 1) */
+  idx= my_hash_mask(current_hash_nr, info->blength, info->records + 1);
+  pos= data+idx;
+  /* Check if we are at the empty position */
+  if (pos == empty)
+  {
+    pos->next=NO_RECORD;
+  }
+  else
+  {
+    /* Move conflicting record to empty position (last) */
+    empty[0]= pos[0];
+    /* Check if the moved record was in same hash-nr family */
+    gpos= data + my_hash_rec_mask(pos, info->blength, info->records + 1);
+    if (pos == gpos)
+    {
+      /* Point to moved record */
+      pos->next= (uint32) (empty - data);
+    }
+    else
+    {
+      pos->next= NO_RECORD;
+      movelink(data,(uint) (pos-data),(uint) (gpos-data),(uint) (empty-data));
+    }
+  }
+  pos->data= (uchar*) record;
+  pos->hash_nr= current_hash_nr;
+  /* Double the bucket count once every position is in use */
+  if (++info->records == info->blength)
+    info->blength+= info->blength;
+  return(0);
+}
+
+
+/**
+   Remove one record from hash-table.
+
+   @fn    hash_delete()
+   @param hash   Hash tree
+   @param record Row to be deleted
+
+   @notes
+   The record with the same record ptr is removed.
+   If there is a free-function it's called if record was found.
+
+   hash->free() is guarantee to be called only after the row has been
+   deleted from the hash and the hash can be reused by other threads.
+
+   @return
+   @retval  0  ok
+   @retval  1 Record not found
+*/
+
+/*
+  Implementation outline: the record is located by comparing data pointers
+  along the chain of its hash position, unlinked, and the hole it leaves is
+  filled with the entry stored in the last array slot so that the array
+  stays dense before its last element is popped.
+*/
+my_bool my_hash_delete(HASH *hash, uchar *record)
+{
+  uint pos2,idx,empty_index;
+  my_hash_value_type pos_hashnr, lastpos_hashnr;
+  size_t blength;                    /* bucket count before any shrink */
+  HASH_LINK *data,*lastpos,*gpos,*pos,*pos3,*empty;
+  DBUG_ENTER("my_hash_delete");
+  if (!hash->records)
+    DBUG_RETURN(1);
+
+  blength=hash->blength;
+  data=dynamic_element(&hash->array,0,HASH_LINK*);
+  /* Search after record with key */
+  pos= data + my_hash_mask(rec_hashnr(hash, record), blength, hash->records);
+  gpos = 0;                          /* previous link in the chain, if any */
+
+  while (pos->data != record)
+  {
+    gpos=pos;
+    if (pos->next == NO_RECORD)
+      DBUG_RETURN(1); /* Key not found */
+    pos=data+pos->next;
+  }
+
+  /* One record less; halve the bucket count when it allows */
+  if ( --(hash->records) < hash->blength >> 1) hash->blength>>=1;
+  lastpos=data+hash->records;
+
+  /* Remove link to record */
+  empty=pos; empty_index=(uint) (empty-data);
+  if (gpos)
+    gpos->next=pos->next; /* unlink current ptr */
+  else if (pos->next != NO_RECORD)
+  {
+    /* Chain head with a successor: pull the successor into the head slot,
+       the successor's old slot becomes the hole */
+    empty=data+(empty_index=pos->next);
+    pos[0]= empty[0];
+  }
+
+  if (empty == lastpos) /* last key at wrong pos or no next link */
+    goto exit;
+
+  /* Move the last key (lastpos) */
+  lastpos_hashnr= lastpos->hash_nr;
+  /* pos is where lastpos should be */
+  pos= data + my_hash_mask(lastpos_hashnr, hash->blength, hash->records);
+  if (pos == empty) /* Move to empty position. */
+  {
+    empty[0]=lastpos[0];
+    goto exit;
+  }
+  pos_hashnr= pos->hash_nr;
+  /* pos3 is where the pos should be */
+  pos3= data + my_hash_mask(pos_hashnr, hash->blength, hash->records);
+  if (pos != pos3)
+  { /* pos is on wrong posit */
+    empty[0]=pos[0]; /* Save it here */
+    pos[0]=lastpos[0]; /* This should be here */
+    movelink(data,(uint) (pos-data),(uint) (pos3-data),empty_index);
+    goto exit;
+  }
+  /* Compare the positions both entries had before the record was removed */
+  pos2= my_hash_mask(lastpos_hashnr, blength, hash->records + 1);
+  if (pos2 == my_hash_mask(pos_hashnr, blength, hash->records + 1))
+  { /* Identical key-positions */
+    if (pos2 != hash->records)
+    {
+      empty[0]=lastpos[0];
+      movelink(data,(uint) (lastpos-data),(uint) (pos-data),empty_index);
+      goto exit;
+    }
+    idx= (uint) (pos-data); /* Link pos->next after lastpos */
+  }
+  else idx= NO_RECORD; /* Different positions merge */
+
+  empty[0]=lastpos[0];
+  movelink(data,idx,empty_index,pos->next);
+  pos->next=empty_index;
+
+exit:
+  /* Drop the now unused last slot, then release the record itself */
+  (void) pop_dynamic(&hash->array);
+  if (hash->free)
+    (*hash->free)((uchar*) record);
+  DBUG_RETURN(0);
+}
+
+
+/**
+   Update keys when record has changed.
+   This is much more efficient than using a delete & insert.
/**
   Update keys when record has changed.
   This is much more efficient than using a delete & insert.

   The new key is taken from 'record' itself (through my_hash_key()), so the
   caller must already have stored the new key in the record.

   @param hash            Hash to update
   @param record          Record whose key has changed
   @param old_key         Key the record was stored under
   @param old_key_length  Length of old_key; if 0, hash->key_length is used

   @return
   @retval 0  ok
   @retval 1  HASH_UNIQUE is set and another record already has the new key,
              or the record was not found in the chain of the old key
*/

my_bool my_hash_update(HASH *hash, uchar *record, uchar *old_key,
                       size_t old_key_length)
{
  uint new_index, new_pos_index, org_index, records, idx;
  size_t length, empty, blength;
  my_hash_value_type hash_nr;
  HASH_LINK org_link,*data,*previous,*pos;
  uchar *new_key;
  DBUG_ENTER("my_hash_update");

  new_key= (uchar*) my_hash_key(hash, record, &length, 1);
  hash_nr= hash->hash_function(hash->charset, new_key, length);

  if (HASH_UNIQUE & hash->flags)
  {
    HASH_SEARCH_STATE state;
    uchar *found;

    /* Any match for the new key other than the record itself is a duplicate */
    if ((found= my_hash_first_from_hash_value(hash, hash_nr, new_key, length,
                                              &state)))
    {
      do
      {
        if (found != record)
          DBUG_RETURN(1);		/* Duplicate entry */
      }
      while ((found= my_hash_next(hash, new_key, length, &state)));
    }
  }

  data=dynamic_element(&hash->array,0,HASH_LINK*);
  blength=hash->blength; records=hash->records;

  /* Search after record with key */

  idx= my_hash_mask(hash->hash_function(hash->charset, old_key,
                                        (old_key_length ? old_key_length :
                                                          hash->key_length)),
                    blength, records);
  org_index= idx;
  new_index= my_hash_mask(hash_nr, blength, records);
  previous=0;
  /* Walk the old chain; on exit pos/idx is the record, previous its predecessor */
  for (;;)
  {
    if ((pos= data+idx)->data == record)
      break;
    previous=pos;
    if ((idx=pos->next) == NO_RECORD)
      DBUG_RETURN(1);			/* Not found in links */
  }

  if (org_index == new_index)
  {
    data[idx].hash_nr= hash_nr;         /* Hash number may have changed */
    DBUG_RETURN(0);			/* Record is in right position */
  }

  org_link= *pos;                       /* keep a copy of the entry */
  empty=idx;                            /* slot that becomes free */

  /* Relink record from current chain */

  if (!previous)
  {
    if (pos->next != NO_RECORD)
    {
      /* Record heads the chain: pull its successor into the head slot */
      empty=pos->next;
      *pos= data[pos->next];
    }
  }
  else
    previous->next=pos->next;		/* unlink pos */

  /* Move data to correct position */
  if (new_index == empty)
  {
    /*
      At this point the record is unlinked from the old chain, so the slot
      it occupies is arbitrary. By chance this slot equals the position of
      the first element in the new chain. That means the updated record
      is the only record in the new chain.
    */
    if (empty != idx)
    {
      /*
        Record was moved while unlinking it from the old chain.
        Copy data to a new position.
      */
      data[empty]= org_link;
    }
    data[empty].next= NO_RECORD;
    data[empty].hash_nr= hash_nr;
    DBUG_RETURN(0);
  }
  pos=data+new_index;
  new_pos_index= my_hash_rec_mask(pos, blength, records);
  if (new_index != new_pos_index)
  {					/* Other record in wrong position */
    data[empty]= *pos;
    movelink(data,new_index,new_pos_index, (uint) empty);
    org_link.next=NO_RECORD;
    data[new_index]= org_link;
    data[new_index].hash_nr= hash_nr;
  }
  else
  {					/* Link in chain at right position */
    org_link.next=data[new_index].next;
    data[empty]=org_link;
    data[empty].hash_nr= hash_nr;
    data[new_index].next= (uint) empty;
  }
  DBUG_RETURN(0);
}


/**
   Return the record stored in array slot 'idx'.

   @param hash  Hash to read
   @param idx   Slot number in the underlying array (not a key)

   @return data pointer of the slot, or 0 if idx >= hash->records
*/

uchar *my_hash_element(HASH *hash, size_t idx)
{
  if (idx < hash->records)
    return dynamic_element(&hash->array,idx,HASH_LINK*)->data;
  return 0;
}
This should only be used when key + isn't changed +*/ + +void my_hash_replace(HASH *hash, HASH_SEARCH_STATE *current_record, + uchar *new_row) +{ + if (*current_record != NO_RECORD) /* Safety */ + dynamic_element(&hash->array, *current_record, HASH_LINK*)->data= new_row; +} + + +/** + Iterate over all elements in hash and call function with the element + + @param hash hash array + @param action function to call for each argument + @param argument second argument for call to action + + @notes + If one of functions calls returns 1 then the iteration aborts + + @retval 0 ok + @retval 1 iteration aborted becasue action returned 1 +*/ + +my_bool my_hash_iterate(HASH *hash, my_hash_walk_action action, void *argument) +{ + uint records, i; + + records= hash->records; + + for (i= 0 ; i < records ; i++) + { + if ((*action)(dynamic_element(&hash->array, i, HASH_LINK *)->data, + argument)) + return 1; + } + return 0; +} + + +#if !defined(DBUG_OFF) || defined(MAIN) + +my_bool my_hash_check(HASH *hash) +{ + int error; + uint i,rec_link,found,max_links,seek,links,idx; + uint records; + size_t blength; + HASH_LINK *data,*hash_info; + + records=hash->records; blength=hash->blength; + data=dynamic_element(&hash->array,0,HASH_LINK*); + error=0; + + for (i=found=max_links=seek=0 ; i < records ; i++) + { + size_t length; + uchar *key= (uchar*) my_hash_key(hash, data[i].data, &length, 0); + if (data[i].hash_nr != hash->hash_function(hash->charset, key, length)) + { + DBUG_PRINT("error", ("record at %d has wrong hash", i)); + error= 1; + } + + if (my_hash_rec_mask(data + i, blength, records) == i) + { + found++; seek++; links=1; + for (idx=data[i].next ; + idx != NO_RECORD && found < records + 1; + idx=hash_info->next) + { + if (idx >= records) + { + DBUG_PRINT("error", + ("Found pointer outside array to %d from link starting at %d", + idx,i)); + error=1; + } + hash_info=data+idx; + seek+= ++links; + if ((rec_link= my_hash_rec_mask(hash_info, + blength, records)) != i) + { + 
DBUG_PRINT("error", ("Record in wrong link at %d: Start %d " + "Record:%p Record-link %d", + idx, i, hash_info->data, rec_link)); + error=1; + } + else + found++; + } + if (links > max_links) max_links=links; + } + } + if (found != records) + { + DBUG_PRINT("error",("Found %u of %u records", found, records)); + error=1; + } + if (records) + DBUG_PRINT("info", + ("records: %u seeks: %d max links: %d hitrate: %.2f", + records,seek,max_links,(float) seek / (float) records)); + DBUG_ASSERT(error == 0); + return error; +} +#endif + +#ifdef MAIN + +#define RECORDS 1000 + +uchar *test_get_key(uchar *data, size_t *length, + my_bool not_used __attribute__((unused))) +{ + *length= 2; + return data; +} + + +int main(int argc __attribute__((unused)),char **argv __attribute__((unused))) +{ + uchar records[RECORDS][2], copy[2]; + HASH hash_test; + uint i; + MY_INIT(argv[0]); + DBUG_PUSH("d:t:O,/tmp/test_hash.trace"); + + printf("my_hash_init\n"); + if (my_hash_init2(PSI_INSTRUMENT_ME, &hash_test, 100, &my_charset_bin, 20, + 0, 0, (my_hash_get_key) test_get_key, 0, 0, HASH_UNIQUE)) + { + fprintf(stderr, "hash init failed\n"); + exit(1); + } + + printf("my_hash_insert\n"); + for (i= 0 ; i < RECORDS ; i++) + { + int2store(records[i],i); + my_hash_insert(&hash_test, records[i]); + my_hash_check(&hash_test); + } + printf("my_hash_update\n"); + for (i= 0 ; i < RECORDS ; i+=2) + { + memcpy(copy, records[i], 2); + int2store(records[i],i + RECORDS); + if (my_hash_update(&hash_test, records[i], copy, 2)) + { + fprintf(stderr, "hash update failed\n"); + exit(1); + } + my_hash_check(&hash_test); + } + printf("my_hash_delete\n"); + for (i= 0 ; i < RECORDS ; i++) + { + if (my_hash_delete(&hash_test, records[i])) + { + fprintf(stderr, "hash delete failed\n"); + exit(1); + } + my_hash_check(&hash_test); + } + my_hash_free(&hash_test); + printf("ok\n"); + my_end(MY_CHECK_ERROR); + return(0); +} +#endif /* MAIN */ diff --git a/mysys/lf_alloc-pin.c b/mysys/lf_alloc-pin.c new file mode 100644 index 
00000000..fc3f320a --- /dev/null +++ b/mysys/lf_alloc-pin.c @@ -0,0 +1,539 @@ +/* QQ: TODO multi-pinbox */ +/* Copyright (c) 2006, 2011, Oracle and/or its affiliates. All rights reserved. + + This program is free software; you can redistribute it and/or modify + it under the terms of the GNU General Public License as published by + the Free Software Foundation; version 2 of the License. + + This program is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + GNU General Public License for more details. + + You should have received a copy of the GNU General Public License + along with this program; if not, write to the Free Software + Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1335 USA */ + +/* + wait-free concurrent allocator based on pinning addresses + + It works as follows: every thread (strictly speaking - every CPU, but + it's too difficult to do) has a small array of pointers. They're called + "pins". Before using an object its address must be stored in this array + (pinned). When an object is no longer necessary its address must be + removed from this array (unpinned). When a thread wants to free() an + object it scans all pins of all threads to see if somebody has this + object pinned. If yes - the object is not freed (but stored in a + "purgatory"). To reduce the cost of a single free() pins are not scanned + on every free() but only added to (thread-local) purgatory. On every + LF_PURGATORY_SIZE free() purgatory is scanned and all unpinned objects + are freed. + + Pins are used to solve ABA problem. To use pins one must obey + a pinning protocol: + + 1. Let's assume that PTR is a shared pointer to an object. Shared means + that any thread may modify it anytime to point to a different object + and free the old object. Later the freed object may be potentially + allocated by another thread. 
If we're unlucky that other thread may + set PTR to point to this object again. This is the ABA problem. + 2. Create a local pointer LOCAL_PTR. + 3. Pin the PTR in a loop: + do + { + LOCAL_PTR= PTR; + pin(PTR, PIN_NUMBER); + } while (LOCAL_PTR != PTR) + 4. It is guaranteed that after the loop has ended, LOCAL_PTR + points to an object (or NULL, if PTR may be NULL), that + will never be freed. It is not guaranteed though + that LOCAL_PTR == PTR (as PTR can change any time) + 5. When done working with the object, remove the pin: + unpin(PIN_NUMBER) + 6. When copying pins (as in the list traversing loop: + pin(CUR, 1); + while () + { + do // standard + { // pinning + NEXT=CUR->next; // loop + pin(NEXT, 0); // see #3 + } while (NEXT != CUR->next); // above + ... + ... + CUR=NEXT; + pin(CUR, 1); // copy pin[0] to pin[1] + } + which keeps CUR address constantly pinned), note that pins may be + copied only upwards (!!!), that is pin[N] to pin[M], M > N. + 7. Don't keep the object pinned longer than necessary - the number of + pins you have is limited (and small), keeping an object pinned + prevents its reuse and causes unnecessary mallocs. + + Explanations: + + 3. The loop is important. The following can occur: + thread1> LOCAL_PTR= PTR + thread2> free(PTR); PTR=0; + thread1> pin(PTR, PIN_NUMBER); + now thread1 cannot access LOCAL_PTR, even if it's pinned, + because it points to freed memory. That is, it *must* + verify that it has indeed pinned PTR, the shared pointer. + + 6. When a thread wants to free some LOCAL_PTR, and it scans + all lists of pins to see whether it's pinned, it does it + upwards, from low pin numbers to high. Thus another thread + must copy an address from one pin to another in the same + direction - upwards, otherwise the scanning thread may + miss it. + + Implementation details: + + Pins are given away from a "pinbox". Pinbox is a stack-based allocator. 
/*
  when using alloca() leave at least that many bytes of the stack -
  for functions we might be calling from within this stack frame
*/
#define ALLOCA_SAFETY_MARGIN 8192

/*
  Upper bound for pin indexes. Also the modulus/increment that splits
  pinstack_top_ver into an index part and a version part (see
  lf_pinbox_get_pins()).
*/
#define LF_PINBOX_MAX_PINS 65536

static void lf_pinbox_real_free(LF_PINS *pins);

/*
  Initialize a pinbox. Normally called from lf_alloc_init.
  See the latter for details.

  free_ptr_offset must be a multiple of sizeof(void *) (asserted).
  free_func is called by lf_pinbox_real_free() with a list of objects that
  are no longer pinned; free_func_arg is passed through to it.
*/
void lf_pinbox_init(LF_PINBOX *pinbox, uint free_ptr_offset,
                    lf_pinbox_free_func *free_func, void *free_func_arg)
{
  DBUG_ASSERT(free_ptr_offset % sizeof(void *) == 0);
  lf_dynarray_init(&pinbox->pinarray, sizeof(LF_PINS));
  pinbox->pinstack_top_ver= 0;
  pinbox->pins_in_array= 0;
  pinbox->free_ptr_offset= free_ptr_offset;
  pinbox->free_func= free_func;
  pinbox->free_func_arg= free_func_arg;
}

/* Release the dynarray that holds the LF_PINS structures of this pinbox. */
void lf_pinbox_destroy(LF_PINBOX *pinbox)
{
  lf_dynarray_destroy(&pinbox->pinarray);
}

/*
  Get pins from a pinbox. Usually called via lf_alloc_get_pins() or
  lf_hash_get_pins().

  SYNOPSIS
    pinbox      - the pinbox to take the pins from

  DESCRIPTION
    get a new LF_PINS structure from a stack of unused pins,
    or allocate a new one out of dynarray.

  RETURN
    pointer to the LF_PINS, or 0 if the index limit LF_PINBOX_MAX_PINS is
    reached or the dynarray could not provide the element

  NOTE
    It is assumed that pins belong to a thread and are not transferable
    between threads.
*/
LF_PINS *lf_pinbox_get_pins(LF_PINBOX *pinbox)
{
  uint32 pins, next, top_ver;
  LF_PINS *el;
  /*
    We have an array of max. 64k elements.
    The highest index currently allocated is pinbox->pins_in_array.
    Freed elements are in a lifo stack, pinstack_top_ver.
    pinstack_top_ver is 32 bits; 16 low bits are the index in the
    array, to the first element of the list. 16 high bits are a version
    (every time the 16 low bits are updated, the 16 high bits are
    incremented). Versioning prevents the ABA problem.
  */
  top_ver= pinbox->pinstack_top_ver;
  do
  {
    if (!(pins= top_ver % LF_PINBOX_MAX_PINS))
    {
      /* the stack of free elements is empty */
      pins= my_atomic_add32((int32 volatile*) &pinbox->pins_in_array, 1)+1;
      if (unlikely(pins >= LF_PINBOX_MAX_PINS))
        return 0;
      /*
        note that the first allocated element has index 1 (pins==1).
        index 0 is reserved to mean "NULL pointer"
      */
      el= (LF_PINS *)lf_dynarray_lvalue(&pinbox->pinarray, pins);
      if (unlikely(!el))
        return 0;
      break;
    }
    el= (LF_PINS *)lf_dynarray_value(&pinbox->pinarray, pins);
    next= el->link;
    /*
      The new stack top replaces the index part (pins) with 'next' and
      adds LF_PINBOX_MAX_PINS, which increments the version part.
    */
  } while (!my_atomic_cas32((int32 volatile*) &pinbox->pinstack_top_ver,
                            (int32*) &top_ver,
                            top_ver-pins+next+LF_PINBOX_MAX_PINS));
  /*
    set el->link to the index of el in the dynarray (el->link has two usages:
    - if element is allocated, it's its own index
    - if element is free, it's its next element in the free stack)
  */
  el->link= pins;
  el->purgatory_count= 0;
  el->pinbox= pinbox;

  return el;
}

/*
  Put pins back to a pinbox. Usually called via lf_alloc_put_pins() or
  lf_hash_put_pins().

  DESCRIPTION
    empty the purgatory (XXX deadlock warning below!),
    push LF_PINS structure to a stack
*/
void lf_pinbox_put_pins(LF_PINS *pins)
{
  LF_PINBOX *pinbox= pins->pinbox;
  uint32 top_ver, nr;
  nr= pins->link;               /* own index, see lf_pinbox_get_pins() */

#ifndef DBUG_OFF
  {
    /* This thread should not hold any pin. */
    int i;
    for (i= 0; i < LF_PINBOX_PINS; i++)
      DBUG_ASSERT(pins->pin[i] == 0);
  }
#endif /* DBUG_OFF */

  /*
    XXX this will deadlock if other threads will wait for
    the caller to do something after lf_pinbox_put_pins(),
    and they would have pinned addresses that the caller wants to free.
    Thus: only free pins when all work is done and nobody can wait for you!!!
  */
  while (pins->purgatory_count)
  {
    lf_pinbox_real_free(pins);
    /* something is still pinned by another thread - let it run */
    if (pins->purgatory_count)
      pthread_yield();
  }
  /* Push this element on the free stack: link to old top, bump the version */
  top_ver= pinbox->pinstack_top_ver;
  do
  {
    pins->link= top_ver % LF_PINBOX_MAX_PINS;
  } while (!my_atomic_cas32((int32 volatile*) &pinbox->pinstack_top_ver,
                            (int32*) &top_ver,
                            top_ver-pins->link+nr+LF_PINBOX_MAX_PINS));
  return;
}

/*
  Three-way comparison of two stored addresses (-1, 0 or 1).
  Used as the qsort() comparator in lf_pinbox_real_free().
*/
static int ptr_cmp(void **a, void **b)
{
  return *a < *b ? -1 : *a == *b ? 0 : 1;
}

/*
  Prepend ADDR to the purgatory list of PINS: the current list head is
  stored in the link slot of the object (at free_ptr_offset), then ADDR
  becomes the new head and the counter is incremented.
*/
#define add_to_purgatory(PINS, ADDR)                                    \
  do                                                                    \
  {                                                                     \
    my_atomic_storeptr_explicit(                                        \
      (void **)((char *)(ADDR)+(PINS)->pinbox->free_ptr_offset),        \
      (PINS)->purgatory, MY_MEMORY_ORDER_RELEASE);                      \
    (PINS)->purgatory= (ADDR);                                          \
    (PINS)->purgatory_count++;                                          \
  } while (0)

/*
  Free an object allocated via pinbox allocator

  DESCRIPTION
    add an object to purgatory. if necessary, call lf_pinbox_real_free()
    to actually free something.

    "Necessary" means the purgatory count reached a multiple of
    LF_PURGATORY_SIZE. With the debug keyword "unconditional_pinbox_free"
    set, lf_pinbox_real_free() is called on every other call as well.
*/
void lf_pinbox_free(LF_PINS *pins, void *addr)
{
  add_to_purgatory(pins, addr);
  if (pins->purgatory_count % LF_PURGATORY_SIZE == 0)
    lf_pinbox_real_free(pins);
  DBUG_EXECUTE_IF("unconditional_pinbox_free",
                  if (pins->purgatory_count % LF_PURGATORY_SIZE)
                    lf_pinbox_real_free(pins););
}

/* State passed to harvest_pins() through lf_dynarray_iterate() */
struct st_harvester {
  void **granary;       /* next free slot in the output array */
  int npins;            /* number of LF_PINS elements still to be scanned */
};

/*
  callback for lf_dynarray_iterate:
  scan all pins of all threads and accumulate all pins

  'el' is the start of one dynarray page; every non-NULL pin found on it
  is appended to hv->granary. Always returns 0 (never aborts the scan).
*/
static int harvest_pins(LF_PINS *el, struct st_harvester *hv)
{
  int i;
  LF_PINS *el_end= el+MY_MIN(hv->npins, LF_DYNARRAY_LEVEL_LENGTH);
  for (; el < el_end; el++)
  {
    for (i= 0; i < LF_PINBOX_PINS; i++)
    {
      void *p= el->pin[i];
      if (p)
        *hv->granary++= p;
    }
  }
  /*
    hv->npins may become negative below, but it means that
    we're on the last dynarray page and harvest_pins() won't be
    called again. We don't bother to make hv->npins correct
    (that is 0) in this case.
  */
  hv->npins-= LF_DYNARRAY_LEVEL_LENGTH;
  return 0;
}

/*
  callback for lf_dynarray_iterate:
  scan all pins of all threads and see if addr is present there

  Returns 1 (which aborts the scan) as soon as a pin equal to addr is
  found on the page, 0 otherwise.
*/
static int match_pins(LF_PINS *el, void *addr)
{
  int i;
  LF_PINS *el_end= el+LF_DYNARRAY_LEVEL_LENGTH;
  for (; el < el_end; el++)
    for (i= 0; i < LF_PINBOX_PINS; i++)
      if (el->pin[i] == addr)
        return 1;
  return 0;
}

/*
  next_node(P, X): lvalue of the link slot of object X, located at
  P->free_ptr_offset bytes into the object.
  anext_node(X): the same for the pinbox of a variable named 'allocator',
  which must be in scope where the macro is used.
*/
#define next_node(P, X) (*((uchar * volatile *)(((uchar *)(X)) + (P)->free_ptr_offset)))
#define anext_node(X) next_node(&allocator->pinbox, (X))

/*
  Scan the purgatory and free everything that can be freed

  The purgatory list is detached from 'pins' and walked. Objects that are
  pinned by somebody are put back to the purgatory, all others are chained
  together and handed to pinbox->free_func() in one call.
*/
static void lf_pinbox_real_free(LF_PINS *pins)
{
  int npins;
  void *list;
  void **addr= NULL;
  void *first= NULL, *last= NULL;
  struct st_my_thread_var *var= my_thread_var;
  void *stack_ends_here= var ? var->stack_ends_here : NULL;
  LF_PINBOX *pinbox= pins->pinbox;

  npins= pinbox->pins_in_array+1;

#ifdef HAVE_ALLOCA
  if (stack_ends_here != NULL)
  {
    int alloca_size= sizeof(void *)*LF_PINBOX_PINS*npins;
    /* create a sorted list of pinned addresses, to speed up searches */
    if (available_stack_size(&pinbox, stack_ends_here) >
        alloca_size + ALLOCA_SAFETY_MARGIN)
    {
      struct st_harvester hv;
      addr= (void **) alloca(alloca_size);
      hv.granary= addr;
      hv.npins= npins;
      /* scan the dynarray and accumulate all pinned addresses */
      lf_dynarray_iterate(&pinbox->pinarray,
                           (lf_dynarray_func)harvest_pins, &hv);

      /* from here on npins is the number of harvested (non-NULL) pins */
      npins= (int)(hv.granary-addr);
      /* and sort them */
      if (npins)
        qsort(addr, npins, sizeof(void *), (qsort_cmp)ptr_cmp);
    }
  }
#endif

  /* Detach the purgatory; still pinned objects are re-added below */
  list= pins->purgatory;
  pins->purgatory= 0;
  pins->purgatory_count= 0;
  while (list)
  {
    void *cur= list;
    list= *(void **)((char *)cur+pinbox->free_ptr_offset);
    if (npins)
    {
      if (addr) /* use binary search */
      {
        void **a, **b, **c;
        /* narrow [a, b] until the ends are adjacent, then test both ends */
        for (a= addr, b= addr+npins-1, c= a+(b-a)/2; (b-a) > 1; c= a+(b-a)/2)
          if (cur == *c)
            a= b= c;
          else if (cur > *c)
            a= c;
          else
            b= c;
        if (cur == *a || cur == *b)
          goto found;
      }
      else /* no alloca - no cookie. linear search here */
      {
        if (lf_dynarray_iterate(&pinbox->pinarray,
                                (lf_dynarray_func)match_pins, cur))
          goto found;
      }
    }
    /* not pinned - freeing: append cur to the first..last chain */
    if (last)
      last= next_node(pinbox, last)= (uchar *)cur;
    else
      first= last= (uchar *)cur;
    continue;
found:
    /* pinned - keeping */
    add_to_purgatory(pins, cur);
  }
  if (last)
    pinbox->free_func(first, last, pinbox->free_func_arg);
}

/* lock-free memory allocator for fixed-size objects */

/*
  callback for lf_pinbox_real_free to free a list of unpinned objects -
  add it back to the allocator stack

  DESCRIPTION
    'first' and 'last' are the ends of the linked list of nodes:
    first->el->el->....->el->last. Use first==last to free only one element.
*/
static void alloc_free(uchar *first,
                       uchar volatile *last,
                       LF_ALLOCATOR *allocator)
{
  /*
    we need a union here to access type-punned pointer reliably.
    otherwise gcc -fstrict-aliasing will not see 'tmp' changed in the loop
  */
  union { uchar * node; void *ptr; } tmp;
  tmp.node= allocator->top;
  do
  {
    /* link the tail of the list to the stack top we expect to replace */
    anext_node(last)= tmp.node;
  } while (!my_atomic_casptr((void **)(char *)&allocator->top,
                             (void **)&tmp.ptr, first) && LF_BACKOFF());
}
+*/ +void lf_alloc_init(LF_ALLOCATOR *allocator, uint size, uint free_ptr_offset) +{ + lf_pinbox_init(&allocator->pinbox, free_ptr_offset, + (lf_pinbox_free_func *)alloc_free, allocator); + allocator->top= 0; + allocator->mallocs= 0; + allocator->element_size= size; + allocator->constructor= 0; + allocator->destructor= 0; + DBUG_ASSERT(size >= sizeof(void*) + free_ptr_offset); +} + +/* + destroy the allocator, free everything that's in it + + NOTE + As every other init/destroy function here and elsewhere it + is not thread safe. No, this function is no different, ensure + that no thread needs the allocator before destroying it. + We are not responsible for any damage that may be caused by + accessing the allocator when it is being or has been destroyed. + Oh yes, and don't put your cat in a microwave. +*/ +void lf_alloc_destroy(LF_ALLOCATOR *allocator) +{ + uchar *node= allocator->top; + while (node) + { + uchar *tmp= anext_node(node); + if (allocator->destructor) + allocator->destructor(node); + my_free(node); + node= tmp; + } + lf_pinbox_destroy(&allocator->pinbox); + allocator->top= 0; +} + +/* + Allocate and return an new object. + + DESCRIPTION + Pop an unused object from the stack or malloc it is the stack is empty. + pin[0] is used, it's removed on return. 
+*/ +void *lf_alloc_new(LF_PINS *pins) +{ + LF_ALLOCATOR *allocator= (LF_ALLOCATOR *)(pins->pinbox->free_func_arg); + uchar *node; + for (;;) + { + do + { + node= allocator->top; + lf_pin(pins, 0, node); + } while (node != allocator->top && LF_BACKOFF()); + if (!node) + { + node= (void *)my_malloc(key_memory_lf_node, allocator->element_size, + MYF(MY_WME)); + if (allocator->constructor) + allocator->constructor(node); +#ifdef MY_LF_EXTRA_DEBUG + if (likely(node != 0)) + my_atomic_add32(&allocator->mallocs, 1); +#endif + break; + } + if (my_atomic_casptr((void **)(char *)&allocator->top, + (void *)&node, anext_node(node))) + break; + } + lf_unpin(pins, 0); + return node; +} + +/* + count the number of objects in a pool. + + NOTE + This is NOT thread-safe !!! +*/ +uint lf_alloc_pool_count(LF_ALLOCATOR *allocator) +{ + uint i; + uchar *node; + for (node= allocator->top, i= 0; node; node= anext_node(node), i++) + /* no op */; + return i; +} + diff --git a/mysys/lf_dynarray.c b/mysys/lf_dynarray.c new file mode 100644 index 00000000..6a4ea3d3 --- /dev/null +++ b/mysys/lf_dynarray.c @@ -0,0 +1,205 @@ +/* Copyright (c) 2006, 2010, Oracle and/or its affiliates. All rights reserved. + + This program is free software; you can redistribute it and/or modify + it under the terms of the GNU General Public License as published by + the Free Software Foundation; version 2 of the License. + + This program is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + GNU General Public License for more details. + + You should have received a copy of the GNU General Public License + along with this program; if not, write to the Free Software + Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1335 USA */ + +/* + Analog of DYNAMIC_ARRAY that never reallocs + (so no pointer into the array may ever become invalid). 
+ + Memory is allocated in non-contiguous chunks. + This data structure is not space efficient for sparse arrays. + + Every element is aligned to sizeof(element) boundary + (to avoid false sharing if element is big enough). + + LF_DYNARRAY is a recursive structure. On the zero level + LF_DYNARRAY::level[0] it's an array of LF_DYNARRAY_LEVEL_LENGTH elements, + on the first level it's an array of LF_DYNARRAY_LEVEL_LENGTH pointers + to arrays of elements, on the second level it's an array of pointers + to arrays of pointers to arrays of elements. And so on. + + With four levels the number of elements is limited to 4311810304 + (but as in all functions index is uint, the real limit is 2^32-1) + + Actually, it's wait-free, not lock-free ;-) +*/ + +#include "mysys_priv.h" +#include +#include + +void lf_dynarray_init(LF_DYNARRAY *array, uint element_size) +{ + bzero(array, sizeof(*array)); + array->size_of_element= element_size; +} + +static void recursive_free(void **alloc, int level) +{ + if (!alloc) + return; + + if (level) + { + int i; + for (i= 0; i < LF_DYNARRAY_LEVEL_LENGTH; i++) + recursive_free(alloc[i], level-1); + my_free(alloc); + } + else + my_free(alloc[-1]); +} + +void lf_dynarray_destroy(LF_DYNARRAY *array) +{ + int i; + for (i= 0; i < LF_DYNARRAY_LEVELS; i++) + recursive_free(array->level[i], i); +} + +static const ulong dynarray_idxes_in_prev_levels[LF_DYNARRAY_LEVELS]= +{ + 0, /* +1 here to to avoid -1's below */ + LF_DYNARRAY_LEVEL_LENGTH, + LF_DYNARRAY_LEVEL_LENGTH * LF_DYNARRAY_LEVEL_LENGTH + + LF_DYNARRAY_LEVEL_LENGTH, + LF_DYNARRAY_LEVEL_LENGTH * LF_DYNARRAY_LEVEL_LENGTH * + LF_DYNARRAY_LEVEL_LENGTH + LF_DYNARRAY_LEVEL_LENGTH * + LF_DYNARRAY_LEVEL_LENGTH + LF_DYNARRAY_LEVEL_LENGTH +}; + +static const ulong dynarray_idxes_in_prev_level[LF_DYNARRAY_LEVELS]= +{ + 0, /* +1 here to to avoid -1's below */ + LF_DYNARRAY_LEVEL_LENGTH, + LF_DYNARRAY_LEVEL_LENGTH * LF_DYNARRAY_LEVEL_LENGTH, + LF_DYNARRAY_LEVEL_LENGTH * LF_DYNARRAY_LEVEL_LENGTH * + 
LF_DYNARRAY_LEVEL_LENGTH, +}; + +/* + Returns a valid lvalue pointer to the element number 'idx'. + Allocates memory if necessary. +*/ +void *lf_dynarray_lvalue(LF_DYNARRAY *array, uint idx) +{ + void * ptr, * volatile * ptr_ptr= 0; + int i; + + for (i= LF_DYNARRAY_LEVELS-1; idx < dynarray_idxes_in_prev_levels[i]; i--) + /* no-op */; + ptr_ptr= &array->level[i]; + idx-= dynarray_idxes_in_prev_levels[i]; + for (; i > 0; i--) + { + if (!(ptr= *ptr_ptr)) + { + void *alloc= my_malloc(key_memory_lf_dynarray, LF_DYNARRAY_LEVEL_LENGTH * + sizeof(void *), MYF(MY_WME|MY_ZEROFILL)); + if (unlikely(!alloc)) + return(NULL); + if (my_atomic_casptr(ptr_ptr, &ptr, alloc)) + ptr= alloc; + else + my_free(alloc); + } + ptr_ptr= ((void **)ptr) + idx / dynarray_idxes_in_prev_level[i]; + idx%= dynarray_idxes_in_prev_level[i]; + } + if (!(ptr= *ptr_ptr)) + { + uchar *alloc, *data; + alloc= my_malloc(key_memory_lf_dynarray, + LF_DYNARRAY_LEVEL_LENGTH * array->size_of_element + + MY_MAX(array->size_of_element, sizeof(void *)), + MYF(MY_WME|MY_ZEROFILL)); + if (unlikely(!alloc)) + return(NULL); + /* reserve the space for free() address */ + data= alloc + sizeof(void *); + { /* alignment */ + intptr mod= ((intptr)data) % array->size_of_element; + if (mod) + data+= array->size_of_element - mod; + } + ((void **)data)[-1]= alloc; /* free() will need the original pointer */ + if (my_atomic_casptr(ptr_ptr, &ptr, data)) + ptr= data; + else + my_free(alloc); + } + return ((uchar*)ptr) + array->size_of_element * idx; +} + +/* + Returns a pointer to the element number 'idx' + or NULL if an element does not exists +*/ +void *lf_dynarray_value(LF_DYNARRAY *array, uint idx) +{ + void * ptr, * volatile * ptr_ptr= 0; + int i; + + for (i= LF_DYNARRAY_LEVELS-1; idx < dynarray_idxes_in_prev_levels[i]; i--) + /* no-op */; + ptr_ptr= &array->level[i]; + idx-= dynarray_idxes_in_prev_levels[i]; + for (; i > 0; i--) + { + if (!(ptr= *ptr_ptr)) + return(NULL); + ptr_ptr= ((void **)ptr) + idx / 
dynarray_idxes_in_prev_level[i]; + idx %= dynarray_idxes_in_prev_level[i]; + } + if (!(ptr= *ptr_ptr)) + return(NULL); + return ((uchar*)ptr) + array->size_of_element * idx; +} + +static int recursive_iterate(LF_DYNARRAY *array, void *ptr, int level, + lf_dynarray_func func, void *arg) +{ + int res, i; + if (!ptr) + return 0; + if (!level) + return func(ptr, arg); + for (i= 0; i < LF_DYNARRAY_LEVEL_LENGTH; i++) + if ((res= recursive_iterate(array, ((void **)ptr)[i], level-1, func, arg))) + return res; + return 0; +} + +/* + Calls func(array, arg) on every array of LF_DYNARRAY_LEVEL_LENGTH elements + in lf_dynarray. + + DESCRIPTION + lf_dynarray consists of a set of arrays, LF_DYNARRAY_LEVEL_LENGTH elements + each. lf_dynarray_iterate() calls user-supplied function on every array + from the set. It is the fastest way to scan the array, faster than + for (i=0; i < N; i++) { func(lf_dynarray_value(dynarray, i)); } + + NOTE + if func() returns non-zero, the scan is aborted +*/ +int lf_dynarray_iterate(LF_DYNARRAY *array, lf_dynarray_func func, void *arg) +{ + int i, res; + for (i= 0; i < LF_DYNARRAY_LEVELS; i++) + if ((res= recursive_iterate(array, array->level[i], i, func, arg))) + return res; + return 0; +} + diff --git a/mysys/lf_hash.cc b/mysys/lf_hash.cc new file mode 100644 index 00000000..c8f2e3f4 --- /dev/null +++ b/mysys/lf_hash.cc @@ -0,0 +1,591 @@ +/* Copyright (c) 2006, 2018, Oracle and/or its affiliates. + Copyright (c) 2009, 2020, MariaDB + + This program is free software; you can redistribute it and/or modify + it under the terms of the GNU General Public License as published by + the Free Software Foundation; version 2 of the License. + + This program is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + GNU General Public License for more details. 
/* An element of the list */
typedef struct {
  intptr link;   /* a pointer to the next element in a list and a flag */
  const uchar *key;
  size_t keylen;
  uint32 hashnr; /* reversed hash number, for sorting */
  /*
    data is stored here, directly after this structure.
    thus the pointer to data is (void*)(slist_element_ptr+1)
  */
} LF_SLIST;

/* Size of the list-element header that precedes the data of every element */
const int LF_HASH_OVERHEAD= sizeof(LF_SLIST);

/*
  a structure to pass the context (pointers to the three successive elements
  in a list) from l_find to l_insert/l_delete
*/
typedef struct {
  intptr *prev;
  LF_SLIST *curr, *next;
} CURSOR;

/*
  the last bit in LF_SLIST::link is a "deleted" flag.
  the helper macros below convert it to a pure pointer or a pure flag
*/
#define PTR(V)      (LF_SLIST *)((V) & (~(intptr)1))
#define DELETED(V)  ((V) & 1)
/** walk the list, searching for an element or invoking a callback

    Search for hashnr/key/keylen in the list starting from 'head' and
    position the cursor. The list is ORDER BY hashnr, key

    @param head         start walking the list from this node
    @param cs           charset for comparing keys, NULL if callback is used
    @param hashnr       hash number to search for
    @param key          key to search for OR data for the callback
    @param keylen       length of the key to compare, 0 if callback is used
    @param cursor       for returning the found element
    @param pins         see lf_alloc-pin.c
    @param callback     callback action, invoked for every element

  @note
    cursor is positioned in either case
    pins[0..2] are used, they are NOT removed on return
    callback might see some elements twice (because of retries)

  @return
    if find: 0 - not found
             1 - found
    if callback:
             0 - ok
             1 - error (callback returned non-zero)
*/
static int l_find(LF_SLIST **head, CHARSET_INFO *cs, uint32 hashnr,
                 const uchar *key, size_t keylen, CURSOR *cursor, LF_PINS *pins,
                 my_hash_walk_action callback)
{
  uint32       cur_hashnr;
  const uchar  *cur_key;
  size_t       cur_keylen;
  intptr       link;

  DBUG_ASSERT(!cs || !callback);        /* should not be set both */
  DBUG_ASSERT(!keylen || !callback);    /* should not be set both */

retry:
  /*
    (Re)start the walk from 'head'. 'head' is advanced below every time a
    dummy node is passed, so a retry resumes from the last dummy node seen.
  */
  cursor->prev= (intptr *) my_assume_aligned(head);
  /* pin curr (pin 1) and repeat until *prev still holds the pinned value */
  do { /* PTR() isn't necessary below, head is a dummy node */
    cursor->curr= my_assume_aligned((LF_SLIST *)(*cursor->prev));
    lf_pin(pins, 1, cursor->curr);
  } while (my_atomic_loadptr(
               (void **)my_assume_aligned(cursor->prev))
             != cursor->curr && LF_BACKOFF());
  for (;;)
  {
    if (unlikely(!cursor->curr))
      return 0;                                 /* end of the list */

    cur_hashnr= cursor->curr->hashnr;
    cur_keylen= cursor->curr->keylen;
    /* The key element needs to be aligned, not necessary what it points to */
    my_assume_aligned(&cursor->curr->key);
    cur_key= (const uchar *) my_atomic_loadptr_explicit((void **) &cursor->curr->key,
                                                        MY_MEMORY_ORDER_ACQUIRE);

    /* pin next (pin 0) and repeat until curr->link is unchanged */
    do {
      /* attempting to my_assume_aligned on link below broke the implementation */
      link= (intptr) my_atomic_loadptr_explicit((void **) &cursor->curr->link,
                                                MY_MEMORY_ORDER_RELAXED);
      cursor->next= my_assume_aligned(PTR(link));
      lf_pin(pins, 0, cursor->next);
    } while (link != (intptr) my_atomic_loadptr((void *volatile *) &cursor->curr->link)
             && LF_BACKOFF());

    if (!DELETED(link))
    {
      if (unlikely(callback))
      {
        /*
          only nodes with an odd hashnr (normal nodes) are shown to the
          callback; it gets the data that follows the LF_SLIST header
        */
        if (cur_hashnr & 1 && callback(cursor->curr + 1, (void*)key))
          return 1;
      }
      else if (cur_hashnr >= hashnr)
      {
        /*
          r stays 1 when the walk has passed hashnr (not found); otherwise
          it is the key comparison result: 0 - equal (found),
          positive - the walk has passed the key (not found),
          negative - keep walking
        */
        int r= 1;
        if (cur_hashnr > hashnr ||
            (r= my_strnncoll(cs, cur_key, cur_keylen, key, keylen)) >= 0)
          return !r;
      }
      cursor->prev= &(cursor->curr->link);
      if (!(cur_hashnr & 1)) /* dummy node */
        head= (LF_SLIST **)cursor->prev;
      /* pin 2 protects the node that now owns *prev */
      lf_pin(pins, 2, cursor->curr);
    }
    else
    {
      /*
        we found a deleted node - be nice, help the other thread
        and remove this deleted node.
        If the CAS fails, *prev has changed and the walk is restarted.
        NOTE(review): LF_BACKOFF() is defined elsewhere; presumably it
        always evaluates to true - confirm.
      */
      if (my_atomic_casptr((void **) cursor->prev,
                           (void **) &cursor->curr, cursor->next) && LF_BACKOFF())
        lf_alloc_free(pins, cursor->curr);
      else
        goto retry;
    }
    cursor->curr= cursor->next;
    lf_pin(pins, 1, cursor->curr);
  }
}
+*/ +static LF_SLIST *l_insert(LF_SLIST **head, CHARSET_INFO *cs, + LF_SLIST *node, LF_PINS *pins, uint flags) +{ + CURSOR cursor; + int res; + + for (;;) + { + if (l_find(head, cs, node->hashnr, node->key, node->keylen, + &cursor, pins, 0) && + (flags & LF_HASH_UNIQUE)) + { + res= 0; /* duplicate found */ + break; + } + else + { + node->link= (intptr)cursor.curr; + DBUG_ASSERT(node->link != (intptr)node); /* no circular references */ + DBUG_ASSERT(cursor.prev != &node->link); /* no circular references */ + if (my_atomic_casptr((void **) cursor.prev, + (void **)(char*) &cursor.curr, node)) + { + res= 1; /* inserted ok */ + break; + } + } + } + lf_unpin(pins, 0); + lf_unpin(pins, 1); + lf_unpin(pins, 2); + /* + Note that cursor.curr is not pinned here and the pointer is unreliable, + the object may disappear anytime. But if it points to a dummy node, the + pointer is safe, because dummy nodes are never freed - initialize_bucket() + uses this fact. + */ + return res ? 0 : cursor.curr; +} + +/* + DESCRIPTION + deletes a node as identified by hashnr/keey/keylen from the list + that starts from 'head' + + RETURN + 0 - ok + 1 - not found + + NOTE + it uses pins[0..2], on return all pins are removed. +*/ +static int l_delete(LF_SLIST **head, CHARSET_INFO *cs, uint32 hashnr, + const uchar *key, uint keylen, LF_PINS *pins) +{ + CURSOR cursor; + int res; + + for (;;) + { + if (!l_find(head, cs, hashnr, key, keylen, &cursor, pins, 0)) + { + res= 1; /* not found */ + break; + } + else + { + /* mark the node deleted */ + if (my_atomic_casptr((void **) (char*) &(cursor.curr->link), + (void **) (char*) &cursor.next, + (void *)(((intptr)cursor.next) | 1))) + { + /* and remove it from the list */ + if (my_atomic_casptr((void **)cursor.prev, + (void **)(char*)&cursor.curr, cursor.next)) + lf_alloc_free(pins, cursor.curr); + else + { + /* + somebody already "helped" us and removed the node ? + Let's check if we need to help that someone too! 
+ (to ensure the number of "set DELETED flag" actions + is equal to the number of "remove from the list" actions) + */ + l_find(head, cs, hashnr, key, keylen, &cursor, pins, 0); + } + res= 0; + break; + } + } + } + lf_unpin(pins, 0); + lf_unpin(pins, 1); + lf_unpin(pins, 2); + return res; +} + +/* + DESCRIPTION + searches for a node as identified by hashnr/keey/keylen in the list + that starts from 'head' + + RETURN + 0 - not found + node - found + + NOTE + it uses pins[0..2], on return the pin[2] keeps the node found + all other pins are removed. +*/ +static LF_SLIST *l_search(LF_SLIST **head, CHARSET_INFO *cs, + uint32 hashnr, const uchar *key, uint keylen, + LF_PINS *pins) +{ + CURSOR cursor; + int res= l_find(head, cs, hashnr, key, keylen, &cursor, pins, 0); + if (res) + lf_pin(pins, 2, cursor.curr); + else + lf_unpin(pins, 2); + lf_unpin(pins, 1); + lf_unpin(pins, 0); + return res ? cursor.curr : 0; +} + +static inline const uchar* hash_key(const LF_HASH *hash, + const uchar *record, size_t *length) +{ + if (hash->get_key) + return (*hash->get_key)(record, length, 0); + *length= hash->key_length; + return record + hash->key_offset; +} + +/* + Compute the hash key value from the raw key. + + @note, that the hash value is limited to 2^31, because we need one + bit to distinguish between normal and dummy nodes. 
+*/ +static inline my_hash_value_type calc_hash(CHARSET_INFO *cs, + const uchar *key, + size_t keylen) +{ + ulong nr1= 1, nr2= 4; + my_ci_hash_sort(cs, (uchar*) key, keylen, &nr1, &nr2); + return nr1; +} + +#define MAX_LOAD 1.0 /* average number of elements in a bucket */ + +static int initialize_bucket(LF_HASH *, LF_SLIST **, uint, LF_PINS *); + +static void default_initializer(LF_HASH *hash, void *dst, const void *src) +{ + memcpy(dst, src, hash->element_size); +} + + +/* + Initializes lf_hash, the arguments are compatible with hash_init + + @note element_size sets both the size of allocated memory block for + lf_alloc and a size of memcpy'ed block size in lf_hash_insert. Typically + they are the same, indeed. But LF_HASH::element_size can be decreased + after lf_hash_init, and then lf_alloc will allocate larger block that + lf_hash_insert will copy over. It is desirable if part of the element + is expensive to initialize - for example if there is a mutex or + DYNAMIC_ARRAY. In this case they should be initialize in the + LF_ALLOCATOR::constructor, and lf_hash_insert should not overwrite them. + + The above works well with PODS. For more complex cases (e.g. C++ classes + with private members) use initializer function. +*/ +void lf_hash_init(LF_HASH *hash, uint element_size, uint flags, + uint key_offset, uint key_length, my_hash_get_key get_key, + CHARSET_INFO *charset) +{ + lf_alloc_init(&hash->alloc, sizeof(LF_SLIST)+element_size, + offsetof(LF_SLIST, key)); + lf_dynarray_init(&hash->array, sizeof(LF_SLIST *)); + hash->size= 1; + hash->count= 0; + hash->element_size= element_size; + hash->flags= flags; + hash->charset= charset ? charset : &my_charset_bin; + hash->key_offset= key_offset; + hash->key_length= key_length; + hash->get_key= get_key; + hash->initializer= default_initializer; + hash->hash_function= calc_hash; + DBUG_ASSERT(get_key ? 
!key_offset && !key_length : key_length); +} + +void lf_hash_destroy(LF_HASH *hash) +{ + LF_SLIST *el, **head= (LF_SLIST **)lf_dynarray_value(&hash->array, 0); + + if (head) + { + el= *head; + while (el) + { + intptr next= el->link; + if (el->hashnr & 1) + lf_alloc_direct_free(&hash->alloc, el); /* normal node */ + else + my_free(el); /* dummy node */ + el= (LF_SLIST *)next; + } + } + lf_alloc_destroy(&hash->alloc); + lf_dynarray_destroy(&hash->array); +} + +/* + DESCRIPTION + inserts a new element to a hash. it will have a _copy_ of + data, not a pointer to it. + + RETURN + 0 - inserted + 1 - didn't (unique key conflict) + -1 - out of memory + + NOTE + see l_insert() for pin usage notes +*/ +int lf_hash_insert(LF_HASH *hash, LF_PINS *pins, const void *data) +{ + int csize, bucket, hashnr; + LF_SLIST *node, **el; + + node= (LF_SLIST *)lf_alloc_new(pins); + if (unlikely(!node)) + return -1; + hash->initializer(hash, node + 1, data); + node->key= hash_key(hash, (uchar *)(node+1), &node->keylen); + hashnr= hash->hash_function(hash->charset, node->key, node->keylen) & INT_MAX32; + bucket= hashnr % hash->size; + el= (LF_SLIST **)lf_dynarray_lvalue(&hash->array, bucket); + if (unlikely(!el)) + return -1; + if (*el == NULL && unlikely(initialize_bucket(hash, el, bucket, pins))) + return -1; + node->hashnr= my_reverse_bits(hashnr) | 1; /* normal node */ + if (l_insert(el, hash->charset, node, pins, hash->flags)) + { + lf_alloc_free(pins, node); + return 1; + } + csize= hash->size; + if ((my_atomic_add32(&hash->count, 1)+1.0) / csize > MAX_LOAD) + my_atomic_cas32(&hash->size, &csize, csize*2); + return 0; +} + +/* + DESCRIPTION + deletes an element with the given key from the hash (if a hash is + not unique and there're many elements with this key - the "first" + matching element is deleted) + RETURN + 0 - deleted + 1 - didn't (not found) + NOTE + see l_delete() for pin usage notes +*/ +int lf_hash_delete(LF_HASH *hash, LF_PINS *pins, const void *key, uint keylen) +{ + 
LF_SLIST **el; + uint bucket, hashnr; + + hashnr= hash->hash_function(hash->charset, (uchar *)key, keylen) & INT_MAX32; + + /* hide OOM errors - if we cannot initialize a bucket, try the previous one */ + for (bucket= hashnr % hash->size; ;bucket= my_clear_highest_bit(bucket)) + { + el= (LF_SLIST **)lf_dynarray_lvalue(&hash->array, bucket); + if (el && (*el || initialize_bucket(hash, el, bucket, pins) == 0)) + break; + if (unlikely(bucket == 0)) + return 1; /* if there's no bucket==0, the hash is empty */ + } + if (l_delete(el, hash->charset, my_reverse_bits(hashnr) | 1, + (uchar *)key, keylen, pins)) + { + return 1; + } + my_atomic_add32(&hash->count, -1); + return 0; +} + +/* + RETURN + a pointer to an element with the given key (if a hash is not unique and + there're many elements with this key - the "first" matching element) + NULL if nothing is found + + NOTE + see l_search() for pin usage notes +*/ +void *lf_hash_search_using_hash_value(LF_HASH *hash, LF_PINS *pins, + my_hash_value_type hashnr, + const void *key, uint keylen) +{ + LF_SLIST **el, *found; + uint bucket; + + /* hide OOM errors - if we cannot initialize a bucket, try the previous one */ + for (bucket= hashnr % hash->size; ;bucket= my_clear_highest_bit(bucket)) + { + el= (LF_SLIST **)lf_dynarray_lvalue(&hash->array, bucket); + if (el && (*el || initialize_bucket(hash, el, bucket, pins) == 0)) + break; + if (unlikely(bucket == 0)) + return 0; /* if there's no bucket==0, the hash is empty */ + } + found= l_search(el, hash->charset, my_reverse_bits(hashnr) | 1, + (uchar *)key, keylen, pins); + return found ? found+1 : 0; +} + + +/** + Iterate over all elements in hash and call function with the element + + @note + If one of 'action' invocations returns 1 the iteration aborts. + 'action' might see some elements twice! 
+ + @retval 0 ok + @retval 1 error (action returned 1) +*/ +int lf_hash_iterate(LF_HASH *hash, LF_PINS *pins, + my_hash_walk_action action, void *argument) +{ + CURSOR cursor; + uint bucket= 0; + int res; + LF_SLIST **el; + + el= (LF_SLIST **)lf_dynarray_lvalue(&hash->array, bucket); + if (unlikely(!el)) + return 0; /* if there's no bucket==0, the hash is empty */ + if (*el == NULL && unlikely(initialize_bucket(hash, el, bucket, pins))) + return 0; /* if there's no bucket==0, the hash is empty */ + + res= l_find(el, 0, 0, (uchar*)argument, 0, &cursor, pins, action); + + lf_unpin(pins, 2); + lf_unpin(pins, 1); + lf_unpin(pins, 0); + return res; +} + +void *lf_hash_search(LF_HASH *hash, LF_PINS *pins, const void *key, uint keylen) +{ + return lf_hash_search_using_hash_value(hash, pins, + hash->hash_function(hash->charset, + (uchar*) key, + keylen) & INT_MAX32, + key, keylen); +} + +static const uchar *dummy_key= (uchar*)""; + +/* + RETURN + 0 - ok + -1 - out of memory +*/ +static int initialize_bucket(LF_HASH *hash, LF_SLIST **node, + uint bucket, LF_PINS *pins) +{ + uint parent= my_clear_highest_bit(bucket); + LF_SLIST *dummy= (LF_SLIST *)my_malloc(key_memory_lf_slist, + sizeof(LF_SLIST), MYF(MY_WME)); + LF_SLIST **tmp= 0, *cur; + LF_SLIST **el= (LF_SLIST **)lf_dynarray_lvalue(&hash->array, parent); + if (unlikely(!el || !dummy)) + return -1; + if (*el == NULL && bucket && + unlikely(initialize_bucket(hash, el, parent, pins))) + { + my_free(dummy); + return -1; + } + dummy->hashnr= my_reverse_bits(bucket) | 0; /* dummy node */ + dummy->key= dummy_key; + dummy->keylen= 0; + if ((cur= l_insert(el, hash->charset, dummy, pins, LF_HASH_UNIQUE))) + { + my_free(dummy); + dummy= cur; + } + my_atomic_casptr((void **)node, (void **)(char*) &tmp, dummy); + /* + note that if the CAS above failed (after l_insert() succeeded), + it would mean that some other thread has executed l_insert() for + the same dummy node, its l_insert() failed, it picked up our + dummy node (in "dummy= 
cur") and executed the same CAS as above. + Which means that even if CAS above failed we don't need to retry, + and we should not free(dummy) - there's no memory leak here + */ + return 0; +} + +C_MODE_END diff --git a/mysys/list.c b/mysys/list.c new file mode 100644 index 00000000..2efe53c0 --- /dev/null +++ b/mysys/list.c @@ -0,0 +1,114 @@ +/* Copyright (c) 2000, 2010, Oracle and/or its affiliates. All rights reserved. + + This program is free software; you can redistribute it and/or modify + it under the terms of the GNU General Public License as published by + the Free Software Foundation; version 2 of the License. + + This program is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + GNU General Public License for more details. + + You should have received a copy of the GNU General Public License + along with this program; if not, write to the Free Software + Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1335 USA */ + +/* + Code for handling doubly linked lists in C +*/ + +#include "mysys_priv.h" +#include + + + + /* Add an element to start of list */ + +LIST *list_add(LIST *root, LIST *element) +{ + DBUG_ENTER("list_add"); + DBUG_PRINT("enter",("root: %p element: %p", root, element)); + if (root) + { + if (root->prev) /* If add in mid of list */ + root->prev->next= element; + element->prev=root->prev; + root->prev=element; + } + else + element->prev=0; + element->next=root; + DBUG_RETURN(element); /* New root */ +} + + +LIST *list_delete(LIST *root, LIST *element) +{ + if (element->prev) + element->prev->next=element->next; + else + root=element->next; + if (element->next) + element->next->prev=element->prev; + return root; +} + + +void list_free(LIST *root, uint free_data) +{ + LIST *next; + while (root) + { + next=root->next; + if (free_data) + my_free(root->data); + my_free(root); + root=next; + } +} + + +LIST 
/*
  Reverse a list in place by swapping the prev and next links of every
  element reachable from 'root' through 'next'.

  Returns the last element visited, which is the first element of the
  reversed list; returns 'root' unchanged when it is NULL.
*/
LIST *list_reverse(LIST *root)
{
  LIST *new_head= root;
  LIST *node;
  LIST *following;

  for (node= root; node; node= following)
  {
    following= node->next;
    node->next= node->prev;
    node->prev= following;
    new_head= node;
  }
  return new_head;
}
IN NO EVENT SHALL OR + CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, + SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT + LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF + USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND + ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, + OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT + OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF + SUCH DAMAGE. +*/ + +/* + Numeric format: + =============== + * Fixed header part + 1 byte flags: + 0,1 bits - - 1 + 2-7 bits - 0 + 2 bytes column counter + * Columns directory sorted by column number, each entry contains of: + 2 bytes column number + bytes (1-4) combined offset from beginning of + the data segment + 3 bit type + * Data of above columns size of data and length depend on type + + Columns with names: + =================== + * Fixed header part + 1 byte flags: + 0,1 bits - - 2 + 2 bit - 1 (means format with names) + 3,4 bits - 00 (means - 2, + now 2 is the only supported size) + 5-7 bits - 0 + 2 bytes column counter + * Variable header part (now it is actually fixed part) + (2) bytes size of stored names pool + * Column directory sorted by names, each consists of + (2) bytes offset of name + bytes (2-5)bytes combined offset from beginning of + the data segment + 4 bit type + * Names stored one after another + * Data of above columns size of data and length depend on type +*/ + +#include "mysys_priv.h" +#include +#include +#include + +uint32 copy_and_convert(char *to, uint32 to_length, CHARSET_INFO *to_cs, + const char *from, uint32 from_length, + CHARSET_INFO *from_cs, uint *errors); +/* + Flag byte bits + + 2 bits which determinate size of offset in the header -1 +*/ +/* mask to get above bits */ +#define DYNCOL_FLG_OFFSET (1U|2U) +#define DYNCOL_FLG_NAMES 4U +#define DYNCOL_FLG_NMOFFSET (8U|16U) +/** + All known flags mask that could be set. 
+ + @note DYNCOL_FLG_NMOFFSET should be 0 for now. +*/ +#define DYNCOL_FLG_KNOWN (1U|2U|4U) + +/* formats */ +enum enum_dyncol_format +{ + dyncol_fmt_num= 0, + dyncol_fmt_str= 1 +}; + +/* dynamic column size reserve */ +#define DYNCOL_SYZERESERVE 80 + +#define DYNCOL_OFFSET_ERROR 0xffffffff + +/* length of fixed string header 1 byte - flags, 2 bytes - columns counter */ +#define FIXED_HEADER_SIZE 3 +/* + length of fixed string header with names + 1 byte - flags, 2 bytes - columns counter, 2 bytes - name pool size +*/ +#define FIXED_HEADER_SIZE_NM 5 + +#define COLUMN_NUMBER_SIZE 2 +/* 2 bytes offset from the name pool */ +#define COLUMN_NAMEPTR_SIZE 2 + +#define MAX_OFFSET_LENGTH 4 +#define MAX_OFFSET_LENGTH_NM 5 + +#define DYNCOL_NUM_CHAR 6 + +my_bool mariadb_dyncol_has_names(DYNAMIC_COLUMN *str) +{ + if (str->length < 1) + return FALSE; + return MY_TEST(str->str[0] & DYNCOL_FLG_NAMES); +} + +static enum enum_dyncol_func_result +dynamic_column_time_store(DYNAMIC_COLUMN *str, + MYSQL_TIME *value, enum enum_dyncol_format format); +static enum enum_dyncol_func_result +dynamic_column_date_store(DYNAMIC_COLUMN *str, + MYSQL_TIME *value); +static enum enum_dyncol_func_result +dynamic_column_time_read_internal(DYNAMIC_COLUMN_VALUE *store_it_here, + uchar *data, size_t length); +static enum enum_dyncol_func_result +dynamic_column_date_read_internal(DYNAMIC_COLUMN_VALUE *store_it_here, + uchar *data, size_t length); +static enum enum_dyncol_func_result +dynamic_column_get_internal(DYNAMIC_COLUMN *str, + DYNAMIC_COLUMN_VALUE *store_it_here, + uint num_key, LEX_STRING *str_key); +static enum enum_dyncol_func_result +dynamic_column_exists_internal(DYNAMIC_COLUMN *str, uint num_key, + LEX_STRING *str_key); +static enum enum_dyncol_func_result +dynamic_column_update_many_fmt(DYNAMIC_COLUMN *str, + uint add_column_count, + void *column_keys, + DYNAMIC_COLUMN_VALUE *values, + my_bool string_keys); +static int plan_sort_num(const void *a, const void *b); +static int 
/**
  Comparator function for references on column numbers for qsort
  (numeric format)

  @param a               address of a pointer to the first column number
  @param b               address of a pointer to the second column number

  @return -1, 0 or 1 when the first number is smaller than, equal to or
          larger than the second one

  @note The numbers are compared rather than subtracted: the difference of
  two unsigned values converted to int has the wrong sign when the values
  are more than INT_MAX apart, which would give qsort() an inconsistent
  ordering.
*/

static int column_sort_num(const void *a, const void *b)
{
  const unsigned int lhs= **(const unsigned int *const *) a;
  const unsigned int rhs= **(const unsigned int *const *) b;
  return (lhs > rhs) - (lhs < rhs);
}
-1 : 0)); + if (rc == 0) + rc= memcmp((void *)s1->str, (void *)s2->str, + (size_t) s1->length); + return rc; +} + + +/** + Comparator function for references on column numbers for qsort + (names format) +*/ + +static int column_sort_named(const void *a, const void *b) +{ + return mariadb_dyncol_column_cmp_named(*((LEX_STRING **)a), + *((LEX_STRING **)b)); +} + + +/** + Check limit function (numeric format) +*/ + +static my_bool check_limit_num(const void *val) +{ + return **((uint **)val) > UINT_MAX16; +} + + +/** + Check limit function (names format) +*/ + +static my_bool check_limit_named(const void *val) +{ + return (*((LEX_STRING **)val))->length > MAX_NAME_LENGTH; +} + + +/** + Write numeric format static header part. +*/ + +static void set_fixed_header_num(DYNAMIC_COLUMN *str, DYN_HEADER *hdr) +{ + set_fixed_header(str, (uint)hdr->offset_size, hdr->column_count); + hdr->header= (uchar *)str->str + FIXED_HEADER_SIZE; + hdr->nmpool= hdr->dtpool= hdr->header + hdr->header_size; +} + + +/** + Write names format static header part. 
+*/ + +static void set_fixed_header_named(DYNAMIC_COLUMN *str, DYN_HEADER *hdr) +{ + DBUG_ASSERT(hdr->column_count <= 0xffff); + DBUG_ASSERT(hdr->offset_size <= MAX_OFFSET_LENGTH_NM); + /* size of data offset, named format flag, size of names offset (0 means 2) */ + str->str[0]= + (char) (((uchar)str->str[0] & ~(DYNCOL_FLG_OFFSET | DYNCOL_FLG_NMOFFSET)) | + (hdr->offset_size - 2) | DYNCOL_FLG_NAMES); + int2store(str->str + 1, hdr->column_count); /* columns number */ + int2store(str->str + 3, hdr->nmpool_size); + hdr->header= (uchar *)str->str + FIXED_HEADER_SIZE_NM; + hdr->nmpool= hdr->header + hdr->header_size; + hdr->dtpool= hdr->nmpool + hdr->nmpool_size; +} + + +/** + Store offset and type information in the given place + + @param place Beginning of the index entry + @param offset_size Size of offset field in bytes + @param type Type to be written + @param offset Offset to be written +*/ + +static my_bool type_and_offset_store_num(uchar *place, size_t offset_size, + DYNAMIC_COLUMN_TYPE type, + size_t offset) +{ + ulong val = (((ulong) offset) << 3) | (type - 1); + DBUG_ASSERT(type != DYN_COL_NULL); + DBUG_ASSERT(((type - 1) & (~7U)) == 0); /* fit in 3 bits */ + DBUG_ASSERT(offset_size >= 1 && offset_size <= 4); + + /* Index entry starts with column number; jump over it */ + place+= COLUMN_NUMBER_SIZE; + + switch (offset_size) { + case 1: + if (offset >= 0x1f) /* all 1 value is reserved */ + return TRUE; + place[0]= (uchar)val; + break; + case 2: + if (offset >= 0x1fff) /* all 1 value is reserved */ + return TRUE; + int2store(place, val); + break; + case 3: + if (offset >= 0x1fffff) /* all 1 value is reserved */ + return TRUE; + int3store(place, val); + break; + case 4: + if (offset >= 0x1fffffff) /* all 1 value is reserved */ + return TRUE; + int4store(place, val); + break; + default: + return TRUE; + } + return FALSE; +} + + +static my_bool type_and_offset_store_named(uchar *place, size_t offset_size, + DYNAMIC_COLUMN_TYPE type, + size_t offset) +{ + ulonglong 
val = (((ulong) offset) << 4) | (type - 1); + DBUG_ASSERT(type != DYN_COL_NULL); + DBUG_ASSERT(((type - 1) & (~0xfU)) == 0); /* fit in 4 bits */ + DBUG_ASSERT(offset_size >= 2 && offset_size <= 5); + + /* Index entry starts with name offset; jump over it */ + place+= COLUMN_NAMEPTR_SIZE; + switch (offset_size) { + case 2: + if (offset >= 0xfff) /* all 1 value is reserved */ + return TRUE; + int2store(place, val); + break; + case 3: + if (offset >= 0xfffff) /* all 1 value is reserved */ + return TRUE; + int3store(place, val); + break; + case 4: + if (offset >= 0xfffffff) /* all 1 value is reserved */ + return TRUE; + int4store(place, val); + break; + case 5: +#if SIZEOF_SIZE_T > 4 + if (offset >= 0xfffffffffull) /* all 1 value is reserved */ + return TRUE; +#endif + int5store(place, val); + break; + case 1: + default: + return TRUE; + } + return FALSE; +} + +/** + Write numeric format header entry + 2 bytes - column number + 1-4 bytes - data offset combined with type + + @param hdr descriptor of dynamic column record + @param column_key pointer to uint (column number) + @param value value which will be written (only type used) + @param offset offset of the data +*/ + +static my_bool put_header_entry_num(DYN_HEADER *hdr, + void *column_key, + DYNAMIC_COLUMN_VALUE *value, + size_t offset) +{ + uint *column_number= (uint *)column_key; + int2store(hdr->entry, *column_number); + DBUG_ASSERT(hdr->nmpool_size == 0); + if (type_and_offset_store_num(hdr->entry, hdr->offset_size, + value->type, + offset)) + return TRUE; + hdr->entry= hdr->entry + hdr->entry_size; + return FALSE; +} + + +/** + Write names format header entry + 1 byte - name length + 2 bytes - name offset in the name pool + 1-4 bytes - data offset combined with type + + @param hdr descriptor of dynamic column record + @param column_key pointer to LEX_STRING (column name) + @param value value which will be written (only type used) + @param offset offset of the data +*/ + +static my_bool 
put_header_entry_named(DYN_HEADER *hdr, + void *column_key, + DYNAMIC_COLUMN_VALUE *value, + size_t offset) +{ + LEX_STRING *column_name= (LEX_STRING *)column_key; + DBUG_ASSERT(column_name->length <= MAX_NAME_LENGTH); + DBUG_ASSERT(hdr->name - hdr->nmpool < (long) 0x10000L); + int2store(hdr->entry, hdr->name - hdr->nmpool); + memcpy(hdr->name, column_name->str, column_name->length); + DBUG_ASSERT(hdr->nmpool_size != 0 || column_name->length == 0); + if (type_and_offset_store_named(hdr->entry, hdr->offset_size, + value->type, + offset)) + return TRUE; + hdr->entry+= hdr->entry_size; + hdr->name+= column_name->length; + return FALSE; +} + + +/** + Calculate length of offset field for given data length + + @param data_length Length of the data segment + + @return number of bytes +*/ + +static size_t dynamic_column_offset_bytes_num(size_t data_length) +{ + if (data_length < 0x1f) /* all 1 value is reserved */ + return 1; + if (data_length < 0x1fff) /* all 1 value is reserved */ + return 2; + if (data_length < 0x1fffff) /* all 1 value is reserved */ + return 3; + if (data_length < 0x1fffffff) /* all 1 value is reserved */ + return 4; + return MAX_OFFSET_LENGTH + 1; /* For an error generation*/ +} + +static size_t dynamic_column_offset_bytes_named(size_t data_length) +{ + if (data_length < 0xfff) /* all 1 value is reserved */ + return 2; + if (data_length < 0xfffff) /* all 1 value is reserved */ + return 3; + if (data_length < 0xfffffff) /* all 1 value is reserved */ + return 4; +#if SIZEOF_SIZE_T > 4 + if (data_length < 0xfffffffffull) /* all 1 value is reserved */ +#endif + return 5; + return MAX_OFFSET_LENGTH_NM + 1; /* For an error generation */ +} + +/** + Read offset and type information from index entry + + @param type Where to put type info + @param offset Where to put offset info + @param place beginning of the type and offset + @param offset_size Size of offset field in bytes +*/ + +static my_bool type_and_offset_read_num(DYNAMIC_COLUMN_TYPE *type, + size_t 
*offset, + uchar *place, size_t offset_size) +{ + ulong UNINIT_VAR(val); + ulong UNINIT_VAR(lim); + + DBUG_ASSERT(offset_size >= 1 && offset_size <= 4); + + switch (offset_size) { + case 1: + val= (ulong)place[0]; + lim= 0x1f; + break; + case 2: + val= uint2korr(place); + lim= 0x1fff; + break; + case 3: + val= uint3korr(place); + lim= 0x1fffff; + break; + case 4: + val= uint4korr(place); + lim= 0x1fffffff; + break; + default: + DBUG_ASSERT(0); /* impossible */ + return 1; + } + *type= (val & 0x7) + 1; + *offset= val >> 3; + return (*offset >= lim); +} + +static my_bool type_and_offset_read_named(DYNAMIC_COLUMN_TYPE *type, + size_t *offset, + uchar *place, size_t offset_size) +{ + ulonglong UNINIT_VAR(val); + ulonglong UNINIT_VAR(lim); + DBUG_ASSERT(offset_size >= 2 && offset_size <= 5); + + switch (offset_size) { + case 2: + val= uint2korr(place); + lim= 0xfff; + break; + case 3: + val= uint3korr(place); + lim= 0xfffff; + break; + case 4: + val= uint4korr(place); + lim= 0xfffffff; + break; + case 5: + val= uint5korr(place); + lim= 0xfffffffffull; + break; + case 1: + default: + DBUG_ASSERT(0); /* impossible */ + return 1; + } + *type= (val & 0xf) + 1; + *offset= (size_t) (val >> 4); + return (*offset >= lim); +} + +/** + Format descriptor, contain constants and function references for + format processing +*/ + +struct st_service_funcs +{ + /* size of fixed header */ + uint fixed_hdr; + /* size of fixed part of header entry */ + uint fixed_hdr_entry; + + /*size of array element which stores keys */ + uint key_size_in_array; + + /* Maximum data offset size in bytes */ + size_t max_offset_size; + + size_t (*name_size) + (void *, uint); + int (*column_sort) + (const void *a, const void *b); + my_bool (*check_limit) + (const void *val); + void (*set_fixed_hdr) + (DYNAMIC_COLUMN *str, DYN_HEADER *hdr); + my_bool (*put_header_entry)(DYN_HEADER *hdr, + void *column_key, + DYNAMIC_COLUMN_VALUE *value, + size_t offset); + int (*plan_sort)(const void *a, const void *b); + 
size_t (*dynamic_column_offset_bytes)(size_t data_length); + my_bool (*type_and_offset_read)(DYNAMIC_COLUMN_TYPE *type, + size_t *offset, + uchar *place, size_t offset_size); + +}; + + +/** + Actual our 2 format descriptors +*/ + +static struct st_service_funcs fmt_data[2]= +{ + { + FIXED_HEADER_SIZE, + COLUMN_NUMBER_SIZE, + sizeof(uint), + MAX_OFFSET_LENGTH, + &name_size_num, + &column_sort_num, + &check_limit_num, + &set_fixed_header_num, + &put_header_entry_num, + &plan_sort_num, + &dynamic_column_offset_bytes_num, + &type_and_offset_read_num + }, + { + FIXED_HEADER_SIZE_NM, + COLUMN_NAMEPTR_SIZE, + sizeof(LEX_STRING), + MAX_OFFSET_LENGTH_NM, + &name_size_named, + &column_sort_named, + &check_limit_named, + &set_fixed_header_named, + &put_header_entry_named, + &plan_sort_named, + &dynamic_column_offset_bytes_named, + &type_and_offset_read_named + } +}; + + +/** + Read dynamic column record header and fill the descriptor + + @param hdr dynamic columns record descriptor to fill + @param str dynamic columns record + + @return ER_DYNCOL_* return code +*/ + +static enum enum_dyncol_func_result +init_read_hdr(DYN_HEADER *hdr, DYNAMIC_COLUMN *str) +{ + if (read_fixed_header(hdr, str)) + return ER_DYNCOL_FORMAT; + hdr->header= (uchar*)str->str + fmt_data[hdr->format].fixed_hdr; + calc_param(&hdr->entry_size, &hdr->header_size, + fmt_data[hdr->format].fixed_hdr_entry, hdr->offset_size, + hdr->column_count); + hdr->nmpool= hdr->header + hdr->header_size; + hdr->dtpool= hdr->nmpool + hdr->nmpool_size; + hdr->data_size= str->length - fmt_data[hdr->format].fixed_hdr - + hdr->header_size - hdr->nmpool_size; + hdr->data_end= (uchar*)str->str + str->length; + return ER_DYNCOL_OK; +} + + +/** + Initialize dynamic column string with (make it empty but correct format) + + @param str The string to initialize + @param size Amount of preallocated memory for the string. 
+ + @retval FALSE OK + @retval TRUE error +*/ + +static my_bool dynamic_column_init_named(DYNAMIC_COLUMN *str, size_t size) +{ + DBUG_ASSERT(size != 0); + + /* + Make string with no fields (empty header) + - First \0 is flags + - other 2 \0 is number of fields + */ + if (init_dynamic_string(str, NULL, size, DYNCOL_SYZERESERVE)) + return TRUE; + return FALSE; +} + + +/** + Calculate how many bytes needed to store val as variable length integer + where first bit indicate continuation of the sequence. + + @param val The value for which we are calculating length + + @return number of bytes +*/ + +static size_t dynamic_column_var_uint_bytes(ulonglong val) +{ + size_t len= 0; + do + { + len++; + val>>= 7; + } while (val); + return len; +} + + +/** + Stores variable length unsigned integer value to a string + + @param str The string where to append the value + @param val The value to put in the string + + @return ER_DYNCOL_* return code + + @notes + This is used to store a number together with other data in the same + object. (Like decimals, length of string etc) + (As we don't know the length of this object, we can't store 0 in 0 bytes) +*/ + +static enum enum_dyncol_func_result +dynamic_column_var_uint_store(DYNAMIC_COLUMN *str, ulonglong val) +{ + if (dynstr_realloc(str, 10)) /* max what we can use */ + return ER_DYNCOL_RESOURCE; + + do + { + ulonglong rest= val >> 7; + str->str[str->length++]= ((val & 0x7f) | (rest ? 
0x80 : 0x00)); + val= rest; + } while (val); + return ER_DYNCOL_OK; +} + + +/** + Reads variable length unsigned integer value from a string + + @param data The string from which the int should be read + @param data_length Max length of data + @param len Where to put length of the string read in bytes + + @return value of the unsigned integer read from the string + + In case of error, *len is set to 0 +*/ + +static ulonglong +dynamic_column_var_uint_get(uchar *data, size_t data_length, + size_t *len) +{ + ulonglong val= 0; + uint length; + uchar *end= data + data_length; + + for (length=0; data < end ; data++) + { + val+= (((ulonglong)((*data) & 0x7f)) << (length * 7)); + length++; + if (!((*data) & 0x80)) + { + /* End of data */ + *len= length; + return val; + } + } + /* Something was wrong with data */ + *len= 0; /* Mark error */ + return 0; +} + + +/** + Calculate how many bytes needed to store val as unsigned. + + @param val The value for which we are calculating length + + @return number of bytes (0-8) +*/ + +static size_t dynamic_column_uint_bytes(ulonglong val) +{ + size_t len; + + for (len= 0; val ; val>>= 8, len++) + ; + return len; +} + + +/** + Append the string with given unsigned int value. 
+ + @param str The string where to put the value + @param val The value to put in the string + + @return ER_DYNCOL_* return code +*/ + +static enum enum_dyncol_func_result +dynamic_column_uint_store(DYNAMIC_COLUMN *str, ulonglong val) +{ + if (dynstr_realloc(str, 8)) /* max what we can use */ + return ER_DYNCOL_RESOURCE; + + for (; val; val>>= 8) + str->str[str->length++]= (char) (val & 0xff); + return ER_DYNCOL_OK; +} + + +/** + Read unsigned int value of given length from the string + + @param store_it_here The structure to store the value + @param data The string which should be read + @param length The length (in bytes) of the value in nthe string + + @return ER_DYNCOL_* return code +*/ + +static enum enum_dyncol_func_result +dynamic_column_uint_read(DYNAMIC_COLUMN_VALUE *store_it_here, + uchar *data, size_t length) +{ + ulonglong value= 0; + size_t i; + + for (i= 0; i < length; i++) + value+= ((ulonglong)data[i]) << (i*8); + + store_it_here->x.ulong_value= value; + return ER_DYNCOL_OK; +} + +/** + Calculate how many bytes needed to store val as signed in following encoding: + 0 -> 0 + -1 -> 1 + 1 -> 2 + -2 -> 3 + 2 -> 4 + ... + + @param val The value for which we are calculating length + + @return number of bytes +*/ + +static size_t dynamic_column_sint_bytes(longlong val) +{ + return dynamic_column_uint_bytes((((ulonglong) val) << 1) ^ + (val < 0 ? 0xffffffffffffffffull : 0)); +} + + +/** + Append the string with given signed int value. + + @param str the string where to put the value + @param val the value to put in the string + + @return ER_DYNCOL_* return code +*/ + +static enum enum_dyncol_func_result +dynamic_column_sint_store(DYNAMIC_COLUMN *str, longlong val) +{ + return dynamic_column_uint_store(str, + (((ulonglong) val) << 1) ^ + (val < 0 ? 
0xffffffffffffffffULL : 0)); +} + + +/** + Read signed int value of given length from the string + + @param store_it_here The structure to store the value + @param data The string which should be read + @param length The length (in bytes) of the value in the string + + @return ER_DYNCOL_* return code +*/ + +static enum enum_dyncol_func_result +dynamic_column_sint_read(DYNAMIC_COLUMN_VALUE *store_it_here, + uchar *data, size_t length) +{ + ulonglong val; + dynamic_column_uint_read(store_it_here, data, length); + val= store_it_here->x.ulong_value; + if (val & 1) + val= (val >> 1) ^ 0xffffffffffffffffULL; + else + val>>= 1; + store_it_here->x.long_value= (longlong) val; + return ER_DYNCOL_OK; +} + + +/** + Calculate how many bytes needed to store the value. + + @param value The value for which we are calculating length + + @return + Error: (size_t) ~0 + ok number of bytes +*/ + +static size_t +dynamic_column_value_len(DYNAMIC_COLUMN_VALUE *value, + enum enum_dyncol_format format) +{ + switch (value->type) { + case DYN_COL_NULL: + return 0; + case DYN_COL_INT: + return dynamic_column_sint_bytes(value->x.long_value); + case DYN_COL_UINT: + return dynamic_column_uint_bytes(value->x.ulong_value); + case DYN_COL_DOUBLE: + return 8; + case DYN_COL_STRING: + return (dynamic_column_var_uint_bytes(value->x.string.charset->number) + + value->x.string.value.length); + case DYN_COL_DECIMAL: + { + int precision= value->x.decimal.value.intg + value->x.decimal.value.frac; + int scale= value->x.decimal.value.frac; + + if (precision == 0 || decimal_is_zero(&value->x.decimal.value)) + { + /* This is here to simplify dynamic_column_decimal_store() */ + value->x.decimal.value.intg= value->x.decimal.value.frac= 0; + return 0; + } + /* + Check if legal decimal; This is needed to not get an assert in + decimal_bin_size(). 
However this should be impossible as all + decimals entered here should be valid and we have the special check + above to handle the unlikely but possible case that decimal.value.intg + and decimal.frac is 0. + */ + if (scale < 0 || precision <= 0) + { + DBUG_ASSERT(0); /* Impossible */ + return (size_t) ~0; + } + return (dynamic_column_var_uint_bytes(value->x.decimal.value.intg) + + dynamic_column_var_uint_bytes(value->x.decimal.value.frac) + + decimal_bin_size(precision, scale)); + } + case DYN_COL_DATETIME: + if (format == dyncol_fmt_num || value->x.time_value.second_part) + /* date+time in bits: 14 + 4 + 5 + 10 + 6 + 6 + 20 + 1 66bits ~= 9 bytes*/ + return 9; + else + return 6; + case DYN_COL_DATE: + /* date in dits: 14 + 4 + 5 = 23bits ~= 3bytes*/ + return 3; + case DYN_COL_TIME: + if (format == dyncol_fmt_num || value->x.time_value.second_part) + /* time in bits: 10 + 6 + 6 + 20 + 1 = 43bits ~= 6bytes*/ + return 6; + else + return 3; + case DYN_COL_DYNCOL: + return value->x.string.value.length; + } + DBUG_ASSERT(0); + return 0; +} + + +/** + Append double value to a string + + @param str the string where to put the value + @param val the value to put in the string + + @return ER_DYNCOL_* return code +*/ + +static enum enum_dyncol_func_result +dynamic_column_double_store(DYNAMIC_COLUMN *str, double val) +{ + if (dynstr_realloc(str, 8)) + return ER_DYNCOL_RESOURCE; + float8store(str->str + str->length, val); + str->length+= 8; + return ER_DYNCOL_OK; +} + + +/** + Read double value of given length from the string + + @param store_it_here The structure to store the value + @param data The string which should be read + @param length The length (in bytes) of the value in nthe string + + @return ER_DYNCOL_* return code +*/ + +static enum enum_dyncol_func_result +dynamic_column_double_read(DYNAMIC_COLUMN_VALUE *store_it_here, + uchar *data, size_t length) +{ + if (length != 8) + return ER_DYNCOL_FORMAT; + float8get(store_it_here->x.double_value, data); + return 
ER_DYNCOL_OK; +} + + +/** + Append the string with given string value. + + @param str the string where to put the value + @param val the value to put in the string + + @return ER_DYNCOL_* return code +*/ + +static enum enum_dyncol_func_result +dynamic_column_string_store(DYNAMIC_COLUMN *str, LEX_STRING *string, + CHARSET_INFO *charset) +{ + enum enum_dyncol_func_result rc; + if ((rc= dynamic_column_var_uint_store(str, charset->number))) + return rc; + if (dynstr_append_mem(str, string->str, string->length)) + return ER_DYNCOL_RESOURCE; + return ER_DYNCOL_OK; +} + +/** + Append the string with given string value. + + @param str the string where to put the value + @param val the value to put in the string + + @return ER_DYNCOL_* return code +*/ + +static enum enum_dyncol_func_result +dynamic_column_dyncol_store(DYNAMIC_COLUMN *str, LEX_STRING *string) +{ + if (dynstr_append_mem(str, string->str, string->length)) + return ER_DYNCOL_RESOURCE; + return ER_DYNCOL_OK; +} + +/** + Read string value of given length from the packed string + + @param store_it_here The structure to store the value + @param data The packed string which should be read + @param length The length (in bytes) of the value in nthe string + + @return ER_DYNCOL_* return code +*/ + +static enum enum_dyncol_func_result +dynamic_column_string_read(DYNAMIC_COLUMN_VALUE *store_it_here, + uchar *data, size_t length) +{ + size_t len; + uint charset_nr= (uint)dynamic_column_var_uint_get(data, length, &len); + if (len == 0) /* Wrong packed number */ + return ER_DYNCOL_FORMAT; + store_it_here->x.string.charset= get_charset(charset_nr, MYF(MY_WME)); + if (store_it_here->x.string.charset == NULL) + return ER_DYNCOL_UNKNOWN_CHARSET; + data+= len; + store_it_here->x.string.value.length= (length-= len); + store_it_here->x.string.value.str= (char*) data; + return ER_DYNCOL_OK; +} + +/** + Read Dynamic columns packet string value of given length + from the packed string + + @param store_it_here The structure to store 
the value + @param data The packed string which should be read + @param length The length (in bytes) of the value in nthe string + + @return ER_DYNCOL_* return code +*/ + +static enum enum_dyncol_func_result +dynamic_column_dyncol_read(DYNAMIC_COLUMN_VALUE *store_it_here, + uchar *data, size_t length) +{ + store_it_here->x.string.charset= &my_charset_bin; + store_it_here->x.string.value.length= length; + store_it_here->x.string.value.str= (char*) data; + return ER_DYNCOL_OK; +} + +/** + Append the string with given decimal value. + + @param str the string where to put the value + @param val the value to put in the string + + @return ER_DYNCOL_* return code +*/ + +static enum enum_dyncol_func_result +dynamic_column_decimal_store(DYNAMIC_COLUMN *str, + decimal_t *value) +{ + uint bin_size; + int precision= value->intg + value->frac; + + /* Store decimal zero as empty string */ + if (precision == 0) + return ER_DYNCOL_OK; + + bin_size= decimal_bin_size(precision, value->frac); + if (dynstr_realloc(str, bin_size + 20)) + return ER_DYNCOL_RESOURCE; + + /* The following can't fail as memory is already allocated */ + (void) dynamic_column_var_uint_store(str, value->intg); + (void) dynamic_column_var_uint_store(str, value->frac); + + decimal2bin(value, (uchar *) str->str + str->length, + precision, value->frac); + str->length+= bin_size; + return ER_DYNCOL_OK; +} + + +/** + Prepare the value to be used as decimal. + + @param value The value structure which sould be setup. 
+*/ + +void mariadb_dyncol_prepare_decimal(DYNAMIC_COLUMN_VALUE *value) +{ + value->x.decimal.value.buf= value->x.decimal.buffer; + value->x.decimal.value.len= DECIMAL_BUFF_LENGTH; + /* just to be safe */ + value->type= DYN_COL_DECIMAL; + decimal_make_zero(&value->x.decimal.value); +} + +void dynamic_column_prepare_decimal(DYNAMIC_COLUMN_VALUE *value) +{ + mariadb_dyncol_prepare_decimal(value); +} + + + +/** + Read decimal value of given length from the string + + @param store_it_here The structure to store the value + @param data The string which should be read + @param length The length (in bytes) of the value in nthe string + + @return ER_DYNCOL_* return code +*/ + +static enum enum_dyncol_func_result +dynamic_column_decimal_read(DYNAMIC_COLUMN_VALUE *store_it_here, + uchar *data, size_t length) +{ + size_t intg_len, frac_len; + int intg, frac, precision, scale; + + dynamic_column_prepare_decimal(store_it_here); + /* Decimals 0.0 is stored as a zero length string */ + if (length == 0) + return ER_DYNCOL_OK; /* value contains zero */ + + intg= (int)dynamic_column_var_uint_get(data, length, &intg_len); + data+= intg_len; + frac= (int)dynamic_column_var_uint_get(data, length - intg_len, &frac_len); + data+= frac_len; + + /* Check the size of data is correct */ + precision= intg + frac; + scale= frac; + if (scale < 0 || precision <= 0 || scale > precision || + (length - intg_len - frac_len) > + (size_t) (DECIMAL_BUFF_LENGTH*sizeof(decimal_digit_t)) || + decimal_bin_size(intg + frac, frac) != + (uint) (length - intg_len - frac_len)) + return ER_DYNCOL_FORMAT; + + if (bin2decimal(data, &store_it_here->x.decimal.value, precision, scale) != + E_DEC_OK) + return ER_DYNCOL_FORMAT; + return ER_DYNCOL_OK; +} + + +/** + Append the string with given datetime value. 
+ + @param str the string where to put the value + @param value the value to put in the string + + @return ER_DYNCOL_* return code +*/ + +static enum enum_dyncol_func_result +dynamic_column_date_time_store(DYNAMIC_COLUMN *str, MYSQL_TIME *value, + enum enum_dyncol_format format) +{ + enum enum_dyncol_func_result rc; + /* + 0<----year---->00000!<-hours--><---microseconds---> + 12345678901234123412345 1123456789012345612345612345678901234567890 + <123456><123456><123456><123456><123456><123456><123456><123456><123456> + */ + if ((rc= dynamic_column_date_store(str, value)) || + (rc= dynamic_column_time_store(str, value, format))) + return rc; + return ER_DYNCOL_OK; +} + + +/** + Read datetime value of given length from the packed string + + @param store_it_here The structure to store the value + @param data The packed string which should be read + @param length The length (in bytes) of the value in nthe string + + @return ER_DYNCOL_* return code +*/ + +static enum enum_dyncol_func_result +dynamic_column_date_time_read(DYNAMIC_COLUMN_VALUE *store_it_here, + uchar *data, size_t length) +{ + enum enum_dyncol_func_result rc= ER_DYNCOL_FORMAT; + /* + 0<----year---->00000!<-hours--><---microseconds---> + 12345678901234123412345 1123456789012345612345612345678901234567890 + <123456><123456><123456><123456><123456><123456><123456><123456><123456> + */ + if (length != 9 && length != 6) + goto err; + store_it_here->x.time_value.time_type= MYSQL_TIMESTAMP_DATETIME; + if ((rc= dynamic_column_date_read_internal(store_it_here, data, 3)) || + (rc= dynamic_column_time_read_internal(store_it_here, data + 3, + length - 3))) + goto err; + return ER_DYNCOL_OK; + +err: + store_it_here->x.time_value.time_type= MYSQL_TIMESTAMP_ERROR; + return rc; +} + + +/** + Append the string with given time value. 
+ + @param str the string where to put the value + @param value the value to put in the string + + @return ER_DYNCOL_* return code +*/ + +static enum enum_dyncol_func_result +dynamic_column_time_store(DYNAMIC_COLUMN *str, MYSQL_TIME *value, + enum enum_dyncol_format format) +{ + uchar *buf; + if (dynstr_realloc(str, 6)) + return ER_DYNCOL_RESOURCE; + + buf= ((uchar *)str->str) + str->length; + + if (value->time_type == MYSQL_TIMESTAMP_NONE || + value->time_type == MYSQL_TIMESTAMP_ERROR || + value->time_type == MYSQL_TIMESTAMP_DATE) + { + value->neg= 0; + value->second_part= 0; + value->hour= 0; + value->minute= 0; + value->second= 0; + } + DBUG_ASSERT(value->hour <= 838); + DBUG_ASSERT(value->minute <= 59); + DBUG_ASSERT(value->second <= 59); + DBUG_ASSERT(value->second_part <= 999999); + if (format == dyncol_fmt_num || value->second_part) + { + /* + 00000!<-hours--><---microseconds---> + 1123456789012345612345612345678901234567890 + <123456><123456><123456><123456><123456><123456> + */ + buf[0]= (value->second_part & 0xff); + buf[1]= ((value->second_part & 0xff00) >> 8); + buf[2]= (uchar)(((value->second & 0xf) << 4) | + ((value->second_part & 0xf0000) >> 16)); + buf[3]= ((value->minute << 2) | ((value->second & 0x30) >> 4)); + buf[4]= (value->hour & 0xff); + buf[5]= ((value->neg ? 0x4 : 0) | (value->hour >> 8)); + str->length+= 6; + } + else + { + /* + !<-hours--> + 11234567890123456123456 + <123456><123456><123456> + */ + buf[0]= (value->second) | ((value->minute & 0x3) << 6); + buf[1]= (value->minute >> 2) | ((value->hour & 0xf) << 4); + buf[2]= (value->hour >> 4) | (value->neg ? 
0x80 : 0); + str->length+= 3; + } + + return ER_DYNCOL_OK; +} + + +/** + Read time value of given length from the packed string + + @param store_it_here The structure to store the value + @param data The packed string which should be read + @param length The length (in bytes) of the value in nthe string + + @return ER_DYNCOL_* return code +*/ + +static enum enum_dyncol_func_result +dynamic_column_time_read(DYNAMIC_COLUMN_VALUE *store_it_here, + uchar *data, size_t length) +{ + store_it_here->x.time_value.year= store_it_here->x.time_value.month= + store_it_here->x.time_value.day= 0; + store_it_here->x.time_value.time_type= MYSQL_TIMESTAMP_TIME; + return dynamic_column_time_read_internal(store_it_here, data, length); +} + +/** + Internal function for reading time part from the string. + + @param store_it_here The structure to store the value + @param data The packed string which should be read + @param length The length (in bytes) of the value in nthe string + + @return ER_DYNCOL_* return code +*/ + +static enum enum_dyncol_func_result +dynamic_column_time_read_internal(DYNAMIC_COLUMN_VALUE *store_it_here, + uchar *data, size_t length) +{ + if (length != 6 && length != 3) + goto err; + if (length == 6) + { + /* + 00000!<-hours--><---microseconds---> + 1123456789012345612345612345678901234567890 + <123456><123456><123456><123456><123456><123456> + */ + store_it_here->x.time_value.second_part= (data[0] | + (data[1] << 8) | + ((data[2] & 0xf) << 16)); + store_it_here->x.time_value.second= ((data[2] >> 4) | + ((data[3] & 0x3) << 4)); + store_it_here->x.time_value.minute= (data[3] >> 2); + store_it_here->x.time_value.hour= (((((uint)data[5]) & 0x3 ) << 8) | data[4]); + store_it_here->x.time_value.neg= ((data[5] & 0x4) ? 
1 : 0); + } + else + { + /* + !<-hours--> + 11234567890123456123456 + <123456><123456><123456> + */ + store_it_here->x.time_value.second_part= 0; + store_it_here->x.time_value.second= (data[0] & 0x3f); + store_it_here->x.time_value.minute= (data[0] >> 6) | ((data[1] & 0xf) << 2); + store_it_here->x.time_value.hour= (data[1] >> 4) | ((data[2] & 0x3f) << 4); + store_it_here->x.time_value.neg= ((data[2] & 0x80) ? 1 : 0); + } + if (store_it_here->x.time_value.second > 59 || + store_it_here->x.time_value.minute > 59 || + store_it_here->x.time_value.hour > 838 || + store_it_here->x.time_value.second_part > 999999) + goto err; + return ER_DYNCOL_OK; + +err: + store_it_here->x.time_value.time_type= MYSQL_TIMESTAMP_ERROR; + return ER_DYNCOL_FORMAT; +} + + +/** + Append the string with given date value. + + @param str the string where to put the value + @param value the value to put in the string + + @return ER_DYNCOL_* return code +*/ + +static enum enum_dyncol_func_result +dynamic_column_date_store(DYNAMIC_COLUMN *str, MYSQL_TIME *value) +{ + uchar *buf; + if (dynstr_realloc(str, 3)) + return ER_DYNCOL_RESOURCE; + + buf= ((uchar *)str->str) + str->length; + if (value->time_type == MYSQL_TIMESTAMP_NONE || + value->time_type == MYSQL_TIMESTAMP_ERROR || + value->time_type == MYSQL_TIMESTAMP_TIME) + value->year= value->month= value->day = 0; + DBUG_ASSERT(value->year <= 9999); + DBUG_ASSERT(value->month <= 12); + DBUG_ASSERT(value->day <= 31); + /* + 0<----year----> + 012345678901234123412345 + <123456><123456><123456> + */ + buf[0]= (value->day | + ((value->month & 0x7) << 5)); + buf[1]= ((value->month >> 3) | ((value->year & 0x7F) << 1)); + buf[2]= (value->year >> 7); + str->length+= 3; + return ER_DYNCOL_OK; +} + + + +/** + Read date value of given length from the packed string + + @param store_it_here The structure to store the value + @param data The packed string which should be read + @param length The length (in bytes) of the value in nthe string + + @return 
ER_DYNCOL_* return code +*/ + +static enum enum_dyncol_func_result +dynamic_column_date_read(DYNAMIC_COLUMN_VALUE *store_it_here, + uchar *data, size_t length) +{ + store_it_here->x.time_value.neg= 0; + store_it_here->x.time_value.second_part= 0; + store_it_here->x.time_value.hour= 0; + store_it_here->x.time_value.minute= 0; + store_it_here->x.time_value.second= 0; + store_it_here->x.time_value.time_type= MYSQL_TIMESTAMP_DATE; + return dynamic_column_date_read_internal(store_it_here, data, length); +} + +/** + Internal function for reading date part from the string. + + @param store_it_here The structure to store the value + @param data The packed string which should be read + @param length The length (in bytes) of the value in nthe string + + @return ER_DYNCOL_* return code +*/ + +static enum enum_dyncol_func_result +dynamic_column_date_read_internal(DYNAMIC_COLUMN_VALUE *store_it_here, + uchar *data, + size_t length) +{ + if (length != 3) + goto err; + /* + 0<----year----> + 12345678901234123412345 + <123456><123456><123456> + */ + store_it_here->x.time_value.day= (data[0] & 0x1f); + store_it_here->x.time_value.month= (((data[1] & 0x1) << 3) | + (data[0] >> 5)); + store_it_here->x.time_value.year= ((((uint)data[2]) << 7) | + (data[1] >> 1)); + if (store_it_here->x.time_value.day > 31 || + store_it_here->x.time_value.month > 12 || + store_it_here->x.time_value.year > 9999) + goto err; + return ER_DYNCOL_OK; + +err: + store_it_here->x.time_value.time_type= MYSQL_TIMESTAMP_ERROR; + return ER_DYNCOL_FORMAT; +} + + +/** + Append the string with given value. 
+ + @param str the string where to put the value + @param value the value to put in the string + + @return ER_DYNCOL_* return code +*/ + +static enum enum_dyncol_func_result +data_store(DYNAMIC_COLUMN *str, DYNAMIC_COLUMN_VALUE *value, + enum enum_dyncol_format format) +{ + switch (value->type) { + case DYN_COL_INT: + return dynamic_column_sint_store(str, value->x.long_value); + case DYN_COL_UINT: + return dynamic_column_uint_store(str, value->x.ulong_value); + case DYN_COL_DOUBLE: + return dynamic_column_double_store(str, value->x.double_value); + case DYN_COL_STRING: + return dynamic_column_string_store(str, &value->x.string.value, + value->x.string.charset); + case DYN_COL_DECIMAL: + return dynamic_column_decimal_store(str, &value->x.decimal.value); + case DYN_COL_DATETIME: + /* date+time in bits: 14 + 4 + 5 + 5 + 6 + 6 40bits = 5 bytes */ + return dynamic_column_date_time_store(str, &value->x.time_value, format); + case DYN_COL_DATE: + /* date in dits: 14 + 4 + 5 = 23bits ~= 3bytes*/ + return dynamic_column_date_store(str, &value->x.time_value); + case DYN_COL_TIME: + /* time in bits: 5 + 6 + 6 = 17bits ~= 3bytes*/ + return dynamic_column_time_store(str, &value->x.time_value, format); + case DYN_COL_DYNCOL: + return dynamic_column_dyncol_store(str, &value->x.string.value); + case DYN_COL_NULL: + break; /* Impossible */ + } + DBUG_ASSERT(0); + return ER_DYNCOL_OK; /* Impossible */ +} + + +/** + Write information to the fixed header + + @param str String where to write the header + @param offset_size Size of offset field in bytes + @param column_count Number of columns +*/ + +static void set_fixed_header(DYNAMIC_COLUMN *str, + uint offset_size, + uint column_count) +{ + DBUG_ASSERT(column_count <= 0xffff); + DBUG_ASSERT(offset_size <= MAX_OFFSET_LENGTH); + str->str[0]= ((str->str[0] & ~DYNCOL_FLG_OFFSET) | + (offset_size - 1)); /* size of offset */ + int2store(str->str + 1, column_count); /* columns number */ + DBUG_ASSERT((str->str[0] & (~DYNCOL_FLG_KNOWN)) == 
0); +} + +/** + Adds columns into the empty string + + @param str String where to write the data (the record) + @param hdr Dynamic columns record descriptor + @param column_count Number of columns in the arrays + @param column_keys Array of columns keys (uint or LEX_STRING) + @param values Array of columns values + @param new_str True if we need to allocate new string + + @return ER_DYNCOL_* return code +*/ + +static enum enum_dyncol_func_result +dynamic_new_column_store(DYNAMIC_COLUMN *str, + DYN_HEADER *hdr, + uint column_count, + void *column_keys, + DYNAMIC_COLUMN_VALUE *values, + my_bool new_str) +{ + struct st_service_funcs *fmt= fmt_data + hdr->format; + void **UNINIT_VAR(columns_order); + uchar *element; + uint i; + enum enum_dyncol_func_result rc= ER_DYNCOL_RESOURCE; + size_t all_headers_size; + + if (column_count && !(columns_order= malloc(sizeof(void*)*column_count))) + return ER_DYNCOL_RESOURCE; + if (new_str || str->str == 0) + { + if (column_count) + { + if (dynamic_column_init_named(str, + fmt->fixed_hdr + + hdr->header_size + + hdr->nmpool_size + + hdr->data_size + + DYNCOL_SYZERESERVE)) + goto err; + } + else + { + mariadb_dyncol_init(str); + } + } + else + { + str->length= 0; + if (dynstr_realloc(str, + fmt->fixed_hdr + + hdr->header_size + + hdr->nmpool_size + + hdr->data_size + + DYNCOL_SYZERESERVE)) + goto err; + } + if (!column_count) + return ER_DYNCOL_OK; + + bzero(str->str, fmt->fixed_hdr); + str->length= fmt->fixed_hdr; + + /* sort columns for the header */ + for (i= 0, element= (uchar *) column_keys; + i < column_count; + i++, element+= fmt->key_size_in_array) + columns_order[i]= (void *)element; + qsort(columns_order, (size_t)column_count, sizeof(void*), fmt->column_sort); + + /* + For now we don't allow creating two columns with the same number + at the time of create. This can be fixed later to just use the later + by comparing the pointers. 
+ */ + for (i= 0; i < column_count - 1; i++) + { + if ((*fmt->check_limit)(&columns_order[i]) || + (*fmt->column_sort)(&columns_order[i], &columns_order[i + 1]) == 0) + { + rc= ER_DYNCOL_DATA; + goto err; + } + } + if ((*fmt->check_limit)(&columns_order[i])) + { + rc= ER_DYNCOL_DATA; + goto err; + } + + (*fmt->set_fixed_hdr)(str, hdr); + /* reserve place for header and name pool */ + str->length+= hdr->header_size + hdr->nmpool_size; + + hdr->entry= hdr->header; + hdr->name= hdr->nmpool; + all_headers_size= fmt->fixed_hdr + hdr->header_size + hdr->nmpool_size; + for (i= 0; i < column_count; i++) + { + uint ord= (uint)(((uchar*)columns_order[i] - (uchar*)column_keys) / + fmt->key_size_in_array); + if (values[ord].type != DYN_COL_NULL) + { + /* Store header first in the str */ + if ((*fmt->put_header_entry)(hdr, columns_order[i], values + ord, + str->length - all_headers_size)) + { + rc= ER_DYNCOL_FORMAT; + goto err; + } + + /* Store value in 'str + str->length' and increase str->length */ + if ((rc= data_store(str, values + ord, hdr->format))) + goto err; + } + } + rc= ER_DYNCOL_OK; +err: + free(columns_order); + return rc; +} + +/** + Calculate size of header, name pool and data pool + + @param hdr descriptor of dynamic column record + @param column_count number of elements in arrays + @param column_count Number of columns in the arrays + @param column_keys Array of columns keys (uint or LEX_STRING) + @param values Array of columns values + + @return ER_DYNCOL_* return code +*/ + +static enum enum_dyncol_func_result +calc_var_sizes(DYN_HEADER *hdr, + uint column_count, + void *column_keys, + DYNAMIC_COLUMN_VALUE *values) +{ + struct st_service_funcs *fmt= fmt_data + hdr->format; + uint i; + hdr->nmpool_size= hdr->data_size= 0; + hdr->column_count= 0; + for (i= 0; i < column_count; i++) + { + if (values[i].type != DYN_COL_NULL) + { + size_t tmp; + hdr->column_count++; + hdr->data_size+= (tmp= dynamic_column_value_len(values + i, + hdr->format)); + if (tmp == 
(size_t) ~0) + return ER_DYNCOL_DATA; + hdr->nmpool_size+= (*fmt->name_size)(column_keys, i); + } + } + /* + We can handle data up to 0x1fffffff (old format) and + 0xfffffffff (new format) bytes now. + */ + if ((hdr->offset_size= fmt->dynamic_column_offset_bytes(hdr->data_size)) >= + fmt->max_offset_size) + return ER_DYNCOL_LIMIT; + + /* header entry is column number or string pointer + offset & type */ + hdr->entry_size= fmt->fixed_hdr_entry + hdr->offset_size; + hdr->header_size= hdr->column_count * hdr->entry_size; + return ER_DYNCOL_OK; +} + +/** + Create packed string which contains given columns (internal multi format) + + @param str String where to write the data + @param column_count Number of columns in the arrays + @param column_keys Array of columns keys (format dependent) + @param values Array of columns values + @param new_str True if we need allocate new string + @param string_keys keys are strings + + @return ER_DYNCOL_* return code +*/ + +static enum enum_dyncol_func_result +dynamic_column_create_many_internal_fmt(DYNAMIC_COLUMN *str, + uint column_count, + void *column_keys, + DYNAMIC_COLUMN_VALUE *values, + my_bool new_str, + my_bool string_keys) +{ + DYN_HEADER header; + enum enum_dyncol_func_result rc; + bzero(&header, sizeof(header)); + header.format= (string_keys ? 
1 : 0); + + if (new_str) + { + /* to make dynstr_free() working in case of errors */ + mariadb_dyncol_init(str); + } + + if ((rc= calc_var_sizes(&header, column_count, column_keys, values)) < 0) + return rc; + + return dynamic_new_column_store(str, &header, + column_count, + column_keys, values, + new_str); +} + + +/** + Create packed string which contains given columns + + @param str String where to write the data + @param column_count Number of columns in the arrays + @param column_numbers Array of columns numbers + @param values Array of columns values + + @return ER_DYNCOL_* return code +*/ + +enum enum_dyncol_func_result +dynamic_column_create_many(DYNAMIC_COLUMN *str, + uint column_count, + uint *column_numbers, + DYNAMIC_COLUMN_VALUE *values) +{ + DBUG_ENTER("dynamic_column_create_many"); + DBUG_RETURN(dynamic_column_create_many_internal_fmt(str, column_count, + column_numbers, values, + TRUE, FALSE)); +} + +/** + Create packed string which contains given columns + + @param str String where to write the data + @param column_count Number of columns in the arrays + @param column_numbers Array of columns numbers + @param values Array of columns values + @param new_string True if we need allocate new string + + @return ER_DYNCOL_* return code +*/ + +enum enum_dyncol_func_result +mariadb_dyncol_create_many_num(DYNAMIC_COLUMN *str, + uint column_count, + uint *column_numbers, + DYNAMIC_COLUMN_VALUE *values, + my_bool new_string) +{ + DBUG_ENTER("mariadb_dyncol_create_many_num"); + DBUG_RETURN(dynamic_column_create_many_internal_fmt(str, column_count, + column_numbers, values, + new_string, FALSE)); +} + +/** + Create packed string which contains given columns + + @param str String where to write the data + @param column_count Number of columns in the arrays + @param column_keys Array of columns keys + @param values Array of columns value + @param new_string True if we need allocate new string + + @return ER_DYNCOL_* return code +*/ + +enum enum_dyncol_func_result 
+mariadb_dyncol_create_many_named(DYNAMIC_COLUMN *str, + uint column_count, + LEX_STRING *column_keys, + DYNAMIC_COLUMN_VALUE *values, + my_bool new_string) +{ + DBUG_ENTER("mariadb_dyncol_create_many_named"); + DBUG_RETURN(dynamic_column_create_many_internal_fmt(str, column_count, + column_keys, values, + new_string, TRUE)); +} + +/** + Create packed string which contains given column + + @param str String where to write the data + @param column_number Column number + @param value The columns value + + @return ER_DYNCOL_* return code +*/ + +enum enum_dyncol_func_result +dynamic_column_create(DYNAMIC_COLUMN *str, uint column_nr, + DYNAMIC_COLUMN_VALUE *value) +{ + DBUG_ENTER("dynamic_column_create"); + DBUG_RETURN(dynamic_column_create_many(str, 1, &column_nr, value)); +} + + +/** + Calculate length of data between given two header entries + + @param entry Pointer to the first entry + @param entry_next Pointer to the last entry + @param header_end Pointer to the header end + @param offset_size Size of offset field in bytes + @param last_offset Size of the data segment + + @return number of bytes +*/ + +static size_t get_length_interval(uchar *entry, uchar *entry_next, + uchar *header_end, size_t offset_size, + size_t last_offset) +{ + size_t offset, offset_next; + DYNAMIC_COLUMN_TYPE type, type_next; + DBUG_ASSERT(entry < entry_next); + + if (type_and_offset_read_num(&type, &offset, entry + COLUMN_NUMBER_SIZE, + offset_size)) + return DYNCOL_OFFSET_ERROR; + if (entry_next >= header_end) + return (last_offset - offset); + if (type_and_offset_read_num(&type_next, &offset_next, + entry_next + COLUMN_NUMBER_SIZE, offset_size) || + (offset_next > last_offset)) + return DYNCOL_OFFSET_ERROR; + return (offset_next - offset); +} + + +/** + Calculate length of data between given hdr->entry and next_entry + + @param hdr descriptor of dynamic column record + @param next_entry next header entry (can point just after last header + entry) + + @return number of bytes +*/ + +static 
size_t hdr_interval_length(DYN_HEADER *hdr, uchar *next_entry) +{ + struct st_service_funcs *fmt= fmt_data + hdr->format; + size_t next_entry_offset; + DYNAMIC_COLUMN_TYPE next_entry_type; + DBUG_ASSERT(hdr->entry < next_entry); + DBUG_ASSERT(hdr->entry >= hdr->header); + DBUG_ASSERT(next_entry <= hdr->header + hdr->header_size); + + if ((*fmt->type_and_offset_read)(&hdr->type, &hdr->offset, + hdr->entry + fmt->fixed_hdr_entry, + hdr->offset_size) || + hdr->data_size < hdr->offset) + return DYNCOL_OFFSET_ERROR; + if (next_entry == hdr->header + hdr->header_size) + return hdr->data_size - hdr->offset; + if ((*fmt->type_and_offset_read)(&next_entry_type, &next_entry_offset, + next_entry + fmt->fixed_hdr_entry, + hdr->offset_size) || + hdr->data_size < next_entry_offset) + return DYNCOL_OFFSET_ERROR; + return (next_entry_offset - hdr->offset); +} + + +/** + Comparator function for references to header entries for qsort +*/ + +static int header_compar_num(const void *a, const void *b) +{ + uint va= uint2korr((uchar*)a), vb= uint2korr((uchar*)b); + return (va > vb ? 1 : (va < vb ? 
-1 : 0)); +} + + +/** + Find entry in the numeric format header by the column number + + @param hdr descriptor of dynamic column record + @param key number to find + + @return pointer to the entry or NULL +*/ + +static uchar *find_entry_num(DYN_HEADER *hdr, uint key) +{ + uchar header_entry[2+4]; + DBUG_ASSERT(hdr->format == dyncol_fmt_num); + int2store(header_entry, key); + return hdr->entry= bsearch(header_entry, hdr->header, + (size_t)hdr->column_count, + hdr->entry_size, &header_compar_num); +} + + +/** + Read name from header entry + + @param hdr descriptor of dynamic column record + @param entry pointer to the header entry + @param name where to put name + + @return 0 ok + @return 1 error in data +*/ + +static my_bool read_name(DYN_HEADER *hdr, uchar *entry, LEX_STRING *name) +{ + size_t nmoffset= uint2korr(entry); + uchar *next_entry= entry + hdr->entry_size; + + if (nmoffset > hdr->nmpool_size) + return 1; + + name->str= (char *)hdr->nmpool + nmoffset; + if (next_entry == hdr->header + hdr->header_size) + name->length= hdr->nmpool_size - nmoffset; + else + { + size_t next_nmoffset= uint2korr(next_entry); + if (next_nmoffset > hdr->nmpool_size) + return 1; + name->length= next_nmoffset - nmoffset; + } + return 0; +} + + +/** + Find entry in the names format header by the column number + + @param hdr descriptor of dynamic column record + @param key name to find + + @return pointer to the entry or NULL +*/ +static uchar *find_entry_named(DYN_HEADER *hdr, LEX_STRING *key) +{ + uchar *min= hdr->header; + uchar *max= hdr->header + (hdr->column_count - 1) * hdr->entry_size; + uchar *mid; + DBUG_ASSERT(hdr->format == dyncol_fmt_str); + DBUG_ASSERT(hdr->nmpool != NULL); + while (max >= min) + { + LEX_STRING name; + int cmp; + mid= hdr->header + ((min - hdr->header) + + (max - hdr->header)) / + 2 / + hdr->entry_size * hdr->entry_size; + if (read_name(hdr, mid, &name)) + return NULL; + cmp= mariadb_dyncol_column_cmp_named(&name, key); + if (cmp < 0) + min= mid + 
hdr->entry_size; + else if (cmp > 0) + max= mid - hdr->entry_size; + else + return mid; + } + return NULL; +} + + +/** + Write number in the buffer (backward direction - starts from the buffer end) + + @return pointer on the number beginning +*/ + +static char *backwritenum(char *chr, uint numkey) +{ + if (numkey == 0) + *(--chr)= '0'; + else + while (numkey > 0) + { + *(--chr)= '0' + numkey % 10; + numkey/= 10; + } + return chr; +} + + +/** + Find column and fill information about it + + @param hdr descriptor of dynamic column record + @param numkey Number of the column to fetch (if strkey is NULL) + @param strkey Name of the column to fetch (or NULL) + + @return 0 ok + @return 1 error in data +*/ + +static my_bool +find_column(DYN_HEADER *hdr, uint numkey, LEX_STRING *strkey) +{ + LEX_STRING nmkey; + char nmkeybuff[DYNCOL_NUM_CHAR]; /* to fit max 2 bytes number */ + DBUG_ASSERT(hdr->header != NULL); + + if (hdr->header + hdr->header_size > hdr->data_end) + return TRUE; + + /* fix key */ + if (hdr->format == dyncol_fmt_num && strkey != NULL) + { + char *end; + numkey= (uint) strtoul(strkey->str, &end, 10); + if (end != strkey->str + strkey->length) + { + /* we can't find non-numeric key among numeric ones */ + hdr->type= DYN_COL_NULL; + return 0; + } + } + else if (hdr->format == dyncol_fmt_str && strkey == NULL) + { + nmkey.str= backwritenum(nmkeybuff + sizeof(nmkeybuff), numkey); + nmkey.length= (nmkeybuff + sizeof(nmkeybuff)) - nmkey.str; + strkey= &nmkey; + } + if (hdr->format == dyncol_fmt_num) + hdr->entry= find_entry_num(hdr, numkey); + else + hdr->entry= find_entry_named(hdr, strkey); + + if (!hdr->entry) + { + /* Column not found */ + hdr->type= DYN_COL_NULL; + return 0; + } + hdr->length= hdr_interval_length(hdr, hdr->entry + hdr->entry_size); + hdr->data= hdr->dtpool + hdr->offset; + /* + Check that the found data is within the ranges. This can happen if + we get data with wrong offsets. 
+ */ + if (hdr->length == DYNCOL_OFFSET_ERROR || + hdr->length > INT_MAX || hdr->offset > hdr->data_size) + return 1; + + return 0; +} + + +/** + Read and check the header of the dynamic string + + @param hdr descriptor of dynamic column record + @param str Dynamic string + + @retval FALSE OK + @retval TRUE error + + Note + We don't check for str->length == 0 as all code that calls this + already have handled this case. +*/ + +static inline my_bool read_fixed_header(DYN_HEADER *hdr, + DYNAMIC_COLUMN *str) +{ + DBUG_ASSERT(str != NULL && str->length != 0); + if ((str->length < 1) || + (str->str[0] & (~DYNCOL_FLG_KNOWN))) + return 1; + hdr->format= ((str->str[0] & DYNCOL_FLG_NAMES) ? + dyncol_fmt_str: + dyncol_fmt_num); + if ((str->length < fmt_data[hdr->format].fixed_hdr)) + return 1; /* Wrong header */ + hdr->offset_size= (str->str[0] & DYNCOL_FLG_OFFSET) + 1 + + (hdr->format == dyncol_fmt_str ? 1 : 0); + hdr->column_count= uint2korr(str->str + 1); + if (hdr->format == dyncol_fmt_str) + hdr->nmpool_size= uint2korr(str->str + 3); // only 2 bytes supported for now + else + hdr->nmpool_size= 0; + return 0; +} + + +/** + Get dynamic column value by column number + + @param str The packed string to extract the column + @param column_nr Number of column to fetch + @param store_it_here Where to store the extracted value + + @return ER_DYNCOL_* return code +*/ + +enum enum_dyncol_func_result +dynamic_column_get(DYNAMIC_COLUMN *str, uint column_nr, + DYNAMIC_COLUMN_VALUE *store_it_here) +{ + return dynamic_column_get_internal(str, store_it_here, column_nr, NULL); +} + +enum enum_dyncol_func_result +mariadb_dyncol_get_num(DYNAMIC_COLUMN *str, uint column_nr, + DYNAMIC_COLUMN_VALUE *store_it_here) +{ + return dynamic_column_get_internal(str, store_it_here, column_nr, NULL); +} + + +/** + Get dynamic column value by name + + @param str The packed string to extract the column + @param name Name of column to fetch + @param store_it_here Where to store the extracted value + + 
@return ER_DYNCOL_* return code +*/ + +enum enum_dyncol_func_result +mariadb_dyncol_get_named(DYNAMIC_COLUMN *str, LEX_STRING *name, + DYNAMIC_COLUMN_VALUE *store_it_here) +{ + DBUG_ASSERT(name != NULL); + return dynamic_column_get_internal(str, store_it_here, 0, name); +} + + +static enum enum_dyncol_func_result +dynamic_column_get_value(DYN_HEADER *hdr, DYNAMIC_COLUMN_VALUE *store_it_here) +{ + static enum enum_dyncol_func_result rc; + switch ((store_it_here->type= hdr->type)) { + case DYN_COL_INT: + rc= dynamic_column_sint_read(store_it_here, hdr->data, hdr->length); + break; + case DYN_COL_UINT: + rc= dynamic_column_uint_read(store_it_here, hdr->data, hdr->length); + break; + case DYN_COL_DOUBLE: + rc= dynamic_column_double_read(store_it_here, hdr->data, hdr->length); + break; + case DYN_COL_STRING: + rc= dynamic_column_string_read(store_it_here, hdr->data, hdr->length); + break; + case DYN_COL_DECIMAL: + rc= dynamic_column_decimal_read(store_it_here, hdr->data, hdr->length); + break; + case DYN_COL_DATETIME: + rc= dynamic_column_date_time_read(store_it_here, hdr->data, + hdr->length); + break; + case DYN_COL_DATE: + rc= dynamic_column_date_read(store_it_here, hdr->data, hdr->length); + break; + case DYN_COL_TIME: + rc= dynamic_column_time_read(store_it_here, hdr->data, hdr->length); + break; + case DYN_COL_NULL: + rc= ER_DYNCOL_OK; + break; + case DYN_COL_DYNCOL: + rc= dynamic_column_dyncol_read(store_it_here, hdr->data, hdr->length); + break; + default: + rc= ER_DYNCOL_FORMAT; + store_it_here->type= DYN_COL_NULL; + break; + } + return rc; +} + +/** + Get dynamic column value by number or name + + @param str The packed string to extract the column + @param store_it_here Where to store the extracted value + @param numkey Number of the column to fetch (if strkey is NULL) + @param strkey Name of the column to fetch (or NULL) + + @return ER_DYNCOL_* return code +*/ + +static enum enum_dyncol_func_result +dynamic_column_get_internal(DYNAMIC_COLUMN *str, + 
DYNAMIC_COLUMN_VALUE *store_it_here, + uint num_key, LEX_STRING *str_key) +{ + DYN_HEADER header; + enum enum_dyncol_func_result rc= ER_DYNCOL_FORMAT; + bzero(&header, sizeof(header)); + + if (str->length == 0) + goto null; + + if ((rc= init_read_hdr(&header, str)) < 0) + goto err; + + if (header.column_count == 0) + goto null; + + if (find_column(&header, num_key, str_key)) + goto err; + + rc= dynamic_column_get_value(&header, store_it_here); + return rc; + +null: + rc= ER_DYNCOL_OK; +err: + store_it_here->type= DYN_COL_NULL; + return rc; +} + + +/** + Check existence of the column in the packed string (by number) + + @param str The packed string to check the column + @param column_nr Number of column to check + + @return ER_DYNCOL_* return code +*/ + +enum enum_dyncol_func_result +dynamic_column_exists(DYNAMIC_COLUMN *str, uint column_nr) +{ + return dynamic_column_exists_internal(str, column_nr, NULL); +} + +enum enum_dyncol_func_result +mariadb_dyncol_exists_num(DYNAMIC_COLUMN *str, uint column_nr) +{ + return dynamic_column_exists_internal(str, column_nr, NULL); +} + +/** + Check existence of the column in the packed string (by name) + + @param str The packed string to check the column + @param name Name of column to check + + @return ER_DYNCOL_* return code +*/ + +enum enum_dyncol_func_result +mariadb_dyncol_exists_named(DYNAMIC_COLUMN *str, LEX_STRING *name) +{ + DBUG_ASSERT(name != NULL); + return dynamic_column_exists_internal(str, 0, name); +} + + +/** + Check existence of the column in the packed string (by name of number) + + @param str The packed string to check the column + @param num_key Number of the column to fetch (if strkey is NULL) + @param str_key Name of the column to fetch (or NULL) + + @return ER_DYNCOL_* return code +*/ + +static enum enum_dyncol_func_result +dynamic_column_exists_internal(DYNAMIC_COLUMN *str, uint num_key, + LEX_STRING *str_key) +{ + DYN_HEADER header; + enum enum_dyncol_func_result rc; + bzero(&header, sizeof(header)); + 
+ if (str->length == 0) + return ER_DYNCOL_NO; /* no columns */ + + if ((rc= init_read_hdr(&header, str)) < 0) + return rc; + + if (header.column_count == 0) + return ER_DYNCOL_NO; /* no columns */ + + if (find_column(&header, num_key, str_key)) + return ER_DYNCOL_FORMAT; + + return (header.type != DYN_COL_NULL ? ER_DYNCOL_YES : ER_DYNCOL_NO); +} + + +/** + List not-null columns in the packed string (only numeric format) + + @param str The packed string + @param array_of_uint Where to put reference on created array + + @return ER_DYNCOL_* return code +*/ +enum enum_dyncol_func_result +dynamic_column_list(DYNAMIC_COLUMN *str, DYNAMIC_ARRAY *array_of_uint) +{ + DYN_HEADER header; + uchar *read; + uint i; + enum enum_dyncol_func_result rc; + + bzero(array_of_uint, sizeof(*array_of_uint)); /* In case of errors */ + if (str->length == 0) + return ER_DYNCOL_OK; /* no columns */ + + if ((rc= init_read_hdr(&header, str)) < 0) + return rc; + + if (header.format != dyncol_fmt_num) + return ER_DYNCOL_FORMAT; + + if (header.entry_size * header.column_count + FIXED_HEADER_SIZE > + str->length) + return ER_DYNCOL_FORMAT; + + if (my_init_dynamic_array(PSI_INSTRUMENT_ME, array_of_uint, + sizeof(uint), header.column_count, 0, MYF(0))) + return ER_DYNCOL_RESOURCE; + + for (i= 0, read= header.header; + i < header.column_count; + i++, read+= header.entry_size) + { + uint nm= uint2korr(read); + /* Insert can't never fail as it's pre-allocated above */ + (void) insert_dynamic(array_of_uint, (uchar *)&nm); + } + return ER_DYNCOL_OK; +} + +/** + List not-null columns in the packed string (only numeric format) + + @param str The packed string + @param array_of_uint Where to put reference on created array + + @return ER_DYNCOL_* return code +*/ +enum enum_dyncol_func_result +mariadb_dyncol_list_num(DYNAMIC_COLUMN *str, uint *count, uint **nums) +{ + DYN_HEADER header; + uchar *read; + uint i; + enum enum_dyncol_func_result rc; + + (*nums)= 0; (*count)= 0; /* In case of errors */ + + if 
(str->length == 0) + return ER_DYNCOL_OK; /* no columns */ + + if ((rc= init_read_hdr(&header, str)) < 0) + return rc; + + if (header.format != dyncol_fmt_num) + return ER_DYNCOL_FORMAT; + + if (header.entry_size * header.column_count + FIXED_HEADER_SIZE > + str->length) + return ER_DYNCOL_FORMAT; + + if (!((*nums)= my_malloc(PSI_INSTRUMENT_ME, sizeof(uint) * header.column_count, MYF(0)))) + return ER_DYNCOL_RESOURCE; + + for (i= 0, read= header.header; + i < header.column_count; + i++, read+= header.entry_size) + { + (*nums)[i]= uint2korr(read); + } + (*count)= header.column_count; + return ER_DYNCOL_OK; +} + +/** + List not-null columns in the packed string (any format) + + @param str The packed string + @param count Number of names in the list + @param names Where to put names list (should be freed) + + @return ER_DYNCOL_* return code +*/ + +enum enum_dyncol_func_result +mariadb_dyncol_list_named(DYNAMIC_COLUMN *str, uint *count, LEX_STRING **names) +{ + DYN_HEADER header; + uchar *read; + char *pool; + struct st_service_funcs *fmt; + uint i; + enum enum_dyncol_func_result rc; + + (*names)= 0; (*count)= 0; + + if (str->length == 0) + return ER_DYNCOL_OK; /* no columns */ + + if ((rc= init_read_hdr(&header, str)) < 0) + return rc; + + fmt= fmt_data + header.format; + + if (header.entry_size * header.column_count + fmt->fixed_hdr > + str->length) + return ER_DYNCOL_FORMAT; + + { + size_t size; + if (header.format == dyncol_fmt_num) + size= DYNCOL_NUM_CHAR * header.column_count; + else + size= header.nmpool_size + header.column_count; + + *names= my_malloc(PSI_INSTRUMENT_ME, + sizeof(LEX_STRING) * header.column_count + size, MYF(0)); + } + + if (!(*names)) + return ER_DYNCOL_RESOURCE; + pool= ((char *)(*names)) + sizeof(LEX_STRING) * header.column_count; + + for (i= 0, read= header.header; + i < header.column_count; + i++, read+= header.entry_size) + { + if (header.format == dyncol_fmt_num) + { + uint nm= uint2korr(read); + (*names)[i].str= pool; + pool+= 
DYNCOL_NUM_CHAR; + (*names)[i].length= + longlong2str(nm, (*names)[i].str, 10) - (*names)[i].str; + } + else + { + LEX_STRING tmp; + if (read_name(&header, read, &tmp)) + return ER_DYNCOL_FORMAT; + (*names)[i].length= tmp.length; + (*names)[i].str= pool; + pool+= tmp.length + 1; + memcpy((*names)[i].str, (const void *)tmp.str, tmp.length); + (*names)[i].str[tmp.length]= '\0'; // just for safety + } + } + (*count)= header.column_count; + return ER_DYNCOL_OK; +} + +/** + Find the place of the column in the header or place where it should be put + + @param hdr descriptor of dynamic column record + @param key Name or number of column to fetch + (depends on string_key) + @param string_key True if we gave pointer to LEX_STRING. + + @retval TRUE found + @retval FALSE pointer set to the next row +*/ + +static my_bool +find_place(DYN_HEADER *hdr, void *key, my_bool string_keys) +{ + uint mid, start, end, val; + int UNINIT_VAR(flag); + LEX_STRING str; + char buff[DYNCOL_NUM_CHAR]; + my_bool need_conversion= ((string_keys ? 
dyncol_fmt_str : dyncol_fmt_num) != + hdr->format); + /* new format can't be numeric if the old one is names */ + DBUG_ASSERT(string_keys || + hdr->format == dyncol_fmt_num); + + start= 0; + end= hdr->column_count -1; + mid= 1; + while (start != end) + { + mid= (start + end) / 2; + hdr->entry= hdr->header + mid * hdr->entry_size; + if (!string_keys) + { + val= uint2korr(hdr->entry); + flag= CMP_NUM(*((uint *)key), val); + } + else + { + if (need_conversion) + { + str.str= backwritenum(buff + sizeof(buff), uint2korr(hdr->entry)); + str.length= (buff + sizeof(buff)) - str.str; + } + else + { + DBUG_ASSERT(hdr->format == dyncol_fmt_str); + if (read_name(hdr, hdr->entry, &str)) + return 0; + } + flag= mariadb_dyncol_column_cmp_named((LEX_STRING *)key, &str); + } + if (flag <= 0) + end= mid; + else + start= mid + 1; + } + hdr->entry= hdr->header + start * hdr->entry_size; + if (start != mid) + { + if (!string_keys) + { + val= uint2korr(hdr->entry); + flag= CMP_NUM(*((uint *)key), val); + } + else + { + if (need_conversion) + { + str.str= backwritenum(buff + sizeof(buff), uint2korr(hdr->entry)); + str.length= (buff + sizeof(buff)) - str.str; + } + else + { + DBUG_ASSERT(hdr->format == dyncol_fmt_str); + if (read_name(hdr, hdr->entry, &str)) + return 0; + } + flag= mariadb_dyncol_column_cmp_named((LEX_STRING *)key, &str); + } + } + if (flag > 0) + hdr->entry+= hdr->entry_size; /* Point at next bigger key */ + return flag == 0; +} + + +/* + It is internal structure which describes a plan of changing the record + of dynamic columns +*/ + +typedef enum {PLAN_REPLACE, PLAN_ADD, PLAN_DELETE, PLAN_NOP} PLAN_ACT; + +struct st_plan { + DYNAMIC_COLUMN_VALUE *val; + void *key; + uchar *place; + size_t length; + long long hdelta, ddelta, ndelta; + long long mv_offset, mv_length; + uint mv_end; + PLAN_ACT act; +}; +typedef struct st_plan PLAN; + + +/** + Sort function for plan by column number +*/ + +static int plan_sort_num(const void *a, const void *b) +{ + return *((uint *)((PLAN 
*)a)->key) - *((uint *)((PLAN *)b)->key); +} + + +/** + Sort function for plan by column name +*/ + +static int plan_sort_named(const void *a, const void *b) +{ + return mariadb_dyncol_column_cmp_named((LEX_STRING *)((PLAN *)a)->key, + (LEX_STRING *)((PLAN *)b)->key); +} + +#define DELTA_CHECK(S, D, C) \ + if ((S) == 0) \ + (S)= (D); \ + else if (((S) > 0 && (D) < 0) || \ + ((S) < 0 && (D) > 0)) \ + { \ + (C)= TRUE; \ + } + +/** + Update dynamic column by copying in a new record (string). + + @param str Dynamic column record to change + @param plan Plan of changing the record + @param add_column_count number of records in the plan array. + @param hdr descriptor of old dynamic column record + @param new_hdr descriptor of new dynamic column record + @param convert need conversion from numeric to names format + + @return ER_DYNCOL_* return code +*/ + +static enum enum_dyncol_func_result +dynamic_column_update_copy(DYNAMIC_COLUMN *str, PLAN *plan, + uint add_column_count, + DYN_HEADER *hdr, DYN_HEADER *new_hdr, + my_bool convert) +{ + DYNAMIC_COLUMN tmp; + struct st_service_funcs *fmt= fmt_data + hdr->format, + *new_fmt= fmt_data + new_hdr->format; + uint i, j, k; + size_t all_headers_size; + + if (dynamic_column_init_named(&tmp, + (new_fmt->fixed_hdr + new_hdr->header_size + + new_hdr->nmpool_size + + new_hdr->data_size + DYNCOL_SYZERESERVE))) + { + return ER_DYNCOL_RESOURCE; + } + bzero(tmp.str, new_fmt->fixed_hdr); + (*new_fmt->set_fixed_hdr)(&tmp, new_hdr); + /* Adjust tmp to contain whole the future header */ + tmp.length= new_fmt->fixed_hdr + new_hdr->header_size + new_hdr->nmpool_size; + + + /* + Copy data to the new string + i= index in array of changes + j= index in packed string header index + */ + new_hdr->entry= new_hdr->header; + new_hdr->name= new_hdr->nmpool; + all_headers_size= new_fmt->fixed_hdr + + new_hdr->header_size + new_hdr->nmpool_size; + for (i= 0, j= 0; i < add_column_count || j < hdr->column_count; i++) + { + size_t UNINIT_VAR(first_offset); 
+ uint start= j, end; + + /* + Search in i and j for the next column to add from i and where to + add. + */ + + while (i < add_column_count && plan[i].act == PLAN_NOP) + i++; /* skip NOP */ + + if (i == add_column_count) + j= end= hdr->column_count; + else + { + /* + old data portion. We don't need to check that j < column_count + as plan[i].place is guaranteed to have a pointer inside the + data. + */ + while (hdr->header + j * hdr->entry_size < plan[i].place) + j++; + end= j; + if ((plan[i].act == PLAN_REPLACE || plan[i].act == PLAN_DELETE)) + j++; /* data at 'j' will be removed */ + } + + /* + Adjust all headers since last loop. + We have to do this as the offset for data has moved + */ + for (k= start; k < end; k++) + { + uchar *read= hdr->header + k * hdr->entry_size; + void *key; + LEX_STRING name; + size_t offs; + uint nm; + DYNAMIC_COLUMN_TYPE tp; + char buff[DYNCOL_NUM_CHAR]; + + if (hdr->format == dyncol_fmt_num) + { + if (convert) + { + name.str= backwritenum(buff + sizeof(buff), uint2korr(read)); + name.length= (buff + sizeof(buff)) - name.str; + key= &name; + } + else + { + nm= uint2korr(read); /* Column nummber */ + key= &nm; + } + } + else + { + if (read_name(hdr, read, &name)) + goto err; + key= &name; + } + if (fmt->type_and_offset_read(&tp, &offs, + read + fmt->fixed_hdr_entry, + hdr->offset_size)) + goto err; + if (k == start) + first_offset= offs; + else if (offs < first_offset) + goto err; + + offs+= (size_t) plan[i].ddelta; + { + DYNAMIC_COLUMN_VALUE val; + val.type= tp; // only the type used in the header + if ((*new_fmt->put_header_entry)(new_hdr, key, &val, offs)) + goto err; + } + } + + /* copy first the data that was not replaced in original packed data */ + if (start < end) + { + size_t data_size; + /* Add old data last in 'tmp' */ + hdr->entry= hdr->header + start * hdr->entry_size; + data_size= + hdr_interval_length(hdr, hdr->header + end * hdr->entry_size); + if (data_size == DYNCOL_OFFSET_ERROR || + (long) data_size < 0 || + 
data_size > hdr->data_size - first_offset) + goto err; + + memcpy(tmp.str + tmp.length, (char *)hdr->dtpool + first_offset, + data_size); + tmp.length+= data_size; + } + + /* new data adding */ + if (i < add_column_count) + { + if( plan[i].act == PLAN_ADD || plan[i].act == PLAN_REPLACE) + { + if ((*new_fmt->put_header_entry)(new_hdr, plan[i].key, + plan[i].val, + tmp.length - all_headers_size)) + goto err; + data_store(&tmp, plan[i].val, new_hdr->format); /* Append new data */ + } + } + } + mariadb_dyncol_free(str); + *str= tmp; + return ER_DYNCOL_OK; +err: + mariadb_dyncol_free(&tmp); + return ER_DYNCOL_FORMAT; +} + +static enum enum_dyncol_func_result +dynamic_column_update_move_left(DYNAMIC_COLUMN *str, PLAN *plan, + size_t offset_size, + size_t entry_size, + size_t header_size, + size_t new_offset_size, + size_t new_entry_size, + size_t new_header_size, + uint column_count, + uint new_column_count, + uint add_column_count, + uchar *header_end, + size_t max_offset) +{ + uchar *write; + uchar *header_base= (uchar *)str->str + FIXED_HEADER_SIZE; + uint i, j, k; + size_t curr_offset; + + write= (uchar *)str->str + FIXED_HEADER_SIZE; + set_fixed_header(str, (uint)new_offset_size, new_column_count); + if (!new_column_count) + { + // No records left + DBUG_ASSERT(new_header_size == 0); + str->length= FIXED_HEADER_SIZE; + return ER_DYNCOL_OK; + } + + /* + Move headers first. + i= index in array of changes + j= index in packed string header index + */ + for (curr_offset= 0, i= 0, j= 0; + i < add_column_count || j < column_count; + i++) + { + size_t UNINIT_VAR(first_offset); + uint start= j, end; + + /* + Search in i and j for the next column to add from i and where to + add. + */ + + while (i < add_column_count && plan[i].act == PLAN_NOP) + i++; /* skip NOP */ + + if (i == add_column_count) + j= end= column_count; + else + { + /* + old data portion. We don't need to check that j < column_count + as plan[i].place is guaranteed to have a pointer inside the + data. 
+ */ + while (header_base + j * entry_size < plan[i].place) + j++; + end= j; + if ((plan[i].act == PLAN_REPLACE || plan[i].act == PLAN_DELETE)) + j++; /* data at 'j' will be removed */ + } + plan[i].mv_end= end; + + { + DYNAMIC_COLUMN_TYPE tp; + if (type_and_offset_read_num(&tp, &first_offset, + header_base + start * entry_size + + COLUMN_NUMBER_SIZE, offset_size)) + return ER_DYNCOL_FORMAT; + } + /* find data to be moved */ + if (start < end) + { + size_t data_size= + get_length_interval(header_base + start * entry_size, + header_base + end * entry_size, + header_end, offset_size, max_offset); + if (data_size == DYNCOL_OFFSET_ERROR || + (long) data_size < 0 || + data_size > max_offset - first_offset) + { + str->length= 0; // just something valid + return ER_DYNCOL_FORMAT; + } + DBUG_ASSERT(curr_offset == first_offset + plan[i].ddelta); + plan[i].mv_offset= first_offset; + plan[i].mv_length= data_size; + curr_offset+= data_size; + } + else + { + plan[i].mv_length= 0; + plan[i].mv_offset= curr_offset; + } + + if (plan[i].ddelta == 0 && offset_size == new_offset_size && + plan[i].act != PLAN_DELETE) + write+= entry_size * (end - start); + else + { + /* + Adjust all headers since last loop. 
+ We have to do this as the offset for data has moved + */ + for (k= start; k < end; k++) + { + uchar *read= header_base + k * entry_size; + size_t offs; + uint nm; + DYNAMIC_COLUMN_TYPE tp; + + nm= uint2korr(read); /* Column nummber */ + if (type_and_offset_read_num(&tp, &offs, read + COLUMN_NUMBER_SIZE, + offset_size)) + return ER_DYNCOL_FORMAT; + + if (k > start && offs < first_offset) + { + str->length= 0; // just something valid + return ER_DYNCOL_FORMAT; + } + + offs+= (size_t) plan[i].ddelta; + int2store(write, nm); + /* write rest of data at write + COLUMN_NUMBER_SIZE */ + type_and_offset_store_num(write, new_offset_size, tp, offs); + write+= new_entry_size; + } + } + + /* new data adding */ + if (i < add_column_count) + { + if( plan[i].act == PLAN_ADD || plan[i].act == PLAN_REPLACE) + { + int2store(write, *((uint *)plan[i].key)); + type_and_offset_store_num(write, new_offset_size, + plan[i].val[0].type, + curr_offset); + write+= new_entry_size; + curr_offset+= plan[i].length; + } + } + } + + /* + Move data. + i= index in array of changes + j= index in packed string header index + */ + str->length= (FIXED_HEADER_SIZE + new_header_size); + for (i= 0, j= 0; + i < add_column_count || j < column_count; + i++) + { + uint start= j, end; + + /* + Search in i and j for the next column to add from i and where to + add. 
+ */ + + while (i < add_column_count && plan[i].act == PLAN_NOP) + i++; /* skip NOP */ + + j= end= plan[i].mv_end; + if (i != add_column_count && + (plan[i].act == PLAN_REPLACE || plan[i].act == PLAN_DELETE)) + j++; + + /* copy first the data that was not replaced in original packed data */ + if (start < end && plan[i].mv_length) + { + memmove((header_base + new_header_size + + plan[i].mv_offset + plan[i].ddelta), + header_base + header_size + plan[i].mv_offset, + (size_t) plan[i].mv_length); + } + str->length+= (size_t) plan[i].mv_length; + + /* new data adding */ + if (i < add_column_count) + { + if( plan[i].act == PLAN_ADD || plan[i].act == PLAN_REPLACE) + { + data_store(str, plan[i].val, dyncol_fmt_num);/* Append new data */ + } + } + } + return ER_DYNCOL_OK; +} + +#ifdef UNUSED +static enum enum_dyncol_func_result +dynamic_column_update_move_right(DYNAMIC_COLUMN *str, PLAN *plan, + size_t offset_size, + size_t entry_size, + size_t header_size, + size_t new_offset_size, + size_t new_entry_size, + size_t new_header_size, + uint column_count, + uint new_column_count, + uint add_column_count, + uchar *header_end, + size_t max_offset) +{ + uchar *write; + uchar *header_base= (uchar *)str->str + FIXED_HEADER_SIZE; + uint i, j, k; + size_t curr_offset; + + write= (uchar *)str->str + FIXED_HEADER_SIZE; + set_fixed_header(str, new_offset_size, new_column_count); + + /* + Move data first. + i= index in array of changes + j= index in packed string header index + */ + for (curr_offset= 0, i= 0, j= 0; + i < add_column_count || j < column_count; + i++) + { + size_t UNINIT_VAR(first_offset); + uint start= j, end; + + /* + Search in i and j for the next column to add from i and where to + add. + */ + + while (i < add_column_count && plan[i].act == PLAN_NOP) + i++; /* skip NOP */ + + if (i == add_column_count) + j= end= column_count; + else + { + /* + old data portion. 
We don't need to check that j < column_count + as plan[i].place is guaranteed to have a pointer inside the + data. + */ + while (header_base + j * entry_size < plan[i].place) + j++; + end= j; + if ((plan[i].act == PLAN_REPLACE || plan[i].act == PLAN_DELETE)) + j++; /* data at 'j' will be removed */ + } + plan[i].mv_end= end; + + { + DYNAMIC_COLUMN_TYPE tp; + type_and_offset_read_num(&tp, &first_offset, + header_base + + start * entry_size + COLUMN_NUMBER_SIZE, + offset_size); + } + /* find data to be moved */ + if (start < end) + { + size_t data_size= + get_length_interval(header_base + start * entry_size, + header_base + end * entry_size, + header_end, offset_size, max_offset); + if (data_size == DYNCOL_OFFSET_ERROR || + (long) data_size < 0 || + data_size > max_offset - first_offset) + { + str->length= 0; // just something valid + return ER_DYNCOL_FORMAT; + } + DBUG_ASSERT(curr_offset == first_offset + plan[i].ddelta); + plan[i].mv_offset= first_offset; + plan[i].mv_length= data_size; + curr_offset+= data_size; + } + else + { + plan[i].mv_length= 0; + plan[i].mv_offset= curr_offset; + } + + if (plan[i].ddelta == 0 && offset_size == new_offset_size && + plan[i].act != PLAN_DELETE) + write+= entry_size * (end - start); + else + { + /* + Adjust all headers since last loop. 
+ We have to do this as the offset for data has moved + */ + for (k= start; k < end; k++) + { + uchar *read= header_base + k * entry_size; + size_t offs; + uint nm; + DYNAMIC_COLUMN_TYPE tp; + + nm= uint2korr(read); /* Column nummber */ + type_and_offset_read_num(&tp, &offs, read + COLUMN_NUMBER_SIZE, + offset_size); + if (k > start && offs < first_offset) + { + str->length= 0; // just something valid + return ER_DYNCOL_FORMAT; + } + + offs+= plan[i].ddelta; + int2store(write, nm); + /* write rest of data at write + COLUMN_NUMBER_SIZE */ + if (type_and_offset_store_num(write, new_offset_size, tp, offs)) + { + str->length= 0; // just something valid + return ER_DYNCOL_FORMAT; + } + write+= new_entry_size; + } + } + + /* new data adding */ + if (i < add_column_count) + { + if( plan[i].act == PLAN_ADD || plan[i].act == PLAN_REPLACE) + { + int2store(write, *((uint *)plan[i].key)); + if (type_and_offset_store_num(write, new_offset_size, + plan[i].val[0].type, + curr_offset)) + { + str->length= 0; // just something valid + return ER_DYNCOL_FORMAT; + } + write+= new_entry_size; + curr_offset+= plan[i].length; + } + } + } + + /* + Move headers. + i= index in array of changes + j= index in packed string header index + */ + str->length= (FIXED_HEADER_SIZE + new_header_size); + for (i= 0, j= 0; + i < add_column_count || j < column_count; + i++) + { + uint start= j, end; + + /* + Search in i and j for the next column to add from i and where to + add. 
+ */ + + while (i < add_column_count && plan[i].act == PLAN_NOP) + i++; /* skip NOP */ + + j= end= plan[i].mv_end; + if (i != add_column_count && + (plan[i].act == PLAN_REPLACE || plan[i].act == PLAN_DELETE)) + j++; + + /* copy first the data that was not replaced in original packed data */ + if (start < end && plan[i].mv_length) + { + memmove((header_base + new_header_size + + plan[i].mv_offset + plan[i].ddelta), + header_base + header_size + plan[i].mv_offset, + plan[i].mv_length); + } + str->length+= plan[i].mv_length; + + /* new data adding */ + if (i < add_column_count) + { + if( plan[i].act == PLAN_ADD || plan[i].act == PLAN_REPLACE) + { + data_store(str, plan[i].val, dyncol_fmt_num); /* Append new data */ + } + } + } + return ER_DYNCOL_OK; +} +#endif + +/** + Update the packed string with the given columns + + @param str String where to write the data + @param add_column_count Number of columns in the arrays + @param column_numbers Array of columns numbers + @param values Array of columns values + + @return ER_DYNCOL_* return code +*/ +/* plan allocated on the stack */ +#define IN_PLACE_PLAN 4 + +enum enum_dyncol_func_result +dynamic_column_update_many(DYNAMIC_COLUMN *str, + uint add_column_count, + uint *column_numbers, + DYNAMIC_COLUMN_VALUE *values) +{ + return dynamic_column_update_many_fmt(str, add_column_count, column_numbers, + values, FALSE); +} + +enum enum_dyncol_func_result +mariadb_dyncol_update_many_num(DYNAMIC_COLUMN *str, + uint add_column_count, + uint *column_numbers, + DYNAMIC_COLUMN_VALUE *values) +{ + return dynamic_column_update_many_fmt(str, add_column_count, column_numbers, + values, FALSE); +} + +enum enum_dyncol_func_result +mariadb_dyncol_update_many_named(DYNAMIC_COLUMN *str, + uint add_column_count, + LEX_STRING *column_names, + DYNAMIC_COLUMN_VALUE *values) +{ + return dynamic_column_update_many_fmt(str, add_column_count, column_names, + values, TRUE); +} + +static uint numlen(uint val) +{ + uint res; + if (val == 0) + return 1; 
+ res= 0; + while(val) + { + res++; + val/=10; + } + return res; +} + +static enum enum_dyncol_func_result +dynamic_column_update_many_fmt(DYNAMIC_COLUMN *str, + uint add_column_count, + void *column_keys, + DYNAMIC_COLUMN_VALUE *values, + my_bool string_keys) +{ + PLAN *plan, *alloc_plan= NULL, in_place_plan[IN_PLACE_PLAN]; + uchar *element; + DYN_HEADER header, new_header; + struct st_service_funcs *fmt, *new_fmt; + long long data_delta= 0, name_delta= 0; + uint i; + uint not_null; + long long header_delta= 0; + long long header_delta_sign, data_delta_sign; + int copy= FALSE; + enum enum_dyncol_func_result rc; + my_bool convert; + + if (add_column_count == 0) + return ER_DYNCOL_OK; + + bzero(&header, sizeof(header)); + bzero(&new_header, sizeof(new_header)); + new_header.format= (string_keys ? dyncol_fmt_str : dyncol_fmt_num); + new_fmt= fmt_data + new_header.format; + + /* + Get columns in column order. As the data in 'str' is already + in column order this allows to replace all columns in one loop. + */ + if (IN_PLACE_PLAN > add_column_count) + plan= in_place_plan; + else if (!(alloc_plan= plan= + my_malloc(PSI_INSTRUMENT_ME, + sizeof(PLAN) * (add_column_count + 1), MYF(0)))) + return ER_DYNCOL_RESOURCE; + + not_null= add_column_count; + for (i= 0, element= (uchar *) column_keys; + i < add_column_count; + i++, element+= new_fmt->key_size_in_array) + { + if ((*new_fmt->check_limit)(&element)) + { + rc= ER_DYNCOL_DATA; + goto end; + } + + plan[i].val= values + i; + plan[i].key= element; + if (values[i].type == DYN_COL_NULL) + not_null--; + + } + + if (str->length == 0) + { + /* + Just add new columns. If there was no columns to add we return + an empty string. 
+ */ + goto create_new_string; + } + + /* Check that header is ok */ + if ((rc= init_read_hdr(&header, str)) < 0) + goto end; + fmt= fmt_data + header.format; + /* new format can't be numeric if the old one is names */ + DBUG_ASSERT(new_header.format == dyncol_fmt_str || + header.format == dyncol_fmt_num); + if (header.column_count == 0) + goto create_new_string; + + qsort(plan, (size_t)add_column_count, sizeof(PLAN), new_fmt->plan_sort); + + new_header.column_count= header.column_count; + new_header.nmpool_size= header.nmpool_size; + if ((convert= (new_header.format == dyncol_fmt_str && + header.format == dyncol_fmt_num))) + { + DBUG_ASSERT(new_header.nmpool_size == 0); + for(i= 0, header.entry= header.header; + i < header.column_count; + i++, header.entry+= header.entry_size) + { + new_header.nmpool_size+= numlen(uint2korr(header.entry)); + } + } + + if (fmt->fixed_hdr + header.header_size + header.nmpool_size > str->length) + { + rc= ER_DYNCOL_FORMAT; + goto end; + } + + /* + Calculate how many columns and data is added/deleted and make a 'plan' + for each of them. + */ + for (i= 0; i < add_column_count; i++) + { + /* + For now we don't allow creating two columns with the same number + at the time of create. This can be fixed later to just use the later + by comparing the pointers. 
+ */ + if (i < add_column_count - 1 && + new_fmt->column_sort(&plan[i].key, &plan[i + 1].key) == 0) + { + rc= ER_DYNCOL_DATA; + goto end; + } + + /* Set common variables for all plans */ + plan[i].ddelta= data_delta; + plan[i].ndelta= name_delta; + /* get header delta in entries */ + plan[i].hdelta= header_delta; + plan[i].length= 0; /* Length if NULL */ + + if (find_place(&header, plan[i].key, string_keys)) + { + size_t entry_data_size, entry_name_size= 0; + + /* Data existed; We have to replace or delete it */ + + entry_data_size= hdr_interval_length(&header, header.entry + + header.entry_size); + if (entry_data_size == DYNCOL_OFFSET_ERROR || + (long) entry_data_size < 0) + { + rc= ER_DYNCOL_FORMAT; + goto end; + } + + if (new_header.format == dyncol_fmt_str) + { + if (header.format == dyncol_fmt_str) + { + LEX_STRING name; + if (read_name(&header, header.entry, &name)) + { + rc= ER_DYNCOL_FORMAT; + goto end; + } + entry_name_size= name.length; + } + else + entry_name_size= numlen(uint2korr(header.entry)); + } + + if (plan[i].val->type == DYN_COL_NULL) + { + /* Inserting a NULL means delete the old data */ + + plan[i].act= PLAN_DELETE; /* Remove old value */ + header_delta--; /* One row less in header */ + data_delta-= entry_data_size; /* Less data to store */ + name_delta-= entry_name_size; + } + else + { + /* Replace the value */ + + plan[i].act= PLAN_REPLACE; + /* get data delta in bytes */ + if ((plan[i].length= dynamic_column_value_len(plan[i].val, + new_header.format)) == + (size_t) ~0) + { + rc= ER_DYNCOL_DATA; + goto end; + } + data_delta+= plan[i].length - entry_data_size; + if (new_header.format == dyncol_fmt_str) + { + name_delta+= ((LEX_STRING *)(plan[i].key))->length - entry_name_size; + } + } + } + else + { + /* Data did not exists. 
Add if it it's not NULL */ + + if (plan[i].val->type == DYN_COL_NULL) + { + plan[i].act= PLAN_NOP; /* Mark entry to be skipped */ + } + else + { + /* Add new value */ + + plan[i].act= PLAN_ADD; + header_delta++; /* One more row in header */ + /* get data delta in bytes */ + if ((plan[i].length= dynamic_column_value_len(plan[i].val, + new_header.format)) == + (size_t) ~0) + { + rc= ER_DYNCOL_DATA; + goto end; + } + data_delta+= plan[i].length; + if (new_header.format == dyncol_fmt_str) + name_delta+= ((LEX_STRING *)plan[i].key)->length; + } + } + plan[i].place= header.entry; + } + plan[add_column_count].hdelta= header_delta; + plan[add_column_count].ddelta= data_delta; + plan[add_column_count].act= PLAN_NOP; + plan[add_column_count].place= header.dtpool; + + new_header.column_count= (uint)(header.column_count + header_delta); + + /* + Check if it is only "increasing" or only "decreasing" plan for (header + and data separately). + */ + new_header.data_size= (size_t) (header.data_size + data_delta); + new_header.nmpool_size= (size_t) (new_header.nmpool_size + name_delta); + DBUG_ASSERT(new_header.format != dyncol_fmt_num || + new_header.nmpool_size == 0); + if ((new_header.offset_size= + new_fmt->dynamic_column_offset_bytes(new_header.data_size)) >= + new_fmt->max_offset_size) + { + rc= ER_DYNCOL_LIMIT; + goto end; + } + + copy= ((header.format != new_header.format) || + (new_header.format == dyncol_fmt_str)); + /* if (new_header.offset_size!=offset_size) then we have to rewrite header */ + header_delta_sign= + ((int)new_header.offset_size + new_fmt->fixed_hdr_entry) - + ((int)header.offset_size + fmt->fixed_hdr_entry); + data_delta_sign= 0; + // plan[add_column_count] contains last deltas. 
+ for (i= 0; i <= add_column_count && !copy; i++) + { + /* This is the check for increasing/decreasing */ + DELTA_CHECK(header_delta_sign, plan[i].hdelta, copy); + DELTA_CHECK(data_delta_sign, plan[i].ddelta, copy); + } + calc_param(&new_header.entry_size, &new_header.header_size, + new_fmt->fixed_hdr_entry, + new_header.offset_size, new_header.column_count); + + /* + Need copy because: + 1, Header/data parts moved in different directions. + 2. There is no enough allocated space in the string. + 3. Header and data moved in different directions. + */ + if (copy || /*1.*/ + str->max_length < str->length + header_delta + data_delta || /*2.*/ + ((header_delta_sign < 0 && data_delta_sign > 0) || + (header_delta_sign > 0 && data_delta_sign < 0))) /*3.*/ + rc= dynamic_column_update_copy(str, plan, add_column_count, + &header, &new_header, + convert); + else + if (header_delta_sign < 0) + rc= dynamic_column_update_move_left(str, plan, header.offset_size, + header.entry_size, + header.header_size, + new_header.offset_size, + new_header.entry_size, + new_header.header_size, + header.column_count, + new_header.column_count, + add_column_count, header.dtpool, + header.data_size); + else + /* + rc= dynamic_column_update_move_right(str, plan, offset_size, + entry_size, header_size, + new_header.offset_size, + new_header.entry_size, + new_heder.header_size, column_count, + new_header.column_count, + add_column_count, header_end, + header.data_size); + */ + rc= dynamic_column_update_copy(str, plan, add_column_count, + &header, &new_header, + convert); +end: + my_free(alloc_plan); + return rc; + +create_new_string: + /* There is no columns from before, so let's just add the new ones */ + rc= ER_DYNCOL_OK; + if (not_null != 0) + rc= dynamic_column_create_many_internal_fmt(str, add_column_count, + (uint*)column_keys, values, + str->str == NULL, + string_keys); + goto end; +} + + +/** + Update the packed string with the given column + + @param str String where to write the data + 
@param column_number Array of columns number + @param values Array of columns values + + @return ER_DYNCOL_* return code +*/ + + +enum enum_dyncol_func_result +dynamic_column_update(DYNAMIC_COLUMN *str, uint column_nr, + DYNAMIC_COLUMN_VALUE *value) +{ + return dynamic_column_update_many(str, 1, &column_nr, value); +} + + +enum enum_dyncol_func_result +mariadb_dyncol_check(DYNAMIC_COLUMN *str) +{ + struct st_service_funcs *fmt; + enum enum_dyncol_func_result rc= ER_DYNCOL_FORMAT; + DYN_HEADER header; + uint i; + size_t data_offset= 0, name_offset= 0; + size_t prev_data_offset= 0, prev_name_offset= 0; + LEX_STRING name= {0,0}, prev_name= {0,0}; + uint num= 0, prev_num= 0; + void *key, *prev_key; + enum enum_dynamic_column_type type= DYN_COL_NULL, prev_type= DYN_COL_NULL; + + DBUG_ENTER("dynamic_column_check"); + + if (str->length == 0) + { + DBUG_PRINT("info", ("empty string is OK")); + DBUG_RETURN(ER_DYNCOL_OK); + } + + bzero(&header, sizeof(header)); + + /* Check that header is OK */ + if (read_fixed_header(&header, str)) + { + DBUG_PRINT("info", ("Reading fixed string header failed")); + goto end; + } + fmt= fmt_data + header.format; + calc_param(&header.entry_size, &header.header_size, + fmt->fixed_hdr_entry, header.offset_size, + header.column_count); + /* headers are out of string length (no space for data and part of headers) */ + if (fmt->fixed_hdr + header.header_size + header.nmpool_size > str->length) + { + DBUG_PRINT("info", ("Fixed header: %u Header size: %u " + "Name pool size: %u but Strig length: %u", + (uint)fmt->fixed_hdr, + (uint)header.header_size, + (uint)header.nmpool_size, + (uint)str->length)); + goto end; + } + header.header= (uchar*)str->str + fmt->fixed_hdr; + header.nmpool= header.header + header.header_size; + header.dtpool= header.nmpool + header.nmpool_size; + header.data_size= str->length - fmt->fixed_hdr - + header.header_size - header.nmpool_size; + + /* read and check headers */ + if (header.format == dyncol_fmt_num) + { + key= # + 
prev_key= &prev_num; + } + else + { + key= &name; + prev_key= &prev_name; + } + for (i= 0, header.entry= header.header; + i < header.column_count; + i++, header.entry+= header.entry_size) + { + + if (header.format == dyncol_fmt_num) + { + num= uint2korr(header.entry); + } + else + { + DBUG_ASSERT(header.format == dyncol_fmt_str); + if (read_name(&header, header.entry, &name)) + { + DBUG_PRINT("info", ("Reading name failed: Field order: %u" + " Name offset: %u" + " Name pool size: %u", + (uint) i, + uint2korr(header.entry), + (uint)header.nmpool_size)); + goto end; + } + name_offset= name.str - (char *)header.nmpool; + } + if ((*fmt->type_and_offset_read)(&type, &data_offset, + header.entry + fmt->fixed_hdr_entry, + header.offset_size)) + goto end; + + DBUG_ASSERT(type != DYN_COL_NULL); + if (data_offset > header.data_size) + { + DBUG_PRINT("info", ("Field order: %u Data offset: %u" + " > Data pool size: %u", + (uint)i, + (uint)data_offset, + (uint)header.data_size)); + goto end; + } + if (prev_type != DYN_COL_NULL) + { + /* It is not first entry */ + if (prev_data_offset > data_offset || + ((prev_type != DYN_COL_INT && + prev_type != DYN_COL_UINT && + prev_type != DYN_COL_DECIMAL) && prev_data_offset == data_offset)) + { + DBUG_PRINT("info", ("Field order: %u Previous data offset: %u" + " >(=) Current data offset: %u", + (uint)i, + (uint)prev_data_offset, + (uint)data_offset)); + goto end; + } + if (prev_name_offset > name_offset) + { + DBUG_PRINT("info", ("Field order: %u Previous name offset: %u" + " > Current name offset: %u", + (uint)i, + (uint)prev_data_offset, + (uint)data_offset)); + goto end; + } + if ((*fmt->column_sort)(&prev_key, &key) >= 0) + { + DBUG_PRINT("info", ("Field order: %u Previous key >= Current key", + (uint)i)); + goto end; + } + } + prev_num= num; + prev_name= name; + prev_data_offset= data_offset; + prev_name_offset= name_offset; + prev_type= type; + } + + /* check data, which we can */ + for (i= 0, header.entry= header.header; + i < 
header.column_count; + i++, header.entry+= header.entry_size) + { + DYNAMIC_COLUMN_VALUE store; + // already checked by previouse pass + (*fmt->type_and_offset_read)(&header.type, &header.offset, + header.entry + fmt->fixed_hdr_entry, + header.offset_size); + header.length= + hdr_interval_length(&header, header.entry + header.entry_size); + header.data= header.dtpool + header.offset; + switch ((header.type)) { + case DYN_COL_INT: + rc= dynamic_column_sint_read(&store, header.data, header.length); + break; + case DYN_COL_UINT: + rc= dynamic_column_uint_read(&store, header.data, header.length); + break; + case DYN_COL_DOUBLE: + rc= dynamic_column_double_read(&store, header.data, header.length); + break; + case DYN_COL_STRING: + rc= dynamic_column_string_read(&store, header.data, header.length); + break; + case DYN_COL_DECIMAL: + rc= dynamic_column_decimal_read(&store, header.data, header.length); + break; + case DYN_COL_DATETIME: + rc= dynamic_column_date_time_read(&store, header.data, + header.length); + break; + case DYN_COL_DATE: + rc= dynamic_column_date_read(&store, header.data, header.length); + break; + case DYN_COL_TIME: + rc= dynamic_column_time_read(&store, header.data, header.length); + break; + case DYN_COL_DYNCOL: + rc= dynamic_column_dyncol_read(&store, header.data, header.length); + break; + case DYN_COL_NULL: + default: + rc= ER_DYNCOL_FORMAT; + goto end; + } + if (rc != ER_DYNCOL_OK) + { + DBUG_ASSERT(rc < 0); + DBUG_PRINT("info", ("Field order: %u Can't read data: %i", + (uint)i, (int) rc)); + goto end; + } + } + + rc= ER_DYNCOL_OK; +end: + DBUG_RETURN(rc); +} + +static +my_bool dynstr_append_json_quoted(DYNAMIC_STRING *str, + const char *append, size_t len) +{ + size_t additional= ((str->alloc_increment && str->alloc_increment > 6) ? 
+ str->alloc_increment : + 10); + size_t lim= additional; + size_t i; + if (dynstr_realloc(str, len + additional + 2)) + return TRUE; + str->str[str->length++]= '"'; + for (i= 0; i < len; i++) + { + register char c= append[i]; + if (unlikely(((uchar)c) <= 0x1F)) + { + if (lim < 5) + { + if (dynstr_realloc(str, additional)) + return TRUE; + lim+= additional; + } + lim-= 5; + str->str[str->length++]= '\\'; + str->str[str->length++]= 'u'; + str->str[str->length++]= '0'; + str->str[str->length++]= '0'; + str->str[str->length++]= (c < 0x10 ? '0' : '1'); + c%= 0x10; + str->str[str->length++]= (c < 0xA ? '0' + c : 'A' + (c - 0xA)); + } + else + { + if (c == '"' || c == '\\') + { + if (!lim) + { + if (dynstr_realloc(str, additional)) + return TRUE; + lim= additional; + } + lim--; + str->str[str->length++]= '\\'; + } + str->str[str->length++]= c; + } + } + str->str[str->length++]= '"'; + return FALSE; +} + + +enum enum_dyncol_func_result +mariadb_dyncol_val_str(DYNAMIC_STRING *str, DYNAMIC_COLUMN_VALUE *val, + CHARSET_INFO *cs, char quote) +{ + char buff[40]; + size_t len; + switch (val->type) { + case DYN_COL_INT: + len= snprintf(buff, sizeof(buff), "%lld", val->x.long_value); + if (dynstr_append_mem(str, buff, len)) + return ER_DYNCOL_RESOURCE; + break; + case DYN_COL_UINT: + len= snprintf(buff, sizeof(buff), "%llu", val->x.ulong_value); + if (dynstr_append_mem(str, buff, len)) + return ER_DYNCOL_RESOURCE; + break; + case DYN_COL_DOUBLE: + + len= my_gcvt(val->x.double_value, MY_GCVT_ARG_DOUBLE, + sizeof(buff) - 1, buff, NULL); + if (dynstr_realloc(str, len + (quote ? 2 : 0))) + return ER_DYNCOL_RESOURCE; + dynstr_append_mem(str, buff, len); + break; + case DYN_COL_DYNCOL: + case DYN_COL_STRING: + { + char *alloc= NULL; + char *from= val->x.string.value.str; + ulong bufflen; + my_bool conv= !my_charset_same(val->x.string.charset, cs); + my_bool rc; + len= val->x.string.value.length; + bufflen= (ulong)(len * (conv ? 
cs->mbmaxlen : 1)); + if (dynstr_realloc(str, bufflen)) + return ER_DYNCOL_RESOURCE; + + // guaranty UTF-8 string for value + if (!my_charset_same(val->x.string.charset, cs)) + { + uint dummy_errors; + if (!quote) + { + /* convert to the destination */ + str->length+= my_convert(str->str, bufflen, + cs, + from, (uint32)len, + val->x.string.charset, + &dummy_errors); + return ER_DYNCOL_OK; + } + if ((alloc= (char *)my_malloc(PSI_INSTRUMENT_ME, bufflen, MYF(0)))) + { + len= my_convert(alloc, bufflen, cs, from, (uint32)len, + val->x.string.charset, &dummy_errors); + from= alloc; + } + else + return ER_DYNCOL_RESOURCE; + } + if (quote) + if (quote == DYNCOL_JSON_ESC) + rc= dynstr_append_json_quoted(str, from, len); + else + rc= dynstr_append_quoted(str, from, len, quote); + else + rc= dynstr_append_mem(str, from, len); + if (alloc) + my_free(alloc); + if (rc) + return ER_DYNCOL_RESOURCE; + break; + } + case DYN_COL_DECIMAL: + { + int tmp_len= sizeof(buff); + decimal2string(&val->x.decimal.value, buff, &tmp_len, + 0, val->x.decimal.value.frac, + '0'); + if (dynstr_append_mem(str, buff, tmp_len)) + return ER_DYNCOL_RESOURCE; + break; + } + case DYN_COL_DATETIME: + case DYN_COL_DATE: + case DYN_COL_TIME: + len= my_TIME_to_str(&val->x.time_value, buff, AUTO_SEC_PART_DIGITS); + if (dynstr_realloc(str, len + (quote ? 
2 : 0))) + return ER_DYNCOL_RESOURCE; + if (quote) + str->str[str->length++]= '"'; + dynstr_append_mem(str, buff, len); + if (quote) + str->str[str->length++]= '"'; + break; + case DYN_COL_NULL: + if (dynstr_append_mem(str, "null", 4)) + return ER_DYNCOL_RESOURCE; + break; + default: + return(ER_DYNCOL_FORMAT); + } + return(ER_DYNCOL_OK); +} + + +enum enum_dyncol_func_result +mariadb_dyncol_val_long(longlong *ll, DYNAMIC_COLUMN_VALUE *val) +{ + enum enum_dyncol_func_result rc= ER_DYNCOL_OK; + *ll= 0; + switch (val->type) { + case DYN_COL_INT: + *ll= val->x.long_value; + break; + case DYN_COL_UINT: + *ll= (longlong)val->x.ulong_value; + if (val->x.ulong_value > ULONGLONG_MAX) + rc= ER_DYNCOL_TRUNCATED; + break; + case DYN_COL_DOUBLE: + *ll= (longlong)val->x.double_value; + if (((double) *ll) != val->x.double_value) + rc= ER_DYNCOL_TRUNCATED; + break; + case DYN_COL_STRING: + { + char *src= val->x.string.value.str; + size_t len= val->x.string.value.length; + longlong i= 0, sign= 1; + + while (len && my_isspace(&my_charset_latin1, *src)) src++,len--; + + if (len) + { + if (*src == '-') + { + sign= -1; + src++; + } else if (*src == '+') + src++; + while(len && my_isdigit(&my_charset_latin1, *src)) + { + i= i * 10 + (*src - '0'); + src++; + } + } + else + rc= ER_DYNCOL_TRUNCATED; + if (len) + rc= ER_DYNCOL_TRUNCATED; + *ll= i * sign; + break; + } + case DYN_COL_DECIMAL: + if (decimal2longlong(&val->x.decimal.value, ll) != E_DEC_OK) + rc= ER_DYNCOL_TRUNCATED; + break; + case DYN_COL_DATETIME: + *ll= (val->x.time_value.year * 10000000000ull + + val->x.time_value.month * 100000000L + + val->x.time_value.day * 1000000 + + val->x.time_value.hour * 10000 + + val->x.time_value.minute * 100 + + val->x.time_value.second) * + (val->x.time_value.neg ? -1 : 1); + break; + case DYN_COL_DATE: + *ll= (val->x.time_value.year * 10000 + + val->x.time_value.month * 100 + + val->x.time_value.day) * + (val->x.time_value.neg ? 
-1 : 1); + break; + case DYN_COL_TIME: + *ll= (val->x.time_value.hour * 10000 + + val->x.time_value.minute * 100 + + val->x.time_value.second) * + (val->x.time_value.neg ? -1 : 1); + break; + case DYN_COL_DYNCOL: + case DYN_COL_NULL: + rc= ER_DYNCOL_TRUNCATED; + break; + default: + return(ER_DYNCOL_FORMAT); + } + return(rc); +} + + +enum enum_dyncol_func_result +mariadb_dyncol_val_double(double *dbl, DYNAMIC_COLUMN_VALUE *val) +{ + enum enum_dyncol_func_result rc= ER_DYNCOL_OK; + *dbl= 0; + switch (val->type) { + case DYN_COL_INT: + *dbl= (double)val->x.long_value; + if (((longlong) *dbl) != val->x.long_value) + rc= ER_DYNCOL_TRUNCATED; + break; + case DYN_COL_UINT: + *dbl= (double)val->x.ulong_value; + if (((ulonglong) *dbl) != val->x.ulong_value) + rc= ER_DYNCOL_TRUNCATED; + break; + case DYN_COL_DOUBLE: + *dbl= val->x.double_value; + break; + case DYN_COL_STRING: + { + char *str, *end; + if (!(str= malloc(val->x.string.value.length + 1))) + return ER_DYNCOL_RESOURCE; + memcpy(str, val->x.string.value.str, val->x.string.value.length); + str[val->x.string.value.length]= '\0'; + *dbl= strtod(str, &end); + if (*end != '\0') + rc= ER_DYNCOL_TRUNCATED; + free(str); + break; + } + case DYN_COL_DECIMAL: + if (decimal2double(&val->x.decimal.value, dbl) != E_DEC_OK) + rc= ER_DYNCOL_TRUNCATED; + break; + case DYN_COL_DATETIME: + *dbl= (double)(val->x.time_value.year * 10000000000ull + + val->x.time_value.month * 100000000L + + val->x.time_value.day * 1000000 + + val->x.time_value.hour * 10000 + + val->x.time_value.minute * 100 + + val->x.time_value.second) * + (val->x.time_value.neg ? -1 : 1); + break; + case DYN_COL_DATE: + *dbl= (double)(val->x.time_value.year * 10000 + + val->x.time_value.month * 100 + + val->x.time_value.day) * + (val->x.time_value.neg ? -1 : 1); + break; + case DYN_COL_TIME: + *dbl= (double)(val->x.time_value.hour * 10000 + + val->x.time_value.minute * 100 + + val->x.time_value.second) * + (val->x.time_value.neg ? 
-1 : 1); + break; + case DYN_COL_DYNCOL: + case DYN_COL_NULL: + rc= ER_DYNCOL_TRUNCATED; + break; + default: + return(ER_DYNCOL_FORMAT); + } + return(rc); +} + + +/** + Convert to JSON + + @param str The packed string + @param json Where to put json result + + @return ER_DYNCOL_* return code +*/ + +#define JSON_STACK_PROTECTION 10 + +static enum enum_dyncol_func_result +mariadb_dyncol_json_internal(DYNAMIC_COLUMN *str, DYNAMIC_STRING *json, + uint lvl) +{ + DYN_HEADER header; + uint i; + enum enum_dyncol_func_result rc; + + if (lvl >= JSON_STACK_PROTECTION) + { + rc= ER_DYNCOL_RESOURCE; + goto err; + } + + + if (str->length == 0) + return ER_DYNCOL_OK; /* no columns */ + + if ((rc= init_read_hdr(&header, str)) < 0) + goto err; + + if (header.entry_size * header.column_count + FIXED_HEADER_SIZE > + str->length) + { + rc= ER_DYNCOL_FORMAT; + goto err; + } + + rc= ER_DYNCOL_RESOURCE; + + if (dynstr_append_mem(json, "{", 1)) + goto err; + for (i= 0, header.entry= header.header; + i < header.column_count; + i++, header.entry+= header.entry_size) + { + DYNAMIC_COLUMN_VALUE val; + if (i != 0 && dynstr_append_mem(json, ",", 1)) + goto err; + header.length= + hdr_interval_length(&header, header.entry + header.entry_size); + header.data= header.dtpool + header.offset; + /* + Check that the found data is within the ranges. This can happen if + we get data with wrong offsets. 
+ */ + if (header.length == DYNCOL_OFFSET_ERROR || + header.length > INT_MAX || header.offset > header.data_size) + { + rc= ER_DYNCOL_FORMAT; + goto err; + } + if ((rc= dynamic_column_get_value(&header, &val)) < 0) + goto err; + if (header.format == dyncol_fmt_num) + { + uint nm= uint2korr(header.entry); + if (dynstr_realloc(json, DYNCOL_NUM_CHAR + 3)) + goto err; + json->str[json->length++]= '"'; + json->length+= (snprintf(json->str + json->length, + DYNCOL_NUM_CHAR, "%u", nm)); + } + else + { + LEX_STRING name; + if (read_name(&header, header.entry, &name)) + { + rc= ER_DYNCOL_FORMAT; + goto err; + } + if (dynstr_realloc(json, name.length + 3)) + goto err; + json->str[json->length++]= '"'; + memcpy(json->str + json->length, name.str, name.length); + json->length+= name.length; + } + json->str[json->length++]= '"'; + json->str[json->length++]= ':'; + if (val.type == DYN_COL_DYNCOL) + { + /* here we use it only for read so can cheat a bit */ + DYNAMIC_COLUMN dc; + bzero(&dc, sizeof(dc)); + dc.str= val.x.string.value.str; + dc.length= val.x.string.value.length; + if (mariadb_dyncol_json_internal(&dc, json, lvl + 1) < 0) + { + dc.str= NULL; dc.length= 0; + goto err; + } + dc.str= NULL; dc.length= 0; + } + else + { + if ((rc= mariadb_dyncol_val_str(json, &val, DYNCOL_UTF, DYNCOL_JSON_ESC)) + < 0) + goto err; + } + } + if (dynstr_append_mem(json, "}", 1)) + { + rc= ER_DYNCOL_RESOURCE; + goto err; + } + return ER_DYNCOL_OK; + +err: + json->length= 0; + return rc; +} + +enum enum_dyncol_func_result +mariadb_dyncol_json(DYNAMIC_COLUMN *str, DYNAMIC_STRING *json) +{ + + if (init_dynamic_string(json, NULL, str->length * 2, 100)) + return ER_DYNCOL_RESOURCE; + + return mariadb_dyncol_json_internal(str, json, 1); +} + + +/** + Convert to DYNAMIC_COLUMN_VALUE values and names (LEX_STING) dynamic array + + @param str The packed string + @param count number of elements in the arrays + @param names Where to put names (should be free by user) + @param vals Where to put values 
(should be free by user) + + @return ER_DYNCOL_* return code +*/ + +enum enum_dyncol_func_result +mariadb_dyncol_unpack(DYNAMIC_COLUMN *str, + uint *count, + LEX_STRING **names, DYNAMIC_COLUMN_VALUE **vals) +{ + DYN_HEADER header; + char *nm; + uint i; + enum enum_dyncol_func_result rc; + + *count= 0; *names= 0; *vals= 0; + + if (str->length == 0) + return ER_DYNCOL_OK; /* no columns */ + + if ((rc= init_read_hdr(&header, str)) < 0) + return rc; + + + if (header.entry_size * header.column_count + FIXED_HEADER_SIZE > + str->length) + return ER_DYNCOL_FORMAT; + + *vals= my_malloc(PSI_INSTRUMENT_ME, + sizeof(DYNAMIC_COLUMN_VALUE)* header.column_count, MYF(0)); + if (header.format == dyncol_fmt_num) + { + *names= my_malloc(PSI_INSTRUMENT_ME, + sizeof(LEX_STRING) * header.column_count + + DYNCOL_NUM_CHAR * header.column_count, MYF(0)); + nm= (char *)((*names) + header.column_count); + } + else + { + *names= my_malloc(PSI_INSTRUMENT_ME, + sizeof(LEX_STRING) * header.column_count, MYF(0)); + nm= 0; + } + if (!(*vals) || !(*names)) + { + rc= ER_DYNCOL_RESOURCE; + goto err; + } + + for (i= 0, header.entry= header.header; + i < header.column_count; + i++, header.entry+= header.entry_size) + { + header.length= + hdr_interval_length(&header, header.entry + header.entry_size); + header.data= header.dtpool + header.offset; + /* + Check that the found data is within the ranges. This can happen if + we get data with wrong offsets. 
+ */ + if (header.length == DYNCOL_OFFSET_ERROR || + header.length > INT_MAX || header.offset > header.data_size) + { + rc= ER_DYNCOL_FORMAT; + goto err; + } + if ((rc= dynamic_column_get_value(&header, (*vals) + i)) < 0) + goto err; + + if (header.format == dyncol_fmt_num) + { + uint num= uint2korr(header.entry); + (*names)[i].str= nm; + (*names)[i].length= snprintf(nm, DYNCOL_NUM_CHAR, "%u", num); + nm+= (*names)[i].length + 1; + } + else + { + if (read_name(&header, header.entry, (*names) + i)) + { + rc= ER_DYNCOL_FORMAT; + goto err; + } + } + } + + *count= header.column_count; + return ER_DYNCOL_OK; + +err: + if (*vals) + { + my_free(*vals); + *vals= 0; + } + if (*names) + { + my_free(*names); + *names= 0; + } + return rc; +} + +/** + Free arrays allocated by mariadb_dyncol_unpack() + + @param names Where to put names (should be free by user) + @param vals Where to put values (should be free by user) +*/ +void mariadb_dyncol_unpack_free(LEX_STRING *names, DYNAMIC_COLUMN_VALUE *vals) +{ + my_free(names); + my_free(vals); +} + +/** + Get not NULL column count + + @param str The packed string + @param column_count Where to put column count + + @return ER_DYNCOL_* return code +*/ + +enum enum_dyncol_func_result +mariadb_dyncol_column_count(DYNAMIC_COLUMN *str, uint *column_count) +{ + DYN_HEADER header; + enum enum_dyncol_func_result rc; + + *(column_count)= 0; + if (str->length == 0) + return ER_DYNCOL_OK; + + if ((rc= init_read_hdr(&header, str)) < 0) + return rc; + *column_count= header.column_count; + return rc; +} +/** + Free dynamic column + + @param str The packed string +*/ +void mariadb_dyncol_free(DYNAMIC_COLUMN *str) +{ + dynstr_free(str); +} diff --git a/mysys/mf_arr_appstr.c b/mysys/mf_arr_appstr.c new file mode 100644 index 00000000..7c105bcf --- /dev/null +++ b/mysys/mf_arr_appstr.c @@ -0,0 +1,62 @@ +/* Copyright (C) 2007 MySQL AB + Use is subject to license terms + + This program is free software; you can redistribute it and/or modify + it under the 
terms of the GNU General Public License as published by + the Free Software Foundation; version 2 of the License. + + This program is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + GNU General Public License for more details. + + You should have received a copy of the GNU General Public License + along with this program; if not, write to the Free Software + Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1335 USA */ + +#include "mysys_priv.h" +#include /* strcmp() */ + + +/** + Append str to array, or move to the end if it already exists + + @param str String to be appended + @param array The array, terminated by a NULL element, all unused elements + pre-initialized to NULL + @param size Size of the array; array must be terminated by a NULL + pointer, so can hold size - 1 elements + + @retval FALSE Success + @retval TRUE Failure, array is full +*/ + +my_bool array_append_string_unique(const char *str, + const char **array, size_t size) +{ + const char **p; + /* end points at the terminating NULL element */ + const char **end= array + size - 1; + DBUG_ASSERT(*end == NULL); + + for (p= array; *p; ++p) + { + if (strcmp(*p, str) == 0) + break; + } + if (p >= end) + return TRUE; /* Array is full */ + + DBUG_ASSERT(*p == NULL || strcmp(*p, str) == 0); + + while (*(p + 1)) + { + *p= *(p + 1); + ++p; + } + + DBUG_ASSERT(p < end); + *p= str; + + return FALSE; /* Success */ +} diff --git a/mysys/mf_cache.c b/mysys/mf_cache.c new file mode 100644 index 00000000..2fec59f4 --- /dev/null +++ b/mysys/mf_cache.c @@ -0,0 +1,92 @@ +/* Copyright (c) 2000, 2010, Oracle and/or its affiliates. All rights reserved. + + This program is free software; you can redistribute it and/or modify + it under the terms of the GNU General Public License as published by + the Free Software Foundation; version 2 of the License. 
+
+   This program is distributed in the hope that it will be useful,
+   but WITHOUT ANY WARRANTY; without even the implied warranty of
+   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+   GNU General Public License for more details.
+
+   You should have received a copy of the GNU General Public License
+   along with this program; if not, write to the Free Software
+   Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1335 USA */
+
+/* Open a temporary file and cache it with io_cache. Delete it on close */
+
+#include "mysys_priv.h"
+#include <m_string.h>
+#include "my_static.h"
+#include "mysys_err.h"
+
+/**
+  Open tempfile cached by IO_CACHE
+
+  Should be used when no seeks are done (only reinit_io_cache)
+  Return 0 if cache is inited ok
+  The actual file is created when the IO_CACHE buffer gets filled
+  If dir is not given, use TMPDIR.
+
+  @param cache          Cache to initialize as a WRITE_CACHE without a file
+  @param dir            Directory for the temporary file; only the pointer
+                        is stored, so it must outlive the cache
+  @param prefix         Two-character file name prefix, or NULL for none
+  @param cache_size     Passed on to init_io_cache()
+  @param cache_myflags  Passed on to init_io_cache() together with MY_NABP
+
+  @retval 0  ok
+  @retval 1  init_io_cache() failed
+*/
+my_bool open_cached_file(IO_CACHE *cache, const char* dir, const char *prefix,
+                         size_t cache_size, myf cache_myflags)
+{
+  DBUG_ENTER("open_cached_file");
+  cache->dir= dir;
+  if (prefix)
+  {
+    /*
+      The prefix is expected to be exactly two characters. Copy at most
+      two and always terminate, so that a shorter string is not read past
+      its end and a longer one cannot leave the field unterminated when
+      the assert is compiled out.
+    */
+    size_t prefix_length= strlen(prefix);
+    DBUG_ASSERT(prefix_length == 2);
+    if (prefix_length > 2)
+      prefix_length= 2;
+    memcpy(cache->prefix, prefix, prefix_length);
+    cache->prefix[prefix_length]= 0;
+  }
+  else
+    cache->prefix[0]= 0;
+  cache->file_name=0;
+  cache->buffer=0;                              /* Mark that not open */
+  if (!init_io_cache(cache, -1, cache_size, WRITE_CACHE, 0L, 0,
+                     MYF(cache_myflags | MY_NABP)))
+  {
+    DBUG_RETURN(0);
+  }
+  DBUG_RETURN(1);
+}
+
+/**
+  Create the temporary file
+
+  Stores the new descriptor in cache->file.
+
+  @retval 0  ok
+  @retval 1  create_temp_file() failed
+*/
+my_bool real_open_cached_file(IO_CACHE *cache)
+{
+  char name_buff[FN_REFLEN];
+  int error=1;
+  DBUG_ENTER("real_open_cached_file");
+  if ((cache->file= create_temp_file(name_buff, cache->dir,
+                                    cache->prefix[0] ? cache->prefix : 0,
+                                    O_BINARY, MYF(MY_WME | MY_TEMPORARY))) >= 0)
+  {
+    error=0;
+  }
+  DBUG_RETURN(error);
+}
+
+
+/**
+  Free the cache and close its temporary file, if one was created.
+  Buffered data is deliberately not flushed.
+*/
+void close_cached_file(IO_CACHE *cache)
+{
+  DBUG_ENTER("close_cached_file");
+  if (my_b_inited(cache))
+  {
+    File file=cache->file;
+    cache->file= -1;                            /* Don't flush data */
+    (void) end_io_cache(cache);
+    if (file >= 0)
+    {
+      (void) my_close(file,MYF(0));
+#ifdef CANT_DELETE_OPEN_FILES
+      if (cache->file_name)
+      {
+        (void) my_delete(cache->file_name, MYF(MY_WME));
+        my_free(cache->file_name);
+        cache->file_name= 0;                    /* Don't leave it dangling */
+      }
+#endif
+    }
+  }
+  DBUG_VOID_RETURN;
+}
diff --git a/mysys/mf_dirname.c b/mysys/mf_dirname.c
new file mode 100644
index 00000000..12fd15f8
--- /dev/null
+++ b/mysys/mf_dirname.c
@@ -0,0 +1,155 @@
+/* Copyright (c) 2000, 2010, Oracle and/or its affiliates. All rights reserved.
+   Copyright (c) 2009, 2020, MariaDB Corporation.
+
+   This program is free software; you can redistribute it and/or modify
+   it under the terms of the GNU General Public License as published by
+   the Free Software Foundation; version 2 of the License.
+
+   This program is distributed in the hope that it will be useful,
+   but WITHOUT ANY WARRANTY; without even the implied warranty of
+   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+   GNU General Public License for more details.
+
+   You should have received a copy of the GNU General Public License
+   along with this program; if not, write to the Free Software
+   Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1335 USA */
+
+#include "mysys_priv.h"
+#include <m_string.h>
+
+        /* Functions defined in this file */
+
+/*
+  Return the length of the directory part of a file name
+
+  SYNOPSIS
+    dirname_length()
+    name                File name
+
+  RETURN
+    Number of bytes up to and including the last FN_LIBCHAR or '/'.
+    If FN_DEVCHAR is defined, the last device character also counts as
+    part of the directory. 0 if there is no directory part.
+*/
+
+size_t dirname_length(const char *name)
+{
+  const char *pos;
+  const char *dir_end;                  /* One past the last separator */
+#ifdef BACKSLASH_MBTAIL
+  CHARSET_INFO *fs= fs_character_set();
+#endif
+
+  pos= name;
+#ifdef FN_DEVCHAR
+  {
+    const char *dev= strrchr(name, FN_DEVCHAR);
+    if (dev)
+      pos= dev + 1;                     /* Start scanning after the device */
+  }
+#endif
+
+  dir_end= pos;
+  for ( ; *pos ; pos++)                 /* Find last FN_LIBCHAR */
+  {
+#ifdef BACKSLASH_MBTAIL
+    uint l;
+    if (my_ci_use_mb(fs) && (l= my_ismbchar(fs, pos, pos + 3)))
+    {
+      pos+= l - 1;                      /* Don't look inside mbchar */
+      continue;
+    }
+#endif
+    if (*pos == FN_LIBCHAR || *pos == '/')
+      dir_end= pos + 1;
+  }
+  return (size_t) (dir_end - name);
+}
+
+
+/*
+  Gives directory part of filename. Directory ends with '/'
+
+  SYNOPSIS
+    dirname_part()
+    to                  Store directory name here
+    name                Original name
+    to_res_length       Store length of 'to' here
+
+  RETURN
+    #  Length of directory part in 'name'
+*/
+
+size_t dirname_part(char *to, const char *name, size_t *to_res_length)
+{
+  size_t length;
+  DBUG_ENTER("dirname_part");
+  DBUG_PRINT("enter",("'%s'",name));
+
+  length=dirname_length(name);
+  *to_res_length= (size_t) (convert_dirname(to, name, name+length) - to);
+  DBUG_RETURN(length);
+} /* dirname */
+
+
+/*
+  Convert directory name to use under this system
+
+  SYNOPSIS
+    convert_dirname()
+    to                  Store result here. Must be at least of size
+                        MY_MIN(FN_REFLEN, strlen(from) + 1) to make room
+                        for adding FN_LIBCHAR at the end.
+    from                Original filename. May be == to
+    from_end            Pointer at end of filename (normally end \0)
+
+  IMPLEMENTATION
+    If Windows converts '/' to '\'
+    Adds a FN_LIBCHAR to the end of the result string if there isn't one
+    and the last isn't dev_char.
+    Copies data from 'from' until ASCII(0) or until from == from_end
+    If you want to use the whole 'from' string, just send NullS as the
+    last argument.
+
+    If the result string is larger than FN_REFLEN -1, then it's cut.
+
+  RETURN
+   Returns pointer to end \0 in to
+*/
+
+#ifndef FN_DEVCHAR
+#define FN_DEVCHAR '\0'                         /* For easier code */
+#endif
+
+char *convert_dirname(char *to, const char *from, const char *from_end)
+{
+  char *to_org=to;
+#ifdef BACKSLASH_MBTAIL
+  CHARSET_INFO *fs= fs_character_set();
+#endif
+  DBUG_ENTER("convert_dirname");
+
+  /* We use -2 here, because we need place for the last FN_LIBCHAR */
+  if (!from_end || (from_end - from) > FN_REFLEN-2)
+    from_end=from+FN_REFLEN -2;
+
+#if FN_LIBCHAR != '/'
+  {
+    /*
+      Use '<' and not '!=': the multi-byte branch advances 'from' by more
+      than one byte and could otherwise step over from_end and keep
+      copying past the length limit.
+    */
+    for (; from < from_end && *from ; from++)
+    {
+      if (*from == '/')
+        *to++= FN_LIBCHAR;
+      else
+      {
+#ifdef BACKSLASH_MBTAIL
+        uint l;
+        if (my_ci_use_mb(fs) && (l= my_ismbchar(fs, from, from + 3)))
+        {
+          memmove(to, from, l);
+          to+= l;
+          from+= l - 1;
+          to_org= to; /* Don't look inside mbchar */
+        }
+        else
+#endif
+        {
+          *to++= *from;
+        }
+      }
+    }
+    *to=0;
+  }
+#else
+  /* This is ok even if to == from, because we need to cut the string */
+  to= strmake(to, from, (size_t) (from_end-from));
+#endif
+
+  /* Add FN_LIBCHAR to the end of directory path */
+  if (to != to_org && (to[-1] != FN_LIBCHAR && to[-1] != FN_DEVCHAR))
+  {
+    *to++=FN_LIBCHAR;
+    *to=0;
+  }
+  DBUG_RETURN(to);                              /* Pointer to end of dir */
+} /* convert_dirname */
diff --git a/mysys/mf_fn_ext.c b/mysys/mf_fn_ext.c
new file mode 100644
index 00000000..3501ca7c
--- /dev/null
+++ b/mysys/mf_fn_ext.c
@@ -0,0 +1,93 @@
+/* Copyright (c) 2000, 2010, Oracle and/or its affiliates. All rights reserved.
+
+   This program is free software; you can redistribute it and/or modify
+   it under the terms of the GNU General Public License as published by
+   the Free Software Foundation; version 2 of the License.
+
+   This program is distributed in the hope that it will be useful,
+   but WITHOUT ANY WARRANTY; without even the implied warranty of
+   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+   GNU General Public License for more details.
+
+   You should have received a copy of the GNU General Public License
+   along with this program; if not, write to the Free Software
+   Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1335 USA */
+
+
+#include "mysys_priv.h"
+#include <m_string.h>
+
+/*
+  Return a pointer to the extension of the filename.
+
+  SYNOPSIS
+    fn_ext()
+    name                Name of file
+
+  DESCRIPTION
+    The extension is defined as everything after the first extension character
+    (normally '.') after the directory name.
+
+  RETURN VALUES
+    Pointer to the extension character. If there isn't any extension,
+    points at the end ASCII(0) of the filename.
+*/
+
+char *fn_ext(const char *name)
+{
+  const char *pos, *gpos;
+  DBUG_ENTER("fn_ext");
+  DBUG_PRINT("mfunkt",("name: '%s'",name));
+
+#if defined(FN_DEVCHAR) || defined(BACKSLASH_MBTAIL)
+  {
+    /* Device chars and multi-byte tails need the full directory parser */
+    char buff[FN_REFLEN];
+    size_t res_length;
+    gpos= name+ dirname_part(buff, name, &res_length);
+  }
+#else
+  if (!(gpos= strrchr(name, FN_LIBCHAR)))
+    gpos= name;
+#endif
+  pos= strchr(gpos, FN_EXTCHAR);
+  DBUG_RETURN((char*) (pos ? pos : strend(gpos)));
+} /* fn_ext */
+
+
+/*
+  Return a pointer to the extension of the filename.
+
+  SYNOPSIS
+    fn_ext2()
+    name                Name of file
+
+  DESCRIPTION
+    The extension is defined as everything after the last extension character
+    (normally '.') after the directory name.
+
+  RETURN VALUES
+    Pointer to the extension character. If there isn't any extension,
+    points at the end ASCII(0) of the filename.
+*/
+
+char *fn_ext2(const char *name)
+{
+  const char *pos, *gpos;
+  DBUG_ENTER("fn_ext2");
+  DBUG_PRINT("mfunkt",("name: '%s'",name));
+
+#if defined(FN_DEVCHAR) || defined(BACKSLASH_MBTAIL)
+  {
+    /* Device chars and multi-byte tails need the full directory parser */
+    char buff[FN_REFLEN];
+    size_t res_length;
+    gpos= name+ dirname_part(buff, name, &res_length);
+  }
+#else
+  if (!(gpos= strrchr(name, FN_LIBCHAR)))
+    gpos= name;
+#endif
+  /* locate the last occurrence of FN_EXTCHAR */
+  pos= strrchr(gpos, FN_EXTCHAR);
+  DBUG_RETURN((char*) (pos ? pos : strend(gpos)));
+} /* fn_ext2 */
+
diff --git a/mysys/mf_format.c b/mysys/mf_format.c
new file mode 100644
index 00000000..57ac3ea4
--- /dev/null
+++ b/mysys/mf_format.c
@@ -0,0 +1,139 @@
+/* Copyright (c) 2000, 2011, Oracle and/or its affiliates. All rights reserved.
+
+   This program is free software; you can redistribute it and/or modify
+   it under the terms of the GNU General Public License as published by
+   the Free Software Foundation; version 2 of the License.
+
+   This program is distributed in the hope that it will be useful,
+   but WITHOUT ANY WARRANTY; without even the implied warranty of
+   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+   GNU General Public License for more details.
+
+   You should have received a copy of the GNU General Public License
+   along with this program; if not, write to the Free Software
+   Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1335 USA */
+
+#include "mysys_priv.h"
+#include <m_string.h>
+
+/*
+  Formats a filename with possible replace of directory or extension
+  Function can handle the case where 'to' == 'name'
+  For a description of the flag values, consult my_sys.h
+  The arguments should be in unix format.
+
+  Returns 'to', or NullS when the result would be too long and
+  MY_SAFE_PATH is set in 'flag'.
+*/
+
+char * fn_format(char * to, const char *name, const char *dir,
+                    const char *extension, uint flag)
+{
+  char dev[FN_REFLEN], buff[FN_REFLEN], *pos, *startpos;
+  const char *ext;
+  reg1 size_t length;
+  size_t dev_length;
+  DBUG_ENTER("fn_format");
+  DBUG_ASSERT(name != NULL);
+  DBUG_ASSERT(extension != NULL);
+  DBUG_PRINT("enter",("name: %s dir: %s extension: %s flag: %d",
+                       name,dir,extension,flag));
+
+  /* Copy and skip directory; startpos remembers the unmodified input */
+  name+=(length=dirname_part(dev, (startpos=(char *) name), &dev_length));
+  if (length == 0 || (flag & MY_REPLACE_DIR))
+  {
+    /* Use given directory */
+    convert_dirname(dev,dir,NullS);             /* Fix to this OS */
+  }
+  else if ((flag & MY_RELATIVE_PATH) && !test_if_hard_path(dev))
+  {
+    /* Put 'dir' before the given path */
+    strmake_buf(buff, dev);
+    pos=convert_dirname(dev,dir,NullS);
+    strmake(pos,buff,sizeof(buff)-1- (int) (pos-dev));
+  }
+
+  if (flag & MY_PACK_FILENAME)
+    pack_dirname(dev,dev);                      /* Put in ./.. and ~/.. */
+  if (flag & MY_UNPACK_FILENAME)
+    (void) unpack_dirname(dev,dev);             /* Replace ~/.. with dir */
+
+  /* Decide how much of 'name' to keep and which extension to append */
+  if (!(flag & MY_APPEND_EXT) &&
+      (pos= (char*) strchr(name,FN_EXTCHAR)) != NullS)
+  {
+    if ((flag & MY_REPLACE_EXT) == 0)           /* If we should keep old ext */
+    {
+      length=strlength(name);                   /* Use old extension */
+      ext = "";
+    }
+    else
+    {
+      length= (size_t) (pos-(char*) name);      /* Change extension */
+      ext= extension;
+    }
+  }
+  else
+  {
+    length=strlength(name);                     /* No ext, use the new one */
+    ext=extension;
+  }
+
+  if (strlen(dev)+length+strlen(ext) >= FN_REFLEN || length >= FN_LEN )
+  {
+    /* Too long path, return original or NULL */
+    size_t tmp_length;
+    if (flag & MY_SAFE_PATH)
+      DBUG_RETURN(NullS);
+    tmp_length= strlength(startpos);
+    DBUG_PRINT("error",("dev: '%s' ext: '%s' length: %u",dev,ext,
+                        (uint) length));
+    (void) strmake(to,startpos,MY_MIN(tmp_length,FN_REFLEN-1));
+  }
+  else
+  {
+    if (to == startpos)
+    {
+      /* 'to' aliases the input: writing dev into it would clobber name */
+      bmove(buff,(uchar*) name,length);         /* Save name for last copy */
+      name=buff;
+    }
+    pos=strmake(strmov(to,dev),name,length);
+    (void) strmov(pos,ext);                     /* Don't convert extension */
+  }
+  if (flag & MY_RETURN_REAL_PATH)
+    (void) my_realpath(to, to, MYF(0));
+  else if (flag & MY_RESOLVE_SYMLINKS)
+  {
+    strmov(buff,to);
+    (void) my_readlink(to, buff, MYF(0));
+  }
+  DBUG_RETURN(to);
+} /* fn_format */
+
+
+/*
+  strlength(const string str)
+  Return length of string with end-space:s not counted.
+  Only ' ' counts as space; spaces inside the string are included.
+*/
+
+size_t strlength(const char *str)
+{
+  reg1 const char * pos;
+  reg2 const char * found;                      /* End of last non-space run */
+  DBUG_ENTER("strlength");
+
+  pos= found= str;
+
+  while (*pos)
+  {
+    if (*pos != ' ')
+    {
+      while (*++pos && *pos != ' ') {};         /* Skip the non-space run */
+      if (!*pos)
+      {
+        found=pos;                              /* String ends here */
+        break;
+      }
+    }
+    found=pos;
+    while (*++pos == ' ') {};                   /* Skip the space run */
+  }
+  DBUG_RETURN((size_t) (found - str));
+} /* strlength */
diff --git a/mysys/mf_getdate.c b/mysys/mf_getdate.c
new file mode 100644
index 00000000..3db0398b
--- /dev/null
+++ b/mysys/mf_getdate.c
@@ -0,0 +1,82 @@
+/* Copyright (c) 2000, 2004-2007 MySQL AB, 2009 Sun Microsystems, Inc.
+   Use is subject to license terms.
+
+   This program is free software; you can redistribute it and/or modify
+   it under the terms of the GNU General Public License as published by
+   the Free Software Foundation; version 2 of the License.
+
+   This program is distributed in the hope that it will be useful,
+   but WITHOUT ANY WARRANTY; without even the implied warranty of
+   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+   GNU General Public License for more details.
+
+   You should have received a copy of the GNU General Public License
+   along with this program; if not, write to the Free Software
+   Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1335 USA */
+
+/* Get date in a printable form: yyyy-mm-dd hh:mm:ss */
+
+#include "mysys_priv.h"
+#include <m_string.h>
+
+/*
+  get date as string
+
+  SYNOPSIS
+    get_date()
+    to   - string where date will be written; the caller must supply a
+           buffer large enough for the chosen format, no size is checked
+    flag - format of date:
+          If flag & GETDATE_DATE_TIME   Return date and time
+          If flag & GETDATE_SHORT_DATE  Return short date format YYMMDD
+          If flag & GETDATE_HHMMSSTIME  Return time in HHMMSS format.
+          If flag & GETDATE_GMT         Date/time in GMT
+          If flag & GETDATE_FIXEDLENGTH Return fixed length date/time
+    date - for conversion; 0 means the current time
+
+  NOTES
+    If the time cannot be converted to broken-down form, 'to' is set to
+    the empty string.
+*/
+
+
+void get_date(register char * to, int flag, time_t date)
+{
+  reg2 struct tm *start_time;
+  time_t skr;
+#if defined(HAVE_LOCALTIME_R) && defined(_REENTRANT)
+  struct tm tm_tmp;
+#endif
+
+  skr=date ? date : (time_t) my_time(0);
+#if defined(HAVE_LOCALTIME_R) && defined(_REENTRANT)
+  if (flag & GETDATE_GMT)
+    start_time= gmtime_r(&skr,&tm_tmp);
+  else
+    start_time= localtime_r(&skr,&tm_tmp);
+#else
+  if (flag & GETDATE_GMT)
+    start_time= gmtime(&skr);
+  else
+    start_time= localtime(&skr);
+#endif
+  if (!start_time)
+  {
+    /* Conversion failed; don't dereference NULL or format garbage */
+    *to= 0;
+    return;
+  }
+  if (flag & GETDATE_SHORT_DATE)
+    sprintf(to,"%02d%02d%02d",
+            start_time->tm_year % 100,
+            start_time->tm_mon+1,
+            start_time->tm_mday);
+  else
+    sprintf(to, ((flag & GETDATE_FIXEDLENGTH) ?
+                 "%4d-%02d-%02d" : "%d-%02d-%02d"),
+            start_time->tm_year+1900,
+            start_time->tm_mon+1,
+            start_time->tm_mday);
+  if (flag & GETDATE_DATE_TIME)
+    sprintf(strend(to),
+            ((flag & GETDATE_FIXEDLENGTH) ?
+             " %02d:%02d:%02d" : " %2d:%02d:%02d"),
+            start_time->tm_hour,
+            start_time->tm_min,
+            start_time->tm_sec);
+  else if (flag & GETDATE_HHMMSSTIME)
+    sprintf(strend(to),"%02d%02d%02d",
+            start_time->tm_hour,
+            start_time->tm_min,
+            start_time->tm_sec);
+} /* get_date */
diff --git a/mysys/mf_iocache.c b/mysys/mf_iocache.c
new file mode 100644
index 00000000..4ee1331b
--- /dev/null
+++ b/mysys/mf_iocache.c
@@ -0,0 +1,1886 @@
+/*
+   Copyright (c) 2000, 2011, Oracle and/or its affiliates
+   Copyright (c) 2010, 2021, MariaDB
+
+   This program is free software; you can redistribute it and/or modify
+   it under the terms of the GNU General Public License as published by
+   the Free Software Foundation; version 2 of the License.
+
+   This program is distributed in the hope that it will be useful,
+   but WITHOUT ANY WARRANTY; without even the implied warranty of
+   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+   GNU General Public License for more details.
+
+   You should have received a copy of the GNU General Public License
+   along with this program; if not, write to the Free Software
+   Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1335 USA */
+
+/*
+  Caching of files that only do (sequential) reads or writes of fixed-
+  length records. A read isn't allowed to go over file-length. A read is ok
+  if it ends at file-length and next read can try to read after file-length
+  (and get a EOF-error).
+  Possibly use of asynchronous io.
+  macros for read and writes for faster io.
+  Used instead of FILE when reading or writing whole files.
+  One can change info->pos_in_file to a higher value to skip bytes in file if
+  also info->read_pos is set to info->read_end.
+  If called through open_cached_file(), then the temporary file will
+  only be created if a write exceeds the file buffer or if one calls
+  my_b_flush_io_cache().
+
+  If one uses SEQ_READ_APPEND, then two buffers are allocated, one for
+  reading and another for writing. Reads are first done from disk and
+  then done from the write buffer. This is an efficient way to read
+  from a log file when one is writing to it at the same time.
+  For this to work, the file has to be opened in append mode!
+  Note that when one uses SEQ_READ_APPEND, one MUST write using
+  my_b_append ! This is needed because we need to lock the mutex
+  every time we access the write buffer.
+
+TODO:
+  When one uses SEQ_READ_APPEND and we are reading and writing at the same
+  time, each time the write buffer gets full and it's written to disk, we will
+  always do a disk read to read a part of the buffer from disk to the
+  read buffer.
+  This should be fixed so that when we do a my_b_flush_io_cache() and
+  we have been reading the write buffer, we should transfer the rest of the
+  write buffer to the read buffer before we start to reuse it.
+*/
+
+#include "mysys_priv.h"
+#include
+#include
+#include "mysql/psi/mysql_file.h"
+
+PSI_file_key key_file_io_cache;
+
+/* Take / release the mutex that guards the append (write) buffer */
+#define lock_append_buffer(info) \
+  mysql_mutex_lock(&(info)->append_buffer_lock)
+#define unlock_append_buffer(info) \
+  mysql_mutex_unlock(&(info)->append_buffer_lock)
+
+/* Round X up / down to a multiple of IO_SIZE (mask form: power of two) */
+#define IO_ROUND_UP(X) (((X)+IO_SIZE-1) & ~(IO_SIZE-1))
+#define IO_ROUND_DN(X) ( (X) & ~(IO_SIZE-1))
+
+static int _my_b_cache_read(IO_CACHE *info, uchar *Buffer, size_t Count);
+static int _my_b_cache_read_r(IO_CACHE *info, uchar *Buffer, size_t Count);
+static int _my_b_seq_read(IO_CACHE *info, uchar *Buffer, size_t Count);
+static int _my_b_cache_write(IO_CACHE *info, const uchar *Buffer, size_t Count);
+static int _my_b_cache_write_r(IO_CACHE *info, const uchar *Buffer, size_t Count);
+
+/* Hooks used for MY_ENCRYPT caches; 0 until set from outside this chunk */
+int (*_my_b_encr_read)(IO_CACHE *info,uchar *Buffer,size_t Count)= 0;
+int (*_my_b_encr_write)(IO_CACHE *info,const uchar *Buffer,size_t Count)= 0;
+
+
+
+/*
+  Select info->read_function / info->write_function from info->type,
+  info->share and the MY_ENCRYPT flag, then set MY_FULL_IO in info->myflags
+  for READ_CACHE, WRITE_CACHE and SEQ_READ_APPEND and clear it otherwise.
+  Both pointers are first reset to 0; cases that set only one leave the
+  other at 0.
+*/
+static void
+init_functions(IO_CACHE* info)
+{
+  enum cache_type type= info->type;
+  info->read_function = 0;                      /* Force a core if used */
+  info->write_function = 0;                     /* Force a core if used */
+  switch (type) {
+  case READ_NET:
+    /*
+      Must be initialized by the caller. The problem is that
+      _my_b_net_read has to be defined in sql directory because of
+      the dependency on THD, and therefore cannot be visible to
+      programs that link against mysys but know nothing about THD, such
+      as myisamchk
+    */
+    DBUG_ASSERT(!(info->myflags & MY_ENCRYPT));
+    break;
+  case SEQ_READ_APPEND:
+    info->read_function = _my_b_seq_read;
+    DBUG_ASSERT(!(info->myflags & MY_ENCRYPT));
+    break;
+  case READ_CACHE:
+    if (info->myflags & MY_ENCRYPT)
+    {
+      DBUG_ASSERT(info->share == 0);
+      info->read_function = _my_b_encr_read;
+      break;
+    }
+    /* fall through */
+  case WRITE_CACHE:
+    if (info->myflags & MY_ENCRYPT)
+    {
+      info->write_function = _my_b_encr_write;
+      break;
+    }
+    /* fall through */
+  case READ_FIFO:
+    /* Unencrypted caches: the _r variants are used when the cache is shared */
+    DBUG_ASSERT(!(info->myflags & MY_ENCRYPT));
+    info->read_function = info->share ? _my_b_cache_read_r : _my_b_cache_read;
+    info->write_function = info->share ? _my_b_cache_write_r : _my_b_cache_write;
+    break;
+  case TYPE_NOT_SET:
+    DBUG_ASSERT(0);
+    break;
+  }
+  if (type == READ_CACHE || type == WRITE_CACHE || type == SEQ_READ_APPEND)
+    info->myflags|= MY_FULL_IO;
+  else
+    info->myflags&= ~MY_FULL_IO;
+}
+
+
+/*
+  Initialize an IO_CACHE object
+
+  SYNOPSIS
+    init_io_cache_ext()
+    info                cache handler to initialize
+    file                File that should be associated to the handler
+                        If == -1 then real_open_cached_file()
+                        will be called when it's time to open file.
+ cachesize Size of buffer to allocate for read/write + If == 0 then use my_default_record_cache_size + type Type of cache + seek_offset Where cache should start reading/writing + use_async_io Set to 1 of we should use async_io (if available) + cache_myflags Bitmap of different flags + MY_WME | MY_FAE | MY_NABP | MY_FNABP | + MY_DONT_CHECK_FILESIZE + file_key Instrumented file key for temporary cache file + + RETURN + 0 ok + # error +*/ + +int init_io_cache_ext(IO_CACHE *info, File file, size_t cachesize, + enum cache_type type, my_off_t seek_offset, + pbool use_async_io, myf cache_myflags, + PSI_file_key file_key __attribute__((unused))) +{ + size_t min_cache; + my_off_t pos; + my_off_t end_of_file= ~(my_off_t) 0; + DBUG_ENTER("init_io_cache_ext"); + DBUG_PRINT("enter",("cache:%p type: %d pos: %llu", + info, (int) type, (ulonglong) seek_offset)); + + info->file= file; + info->type= TYPE_NOT_SET; /* Don't set it until mutex are created */ + info->pos_in_file= seek_offset; + info->alloced_buffer = 0; + info->buffer=0; + info->seek_not_done= 0; + info->next_file_user= NULL; + + if (file >= 0) + { + DBUG_ASSERT(!(cache_myflags & MY_ENCRYPT)); + pos= mysql_file_tell(file, MYF(0)); + if ((pos == (my_off_t) -1) && (my_errno == ESPIPE)) + { + /* + This kind of object doesn't support seek() or tell(). Don't set a + seek_not_done that will make us again try to seek() later and fail. + + Additionally, if we're supposed to start somewhere other than the + the beginning of whatever this file is, then somebody made a bad + assumption. + */ + DBUG_ASSERT(seek_offset == 0); + } + else + info->seek_not_done= MY_TEST(seek_offset != pos); + } + else + if (type == WRITE_CACHE && _my_b_encr_read) + { + cache_myflags|= MY_ENCRYPT; + DBUG_ASSERT(seek_offset == 0); + } + + info->disk_writes= 0; + info->share=0; + + if (!cachesize && !(cachesize= my_default_record_cache_size)) + DBUG_RETURN(1); /* No cache requested */ + min_cache=use_async_io ? 
IO_SIZE*4 : IO_SIZE*2; + if (type == READ_CACHE || type == SEQ_READ_APPEND) + { /* Assume file isn't growing */ + DBUG_ASSERT(!(cache_myflags & MY_ENCRYPT)); + if (!(cache_myflags & MY_DONT_CHECK_FILESIZE)) + { + /* Calculate end of file to avoid allocating oversized buffers */ + end_of_file= mysql_file_seek(file, 0L, MY_SEEK_END, MYF(0)); + /* Need to reset seek_not_done now that we just did a seek. */ + info->seek_not_done= end_of_file == seek_offset ? 0 : 1; + if (end_of_file < seek_offset) + end_of_file=seek_offset; + /* Trim cache size if the file is very small */ + if ((my_off_t) cachesize > end_of_file-seek_offset+IO_SIZE*2-1) + { + cachesize= (size_t) (end_of_file-seek_offset)+IO_SIZE*2-1; + use_async_io=0; /* No need to use async */ + } + } + } + cache_myflags &= ~MY_DONT_CHECK_FILESIZE; + if (type != READ_NET) + { + /* Retry allocating memory in smaller blocks until we get one */ + cachesize= ((cachesize + min_cache-1) & ~(min_cache-1)); + for (;;) + { + size_t buffer_block; + /* + Unset MY_WAIT_IF_FULL bit if it is set, to prevent conflict with + MY_ZEROFILL. 
+ */ + myf flags= (myf) (cache_myflags & ~(MY_WME | MY_WAIT_IF_FULL)); + + if (cachesize < min_cache) + cachesize = min_cache; + buffer_block= cachesize; + if (type == SEQ_READ_APPEND) + buffer_block *= 2; + else if (cache_myflags & MY_ENCRYPT) + buffer_block= 2*(buffer_block + MY_AES_BLOCK_SIZE) + sizeof(IO_CACHE_CRYPT); + if (cachesize == min_cache) + flags|= (myf) MY_WME; + + if ((info->buffer= (uchar*) my_malloc(key_memory_IO_CACHE, buffer_block, flags)) != 0) + { + if (type == SEQ_READ_APPEND) + info->write_buffer= info->buffer + cachesize; + else + info->write_buffer= info->buffer; + info->alloced_buffer= buffer_block; + break; /* Enough memory found */ + } + if (cachesize == min_cache) + DBUG_RETURN(2); /* Can't alloc cache */ + /* Try with less memory */ + cachesize= (cachesize*3/4 & ~(min_cache-1)); + } + } + + DBUG_PRINT("info",("init_io_cache_ext: cachesize = %lu", (ulong) cachesize)); + info->read_length=info->buffer_length=cachesize; + info->myflags=cache_myflags & ~(MY_NABP | MY_FNABP); + info->request_pos= info->read_pos= info->write_pos = info->buffer; + if (type == SEQ_READ_APPEND) + { + info->append_read_pos = info->write_pos = info->write_buffer; + info->write_end = info->write_buffer + info->buffer_length; + mysql_mutex_init(key_IO_CACHE_append_buffer_lock, + &info->append_buffer_lock, MY_MUTEX_INIT_FAST); + } +#if defined(SAFE_MUTEX) + else + { + /* Clear mutex so that safe_mutex will notice that it's not initialized */ + bzero((char*) &info->append_buffer_lock, sizeof(info->append_buffer_lock)); + } +#endif + + if (type == WRITE_CACHE) + info->write_end= + info->buffer+info->buffer_length- (seek_offset & (IO_SIZE-1)); + else + info->read_end=info->buffer; /* Nothing in cache */ + + /* End_of_file may be changed by user later */ + info->end_of_file= end_of_file; + info->error=0; + info->type= type; + init_functions(info); + DBUG_RETURN(0); +} + +int init_io_cache(IO_CACHE *info, File file, size_t cachesize, + enum cache_type type, my_off_t 
seek_offset, + my_bool use_async_io, myf cache_myflags) +{ + return init_io_cache_ext(info, file, cachesize, type, seek_offset, + use_async_io, cache_myflags, key_file_io_cache); +} + +/* + Initialize the slave IO_CACHE to read the same file (and data) + as master does. + + One can create multiple slaves from a single master. Every slave and master + will have independent file positions. + + The master must be a non-shared READ_CACHE. + It is assumed that no more reads are done after a master and/or a slave + has been freed (this limitation can be easily lifted). +*/ + +int init_slave_io_cache(IO_CACHE *master, IO_CACHE *slave) +{ + uchar *slave_buf; + DBUG_ASSERT(master->type == READ_CACHE); + DBUG_ASSERT(!master->share); + DBUG_ASSERT(master->alloced_buffer); + + if (!(slave_buf= (uchar*)my_malloc(PSI_INSTRUMENT_ME, master->alloced_buffer, MYF(0)))) + { + return 1; + } + memcpy(slave, master, sizeof(IO_CACHE)); + slave->buffer= slave_buf; + + memcpy(slave->buffer, master->buffer, master->alloced_buffer); + slave->read_pos= slave->buffer + (master->read_pos - master->buffer); + slave->read_end= slave->buffer + (master->read_end - master->buffer); + + if (master->next_file_user) + { + IO_CACHE *p; + for (p= master->next_file_user; + p->next_file_user !=master; + p= p->next_file_user) + {} + + p->next_file_user= slave; + slave->next_file_user= master; + } + else + { + slave->next_file_user= master; + master->next_file_user= slave; + } + return 0; +} + + +void end_slave_io_cache(IO_CACHE *cache) +{ + /* Remove the cache from the next_file_user circular linked list. 
*/ + if (cache->next_file_user != cache) + { + IO_CACHE *p= cache->next_file_user; + while (p->next_file_user != cache) + p= p->next_file_user; + p->next_file_user= cache->next_file_user; + + } + my_free(cache->buffer); +} + +/* + Seek a read io cache to a given offset +*/ +void seek_io_cache(IO_CACHE *cache, my_off_t needed_offset) +{ + my_off_t cached_data_start= cache->pos_in_file; + my_off_t cached_data_end= cache->pos_in_file + (cache->read_end - + cache->buffer); + + if (needed_offset >= cached_data_start && + needed_offset < cached_data_end) + { + /* + The offset we're seeking to is in the buffer. + Move buffer's read position accordingly + */ + cache->read_pos= cache->buffer + (needed_offset - cached_data_start); + } + else + { + if (needed_offset > cache->end_of_file) + needed_offset= cache->end_of_file; + /* + The offset we're seeking to is not in the buffer. + - Set the buffer to be exhausted. + - Make the next read to a mysql_file_seek() call to the required + offset. + TODO(cvicentiu, spetrunia) properly implement aligned seeks for + efficiency. + */ + cache->seek_not_done= 1; + cache->pos_in_file= needed_offset; + /* When reading it must appear as if we've started from the offset + that we've seeked here. We must let _my_b_cache_read assume that + by implying "no reading starting from pos_in_file" has happened. */ + cache->read_pos= cache->buffer; + cache->read_end= cache->buffer; + } +} + + +/* + Use this to reset cache to re-start reading or to change the type + between READ_CACHE <-> WRITE_CACHE + If we are doing a reinit of a cache where we have the start of the file + in the cache, we are reusing this memory without flushing it to disk. 
+*/ + +my_bool reinit_io_cache(IO_CACHE *info, enum cache_type type, + my_off_t seek_offset, + my_bool use_async_io __attribute__((unused)), + my_bool clear_cache) +{ + DBUG_ENTER("reinit_io_cache"); + DBUG_PRINT("enter",("cache:%p type: %d seek_offset: %llu clear_cache: %d", + info, type, (ulonglong) seek_offset, + (int) clear_cache)); + + DBUG_ASSERT(type == READ_CACHE || type == WRITE_CACHE); + DBUG_ASSERT(info->type == READ_CACHE || info->type == WRITE_CACHE); + + /* If the whole file is in memory, avoid flushing to disk */ + if (! clear_cache && + seek_offset >= info->pos_in_file && + seek_offset <= my_b_tell(info)) + { + /* Reuse current buffer without flushing it to disk */ + uchar *pos; + if (info->type == WRITE_CACHE && type == READ_CACHE) + { + info->read_end=info->write_pos; + info->end_of_file=my_b_tell(info); + /* + Trigger a new seek only if we have a valid + file handle. + */ + info->seek_not_done= (info->file != -1); + } + else if (type == WRITE_CACHE) + { + if (info->type == READ_CACHE) + { + info->write_end=info->write_buffer+info->buffer_length; + info->seek_not_done=1; + } + info->end_of_file = ~(my_off_t) 0; + } + pos=info->request_pos+(seek_offset-info->pos_in_file); + if (type == WRITE_CACHE) + info->write_pos=pos; + else + info->read_pos= pos; + } + else + { + /* + If we change from WRITE_CACHE to READ_CACHE, assume that everything + after the current positions should be ignored. In other cases we + update end_of_file as it may have changed since last init. 
+ */ + if (type == READ_CACHE) + { + if (info->type == WRITE_CACHE) + info->end_of_file= my_b_tell(info); + else + { + if (!(info->myflags & MY_ENCRYPT)) + info->end_of_file= mysql_file_seek(info->file, 0L, + MY_SEEK_END, MYF(0)); + } + } + /* flush cache if we want to reuse it */ + if (!clear_cache && my_b_flush_io_cache(info,1)) + DBUG_RETURN(1); + info->pos_in_file=seek_offset; + /* Better to do always do a seek */ + info->seek_not_done=1; + info->request_pos=info->read_pos=info->write_pos=info->buffer; + if (type == READ_CACHE) + { + info->read_end=info->buffer; /* Nothing in cache */ + } + else + { + if (info->myflags & MY_ENCRYPT) + { + info->write_end = info->write_buffer + info->buffer_length; + if (seek_offset && info->file != -1) + { + info->read_end= info->buffer; + _my_b_encr_read(info, 0, 0); /* prefill the buffer */ + info->write_pos= info->read_pos; + info->seek_not_done=1; + } + } + else + { + info->write_end=(info->buffer + info->buffer_length - + (seek_offset & (IO_SIZE-1))); + } + info->end_of_file= ~(my_off_t) 0; + } + } + info->type=type; + info->error=0; + init_functions(info); + DBUG_RETURN(0); +} /* reinit_io_cache */ + + +int _my_b_read(IO_CACHE *info, uchar *Buffer, size_t Count) +{ + size_t left_length; + int res; + + /* If the buffer is not empty yet, copy what is available. */ + if ((left_length= (size_t) (info->read_end - info->read_pos))) + { + DBUG_ASSERT(Count > left_length); + memcpy(Buffer, info->read_pos, left_length); + Buffer+=left_length; + Count-=left_length; + } + res= info->read_function(info, Buffer, Count); + if (res && info->error >= 0) + info->error+= (int)left_length; /* update number or read bytes */ + return res; +} + +int _my_b_write(IO_CACHE *info, const uchar *Buffer, size_t Count) +{ + size_t rest_length; + int res; + + /* Always use my_b_flush_io_cache() to flush write_buffer! 
*/ + DBUG_ASSERT(Buffer != info->write_buffer); + + if (info->pos_in_file + info->buffer_length > info->end_of_file) + { + my_errno=errno=EFBIG; + return info->error = -1; + } + + rest_length= (size_t) (info->write_end - info->write_pos); + DBUG_ASSERT(Count >= rest_length); + memcpy(info->write_pos, Buffer, (size_t) rest_length); + Buffer+=rest_length; + Count-=rest_length; + info->write_pos+=rest_length; + + if (my_b_flush_io_cache(info, 1)) + return 1; + + if (Count) + { + my_off_t old_pos_in_file= info->pos_in_file; + res= info->write_function(info, Buffer, Count); + Count-= (size_t) (info->pos_in_file - old_pos_in_file); + Buffer+= info->pos_in_file - old_pos_in_file; + } + else + res= 0; + + if (!res && Count) + { + memcpy(info->write_pos, Buffer, Count); + info->write_pos+= Count; + } + return res; +} + +/* + Read buffered. + + SYNOPSIS + _my_b_cache_read() + info IO_CACHE pointer + Buffer Buffer to retrieve count bytes from file + Count Number of bytes to read into Buffer + + NOTE + This function is only called from the my_b_read() macro when there + isn't enough characters in the buffer to satisfy the request. + + WARNING + + When changing this function, be careful with handling file offsets + (end-of_file, pos_in_file). Do not cast them to possibly smaller + types than my_off_t unless you can be sure that their value fits. + Same applies to differences of file offsets. + + When changing this function, check _my_b_cache_read_r(). It might need the + same change. + + RETURN + 0 we succeeded in reading all data + 1 Error: couldn't read requested characters. In this case: + If info->error == -1, we got a read error. + Otherwise info->error contains the number of bytes in Buffer. 
+*/ + +int _my_b_cache_read(IO_CACHE *info, uchar *Buffer, size_t Count) +{ + size_t length= 0, diff_length, left_length= 0, max_length; + my_off_t pos_in_file; + DBUG_ENTER("_my_b_cache_read"); + + /* pos_in_file always point on where info->buffer was read */ + pos_in_file=info->pos_in_file+ (size_t) (info->read_end - info->buffer); + + /* + Whenever a function which operates on IO_CACHE flushes/writes + some part of the IO_CACHE to disk it will set the property + "seek_not_done" to indicate this to other functions operating + on the IO_CACHE. + */ + if (info->seek_not_done) + { + if ((mysql_file_seek(info->file, pos_in_file, MY_SEEK_SET, MYF(0)) + != MY_FILEPOS_ERROR)) + { + /* No error, reset seek_not_done flag. */ + info->seek_not_done= 0; + + if (info->next_file_user) + { + IO_CACHE *c; + for (c= info->next_file_user; + c!= info; + c= c->next_file_user) + { + c->seek_not_done= 1; + } + } + } + else + { + /* + If the seek failed and the error number is ESPIPE, it is because + info->file is a pipe or socket or FIFO. We never should have tried + to seek on that. See Bugs#25807 and #22828 for more info. + */ + DBUG_ASSERT(my_errno != ESPIPE); + info->error= -1; + DBUG_RETURN(1); + } + } + + /* + Calculate, how much we are within a IO_SIZE block. Ideally this + should be zero. + */ + diff_length= (size_t) (pos_in_file & (IO_SIZE-1)); + + /* + If more than a block plus the rest of the current block is wanted, + we do read directly, without filling the buffer. + */ + if (Count >= (size_t) (IO_SIZE+(IO_SIZE-diff_length))) + { /* Fill first intern buffer */ + size_t read_length; + if (info->end_of_file <= pos_in_file) + { + /* End of file. Return, what we did copy from the buffer. */ + info->error= (int) left_length; + info->seek_not_done=1; + DBUG_RETURN(1); + } + /* + Crop the wanted count to a multiple of IO_SIZE and subtract, + what we did already read from a block. That way, the read will + end aligned with a block. 
+ */ + length= IO_ROUND_DN(Count) - diff_length; + if ((read_length= mysql_file_read(info->file,Buffer, length, info->myflags)) + != length) + { + /* + If we didn't get, what we wanted, we either return -1 for a read + error, or (it's end of file), how much we got in total. + */ + info->error= (read_length == (size_t) -1 ? -1 : + (int) (read_length+left_length)); + info->seek_not_done=1; + DBUG_RETURN(1); + } + Count-=length; + Buffer+=length; + pos_in_file+=length; + left_length+=length; + diff_length=0; + } + + /* + At this point, we want less than one and a partial block. + We will read a full cache, minus the number of bytes, we are + within a block already. So we will reach new alignment. + */ + max_length= info->read_length-diff_length; + /* We will not read past end of file. */ + if (info->type != READ_FIFO && + max_length > (info->end_of_file - pos_in_file)) + max_length= (size_t) (info->end_of_file - pos_in_file); + /* + If there is nothing left to read, + we either are done, or we failed to fulfill the request. + Otherwise, we read max_length into the cache. + */ + if (!max_length) + { + if (Count) + { + /* We couldn't fulfil the request. Return, how much we got. */ + info->error= (int) left_length; + DBUG_RETURN(1); + } + else + { + info->error= 0; + if (length == 0) /* nothing was read */ + DBUG_RETURN(0); /* EOF */ + + length= 0; /* non-zero size read was done */ + } + } + else + { + if (info->next_file_user) + { + IO_CACHE *c; + for (c= info->next_file_user; + c!= info; + c= c->next_file_user) + { + c->seek_not_done= 1; + } + } + if ((length= mysql_file_read(info->file,info->buffer, max_length, + info->myflags)) < Count || + length == (size_t) -1) + { + /* + We got an read error, or less than requested (end of file). + If not a read error, copy, what we got. + */ + if (length != (size_t) -1) + memcpy(Buffer, info->buffer, length); + info->pos_in_file= pos_in_file; + /* For a read error, return -1, otherwise, what we got in total. 
*/ + info->error= length == (size_t) -1 ? -1 : (int) (length+left_length); + info->read_pos=info->read_end=info->buffer; + info->seek_not_done=1; + DBUG_RETURN(1); + } + } + /* + Count is the remaining number of bytes requested. + length is the amount of data in the cache. + Read Count bytes from the cache. + */ + info->read_pos=info->buffer+Count; + info->read_end=info->buffer+length; + info->pos_in_file=pos_in_file; + if (Count) + memcpy(Buffer, info->buffer, Count); + DBUG_RETURN(0); +} + + +/* + Prepare IO_CACHE for shared use. + + SYNOPSIS + init_io_cache_share() + read_cache A read cache. This will be copied for + every thread after setup. + cshare The share. + write_cache If non-NULL a write cache that is to be + synchronized with the read caches. + num_threads Number of threads sharing the cache + including the write thread if any. + + DESCRIPTION + + The shared cache is used so: One IO_CACHE is initialized with + init_io_cache(). This includes the allocation of a buffer. Then a + share is allocated and init_io_cache_share() is called with the io + cache and the share. Then the io cache is copied for each thread. So + every thread has its own copy of IO_CACHE. But the allocated buffer + is shared because cache->buffer is the same for all caches. + + One thread reads data from the file into the buffer. All threads + read from the buffer, but every thread maintains its own set of + pointers into the buffer. When all threads have used up the buffer + contents, one of the threads reads the next block of data into the + buffer. To accomplish this, each thread enters the cache lock before + accessing the buffer. They wait in lock_io_cache() until all threads + joined the lock. The last thread entering the lock is in charge of + reading from file to buffer. It wakes all threads when done. 
+ + Synchronizing a write cache to the read caches works so: Whenever + the write buffer needs a flush, the write thread enters the lock and + waits for all other threads to enter the lock too. They do this when + they have used up the read buffer. When all threads are in the lock, + the write thread copies the write buffer to the read buffer and + wakes all threads. + + share->running_threads is the number of threads not being in the + cache lock. When entering lock_io_cache() the number is decreased. + When the thread that fills the buffer enters unlock_io_cache() the + number is reset to the number of threads. The condition + running_threads == 0 means that all threads are in the lock. Bumping + up the number to the full count is non-intuitive. But increasing the + number by one for each thread that leaves the lock could lead to a + solo run of one thread. The last thread to join a lock reads from + file to buffer, wakes the other threads, processes the data in the + cache and enters the lock again. If no other thread left the lock + meanwhile, it would think it's the last one again and read the next + block... + + The share has copies of 'error', 'buffer', 'read_end', and + 'pos_in_file' from the thread that filled the buffer. We may not be + able to access this information directly from its cache because the + thread may be removed from the share before the variables could be + copied by all other threads. Or, if a write buffer is synchronized, + it would change its 'pos_in_file' after waking the other threads, + possibly before they could copy its value. + + However, the 'buffer' variable in the share is for a synchronized + write cache. It needs to know where to put the data. Otherwise it + would need access to the read cache of one of the threads that is + not yet removed from the share. 
+ + RETURN + void +*/ + +void init_io_cache_share(IO_CACHE *read_cache, IO_CACHE_SHARE *cshare, + IO_CACHE *write_cache, uint num_threads) +{ + DBUG_ENTER("init_io_cache_share"); + DBUG_PRINT("io_cache_share", ("read_cache: %p share: %p " + "write_cache: %p threads: %u", + read_cache, cshare, + write_cache, num_threads)); + + DBUG_ASSERT(num_threads > 1); + DBUG_ASSERT(read_cache->type == READ_CACHE); + DBUG_ASSERT(!write_cache || (write_cache->type == WRITE_CACHE)); + + mysql_mutex_init(key_IO_CACHE_SHARE_mutex, + &cshare->mutex, MY_MUTEX_INIT_FAST); + mysql_cond_init(key_IO_CACHE_SHARE_cond, &cshare->cond, 0); + mysql_cond_init(key_IO_CACHE_SHARE_cond_writer, &cshare->cond_writer, 0); + + cshare->running_threads= num_threads; + cshare->total_threads= num_threads; + cshare->error= 0; /* Initialize. */ + cshare->buffer= read_cache->buffer; + cshare->read_end= NULL; /* See function comment of lock_io_cache(). */ + cshare->pos_in_file= 0; /* See function comment of lock_io_cache(). */ + cshare->source_cache= write_cache; /* Can be NULL. */ + + read_cache->share= cshare; + read_cache->read_function= _my_b_cache_read_r; + + if (write_cache) + { + write_cache->share= cshare; + write_cache->write_function= _my_b_cache_write_r; + } + + DBUG_VOID_RETURN; +} + + +/* + Remove a thread from shared access to IO_CACHE. + + SYNOPSIS + remove_io_thread() + cache The IO_CACHE to be removed from the share. + + NOTE + + Every thread must do that on exit for not to deadlock other threads. + + The last thread destroys the pthread resources. + + A writer flushes its cache first. + + RETURN + void +*/ + +void remove_io_thread(IO_CACHE *cache) +{ + IO_CACHE_SHARE *cshare= cache->share; + uint total; + DBUG_ENTER("remove_io_thread"); + + /* If the writer goes, it needs to flush the write cache. */ + if (cache == cshare->source_cache) + flush_io_cache(cache); + + mysql_mutex_lock(&cshare->mutex); + DBUG_PRINT("io_cache_share", ("%s: %p", + (cache == cshare->source_cache) ? 
+ "writer" : "reader", cache)); + + /* Remove from share. */ + total= --cshare->total_threads; + DBUG_PRINT("io_cache_share", ("remaining threads: %u", total)); + + /* Detach from share. */ + cache->share= NULL; + + /* If the writer goes, let the readers know. */ + if (cache == cshare->source_cache) + { + DBUG_PRINT("io_cache_share", ("writer leaves")); + cshare->source_cache= NULL; + } + + /* If all threads are waiting for me to join the lock, wake them. */ + if (!--cshare->running_threads) + { + DBUG_PRINT("io_cache_share", ("the last running thread leaves, wake all")); + mysql_cond_signal(&cshare->cond_writer); + mysql_cond_broadcast(&cshare->cond); + } + + mysql_mutex_unlock(&cshare->mutex); + + if (!total) + { + DBUG_PRINT("io_cache_share", ("last thread removed, destroy share")); + mysql_cond_destroy (&cshare->cond_writer); + mysql_cond_destroy (&cshare->cond); + mysql_mutex_destroy(&cshare->mutex); + } + + DBUG_VOID_RETURN; +} + + +/* + Lock IO cache and wait for all other threads to join. + + SYNOPSIS + lock_io_cache() + cache The cache of the thread entering the lock. + pos File position of the block to read. + Unused for the write thread. + + DESCRIPTION + + Wait for all threads to finish with the current buffer. We want + all threads to proceed in concert. The last thread to join + lock_io_cache() will read the block from file and all threads start + to use it. Then they will join again for reading the next block. + + The waiting threads detect a fresh buffer by comparing + cshare->pos_in_file with the position they want to process next. + Since the first block may start at position 0, we take + cshare->read_end as an additional condition. This variable is + initialized to NULL and will be set after a block of data is written + to the buffer. + + RETURN + 1 OK, lock in place, go ahead and read. + 0 OK, unlocked, another thread did the read. 
+*/ + +static int lock_io_cache(IO_CACHE *cache, my_off_t pos) +{ + IO_CACHE_SHARE *cshare= cache->share; + DBUG_ENTER("lock_io_cache"); + + /* Enter the lock. */ + mysql_mutex_lock(&cshare->mutex); + cshare->running_threads--; + DBUG_PRINT("io_cache_share", ("%s: %p pos: %lu running: %u", + (cache == cshare->source_cache) ? + "writer" : "reader", cache, (ulong) pos, + cshare->running_threads)); + + if (cshare->source_cache) + { + /* A write cache is synchronized to the read caches. */ + + if (cache == cshare->source_cache) + { + /* The writer waits until all readers are here. */ + while (cshare->running_threads) + { + DBUG_PRINT("io_cache_share", ("writer waits in lock")); + mysql_cond_wait(&cshare->cond_writer, &cshare->mutex); + } + DBUG_PRINT("io_cache_share", ("writer awoke, going to copy")); + + /* Stay locked. Leave the lock later by unlock_io_cache(). */ + DBUG_RETURN(1); + } + + /* The last thread wakes the writer. */ + if (!cshare->running_threads) + { + DBUG_PRINT("io_cache_share", ("waking writer")); + mysql_cond_signal(&cshare->cond_writer); + } + + /* + Readers wait until the data is copied from the writer. Another + reason to stop waiting is the removal of the write thread. If this + happens, we leave the lock with old data in the buffer. + */ + while ((!cshare->read_end || (cshare->pos_in_file < pos)) && + cshare->source_cache) + { + DBUG_PRINT("io_cache_share", ("reader waits in lock")); + mysql_cond_wait(&cshare->cond, &cshare->mutex); + } + + /* + If the writer was removed from the share while this thread was + asleep, we need to simulate an EOF condition. The writer cannot + reset the share variables as they might still be in use by readers + of the last block. When we awake here then because the last + joining thread signalled us. If the writer is not the last, it + will not signal. So it is safe to clear the buffer here. 
+ */ + if (!cshare->read_end || (cshare->pos_in_file < pos)) + { + DBUG_PRINT("io_cache_share", ("reader found writer removed. EOF")); + cshare->read_end= cshare->buffer; /* Empty buffer. */ + cshare->error= 0; /* EOF is not an error. */ + } + } + else + { + /* + There are read caches only. The last thread arriving in + lock_io_cache() continues with a locked cache and reads the block. + */ + if (!cshare->running_threads) + { + DBUG_PRINT("io_cache_share", ("last thread joined, going to read")); + /* Stay locked. Leave the lock later by unlock_io_cache(). */ + DBUG_RETURN(1); + } + + /* + All other threads wait until the requested block is read by the + last thread arriving. Another reason to stop waiting is the + removal of a thread. If this leads to all threads being in the + lock, we have to continue also. The first of the awaken threads + will then do the read. + */ + while ((!cshare->read_end || (cshare->pos_in_file < pos)) && + cshare->running_threads) + { + DBUG_PRINT("io_cache_share", ("reader waits in lock")); + mysql_cond_wait(&cshare->cond, &cshare->mutex); + } + + /* If the block is not yet read, continue with a locked cache and read. */ + if (!cshare->read_end || (cshare->pos_in_file < pos)) + { + DBUG_PRINT("io_cache_share", ("reader awoke, going to read")); + /* Stay locked. Leave the lock later by unlock_io_cache(). */ + DBUG_RETURN(1); + } + + /* Another thread did read the block already. */ + } + DBUG_PRINT("io_cache_share", ("reader awoke, going to process %u bytes", + (uint) (cshare->read_end ? (size_t) + (cshare->read_end - cshare->buffer) : + 0))); + + /* + Leave the lock. Do not call unlock_io_cache() later. The thread that + filled the buffer did this and marked all threads as running. + */ + mysql_mutex_unlock(&cshare->mutex); + DBUG_RETURN(0); +} + + +/* + Unlock IO cache. + + SYNOPSIS + unlock_io_cache() + cache The cache of the thread leaving the lock. + + NOTE + This is called by the thread that filled the buffer. 
It marks all + threads as running and awakes them. This must not be done by any + other thread. + + Do not signal cond_writer. Either there is no writer or the writer + is the only one who can call this function. + + The reason for resetting running_threads to total_threads before + waking all other threads is that it could be possible that this + thread is so fast with processing the buffer that it enters the lock + before even one other thread has left it. If every awoken thread + would increase running_threads by one, this thread could think that + he is again the last to join and would not wait for the other + threads to process the data. + + RETURN + void +*/ + +static void unlock_io_cache(IO_CACHE *cache) +{ + IO_CACHE_SHARE *cshare= cache->share; + DBUG_ENTER("unlock_io_cache"); + DBUG_PRINT("io_cache_share", ("%s: %p pos: %lu running: %u", + (cache == cshare->source_cache) ? + "writer" : "reader", + cache, (ulong) cshare->pos_in_file, + cshare->total_threads)); + + cshare->running_threads= cshare->total_threads; + mysql_cond_broadcast(&cshare->cond); + mysql_mutex_unlock(&cshare->mutex); + DBUG_VOID_RETURN; +} + + +/* + Read from IO_CACHE when it is shared between several threads. + + SYNOPSIS + _my_b_cache_read_r() + cache IO_CACHE pointer + Buffer Buffer to retrieve count bytes from file + Count Number of bytes to read into Buffer + + NOTE + This function is only called from the my_b_read() macro when there + isn't enough characters in the buffer to satisfy the request. + + IMPLEMENTATION + + It works as follows: when a thread tries to read from a file (that + is, after using all the data from the (shared) buffer), it just + hangs on lock_io_cache(), waiting for other threads. When the very + last thread attempts a read, lock_io_cache() returns 1, the thread + does actual IO and unlock_io_cache(), which signals all the waiting + threads that data is in the buffer. 
+ + WARNING + + When changing this function, be careful with handling file offsets + (end-of_file, pos_in_file). Do not cast them to possibly smaller + types than my_off_t unless you can be sure that their value fits. + Same applies to differences of file offsets. (Bug #11527) + + When changing this function, check _my_b_cache_read(). It might need the + same change. + + RETURN + 0 we succeeded in reading all data + 1 Error: can't read requested characters +*/ + +static int _my_b_cache_read_r(IO_CACHE *cache, uchar *Buffer, size_t Count) +{ + my_off_t pos_in_file; + size_t length, diff_length, left_length= 0; + IO_CACHE_SHARE *cshare= cache->share; + DBUG_ENTER("_my_b_cache_read_r"); + DBUG_ASSERT(!(cache->myflags & MY_ENCRYPT)); + + while (Count) + { + size_t cnt, len; + + pos_in_file= cache->pos_in_file + (cache->read_end - cache->buffer); + diff_length= (size_t) (pos_in_file & (IO_SIZE-1)); + length=IO_ROUND_UP(Count+diff_length)-diff_length; + length= ((length <= cache->read_length) ? + length + IO_ROUND_DN(cache->read_length - length) : + length - IO_ROUND_UP(length - cache->read_length)); + if (cache->type != READ_FIFO && + (length > (cache->end_of_file - pos_in_file))) + length= (size_t) (cache->end_of_file - pos_in_file); + if (length == 0) + { + cache->error= (int) left_length; + DBUG_RETURN(1); + } + if (lock_io_cache(cache, pos_in_file)) + { + /* With a synchronized write/read cache we won't come here... */ + DBUG_ASSERT(!cshare->source_cache); + /* + ... unless the writer has gone before this thread entered the + lock. Simulate EOF in this case. It can be distinguished by + cache->file. + */ + if (cache->file < 0) + len= 0; + else + { + /* + Whenever a function which operates on IO_CACHE flushes/writes + some part of the IO_CACHE to disk it will set the property + "seek_not_done" to indicate this to other functions operating + on the IO_CACHE. 
+ */ + if (cache->seek_not_done) + { + if (mysql_file_seek(cache->file, pos_in_file, MY_SEEK_SET, MYF(0)) + == MY_FILEPOS_ERROR) + { + cache->error= -1; + unlock_io_cache(cache); + DBUG_RETURN(1); + } + } + len= mysql_file_read(cache->file, cache->buffer, length, cache->myflags); + } + DBUG_PRINT("io_cache_share", ("read %lu bytes", (ulong) len)); + + cache->read_end= cache->buffer + (len == (size_t) -1 ? 0 : len); + cache->error= (len == length ? 0 : (int) len); + cache->pos_in_file= pos_in_file; + + /* Copy important values to the share. */ + cshare->error= cache->error; + cshare->read_end= cache->read_end; + cshare->pos_in_file= pos_in_file; + + /* Mark all threads as running and wake them. */ + unlock_io_cache(cache); + } + else + { + /* + With a synchronized write/read cache readers always come here. + Copy important values from the share. + */ + cache->error= cshare->error; + cache->read_end= cshare->read_end; + cache->pos_in_file= cshare->pos_in_file; + + len= ((cache->error == -1) ? (size_t) -1 : + (size_t) (cache->read_end - cache->buffer)); + } + cache->read_pos= cache->buffer; + cache->seek_not_done= 0; + if (len == 0 || len == (size_t) -1) + { + DBUG_PRINT("io_cache_share", ("reader error. len %lu left %lu", + (ulong) len, (ulong) left_length)); + cache->error= (int) left_length; + DBUG_RETURN(1); + } + cnt= (len > Count) ? Count : len; + if (cnt) + memcpy(Buffer, cache->read_pos, cnt); + Count -= cnt; + Buffer+= cnt; + left_length+= cnt; + cache->read_pos+= cnt; + } + DBUG_RETURN(0); +} + + +/* + Copy data from write cache to read cache. + + SYNOPSIS + copy_to_read_buffer() + write_cache The write cache. + write_buffer The source of data, mostly the cache buffer. + write_length The number of bytes to copy. + + NOTE + The write thread will wait for all read threads to join the cache + lock. Then it copies the data over and wakes the read threads. 
+ + RETURN + void +*/ + +static void copy_to_read_buffer(IO_CACHE *write_cache, + const uchar *write_buffer, my_off_t pos_in_file) +{ + size_t write_length= (size_t) (write_cache->pos_in_file - pos_in_file); + IO_CACHE_SHARE *cshare= write_cache->share; + + DBUG_ASSERT(cshare->source_cache == write_cache); + /* + write_length is usually less or equal to buffer_length. + It can be bigger if _my_b_cache_write_r() is called with a big length. + */ + while (write_length) + { + size_t copy_length= MY_MIN(write_length, write_cache->buffer_length); + int __attribute__((unused)) rc; + + rc= lock_io_cache(write_cache, pos_in_file); + /* The writing thread does always have the lock when it awakes. */ + DBUG_ASSERT(rc); + + memcpy(cshare->buffer, write_buffer, copy_length); + + cshare->error= 0; + cshare->read_end= cshare->buffer + copy_length; + cshare->pos_in_file= pos_in_file; + + /* Mark all threads as running and wake them. */ + unlock_io_cache(write_cache); + + write_buffer+= copy_length; + write_length-= copy_length; + } +} + + +/* + Do sequential read from the SEQ_READ_APPEND cache. 
+ + We do this in three stages: + - first read from info->buffer + - then if there are still data to read, try the file descriptor + - afterwards, if there are still data to read, try append buffer + + RETURNS + 0 Success + 1 Failed to read +*/ + +static int _my_b_seq_read(IO_CACHE *info, uchar *Buffer, size_t Count) +{ + size_t length, diff_length, save_count, max_length; + my_off_t pos_in_file; + save_count=Count; + + lock_append_buffer(info); + + /* pos_in_file always point on where info->buffer was read */ + if ((pos_in_file=info->pos_in_file + + (size_t) (info->read_end - info->buffer)) >= info->end_of_file) + goto read_append_buffer; + + /* + With read-append cache we must always do a seek before we read, + because the write could have moved the file pointer astray + */ + if (mysql_file_seek(info->file, pos_in_file, MY_SEEK_SET, MYF(0)) == MY_FILEPOS_ERROR) + { + info->error= -1; + unlock_append_buffer(info); + return (1); + } + info->seek_not_done=0; + + diff_length= (size_t) (pos_in_file & (IO_SIZE-1)); + + /* now the second stage begins - read from file descriptor */ + if (Count >= (size_t) (IO_SIZE+(IO_SIZE-diff_length))) + { + /* Fill first intern buffer */ + size_t read_length; + + length= IO_ROUND_DN(Count) - diff_length; + if ((read_length= mysql_file_read(info->file,Buffer, length, + info->myflags)) == (size_t) -1) + { + info->error= -1; + unlock_append_buffer(info); + return 1; + } + Count-=read_length; + Buffer+=read_length; + pos_in_file+=read_length; + + if (read_length != length) + { + /* + We only got part of data; Read the rest of the data from the + write buffer + */ + goto read_append_buffer; + } + diff_length=0; + } + + max_length= info->read_length-diff_length; + if (max_length > (info->end_of_file - pos_in_file)) + max_length= (size_t) (info->end_of_file - pos_in_file); + if (!max_length) + { + if (Count) + goto read_append_buffer; + length=0; /* Didn't read any more chars */ + } + else + { + length= 
mysql_file_read(info->file,info->buffer, max_length, info->myflags); + if (length == (size_t) -1) + { + info->error= -1; + unlock_append_buffer(info); + return 1; + } + if (length < Count) + { + memcpy(Buffer, info->buffer, length); + Count -= length; + Buffer += length; + + /* + added the line below to make + DBUG_ASSERT(pos_in_file==info->end_of_file) pass. + otherwise this does not appear to be needed + */ + pos_in_file += length; + goto read_append_buffer; + } + } + unlock_append_buffer(info); + info->read_pos=info->buffer+Count; + info->read_end=info->buffer+length; + info->pos_in_file=pos_in_file; + memcpy(Buffer,info->buffer,(size_t) Count); + return 0; + +read_append_buffer: + + /* + Read data from the current write buffer. + Count should never be == 0 here (The code will work even if count is 0) + */ + + { + /* First copy the data to Count */ + size_t len_in_buff = (size_t) (info->write_pos - info->append_read_pos); + size_t copy_len; + size_t transfer_len; + + DBUG_ASSERT(info->append_read_pos <= info->write_pos); + copy_len=MY_MIN(Count, len_in_buff); + memcpy(Buffer, info->append_read_pos, copy_len); + info->append_read_pos += copy_len; + Count -= copy_len; + if (Count) + info->error= (int) (save_count - Count); + + /* Fill read buffer with data from write buffer */ + memcpy(info->buffer, info->append_read_pos, + (size_t) (transfer_len=len_in_buff - copy_len)); + info->read_pos= info->buffer; + info->read_end= info->buffer+transfer_len; + info->append_read_pos=info->write_pos; + info->pos_in_file=pos_in_file+copy_len; + info->end_of_file+=len_in_buff; + } + unlock_append_buffer(info); + return Count ? 1 : 0; +} + + +/* Read one byte when buffer is empty */ + +int _my_b_get(IO_CACHE *info) +{ + uchar buff; + if ((*(info)->read_function)(info,&buff,1)) + return my_b_EOF; + return (int) (uchar) buff; +} + +/* + Write a byte buffer to IO_CACHE and flush to disk + if IO_CACHE is full. 
+ + RETURN VALUE + 1 On seek error; info->error is set to -1 + 0 On success + -1 On write error; info->error is set to -1 and + my_errno contains error code. +*/ + +int _my_b_cache_write(IO_CACHE *info, const uchar *Buffer, size_t Count) +{ + if (Buffer != info->write_buffer) + { + /* + Data that does not come from the cache's own write buffer is + rounded down with IO_ROUND_DN(), so only that part is written. + pos_in_file advances by the rounded Count and _my_b_write() + derives the unwritten remainder from it. + */ + Count= IO_ROUND_DN(Count); + if (!Count) + return 0; + } + + if (info->seek_not_done) + { + /* + Whenever a function which operates on IO_CACHE flushes/writes + some part of the IO_CACHE to disk it will set the property + "seek_not_done" to indicate this to other functions operating + on the IO_CACHE. + */ + if (mysql_file_seek(info->file, info->pos_in_file, MY_SEEK_SET, + MYF(info->myflags & MY_WME)) == MY_FILEPOS_ERROR) + { + info->error= -1; + return 1; + } + info->seek_not_done=0; + } + if (mysql_file_write(info->file, Buffer, Count, info->myflags | MY_NABP)) + return info->error= -1; + + info->pos_in_file+= Count; + return 0; +} + + +/* + In case of a shared I/O cache with a writer we normally do direct + write cache to read cache copy. Simulate this here by direct + caller buffer to read cache copy. Do it after the write so that + the cache readers actions on the flushed part can go in parallel + with the write of the extra stuff. copy_to_read_buffer() + synchronizes writer and readers so that after this call the + readers can act on the extra stuff while the writer can go ahead + and prepare the next output. copy_to_read_buffer() relies on + info->pos_in_file. 
+*/ +static int _my_b_cache_write_r(IO_CACHE *info, const uchar *Buffer, size_t Count) +{ + my_off_t old_pos_in_file= info->pos_in_file; + int res= _my_b_cache_write(info, Buffer, Count); + if (res) + return res; + + DBUG_ASSERT(!(info->myflags & MY_ENCRYPT)); + DBUG_ASSERT(info->share); + copy_to_read_buffer(info, Buffer, old_pos_in_file); + + return 0; +} + + +/* + Append a block to the write buffer. + This is done with the buffer locked to ensure that we don't read from + the write buffer before we are ready with it. 
+*/ + +int my_b_append(IO_CACHE *info, const uchar *Buffer, size_t Count) +{ + size_t rest_length,length; + + MEM_CHECK_DEFINED(Buffer, Count); + + /* + Assert that we cannot come here with a shared cache. If we do one + day, we might need to add a call to copy_to_read_buffer(). + */ + DBUG_ASSERT(!info->share); + DBUG_ASSERT(!(info->myflags & MY_ENCRYPT)); + + lock_append_buffer(info); + rest_length= (size_t) (info->write_end - info->write_pos); + if (Count <= rest_length) + goto end; + memcpy(info->write_pos, Buffer, rest_length); + Buffer+=rest_length; + Count-=rest_length; + info->write_pos+=rest_length; + if (my_b_flush_io_cache(info,0)) + { + unlock_append_buffer(info); + return 1; + } + if (Count >= IO_SIZE) + { /* Fill first intern buffer */ + length= IO_ROUND_DN(Count); + if (mysql_file_write(info->file,Buffer, length, info->myflags | MY_NABP)) + { + unlock_append_buffer(info); + return info->error= -1; + } + Count-=length; + Buffer+=length; + info->end_of_file+=length; + } + +end: + memcpy(info->write_pos,Buffer,(size_t) Count); + info->write_pos+=Count; + unlock_append_buffer(info); + return 0; +} + + +int my_b_safe_write(IO_CACHE *info, const uchar *Buffer, size_t Count) +{ + /* + Sasha: We are not writing this with the ? operator to avoid hitting + a possible compiler bug. At least gcc 2.95 cannot deal with + several layers of ternary operators that evaluated comma(,) operator + expressions inside - I do have a test case if somebody wants it + */ + if (info->type == SEQ_READ_APPEND) + return my_b_append(info, Buffer, Count); + return my_b_write(info, Buffer, Count); +} + + +/* + Write a block to disk where part of the data may be inside the record + buffer. As all write calls to the data goes through the cache, + we will never get a seek over the end of the buffer +*/ + +int my_block_write(IO_CACHE *info, const uchar *Buffer, size_t Count, + my_off_t pos) +{ + size_t length; + int error=0; + + /* + Assert that we cannot come here with a shared cache. 
If we do one + day, we might need to add a call to copy_to_read_buffer(). + */ + DBUG_ASSERT(!info->share); + DBUG_ASSERT(!(info->myflags & MY_ENCRYPT)); + + if (pos < info->pos_in_file) + { + /* Of no overlap, write everything without buffering */ + if (pos + Count <= info->pos_in_file) + return (int)mysql_file_pwrite(info->file, Buffer, Count, pos, + info->myflags | MY_NABP); + /* Write the part of the block that is before buffer */ + length= (uint) (info->pos_in_file - pos); + if (mysql_file_pwrite(info->file, Buffer, length, pos, info->myflags | MY_NABP)) + info->error= error= -1; + Buffer+=length; + pos+= length; + Count-= length; + } + + /* Check if we want to write inside the used part of the buffer.*/ + length= (size_t) (info->write_end - info->buffer); + if (pos < info->pos_in_file + length) + { + size_t offset= (size_t) (pos - info->pos_in_file); + length-=offset; + if (length > Count) + length=Count; + memcpy(info->buffer+offset, Buffer, length); + Buffer+=length; + Count-= length; + /* Fix length of buffer if the new data was larger */ + if (info->buffer+length > info->write_pos) + info->write_pos=info->buffer+length; + if (!Count) + return (error); + } + /* Write at the end of the current buffer; This is the normal case */ + if (_my_b_write(info, Buffer, Count)) + error= -1; + return error; +} + + + /* Flush write cache */ + +#define LOCK_APPEND_BUFFER if (need_append_buffer_lock) \ + lock_append_buffer(info); +#define UNLOCK_APPEND_BUFFER if (need_append_buffer_lock) \ + unlock_append_buffer(info); + +int my_b_flush_io_cache(IO_CACHE *info, int need_append_buffer_lock) +{ + size_t length; + my_bool append_cache= (info->type == SEQ_READ_APPEND); + DBUG_ENTER("my_b_flush_io_cache"); + DBUG_PRINT("enter", ("cache: %p", info)); + + if (!append_cache) + need_append_buffer_lock= 0; + + if (info->type == WRITE_CACHE || append_cache) + { + if (info->file == -1) + { + if (real_open_cached_file(info)) + DBUG_RETURN((info->error= -1)); + } + LOCK_APPEND_BUFFER; 
+
+    if ((length=(size_t) (info->write_pos - info->write_buffer)))
+    {
+      if (append_cache)
+      {
+        if (mysql_file_write(info->file, info->write_buffer, length,
+                             info->myflags | MY_NABP))
+        {
+          info->error= -1;
+          /*
+            The append buffer lock was taken above; release it before the
+            early return so that the mutex is not left locked (callers such
+            as end_io_cache() destroy it right after flushing).
+          */
+          UNLOCK_APPEND_BUFFER;
+          DBUG_RETURN(-1);
+        }
+        /* Everything up to write_pos is now on disk */
+        info->end_of_file+= info->write_pos - info->append_read_pos;
+        info->append_read_pos= info->write_buffer;
+        DBUG_ASSERT(info->end_of_file == mysql_file_tell(info->file, MYF(0)));
+      }
+      else
+      {
+        /* need_append_buffer_lock is 0 here, so no lock is held */
+        int res= info->write_function(info, info->write_buffer, length);
+        if (res)
+          DBUG_RETURN(res);
+
+        set_if_bigger(info->end_of_file, info->pos_in_file);
+      }
+      /* Shrink the buffer end so that later writes end on an IO_SIZE boundary */
+      info->write_end= (info->write_buffer + info->buffer_length -
+                        ((info->pos_in_file + length) & (IO_SIZE - 1)));
+      info->write_pos= info->write_buffer;
+      ++info->disk_writes;
+      UNLOCK_APPEND_BUFFER;
+      DBUG_RETURN(info->error);
+    }
+  }
+  UNLOCK_APPEND_BUFFER;
+  DBUG_RETURN(0);
+}
+
+/*
+  Free an IO_CACHE object
+
+  SYNOPSIS
+    end_io_cache()
+    info                IO_CACHE Handle to free
+
+  NOTES
+    It's currently safe to call this if one has called init_io_cache()
+    on the 'info' object, even if init_io_cache() failed.
+    This function is also safe to call twice with the same handle.
+    Note that info->file is not reset as the caller may still use it for my_close()
+
+  RETURN
+   0  ok
+   #  Error
+*/
+
+int end_io_cache(IO_CACHE *info)
+{
+  int error=0;
+  DBUG_ENTER("end_io_cache");
+  DBUG_PRINT("enter",("cache: %p", info));
+
+  /*
+    Every thread must call remove_io_thread(). The last one destroys
+    the share elements.
+ */ + DBUG_ASSERT(!info->share || !info->share->total_threads); + + if (info->alloced_buffer) + { + info->alloced_buffer=0; + if (info->file != -1) /* File doesn't exist */ + error= my_b_flush_io_cache(info,1); + my_free(info->buffer); + info->buffer=info->read_pos=(uchar*) 0; + } + if (info->type == SEQ_READ_APPEND) + { + /* Destroy allocated mutex */ + mysql_mutex_destroy(&info->append_buffer_lock); + } + info->share= 0; + info->type= TYPE_NOT_SET; /* Ensure that flush_io_cache() does nothing */ + info->write_end= 0; /* Ensure that my_b_write() fails */ + info->write_function= 0; /* my_b_write will crash if used */ + DBUG_RETURN(error); +} /* end_io_cache */ + + +/********************************************************************** + Testing of MF_IOCACHE +**********************************************************************/ + +#ifdef MAIN + +#include + +void die(const char* fmt, ...) +{ + va_list va_args; + va_start(va_args,fmt); + fprintf(stderr,"Error:"); + vfprintf(stderr, fmt,va_args); + fprintf(stderr,", errno=%d\n", errno); + va_end(va_args); + exit(1); +} + +int open_file(const char* fname, IO_CACHE* info, int cache_size) +{ + int fd; + if ((fd=my_open(fname,O_CREAT | O_RDWR,MYF(MY_WME))) < 0) + die("Could not open %s", fname); + if (init_io_cache(info, fd, cache_size, SEQ_READ_APPEND, 0,0,MYF(MY_WME))) + die("failed in init_io_cache()"); + return fd; +} + +void close_file(IO_CACHE* info) +{ + end_io_cache(info); + my_close(info->file, MYF(MY_WME)); +} + +int main(int argc, char** argv) +{ + IO_CACHE sra_cache; /* SEQ_READ_APPEND */ + MY_STAT status; + const char* fname="/tmp/iocache.test"; + int cache_size=16384; + char llstr_buf[22]; + int max_block,total_bytes=0; + int i,num_loops=100,error=0; + char *p; + char* block, *block_end; + MY_INIT(argv[0]); + max_block = cache_size*3; + if (!(block=(char*)my_malloc(max_block,MYF(MY_WME)))) + die("Not enough memory to allocate test block"); + block_end = block + max_block; + for (p = block,i=0; p < 
block_end;i++)
+  {
+    *p++ = (char)i;
+  }
+  /* Start from a clean slate: remove a leftover file from an earlier run */
+  if (my_stat(fname,&status, MYF(0)) &&
+      my_delete(fname,MYF(MY_WME)))
+  {
+    die("Delete of %s failed, aborting", fname);
+  }
+  open_file(fname,&sra_cache, cache_size);
+  /* Append num_loops records: a 4-byte length followed by that many bytes */
+  for (i = 0; i < num_loops; i++)
+  {
+    char buf[4];
+    int block_size = abs(rand() % max_block);
+    int4store(buf, block_size);
+    if (my_b_append(&sra_cache,buf,4) ||
+        my_b_append(&sra_cache, block, block_size))
+      die("write failed");
+    total_bytes += 4+block_size;
+  }
+  close_file(&sra_cache);
+  my_free(block);
+  /* The format contains %s, so the file name must be passed as argument */
+  if (!my_stat(fname,&status,MYF(MY_WME)))
+    die("%s failed to stat, but I had just closed it,\
+ wonder how that happened", fname);
+  printf("Final size of %s is %s, wrote %d bytes\n",fname,
+         llstr(status.st_size,llstr_buf),
+         total_bytes);
+  my_delete(fname, MYF(MY_WME));
+  /* check correctness of tests */
+  if (total_bytes != status.st_size)
+  {
+    fprintf(stderr,"Not the same number of bytes actually in file as bytes \
+supposedly written\n");
+    error=1;
+  }
+  exit(error);
+  return 0;
+}
+#endif
diff --git a/mysys/mf_iocache2.c b/mysys/mf_iocache2.c
new file mode 100644
index 00000000..4622b686
--- /dev/null
+++ b/mysys/mf_iocache2.c
@@ -0,0 +1,494 @@
+/* Copyright (c) 2000, 2018, Oracle and/or its affiliates.
+   Copyright (c) 2009, 2018, MariaDB Corporation
+
+   This program is free software; you can redistribute it and/or modify
+   it under the terms of the GNU General Public License as published by
+   the Free Software Foundation; version 2 of the License.
+
+   This program is distributed in the hope that it will be useful,
+   but WITHOUT ANY WARRANTY; without even the implied warranty of
+   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+   GNU General Public License for more details.
+ + You should have received a copy of the GNU General Public License + along with this program; if not, write to the Free Software + Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1335 USA */ + +/* + More functions to be used with IO_CACHE files +*/ + +#include "mysys_priv.h" +#include +#include +#include + +/** + Copy the cache to the file. Copying can be constrained to @c count + number of bytes when the parameter is less than SIZE_T_MAX. The + cache will be optionally re-inited to a read cache and will read + from the beginning of the cache. If a failure to write fully + occurs, the cache is only copied partially. + + TODO + Make this function solid by handling partial reads from the cache + in a correct manner: it should be atomic. + + @param cache IO_CACHE to copy from + @param file File to copy to + @param count the copied size or the max of the type + when the whole cache is to be copied. + @return + 0 All OK + 1 An error occurred +*/ +int +my_b_copy_to_file(IO_CACHE *cache, FILE *file, + size_t count) +{ + size_t curr_write, bytes_in_cache; + DBUG_ENTER("my_b_copy_to_file"); + + bytes_in_cache= my_b_bytes_in_cache(cache); + do + { + curr_write= MY_MIN(bytes_in_cache, count); + if (my_fwrite(file, cache->read_pos, curr_write, + MYF(MY_WME | MY_NABP)) == (size_t) -1) + DBUG_RETURN(1); + + cache->read_pos += curr_write; + count -= curr_write; + } while (count && (bytes_in_cache= my_b_fill(cache))); + if(cache->error == -1) + DBUG_RETURN(1); + DBUG_RETURN(0); +} + +int my_b_copy_all_to_file(IO_CACHE *cache, FILE *file) +{ + DBUG_ENTER("my_b_copy_all_to_file"); + /* Reinit the cache to read from the beginning of the cache */ + if (reinit_io_cache(cache, READ_CACHE, 0L, FALSE, FALSE)) + DBUG_RETURN(1); + DBUG_RETURN(my_b_copy_to_file(cache, file, SIZE_T_MAX)); +} + +my_off_t my_b_append_tell(IO_CACHE* info) +{ + /* + Sometimes we want to make sure that the variable is not put into + a register in debugging mode so we can see its value in the 
core + */ +#ifndef DBUG_OFF +# define dbug_volatile volatile +#else +# define dbug_volatile +#endif + + /* + Prevent optimizer from putting res in a register when debugging + we need this to be able to see the value of res when the assert fails + */ + dbug_volatile my_off_t res; + + /* + We need to lock the append buffer mutex to keep flush_io_cache() + from messing with the variables that we need in order to provide the + answer to the question. + */ + mysql_mutex_lock(&info->append_buffer_lock); + +#ifndef DBUG_OFF + /* + Make sure EOF is where we think it is. Note that we cannot just use + my_tell() because we have a reader thread that could have left the + file offset in a non-EOF location + */ + { + volatile my_off_t save_pos; + save_pos= mysql_file_tell(info->file, MYF(0)); + mysql_file_seek(info->file, 0, MY_SEEK_END, MYF(0)); + /* + Save the value of my_tell in res so we can see it when studying coredump + */ + DBUG_ASSERT(info->end_of_file - (info->append_read_pos-info->write_buffer) + == (res= mysql_file_tell(info->file, MYF(0)))); + mysql_file_seek(info->file, save_pos, MY_SEEK_SET, MYF(0)); + } +#endif + res = info->end_of_file + (info->write_pos-info->append_read_pos); + mysql_mutex_unlock(&info->append_buffer_lock); + return res; +} + +my_off_t my_b_safe_tell(IO_CACHE *info) +{ + if (unlikely(info->type == SEQ_READ_APPEND)) + return my_b_append_tell(info); + return my_b_tell(info); +} + +/* + Make next read happen at the given position + For write cache, make next write happen at the given position +*/ + +void my_b_seek(IO_CACHE *info,my_off_t pos) +{ + my_off_t offset; + DBUG_ENTER("my_b_seek"); + DBUG_PRINT("enter",("pos: %lu", (ulong) pos)); + + /* + TODO: + Verify that it is OK to do seek in the non-append + area in SEQ_READ_APPEND cache + a) see if this always works + b) see if there is a better way to make it work + */ + if (info->type == SEQ_READ_APPEND) + (void) flush_io_cache(info); + + offset=(pos - info->pos_in_file); + + if (info->type == 
READ_CACHE || info->type == SEQ_READ_APPEND) + { + /* TODO: explain why this works if pos < info->pos_in_file */ + if ((ulonglong) offset < (ulonglong) (info->read_end - info->buffer)) + { + /* The read is in the current buffer; Reuse it */ + info->read_pos = info->buffer + offset; + DBUG_VOID_RETURN; + } + else + { + /* Force a new read on next my_b_read */ + info->read_pos=info->read_end=info->buffer; + } + } + else if (info->type == WRITE_CACHE) + { + /* If write is in current buffer, reuse it */ + if ((ulonglong) offset < + (ulonglong) (info->write_end - info->write_buffer)) + { + info->write_pos = info->write_buffer + offset; + DBUG_VOID_RETURN; + } + (void) flush_io_cache(info); + /* Correct buffer end so that we write in increments of IO_SIZE */ + info->write_end=(info->write_buffer+info->buffer_length- + (pos & (IO_SIZE-1))); + } + info->pos_in_file=pos; + info->seek_not_done=1; + DBUG_VOID_RETURN; +} + +int my_b_pread(IO_CACHE *info, uchar *Buffer, size_t Count, my_off_t pos) +{ + if (info->myflags & MY_ENCRYPT) + { + my_b_seek(info, pos); + return my_b_read(info, Buffer, Count); + } + + /* backward compatibility behavior. XXX remove it? */ + if (mysql_file_pread(info->file, Buffer, Count, pos, info->myflags | MY_NABP)) + return info->error= -1; + return 0; +} + +/* + Read a string ended by '\n' into a buffer of 'max_length' size. + Returns number of characters read, 0 on error. + last byte is set to '\0' + If buffer is full then to[max_length-1] will be set to \0. 
+*/ + +size_t my_b_gets(IO_CACHE *info, char *to, size_t max_length) +{ + char *start = to; + size_t length; + max_length--; /* Save place for end \0 */ + + /* Calculate number of characters in buffer */ + if (!(length= my_b_bytes_in_cache(info)) && + !(length= my_b_fill(info))) + return 0; + + for (;;) + { + uchar *pos, *end; + if (length > max_length) + length=max_length; + for (pos=info->read_pos,end=pos+length ; pos < end ;) + { + if ((*to++ = *pos++) == '\n') + { + info->read_pos=pos; + *to='\0'; + return (size_t) (to-start); + } + } + if (!(max_length-=length)) + { + /* Found enough charcters; Return found string */ + info->read_pos=pos; + *to='\0'; + return (size_t) (to-start); + } + if (!(length=my_b_fill(info))) + return 0; + } +} + + +my_off_t my_b_filelength(IO_CACHE *info) +{ + if (info->type == WRITE_CACHE) + return my_b_tell(info); + + info->seek_not_done= 1; + return mysql_file_seek(info->file, 0, MY_SEEK_END, MYF(0)); +} + + +my_bool +my_b_write_backtick_quote(IO_CACHE *info, const char *str, size_t len) +{ + const uchar *start; + const uchar *p= (const uchar *)str; + const uchar *end= p + len; + size_t count; + + if (my_b_write(info, (uchar *)"`", 1)) + return 1; + for (;;) + { + start= p; + while (p < end && *p != '`') + ++p; + count= p - start; + if (count && my_b_write(info, start, count)) + return 1; + if (p >= end) + break; + if (my_b_write(info, (uchar *)"``", 2)) + return 1; + ++p; + } + return (my_b_write(info, (uchar *)"`", 1)); +} + +/* + Simple printf version. Supports '%s', '%d', '%u', "%ld" and "%lu" + Used for logging in MariaDB + + @return 0 ok + 1 error +*/ + +my_bool my_b_printf(IO_CACHE *info, const char* fmt, ...) 
+{ + size_t result; + va_list args; + va_start(args,fmt); + result=my_b_vprintf(info, fmt, args); + va_end(args); + return result == (size_t) -1; +} + + +size_t my_b_vprintf(IO_CACHE *info, const char* fmt, va_list args) +{ + size_t out_length= 0; + uint minimum_width; /* as yet unimplemented */ + uint minimum_width_sign; + uint precision; /* as yet unimplemented for anything but %b */ + my_bool is_zero_padded; + my_bool backtick_quoting; + + /* + Store the location of the beginning of a format directive, for the + case where we learn we shouldn't have been parsing a format string + at all, and we don't want to lose the flag/precision/width/size + information. + */ + const char* backtrack; + + for (; *fmt != '\0'; fmt++) + { + /* Copy everything until '%' or end of string */ + const char *start=fmt; + size_t length; + + for (; (*fmt != '\0') && (*fmt != '%'); fmt++) ; + + length= (size_t) (fmt - start); + out_length+=length; + if (my_b_write(info, (const uchar*) start, length)) + goto err; + + if (*fmt == '\0') /* End of format */ + return out_length; + + /* + By this point, *fmt must be a percent; Keep track of this location and + skip over the percent character. + */ + DBUG_ASSERT(*fmt == '%'); + backtrack= fmt; + fmt++; + + is_zero_padded= FALSE; + backtick_quoting= FALSE; + minimum_width_sign= 1; + minimum_width= 0; + precision= 0; + /* Skip if max size is used (to be compatible with printf) */ + +process_flags: + switch (*fmt) + { + case '-': + minimum_width_sign= -1; fmt++; goto process_flags; + case '0': + is_zero_padded= TRUE; fmt++; goto process_flags; + case '`': + backtick_quoting= TRUE; fmt++; goto process_flags; + case '#': + /** @todo Implement "#" conversion flag. */ fmt++; goto process_flags; + case ' ': + /** @todo Implement " " conversion flag. */ fmt++; goto process_flags; + case '+': + /** @todo Implement "+" conversion flag. 
*/ fmt++; goto process_flags;
+    }
+
+    if (*fmt == '*')
+    {
+      precision= (int) va_arg(args, int);
+      fmt++;
+    }
+    else
+    {
+      while (my_isdigit(&my_charset_latin1, *fmt)) {
+        minimum_width=(minimum_width * 10) + (*fmt - '0');
+        fmt++;
+      }
+    }
+    /*
+      Left-justification ('-') is not implemented. minimum_width is
+      unsigned, so multiplying it by the -1 kept in minimum_width_sign
+      would wrap around to a huge padding size that is later handed to
+      my_alloca()/memset(). Drop the width instead of wrapping.
+    */
+    if (minimum_width_sign != 1)
+      minimum_width= 0;
+
+    if (*fmt == '.')
+    {
+      fmt++;
+      if (*fmt == '*') {
+        precision= (int) va_arg(args, int);
+        fmt++;
+      }
+      else
+      {
+        while (my_isdigit(&my_charset_latin1, *fmt)) {
+          precision=(precision * 10) + (*fmt - '0');
+          fmt++;
+        }
+      }
+    }
+
+    if (*fmt == 's')                            /* String parameter */
+    {
+      reg2 char *par = va_arg(args, char *);
+      size_t length2 = strlen(par);
+      /* TODO: implement precision */
+      if (backtick_quoting)
+      {
+        /*
+          my_b_write_backtick_quote() returns a my_bool (non-zero on
+          failure), not a byte count. It writes the two enclosing
+          backticks plus the string with every embedded backtick doubled,
+          so the number of bytes written is computed here.
+        */
+        const char *q;
+        size_t total= length2 + 2;
+        for (q= par; q < par + length2; q++)
+        {
+          if (*q == '`')
+            total++;
+        }
+        if (my_b_write_backtick_quote(info, par, length2))
+          goto err;
+        out_length+= total;
+      }
+      else
+      {
+        out_length+= length2;
+        if (my_b_write(info, (uchar*) par, length2))
+          goto err;
+      }
+    }
+    else if (*fmt == 'c')                       /* char type parameter */
+    {
+      char par[2];
+      par[0] = va_arg(args, int);
+      if (my_b_write(info, (uchar*) par, 1))
+        goto err;
+      out_length++;                             /* count the byte just written */
+    }
+    else if (*fmt == 'b')                       /* Sized buffer parameter, only precision makes sense */
+    {
+      char *par = va_arg(args, char *);
+      out_length+= precision;
+      if (my_b_write(info, (uchar*) par, precision))
+        goto err;
+    }
+    else if (*fmt == 'd' || *fmt == 'u')        /* Integer parameter */
+    {
+      register int iarg;
+      size_t length2;
+      char buff[32];
+
+      iarg = va_arg(args, int);
+      if (*fmt == 'd')
+        length2= (size_t) (int10_to_str((long) iarg,buff, -10) - buff);
+      else
+        length2= (uint) (int10_to_str((long) (uint) iarg,buff,10)- buff);
+
+      /* minimum width padding */
+      if (minimum_width > length2)
+      {
+        uchar *buffz;
+
+        buffz= (uchar*) my_alloca(minimum_width - length2);
+        if (is_zero_padded)
+          memset(buffz, '0', minimum_width - length2);
+        else
+          memset(buffz, ' ', minimum_width - length2);
+        if (my_b_write(info, buffz, minimum_width - length2))
+        {
+          my_afree(buffz);
+          goto err;
+        }
+        my_afree(buffz);
+        /* The padding is part of the output as well */
+        out_length+= minimum_width - length2;
+      }
+
+
out_length+= length2; + if (my_b_write(info, (uchar*) buff, length2)) + goto err; + } + else if ((*fmt == 'l' && (fmt[1] == 'd' || fmt[1] == 'u'))) + /* long parameter */ + { + register long iarg; + size_t length2; + char buff[32]; + + iarg = va_arg(args, long); + if (*++fmt == 'd') + length2= (size_t) (int10_to_str(iarg,buff, -10) - buff); + else + length2= (size_t) (int10_to_str(iarg,buff,10)- buff); + out_length+= length2; + if (my_b_write(info, (uchar*) buff, length2)) + goto err; + } + else + { + /* %% or unknown code */ + if (my_b_write(info, (uchar*) backtrack, (size_t) (fmt-backtrack))) + goto err; + out_length+= fmt-backtrack; + } + } + return out_length; + +err: + return (size_t) -1; +} diff --git a/mysys/mf_keycache.c b/mysys/mf_keycache.c new file mode 100644 index 00000000..4b0fef18 --- /dev/null +++ b/mysys/mf_keycache.c @@ -0,0 +1,6577 @@ +/* Copyright (c) 2000, 2013, Oracle and/or its affiliates. + Copyright (c) 2017, 2022, MariaDB Corporation. + + This program is free software; you can redistribute it and/or modify + it under the terms of the GNU General Public License as published by + the Free Software Foundation; version 2 of the License. + + This program is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + GNU General Public License for more details. 
+ + You should have received a copy of the GNU General Public License + along with this program; if not, write to the Free Software + Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1335 USA */ + + + +/** + @file + The file contains the following modules: + + Simple Key Cache Module + + Partitioned Key Cache Module + + Key Cache Interface Module + +*/ + +#include "mysys_priv.h" +#include "mysys_err.h" +#include +#include "my_static.h" +#include +#include +#include +#include +#include "probes_mysql.h" + +/****************************************************************************** + Simple Key Cache Module + + The module contains implementations of all key cache interface functions + employed by partitioned key caches. + +******************************************************************************/ + +/* + These functions handle keyblock cacheing for ISAM and MyISAM tables. + + One cache can handle many files. + It must contain buffers of the same blocksize. + + init_key_cache() should be used to init cache handler. + + The free list (free_block_list) is a stack like structure. + When a block is freed by free_block(), it is pushed onto the stack. + When a new block is required it is first tried to pop one from the stack. + If the stack is empty, it is tried to get a never-used block from the pool. + If this is empty too, then a block is taken from the LRU ring, flushing it + to disk, if necessary. This is handled in find_key_block(). + With the new free list, the blocks can have three temperatures: + hot, warm and cold (which is free). This is remembered in the block header + by the enum BLOCK_TEMPERATURE temperature variable. Remembering the + temperature is necessary to correctly count the number of warm blocks, + which is required to decide when blocks are allowed to become hot. Whenever + a block is inserted to another (sub-)chain, we take the old and new + temperature into account to decide if we got one more or less warm block. 
+ blocks_unused is the sum of never used blocks in the pool and of currently + free blocks. blocks_used is the number of blocks fetched from the pool and + as such gives the maximum number of in-use blocks at any time. + + Key Cache Locking + ================= + + All key cache locking is done with a single mutex per key cache: + keycache->cache_lock. This mutex is locked almost all the time + when executing code in this file (mf_keycache.c). + However it is released for I/O and some copy operations. + + The cache_lock is also released when waiting for some event. Waiting + and signalling is done via condition variables. In most cases the + thread waits on its thread->suspend condition variable. Every thread + has a my_thread_var structure, which contains this variable and a + '*next' and '**prev' pointer. These pointers are used to insert the + thread into a wait queue. + + A thread can wait for one block and thus be in one wait queue at a + time only. + + Before starting to wait on its condition variable with + mysql_cond_wait(), the thread enters itself to a specific wait queue + with link_into_queue() (double linked with '*next' + '**prev') or + wait_on_queue() (single linked with '*next'). + + Another thread, when releasing a resource, looks up the waiting thread + in the related wait queue. It sends a signal with + mysql_cond_signal() to the waiting thread. + + NOTE: Depending on the particular wait situation, either the sending + thread removes the waiting thread from the wait queue with + unlink_from_queue() or release_whole_queue() respectively, or the waiting + thread removes itself. + + There is one exception from this locking scheme when one thread wants + to reuse a block for some other address. This works by first marking + the block reserved (status= BLOCK_IN_SWITCH) and then waiting for all + threads that are reading the block to finish. Each block has a + reference to a condition variable (condvar). 
It holds a reference to + the thread->suspend condition variable for the waiting thread (if such + a thread exists). When that thread is signaled, the reference is + cleared. The number of readers of a block is registered in + block->hash_link->requests. See wait_for_readers() / remove_reader() + for details. This is similar to the above, but it clearly means that + only one thread can wait for a particular block. There is no queue in + this case. Strangely enough block->condvar is used for waiting for the + assigned hash_link only. More precisely it is used to wait for all + requests to be unregistered from the assigned hash_link. + + The resize_queue serves two purposes: + 1. Threads that want to do a resize wait there if in_resize is set. + This is not used in the server. The server refuses a second resize + request if one is already active. keycache->in_init is used for the + synchronization. See set_var.cc. + 2. Threads that want to access blocks during resize wait here during + the re-initialization phase. + When the resize is done, all threads on the queue are signalled. + Hypothetical resizers can compete for resizing, and read/write + requests will restart to request blocks from the freshly resized + cache. If the cache has been resized too small, it is disabled and + 'can_be_used' is false. In this case read/write requests bypass the + cache. Since they increment and decrement 'cnt_for_resize_op', the + next resizer can wait on the queue 'waiting_for_resize_cnt' until all + I/O finished. 
+*/ + +/* declare structures that is used by st_key_cache */ + +struct st_block_link; +typedef struct st_block_link BLOCK_LINK; +struct st_keycache_page; +typedef struct st_keycache_page KEYCACHE_PAGE; +struct st_hash_link; +typedef struct st_hash_link HASH_LINK; + +/* info about requests in a waiting queue */ +typedef struct st_keycache_wqueue +{ + struct st_my_thread_var *last_thread; /* circular list of waiting threads */ +} KEYCACHE_WQUEUE; + +/* Default size of hash for changed files */ +#define MIN_CHANGED_BLOCKS_HASH_SIZE 128 + +/* Control block for a simple (non-partitioned) key cache */ + +typedef struct st_simple_key_cache_cb +{ + my_bool key_cache_inited; /* <=> control block is allocated */ + my_bool in_resize; /* true during resize operation */ + my_bool resize_in_flush; /* true during flush of resize operation */ + my_bool can_be_used; /* usage of cache for read/write is allowed */ + size_t key_cache_mem_size; /* specified size of the cache memory */ + size_t allocated_mem_size; /* size of the memory actually allocated */ + uint key_cache_block_size; /* size of the page buffer of a cache block */ + size_t min_warm_blocks; /* min number of warm blocks; */ + size_t age_threshold; /* age threshold for hot blocks */ + ulonglong keycache_time; /* total number of block link operations */ + uint hash_entries; /* max number of entries in the hash table */ + uint changed_blocks_hash_size; /* Number of hash buckets for file blocks */ + int hash_links; /* max number of hash links */ + int hash_links_used; /* number of hash links currently used */ + int disk_blocks; /* max number of blocks in the cache */ + size_t blocks_used; /* maximum number of concurrently used blocks */ + size_t blocks_unused; /* number of currently unused blocks */ + size_t blocks_changed; /* number of currently dirty blocks */ + size_t warm_blocks; /* number of blocks in warm sub-chain */ + ulong cnt_for_resize_op; /* counter to block resize operation */ + long blocks_available; /* number 
of blocks available in the LRU chain */ + HASH_LINK **hash_root; /* arr. of entries into hash table buckets */ + HASH_LINK *hash_link_root; /* memory for hash table links */ + HASH_LINK *free_hash_list; /* list of free hash links */ + BLOCK_LINK *free_block_list; /* list of free blocks */ + BLOCK_LINK *block_root; /* memory for block links */ + uchar *block_mem; /* memory for block buffers */ + BLOCK_LINK *used_last; /* ptr to the last block of the LRU chain */ + BLOCK_LINK *used_ins; /* ptr to the insertion block in LRU chain */ + mysql_mutex_t cache_lock; /* to lock access to the cache structure */ + KEYCACHE_WQUEUE resize_queue; /* threads waiting during resize operation */ + /* + Waiting for a zero resize count. Using a queue for symmetry though + only one thread can wait here. + */ + KEYCACHE_WQUEUE waiting_for_resize_cnt; + KEYCACHE_WQUEUE waiting_for_hash_link; /* waiting for a free hash link */ + KEYCACHE_WQUEUE waiting_for_block; /* requests waiting for a free block */ + BLOCK_LINK **changed_blocks; /* hash for dirty file bl.*/ + BLOCK_LINK **file_blocks; /* hash for other file bl.*/ + + /* Statistics variables. These are reset in reset_key_cache_counters(). 
*/ + ulong global_blocks_changed; /* number of currently dirty blocks */ + ulonglong global_cache_w_requests;/* number of write requests (write hits) */ + ulonglong global_cache_write; /* number of writes from cache to files */ + ulonglong global_cache_r_requests;/* number of read requests (read hits) */ + ulonglong global_cache_read; /* number of reads from files to cache */ + + int blocks; /* max number of blocks in the cache */ + uint hash_factor; /* factor used to calculate hash function */ + my_bool in_init; /* Set to 1 in MySQL during init/resize */ +} SIMPLE_KEY_CACHE_CB; + +/* + Some compilation flags have been added specifically for this module + to control the following: + - not to let a thread to yield the control when reading directly + from key cache, which might improve performance in many cases; + to enable this add: + #define SERIALIZED_READ_FROM_CACHE + - to set an upper bound for number of threads simultaneously + using the key cache; this setting helps to determine an optimal + size for hash table and improve performance when the number of + blocks in the key cache much less than the number of threads + accessing it; + to set this number equal to add + #define MAX_THREADS + - to substitute calls of mysql_cond_wait for calls of + mysql_cond_timedwait (wait with timeout set up); + this setting should be used only when you want to trap a deadlock + situation, which theoretically should not happen; + to set timeout equal to seconds add + #define KEYCACHE_TIMEOUT + - to enable the module traps and to send debug information from + key cache module to a special debug log add: + #define KEYCACHE_DEBUG + the name of this debug log file can be set through: + #define KEYCACHE_DEBUG_LOG + if the name is not defined, it's set by default; + if the KEYCACHE_DEBUG flag is not set up and we are in a debug + mode, i.e. when ! defined(DBUG_OFF), the debug information from the + module is sent to the regular debug log. 
+ + Example of the settings: + #define SERIALIZED_READ_FROM_CACHE + #define MAX_THREADS 100 + #define KEYCACHE_TIMEOUT 1 + #define KEYCACHE_DEBUG + #define KEYCACHE_DEBUG_LOG "my_key_cache_debug.log" +*/ + +#define STRUCT_PTR(TYPE, MEMBER, a) \ + (TYPE *) ((char *) (a) - offsetof(TYPE, MEMBER)) + +/* types of condition variables */ +#define COND_FOR_REQUESTED 0 +#define COND_FOR_SAVED 1 +#define COND_FOR_READERS 2 + +typedef mysql_cond_t KEYCACHE_CONDVAR; + +/* descriptor of the page in the key cache block buffer */ +struct st_keycache_page +{ + int file; /* file to which the page belongs to */ + my_off_t filepos; /* position of the page in the file */ +}; + +/* element in the chain of a hash table bucket */ +struct st_hash_link +{ + struct st_hash_link *next, **prev; /* to connect links in the same bucket */ + struct st_block_link *block; /* reference to the block for the page: */ + File file; /* from such a file */ + my_off_t diskpos; /* with such an offset */ + uint requests; /* number of requests for the page */ +}; + +/* simple states of a block */ +#define BLOCK_ERROR 1U/* an error occurred when performing file i/o */ +#define BLOCK_READ 2U/* file block is in the block buffer */ +#define BLOCK_IN_SWITCH 4U/* block is preparing to read new page */ +#define BLOCK_REASSIGNED 8U/* blk does not accept requests for old page */ +#define BLOCK_IN_FLUSH 16U/* block is selected for flush */ +#define BLOCK_CHANGED 32U/* block buffer contains a dirty page */ +#define BLOCK_IN_USE 64U/* block is not free */ +#define BLOCK_IN_EVICTION 128U/* block is selected for eviction */ +#define BLOCK_IN_FLUSHWRITE 256U/* block is in write to file */ +#define BLOCK_FOR_UPDATE 512U/* block is selected for buffer modification */ + +/* page status, returned by find_key_block */ +#define PAGE_READ 0 +#define PAGE_TO_BE_READ 1 +#define PAGE_WAIT_TO_BE_READ 2 + +/* block temperature determines in which (sub-)chain the block currently is */ +enum BLOCK_TEMPERATURE { BLOCK_COLD /*free*/ , 
BLOCK_WARM , BLOCK_HOT }; + +/* key cache block */ +struct st_block_link +{ + struct st_block_link + *next_used, **prev_used; /* to connect links in the LRU chain (ring) */ + struct st_block_link + *next_changed, **prev_changed; /* for lists of file dirty/clean blocks */ + struct st_hash_link *hash_link; /* backward ptr to referring hash_link */ + KEYCACHE_WQUEUE wqueue[2]; /* queues on waiting requests for new/old pages */ + uint requests; /* number of requests for the block */ + uchar *buffer; /* buffer for the block page */ + uint offset; /* beginning of modified data in the buffer */ + uint length; /* end of data in the buffer */ + uint status; /* state of the block */ + enum BLOCK_TEMPERATURE temperature; /* block temperature: cold, warm, hot */ + uint hits_left; /* number of hits left until promotion */ + ulonglong last_hit_time; /* timestamp of the last hit */ + KEYCACHE_CONDVAR *condvar; /* condition variable for 'no readers' event */ +}; + +KEY_CACHE dflt_key_cache_var; +KEY_CACHE *dflt_key_cache= &dflt_key_cache_var; + +#define FLUSH_CACHE 2000 /* sort this many blocks at once */ + +static int flush_all_key_blocks(SIMPLE_KEY_CACHE_CB *keycache); +static void end_simple_key_cache(SIMPLE_KEY_CACHE_CB *keycache, my_bool cleanup); +static void wait_on_queue(KEYCACHE_WQUEUE *wqueue, + mysql_mutex_t *mutex); +static void release_whole_queue(KEYCACHE_WQUEUE *wqueue); +static void free_block(SIMPLE_KEY_CACHE_CB *keycache, BLOCK_LINK *block); +#ifndef DBUG_OFF +static void test_key_cache(SIMPLE_KEY_CACHE_CB *keycache, + const char *where, my_bool lock); +#endif +#define KEYCACHE_BASE_EXPR(f, pos) \ + ((ulong) ((pos) / keycache->key_cache_block_size) + (ulong) (f)) +#define KEYCACHE_HASH(f, pos) \ + ((KEYCACHE_BASE_EXPR(f, pos) / keycache->hash_factor) & \ + (keycache->hash_entries-1)) +#define FILE_HASH(f, cache) ((uint) (f) & (cache->changed_blocks_hash_size-1)) + +#define DEFAULT_KEYCACHE_DEBUG_LOG "keycache_debug.log" + +#if defined(KEYCACHE_DEBUG) && ! 
defined(KEYCACHE_DEBUG_LOG) +#define KEYCACHE_DEBUG_LOG DEFAULT_KEYCACHE_DEBUG_LOG +#endif + +#if defined(KEYCACHE_DEBUG_LOG) +static FILE *keycache_debug_log=NULL; +static void keycache_debug_print(const char *fmt,...); +#define KEYCACHE_DEBUG_OPEN \ + if (!keycache_debug_log) \ + { \ + keycache_debug_log= fopen(KEYCACHE_DEBUG_LOG, "w"); \ + (void) setvbuf(keycache_debug_log, NULL, _IOLBF, BUFSIZ); \ + } + +#define KEYCACHE_DEBUG_CLOSE \ + if (keycache_debug_log) \ + { \ + fclose(keycache_debug_log); \ + keycache_debug_log= 0; \ + } +#else +#define KEYCACHE_DEBUG_OPEN +#define KEYCACHE_DEBUG_CLOSE +#endif /* defined(KEYCACHE_DEBUG_LOG) */ + +#if defined(KEYCACHE_DEBUG_LOG) && defined(KEYCACHE_DEBUG) +#define KEYCACHE_DBUG_PRINT(l, m) \ + { if (keycache_debug_log) fprintf(keycache_debug_log, "%s: ", l); \ + keycache_debug_print m; } + +#define KEYCACHE_DBUG_ASSERT(a) \ + { if (! (a) && keycache_debug_log) fclose(keycache_debug_log); \ + assert(a); } +#else +#define KEYCACHE_DBUG_PRINT(l, m) DBUG_PRINT(l, m) +#define KEYCACHE_DBUG_ASSERT(a) DBUG_ASSERT(a) +#endif /* defined(KEYCACHE_DEBUG_LOG) && defined(KEYCACHE_DEBUG) */ + +#if defined(KEYCACHE_DEBUG) || defined(DBUG_TRACE) +static long keycache_thread_id; +#define KEYCACHE_THREAD_TRACE(l) \ + KEYCACHE_DBUG_PRINT(l,("|thread %ld",keycache_thread_id)) + +#define KEYCACHE_THREAD_TRACE_BEGIN(l) \ + { struct st_my_thread_var *thread_var= my_thread_var; \ + keycache_thread_id= thread_var->id; \ + KEYCACHE_DBUG_PRINT(l,("[thread %ld",keycache_thread_id)) } + +#define KEYCACHE_THREAD_TRACE_END(l) \ + KEYCACHE_DBUG_PRINT(l,("]thread %ld",keycache_thread_id)) +#else +#define KEYCACHE_THREAD_TRACE_BEGIN(l) +#define KEYCACHE_THREAD_TRACE_END(l) +#define KEYCACHE_THREAD_TRACE(l) +#endif /* defined(KEYCACHE_DEBUG) || defined(DBUG_TRACE) */ + +#define BLOCK_NUMBER(b) \ + ((uint) (((char*)(b)-(char *) keycache->block_root)/sizeof(BLOCK_LINK))) +#define HASH_LINK_NUMBER(h) \ + ((uint) (((char*)(h)-(char *) 
keycache->hash_link_root)/sizeof(HASH_LINK))) + +#if (defined(KEYCACHE_TIMEOUT) && !defined(_WIN32)) || defined(KEYCACHE_DEBUG) +static int keycache_pthread_cond_wait(mysql_cond_t *cond, + mysql_mutex_t *mutex); +#else +#define keycache_pthread_cond_wait(C, M) mysql_cond_wait(C, M) +#endif + +#if defined(KEYCACHE_DEBUG) +static int keycache_pthread_mutex_lock(mysql_mutex_t *mutex); +static void keycache_pthread_mutex_unlock(mysql_mutex_t *mutex); +static int keycache_pthread_cond_signal(mysql_cond_t *cond); +#else +#define keycache_pthread_mutex_lock(M) mysql_mutex_lock(M) +#define keycache_pthread_mutex_unlock(M) mysql_mutex_unlock(M) +#define keycache_pthread_cond_signal(C) mysql_cond_signal(C) +#endif /* defined(KEYCACHE_DEBUG) */ + +#if !defined(DBUG_OFF) +#if defined(inline) +#undef inline +#endif +#define inline /* disabled inline for easier debugging */ +static int fail_hlink(HASH_LINK *hlink); +static int cache_empty(SIMPLE_KEY_CACHE_CB *keycache); +#endif +#ifdef DBUG_ASSERT_EXISTS +static int fail_block(BLOCK_LINK *block); +#endif + +static inline uint next_power(uint value) +{ + return (uint) my_round_up_to_next_power((uint32) value) << 1; +} + + +/* + Initialize a simple key cache + + SYNOPSIS + init_simple_key_cache() + keycache pointer to the control block of a simple key cache + key_cache_block_size size of blocks to keep cached data + use_mem memory to use for the key cache buferrs/structures + division_limit division limit (may be zero) + age_threshold age threshold (may be zero) + + DESCRIPTION + This function is the implementation of the init_key_cache interface + function that is employed by simple (non-partitioned) key caches. + The function builds a simple key cache and initializes the control block + structure of the type SIMPLE_KEY_CACHE_CB that is used for this key cache. + The parameter keycache is supposed to point to this structure. + The parameter key_cache_block_size specifies the size of the blocks in + the key cache to be built. 
The parameters division_limit and age_threshold
+    determine the initial values of those characteristics of the key cache
+    that are used for midpoint insertion strategy. The parameter use_mem
+    specifies the total amount of memory to be allocated for key cache blocks
+    and auxiliary structures.
+
+  RETURN VALUE
+    number of blocks in the key cache, if successful,
+    <= 0 - otherwise.
+
+  NOTES.
+    if keycache->key_cache_inited != 0 we assume that the key cache
+    is already initialized.  This is for now used by myisamchk, but shouldn't
+    be something that a program should rely on!
+
+    It's assumed that no two threads call this function simultaneously
+    referring to the same key cache handle.
+*/
+
+static
+int init_simple_key_cache(SIMPLE_KEY_CACHE_CB *keycache,
+                          uint key_cache_block_size,
+                          size_t use_mem, uint division_limit,
+                          uint age_threshold, uint changed_blocks_hash_size)
+{
+  size_t blocks, hash_links;
+  size_t length;
+  int error;
+  DBUG_ENTER("init_simple_key_cache");
+  DBUG_ASSERT(key_cache_block_size >= 512);
+
+  KEYCACHE_DEBUG_OPEN;
+  if (keycache->key_cache_inited && keycache->disk_blocks > 0)
+  {
+    DBUG_PRINT("warning",("key cache already in use"));
+    DBUG_RETURN(0);
+  }
+
+  keycache->blocks_used= keycache->blocks_unused= 0;
+  keycache->global_blocks_changed= 0;
+  keycache->global_cache_w_requests= keycache->global_cache_r_requests= 0;
+  keycache->global_cache_read= keycache->global_cache_write= 0;
+  keycache->disk_blocks= -1;     /* stays -1 unless blocks are allocated below */
+  if (! keycache->key_cache_inited)
+  {
+    keycache->key_cache_inited= 1;
+    keycache->hash_factor= 1;
+    /*
+      Initialize these variables once only.
+      Their value must survive re-initialization during resizing.
+      The cache_lock mutex is likewise created only on this first call.
+    */
+    keycache->in_resize= 0;
+    keycache->resize_in_flush= 0;
+    keycache->cnt_for_resize_op= 0;
+    keycache->waiting_for_resize_cnt.last_thread= NULL;
+    keycache->in_init= 0;
+    mysql_mutex_init(key_KEY_CACHE_cache_lock,
+                     &keycache->cache_lock, MY_MUTEX_INIT_FAST);
+    keycache->resize_queue.last_thread= NULL;
+  }
+
+  keycache->key_cache_mem_size= use_mem;
+  keycache->key_cache_block_size= key_cache_block_size;
+  DBUG_PRINT("info", ("key_cache_block_size: %u",
+                      key_cache_block_size));
+
+  blocks= use_mem / (sizeof(BLOCK_LINK) + 2 * sizeof(HASH_LINK) +
+                     sizeof(HASH_LINK*) * 5/4 + key_cache_block_size);
+
+  /*
+    Changed blocks hash needs to be a power of 2 (FILE_HASH masks with
+    size - 1); it is never smaller than MIN_CHANGED_BLOCKS_HASH_SIZE.
+  */
+  changed_blocks_hash_size= my_round_up_to_next_power(MY_MAX(changed_blocks_hash_size,
+                                                             MIN_CHANGED_BLOCKS_HASH_SIZE));
+
+  /* It doesn't make sense to have too few blocks (less than 8) */
+  if (blocks >= 8)
+  {
+    for ( ; ; )      /* left by 'break' on success or 'goto err' on failure */
+    {
+      /* Set my_hash_entries to the next bigger 2 power */
+      if ((keycache->hash_entries= next_power((uint)blocks)) < blocks * 5/4)
+        keycache->hash_entries<<= 1;
+      hash_links= 2 * blocks;
+#if defined(MAX_THREADS)
+      if (hash_links < MAX_THREADS + blocks - 1)
+        hash_links= MAX_THREADS + blocks - 1;
+#endif
+      while ((length= (ALIGN_SIZE(blocks * sizeof(BLOCK_LINK)) +
+                       ALIGN_SIZE(hash_links * sizeof(HASH_LINK)) +
+                       ALIGN_SIZE(sizeof(HASH_LINK*) *
+                                  keycache->hash_entries) +
+                       sizeof(BLOCK_LINK*)* ((size_t)changed_blocks_hash_size*2))) +
+             (blocks * keycache->key_cache_block_size) > use_mem && blocks > 8)
+        blocks--;    /* shrink until structures plus buffers fit in use_mem */
+      keycache->allocated_mem_size= blocks * keycache->key_cache_block_size;
+      if ((keycache->block_mem= my_large_malloc(&keycache->allocated_mem_size,
+                                                MYF(0))))
+      {
+        /*
+          Allocate memory for blocks, hash_links and hash entries;
+          For each block 2 hash links are allocated
+          (the changed_blocks and file_blocks arrays come from the same
+          my_multi_malloc_large() call).
+        */
+        if (my_multi_malloc_large(key_memory_KEY_CACHE, MYF(MY_ZEROFILL),
+                                  &keycache->block_root,
+                                  (ulonglong) (blocks * sizeof(BLOCK_LINK)),
+                                  &keycache->hash_root,
+                                  (ulonglong) (sizeof(HASH_LINK*) *
+                                               keycache->hash_entries),
+                                  &keycache->hash_link_root,
+                                  (ulonglong) (hash_links * sizeof(HASH_LINK)),
+                                  &keycache->changed_blocks,
+                                  (ulonglong) (sizeof(BLOCK_LINK*) *
+                                               changed_blocks_hash_size),
+                                  &keycache->file_blocks,
+                                  (ulonglong) (sizeof(BLOCK_LINK*) *
+                                               changed_blocks_hash_size),
+                                  NullS))
+          break;
+        my_large_free(keycache->block_mem, keycache->allocated_mem_size);
+        keycache->block_mem= 0;
+      }
+      if (blocks < 8)
+      {
+        my_errno= ENOMEM;
+        my_error(EE_OUTOFMEMORY, MYF(ME_FATAL),
+                 blocks * keycache->key_cache_block_size);
+        goto err;
+      }
+      blocks= blocks / 4*3;      /* retry with three quarters of the blocks */
+    }
+    keycache->blocks_unused= blocks;
+    keycache->disk_blocks= (int) blocks;
+    keycache->hash_links= (int)hash_links;
+    keycache->hash_links_used= 0;
+    keycache->free_hash_list= NULL;
+    keycache->blocks_used= keycache->blocks_changed= 0;
+
+    keycache->global_blocks_changed= 0;
+    keycache->blocks_available=0;		/* For debugging */
+
+    /* The LRU chain is empty after initialization */
+    keycache->used_last= NULL;
+    keycache->used_ins= NULL;
+    keycache->free_block_list= NULL;
+    keycache->keycache_time= 0;
+    keycache->warm_blocks= 0;
+    keycache->min_warm_blocks= (division_limit ?
+                                blocks * division_limit / 100 + 1 :
+                                blocks);
+    keycache->age_threshold= (age_threshold ?
+                              blocks * age_threshold / 100 :
+                              blocks);
+    keycache->changed_blocks_hash_size= changed_blocks_hash_size;
+    keycache->can_be_used= 1;
+
+    keycache->waiting_for_hash_link.last_thread= NULL;
+    keycache->waiting_for_block.last_thread= NULL;
+    DBUG_PRINT("exit",
+               ("disk_blocks: %d block_root: %p hash_entries: %d\
+ hash_root: %p hash_links: %d hash_link_root: %p",
+                keycache->disk_blocks, keycache->block_root,
+                keycache->hash_entries, keycache->hash_root,
+                keycache->hash_links, keycache->hash_link_root));
+  }
+  else
+  {
+    /* key_buffer_size is specified too small. Disable the cache. */
+    keycache->can_be_used= 0;
+  }
+
+  keycache->blocks= keycache->disk_blocks > 0 ? keycache->disk_blocks : 0;
+  DBUG_RETURN((int) keycache->disk_blocks);
+
+err:
+  error= my_errno;               /* restored below, after the cleanup calls */
+  keycache->disk_blocks= 0;
+  keycache->blocks=  0;
+  if (keycache->block_mem)
+  {
+    my_large_free((uchar*) keycache->block_mem, keycache->allocated_mem_size);
+    keycache->block_mem= NULL;
+  }
+  if (keycache->block_root)
+  {
+    my_free(keycache->block_root);
+    keycache->block_root= NULL;
+  }
+  my_errno= error;
+  keycache->can_be_used= 0;
+  DBUG_RETURN(0);
+}
+
+
+/*
+  Prepare for resizing a simple key cache
+
+  SYNOPSIS
+    prepare_resize_simple_key_cache()
+    keycache                pointer to the control block of a simple key cache
+    release_lock            <=> release the key cache lock before return
+
+  DESCRIPTION
+    This function flushes all dirty pages from a simple key cache and after
+    this it destroys the key cache calling end_simple_key_cache. The function
+    takes the parameter keycache as a pointer to the control block
+    structure of the type SIMPLE_KEY_CACHE_CB for this key cache.
+    The parameter release_lock says whether the key cache lock must be
+    released before return from the function.
+
+    The function acquires keycache->cache_lock itself. When release_lock
+    is zero the lock is still held on return, on the success path as well
+    as on the failure path. keycache->in_resize is set to 1 here and is
+    not reset by this function.
+
+  RETURN VALUE
+    0 - on success,
+    1 - otherwise (flush_all_key_blocks failed; the cache is then marked
+        unusable and end_simple_key_cache is not called).
+
+  NOTES
+    This function is called by resize_simple_key_cache and
+    resize_partitioned_key_cache that resize simple and partitioned key caches
+    respectively.
+*/
+
+static
+int prepare_resize_simple_key_cache(SIMPLE_KEY_CACHE_CB *keycache,
+                                    my_bool release_lock)
+{
+  int res= 0;
+  DBUG_ENTER("prepare_resize_simple_key_cache");
+
+  keycache_pthread_mutex_lock(&keycache->cache_lock);
+
+  /*
+    We may need to wait for another thread which is doing a resize
+    already. This cannot happen in the MySQL server though. It allows
+    one resizer only. In set_var.cc keycache->in_init is used to block
+    multiple attempts.
+  */
+  while (keycache->in_resize)
+  {
+    /* purecov: begin inspected */
+    wait_on_queue(&keycache->resize_queue, &keycache->cache_lock);
+    /* purecov: end */
+  }
+
+  /*
+    Mark the operation in progress. This blocks other threads from doing
+    a resize in parallel. It prohibits new blocks to enter the cache.
+    Read/write requests can bypass the cache during the flush phase.
+  */
+  keycache->in_resize= 1;
+
+  /* Need to flush only if keycache is enabled. */
+  if (keycache->can_be_used && keycache->disk_blocks != -1)
+  {
+    /* Start the flush phase. */
+    keycache->resize_in_flush= 1;
+
+    if (flush_all_key_blocks(keycache))
+    {
+      /* TODO: if this happens, we should write a warning in the log file ! */
+      keycache->resize_in_flush= 0;
+      keycache->can_be_used= 0;
+      res= 1;
+      goto finish;
+    }
+    DBUG_SLOW_ASSERT(cache_empty(keycache));
+
+    /* End the flush phase. */
+    keycache->resize_in_flush= 0;
+  }
+
+  /*
+    Some direct read/write operations (bypassing the cache) may still be
+    unfinished. Wait until they are done. If the key cache can be used,
+    direct I/O is done in increments of key_cache_block_size. That is,
+    every block is checked if it is in the cache. We need to wait for
+    pending I/O before re-initializing the cache, because we may change
+    the block size. Otherwise they could check for blocks at file
+    positions where the new block division has none. We do also want to
+    wait for I/O done when (if) the cache was disabled. It must not
+    run in parallel with normal cache operation.
+  */
+  while (keycache->cnt_for_resize_op)
+    wait_on_queue(&keycache->waiting_for_resize_cnt, &keycache->cache_lock);
+
+  end_simple_key_cache(keycache, 0);  /* cleanup=0: the mutex is kept */
+
+finish:
+  if (release_lock)
+    keycache_pthread_mutex_unlock(&keycache->cache_lock);
+  DBUG_RETURN(res);
+}
+
+
+/*
+  Finalize resizing a simple key cache
+
+  SYNOPSIS
+    finish_resize_simple_key_cache()
+    keycache                pointer to the control block of a simple key cache
+
+  DESCRIPTION
+    This function performs finalizing actions for the operation of
+    resizing a simple key cache. The function takes the parameter
+    keycache as a pointer to the control block structure of the type
+    SIMPLE_KEY_CACHE_CB for this key cache.
The function sets the flag
+    in_resize in this structure to FALSE.
+
+  RETURN VALUE
+    none
+
+  NOTES
+    This function is the called by resize_simple_key_cache and
+    resize_partitioned_key_cache that resize simple and partitioned key caches
+    respectively.
+*/
+
+static
+void finish_resize_simple_key_cache(SIMPLE_KEY_CACHE_CB *keycache)
+{
+  DBUG_ENTER("finish_resize_simple_key_cache");
+
+  mysql_mutex_assert_owner(&keycache->cache_lock); /* caller holds the lock */
+
+  /*
+    Mark the resize finished. This allows other threads to start a
+    resize or to request new cache blocks.
+  */
+  keycache->in_resize= 0;
+
+
+  /*
+    Signal waiting threads. These are the threads that queued themselves
+    on resize_queue in prepare_resize_simple_key_cache() while in_resize
+    was set.
+  */
+  release_whole_queue(&keycache->resize_queue);
+
+
+  keycache_pthread_mutex_unlock(&keycache->cache_lock); /* lock is released here */
+
+  DBUG_VOID_RETURN;
+}
+
+
+/*
+  Resize a simple key cache
+
+  SYNOPSIS
+    resize_simple_key_cache()
+    keycache                pointer to the control block of a simple key cache
+    key_cache_block_size    size of blocks to keep cached data
+    use_mem                 memory to use for the key cache buffers/structures
+    division_limit          new division limit (if not zero)
+    age_threshold           new age threshold (if not zero)
+
+  DESCRIPTION
+    This function is the implementation of the resize_key_cache interface
+    function that is employed by simple (non-partitioned) key caches.
+    The function takes the parameter keycache as a pointer to the
+    control block structure of the type SIMPLE_KEY_CACHE_CB for the simple key
+    cache to be resized.
+    The parameter key_cache_block_size specifies the new size of the blocks in
+    the key cache. The parameters division_limit and age_threshold
+    determine the new initial values of those characteristics of the key cache
+    that are used for midpoint insertion strategy. The parameter use_mem
+    specifies the total amount of memory to be allocated for key cache blocks
+    and auxiliary structures in the new key cache.
+
+  RETURN VALUE
+    number of blocks in the key cache, if successful,
+    0 - otherwise.
+
+  NOTES.
+    The function first calls the function prepare_resize_simple_key_cache
+    to flush all dirty blocks from key cache, to free memory used
+    for key cache blocks and auxiliary structures. After this the
+    function builds a new key cache with new parameters.
+
+    This implementation doesn't block the calls and executions of other
+    functions from the key cache interface. However it assumes that the
+    calls of resize_simple_key_cache itself are serialized.
+
+    The function starts the operation only when all other threads
+    performing operations with the key cache let her to proceed
+    (when cnt_for_resize=0).
+*/
+
+static
+int resize_simple_key_cache(SIMPLE_KEY_CACHE_CB *keycache,
+                            uint key_cache_block_size,
+                            size_t use_mem, uint division_limit,
+                            uint age_threshold, uint changed_blocks_hash_size)
+{
+  int blocks= 0;                 /* returned unchanged if the preparation fails */
+  DBUG_ENTER("resize_simple_key_cache");
+
+  DBUG_ASSERT(keycache->key_cache_inited);
+
+  /*
+    Note that the cache_lock mutex and the resize_queue are left untouched.
+    We do not lose the cache_lock and will release it only at the end of
+    this function.
+    prepare_resize_simple_key_cache() is called with release_lock=0, so it
+    returns with cache_lock held; finish_resize_simple_key_cache() is the
+    call that unlocks it, on the success and on the failure path.
+  */
+  if (prepare_resize_simple_key_cache(keycache, 0))
+    goto finish;
+
+  /* The following will work even if use_mem is 0 */
+  blocks= init_simple_key_cache(keycache, key_cache_block_size, use_mem,
+                                division_limit, age_threshold,
+                                changed_blocks_hash_size);
+
+finish:
+  finish_resize_simple_key_cache(keycache);
+
+  DBUG_RETURN(blocks);
+}
+
+
+/*
+  Increment counter blocking resize key cache operation.
+  prepare_resize_simple_key_cache() waits until keycache->cnt_for_resize_op
+  is zero before it destroys the cache.
+*/
+static inline void inc_counter_for_resize_op(SIMPLE_KEY_CACHE_CB *keycache)
+{
+  keycache->cnt_for_resize_op++;
+}
+
+
+/*
+  Decrement counter blocking resize key cache operation;
+  Signal the operation to proceed when counter becomes equal zero:
+  every thread queued on keycache->waiting_for_resize_cnt is released.
+*/
+static inline void dec_counter_for_resize_op(SIMPLE_KEY_CACHE_CB *keycache)
+{
+  if (!--keycache->cnt_for_resize_op)
+    release_whole_queue(&keycache->waiting_for_resize_cnt);
+}
+
+
+/*
+  Change key cache parameters of a simple key cache
+
+  SYNOPSIS
+    change_simple_key_cache_param()
+    keycache                pointer to the control block of a simple key cache
+    division_limit          new division limit (if not zero)
+    age_threshold           new age threshold (if not zero)
+
+  DESCRIPTION
+    This function is the implementation of the change_key_cache_param interface
+    function that is employed by simple (non-partitioned) key caches.
+    The function takes the parameter keycache as a pointer to the
+    control block structure of the type SIMPLE_KEY_CACHE_CB for the simple key
+    cache where new values of the division limit and the age threshold used
+    for midpoint insertion strategy are to be set. The parameters
+    division_limit and age_threshold provide these new values.
+
+  RETURN VALUE
+    none
+
+  NOTES.
+    Presently the function resets the key cache parameters concerning
+    midpoint insertion strategy - division_limit and age_threshold.
+    This function changes some parameters of a given key cache without
+    reformatting it. The function does not touch the contents the key
+    cache blocks.
+*/
+
+static
+void change_simple_key_cache_param(SIMPLE_KEY_CACHE_CB *keycache, uint division_limit,
+                                   uint age_threshold)
+{
+  DBUG_ENTER("change_simple_key_cache_param");
+  keycache_pthread_mutex_lock(&keycache->cache_lock); /* both updates happen
+                                    under cache_lock.
+                                    NOTE(review): disk_blocks is -1 while no
+                                    cache memory is allocated (see
+                                    init_simple_key_cache and
+                                    end_simple_key_cache) and is not checked
+                                    here - confirm callers only get here with
+                                    an allocated cache. */
+  if (division_limit)            /* zero leaves min_warm_blocks unchanged */
+    keycache->min_warm_blocks= (keycache->disk_blocks *
+                                division_limit / 100 + 1);
+  if (age_threshold)             /* zero leaves age_threshold unchanged */
+    keycache->age_threshold=   (keycache->disk_blocks *
+                                age_threshold / 100);
+  keycache_pthread_mutex_unlock(&keycache->cache_lock);
+  DBUG_VOID_RETURN;
+}
+
+
+/*
+  Destroy a simple key cache
+
+  SYNOPSIS
+    end_simple_key_cache()
+    keycache                pointer to the control block of a simple key cache
+    cleanup                 <=> complete free (free also mutex for key cache)
+
+  DESCRIPTION
+    This function is the implementation of the end_key_cache interface
+    function that is employed by simple (non-partitioned) key caches.
+    The function takes the parameter keycache as a pointer to the
+    control block structure of the type SIMPLE_KEY_CACHE_CB for the simple key
+    cache to be destroyed.
+    The function frees the memory allocated for the key cache blocks and
+    auxiliary structures. If the value of the parameter cleanup is TRUE
+    then even the key cache mutex is freed.
+
+  RETURN VALUE
+    none
+*/
+
+static
+void end_simple_key_cache(SIMPLE_KEY_CACHE_CB *keycache, my_bool cleanup)
+{
+  DBUG_ENTER("end_simple_key_cache");
+  DBUG_PRINT("enter", ("key_cache: %p", keycache));
+
+  if (!keycache->key_cache_inited)
+    DBUG_VOID_RETURN;
+
+  if (keycache->disk_blocks > 0)
+  {
+    if (keycache->block_mem)
+    {
+      my_large_free((uchar*) keycache->block_mem, keycache->allocated_mem_size);
+      keycache->block_mem= NULL;
+      my_free(keycache->block_root);
+      keycache->block_root= NULL;
+    }
+    keycache->disk_blocks= -1;   /* -1: no blocks are allocated */
+    /* Reset blocks_changed to be safe if flush_all_key_blocks is called */
+    keycache->blocks_changed= 0;
+  }
+
+  DBUG_PRINT("status", ("used: %lu changed: %lu w_requests: %lu "
+                        "writes: %lu r_requests: %lu reads: %lu",
+                        keycache->blocks_used, keycache->global_blocks_changed,
+                        (ulong) keycache->global_cache_w_requests,
+                        (ulong) keycache->global_cache_write,
+                        (ulong) keycache->global_cache_r_requests,
+                        (ulong) keycache->global_cache_read));
+
+  /*
+    Reset these values to be able to detect a disabled key cache.
+    See Bug#44068 (RESTORE can disable the MyISAM Key Cache).
+  */
+  keycache->blocks_used= 0;
+  keycache->blocks_unused= 0;
+
+  if (cleanup)
+  {
+    mysql_mutex_destroy(&keycache->cache_lock);
+    keycache->key_cache_inited= keycache->can_be_used= 0;
+    KEYCACHE_DEBUG_CLOSE;
+  }
+  DBUG_VOID_RETURN;
+} /* end_simple_key_cache */
+
+
+/*
+  Link a thread into double-linked queue of waiting threads.
+
+  SYNOPSIS
+    link_into_queue()
+      wqueue              pointer to the queue structure
+      thread              pointer to the thread to be added to the queue
+
+  RETURN VALUE
+    none
+
+  NOTES.
+    Queue is represented by a circular list of the thread structures
+    The list is double-linked of the type (**prev,*next), accessed by
+    a pointer to the last element.
+    The added thread becomes the new last element (wqueue->last_thread);
+    its 'next' then points to the first element of the queue.
+*/
+
+static void link_into_queue(KEYCACHE_WQUEUE *wqueue,
+                            struct st_my_thread_var *thread)
+{
+  struct st_my_thread_var *last;
+  DBUG_ASSERT(!thread->next && !thread->prev);
+
+  if (! (last= wqueue->last_thread))
+  {
+    /* Queue is empty */
+    thread->next= thread;
+    thread->prev= &thread->next;
+  }
+  else
+  {
+    DBUG_ASSERT(last->next->prev == &last->next);
+    /* Add backlink to previous element */
+    thread->prev=      last->next->prev;
+    /* Fix first in list to point backwards to current */
+    last->next->prev=  &thread->next;
+    /* Next should point to the first element in list */
+    thread->next=      last->next;
+    /* Fix old element to point to new one */
+    last->next=        thread;
+  }
+  wqueue->last_thread= thread;
+}
+
+/*
+  Unlink a thread from double-linked queue of waiting threads
+
+  SYNOPSIS
+    unlink_from_queue()
+      wqueue              pointer to the queue structure
+      thread              pointer to the thread to be removed from the queue
+
+  RETURN VALUE
+    none
+
+  NOTES.
+    See NOTES for link_into_queue
+    thread->next is always reset to NULL; thread->prev is reset only when
+    DBUG_ASSERT_EXISTS is defined.
+*/
+
+static void unlink_from_queue(KEYCACHE_WQUEUE *wqueue,
+                              struct st_my_thread_var *thread)
+{
+  KEYCACHE_DBUG_PRINT("unlink_from_queue", ("thread %ld", (ulong) thread->id));
+  DBUG_ASSERT(thread->next && thread->prev);
+
+  if (thread->next == thread)
+  {
+    /* The queue contains only one member */
+    wqueue->last_thread= NULL;
+  }
+  else
+  {
+    /* Remove current element from list */
+    thread->next->prev= thread->prev;
+    *thread->prev=      thread->next;
+    /*
+      If this was the last element (the one wqueue->last_thread points to),
+      change the list pointer to point to the previous element. STRUCT_PTR
+      recovers that element from the address of its 'next' field.
+    */
+    if (wqueue->last_thread == thread)
+      wqueue->last_thread= STRUCT_PTR(struct st_my_thread_var, next,
+                                      thread->prev);
+  }
+  thread->next= NULL;
+#ifdef DBUG_ASSERT_EXISTS
+  /*
+    This makes it easier to see it's not in a chain during debugging.
+    And some DBUG_ASSERT() rely on it.
+  */
+  thread->prev= NULL;
+#endif
+}
+
+
+/*
+  Add a thread to single-linked queue of waiting threads
+
+  SYNOPSIS
+    wait_on_queue()
+      wqueue            Pointer to the queue structure.
+      mutex             Cache_lock to acquire after awake.
+
+  RETURN VALUE
+    none
+
+  NOTES.
+    Queue is represented by a circular list of the thread structures
+    The list is single-linked of the type (*next), accessed by a pointer
+    to the last element.
+
+    The function protects against stray signals by verifying that the
+    current thread is unlinked from the queue when awaking. However,
+    since several threads can wait for the same event, it might be
+    necessary for the caller of the function to check again if the
+    condition for awake is indeed matched.
+*/
+
+static void wait_on_queue(KEYCACHE_WQUEUE *wqueue,
+                          mysql_mutex_t *mutex)
+{
+  struct st_my_thread_var *last;
+  struct st_my_thread_var *thread= my_thread_var;
+  DBUG_ASSERT(!thread->next);
+  DBUG_ASSERT(!thread->prev); /* Not required, but must be true anyway. */
+  mysql_mutex_assert_owner(mutex);
+
+  /* Add to queue. The current thread becomes the new last element. */
+  if (! (last= wqueue->last_thread))
+    thread->next= thread;
+  else
+  {
+    thread->next= last->next;
+    last->next= thread;
+  }
+  wqueue->last_thread= thread;
+
+  /*
+    Wait until thread is removed from queue by the signaling thread.
+    The loop protects against stray signals.
+    release_whole_queue() sets thread->next to NULL before it signals,
+    which is the condition that ends this loop.
+  */
+  do
+  {
+    KEYCACHE_DBUG_PRINT("wait", ("suspend thread %ld", (ulong) thread->id));
+    keycache_pthread_cond_wait(&thread->suspend, mutex);
+  }
+  while (thread->next);
+}
+
+
+/*
+  Remove all threads from queue signaling them to proceed
+
+  SYNOPSIS
+    release_whole_queue()
+      wqueue            pointer to the queue structure
+
+  RETURN VALUE
+    none
+
+  NOTES.
+    See notes for wait_on_queue().
+    When removed from the queue each thread is signaled via condition
+    variable thread->suspend.
+    Threads are taken starting with last->next (the oldest waiter) and
+    ending with 'last'. An empty queue is left untouched.
+*/
+
+static void release_whole_queue(KEYCACHE_WQUEUE *wqueue)
+{
+  struct st_my_thread_var *last;
+  struct st_my_thread_var *next;
+  struct st_my_thread_var *thread;
+
+  /* Queue may be empty. */
+  if (!(last= wqueue->last_thread))
+    return;
+
+  next= last->next;                             /* First (oldest) element */
+  do
+  {
+    thread=next;
+    DBUG_ASSERT(thread && thread->init == 1);
+    KEYCACHE_DBUG_PRINT("release_whole_queue: signal",
+                        ("thread %ld", (ulong) thread->id));
+    /* Take thread from queue. 'next' is saved before the link is cleared. */
+    next= thread->next;
+    thread->next= NULL;
+
+    /* Signal the thread. */
+    keycache_pthread_cond_signal(&thread->suspend);
+  }
+  while (thread != last);
+
+  /* Now queue is definitely empty. */
+  wqueue->last_thread= NULL;
+}
+
+
+/*
+  Unlink a block from the chain of dirty/clean blocks.
+
+  block->prev_changed holds the address of the pointer that refers to the
+  block (asserted below); that pointer is redirected to the block's
+  successor. The block's own links are reset only when DBUG_ASSERT_EXISTS
+  is defined.
+*/
+
+static inline void unlink_changed(BLOCK_LINK *block)
+{
+  DBUG_ASSERT(block->prev_changed && *block->prev_changed == block);
+  if (block->next_changed)
+    block->next_changed->prev_changed= block->prev_changed;
+  *block->prev_changed= block->next_changed;
+
+#ifdef DBUG_ASSERT_EXISTS
+  /*
+    This makes it easier to see it's not in a chain during debugging.
+    And some DBUG_ASSERT() rely on it.
+  */
+  block->next_changed= NULL;
+  block->prev_changed= NULL;
+#endif
+}
+
+
+/*
+  Link a block into the chain of dirty/clean blocks.
+
+  The block is inserted at the head of the chain: *phead is made to point
+  to it and the former head, if any, becomes its successor. The block must
+  not be linked in a chain already (asserted below).
+*/
+
+static inline void link_changed(BLOCK_LINK *block, BLOCK_LINK **phead)
+{
+  DBUG_ASSERT(!block->next_changed);
+  DBUG_ASSERT(!block->prev_changed);
+  block->prev_changed= phead;
+  if ((block->next_changed= *phead))
+    (*phead)->prev_changed= &block->next_changed;
+  *phead= block;
+}
+
+
+/*
+  Link a block in a chain of clean blocks of a file.
+
+  SYNOPSIS
+    link_to_file_list()
+      keycache		Key cache handle
+      block             Block to relink
+      file              File to be linked to
+      unlink            If to unlink first
+
+  DESCRIPTION
+    Unlink a block from whichever chain it is linked in, if it's
+    asked for, and link it to the chain of clean blocks of the
+    specified file.
+
+  NOTE
+    Please do never set/clear BLOCK_CHANGED outside of
+    link_to_file_list() or link_to_changed_list().
+    You would risk to damage correct counting of changed blocks
+    and to find blocks in the wrong hash.
+
+  RETURN
+    void
+*/
+
+static void link_to_file_list(SIMPLE_KEY_CACHE_CB *keycache,
+                              BLOCK_LINK *block, int file,
+                              my_bool unlink_block)
+{
+  DBUG_ASSERT(block->status & BLOCK_IN_USE);
+  DBUG_ASSERT(block->hash_link && block->hash_link->block == block);
+  DBUG_ASSERT(block->hash_link->file == file);
+  if (unlink_block)
+    unlink_changed(block);
+  link_changed(block, &keycache->file_blocks[FILE_HASH(file, keycache)]);
+  if (block->status & BLOCK_CHANGED)  /* block was dirty: clear the flag and
+                                         take it out of both counters */
+  {
+    block->status&= ~BLOCK_CHANGED;
+    keycache->blocks_changed--;
+    keycache->global_blocks_changed--;
+  }
+}
+
+
+/*
+  Re-link a block from the clean chain to the dirty chain of a file.
+
+  SYNOPSIS
+    link_to_changed_list()
+      keycache		key cache handle
+      block             block to relink
+
+  DESCRIPTION
+    Unlink a block from the chain of clean blocks of a file
+    and link it to the chain of dirty blocks of the same file.
+    The file is taken from block->hash_link->file. BLOCK_CHANGED is set
+    and both blocks_changed and global_blocks_changed are incremented;
+    the block must not be marked BLOCK_CHANGED on entry (asserted).
+
+  NOTE
+    Never set/clear BLOCK_CHANGED outside of
+    link_to_file_list() or link_to_changed_list().
+    Doing so risks breaking the counting of changed blocks
+    and finding blocks in the wrong hash.
+
+  RETURN
+    void
+*/
+
+static void link_to_changed_list(SIMPLE_KEY_CACHE_CB *keycache,
+                                 BLOCK_LINK *block)
+{
+  DBUG_ASSERT(block->status & BLOCK_IN_USE);
+  DBUG_ASSERT(!(block->status & BLOCK_CHANGED));
+  DBUG_ASSERT(block->hash_link && block->hash_link->block == block);
+
+  unlink_changed(block);
+  link_changed(block,
+               &keycache->changed_blocks[FILE_HASH(block->hash_link->file, keycache)]);
+  block->status|=BLOCK_CHANGED;
+  keycache->blocks_changed++;
+  keycache->global_blocks_changed++;
+}
+
+
+/*
+  Link a block to the LRU chain at the beginning or at the end of
+  one of two parts.
+
+  SYNOPSIS
+    link_block()
+      keycache            pointer to a key cache data structure
+      block               pointer to the block to link to the LRU chain
+      hot                 <-> to link the block into the hot subchain
+      at_end              <-> to link the block at the end of the subchain
+
+  RETURN VALUE
+    none
+
+  NOTES.
+    The LRU ring is represented by a circular list of block structures.
+    The list is double-linked of the type (**prev,*next) type.
+    The LRU ring is divided into two parts - hot and warm.
+    There are two pointers to access the last blocks of these two
+    parts. The beginning of the warm part follows right after the
+    end of the hot part.
+    Only blocks of the warm part can be used for eviction.
+    The first block from the beginning of this subchain is always
+    taken for eviction (keycache->last_used->next)
+
+    LRU chain:       +------+   H O T    +------+
+                +----| end  |----...<----| beg  |----+
+                |    +------+last        +------+    |
+                v<-link in latest hot (new end)      |
+                |     link in latest warm (new end)->^
+                |    +------+  W A R M   +------+    |
+                +----| beg  |---->...----| end  |----+
+                     +------+            +------+ins
+                  first for eviction
+
+    It is also possible that the block is selected for eviction and thus
+    not linked in the LRU ring.
+*/
+
+static void link_block(SIMPLE_KEY_CACHE_CB *keycache, BLOCK_LINK *block,
+                       my_bool hot, my_bool at_end)
+{
+  BLOCK_LINK *ins;
+  BLOCK_LINK **pins;
+
+  DBUG_ASSERT((block->status & ~BLOCK_CHANGED) == (BLOCK_READ | BLOCK_IN_USE));
+  DBUG_ASSERT(block->hash_link); /*backptr to block NULL from free_block()*/
+  DBUG_ASSERT(!block->requests);
+  DBUG_ASSERT(block->prev_changed && *block->prev_changed == block);
+  DBUG_ASSERT(!block->next_used);
+  DBUG_ASSERT(!block->prev_used);
+  if (!hot && keycache->waiting_for_block.last_thread)
+  {
+    /* Signal that in the LRU warm sub-chain an available block has appeared */
+    struct st_my_thread_var *last_thread=
+                               keycache->waiting_for_block.last_thread;
+    struct st_my_thread_var *first_thread= last_thread->next;
+    struct st_my_thread_var *next_thread= first_thread;
+    HASH_LINK *hash_link= (HASH_LINK *) first_thread->keycache_link;
+    struct st_my_thread_var *thread;
+    do
+    {
+      thread= next_thread;
+      next_thread= thread->next;  /* saved first: unlink_from_queue() below
+                                     clears thread->next */
+      /*
+         We notify about the event all threads that ask
+         for the same page as the first thread in the queue
+      */
+      if ((HASH_LINK *) thread->keycache_link == hash_link)
+      {
+        KEYCACHE_DBUG_PRINT("link_block: signal",
+                            ("thread %ld", (ulong) thread->id));
+        keycache_pthread_cond_signal(&thread->suspend);
+        unlink_from_queue(&keycache->waiting_for_block, thread);
+        block->requests++;        /* one request per signalled thread */
+      }
+    }
+    while (thread != last_thread);
+    hash_link->block= block;
+    /*
+      NOTE: We assigned the block to the hash_link and signalled the
+      requesting thread(s). But it is possible that other threads run
+      first. These threads see the hash_link assigned to a block which
+      is assigned to another hash_link and not marked BLOCK_IN_SWITCH.
+      This can be a problem for functions that do not select the block
+      via its hash_link: flush and free. They do only see a block which
+      is in a "normal" state and don't know that it will be evicted soon.
+
+      We cannot set BLOCK_IN_SWITCH here because only one of the
+      requesting threads must handle the eviction. All others must wait
+      for it to complete. If we set the flag here, the threads would not
+      know who is in charge of the eviction. Without the flag, the first
+      thread takes the stick and sets the flag.
+
+      But we need to note in the block that it has been selected for
+      eviction. It must not be freed. The evicting thread will not
+      expect the block in the free list. Before freeing we could also
+      check if block->requests > 1. But I think including another flag
+      in the check of block->status is slightly more efficient and
+      probably easier to read.
+    */
+    block->status|= BLOCK_IN_EVICTION;
+    KEYCACHE_THREAD_TRACE("link_block: after signaling");
+#if defined(KEYCACHE_DEBUG)
+    KEYCACHE_DBUG_PRINT("link_block",
+        ("linked,unlinked block %u status=%x #requests=%u #available=%u",
+         BLOCK_NUMBER(block), block->status,
+         block->requests, keycache->blocks_available));
+#endif
+    return;                       /* the block is not linked into the ring */
+  }
+  pins= hot ? &keycache->used_ins : &keycache->used_last; /* insertion point:
+                                     used_ins for hot, used_last otherwise */
+  ins= *pins;
+  if (ins)
+  {
+    ins->next_used->prev_used= &block->next_used;
+    block->next_used= ins->next_used;
+    block->prev_used= &ins->next_used;
+    ins->next_used= block;
+    if (at_end)
+      *pins= block;               /* the block becomes the new end */
+  }
+  else
+  {
+    /* The LRU ring is empty. Let the block point to itself. */
+    keycache->used_last= keycache->used_ins= block->next_used= block;
+    block->prev_used= &block->next_used;
+  }
+  KEYCACHE_THREAD_TRACE("link_block");
+#if defined(KEYCACHE_DEBUG)
+  keycache->blocks_available++;
+  KEYCACHE_DBUG_PRINT("link_block",
+      ("linked block %u:%1u status=%x #requests=%u #available=%u",
+       BLOCK_NUMBER(block), at_end, block->status,
+       block->requests, keycache->blocks_available));
+  KEYCACHE_DBUG_ASSERT((ulong) keycache->blocks_available <=
+                       keycache->blocks_used);
+#endif
+}
+
+
+/*
+  Unlink a block from the LRU chain
+
+  SYNOPSIS
+    unlink_block()
+      keycache            pointer to a key cache data structure
+      block               pointer to the block to unlink from the LRU chain
+
+  RETURN VALUE
+    none
+
+  NOTES.
+ See NOTES for link_block +*/ + +static void unlink_block(SIMPLE_KEY_CACHE_CB *keycache, BLOCK_LINK *block) +{ + DBUG_ASSERT((block->status & ~BLOCK_CHANGED) == (BLOCK_READ | BLOCK_IN_USE)); + DBUG_ASSERT(block->hash_link); /*backptr to block NULL from free_block()*/ + DBUG_ASSERT(!block->requests); + DBUG_ASSERT(block->prev_changed && *block->prev_changed == block); + DBUG_ASSERT(block->next_used && block->prev_used && + (block->next_used->prev_used == &block->next_used) && + (*block->prev_used == block)); + if (block->next_used == block) + /* The list contains only one member, so the ring becomes empty */ + keycache->used_last= keycache->used_ins= NULL; + else + { + block->next_used->prev_used= block->prev_used; + *block->prev_used= block->next_used; /* an anchor that pointed at this block is moved to its predecessor below; STRUCT_PTR presumably maps the address of the next_used member back to the owning BLOCK_LINK - confirm against its definition */ + if (keycache->used_last == block) + keycache->used_last= STRUCT_PTR(BLOCK_LINK, next_used, block->prev_used); + if (keycache->used_ins == block) + keycache->used_ins=STRUCT_PTR(BLOCK_LINK, next_used, block->prev_used); + } + block->next_used= NULL; +#ifdef DBUG_ASSERT_EXISTS + /* + This makes it easier to see it's not in a chain during debugging. + And some DBUG_ASSERT() rely on it. + */ + block->prev_used= NULL; +#endif + + KEYCACHE_THREAD_TRACE("unlink_block"); +#if defined(KEYCACHE_DEBUG) + KEYCACHE_DBUG_ASSERT(keycache->blocks_available != 0); + keycache->blocks_available--; + KEYCACHE_DBUG_PRINT("unlink_block", + ("unlinked block %u status=%x #requests=%u #available=%u", + BLOCK_NUMBER(block), block->status, + block->requests, keycache->blocks_available)); +#endif +} + + +/* + Register requests for a block. + + SYNOPSIS + reg_requests() + keycache Pointer to a key cache data structure. + block Pointer to the block to register a request on. + count Number of requests. Always 1. + + NOTE + The first request unlinks the block from the LRU ring. This means + that it is protected against eviction. 
+ + RETURN + void +*/ +static void reg_requests(SIMPLE_KEY_CACHE_CB *keycache, + BLOCK_LINK *block, int count) +{ + DBUG_ASSERT(block->status & BLOCK_IN_USE); + DBUG_ASSERT(block->hash_link); + + if (!block->requests) + unlink_block(keycache, block); /* first request: take the block out of the LRU ring */ + block->requests+=count; +} + + +/* + Unregister request for a block + linking it to the LRU chain if it's the last request + + SYNOPSIS + unreg_request() + keycache pointer to a key cache data structure + block pointer to the block to link to the LRU chain + at_end <-> to link the block at the end of the LRU chain + + RETURN VALUE + none + + NOTES. + Every linking to the LRU ring decrements by one a special block + counter (if it's positive). If the at_end parameter is TRUE the block is + added either at the end of warm sub-chain or at the end of hot sub-chain. + It is added to the hot subchain if its counter is zero and number of + blocks in warm sub-chain is not less than some low limit (determined by + the division_limit parameter). Otherwise the block is added to the warm + sub-chain. If the at_end parameter is FALSE the block is always added + at beginning of the warm sub-chain. + Thus a warm block can be promoted to the hot sub-chain when its counter + becomes zero for the first time. + At the same time the block at the very beginning of the hot subchain + might be moved to the beginning of the warm subchain if it stays untouched + for a too long time (this time is determined by parameter age_threshold). + + It is also possible that the block is selected for eviction and thus + not linked in the LRU ring. 
+*/ + +static void unreg_request(SIMPLE_KEY_CACHE_CB *keycache, + BLOCK_LINK *block, int at_end) +{ + DBUG_ASSERT(block->status & (BLOCK_READ | BLOCK_IN_USE)); + DBUG_ASSERT(block->hash_link); /*backptr to block NULL from free_block()*/ + DBUG_ASSERT(block->requests); + DBUG_ASSERT(block->prev_changed && *block->prev_changed == block); + DBUG_ASSERT(!block->next_used); + DBUG_ASSERT(!block->prev_used); + /* + Unregister the request, but do not link erroneous blocks into the + LRU ring. + */ + if (!--block->requests && !(block->status & BLOCK_ERROR)) + { + my_bool hot; + if (block->hits_left) + block->hits_left--; + hot= !block->hits_left && at_end && + keycache->warm_blocks > keycache->min_warm_blocks; /* promote only when the hit counter is exhausted, at_end was requested and more than min_warm_blocks warm blocks exist */ + if (hot) + { + if (block->temperature == BLOCK_WARM) + keycache->warm_blocks--; + block->temperature= BLOCK_HOT; + KEYCACHE_DBUG_PRINT("unreg_request", ("#warm_blocks: %lu", + keycache->warm_blocks)); + } + link_block(keycache, block, hot, (my_bool)at_end); + block->last_hit_time= keycache->keycache_time; + keycache->keycache_time++; + /* + At this place, the block might be in the LRU ring or not. If an + evicter was waiting for a block, it was selected for eviction and + not linked in the LRU ring. + */ + + /* + Check if we should link a hot block to the warm block sub-chain. + It is possible that we select the same block as above. But it can + also be another block. In any case a block from the LRU ring is + selected. In other words it works even if the above block was + selected for eviction and not linked in the LRU ring. Since this + happens only if the LRU ring is empty, the block selected below + would be NULL and the rest of the function skipped. 
+ */ + block= keycache->used_ins; + if (block && keycache->keycache_time - block->last_hit_time > + keycache->age_threshold) + { + unlink_block(keycache, block); + link_block(keycache, block, 0, 0); /* re-insert with hot == 0 and at_end == 0 */ + if (block->temperature != BLOCK_WARM) + { + keycache->warm_blocks++; + block->temperature= BLOCK_WARM; + } + KEYCACHE_DBUG_PRINT("unreg_request", ("#warm_blocks: %lu", + keycache->warm_blocks)); + } + } +} + +/* + Remove a reader of the page in block + + Decrements the request counter of the hash_link of the block. When it + reaches zero and a condition variable is registered in block->condvar, + that condition variable is signalled. +*/ + +static void remove_reader(BLOCK_LINK *block) +{ + DBUG_ASSERT(block->status & (BLOCK_READ | BLOCK_IN_USE)); + DBUG_ASSERT(block->hash_link && block->hash_link->block == block); + DBUG_ASSERT(block->prev_changed && *block->prev_changed == block); + DBUG_ASSERT(!block->next_used); + DBUG_ASSERT(!block->prev_used); + DBUG_ASSERT(block->hash_link->requests); + if (! --block->hash_link->requests && block->condvar) + keycache_pthread_cond_signal(block->condvar); +} + + +/* + Wait until the last reader of the page in block + signals on its termination + + The calling thread publishes its condition variable in block->condvar + and waits on it under keycache->cache_lock for as long as + block->hash_link->requests is non-zero. +*/ + +static void wait_for_readers(SIMPLE_KEY_CACHE_CB *keycache, + BLOCK_LINK *block) +{ + struct st_my_thread_var *thread= my_thread_var; + DBUG_ASSERT(block->status & (BLOCK_READ | BLOCK_IN_USE)); + DBUG_ASSERT(!(block->status & (BLOCK_IN_FLUSH | BLOCK_CHANGED))); + DBUG_ASSERT(block->hash_link); + DBUG_ASSERT(block->hash_link->block == block); + /* Linked in file_blocks or changed_blocks hash. */ + DBUG_ASSERT(block->prev_changed && *block->prev_changed == block); + /* Not linked in LRU ring. */ + DBUG_ASSERT(!block->next_used); + DBUG_ASSERT(!block->prev_used); + while (block->hash_link->requests) + { + KEYCACHE_DBUG_PRINT("wait_for_readers: wait", + ("suspend thread %ld block %u", + (ulong) thread->id, BLOCK_NUMBER(block))); + /* There must be no other waiter. We have no queue here. 
*/ + DBUG_ASSERT(!block->condvar); + block->condvar= &thread->suspend; + keycache_pthread_cond_wait(&thread->suspend, &keycache->cache_lock); + block->condvar= NULL; + } +} + + +/* + Add a hash link to a bucket in the hash_table + + The link is inserted at the head of the list that starts at *start. + hash_link->prev is set to the address of the pointer that refers to + the link (here: start), which is what unlink_hash() writes through. +*/ + +static inline void link_hash(HASH_LINK **start, HASH_LINK *hash_link) +{ + if (*start) + (*start)->prev= &hash_link->next; + hash_link->next= *start; + hash_link->prev= start; + *start= hash_link; +} + + +/* + Remove a hash link from the hash table + + If no thread waits for a free hash link, the link is pushed onto + keycache->free_hash_list. Otherwise it is re-used at once for the page + requested by the first thread in the waiting queue: all queued threads + asking for that same page are signalled and removed from the queue, + and the link is hashed in again under the new (file, diskpos). +*/ + +static void unlink_hash(SIMPLE_KEY_CACHE_CB *keycache, HASH_LINK *hash_link) +{ + KEYCACHE_DBUG_PRINT("unlink_hash", ("fd: %u pos_ %lu #requests=%u", + (uint) hash_link->file,(ulong) hash_link->diskpos, hash_link->requests)); + KEYCACHE_DBUG_ASSERT(hash_link->requests == 0); + if ((*hash_link->prev= hash_link->next)) + hash_link->next->prev= hash_link->prev; + hash_link->block= NULL; + if (keycache->waiting_for_hash_link.last_thread) + { + /* Signal that a free hash link has appeared */ + struct st_my_thread_var *last_thread= + keycache->waiting_for_hash_link.last_thread; + struct st_my_thread_var *first_thread= last_thread->next; + struct st_my_thread_var *next_thread= first_thread; + KEYCACHE_PAGE *first_page= (KEYCACHE_PAGE *) (first_thread->keycache_link); + struct st_my_thread_var *thread; + + hash_link->file= first_page->file; + hash_link->diskpos= first_page->filepos; + do + { + KEYCACHE_PAGE *page; + thread= next_thread; + page= (KEYCACHE_PAGE *) thread->keycache_link; + next_thread= thread->next; + /* + We notify about the event all threads that ask + for the same page as the first thread in the queue + */ + if (page->file == hash_link->file && page->filepos == hash_link->diskpos) + { + KEYCACHE_DBUG_PRINT("unlink_hash: signal", + ("thread %ld", (ulong) thread->id)); + keycache_pthread_cond_signal(&thread->suspend); + unlink_from_queue(&keycache->waiting_for_hash_link, thread); + } + } + while (thread != last_thread); + 
link_hash(&keycache->hash_root[KEYCACHE_HASH(hash_link->file, + hash_link->diskpos)], + hash_link); + return; + } + hash_link->next= keycache->free_hash_list; + keycache->free_hash_list= hash_link; +} + + +/* + Get the hash link for a page + + Searches the bucket for (file, filepos). If there is no such link, one + is taken from the free list or from the not yet used part of + hash_link_root; if neither is available the thread queues itself in + waiting_for_hash_link, waits on its condition variable and then + restarts the search. A request is registered on the returned link. +*/ + +static HASH_LINK *get_hash_link(SIMPLE_KEY_CACHE_CB *keycache, + int file, my_off_t filepos) +{ + reg1 HASH_LINK *hash_link, **start; +#if defined(KEYCACHE_DEBUG) + int cnt; +#endif + + KEYCACHE_DBUG_PRINT("get_hash_link", ("fd: %u pos: %lu", + (uint) file,(ulong) filepos)); + +restart: + /* + Find the bucket in the hash table for the pair (file, filepos); + start contains the head of the bucket list, + hash_link points to the first member of the list + */ + hash_link= *(start= &keycache->hash_root[KEYCACHE_HASH(file, filepos)]); +#if defined(KEYCACHE_DEBUG) + cnt= 0; +#endif + /* Look for an element for the pair (file, filepos) in the bucket chain */ + while (hash_link && + (hash_link->diskpos != filepos || hash_link->file != file)) + { + hash_link= hash_link->next; +#if defined(KEYCACHE_DEBUG) + cnt++; + if (! (cnt <= keycache->hash_links_used)) + { + int i; + for (i=0, hash_link= *start ; + i < cnt ; i++, hash_link= hash_link->next) + { + KEYCACHE_DBUG_PRINT("get_hash_link", ("fd: %u pos: %lu", + (uint) hash_link->file,(ulong) hash_link->diskpos)); + } + } + KEYCACHE_DBUG_ASSERT(cnt <= keycache->hash_links_used); +#endif + } + if (! 
hash_link) + { + /* There is no hash link in the hash table for the pair (file, filepos) */ + if (keycache->free_hash_list) + { + hash_link= keycache->free_hash_list; + keycache->free_hash_list= hash_link->next; + } + else if (keycache->hash_links_used < keycache->hash_links) + { + hash_link= &keycache->hash_link_root[keycache->hash_links_used++]; + } + else + { + /* Wait for a free hash link */ + struct st_my_thread_var *thread= my_thread_var; + KEYCACHE_PAGE page; + KEYCACHE_DBUG_PRINT("get_hash_link", ("waiting")); + page.file= file; + page.filepos= filepos; + thread->keycache_link= (void *) &page; /* unlink_hash() reads the requested page through this pointer */ + link_into_queue(&keycache->waiting_for_hash_link, thread); + KEYCACHE_DBUG_PRINT("get_hash_link: wait", + ("suspend thread %ld", (ulong) thread->id)); + keycache_pthread_cond_wait(&thread->suspend, + &keycache->cache_lock); + thread->keycache_link= NULL; + goto restart; + } + hash_link->file= file; + hash_link->diskpos= filepos; + link_hash(start, hash_link); + } + /* Register the request for the page */ + hash_link->requests++; + + return hash_link; +} + + +/* + Get a block for the file page requested by a keycache read/write operation; + If the page is not in the cache return a free block, if there is none + return the lru block after saving its buffer if the page is dirty. + + SYNOPSIS + + find_key_block() + keycache pointer to a key cache data structure + file handler for the file to read page from + filepos position of the page in the file + init_hits_left how initialize the block counter for the page + wrmode <-> get for writing + page_st out {PAGE_READ,PAGE_TO_BE_READ,PAGE_WAIT_TO_BE_READ} + + RETURN VALUE + Pointer to the found block if successful, 0 - otherwise + + NOTES. + For the page from file positioned at filepos the function checks whether + the page is in the key cache specified by the first parameter. + If this is the case it immediately returns the block. + If not, the function first chooses a block for this page. 
If there is + no not used blocks in the key cache yet, the function takes the block + at the very beginning of the warm sub-chain. It saves the page in that + block if it's dirty before returning the pointer to it. + The function returns in the page_st parameter the following values: + PAGE_READ - if page already in the block, + PAGE_TO_BE_READ - if it is to be read yet by the current thread + WAIT_TO_BE_READ - if it is to be read by another thread + If an error occurs THE BLOCK_ERROR bit is set in the block status. + It might happen that there are no blocks in LRU chain (in warm part) - + all blocks are unlinked for some read/write operations. Then the function + waits until first of this operations links any block back. +*/ + +static BLOCK_LINK *find_key_block(SIMPLE_KEY_CACHE_CB *keycache, + File file, my_off_t filepos, + int init_hits_left, + int wrmode, int *page_st) +{ + HASH_LINK *hash_link; + BLOCK_LINK *block; + int error= 0; + int page_status; + + DBUG_ENTER("find_key_block"); + KEYCACHE_THREAD_TRACE("find_key_block:begin"); + DBUG_PRINT("enter", ("fd: %d pos: %lu wrmode: %d", + file, (ulong) filepos, wrmode)); + KEYCACHE_DBUG_PRINT("find_key_block", ("fd: %d pos: %lu wrmode: %d", + file, (ulong) filepos, + wrmode)); +#if !defined(DBUG_OFF) && defined(EXTRA_DEBUG) + DBUG_EXECUTE("check_keycache2", + test_key_cache(keycache, "start of find_key_block", 0);); +#endif + +restart: + /* + If the flush phase of a resize operation fails, the cache is left + unusable. This will be detected only after "goto restart". + */ + if (!keycache->can_be_used) + DBUG_RETURN(0); + + /* + Find the hash_link for the requested file block (file, filepos). We + do always get a hash_link here. It has registered our request so + that no other thread can use it for another file block until we + release the request (which is done by remove_reader() usually). The + hash_link can have a block assigned to it or not. If there is a + block, it may be assigned to this hash_link or not. 
In cases where a + block is evicted from the cache, it is taken from the LRU ring and + referenced by the new hash_link. But the block can still be assigned + to its old hash_link for some time if it needs to be flushed first, + or if there are other threads still reading it. + + Summary: + hash_link is always returned. + hash_link->block can be: + - NULL or + - not assigned to this hash_link or + - assigned to this hash_link. If assigned, the block can have + - invalid data (when freshly assigned) or + - valid data. Valid data can be + - changed over the file contents (dirty) or + - not changed (clean). + */ + hash_link= get_hash_link(keycache, file, filepos); + DBUG_ASSERT((hash_link->file == file) && (hash_link->diskpos == filepos)); + + page_status= -1; + if ((block= hash_link->block) && + block->hash_link == hash_link && (block->status & BLOCK_READ)) + { + /* Assigned block with valid (changed or unchanged) contents. */ + page_status= PAGE_READ; + } + /* + else (page_status == -1) + - block == NULL or + - block not assigned to this hash_link or + - block assigned but not yet read from file (invalid data). + */ + + if (keycache->in_resize) + { + /* This is a request during a resize operation */ + + if (!block) + { + struct st_my_thread_var *thread; + + /* + The file block is not in the cache. We don't need it in the + cache: we are going to read or write directly to file. Cancel + the request. We can simply decrement hash_link->requests because + we did not release cache_lock since increasing it. So no other + thread can wait for our request to become released. + */ + if (hash_link->requests == 1) + { + /* + We are the only one to request this hash_link (this file/pos). + Free the hash_link. + */ + hash_link->requests--; + unlink_hash(keycache, hash_link); + DBUG_RETURN(0); + } + + /* + More requests on the hash_link. Someone tries to evict a block + for this hash_link (could have started before resizing started). + This means that the LRU ring is empty. 
Otherwise a block could + be assigned immediately. Behave like a thread that wants to + evict a block for this file/pos. Add to the queue of threads + waiting for a block. Wait until there is one assigned. + + Refresh the request on the hash-link so that it cannot be reused + for another file/pos. + */ + thread= my_thread_var; + thread->keycache_link= (void *) hash_link; + link_into_queue(&keycache->waiting_for_block, thread); + do + { + KEYCACHE_DBUG_PRINT("find_key_block: wait", + ("suspend thread %ld", (ulong) thread->id)); + keycache_pthread_cond_wait(&thread->suspend, + &keycache->cache_lock); + } while (thread->next); + thread->keycache_link= NULL; + /* + A block should now be assigned to the hash_link. But it may + still need to be evicted. Anyway, we should re-check the + situation. page_status must be set correctly. + */ + hash_link->requests--; + goto restart; + } /* end of if (!block) */ + + /* + There is a block for this file/pos in the cache. Register a + request on it. This unlinks it from the LRU ring (if it is there) + and hence protects it against eviction (if not already in + eviction). We need this for returning the block to the caller, for + calling remove_reader() (for debugging purposes), and for calling + free_block(). The only case where we don't need the request is if + the block is in eviction. In that case we have to unregister the + request later. + */ + reg_requests(keycache, block, 1); + + if (page_status != PAGE_READ) + { + /* + - block not assigned to this hash_link or + - block assigned but not yet read from file (invalid data). + + This must be a block in eviction. It will be read soon. We need + to wait here until this happened. Otherwise the caller could + access a wrong block or a block which is in read. While waiting + we cannot lose hash_link nor block. We have registered a request + on the hash_link. Everything can happen to the block but changes + in the hash_link -> block relationship. 
In other words: + everything can happen to the block but free or another completed + eviction. + + Note that we bahave like a secondary requestor here. We just + cannot return with PAGE_WAIT_TO_BE_READ. This would work for + read requests and writes on dirty blocks that are not in flush + only. Waiting here on COND_FOR_REQUESTED works in all + situations. + */ + DBUG_ASSERT(((block->hash_link != hash_link) && + (block->status & (BLOCK_IN_EVICTION | BLOCK_IN_SWITCH))) || + ((block->hash_link == hash_link) && + !(block->status & BLOCK_READ))); + wait_on_queue(&block->wqueue[COND_FOR_REQUESTED], &keycache->cache_lock); + /* + Here we can trust that the block has been assigned to this + hash_link (block->hash_link == hash_link) and read into the + buffer (BLOCK_READ). The worst things possible here are that the + block is in free (BLOCK_REASSIGNED). But the block is still + assigned to the hash_link. The freeing thread waits until we + release our request on the hash_link. The block must not be + again in eviction because we registered an request on it before + starting to wait. + */ + DBUG_ASSERT(block->hash_link == hash_link); + DBUG_ASSERT(block->status & (BLOCK_READ | BLOCK_IN_USE)); + DBUG_ASSERT(!(block->status & (BLOCK_IN_EVICTION | BLOCK_IN_SWITCH))); + } + /* + The block is in the cache. Assigned to the hash_link. Valid data. + Note that in case of page_st == PAGE_READ, the block can be marked + for eviction. In any case it can be marked for freeing. + */ + + if (!wrmode) + { + /* A reader can just read the block. */ + *page_st= PAGE_READ; + DBUG_ASSERT((hash_link->file == file) && + (hash_link->diskpos == filepos) && + (block->hash_link == hash_link)); + DBUG_RETURN(block); + } + + /* + This is a writer. No two writers for the same block can exist. + This must be assured by locks outside of the key cache. 
+ */ + DBUG_ASSERT(!(block->status & BLOCK_FOR_UPDATE) || fail_block(block)); + + while (block->status & BLOCK_IN_FLUSH) + { + /* + Wait until the block is flushed to file. Do not release the + request on the hash_link yet to prevent that the block is freed + or reassigned while we wait. While we wait, several things can + happen to the block, including another flush. But the block + cannot be reassigned to another hash_link until we release our + request on it. But it can be marked BLOCK_REASSIGNED from free + or eviction, while they wait for us to release the hash_link. + */ + wait_on_queue(&block->wqueue[COND_FOR_SAVED], &keycache->cache_lock); + /* + If the flush phase failed, the resize could have finished while + we waited here. + */ + if (!keycache->in_resize) + { + remove_reader(block); + unreg_request(keycache, block, 1); + goto restart; + } + DBUG_ASSERT(block->status & (BLOCK_READ | BLOCK_IN_USE)); + DBUG_ASSERT(!(block->status & BLOCK_FOR_UPDATE) || fail_block(block)); + DBUG_ASSERT(block->hash_link == hash_link); + } + + if (block->status & BLOCK_CHANGED) + { + /* + We want to write a block with changed contents. If the cache + block size is bigger than the callers block size (e.g. MyISAM), + the caller may replace part of the block only. Changes of the + other part of the block must be preserved. Since the block has + not yet been selected for flush, we can still add our changes. + */ + *page_st= PAGE_READ; + DBUG_ASSERT((hash_link->file == file) && + (hash_link->diskpos == filepos) && + (block->hash_link == hash_link)); + DBUG_RETURN(block); + } + + /* + This is a write request for a clean block. We do not want to have + new dirty blocks in the cache while resizing. We will free the + block and write directly to file. If the block is in eviction or + in free, we just let it go. + + Unregister from the hash_link. This must be done before freeing + the block. And it must be done if not freeing the block. 
Because + we could have waited above, we need to call remove_reader(). Other + threads could wait for us to release our request on the hash_link. + */ + remove_reader(block); + + /* If the block is not in eviction and not in free, we can free it. */ + if (!(block->status & (BLOCK_IN_EVICTION | BLOCK_IN_SWITCH | + BLOCK_REASSIGNED))) + { + /* + Free block as we are going to write directly to file. + Although we have an exlusive lock for the updated key part, + the control can be yielded by the current thread as we might + have unfinished readers of other key parts in the block + buffer. Still we are guaranteed not to have any readers + of the key part we are writing into until the block is + removed from the cache as we set the BLOCK_REASSIGNED + flag (see the code below that handles reading requests). + */ + free_block(keycache, block); + } + else + { + /* + The block will be evicted/freed soon. Don't touch it in any way. + Unregister the request that we registered above. + */ + unreg_request(keycache, block, 1); + + /* + The block is still assigned to the hash_link (the file/pos that + we are going to write to). Wait until the eviction/free is + complete. Otherwise the direct write could complete before all + readers are done with the block. So they could read outdated + data. + + Since we released our request on the hash_link, it can be reused + for another file/pos. Hence we cannot just check for + block->hash_link == hash_link. As long as the resize is + proceeding the block cannot be reassigned to the same file/pos + again. So we can terminate the loop when the block is no longer + assigned to this file/pos. + */ + do + { + wait_on_queue(&block->wqueue[COND_FOR_SAVED], + &keycache->cache_lock); + /* + If the flush phase failed, the resize could have finished + while we waited here. 
+ */ + if (!keycache->in_resize) + goto restart; + } while (block->hash_link && + (block->hash_link->file == file) && + (block->hash_link->diskpos == filepos)); + } + DBUG_RETURN(0); + } + + if (page_status == PAGE_READ && + (block->status & (BLOCK_IN_EVICTION | BLOCK_IN_SWITCH | + BLOCK_REASSIGNED))) + { + /* + This is a request for a block to be removed from cache. The block + is assigned to this hash_link and contains valid data, but is + marked for eviction or to be freed. Possible reasons why it has + not yet been evicted/freed can be a flush before reassignment + (BLOCK_IN_SWITCH), readers of the block have not finished yet + (BLOCK_REASSIGNED), or the evicting thread did not yet awake after + the block has been selected for it (BLOCK_IN_EVICTION). + */ + + KEYCACHE_DBUG_PRINT("find_key_block", + ("request for old page in block %u " + "wrmode: %d block->status: %d", + BLOCK_NUMBER(block), wrmode, block->status)); + /* + Only reading requests can proceed until the old dirty page is flushed, + all others are to be suspended, then resubmitted + */ + if (!wrmode && !(block->status & BLOCK_REASSIGNED)) + { + /* + This is a read request and the block not yet reassigned. We can + register our request and proceed. This unlinks the block from + the LRU ring and protects it against eviction. + */ + reg_requests(keycache, block, 1); + } + else + { + /* + Either this is a write request for a block that is in eviction + or in free. We must not use it any more. Instead we must evict + another block. But we cannot do this before the eviction/free is + done. Otherwise we would find the same hash_link + block again + and again. + + Or this is a read request for a block in eviction/free that does + not require a flush, but waits for readers to finish with the + block. We do not read this block to let the eviction/free happen + as soon as possible. Again we must wait so that we don't find + the same hash_link + block again and again. 
+ */ + DBUG_ASSERT(hash_link->requests); + hash_link->requests--; + KEYCACHE_DBUG_PRINT("find_key_block", + ("request waiting for old page to be saved")); + wait_on_queue(&block->wqueue[COND_FOR_SAVED], &keycache->cache_lock); + KEYCACHE_DBUG_PRINT("find_key_block", + ("request for old page resubmitted")); + /* + The block is no longer assigned to this hash_link. + Get another one. + */ + goto restart; + } + } + else + { + /* + This is a request for a new block or for a block not to be removed. + Either + - block == NULL or + - block not assigned to this hash_link or + - block assigned but not yet read from file, + or + - block assigned with valid (changed or unchanged) data and + - it will not be reassigned/freed. + */ + if (! block) + { + /* No block is assigned to the hash_link yet. */ + if (keycache->blocks_unused) + { + if (keycache->free_block_list) + { + /* There is a block in the free list. */ + block= keycache->free_block_list; + keycache->free_block_list= block->next_used; + block->next_used= NULL; + } + else + { + size_t block_mem_offset; + /* There are some never used blocks, take first of them */ + DBUG_ASSERT(keycache->blocks_used < + (ulong) keycache->disk_blocks); + block= &keycache->block_root[keycache->blocks_used]; + block_mem_offset= + ((size_t) keycache->blocks_used) * keycache->key_cache_block_size; + block->buffer= ADD_TO_PTR(keycache->block_mem, + block_mem_offset, + uchar*); + keycache->blocks_used++; + DBUG_ASSERT(!block->next_used); + } + DBUG_ASSERT(!block->prev_used); + DBUG_ASSERT(!block->next_changed); + DBUG_ASSERT(!block->prev_changed); + DBUG_ASSERT(!block->hash_link); + DBUG_ASSERT(!block->status); + DBUG_ASSERT(!block->requests); + keycache->blocks_unused--; + block->status= BLOCK_IN_USE; + block->length= 0; + block->offset= keycache->key_cache_block_size; + block->requests= 1; + block->temperature= BLOCK_COLD; + block->hits_left= init_hits_left; + block->last_hit_time= 0; + block->hash_link= hash_link; + hash_link->block= block; 
+ link_to_file_list(keycache, block, file, 0); + page_status= PAGE_TO_BE_READ; + KEYCACHE_DBUG_PRINT("find_key_block", + ("got free or never used block %u", + BLOCK_NUMBER(block))); + } + else + { + /* + There are no free blocks and no never used blocks, use a block + from the LRU ring. + */ + + if (! keycache->used_last) + { + /* + The LRU ring is empty. Wait until a new block is added to + it. Several threads might wait here for the same hash_link, + all of them must get the same block. While waiting for a + block, after a block is selected for this hash_link, other + threads can run first before this one awakes. During this + time interval other threads find this hash_link pointing to + the block, which is still assigned to another hash_link. In + this case the block is not marked BLOCK_IN_SWITCH yet, but + it is marked BLOCK_IN_EVICTION. + */ + + struct st_my_thread_var *thread= my_thread_var; + thread->keycache_link= (void *) hash_link; + link_into_queue(&keycache->waiting_for_block, thread); + do + { + KEYCACHE_DBUG_PRINT("find_key_block: wait", + ("suspend thread %ld", (ulong) thread->id)); + keycache_pthread_cond_wait(&thread->suspend, + &keycache->cache_lock); + } + while (thread->next); + thread->keycache_link= NULL; + /* Assert that block has a request registered. */ + DBUG_ASSERT(hash_link->block->requests); + /* Assert that block is not in LRU ring. */ + DBUG_ASSERT(!hash_link->block->next_used); + DBUG_ASSERT(!hash_link->block->prev_used); + } + /* + If we waited above, hash_link->block has been assigned by + link_block(). Otherwise it is still NULL. In the latter case + we need to grab a block from the LRU ring ourselves. + */ + block= hash_link->block; + if (! block) + { + /* Select the last block from the LRU ring. */ + block= keycache->used_last->next_used; + block->hits_left= init_hits_left; + block->last_hit_time= 0; + hash_link->block= block; + /* + Register a request on the block. 
This unlinks it from the + LRU ring and protects it against eviction. + */ + DBUG_ASSERT(!block->requests); + reg_requests(keycache, block,1); + /* + We do not need to set block->status|= BLOCK_IN_EVICTION here + because we will set block->status|= BLOCK_IN_SWITCH + immediately without releasing the lock in between. This does + also support debugging. When looking at the block, one can + see if the block has been selected by link_block() after the + LRU ring was empty, or if it was grabbed directly from the + LRU ring in this branch. + */ + } + + /* + If we had to wait above, there is a small chance that another + thread grabbed this block for the same file block already. But + in most cases the first condition is true. + */ + if (block->hash_link != hash_link && + ! (block->status & BLOCK_IN_SWITCH) ) + { + /* this is a primary request for a new page */ + block->status|= BLOCK_IN_SWITCH; + + KEYCACHE_DBUG_PRINT("find_key_block", + ("got block %u for new page", BLOCK_NUMBER(block))); + + if (block->status & BLOCK_CHANGED) + { + /* The block contains a dirty page - push it out of the cache */ + + KEYCACHE_DBUG_PRINT("find_key_block", ("block is dirty")); + if (block->status & BLOCK_IN_FLUSH) + { + /* + The block is marked for flush. If we do not wait here, + it could happen that we write the block, reassign it to + another file block, then, before the new owner can read + the new file block, the flusher writes the cache block + (which still has the old contents) to the new file block! + */ + wait_on_queue(&block->wqueue[COND_FOR_SAVED], + &keycache->cache_lock); + /* + The block is marked BLOCK_IN_SWITCH. It should be left + alone except for reading. No free, no write. + */ + DBUG_ASSERT(block->status & (BLOCK_READ | BLOCK_IN_USE)); + DBUG_ASSERT(!(block->status & (BLOCK_REASSIGNED | + BLOCK_CHANGED | + BLOCK_FOR_UPDATE))); + } + else + { + block->status|= BLOCK_IN_FLUSH | BLOCK_IN_FLUSHWRITE; + /* + BLOCK_IN_EVICTION may be true or not. 
Other flags must + have a fixed value. + */ + DBUG_ASSERT((block->status & ~BLOCK_IN_EVICTION) == + (BLOCK_READ | BLOCK_IN_SWITCH | + BLOCK_IN_FLUSH | BLOCK_IN_FLUSHWRITE | + BLOCK_CHANGED | BLOCK_IN_USE)); + DBUG_ASSERT(block->hash_link); + + keycache_pthread_mutex_unlock(&keycache->cache_lock); + /* + The call is thread safe because only the current + thread might change the block->hash_link value + */ + error= (int)my_pwrite(block->hash_link->file, + block->buffer + block->offset, + block->length - block->offset, + block->hash_link->diskpos + block->offset, + MYF(MY_NABP | MY_WAIT_IF_FULL)); + keycache_pthread_mutex_lock(&keycache->cache_lock); + + /* Block status must not have changed. */ + DBUG_ASSERT((block->status & ~BLOCK_IN_EVICTION) == + (BLOCK_READ | BLOCK_IN_SWITCH | + BLOCK_IN_FLUSH | BLOCK_IN_FLUSHWRITE | + BLOCK_CHANGED | BLOCK_IN_USE) || fail_block(block)); + keycache->global_cache_write++; + } + } + + block->status|= BLOCK_REASSIGNED; + /* + The block comes from the LRU ring. It must have a hash_link + assigned. + */ + DBUG_ASSERT(block->hash_link); + if (block->hash_link) + { + /* + All pending requests for this page must be resubmitted. + This must be done before waiting for readers. They could + wait for the flush to complete. And we must also do it + after the wait. Flushers might try to free the block while + we wait. They would wait until the reassignment is + complete. Also the block status must reflect the correct + situation: The block is not changed nor in flush any more. + Note that we must not change the BLOCK_CHANGED flag + outside of link_to_file_list() so that it is always in the + correct queue and the *blocks_changed counters are + correct. + */ + block->status&= ~(BLOCK_IN_FLUSH | BLOCK_IN_FLUSHWRITE); + link_to_file_list(keycache, block, block->hash_link->file, 1); + release_whole_queue(&block->wqueue[COND_FOR_SAVED]); + /* + The block is still assigned to its old hash_link. 
+ Wait until all pending read requests + for this page are executed + (we could have avoided this waiting, if we had read + a page in the cache in a sweep, without yielding control) + */ + wait_for_readers(keycache, block); + DBUG_ASSERT(block->hash_link && block->hash_link->block == block && + block->prev_changed); + /* The reader must not have been a writer. */ + DBUG_ASSERT(!(block->status & BLOCK_CHANGED)); + + /* Wake flushers that might have found the block in between. */ + release_whole_queue(&block->wqueue[COND_FOR_SAVED]); + + /* Remove the hash link for the old file block from the hash. */ + unlink_hash(keycache, block->hash_link); + + /* + For sanity checks link_to_file_list() asserts that block + and hash_link refer to each other. Hence we need to assign + the hash_link first, but then we would not know if it was + linked before. Hence we would not know if to unlink it. So + unlink it here and call link_to_file_list(..., FALSE). + */ + unlink_changed(block); + } + block->status= error ? BLOCK_ERROR : BLOCK_IN_USE ; + block->length= 0; + block->offset= keycache->key_cache_block_size; + block->hash_link= hash_link; + link_to_file_list(keycache, block, file, 0); + page_status= PAGE_TO_BE_READ; + + KEYCACHE_DBUG_ASSERT(block->hash_link->block == block); + KEYCACHE_DBUG_ASSERT(hash_link->block->hash_link == hash_link); + } + else + { + /* + Either (block->hash_link == hash_link), + or (block->status & BLOCK_IN_SWITCH). + + This is for secondary requests for a new file block only. + Either it is already assigned to the new hash_link meanwhile + (if we had to wait due to empty LRU), or it is already in + eviction by another thread. Since this block has been + grabbed from the LRU ring and attached to this hash_link, + another thread cannot grab the same block from the LRU ring + anymore. If the block is in eviction already, it must become + attached to the same hash_link and as such destined for the + same file block. 
+ */ + KEYCACHE_DBUG_PRINT("find_key_block", + ("block->hash_link: %p hash_link: %p " + "block->status: %u", block->hash_link, + hash_link, block->status )); + page_status= (((block->hash_link == hash_link) && + (block->status & BLOCK_READ)) ? + PAGE_READ : PAGE_WAIT_TO_BE_READ); + } + } + } + else + { + /* + Block is not NULL. This hash_link points to a block. + Either + - block not assigned to this hash_link (yet) or + - block assigned but not yet read from file, + or + - block assigned with valid (changed or unchanged) data and + - it will not be reassigned/freed. + + The first condition means hash_link points to a block in + eviction. This is not necessarily marked by BLOCK_IN_SWITCH yet. + But then it is marked BLOCK_IN_EVICTION. See the NOTE in + link_block(). In both cases it is destined for this hash_link + and its file block address. When this hash_link got its block + address, the block was removed from the LRU ring and cannot be + selected for eviction (for another hash_link) again. + + Register a request on the block. This is another protection + against eviction. + */ + DBUG_ASSERT(((block->hash_link != hash_link) && + (block->status & (BLOCK_IN_EVICTION | BLOCK_IN_SWITCH))) || + ((block->hash_link == hash_link) && + !(block->status & BLOCK_READ)) || + ((block->status & BLOCK_READ) && + !(block->status & (BLOCK_IN_EVICTION | BLOCK_IN_SWITCH)))); + reg_requests(keycache, block, 1); + KEYCACHE_DBUG_PRINT("find_key_block", + ("block->hash_link: %p hash_link: %p " + "block->status: %u", block->hash_link, + hash_link, block->status )); + page_status= (((block->hash_link == hash_link) && + (block->status & BLOCK_READ)) ? + PAGE_READ : PAGE_WAIT_TO_BE_READ); + } + } + + KEYCACHE_DBUG_ASSERT(page_status != -1); + /* Same assert basically, but be very sure. */ + KEYCACHE_DBUG_ASSERT(block); + /* Assert that block has a request and is not in LRU ring. 
*/ + DBUG_ASSERT(block->requests); + DBUG_ASSERT(!block->next_used); + DBUG_ASSERT(!block->prev_used); + /* Assert that we return the correct block. */ + DBUG_ASSERT((page_status == PAGE_WAIT_TO_BE_READ) || + ((block->hash_link->file == file) && + (block->hash_link->diskpos == filepos))); + *page_st=page_status; + KEYCACHE_DBUG_PRINT("find_key_block", + ("fd: %d pos: %lu block->status: %u page_status: %d", + file, (ulong) filepos, block->status, + page_status)); + +#if !defined(DBUG_OFF) && defined(EXTRA_DEBUG) + DBUG_EXECUTE("check_keycache2", + test_key_cache(keycache, "end of find_key_block",0);); +#endif + KEYCACHE_THREAD_TRACE("find_key_block:end"); + DBUG_RETURN(block); +} + + +/* + Read into a key cache block buffer from disk. + + SYNOPSIS + + read_block_{primary|secondary}() + keycache pointer to a key cache data structure + block block to which buffer the data is to be read + read_length size of data to be read + min_length at least so much data must be read + + RETURN VALUE + None + + NOTES. + The function either reads a page data from file to the block buffer, + or waits until another thread reads it. What page to read is determined + by a block parameter - reference to a hash link for this page. + If an error occurs THE BLOCK_ERROR bit is set in the block status. + We do not report error when the size of successfully read + portion is less than read_length, but not less than min_length. +*/ + +static void read_block_primary(SIMPLE_KEY_CACHE_CB *keycache, + BLOCK_LINK *block, uint read_length, + uint min_length) +{ + size_t got_length; + + /* On entry cache_lock is locked */ + + KEYCACHE_THREAD_TRACE("read_block_primary"); + + /* + This code is executed only by threads that submitted primary + requests. Until block->status contains BLOCK_READ, all other + request for the block become secondary requests. For a primary + request the block must be properly initialized. 
+ */ + DBUG_ASSERT(((block->status & ~BLOCK_FOR_UPDATE) == BLOCK_IN_USE) || + fail_block(block)); + DBUG_ASSERT((block->length == 0) || fail_block(block)); + DBUG_ASSERT((block->offset == keycache->key_cache_block_size) || + fail_block(block)); + DBUG_ASSERT((block->requests > 0) || fail_block(block)); + + KEYCACHE_DBUG_PRINT("read_block_primary", + ("page to be read by primary request")); + + keycache->global_cache_read++; + /* Page is not in buffer yet, is to be read from disk */ + keycache_pthread_mutex_unlock(&keycache->cache_lock); + /* + Here other threads may step in and register as secondary readers. + They will register in block->wqueue[COND_FOR_REQUESTED]. + */ + got_length= my_pread(block->hash_link->file, block->buffer, + read_length, block->hash_link->diskpos, MYF(0)); + keycache_pthread_mutex_lock(&keycache->cache_lock); + /* + The block can now have been marked for free (in case of + FLUSH_RELEASE). Otherwise the state must be unchanged. + */ + DBUG_ASSERT(((block->status & ~(BLOCK_REASSIGNED | + BLOCK_FOR_UPDATE)) == BLOCK_IN_USE) || + fail_block(block)); + DBUG_ASSERT((block->length == 0) || fail_block(block)); + DBUG_ASSERT((block->offset == keycache->key_cache_block_size) || + fail_block(block)); + DBUG_ASSERT((block->requests > 0) || fail_block(block)); + + if (got_length < min_length) + block->status|= BLOCK_ERROR; + else + { + block->status|= BLOCK_READ; + block->length= (uint)got_length; + /* + Do not set block->offset here. If this block is marked + BLOCK_CHANGED later, we want to flush only the modified part. So + only a writer may set block->offset down from + keycache->key_cache_block_size. 
+ */ + } + KEYCACHE_DBUG_PRINT("read_block_primary", + ("primary request: new page in cache")); + /* Signal that all pending requests for this page now can be processed */ + release_whole_queue(&block->wqueue[COND_FOR_REQUESTED]); + + DBUG_ASSERT(keycache->can_be_used); +} + + +static void read_block_secondary(SIMPLE_KEY_CACHE_CB *keycache, + BLOCK_LINK *block) +{ + KEYCACHE_THREAD_TRACE("read_block_secondary"); + + /* + This code is executed only by threads that submitted secondary + requests. At this point it could happen that the cache block is + not yet assigned to the hash_link for the requested file block. + But at awake from the wait this should be the case. Unfortunately + we cannot assert this here because we do not know the hash_link + for the requested file block nor the file and position. So we have + to assert this in the caller. + */ + KEYCACHE_DBUG_PRINT("read_block_secondary", + ("secondary request waiting for new page to be read")); + + wait_on_queue(&block->wqueue[COND_FOR_REQUESTED], &keycache->cache_lock); + + KEYCACHE_DBUG_PRINT("read_block_secondary", + ("secondary request: new page in cache")); + + DBUG_ASSERT(keycache->can_be_used); + DBUG_ASSERT(block->status & (BLOCK_READ | BLOCK_IN_USE)); +} + + +/* + Read a block of data from a simple key cache into a buffer + + SYNOPSIS + + simple_key_cache_read() + keycache pointer to the control block of a simple key cache + file handler for the file for the block of data to be read + filepos position of the block of data in the file + level determines the weight of the data + buff buffer to where the data must be placed + length length of the buffer + block_length length of the read data from a key cache block + return_buffer return pointer to the key cache buffer with the data + + DESCRIPTION + This function is the implementation of the key_cache_read interface + function that is employed by simple (non-partitioned) key caches. 
+ The function takes the parameter keycache as a pointer to the + control block structure of the type SIMPLE_KEY_CACHE_CB for a simple key + cache. + In a general case the function reads a block of data from the key cache + into the buffer buff of the size specified by the parameter length. The + beginning of the block of data to be read is specified by the parameters + file and filepos. The length of the read data is the same as the length + of the buffer. The data is read into the buffer in key_cache_block_size + increments. If the next portion of the data is not found in any key cache + block, first it is read from file into the key cache. + If the parameter return_buffer is not ignored and its value is TRUE, and + the data to be read of the specified size block_length can be read from one + key cache buffer, then the function returns a pointer to the data in the + key cache buffer. + The function takes into account parameters block_length and return_buffer + only in a single-threaded environment. + The parameter 'level' is used only by the midpoint insertion strategy + when the data or its portion cannot be found in the key cache. + + RETURN VALUE + Returns address from where the data is placed if successful, 0 - otherwise.
+ + NOTES + Filepos must be a multiple of 'block_length', but it doesn't + have to be a multiple of key_cache_block_size; +*/ + +uchar *simple_key_cache_read(SIMPLE_KEY_CACHE_CB *keycache, + File file, my_off_t filepos, int level, + uchar *buff, uint length, + uint block_length __attribute__((unused)), + int return_buffer __attribute__((unused))) +{ + my_bool locked_and_incremented= FALSE; + int error=0; + uchar *start= buff; + DBUG_ENTER("simple_key_cache_read"); + DBUG_PRINT("enter", ("fd: %u pos: %lu length: %u", + (uint) file, (ulong) filepos, length)); + + if (keycache->key_cache_inited) + { + /* Key cache is used */ + reg1 BLOCK_LINK *block; + uint read_length; + uint offset; + int page_st; + + if (MYSQL_KEYCACHE_READ_START_ENABLED()) + { + MYSQL_KEYCACHE_READ_START(my_filename(file), length, + (ulong) (keycache->blocks_used * + keycache->key_cache_block_size), + (ulong) (keycache->blocks_unused * + keycache->key_cache_block_size)); + } + + /* + When the key cache is once initialized, we use the cache_lock to + reliably distinguish the cases of normal operation, resizing, and + disabled cache. We always increment and decrement + 'cnt_for_resize_op' so that a resizer can wait for pending I/O. + */ + keycache_pthread_mutex_lock(&keycache->cache_lock); + /* + Cache resizing has two phases: Flushing and re-initializing. In + the flush phase read requests are allowed to bypass the cache for + blocks not in the cache. find_key_block() returns NULL in this + case. + + After the flush phase new I/O requests must wait until the + re-initialization is done. The re-initialization can be done only + if no I/O request is in progress. The reason is that + key_cache_block_size can change. With enabled cache, I/O is done + in chunks of key_cache_block_size. Every chunk tries to use a + cache block first. If the block size changes in the middle, a + block could be missed and old data could be read. 
+ */ + while (keycache->in_resize && !keycache->resize_in_flush) + wait_on_queue(&keycache->resize_queue, &keycache->cache_lock); + /* Register the I/O for the next resize. */ + inc_counter_for_resize_op(keycache); + locked_and_incremented= TRUE; + /* Requested data may not always be aligned to cache blocks. */ + offset= (uint) (filepos % keycache->key_cache_block_size); + /* Read data in key_cache_block_size increments */ + do + { + /* Cache could be disabled in a later iteration. */ + if (!keycache->can_be_used) + { + KEYCACHE_DBUG_PRINT("key_cache_read", ("keycache cannot be used")); + goto no_key_cache; + } + /* Start reading at the beginning of the cache block. */ + filepos-= offset; + /* Do not read beyond the end of the cache block. */ + read_length= length; + set_if_smaller(read_length, keycache->key_cache_block_size-offset); + KEYCACHE_DBUG_ASSERT(read_length > 0); + + /* Request the cache block that matches file/pos. */ + keycache->global_cache_r_requests++; + + MYSQL_KEYCACHE_READ_BLOCK(keycache->key_cache_block_size); + + block=find_key_block(keycache, file, filepos, level, 0, &page_st); + if (!block) + { + /* + This happens only for requests submitted during key cache + resize. The block is not in the cache and shall not go in. + Read directly from file. 
+ */ + keycache->global_cache_read++; + keycache_pthread_mutex_unlock(&keycache->cache_lock); + error= (my_pread(file, (uchar*) buff, read_length, + filepos + offset, MYF(MY_NABP)) != 0); + keycache_pthread_mutex_lock(&keycache->cache_lock); + goto next_block; + } + if (!(block->status & BLOCK_ERROR)) + { + if (page_st == PAGE_TO_BE_READ) + { + MYSQL_KEYCACHE_READ_MISS(); + read_block_primary(keycache, block, + keycache->key_cache_block_size, read_length+offset); + } + else if (page_st == PAGE_WAIT_TO_BE_READ) + { + MYSQL_KEYCACHE_READ_MISS(); + /* The requested page is to be read into the block buffer */ + read_block_secondary(keycache, block); + + /* + A secondary request must now have the block assigned to the + requested file block. + */ + DBUG_ASSERT(block->hash_link->file == file); + DBUG_ASSERT(block->hash_link->diskpos == filepos); + } + else if (block->length < read_length + offset) + { + /* + Impossible if nothing goes wrong: + this could only happen if we are using a file with + small key blocks and are trying to read outside the file + */ + my_errno= -1; + block->status|= BLOCK_ERROR; + } + else + { + MYSQL_KEYCACHE_READ_HIT(); + } + } + + /* block status may have added BLOCK_ERROR in the above 'if'. */ + if (!(block->status & BLOCK_ERROR)) + { + { + DBUG_ASSERT(block->status & (BLOCK_READ | BLOCK_IN_USE)); +#if !defined(SERIALIZED_READ_FROM_CACHE) + keycache_pthread_mutex_unlock(&keycache->cache_lock); +#endif + + /* Copy data from the cache buffer */ + memcpy(buff, block->buffer+offset, (size_t) read_length); + +#if !defined(SERIALIZED_READ_FROM_CACHE) + keycache_pthread_mutex_lock(&keycache->cache_lock); + DBUG_ASSERT(block->status & (BLOCK_READ | BLOCK_IN_USE)); +#endif + } + } + + remove_reader(block); + + /* Error injection for coverage testing. */ + DBUG_EXECUTE_IF("key_cache_read_block_error", + block->status|= BLOCK_ERROR;); + + /* Do not link erroneous blocks into the LRU ring, but free them. 
*/ + if (!(block->status & BLOCK_ERROR)) + { + /* + Link the block into the LRU ring if it's the last submitted + request for the block. This enables eviction for the block. + */ + unreg_request(keycache, block, 1); + } + else + { + free_block(keycache, block); + error= 1; + break; + } + + next_block: + buff+= read_length; + filepos+= read_length+offset; + offset= 0; + + } while ((length-= read_length)); + if (MYSQL_KEYCACHE_READ_DONE_ENABLED()) + { + MYSQL_KEYCACHE_READ_DONE((ulong) (keycache->blocks_used * + keycache->key_cache_block_size), + (ulong) (keycache->blocks_unused * + keycache->key_cache_block_size)); + } + goto end; + } + KEYCACHE_DBUG_PRINT("key_cache_read", ("keycache not initialized")); + +no_key_cache: + /* Key cache is not used */ + + keycache->global_cache_r_requests++; + keycache->global_cache_read++; + + if (locked_and_incremented) + keycache_pthread_mutex_unlock(&keycache->cache_lock); + if (my_pread(file, (uchar*) buff, length, filepos, MYF(MY_NABP))) + error= 1; + if (locked_and_incremented) + keycache_pthread_mutex_lock(&keycache->cache_lock); + +end: + if (locked_and_incremented) + { + dec_counter_for_resize_op(keycache); + keycache_pthread_mutex_unlock(&keycache->cache_lock); + } + DBUG_PRINT("exit", ("error: %d", error )); + DBUG_RETURN(error ? (uchar*) 0 : start); +} + + +/* + Insert a block of file data from a buffer into a simple key cache + + SYNOPSIS + simple_key_cache_insert() + keycache pointer to the control block of a simple key cache + file handler for the file to insert data from + filepos position of the block of data in the file to insert + level determines the weight of the data + buff buffer to read data from + length length of the data in the buffer + + DESCRIPTION + This function is the implementation of the key_cache_insert interface + function that is employed by simple (non-partitioned) key caches. 
+ The function takes the parameter keycache as a pointer to the + control block structure of the type SIMPLE_KEY_CACHE_CB for a simple key + cache. + The function writes a block of file data from a buffer into the key cache. + The buffer is specified with the parameters buff and length - the pointer + to the beginning of the buffer and its size respectively. It's assumed + the buffer contains the data from 'file' allocated from the position + filepos. The data is copied from the buffer in key_cache_block_size + increments. + The parameter level is used to set one characteristic for the key buffers + loaded with the data from buff. The characteristic is used only by the + midpoint insertion strategy. + + RETURN VALUE + 0 if a success, 1 - otherwise (a cache block ended up with BLOCK_ERROR + set). + + NOTES + The function is used by MyISAM to move all blocks from an index file to + the key cache. It can be performed in parallel with reading the file data + from the key buffers by other threads. + 0 is also returned when nothing, or only a part of the data, has been + loaded because the key cache is not initialized, is disabled or is being + resized, or because find_key_block() returned no block. + +*/ + +static +int simple_key_cache_insert(SIMPLE_KEY_CACHE_CB *keycache, + File file, my_off_t filepos, int level, + uchar *buff, uint length) +{ + int error= 0; + DBUG_ENTER("key_cache_insert"); + DBUG_PRINT("enter", ("fd: %u pos: %lu length: %u", + (uint) file,(ulong) filepos, length)); + + if (keycache->key_cache_inited) + { + /* Key cache is used */ + reg1 BLOCK_LINK *block; + uint read_length; + uint offset; + int page_st; + my_bool locked_and_incremented= FALSE; + + /* + When the keycache is once initialized, we use the cache_lock to + reliably distinguish the cases of normal operation, resizing, and + disabled cache. We always increment and decrement + 'cnt_for_resize_op' so that a resizer can wait for pending I/O. + */ + keycache_pthread_mutex_lock(&keycache->cache_lock); + /* + We do not load index data into a disabled cache nor into an + ongoing resize. + */ + if (!keycache->can_be_used || keycache->in_resize) + goto no_key_cache; + /* Register the pseudo I/O for the next resize.
*/ + inc_counter_for_resize_op(keycache); + locked_and_incremented= TRUE; + /* Loaded data may not always be aligned to cache blocks. */ + offset= (uint) (filepos % keycache->key_cache_block_size); + /* Load data in key_cache_block_size increments. */ + do + { + /* Cache could be disabled or resizing in a later iteration. */ + if (!keycache->can_be_used || keycache->in_resize) + goto no_key_cache; + /* Start loading at the beginning of the cache block. */ + filepos-= offset; + /* Do not load beyond the end of the cache block. */ + read_length= length; + set_if_smaller(read_length, keycache->key_cache_block_size-offset); + KEYCACHE_DBUG_ASSERT(read_length > 0); + + /* The block has been read by the caller already. */ + keycache->global_cache_read++; + /* Request the cache block that matches file/pos. */ + keycache->global_cache_r_requests++; + block= find_key_block(keycache, file, filepos, level, 0, &page_st); + if (!block) + { + /* + This happens only for requests submitted during key cache + resize. The block is not in the cache and shall not go in. + Stop loading index data. + */ + goto no_key_cache; + } + if (!(block->status & BLOCK_ERROR)) + { + if (page_st == PAGE_WAIT_TO_BE_READ) + { + /* + this is a secondary request for a block to be read into the + cache. The block is in eviction. It is not yet assigned to + the requested file block (It does not point to the right + hash_link). So we cannot call remove_reader() on the block. + And we cannot access the hash_link directly here. We need to + wait until the assignment is complete. read_block_secondary() + executes the correct wait. + */ + read_block_secondary(keycache, block); + + /* + A secondary request must now have the block assigned to the + requested file block.
+ */ + DBUG_ASSERT(block->hash_link->file == file); + DBUG_ASSERT(block->hash_link->diskpos == filepos); + } + else if (page_st == PAGE_TO_BE_READ && + (offset || (read_length < keycache->key_cache_block_size))) + { + /* + this is a primary request for a block to be read into the + cache and the supplied data does not fill the whole block. + + This function is called on behalf of a LOAD INDEX INTO CACHE + statement, which is a read-only task and allows other + readers. It is possible that a parallel running reader tries + to access this block. If it needs more data than has been + supplied here, it would report an error. To be sure that we + have all data in the block that is available in the file, we + read the block ourselves. + + Though reading again what the caller did read already is an + expensive operation, we need to do this for correctness. + */ + read_block_primary(keycache, block, keycache->key_cache_block_size, + read_length + offset); + } + else if (page_st == PAGE_TO_BE_READ) + { + /* + This is a new block in the cache. If we come here, we have + data for the whole block. + */ + DBUG_ASSERT(block->hash_link->requests); + DBUG_ASSERT(block->status & BLOCK_IN_USE); + DBUG_ASSERT((page_st == PAGE_TO_BE_READ) || + (block->status & BLOCK_READ)); + +#if !defined(SERIALIZED_READ_FROM_CACHE) + keycache_pthread_mutex_unlock(&keycache->cache_lock); + /* + Here other threads may step in and register as secondary readers. + They will register in block->wqueue[COND_FOR_REQUESTED]. + */ +#endif + + /* Copy data from buff */ + memcpy(block->buffer+offset, buff, (size_t) read_length); + +#if !defined(SERIALIZED_READ_FROM_CACHE) + keycache_pthread_mutex_lock(&keycache->cache_lock); + DBUG_ASSERT(block->status & BLOCK_IN_USE); + DBUG_ASSERT((page_st == PAGE_TO_BE_READ) || + (block->status & BLOCK_READ)); +#endif + /* + After the data is in the buffer, we can declare the block + valid. Now other threads do not need to register as + secondary readers any more.
They can immediately access the + block. + */ + block->status|= BLOCK_READ; + block->length= read_length+offset; + /* + Do not set block->offset here. If this block is marked + BLOCK_CHANGED later, we want to flush only the modified part. So + only a writer may set block->offset down from + keycache->key_cache_block_size. + */ + KEYCACHE_DBUG_PRINT("key_cache_insert", + ("primary request: new page in cache")); + /* Signal all pending requests. */ + release_whole_queue(&block->wqueue[COND_FOR_REQUESTED]); + } + else + { + /* + page_st == PAGE_READ. The block is in the buffer. All data + must already be present. Blocks are always read with all + data available on file. Assert that the block does not have + less contents than the preloader supplies. If the caller has + data beyond block->length, it means that a file write has + been done while this block was in cache and not extended + with the new data. If the condition is met, we can simply + ignore the block. + */ + DBUG_ASSERT((page_st == PAGE_READ) && + (read_length + offset <= block->length)); + } + + /* + A secondary request must now have the block assigned to the + requested file block. It does not hurt to check it for primary + requests too. + */ + DBUG_ASSERT(block->hash_link->file == file); + DBUG_ASSERT(block->hash_link->diskpos == filepos); + DBUG_ASSERT(block->status & (BLOCK_READ | BLOCK_IN_USE)); + } /* end of if (!(block->status & BLOCK_ERROR)) */ + + remove_reader(block); + + /* Error injection for coverage testing. */ + DBUG_EXECUTE_IF("key_cache_insert_block_error", + block->status|= BLOCK_ERROR; errno=EIO;); + + /* Do not link erroneous blocks into the LRU ring, but free them. */ + if (!(block->status & BLOCK_ERROR)) + { + /* + Link the block into the LRU ring if it's the last submitted + request for the block. This enables eviction for the block.
+ */ + unreg_request(keycache, block, 1); + } + else + { + free_block(keycache, block); + error= 1; + break; + } + + buff+= read_length; + filepos+= read_length+offset; + offset= 0; + + } while ((length-= read_length)); + + no_key_cache: + if (locked_and_incremented) + dec_counter_for_resize_op(keycache); + keycache_pthread_mutex_unlock(&keycache->cache_lock); + } + DBUG_RETURN(error); +} + + +/* + Write a buffer into a simple key cache + + SYNOPSIS + + simple_key_cache_write() + keycache pointer to the control block of a simple key cache + file handler for the file to write data to + file_extra maps of key cache partitions containing + dirty pages from file + filepos position in the file to write data to + level determines the weight of the data + buff buffer with the data + length length of the buffer + dont_write if is 0 then all dirty pages involved in writing + should have been flushed from key cache + + DESCRIPTION + This function is the implementation of the key_cache_write interface + function that is employed by simple (non-partitioned) key caches. + The function takes the parameter keycache as a pointer to the + control block structure of the type SIMPLE_KEY_CACHE_CB for a simple key + cache. + In a general case the function copies data from a buffer into the key + cache. The buffer is specified with the parameters buff and length - + the pointer to the beginning of the buffer and its size respectively. + It's assumed the buffer contains the data to be written into 'file' + starting from the position filepos. The data is copied from the buffer + in key_cache_block_size increments. + If the value of the parameter dont_write is FALSE then the function + also writes the data into file. + The parameter level is used to set one characteristic for the key buffers + filled with the data from buff. The characteristic is employed only by + the midpoint insertion strategy.
+ The parameter file_extra currently makes sense only for simple key caches + that are elements of a partitioned key cache. It provides a pointer to the + shared bitmap of the partitions that may contains dirty pages for the file. + This bitmap is used to optimize the function + flush_partitioned_key_cache_blocks. + + RETURN VALUE + 0 if a success, 1 - otherwise. + + NOTES + This implementation exploits the fact that the function is called only + when a thread has got an exclusive lock for the key file. +*/ + +static +int simple_key_cache_write(SIMPLE_KEY_CACHE_CB *keycache, + File file, void *file_extra __attribute__((unused)), + my_off_t filepos, int level, + uchar *buff, uint length, + uint block_length __attribute__((unused)), + int dont_write) +{ + my_bool locked_and_incremented= FALSE; + int error=0; + DBUG_ENTER("simple_key_cache_write"); + DBUG_PRINT("enter", + ("fd: %u pos: %lu length: %u block_length: %u" + " key_block_length: %u", + (uint) file, (ulong) filepos, length, block_length, + keycache ? keycache->key_cache_block_size : 0)); + + if (!dont_write) + { + /* purecov: begin inspected */ + /* Not used in the server. */ + /* Force writing from buff into disk. 
*/ + keycache->global_cache_w_requests++; + keycache->global_cache_write++; + if (my_pwrite(file, buff, length, filepos, MYF(MY_NABP | MY_WAIT_IF_FULL))) + DBUG_RETURN(1); + /* purecov: end */ + } + +#if !defined(DBUG_OFF) && defined(EXTRA_DEBUG) + DBUG_EXECUTE("check_keycache", + test_key_cache(keycache, "start of key_cache_write", 1);); +#endif + + if (keycache->key_cache_inited) + { + /* Key cache is used */ + reg1 BLOCK_LINK *block; + uint read_length; + uint offset; + int page_st; + + if (MYSQL_KEYCACHE_WRITE_START_ENABLED()) + { + MYSQL_KEYCACHE_WRITE_START(my_filename(file), length, + (ulong) (keycache->blocks_used * + keycache->key_cache_block_size), + (ulong) (keycache->blocks_unused * + keycache->key_cache_block_size)); + } + + /* + When the key cache is once initialized, we use the cache_lock to + reliably distinguish the cases of normal operation, resizing, and + disabled cache. We always increment and decrement + 'cnt_for_resize_op' so that a resizer can wait for pending I/O. + */ + keycache_pthread_mutex_lock(&keycache->cache_lock); + /* + Cache resizing has two phases: Flushing and re-initializing. In + the flush phase write requests can modify dirty blocks that are + not yet in flush. Otherwise they are allowed to bypass the cache. + find_key_block() returns NULL in both cases (clean blocks and + non-cached blocks). + + After the flush phase new I/O requests must wait until the + re-initialization is done. The re-initialization can be done only + if no I/O request is in progress. The reason is that + key_cache_block_size can change. With enabled cache I/O is done in + chunks of key_cache_block_size. Every chunk tries to use a cache + block first. If the block size changes in the middle, a block + could be missed and data could be written below a cached block. + */ + while (keycache->in_resize && !keycache->resize_in_flush) + wait_on_queue(&keycache->resize_queue, &keycache->cache_lock); + /* Register the I/O for the next resize. 
*/ + inc_counter_for_resize_op(keycache); + locked_and_incremented= TRUE; + /* Requested data may not always be aligned to cache blocks. */ + offset= (uint) (filepos % keycache->key_cache_block_size); + /* Write data in key_cache_block_size increments. */ + do + { + /* Cache could be disabled in a later iteration. */ + if (!keycache->can_be_used) + goto no_key_cache; + + MYSQL_KEYCACHE_WRITE_BLOCK(keycache->key_cache_block_size); + /* Start writing at the beginning of the cache block. */ + filepos-= offset; + /* Do not write beyond the end of the cache block. */ + read_length= length; + set_if_smaller(read_length, keycache->key_cache_block_size-offset); + KEYCACHE_DBUG_ASSERT(read_length > 0); + + /* Request the cache block that matches file/pos. */ + keycache->global_cache_w_requests++; + block= find_key_block(keycache, file, filepos, level, 1, &page_st); + if (!block) + { + /* + This happens only for requests submitted during key cache + resize. The block is not in the cache and shall not go in. + Write directly to file. + */ + if (dont_write) + { + /* Used in the server. */ + keycache->global_cache_write++; + keycache_pthread_mutex_unlock(&keycache->cache_lock); + if (my_pwrite(file, (uchar*) buff, read_length, filepos + offset, + MYF(MY_NABP | MY_WAIT_IF_FULL))) + error=1; + keycache_pthread_mutex_lock(&keycache->cache_lock); + } + goto next_block; + } + /* + Prevent block from flushing and from being selected for to be + freed. This must be set when we release the cache_lock. + However, we must not set the status of the block before it is + assigned to this file/pos. + */ + if (page_st != PAGE_WAIT_TO_BE_READ) + block->status|= BLOCK_FOR_UPDATE; + /* + We must read the file block first if it is not yet in the cache + and we do not replace all of its contents. + + In cases where the cache block is big enough to contain (parts + of) index blocks of different indexes, our request can be + secondary (PAGE_WAIT_TO_BE_READ). 
In this case another thread is + reading the file block. If the read completes after us, it + overwrites our new contents with the old contents. So we have to + wait for the other thread to complete the read of this block. + read_block_primary|secondary() takes care for the wait. + */ + if (!(block->status & BLOCK_ERROR)) + { + if (page_st == PAGE_TO_BE_READ && + (offset || read_length < keycache->key_cache_block_size)) + { + read_block_primary(keycache, block, + offset + read_length >= keycache->key_cache_block_size? + offset : keycache->key_cache_block_size, + offset); + /* + Prevent block from flushing and from being selected to be + freed. This must be set when we release the cache_lock. + Here we set it in case we could not set it above. + */ + block->status|= BLOCK_FOR_UPDATE; + } + else if (page_st == PAGE_WAIT_TO_BE_READ) + { + read_block_secondary(keycache, block); + block->status|= BLOCK_FOR_UPDATE; + } + } + /* + The block should always be assigned to the requested file block + here. It need not be BLOCK_READ when overwriting the whole block. + */ + DBUG_ASSERT(block->hash_link->file == file); + DBUG_ASSERT(block->hash_link->diskpos == filepos); + DBUG_ASSERT(block->status & BLOCK_IN_USE); + DBUG_ASSERT((page_st == PAGE_TO_BE_READ) || (block->status & BLOCK_READ)); + /* + The block to be written must not be marked BLOCK_REASSIGNED. + Otherwise it could be freed in dirty state or reused without + another flush during eviction. It must also not be in flush. + Otherwise the old contents may have been flushed already and + the flusher could clear BLOCK_CHANGED without flushing the + new changes again. + */ + DBUG_ASSERT(!(block->status & BLOCK_REASSIGNED)); + + while (block->status & BLOCK_IN_FLUSHWRITE) + { + /* + Another thread is flushing the block. It was dirty already. + Wait until the block is flushed to file. Otherwise we could + modify the buffer contents just while it is written to file. + An unpredictable file block contents would be the result.
+ While we wait, several things can happen to the block, + including another flush. But the block cannot be reassigned to + another hash_link until we release our request on it. + */ + wait_on_queue(&block->wqueue[COND_FOR_SAVED], &keycache->cache_lock); + DBUG_ASSERT(keycache->can_be_used); + DBUG_ASSERT(block->status & (BLOCK_READ | BLOCK_IN_USE)); + /* Still must not be marked for free. */ + DBUG_ASSERT(!(block->status & BLOCK_REASSIGNED)); + DBUG_ASSERT(block->hash_link && (block->hash_link->block == block)); + } + + /* + We could perhaps release the cache_lock during access of the + data like in the other functions. Locks outside of the key cache + assure that readers and a writer do not access the same range of + data. Parallel accesses should happen only if the cache block + contains multiple index block(fragment)s. So different parts of + the buffer would be read/written. An attempt to flush during + memcpy() is prevented with BLOCK_FOR_UPDATE. + */ + if (!(block->status & BLOCK_ERROR)) + { +#if !defined(SERIALIZED_READ_FROM_CACHE) + keycache_pthread_mutex_unlock(&keycache->cache_lock); +#endif + memcpy(block->buffer+offset, buff, (size_t) read_length); + +#if !defined(SERIALIZED_READ_FROM_CACHE) + keycache_pthread_mutex_lock(&keycache->cache_lock); +#endif + } + + if (!dont_write) + { + /* Not used in the server. buff has been written to disk at start. */ + if ((block->status & BLOCK_CHANGED) && + (!offset && read_length >= keycache->key_cache_block_size)) + link_to_file_list(keycache, block, block->hash_link->file, 1); + } + else if (! (block->status & BLOCK_CHANGED)) + link_to_changed_list(keycache, block); + block->status|=BLOCK_READ; + /* + Allow block to be selected for to be freed. Since it is marked + BLOCK_CHANGED too, it won't be selected for to be freed without + a flush. 
+ */ + block->status&= ~BLOCK_FOR_UPDATE; + set_if_smaller(block->offset, offset); + set_if_bigger(block->length, read_length+offset); + + /* Threads may be waiting for the changes to be complete. */ + release_whole_queue(&block->wqueue[COND_FOR_REQUESTED]); + + /* + If only a part of the cache block is to be replaced, and the + rest has been read from file, then the cache lock has been + released for I/O and it could be possible that another thread + wants to evict or free the block and waits for it to be + released. So we must not just decrement hash_link->requests, but + also wake a waiting thread. + */ + remove_reader(block); + + /* Error injection for coverage testing. */ + DBUG_EXECUTE_IF("key_cache_write_block_error", + block->status|= BLOCK_ERROR;); + + /* Do not link erroneous blocks into the LRU ring, but free them. */ + if (!(block->status & BLOCK_ERROR)) + { + /* + Link the block into the LRU ring if it's the last submitted + request for the block. This enables eviction for the block. + */ + unreg_request(keycache, block, 1); + } + else + { + /* Pretend a "clean" block to avoid complications. */ + block->status&= ~(BLOCK_CHANGED); + free_block(keycache, block); + error= 1; + break; + } + + next_block: + buff+= read_length; + filepos+= read_length+offset; + offset= 0; + + } while ((length-= read_length)); + goto end; + } + +no_key_cache: + /* Key cache is not used */ + if (dont_write) + { + /* Used in the server. 
*/ + keycache->global_cache_w_requests++; + keycache->global_cache_write++; + if (locked_and_incremented) + keycache_pthread_mutex_unlock(&keycache->cache_lock); + if (my_pwrite(file, (uchar*) buff, length, filepos, + MYF(MY_NABP | MY_WAIT_IF_FULL))) + error=1; + if (locked_and_incremented) + keycache_pthread_mutex_lock(&keycache->cache_lock); + } + +end: + if (locked_and_incremented) + { + dec_counter_for_resize_op(keycache); + keycache_pthread_mutex_unlock(&keycache->cache_lock); + } + + if (MYSQL_KEYCACHE_WRITE_DONE_ENABLED()) + { + MYSQL_KEYCACHE_WRITE_DONE((ulong) (keycache->blocks_used * + keycache->key_cache_block_size), + (ulong) (keycache->blocks_unused * + keycache->key_cache_block_size)); + } + +#if !defined(DBUG_OFF) && defined(EXTRA_DEBUG) + DBUG_EXECUTE("exec", + test_key_cache(keycache, "end of key_cache_write", 1);); +#endif + DBUG_RETURN(error); +} + + +/* + Free block. + + SYNOPSIS + free_block() + keycache Pointer to a key cache data structure + block Pointer to the block to free + + DESCRIPTION + Remove reference to block from hash table. + Remove block from the chain of clean blocks. + Add block to the free list. + + NOTE + Block must not be free (status == 0). + Block must not be in free_block_list. + Block must not be in the LRU ring. + Block must not be in eviction (BLOCK_IN_EVICTION | BLOCK_IN_SWITCH). + Block must not be in free (BLOCK_REASSIGNED). + Block must not be in flush (BLOCK_IN_FLUSH). + Block must not be dirty (BLOCK_CHANGED). + Block must not be in changed_blocks (dirty) hash. + Block must be in file_blocks (clean) hash. + Block must refer to a hash_link. + Block must have a request registered on it. +*/ + +static void free_block(SIMPLE_KEY_CACHE_CB *keycache, BLOCK_LINK *block) +{ + KEYCACHE_THREAD_TRACE("free block"); + KEYCACHE_DBUG_PRINT("free_block", + ("block %u to be freed, hash_link %p status: %u", + BLOCK_NUMBER(block), block->hash_link, + block->status)); + /* + Assert that the block is not free already. 
And that it is in a clean + state. Note that the block might just be assigned to a hash_link and + not yet read (BLOCK_READ may not be set here). In this case a reader + is registered in the hash_link and free_block() will wait for it + below. + */ + DBUG_ASSERT((block->status & BLOCK_IN_USE) && + !(block->status & (BLOCK_IN_EVICTION | BLOCK_IN_SWITCH | + BLOCK_REASSIGNED | BLOCK_IN_FLUSH | + BLOCK_CHANGED | BLOCK_FOR_UPDATE))); + /* Assert that the block is in a file_blocks chain. */ + DBUG_ASSERT(block->prev_changed && *block->prev_changed == block); + /* Assert that the block is not in the LRU ring. */ + DBUG_ASSERT(!block->next_used && !block->prev_used); + /* + IMHO the below condition (if()) makes no sense. I can't see how it + could be possible that free_block() is entered with a NULL hash_link + pointer. The only place where it can become NULL is in free_block() + (or before its first use ever, but for those blocks free_block() is + not called). I don't remove the conditional as it cannot harm, but + place a DBUG_ASSERT to confirm my hypothesis. Eventually the + condition (if()) can be removed. + */ + DBUG_ASSERT(block->hash_link && block->hash_link->block == block); + if (block->hash_link) + { + /* + While waiting for readers to finish, new readers might request the + block. But since we set block->status|= BLOCK_REASSIGNED, they + will wait on block->wqueue[COND_FOR_SAVED]. They must be signalled + later. + */ + block->status|= BLOCK_REASSIGNED; + wait_for_readers(keycache, block); + /* + The block must not have been freed by another thread. Repeat some + checks. An additional requirement is that it must be read now + (BLOCK_READ).
+ */ + DBUG_ASSERT(block->hash_link && block->hash_link->block == block); + DBUG_ASSERT((block->status & (BLOCK_READ | BLOCK_IN_USE | + BLOCK_REASSIGNED)) && + !(block->status & (BLOCK_IN_EVICTION | BLOCK_IN_SWITCH | + BLOCK_IN_FLUSH | BLOCK_CHANGED | + BLOCK_FOR_UPDATE))); + DBUG_ASSERT(block->prev_changed && *block->prev_changed == block); + DBUG_ASSERT(!block->prev_used); + /* + Unset BLOCK_REASSIGNED again. If we hand the block to an evicting + thread (through unreg_request() below), other threads must not see + this flag. They could become confused. + */ + block->status&= ~BLOCK_REASSIGNED; + /* + Do not release the hash_link until the block is off all lists. + At least not if we hand it over for eviction in unreg_request(). + */ + } + + /* + Unregister the block request and link the block into the LRU ring. + This enables eviction for the block. If the LRU ring was empty and + threads are waiting for a block, then the block will be handed over + for eviction immediately. Otherwise we will unlink it from the LRU + ring again, without releasing the lock in between. So decrementing + the request counter and updating statistics are the only relevant + operations in this case. Assert that there are no other requests + registered. + */ + DBUG_ASSERT(block->requests == 1); + unreg_request(keycache, block, 0); + /* + Note that even without releasing the cache lock it is possible that + the block is immediately selected for eviction by link_block() and + thus not added to the LRU ring. In this case we must not touch the + block any more. + */ + if (block->status & BLOCK_IN_EVICTION) + return; + + /* Error blocks are not put into the LRU ring. */ + if (!(block->status & BLOCK_ERROR)) + { + /* Here the block must be in the LRU ring. Unlink it again.
*/ + DBUG_ASSERT(block->next_used && block->prev_used && + *block->prev_used == block); + unlink_block(keycache, block); + } + if (block->temperature == BLOCK_WARM) + keycache->warm_blocks--; + block->temperature= BLOCK_COLD; + + /* Remove from file_blocks hash. */ + unlink_changed(block); + + /* Remove reference to block from hash table. */ + unlink_hash(keycache, block->hash_link); + block->hash_link= NULL; + + block->status= 0; + block->length= 0; + block->offset= keycache->key_cache_block_size; + KEYCACHE_THREAD_TRACE("free block"); + KEYCACHE_DBUG_PRINT("free_block", ("block is freed")); + + /* Enforced by unlink_changed(), but just to be sure. */ + DBUG_ASSERT(!block->next_changed && !block->prev_changed); + /* Enforced by unlink_block(): not in LRU ring nor in free_block_list. */ + DBUG_ASSERT(!block->next_used && !block->prev_used); + /* Insert the free block in the free list. */ + block->next_used= keycache->free_block_list; + keycache->free_block_list= block; + /* Keep track of the number of currently unused blocks. */ + keycache->blocks_unused++; + + /* All pending requests for this page must be resubmitted. */ + release_whole_queue(&block->wqueue[COND_FOR_SAVED]); +} + + +static int cmp_sec_link(BLOCK_LINK **a, BLOCK_LINK **b) +{ + return (((*a)->hash_link->diskpos < (*b)->hash_link->diskpos) ? -1 : + ((*a)->hash_link->diskpos > (*b)->hash_link->diskpos) ? 
1 : 0); +} + + +/* + Flush a portion of changed blocks to disk, + free used blocks if requested. + + The function starts by releasing keycache->cache_lock, so it expects + the caller to hold it. The lock is dropped while the blocks are sorted + by disk position and around each write, and is re-acquired afterwards. + + RETURN + 0 ok + != 0 errno (or -1 if errno is 0) of the first failed write +*/ + +static int flush_cached_blocks(SIMPLE_KEY_CACHE_CB *keycache, + File file, BLOCK_LINK **cache, + BLOCK_LINK **end, + enum flush_type type) +{ + int error; + int last_errno= 0; + uint count= (uint) (end-cache); + + /* Don't lock the cache during the flush */ + keycache_pthread_mutex_unlock(&keycache->cache_lock); + /* + As all blocks referred to in 'cache' are marked by BLOCK_IN_FLUSH + we are guaranteed no thread will change them + */ + my_qsort((uchar*) cache, count, sizeof(*cache), (qsort_cmp) cmp_sec_link); + + keycache_pthread_mutex_lock(&keycache->cache_lock); + /* + Note: Do not break the loop. We have registered a request on every + block in 'cache'. These must be unregistered by free_block() or + unreg_request(). + */ + for ( ; cache != end ; cache++) + { + BLOCK_LINK *block= *cache; + + KEYCACHE_DBUG_PRINT("flush_cached_blocks", + ("block %u to be flushed", BLOCK_NUMBER(block))); + /* + If the block contents is going to be changed, we abandon the flush + for this block. flush_key_blocks_int() will restart its search and + handle the block properly. + */ + if (!(block->status & BLOCK_FOR_UPDATE)) + { + /* Blocks coming here must have a certain status. */ + DBUG_ASSERT(block->hash_link); + DBUG_ASSERT(block->hash_link->block == block); + DBUG_ASSERT(block->hash_link->file == file); + DBUG_ASSERT((block->status & ~BLOCK_IN_EVICTION) == + (BLOCK_READ | BLOCK_IN_FLUSH | BLOCK_CHANGED | BLOCK_IN_USE)); + block->status|= BLOCK_IN_FLUSHWRITE; + keycache_pthread_mutex_unlock(&keycache->cache_lock); + error= (int)my_pwrite(file, block->buffer + block->offset, + block->length - block->offset, + block->hash_link->diskpos + block->offset, + MYF(MY_NABP | MY_WAIT_IF_FULL)); + keycache_pthread_mutex_lock(&keycache->cache_lock); + keycache->global_cache_write++; + if (error) + { + block->status|= BLOCK_ERROR; + if (!last_errno) + last_errno= errno ?
errno : -1; + } + block->status&= ~BLOCK_IN_FLUSHWRITE; + /* Block must not have changed status except BLOCK_FOR_UPDATE. */ + DBUG_ASSERT(block->hash_link); + DBUG_ASSERT(block->hash_link->block == block); + DBUG_ASSERT(block->hash_link->file == file); + DBUG_ASSERT((block->status & ~(BLOCK_FOR_UPDATE | BLOCK_IN_EVICTION)) == + (BLOCK_READ | BLOCK_IN_FLUSH | BLOCK_CHANGED | BLOCK_IN_USE)); + /* + Set correct status and link in right queue for free or later use. + free_block() must not see BLOCK_CHANGED and it may need to wait + for readers of the block. These should not see the block in the + wrong hash. If not freeing the block, we need to have it in the + right queue anyway. + */ + link_to_file_list(keycache, block, file, 1); + } + block->status&= ~BLOCK_IN_FLUSH; + /* + Let to proceed for possible waiting requests to write to the block page. + It might happen only during an operation to resize the key cache. + */ + release_whole_queue(&block->wqueue[COND_FOR_SAVED]); + /* type will never be FLUSH_IGNORE_CHANGED here */ + if (!(type == FLUSH_KEEP || type == FLUSH_FORCE_WRITE) && + !(block->status & (BLOCK_IN_EVICTION | BLOCK_IN_SWITCH | + BLOCK_FOR_UPDATE))) + { + /* + Note that a request has been registered against the block in + flush_key_blocks_int(). + */ + free_block(keycache, block); + } + else + { + /* + Link the block into the LRU ring if it's the last submitted + request for the block. This enables eviction for the block. + Note that a request has been registered against the block in + flush_key_blocks_int(). 
+ */ + unreg_request(keycache, block, 1); + } + + } /* end of for ( ; cache != end ; cache++) */ + return last_errno; +} + + +/* + Flush all key blocks for a file to disk, but don't do any mutex locks + + SYNOPSIS + flush_key_blocks_int() + keycache pointer to a key cache data structure + file handler for the file to flush to + type type of the flush + + NOTES + This function doesn't do any mutex locks because it needs to be called both + from flush_simple_key_cache_blocks and flush_all_key_blocks (the latter one + does the mutex lock in the resize_key_cache() function). + + We only care about changed blocks that exist when the function is + entered. We do not guarantee that all changed blocks of the file are + flushed if more blocks change while this function is running. + + RETURN + 0 ok + 1 error +*/ + +static int flush_key_blocks_int(SIMPLE_KEY_CACHE_CB *keycache, + File file, enum flush_type type) +{ + BLOCK_LINK *cache_buff[FLUSH_CACHE],**cache; + int last_errno= 0; + int last_errcnt= 0; + DBUG_ENTER("flush_key_blocks_int"); + DBUG_PRINT("enter",("file: %d blocks_used: %lu blocks_changed: %lu", + file, keycache->blocks_used, keycache->blocks_changed)); + +#if !defined(DBUG_OFF) && defined(EXTRA_DEBUG) + DBUG_EXECUTE("check_keycache", + test_key_cache(keycache, "start of flush_key_blocks", 0);); +#endif + + DBUG_ASSERT(type != FLUSH_KEEP_LAZY); + cache= cache_buff; + if (keycache->disk_blocks > 0 && + (!my_disable_flush_key_blocks || type != FLUSH_KEEP)) + { + /* Key cache exists and flush is not disabled */ + int error= 0; + uint count= FLUSH_CACHE; + BLOCK_LINK **pos,**end; + BLOCK_LINK *first_in_switch= NULL; + BLOCK_LINK *last_in_flush; + BLOCK_LINK *last_for_update; + BLOCK_LINK *block, *next; +#if defined(KEYCACHE_DEBUG) + uint cnt=0; +#endif + + if (type != FLUSH_IGNORE_CHANGED) + { + /* + Count how many key blocks we have to cache to be able + to flush all dirty pages with minimum seek moves + */ + count= 0; + for (block=
keycache->changed_blocks[FILE_HASH(file, keycache)] ; + block ; + block= block->next_changed) + { + if ((block->hash_link->file == file) && + !(block->status & BLOCK_IN_FLUSH)) + { + count++; + KEYCACHE_DBUG_ASSERT(count<= keycache->blocks_used); + } + } + /* + Allocate a new buffer only if it's bigger than the one we have. + Assure that we always have some entries for the case that new + changed blocks appear while we need to wait for something. + If the allocation fails, fall back to the fixed-size buffer. + */ + if ((count > FLUSH_CACHE) && + !(cache= (BLOCK_LINK**) my_malloc(key_memory_KEY_CACHE, + sizeof(BLOCK_LINK*)*count, MYF(0)))) + cache= cache_buff; + /* + After a restart there could be more changed blocks than now. + So we should not let count become smaller than the fixed buffer. + */ + if (cache == cache_buff) + count= FLUSH_CACHE; + } + + /* Retrieve the blocks and write them to a buffer to be flushed */ +restart: + last_in_flush= NULL; + last_for_update= NULL; + end= (pos= cache)+count; + for (block= keycache->changed_blocks[FILE_HASH(file, keycache)] ; + block ; + block= next) + { +#if defined(KEYCACHE_DEBUG) + cnt++; + KEYCACHE_DBUG_ASSERT(cnt <= keycache->blocks_used); +#endif + next= block->next_changed; + if (block->hash_link->file == file) + { + if (!(block->status & (BLOCK_IN_FLUSH | BLOCK_FOR_UPDATE))) + { + /* + Note: The special handling of BLOCK_IN_SWITCH is obsolete + since we set BLOCK_IN_FLUSH if the eviction includes a + flush. It can be removed in a later version. + */ + if (!(block->status & BLOCK_IN_SWITCH)) + { + /* + We care only for the blocks for which flushing was not + initiated by another thread and which are not in eviction. + Registering a request on the block unlinks it from the LRU + ring and protects against eviction. + */ + reg_requests(keycache, block, 1); + if (type != FLUSH_IGNORE_CHANGED) + { + /* It's not a temporary file */ + if (pos == end) + { + /* + This should happen relatively seldom.
Remove the + request because we won't do anything with the block + but restart and pick it again in the next iteration. + */ + unreg_request(keycache, block, 0); + /* + This happens only if there is not enough + memory for the big block + */ + if ((error= flush_cached_blocks(keycache, file, cache, + end,type))) + { + /* Do not loop infinitely trying to flush in vain. */ + if ((last_errno == error) && (++last_errcnt > 5)) + goto err; + last_errno= error; + } + /* + Restart the scan as some other thread might have changed + the changed blocks chain: the blocks that were in switch + state before the flush started have to be excluded + */ + goto restart; + } + /* + Mark the block with BLOCK_IN_FLUSH in order not to let + other threads to use it for new pages and interfere with + our sequence of flushing dirty file pages. We must not + set this flag before actually putting the block on the + write burst array called 'cache'. + */ + block->status|= BLOCK_IN_FLUSH; + /* Add block to the array for a write burst. */ + *pos++= block; + } + else + { + /* It's a temporary file */ + DBUG_ASSERT(!(block->status & BLOCK_REASSIGNED)); + /* + free_block() must not be called with BLOCK_CHANGED. Note + that we must not change the BLOCK_CHANGED flag outside of + link_to_file_list() so that it is always in the correct + queue and the *blocks_changed counters are correct. + */ + link_to_file_list(keycache, block, file, 1); + if (!(block->status & (BLOCK_IN_EVICTION | BLOCK_IN_SWITCH))) + { + /* A request has been registered against the block above. */ + free_block(keycache, block); + } + else + { + /* + Link the block into the LRU ring if it's the last + submitted request for the block. This enables eviction + for the block. A request has been registered against + the block above. + */ + unreg_request(keycache, block, 1); + } + } + } + else + { + /* + Link the block into a list of blocks 'in switch'. 
+ + WARNING: Here we introduce a place where a changed block + is not in the changed_blocks hash! This is acceptable for + a BLOCK_IN_SWITCH. Never try this for another situation. + Other parts of the key cache code rely on changed blocks + being in the changed_blocks hash. + */ + unlink_changed(block); + link_changed(block, &first_in_switch); + } + } + else if (type != FLUSH_KEEP) + { + /* + During the normal flush at end of statement (FLUSH_KEEP) we + do not need to ensure that blocks in flush or update by + other threads are flushed. They will be flushed by them + later. In all other cases we must assure that we do not have + any changed block of this file in the cache when this + function returns. + */ + if (block->status & BLOCK_IN_FLUSH) + { + /* Remember the last block found to be in flush. */ + last_in_flush= block; + } + else + { + /* Remember the last block found to be selected for update. */ + last_for_update= block; + } + } + } + } + if (pos != cache) + { + if ((error= flush_cached_blocks(keycache, file, cache, pos, type))) + { + /* Do not loop infinitely trying to flush in vain. */ + if ((last_errno == error) && (++last_errcnt > 5)) + goto err; + last_errno= error; + } + /* + Do not restart here during the normal flush at end of statement + (FLUSH_KEEP). We have now flushed at least all blocks that were + changed when entering this function. In all other cases we must + assure that we do not have any changed block of this file in the + cache when this function returns. + */ + if (type != FLUSH_KEEP) + goto restart; + } + if (last_in_flush) + { + /* + There are no blocks to be flushed by this thread, but blocks in + flush by other threads. Wait until one of the blocks is flushed. + Re-check the condition for last_in_flush. We may have unlocked + the cache_lock in flush_cached_blocks(). The state of the block + could have changed.
+ */ + if (last_in_flush->status & BLOCK_IN_FLUSH) + wait_on_queue(&last_in_flush->wqueue[COND_FOR_SAVED], + &keycache->cache_lock); + /* Be sure not to lose a block. They may be flushed in random order. */ + goto restart; + } + if (last_for_update) + { + /* + There are no blocks to be flushed by this thread, but blocks for + update by other threads. Wait until one of the blocks is updated. + Re-check the condition for last_for_update. We may have unlocked + the cache_lock in flush_cached_blocks(). The state of the block + could have changed. + */ + if (last_for_update->status & BLOCK_FOR_UPDATE) + wait_on_queue(&last_for_update->wqueue[COND_FOR_REQUESTED], + &keycache->cache_lock); + /* The block is now changed. Flush it. */ + goto restart; + } + + /* + Wait until the list of blocks in switch is empty. The threads that + are switching these blocks will relink them to clean file chains + while we wait and thus empty the 'first_in_switch' chain. + */ + while (first_in_switch) + { +#if defined(KEYCACHE_DEBUG) + cnt= 0; +#endif + wait_on_queue(&first_in_switch->wqueue[COND_FOR_SAVED], + &keycache->cache_lock); +#if defined(KEYCACHE_DEBUG) + cnt++; + KEYCACHE_DBUG_ASSERT(cnt <= keycache->blocks_used); +#endif + /* + Do not restart here. We have flushed all blocks that were + changed when entering this function and were not marked for + eviction. Other threads have now flushed all remaining blocks in + the course of their eviction. + */ + } + + if (! (type == FLUSH_KEEP || type == FLUSH_FORCE_WRITE)) + { + BLOCK_LINK *last_in_switch= NULL; + uint total_found= 0; + uint found; + last_for_update= NULL; + + /* + Finally free all clean blocks for this file. + During resize this may be run by two threads in parallel. + */ + do + { + found= 0; + for (block= keycache->file_blocks[FILE_HASH(file, keycache)] ; + block ; + block= next) + { + /* Remember the next block. After freeing we cannot get at it. 
*/ + next= block->next_changed; + + /* Changed blocks cannot appear in the file_blocks hash. */ + DBUG_ASSERT(!(block->status & BLOCK_CHANGED)); + if (block->hash_link->file == file) + { + /* We must skip blocks that will be changed. */ + if (block->status & BLOCK_FOR_UPDATE) + { + last_for_update= block; + continue; + } + + /* + We must not free blocks in eviction (BLOCK_IN_EVICTION | + BLOCK_IN_SWITCH) or blocks intended to be freed + (BLOCK_REASSIGNED). + */ + if (!(block->status & (BLOCK_IN_EVICTION | BLOCK_IN_SWITCH | + BLOCK_REASSIGNED))) + { + struct st_hash_link *UNINIT_VAR(next_hash_link); + my_off_t UNINIT_VAR(next_diskpos); + File UNINIT_VAR(next_file); + uint UNINIT_VAR(next_status); + uint UNINIT_VAR(hash_requests); + + total_found++; + found++; + KEYCACHE_DBUG_ASSERT(found <= keycache->blocks_used); + + /* + Register a request. This unlinks the block from the LRU + ring and protects it against eviction. This is required + by free_block(). + */ + reg_requests(keycache, block, 1); + + /* + free_block() may need to wait for readers of the block. + This is the moment where the other thread can move the + 'next' block from the chain. free_block() needs to wait + if there are requests for the block pending. + */ + if (next && (hash_requests= block->hash_link->requests)) + { + /* Copy values from the 'next' block and its hash_link. */ + next_status= next->status; + next_hash_link= next->hash_link; + next_diskpos= next_hash_link->diskpos; + next_file= next_hash_link->file; + DBUG_ASSERT(next == next_hash_link->block); + } + + free_block(keycache, block); + /* + If we had to wait and the state of the 'next' block + changed, break the inner loop. 'next' may no longer be + part of the current chain. + + We do not want to break the loop after every free_block(), + not even only after waits. The chain might be quite long + and contain blocks for many files. Traversing it again and + again to find more blocks for this file could become quite + inefficient. 
+ */ + if (next && hash_requests && + ((next_status != next->status) || + (next_hash_link != next->hash_link) || + (next_file != next_hash_link->file) || + (next_diskpos != next_hash_link->diskpos) || + (next != next_hash_link->block))) + break; + } + else + { + last_in_switch= block; + } + } + } /* end for block in file_blocks */ + } while (found); + + /* + If any clean block has been found, we may have waited for it to + become free. In this case it could be possible that another clean + block became dirty. This is possible if the write request existed + before the flush started (BLOCK_FOR_UPDATE). Re-check the hashes. + */ + if (total_found) + goto restart; + + /* + To avoid an infinite loop, wait until one of the blocks marked + for update is updated. + */ + if (last_for_update) + { + /* We did not wait. Block must not have changed status. */ + DBUG_ASSERT(last_for_update->status & BLOCK_FOR_UPDATE); + wait_on_queue(&last_for_update->wqueue[COND_FOR_REQUESTED], + &keycache->cache_lock); + goto restart; + } + + /* + To avoid an infinite loop wait until one of the blocks marked + for eviction is switched. + */ + if (last_in_switch) + { + /* We did not wait. Block must not have changed status. */ + DBUG_ASSERT(last_in_switch->status & (BLOCK_IN_EVICTION | + BLOCK_IN_SWITCH | + BLOCK_REASSIGNED)); + wait_on_queue(&last_in_switch->wqueue[COND_FOR_SAVED], + &keycache->cache_lock); + goto restart; + } + + } /* if (! 
(type == FLUSH_KEEP || type == FLUSH_FORCE_WRITE)) */ + + } /* if (keycache->disk_blocks > 0 */ + + DBUG_EXECUTE("check_keycache", + test_key_cache(keycache, "end of flush_key_blocks", 0);); +err: + if (cache != cache_buff) + my_free(cache); + if (last_errno) + errno=last_errno; /* Report the error of the most recent failed flush batch */ + DBUG_RETURN(last_errno != 0); +} + + +/* + Flush all blocks for a file from key buffers of a simple key cache + + SYNOPSIS + + flush_simple_key_cache_blocks() + keycache pointer to the control block of a simple key cache + file handler for the file to flush to + file_extra maps of key cache partitions containing + dirty pages from file (not used) + type type of the flush operation + + DESCRIPTION + This function is the implementation of the flush_key_blocks interface + function that is employed by simple (non-partitioned) key caches. + The function takes the parameter keycache as a pointer to the + control block structure of the type SIMPLE_KEY_CACHE_CB for a simple key + cache. + In a general case the function flushes the data from all dirty key + buffers related to the file 'file' into this file. The function does + exactly this if the value of the parameter type is FLUSH_KEEP. If the + value of this parameter is FLUSH_RELEASE, the function additionally + releases the key buffers containing data from 'file' for new usage. + If the value of the parameter type is FLUSH_IGNORE_CHANGED the function + just releases the key buffers containing data from 'file'. + The parameter file_extra currently is not used by this function. + + RETURN + 0 ok + 1 error + + NOTES + This implementation exploits the fact that the function is called only + when a thread has got an exclusive lock for the key file.
+*/ + +static +int flush_simple_key_cache_blocks(SIMPLE_KEY_CACHE_CB *keycache, + File file, + void *file_extra __attribute__((unused)), + enum flush_type type) +{ + int res= 0; + DBUG_ENTER("flush_key_blocks"); + DBUG_PRINT("enter", ("keycache: %p", keycache)); + + if (!keycache->key_cache_inited) + DBUG_RETURN(0); + + keycache_pthread_mutex_lock(&keycache->cache_lock); + /* While waiting for lock, keycache could have been ended. */ + if (keycache->disk_blocks > 0) + { + inc_counter_for_resize_op(keycache); + res= flush_key_blocks_int(keycache, file, type); + dec_counter_for_resize_op(keycache); + } + keycache_pthread_mutex_unlock(&keycache->cache_lock); + DBUG_RETURN(res); +} + + +/* + Flush all blocks in the key cache to disk. + + SYNOPSIS + flush_all_key_blocks() + keycache pointer to key cache root structure + + DESCRIPTION + + Flushing of the whole key cache is done in two phases. + + 1. Flush all changed blocks, waiting for them if necessary. Loop + until there is no changed block left in the cache. + + 2. Free all clean blocks. Normally this means free all blocks. The + changed blocks were flushed in phase 1 and became clean. However we + may need to wait for blocks that are read by other threads. While we + wait, a clean block could become changed if that operation started + before the resize operation started. To be safe we must restart at + phase 1. + + When we can run through the changed_blocks and file_blocks hashes + without finding a block any more, then we are done. + + Note that we hold keycache->cache_lock all the time unless we need + to wait for something.
+ + RETURN + 0 OK + != 0 Error +*/ + +static int flush_all_key_blocks(SIMPLE_KEY_CACHE_CB *keycache) +{ + BLOCK_LINK *block; + uint total_found; + uint found; + uint idx; + uint changed_blocks_hash_size= keycache->changed_blocks_hash_size; + DBUG_ENTER("flush_all_key_blocks"); + + do + { + mysql_mutex_assert_owner(&keycache->cache_lock); + total_found= 0; + + /* + Phase1: Flush all changed blocks, waiting for them if necessary. + Loop until there is no changed block left in the cache. + */ + do + { + found= 0; + /* Step over the whole changed_blocks hash array. */ + for (idx= 0; idx < changed_blocks_hash_size; idx++) + { + /* + If an array element is non-empty, use the first block from its + chain to find a file for flush. All changed blocks for this + file are flushed. So the same block will not appear at this + place again with the next iteration. New writes for blocks are + not accepted during the flush. If multiple files share the + same hash bucket, one of them will be flushed per iteration + of the outer loop of phase 1. + */ + while ((block= keycache->changed_blocks[idx])) + { + found++; + /* + Flush dirty blocks but do not free them yet. They can be used + for reading until all other blocks are flushed too. + */ + if (flush_key_blocks_int(keycache, block->hash_link->file, + FLUSH_FORCE_WRITE)) + DBUG_RETURN(1); + } + } + } while (found); + + /* + Phase 2: Free all clean blocks. Normally this means free all + blocks. The changed blocks were flushed in phase 1 and became + clean. However we may need to wait for blocks that are read by + other threads. While we wait, a clean block could become changed + if that operation started before the resize operation started. To + be safe we must restart at phase 1. + */ + do + { + found= 0; + /* Step over the whole file_blocks hash array. */ + for (idx= 0; idx < changed_blocks_hash_size; idx++) + { + /* + If an array element is non-empty, use the first block from its + chain to find a file for flush. 
All blocks for this file are + freed. So the same block will not appear at this place again + with the next iteration. If multiple files share the + same hash bucket, one of them will be flushed per iteration + of the outer loop of phase 2. + */ + while ((block= keycache->file_blocks[idx])) + { + total_found++; + found++; + if (flush_key_blocks_int(keycache, block->hash_link->file, + FLUSH_RELEASE)) + DBUG_RETURN(1); + } + } + } while (found); + + /* + If any clean block has been found, we may have waited for it to + become free. In this case it could be possible that another clean + block became dirty. This is possible if the write request existed + before the resize started (BLOCK_FOR_UPDATE). Re-check the hashes. + */ + } while (total_found); + +#ifndef DBUG_OFF + /* Now there should not exist any block any more. */ + for (idx= 0; idx < changed_blocks_hash_size; idx++) + { + DBUG_ASSERT(!keycache->changed_blocks[idx]); + DBUG_ASSERT(!keycache->file_blocks[idx]); + } +#endif + + DBUG_RETURN(0); +} + + +/* + Reset the counters of a simple key cache + + SYNOPSIS + reset_simple_key_cache_counters() + name the name of a key cache + keycache pointer to the control block of a simple key cache + + DESCRIPTION + This function is the implementation of the reset_key_cache_counters + interface function that is employed by simple (non-partitioned) key caches. + The function takes the parameter keycache as a pointer to the + control block structure of the type S_KEY_CACHE_CB for a simple key cache. + This function resets the values of all statistical counters for the key + cache to 0. + The parameter name is currently not used. 
+ + RETURN + 0 on success (always because it can't fail) +*/ + +static +int reset_simple_key_cache_counters(const char *name __attribute__((unused)), + SIMPLE_KEY_CACHE_CB *keycache) +{ + DBUG_ENTER("reset_simple_key_cache_counters"); + if (!keycache->key_cache_inited) + { + DBUG_PRINT("info", ("Key cache %s not initialized.", name)); + DBUG_RETURN(0); + } + DBUG_PRINT("info", ("Resetting counters for key cache %s.", name)); + + keycache->global_blocks_changed= 0; /* Key_blocks_not_flushed */ + keycache->global_cache_r_requests= 0; /* Key_read_requests */ + keycache->global_cache_read= 0; /* Key_reads */ + keycache->global_cache_w_requests= 0; /* Key_write_requests */ + keycache->global_cache_write= 0; /* Key_writes */ + DBUG_RETURN(0); +} + + +#ifndef DBUG_OFF +/* + Test if disk-cache is ok +*/ +static +void test_key_cache(SIMPLE_KEY_CACHE_CB *keycache __attribute__((unused)), + const char *where __attribute__((unused)), + my_bool lock __attribute__((unused))) +{ + /* TODO */ +} +#endif + +#if defined(KEYCACHE_TIMEOUT) + +#define KEYCACHE_DUMP_FILE "keycache_dump.txt" +#define MAX_QUEUE_LEN 100 + + +static void keycache_dump(SIMPLE_KEY_CACHE_CB *keycache) +{ + FILE *keycache_dump_file=fopen(KEYCACHE_DUMP_FILE, "w"); + struct st_my_thread_var *last; + struct st_my_thread_var *thread; + BLOCK_LINK *block; + HASH_LINK *hash_link; + KEYCACHE_PAGE *page; + uint i; + + fprintf(keycache_dump_file, "thread:%lu\n", (ulong) thread->id); + + i=0; + thread=last=waiting_for_hash_link.last_thread; + fprintf(keycache_dump_file, "queue of threads waiting for hash link\n"); + if (thread) + do + { + thread=thread->next; + page= (KEYCACHE_PAGE *) thread->keycache_link; + fprintf(keycache_dump_file, + "thread:%lu, (file,filepos)=(%u,%lu)\n", + (ulong) thread->id,(uint) page->file,(ulong) page->filepos); + if (++i == MAX_QUEUE_LEN) + break; + } + while (thread != last); + + i=0; + thread=last=waiting_for_block.last_thread; + fprintf(keycache_dump_file, "queue of threads waiting for 
block\n"); + if (thread) + do + { + thread=thread->next; + hash_link= (HASH_LINK *) thread->keycache_link; + fprintf(keycache_dump_file, + "thread:%lu hash_link:%u (file,filepos)=(%u,%lu)\n", + (ulong) thread->id, (uint) HASH_LINK_NUMBER(hash_link), + (uint) hash_link->file,(ulong) hash_link->diskpos); + if (++i == MAX_QUEUE_LEN) + break; + } + while (thread != last); + + for (i=0 ; i< keycache->blocks_used ; i++) + { + int j; + block= &keycache->block_root[i]; + hash_link= block->hash_link; + fprintf(keycache_dump_file, + "block:%u hash_link:%d status:%x #requests=%u waiting_for_readers:%d\n", + i, (int) (hash_link ? HASH_LINK_NUMBER(hash_link) : -1), + block->status, block->requests, block->condvar ? 1 : 0); + for (j=0 ; j < 2; j++) + { + KEYCACHE_WQUEUE *wqueue=&block->wqueue[j]; + thread= last= wqueue->last_thread; + fprintf(keycache_dump_file, "queue #%d\n", j); + if (thread) + { + do + { + thread=thread->next; + fprintf(keycache_dump_file, + "thread:%lu\n", (ulong) thread->id); + if (++i == MAX_QUEUE_LEN) + break; + } + while (thread != last); + } + } + } + fprintf(keycache_dump_file, "LRU chain:"); + block= keycache= used_last; + if (block) + { + do + { + block= block->next_used; + fprintf(keycache_dump_file, + "block:%u, ", BLOCK_NUMBER(block)); + } + while (block != keycache->used_last); + } + fprintf(keycache_dump_file, "\n"); + + fclose(keycache_dump_file); +} + +#endif /* defined(KEYCACHE_TIMEOUT) */ + +#if defined(KEYCACHE_TIMEOUT) && !defined(_WIN32) + + +static int keycache_pthread_cond_wait(mysql_cond_t *cond, + mysql_mutex_t *mutex) +{ + int rc; + struct timeval now; /* time when we started waiting */ + struct timespec timeout; /* timeout value for the wait function */ + struct timezone tz; +#if defined(KEYCACHE_DEBUG) + int cnt=0; +#endif + + /* Get current time */ + gettimeofday(&now, &tz); + /* Prepare timeout value */ + timeout.tv_sec= now.tv_sec + KEYCACHE_TIMEOUT; + /* + timeval uses microseconds. + timespec uses nanoseconds. 
+ 1 nanosecond = 1000 micro seconds + */ + timeout.tv_nsec= now.tv_usec * 1000; + KEYCACHE_THREAD_TRACE_END("started waiting"); +#if defined(KEYCACHE_DEBUG) + cnt++; + if (cnt % 100 == 0) + fprintf(keycache_debug_log, "waiting...\n"); + fflush(keycache_debug_log); +#endif + rc= mysql_cond_timedwait(cond, mutex, &timeout); + KEYCACHE_THREAD_TRACE_BEGIN("finished waiting"); + if (rc == ETIMEDOUT || rc == ETIME) + { +#if defined(KEYCACHE_DEBUG) + fprintf(keycache_debug_log,"aborted by keycache timeout\n"); + fclose(keycache_debug_log); + abort(); +#endif + keycache_dump(); + } + +#if defined(KEYCACHE_DEBUG) + KEYCACHE_DBUG_ASSERT(rc != ETIMEDOUT); +#else + assert(rc != ETIMEDOUT); +#endif + return rc; +} +#else +#if defined(KEYCACHE_DEBUG) +static int keycache_pthread_cond_wait(mysql_cond_t *cond, + mysql_mutex_t *mutex) +{ + int rc; + KEYCACHE_THREAD_TRACE_END("started waiting"); + rc= mysql_cond_wait(cond, mutex); + KEYCACHE_THREAD_TRACE_BEGIN("finished waiting"); + return rc; +} +#endif +#endif /* defined(KEYCACHE_TIMEOUT) && !defined(_WIN32) */ + +#if defined(KEYCACHE_DEBUG) + + +static int keycache_pthread_mutex_lock(mysql_mutex_t *mutex) +{ + int rc; + rc= mysql_mutex_lock(mutex); + KEYCACHE_THREAD_TRACE_BEGIN(""); + return rc; +} + + +static void keycache_pthread_mutex_unlock(mysql_mutex_t *mutex) +{ + KEYCACHE_THREAD_TRACE_END(""); + mysql_mutex_unlock(mutex); +} + + +static int keycache_pthread_cond_signal(mysql_cond_t *cond) +{ + int rc; + KEYCACHE_THREAD_TRACE("signal"); + rc= mysql_cond_signal(cond); + return rc; +} + + +#if defined(KEYCACHE_DEBUG_LOG) + + +static void keycache_debug_print(const char * fmt,...) 
+{ + va_list args; + va_start(args,fmt); + if (keycache_debug_log) + { + (void) vfprintf(keycache_debug_log, fmt, args); + (void) fputc('\n',keycache_debug_log); + } + va_end(args); +} +#endif /* defined(KEYCACHE_DEBUG_LOG) */ + +#if defined(KEYCACHE_DEBUG_LOG) + + +void keycache_debug_log_close(void) +{ + if (keycache_debug_log) + fclose(keycache_debug_log); +} +#endif /* defined(KEYCACHE_DEBUG_LOG) */ + +#endif /* defined(KEYCACHE_DEBUG) */ + +#ifdef DBUG_ASSERT_EXISTS +#define F_B_PRT(_f_, _v_) DBUG_PRINT("assert_fail", (_f_, _v_)) + +static int fail_block(BLOCK_LINK *block __attribute__((unused))) +{ +#ifndef DBUG_OFF + F_B_PRT("block->next_used: %p\n", block->next_used); + F_B_PRT("block->prev_used: %p\n", block->prev_used); + F_B_PRT("block->next_changed: %p\n", block->next_changed); + F_B_PRT("block->prev_changed: %p\n", block->prev_changed); + F_B_PRT("block->hash_link: %p\n", block->hash_link); + F_B_PRT("block->status: %u\n", block->status); + F_B_PRT("block->length: %u\n", block->length); + F_B_PRT("block->offset: %u\n", block->offset); + F_B_PRT("block->requests: %u\n", block->requests); + F_B_PRT("block->temperature: %u\n", block->temperature); +#endif + return 0; /* Let the assert fail. */ +} +#endif + +#ifndef DBUG_OFF +static int fail_hlink(HASH_LINK *hlink __attribute__((unused))) +{ + F_B_PRT("hlink->next: %p\n", hlink->next); + F_B_PRT("hlink->prev: %p\n", hlink->prev); + F_B_PRT("hlink->block: %p\n", hlink->block); + F_B_PRT("hlink->diskpos: %lu\n", (ulong) hlink->diskpos); + F_B_PRT("hlink->file: %d\n", hlink->file); + return 0; /* Let the assert fail. 
*/ +} + +static int cache_empty(SIMPLE_KEY_CACHE_CB *keycache) +{ + int errcnt= 0; + int idx; + if (keycache->disk_blocks <= 0) + return 1; + for (idx= 0; idx < keycache->disk_blocks; idx++) + { + BLOCK_LINK *block= keycache->block_root + idx; + if (block->status || block->requests || block->hash_link) + { + fprintf(stderr, "block index: %u\n", idx); + fail_block(block); + errcnt++; + } + } + for (idx= 0; idx < keycache->hash_links; idx++) + { + HASH_LINK *hash_link= keycache->hash_link_root + idx; + if (hash_link->requests || hash_link->block) + { + fprintf(stderr, "hash_link index: %u\n", idx); + fail_hlink(hash_link); + errcnt++; + } + } + if (errcnt) + { + fprintf(stderr, "blocks: %d used: %zu\n", + keycache->disk_blocks, keycache->blocks_used); + fprintf(stderr, "hash_links: %d used: %d\n", + keycache->hash_links, keycache->hash_links_used); + fprintf(stderr, "\n"); + } + return !errcnt; +} +#endif + + +/* + Get statistics for a simple key cache + + SYNOPSIS + get_simple_key_cache_statistics() + keycache pointer to the control block of a simple key cache + partition_no partition number (not used) + key_cache_stats OUT pointer to the structure for the returned statistics + + DESCRIPTION + This function is the implementation of the get_key_cache_statistics + interface function that is employed by simple (non-partitioned) key caches. + The function takes the parameter keycache as a pointer to the + control block structure of the type SIMPLE_KEY_CACHE_CB for a simple key + cache. This function returns the statistical data for the key cache. + The parameter partition_no is not used by this function. 
+ + RETURN + none +*/ + +static +void get_simple_key_cache_statistics(SIMPLE_KEY_CACHE_CB *keycache, + uint partition_no __attribute__((unused)), + KEY_CACHE_STATISTICS *keycache_stats) +{ + DBUG_ENTER("simple_get_key_cache_statistics"); + + keycache_stats->mem_size= (longlong) keycache->key_cache_mem_size; + keycache_stats->block_size= (longlong) keycache->key_cache_block_size; + keycache_stats->blocks_used= keycache->blocks_used; + keycache_stats->blocks_unused= keycache->blocks_unused; + keycache_stats->blocks_changed= keycache->global_blocks_changed; + keycache_stats->blocks_warm= keycache->warm_blocks; + keycache_stats->read_requests= keycache->global_cache_r_requests; + keycache_stats->reads= keycache->global_cache_read; + keycache_stats->write_requests= keycache->global_cache_w_requests; + keycache_stats->writes= keycache->global_cache_write; + DBUG_VOID_RETURN; +} + + +/* + The array of pointer to the key cache interface functions used for simple + key caches. Any simple key cache objects including those incorporated into + partitioned keys caches exploit this array. + + The current implementation of these functions allows to call them from + the MySQL server code directly. We don't do it though. 
+*/ + +static KEY_CACHE_FUNCS simple_key_cache_funcs = +{ + /* Positional initializer: entries are matched to the members of KEY_CACHE_FUNCS by their order. */ + (INIT_KEY_CACHE) init_simple_key_cache, + (RESIZE_KEY_CACHE) resize_simple_key_cache, + (CHANGE_KEY_CACHE_PARAM) change_simple_key_cache_param, + (KEY_CACHE_READ) simple_key_cache_read, + (KEY_CACHE_INSERT) simple_key_cache_insert, + (KEY_CACHE_WRITE) simple_key_cache_write, + (FLUSH_KEY_BLOCKS) flush_simple_key_cache_blocks, + (RESET_KEY_CACHE_COUNTERS) reset_simple_key_cache_counters, + (END_KEY_CACHE) end_simple_key_cache, + (GET_KEY_CACHE_STATISTICS) get_simple_key_cache_statistics, +}; + + +/****************************************************************************** + Partitioned Key Cache Module + + The module contains implementations of all key cache interface functions + employed by partitioned key caches. + + A partitioned key cache is a collection of structures for simple key caches + called key cache partitions. Any page from a file can be placed into a buffer + of only one partition. The number of the partition is calculated from + the file number and the position of the page in the file, and it's always the + same for the page. The function that maps pages into partitions takes care + of even distribution of pages among partitions. + + A partitioned key cache mitigates one of the major problems of a simple key cache: + thread contention for key cache lock (mutex). Every call of a key cache + interface function must acquire this lock. So threads compete for this lock + even in the case when they have acquired shared locks for the file and + pages they want to read from are in the key cache buffers. + When working with a partitioned key cache any key cache interface function + that needs only one page has to acquire the key cache lock only for the + partition the page is ascribed to. This makes the chances for threads not + to compete for the same key cache lock better. 
Unfortunately if we use a + partitioned key cache with N partitions for B-tree indexes we can't say + that the chances become N times less. The fact is that any index lookup + operation requires reading from the root page that, for any index, is always + ascribed to the same partition. To resolve this problem we should have + employed more sophisticated mechanisms of working with root pages. + + Currently the number of partitions in a partitioned key cache is limited + to 64. We could increase this limit. Simultaneously we would have to increase + accordingly the size of the bitmap dirty_part_map from the MYISAM_SHARE + structure. + +******************************************************************************/ + +/* Control block for a partitioned key cache */ + +typedef struct st_partitioned_key_cache_cb +{ + my_bool key_cache_inited; /* <=> partition_array has been allocated */ + SIMPLE_KEY_CACHE_CB **partition_array; /* the key cache partitions */ + size_t key_cache_mem_size; /* memory per partition * number of partitions */ + uint key_cache_block_size; /* size of the page buffer of a cache block */ + uint partitions; /* number of partitions in the key cache */ +} PARTITIONED_KEY_CACHE_CB; + +static +void end_partitioned_key_cache(PARTITIONED_KEY_CACHE_CB *keycache, + my_bool cleanup); + +static int +reset_partitioned_key_cache_counters(const char *name, + PARTITIONED_KEY_CACHE_CB *keycache); + +/* + Determine the partition to which the index block to read is ascribed + + SYNOPSIS + get_key_cache_partition() + keycache pointer to the control block of a partitioned key cache + file handler for the file for the block of data to be read + filepos position of the block of data in the file + + DESCRIPTION + The function determines the number of the partition in whose buffer the + block from 'file' at the position filepos has to be placed for reading. + The function returns the control block of the simple key cache for this + partition to the caller. 
+ + RETURN VALUE + The pointer to the control block of the partition to which the specified + file block is ascribed. +*/ + +static +SIMPLE_KEY_CACHE_CB * +get_key_cache_partition(PARTITIONED_KEY_CACHE_CB *keycache, + File file, my_off_t filepos) +{ + uint i= KEYCACHE_BASE_EXPR(file, filepos) % keycache->partitions; + return keycache->partition_array[i]; +} + + +/* + Determine the partition to which the index block to write is ascribed + + SYNOPSIS + get_key_cache_partition() + keycache pointer to the control block of a partitioned key cache + file handler for the file for the block of data to be read + filepos position of the block of data in the file + dirty_part_map pointer to the bitmap of dirty partitions for the file + + DESCRIPTION + The function determines the number of the partition in whose buffer the + block from 'file' at the position filepos has to be placed for writing and + marks the partition as dirty in the dirty_part_map bitmap. + The function returns the control block of the simple key cache for this + partition to the caller. + + RETURN VALUE + The pointer to the control block of the partition to which the specified + file block is ascribed. +*/ + +static SIMPLE_KEY_CACHE_CB +*get_key_cache_partition_for_write(PARTITIONED_KEY_CACHE_CB *keycache, + File file, my_off_t filepos, + ulonglong* dirty_part_map) +{ + uint i= KEYCACHE_BASE_EXPR( file, filepos) % keycache->partitions; + *dirty_part_map|= 1ULL << i; + return keycache->partition_array[i]; +} + + +/* + Initialize a partitioned key cache + + SYNOPSIS + init_partitioned_key_cache() + keycache pointer to the control block of a partitioned key cache + key_cache_block_size size of blocks to keep cached data + use_mem total memory to use for all key cache partitions + division_limit division limit (may be zero) + age_threshold age threshold (may be zero) + + DESCRIPTION + This function is the implementation of the init_key_cache + interface function that is employed by partitioned key caches. 
+ + The function builds and initializes an array of simple key caches, + and then initializes the control block structure of the type + PARTITIONED_KEY_CACHE_CB that is used for a partitioned key + cache. The parameter keycache is supposed to point to this + structure. The number of partitions in the partitioned key cache + to be built must be passed through the field 'partitions' of this + structure. + The parameter key_cache_block_size specifies the size of the + blocks in the simple key caches to be built. + The parameters division_limit and age_threshold determine the initial + values of those characteristics of the simple key caches that are used for + midpoint insertion strategy. The parameter use_mem specifies the total + amount of memory to be allocated for the key cache blocks in all simple key + caches and for all auxiliary structures. + + RETURN VALUE + total number of blocks in key cache partitions, if successful, + <= 0 - otherwise. + + NOTES + If keycache->key_cache_inited != 0 then we assume that the memory for + the array of partitions has been already allocated. + + It's assumed that no two threads call this function simultaneously + referring to the same key cache handle. 
+*/ + +static +int init_partitioned_key_cache(PARTITIONED_KEY_CACHE_CB *keycache, + uint key_cache_block_size, + size_t use_mem, uint division_limit, + uint age_threshold, uint changed_blocks_hash_size) +{ + int i; + size_t mem_per_cache; + size_t mem_decr; + int cnt; + SIMPLE_KEY_CACHE_CB *partition; + SIMPLE_KEY_CACHE_CB **partition_ptr; + uint partitions= keycache->partitions; + int blocks= 0; + DBUG_ENTER("partitioned_init_key_cache"); + + keycache->key_cache_block_size = key_cache_block_size; + + if (keycache->key_cache_inited) + partition_ptr= keycache->partition_array; + else + { + if(!(partition_ptr= + (SIMPLE_KEY_CACHE_CB **) my_malloc(key_memory_KEY_CACHE, + sizeof(SIMPLE_KEY_CACHE_CB *) * + partitions, MYF(MY_WME)))) + DBUG_RETURN(-1); + bzero(partition_ptr, sizeof(SIMPLE_KEY_CACHE_CB *) * partitions); + keycache->partition_array= partition_ptr; + } + + mem_per_cache = use_mem / partitions; + mem_decr= mem_per_cache / 5; + + for (i= 0; i < (int) partitions; i++) + { + my_bool key_cache_inited= keycache->key_cache_inited; + if (key_cache_inited) + partition= *partition_ptr; + else + { + if (!(partition= + (SIMPLE_KEY_CACHE_CB *) my_malloc(key_memory_KEY_CACHE, + sizeof(SIMPLE_KEY_CACHE_CB), + MYF(MY_WME)))) + continue; + partition->key_cache_inited= 0; + } + + cnt= init_simple_key_cache(partition, key_cache_block_size, mem_per_cache, + division_limit, age_threshold, + changed_blocks_hash_size); + if (cnt <= 0) + { + end_simple_key_cache(partition, 1); + if (!key_cache_inited) + { + my_free(partition); + partition= 0; + } + if ((i == 0 && cnt < 0) || i > 0) + { + /* + Here we have two cases: + 1. i == 0 and cnt < 0 + cnt < 0 => mem_per_cache is not big enough to allocate minimal + number of key blocks in the key cache of the partition. + Decrease the the number of the partitions by 1 and start again. + 2. i > 0 + There is not enough memory for one of the succeeding partitions. 
+ Just skip this partition decreasing the number of partitions in + the key cache by one. + Do not change the value of mem_per_cache in both cases. + */ + if (key_cache_inited) + { + my_free(partition); + partition= 0; + if(key_cache_inited) + memmove(partition_ptr, partition_ptr+1, + sizeof(partition_ptr)*(partitions-i-1)); + } + if (!--partitions) + break; + } + else + { + /* + We come here when i == 0 && cnt == 0. + cnt == 0 => the memory allocator fails to allocate a block of + memory of the size mem_per_cache. Decrease the value of + mem_per_cache without changing the current number of partitions + and start again. Make sure that such a decrease may happen not + more than 5 times in total. + */ + if (use_mem <= mem_decr) + break; + use_mem-= mem_decr; + } + i--; + mem_per_cache= use_mem/partitions; + continue; + } + else + { + blocks+= cnt; + *partition_ptr++= partition; + } + } + + keycache->partitions= partitions= (uint) (partition_ptr-keycache->partition_array); + keycache->key_cache_mem_size= mem_per_cache * partitions; + for (i= 0; i < (int) partitions; i++) + keycache->partition_array[i]->hash_factor= partitions; + + keycache->key_cache_inited= 1; + + if (!partitions) + blocks= -1; + + DBUG_RETURN(blocks); +} + + +/* + Resize a partitioned key cache + + SYNOPSIS + resize_partitioned_key_cache() + keycache pointer to the control block of a partitioned key cache + key_cache_block_size size of blocks to keep cached data + use_mem total memory to use for the new key cache + division_limit new division limit (if not zero) + age_threshold new age threshold (if not zero) + + DESCRIPTION + This function is the implementation of the resize_key_cache interface + function that is employed by partitioned key caches. + The function takes the parameter keycache as a pointer to the + control block structure of the type PARTITIONED_KEY_CACHE_CB for the + partitioned key cache to be resized. 
+ The parameter key_cache_block_size specifies the new size of the blocks in + the simple key caches that comprise the partitioned key cache. + The parameters division_limit and age_threshold determine the new initial + values of those characteristics of the simple key cache that are used for + midpoint insertion strategy. The parameter use_mem specifies the total + amount of memory to be allocated for the key cache blocks in all new + simple key caches and for all auxiliary structures. + + RETURN VALUE + number of blocks in the key cache, if successful, + <= 0 - otherwise. + + NOTES + The function first calls prepare_resize_simple_key_cache for each simple + key cache effectively flushing all dirty pages from it and destroying + the key cache. Then init_partitioned_key_cache is called. This call builds + a new array of simple key caches containing the same number of elements + as the old one. After this the function calls the function + finish_resize_simple_key_cache for each simple key cache from this array. + + This implementation doesn't block the calls and executions of other + functions from the key cache interface. However it assumes that the + calls of resize_partitioned_key_cache itself are serialized. 
+*/ + +static +int resize_partitioned_key_cache(PARTITIONED_KEY_CACHE_CB *keycache, + uint key_cache_block_size, + size_t use_mem, uint division_limit, + uint age_threshold, + uint changed_blocks_hash_size) +{ + uint i; + uint partitions= keycache->partitions; + my_bool cleanup= use_mem == 0; + int blocks= -1; + int err= 0; + DBUG_ENTER("partitioned_resize_key_cache"); + if (cleanup) + { + end_partitioned_key_cache(keycache, 0); + DBUG_RETURN(-1); + } + for (i= 0; i < partitions; i++) + { + err|= prepare_resize_simple_key_cache(keycache->partition_array[i], 1); + } + if (!err) + blocks= init_partitioned_key_cache(keycache, key_cache_block_size, + use_mem, division_limit, age_threshold, + changed_blocks_hash_size); + if (blocks > 0) + { + for (i= 0; i < partitions; i++) + { + keycache_pthread_mutex_lock(&keycache->partition_array[i]->cache_lock); + finish_resize_simple_key_cache(keycache->partition_array[i]); + } + } + DBUG_RETURN(blocks); +} + + +/* + Change key cache parameters of a partitioned key cache + + SYNOPSIS + partitioned_change_key_cache_param() + keycache pointer to the control block of a partitioned key cache + division_limit new division limit (if not zero) + age_threshold new age threshold (if not zero) + + DESCRIPTION + This function is the implementation of the change_key_cache_param interface + function that is employed by partitioned key caches. + The function takes the parameter keycache as a pointer to the + control block structure of the type PARTITIONED_KEY_CACHE_CB for the simple + key cache where new values of the division limit and the age threshold used + for midpoint insertion strategy are to be set. The parameters + division_limit and age_threshold provide these new values. + + RETURN VALUE + none + + NOTES + The function just calls change_simple_key_cache_param for each element from + the array of simple caches that comprise the partitioned key cache. 
+*/ + +static +void change_partitioned_key_cache_param(PARTITIONED_KEY_CACHE_CB *keycache, + uint division_limit, + uint age_threshold) +{ + uint i; + uint partitions= keycache->partitions; + DBUG_ENTER("partitioned_change_key_cache_param"); + for (i= 0; i < partitions; i++) + { + change_simple_key_cache_param(keycache->partition_array[i], division_limit, + age_threshold); + } + DBUG_VOID_RETURN; +} + + +/* + Destroy a partitioned key cache + + SYNOPSIS + end_partitioned_key_cache() + keycache pointer to the control block of a partitioned key cache + cleanup <=> complete free (free also control block structures + for all simple key caches) + + DESCRIPTION + This function is the implementation of the end_key_cache interface + function that is employed by partitioned key caches. + The function takes the parameter keycache as a pointer to the + control block structure of the type PARTITIONED_KEY_CACHE_CB for the + partitioned key cache to be destroyed. + The function frees the memory allocated for the cache blocks and + auxiliary structures used by simple key caches that comprise the + partitioned key cache. If the value of the parameter cleanup is TRUE + then even the memory used for control blocks of the simple key caches + and the array of pointers to them are freed. 
+ + RETURN VALUE + none +*/ + +static +void end_partitioned_key_cache(PARTITIONED_KEY_CACHE_CB *keycache, + my_bool cleanup) +{ + uint i; + uint partitions= keycache->partitions; + DBUG_ENTER("partitioned_end_key_cache"); + DBUG_PRINT("enter", ("key_cache: %p", keycache)); + + for (i= 0; i < partitions; i++) + { + end_simple_key_cache(keycache->partition_array[i], cleanup); + } + if (cleanup) + { + for (i= 0; i < partitions; i++) + my_free(keycache->partition_array[i]); + my_free(keycache->partition_array); + keycache->key_cache_inited= 0; + } + DBUG_VOID_RETURN; +} + + +/* + Read a block of data from a partitioned key cache into a buffer + + SYNOPSIS + + partitioned_key_cache_read() + keycache pointer to the control block of a partitioned key cache + file handler for the file for the block of data to be read + filepos position of the block of data in the file + level determines the weight of the data + buff buffer to where the data must be placed + length length of the buffer + block_length length of the read data from a key cache block + return_buffer return pointer to the key cache buffer with the data + + DESCRIPTION + This function is the implementation of the key_cache_read interface + function that is employed by partitioned key caches. + The function takes the parameter keycache as a pointer to the + control block structure of the type PARTITIONED_KEY_CACHE_CB for a + partitioned key cache. + In a general case the function reads a block of data from the key cache + into the buffer buff of the size specified by the parameter length. The + beginning of the block of data to be read is specified by the parameters + file and filepos. The length of the read data is the same as the length + of the buffer. The data is read into the buffer in key_cache_block_size + increments. 
To read each portion the function first finds out in what + partition of the key cache this portion(page) is to be saved, and calls + simple_key_cache_read with the pointer to the corresponding simple key as + its first parameter. + If the parameter return_buffer is not ignored and its value is TRUE, and + the data to be read of the specified size block_length can be read from one + key cache buffer, then the function returns a pointer to the data in the + key cache buffer. + The function takes into account parameters block_length and return buffer + only in a single-threaded environment. + The parameter 'level' is used only by the midpoint insertion strategy + when the data or its portion cannot be found in the key cache. + + RETURN VALUE + Returns address from where the data is placed if successful, 0 - otherwise. +*/ + +static +uchar *partitioned_key_cache_read(PARTITIONED_KEY_CACHE_CB *keycache, + File file, my_off_t filepos, int level, + uchar *buff, uint length, + uint block_length __attribute__((unused)), + int return_buffer __attribute__((unused))) +{ + uint r_length; + uint offset= (uint) (filepos % keycache->key_cache_block_size); + uchar *start= buff; + DBUG_ENTER("partitioned_key_cache_read"); + DBUG_PRINT("enter", ("fd: %u pos: %lu length: %u", + (uint) file, (ulong) filepos, length)); + + + /* Read data in key_cache_block_size increments */ + do + { + SIMPLE_KEY_CACHE_CB *partition= get_key_cache_partition(keycache, + file, filepos); + uchar *ret_buff= 0; + r_length= length; + set_if_smaller(r_length, keycache->key_cache_block_size - offset); + ret_buff= simple_key_cache_read((void *) partition, + file, filepos, level, + buff, r_length, + block_length, return_buffer); + if (ret_buff == 0) + DBUG_RETURN(0); + filepos+= r_length; + buff+= r_length; + offset= 0; + } while ((length-= r_length)); + + DBUG_RETURN(start); +} + + +/* + Insert a block of file data from a buffer into a partitioned key cache + + SYNOPSIS + partitioned_key_cache_insert() + 
keycache pointer to the control block of a partitioned key cache + file handler for the file to insert data from + filepos position of the block of data in the file to insert + level determines the weight of the data + buff buffer to read data from + length length of the data in the buffer + + DESCRIPTION + This function is the implementation of the key_cache_insert interface + function that is employed by partitioned key caches. + The function takes the parameter keycache as a pointer to the + control block structure of the type PARTITIONED_KEY_CACHE_CB for a + partitioned key cache. + The function writes a block of file data from a buffer into the key cache. + The buffer is specified with the parameters buff and length - the pointer + to the beginning of the buffer and its size respectively. It's assumed + that the buffer contains the data from 'file' allocated from the position + filepos. The data is copied from the buffer in key_cache_block_size + increments. For every portion of data the function finds out in what simple + key cache from the array of partitions the data must be stored, and after + this calls simple_key_cache_insert to copy the data into a key buffer of + this simple key cache. + The parameter level is used to set one characteristic for the key buffers + loaded with the data from buff. The characteristic is used only by the + midpoint insertion strategy. + + RETURN VALUE + 0 if a success, 1 - otherwise. + + NOTES + The function is used by MyISAM to move all blocks from a index file to + the key cache. It can be performed in parallel with reading the file data + from the key buffers by other threads. 
+*/ + +static +int partitioned_key_cache_insert(PARTITIONED_KEY_CACHE_CB *keycache, + File file, my_off_t filepos, int level, + uchar *buff, uint length) +{ + uint w_length; + uint offset= (uint) (filepos % keycache->key_cache_block_size); + DBUG_ENTER("partitioned_key_cache_insert"); + DBUG_PRINT("enter", ("fd: %u pos: %lu length: %u", + (uint) file,(ulong) filepos, length)); + + + /* Write data in key_cache_block_size increments */ + do + { + SIMPLE_KEY_CACHE_CB *partition= get_key_cache_partition(keycache, + file, filepos); + w_length= length; + set_if_smaller(w_length, keycache->key_cache_block_size - offset); + if (simple_key_cache_insert((void *) partition, + file, filepos, level, + buff, w_length)) + DBUG_RETURN(1); + + filepos+= w_length; + buff+= w_length; + offset = 0; + } while ((length-= w_length)); + + DBUG_RETURN(0); +} + + +/* + Write data from a buffer into a partitioned key cache + + SYNOPSIS + + partitioned_key_cache_write() + keycache pointer to the control block of a partitioned key cache + file handler for the file to write data to + filepos position in the file to write data to + level determines the weight of the data + buff buffer with the data + length length of the buffer + dont_write if is 0 then all dirty pages involved in writing + should have been flushed from key cache + file_extra maps of key cache partitions containing + dirty pages from file + + DESCRIPTION + This function is the implementation of the key_cache_write interface + function that is employed by partitioned key caches. + The function takes the parameter keycache as a pointer to the + control block structure of the type PARTITIONED_KEY_CACHE_CB for a + partitioned key cache. + In a general case the function copies data from a buffer into the key + cache. The buffer is specified with the parameters buff and length - + the pointer to the beginning of the buffer and its size respectively. 
+ It's assumed the buffer contains the data to be written into 'file' + starting from the position filepos. The data is copied from the buffer + in key_cache_block_size increments. For every portion of data the + function finds out in what simple key cache from the array of partitions + the data must be stored, and after this calls simple_key_cache_write to + copy the data into a key buffer of this simple key cache. + If the value of the parameter dont_write is FALSE then the function + also writes the data into file. + The parameter level is used to set one characteristic for the key buffers + filled with the data from buff. The characteristic is employed only by + the midpoint insertion strategy. + The parameter file_expra provides a pointer to the shared bitmap of + the partitions that may contains dirty pages for the file. This bitmap + is used to optimize the function flush_partitioned_key_cache_blocks. + + RETURN VALUE + 0 if a success, 1 - otherwise. + + NOTES + This implementation exploits the fact that the function is called only + when a thread has got an exclusive lock for the key file. +*/ + +static +int partitioned_key_cache_write(PARTITIONED_KEY_CACHE_CB *keycache, + File file, void *file_extra, + my_off_t filepos, int level, + uchar *buff, uint length, + uint block_length __attribute__((unused)), + int dont_write) +{ + uint w_length; + ulonglong *part_map= (ulonglong *) file_extra; + uint offset= (uint) (filepos % keycache->key_cache_block_size); + DBUG_ENTER("partitioned_key_cache_write"); + DBUG_PRINT("enter", + ("fd: %u pos: %lu length: %u block_length: %u" + " key_block_length: %u", + (uint) file, (ulong) filepos, length, block_length, + keycache ? 
keycache->key_cache_block_size : 0)); + + + /* Write data in key_cache_block_size increments */ + do + { + SIMPLE_KEY_CACHE_CB *partition= get_key_cache_partition_for_write(keycache, + file, + filepos, + part_map); + w_length = length; + set_if_smaller(w_length, keycache->key_cache_block_size - offset ); + if (simple_key_cache_write(partition, + file, 0, filepos, level, + buff, w_length, block_length, + dont_write)) + DBUG_RETURN(1); + + filepos+= w_length; + buff+= w_length; + offset= 0; + } while ((length-= w_length)); + + DBUG_RETURN(0); +} + + +/* + Flush all blocks for a file from key buffers of a partitioned key cache + + SYNOPSIS + + flush_partitioned_key_cache_blocks() + keycache pointer to the control block of a partitioned key cache + file handler for the file to flush to + file_extra maps of key cache partitions containing + dirty pages from file (not used) + flush_type type of the flush operation + + DESCRIPTION + This function is the implementation of the flush_key_blocks interface + function that is employed by partitioned key caches. + The function takes the parameter keycache as a pointer to the + control block structure of the type PARTITIONED_KEY_CACHE_CB for a + partitioned key cache. + In a general case the function flushes the data from all dirty key + buffers related to the file 'file' into this file. The function does + exactly this if the value of the parameter type is FLUSH_KEEP. If the + value of this parameter is FLUSH_RELEASE, the function additionally + releases the key buffers containing data from 'file' for new usage. + If the value of the parameter type is FLUSH_IGNORE_CHANGED the function + just releases the key buffers containing data from 'file'. + The function performs the operation by calling the function + flush_simple_key_cache_blocks for the elements of the array of the + simple key caches that comprise the partitioned key_cache. 
If the value
+    of the parameter type is FLUSH_KEEP s_flush_key_blocks is called only
+    for the partitions with possibly dirty pages marked in the bitmap
+    pointed to by the parameter file_extra.
+
+  RETURN
+    0   ok
+    1   error
+
+  NOTES
+    This implementation exploits the fact that the function is called only
+    when a thread has got an exclusive lock for the key file.
+*/
+
+static
+int flush_partitioned_key_cache_blocks(PARTITIONED_KEY_CACHE_CB *keycache,
+                                       File file, void *file_extra,
+                                       enum flush_type type)
+{
+  uint idx;
+  int failed= 0;
+  const uint n_parts= keycache->partitions;
+  ulonglong *dirty_map= (ulonglong *) file_extra;
+  DBUG_ENTER("partitioned_flush_key_blocks");
+  DBUG_PRINT("enter", ("keycache: %p", keycache));
+
+  for (idx= 0; idx < n_parts; idx++)
+  {
+    /*
+      FLUSH_KEEP and FLUSH_FORCE_WRITE visit only the partitions whose bit
+      is set in the map; every other flush type visits all partitions.
+    */
+    if ((type != FLUSH_KEEP && type != FLUSH_FORCE_WRITE) ||
+        ((*dirty_map) & ((ulonglong) 1 << idx)))
+    {
+      if (flush_simple_key_cache_blocks(keycache->partition_array[idx],
+                                        file, 0, type))
+        failed= 1;
+    }
+  }
+  /* The map is cleared whatever the outcome of the individual flushes */
+  *dirty_map= 0;
+
+  DBUG_RETURN(failed);
+}
+
+
+/*
+  Reset the counters of a partitioned key cache
+
+  SYNOPSIS
+    reset_partitioned_key_cache_counters()
+    name                the name of a key cache
+    keycache            pointer to the control block of a partitioned key cache
+
+  DESCRIPTION
+    This function is the implementation of the reset_key_cache_counters
+    interface function that is employed by partitioned key caches.
+    The function takes the parameter keycache as a pointer to the
+    control block structure of the type PARTITIONED_KEY_CACHE_CB for a partitioned
+    key cache.
+    This function resets the values of the statistical counters of the simple
+    key caches comprising partitioned key cache to 0. It does it by calling
+    reset_simple_key_cache_counters for each key cache partition.
+    The parameter name is currently not used.
+ + RETURN + 0 on success (always because it can't fail) +*/ + +static int +reset_partitioned_key_cache_counters(const char *name __attribute__((unused)), + PARTITIONED_KEY_CACHE_CB *keycache) +{ + uint i; + uint partitions= keycache->partitions; + DBUG_ENTER("partitioned_reset_key_cache_counters"); + + for (i = 0; i < partitions; i++) + { + reset_simple_key_cache_counters(name, keycache->partition_array[i]); + } + DBUG_RETURN(0); +} + + +/* + Get statistics for a partition key cache + + SYNOPSIS + get_partitioned_key_cache_statistics() + keycache pointer to the control block of a partitioned key cache + partition_no partition number to get statistics for + key_cache_stats OUT pointer to the structure for the returned statistics + + DESCRIPTION + This function is the implementation of the get_key_cache_statistics + interface function that is employed by partitioned key caches. + The function takes the parameter keycache as a pointer to the + control block structure of the type PARTITIONED_KEY_CACHE_CB for + a partitioned key cache. + If the value of the parameter partition_no is equal to 0 then aggregated + statistics for all partitions is returned in the fields of the + structure key_cache_stat of the type KEY_CACHE_STATISTICS . Otherwise + the function returns data for the partition number partition_no of the + key cache in the structure key_cache_stat. (Here partitions are numbered + starting from 1.) 
+
+  RETURN
+    none
+*/
+
+static
+void
+get_partitioned_key_cache_statistics(PARTITIONED_KEY_CACHE_CB *keycache,
+                                     uint partition_no,
+                                     KEY_CACHE_STATISTICS *keycache_stats)
+{
+  uint i;
+  SIMPLE_KEY_CACHE_CB *partition;
+  uint partitions= keycache->partitions;
+  DBUG_ENTER("get_partitioned_key_cache_statistics");
+
+  if (partition_no != 0)
+  {
+    /*
+      Partition numbers are 1-based, so the valid range is 1..partitions.
+      A number past the end would index partition_array out of bounds,
+      so all-zero statistics are reported for it instead.
+    */
+    if (partition_no > partitions)
+    {
+      bzero(keycache_stats, sizeof(KEY_CACHE_STATISTICS));
+      DBUG_VOID_RETURN;
+    }
+    /* Statistics of one partition: delegate to the simple key cache */
+    partition= keycache->partition_array[partition_no-1];
+    get_simple_key_cache_statistics((void *) partition, 0, keycache_stats);
+    DBUG_VOID_RETURN;
+  }
+
+  /*
+    partition_no == 0: aggregated statistics. Sizes come from the
+    partitioned control block, the counters are summed over all partitions.
+  */
+  bzero(keycache_stats, sizeof(KEY_CACHE_STATISTICS));
+  keycache_stats->mem_size= (longlong) keycache->key_cache_mem_size;
+  keycache_stats->block_size= (longlong) keycache->key_cache_block_size;
+  for (i = 0; i < partitions; i++)
+  {
+    partition= keycache->partition_array[i];
+    keycache_stats->blocks_used+= partition->blocks_used;
+    keycache_stats->blocks_unused+= partition->blocks_unused;
+    keycache_stats->blocks_changed+= partition->global_blocks_changed;
+    keycache_stats->blocks_warm+= partition->warm_blocks;
+    keycache_stats->read_requests+= partition->global_cache_r_requests;
+    keycache_stats->reads+= partition->global_cache_read;
+    keycache_stats->write_requests+= partition->global_cache_w_requests;
+    keycache_stats->writes+= partition->global_cache_write;
+  }
+  DBUG_VOID_RETURN;
+}
+
+/*
+  The array of pointers to the key cache interface functions used by
+  partitioned key caches. Any partitioned key cache object exploits
+  this array.
+
+  The current implementation of these functions does not allow to call
+  them from the MySQL server code directly. The key cache interface
+  wrappers must be used for this purpose.
+*/
+
+/*
+  Entries are positional. Each implementation is cast to the generic
+  function pointer type of its slot; the wrappers in the interface module
+  call these slots passing keycache->keycache_cb as the control block.
+*/
+static KEY_CACHE_FUNCS partitioned_key_cache_funcs =
+{
+  (INIT_KEY_CACHE) init_partitioned_key_cache,
+  (RESIZE_KEY_CACHE) resize_partitioned_key_cache,
+  (CHANGE_KEY_CACHE_PARAM) change_partitioned_key_cache_param,
+  (KEY_CACHE_READ) partitioned_key_cache_read,
+  (KEY_CACHE_INSERT) partitioned_key_cache_insert,
+  (KEY_CACHE_WRITE) partitioned_key_cache_write,
+  (FLUSH_KEY_BLOCKS) flush_partitioned_key_cache_blocks,
+  (RESET_KEY_CACHE_COUNTERS) reset_partitioned_key_cache_counters,
+  (END_KEY_CACHE) end_partitioned_key_cache,
+  (GET_KEY_CACHE_STATISTICS) get_partitioned_key_cache_statistics,
+};
+
+
+/******************************************************************************
+  Key Cache Interface Module
+
+  The module contains wrappers for all key cache interface functions.
+
+  Currently there are key caches of two types: simple key caches and
+  partitioned key caches. Each type (class) has its own implementation of the
+  basic key cache operations used by the MyISAM storage engine. The pointers
+  to the implementation functions are stored in two static structures of the
+  type KEY_CACHE_FUNCS: simple_key_cache_funcs - for simple key caches, and
+  partitioned_key_cache_funcs - for partitioned key caches. When a key cache
+  object is created the constructor procedure init_key_cache places a pointer
+  to the corresponding table into one of its fields. The procedure also
+  initializes a control block for the key cache object and saves the pointer
+  to this block in another field of the key cache object.
+  When a key cache wrapper function is invoked for a key cache object to
+  perform a basic key cache operation it looks into the interface table
+  associated with the key cache object and calls the corresponding
+  implementation of the operation. It passes the saved key cache control
+  block to this implementation.
If, for some reasons, the control block
+  has not been fully initialized yet, the wrapper function either does not
+  do anything or, in the case when it perform a read/write operation, the
+  function do it directly through the system i/o functions.
+
+  As we can see the model with which the key cache interface is supported
+  as quite conventional for interfaces in general.
+
+******************************************************************************/
+
+/*
+  Forward declaration: resize_key_cache() calls this function when the
+  requested number of partitions (keycache->param_partitions) differs from
+  the current one.
+*/
+static
+int repartition_key_cache_internal(KEY_CACHE *keycache,
+                                   uint key_cache_block_size, size_t use_mem,
+                                   uint division_limit, uint age_threshold,
+                                   uint changed_blocks_hash_size,
+                                   uint partitions, my_bool use_op_lock);
+
+/*
+  Initialize a key cache : internal
+
+  SYNOPSIS
+    init_key_cache_internal()
+    keycache                 pointer to the key cache to be initialized
+    key_cache_block_size     size of blocks to keep cached data
+    use_mem                  total memory to use for cache buffers/structures
+    division_limit           division limit (may be zero)
+    age_threshold            age threshold (may be zero)
+    changed_blocks_hash_size Number of hash buckets to hold a link of different
+                             files. Should be proportional to number of different
+                             files used.
+    partitions               Number of partitions in the key cache
+    use_op_lock              if TRUE use keycache->op_lock, otherwise - ignore it
+
+  DESCRIPTION
+    The function performs the actions required from init_key_cache().
+    It has an additional parameter: use_op_lock. When the parameter
+    is TRUE then the function initializes keycache->op_lock if needed,
+    then locks it, and unlocks it before the return. Otherwise the actions
+    with the lock are omitted.
+
+  RETURN VALUE
+    total number of blocks in key cache partitions, if successful,
+    <= 0 - otherwise.
+
+  NOTES
+    if keycache->key_cache_inited != 0 we assume that the memory
+    for the control block of the key cache has been already allocated.
+*/
+
+static
+int init_key_cache_internal(KEY_CACHE *keycache, uint key_cache_block_size,
+                            size_t use_mem, uint division_limit,
+                            uint age_threshold, uint changed_blocks_hash_size,
+                            uint partitions,
+                            my_bool use_op_lock)
+{
+  void *keycache_cb;
+  int blocks;
+  if (keycache->key_cache_inited)
+  {
+    /* The control block already exists: take the lock and reuse it */
+    if (use_op_lock)
+      pthread_mutex_lock(&keycache->op_lock);
+    keycache_cb= keycache->keycache_cb;
+  }
+  else
+  {
+    /*
+      First initialization: allocate a control block of the requested kind
+      and bind the matching interface table. On allocation failure we
+      return before op_lock has been touched.
+    */
+    if (partitions == 0)
+    {
+      if (!(keycache_cb= (void *) my_malloc(key_memory_KEY_CACHE,
+                                            sizeof(SIMPLE_KEY_CACHE_CB),
+                                            MYF(0))))
+        return 0;
+      ((SIMPLE_KEY_CACHE_CB *) keycache_cb)->key_cache_inited= 0;
+      keycache->key_cache_type= SIMPLE_KEY_CACHE;
+      keycache->interface_funcs= &simple_key_cache_funcs;
+    }
+    else
+    {
+      if (!(keycache_cb= (void *) my_malloc(key_memory_KEY_CACHE,
+                                            sizeof(PARTITIONED_KEY_CACHE_CB),
+                                            MYF(0))))
+        return 0;
+      ((PARTITIONED_KEY_CACHE_CB *) keycache_cb)->key_cache_inited= 0;
+      keycache->key_cache_type= PARTITIONED_KEY_CACHE;
+      keycache->interface_funcs= &partitioned_key_cache_funcs;
+    }
+    /*
+      Initialize op_lock if it's not initialized before.
+      The mutex may have been initialized before if we are being called
+      from repartition_key_cache_internal().
+    */
+    if (use_op_lock)
+      pthread_mutex_init(&keycache->op_lock, MY_MUTEX_INIT_FAST);
+    keycache->keycache_cb= keycache_cb;
+    keycache->key_cache_inited= 1;
+    if (use_op_lock)
+      pthread_mutex_lock(&keycache->op_lock);
+  }
+
+  /* Tell the partitioned control block how many partitions are requested */
+  if (partitions != 0)
+  {
+    ((PARTITIONED_KEY_CACHE_CB *) keycache_cb)->partitions= partitions;
+  }
+  /* The cache stays unusable until init reports a positive block count */
+  keycache->can_be_used= 0;
+  blocks= keycache->interface_funcs->init(keycache_cb, key_cache_block_size,
+                                          use_mem, division_limit,
+                                          age_threshold, changed_blocks_hash_size);
+  /*
+    The partition count is read back from the control block after init
+    (presumably init may adjust it - TODO confirm against
+    init_partitioned_key_cache).
+  */
+  keycache->partitions= partitions ?
+                        ((PARTITIONED_KEY_CACHE_CB *) keycache_cb)->partitions :
+                        0;
+  DBUG_ASSERT(partitions <= MAX_KEY_CACHE_PARTITIONS);
+  /* Mirror the memory size kept in the control block into the handle */
+  keycache->key_cache_mem_size=
+    keycache->partitions ?
+    ((PARTITIONED_KEY_CACHE_CB *) keycache_cb)->key_cache_mem_size :
+    ((SIMPLE_KEY_CACHE_CB *) keycache_cb)->key_cache_mem_size;
+  if (blocks > 0)
+    keycache->can_be_used= 1;
+  if (use_op_lock)
+    pthread_mutex_unlock(&keycache->op_lock);
+  return blocks;
+}
+
+
+/*
+  Initialize a key cache
+
+  SYNOPSIS
+    init_key_cache()
+    keycache              pointer to the key cache to be initialized
+    key_cache_block_size  size of blocks to keep cached data
+    use_mem               total memory to use for cache buffers/structures
+    division_limit        division limit (may be zero)
+    age_threshold         age threshold (may be zero)
+    partitions            number of partitions in the key cache
+
+  DESCRIPTION
+    The function creates a control block structure for a key cache and
+    places the pointer to this block in the structure keycache.
+    If the value of the parameter 'partitions' is 0 then a simple key cache
+    is created. Otherwise a partitioned key cache with the specified number
+    of partitions is created.
+    The parameter key_cache_block_size specifies the size of the blocks in
+    the key cache to be created. The parameters division_limit and
+    age_threshold determine the initial values of those characteristics of
+    the key cache that are used for midpoint insertion strategy. The parameter
+    use_mem specifies the total amount of memory to be allocated for the
+    key cache buffers and for all auxiliary structures.
+    The function calls init_key_cache_internal() to perform all these actions
+    with the last parameter set to TRUE.
+
+  RETURN VALUE
+    total number of blocks in key cache partitions, if successful,
+    <= 0 - otherwise.
+
+  NOTES
+    It's assumed that no two threads call this function simultaneously
+    referring to the same key cache handle.
+*/
+
+int init_key_cache(KEY_CACHE *keycache, uint key_cache_block_size,
+                   size_t use_mem, uint division_limit,
+                   uint age_threshold, uint changed_blocks_hash_size,
+                   uint partitions)
+{
+  /* The public entry point always works under keycache->op_lock */
+  const my_bool with_op_lock= 1;
+
+  return init_key_cache_internal(keycache, key_cache_block_size, use_mem,
+                                 division_limit, age_threshold,
+                                 changed_blocks_hash_size, partitions,
+                                 with_op_lock);
+}
+
+
+/*
+  Resize a key cache
+
+  SYNOPSIS
+    resize_key_cache()
+    keycache              pointer to the key cache to be resized
+    key_cache_block_size  size of blocks to keep cached data
+    use_mem               total memory to use for the new key cache
+    division_limit        new division limit (if not zero)
+    age_threshold         new age threshold (if not zero)
+
+  DESCRIPTION
+    The function operates over the key cache keycache.
+    The parameter key_cache_block_size specifies the new size of the block
+    buffers in the key cache. The parameters division_limit and age_threshold
+    determine the new initial values of those characteristics of the key cache
+    that are used for midpoint insertion strategy. The parameter use_mem
+    specifies the total amount of memory to be allocated for the key cache
+    buffers and for all auxiliary structures.
+
+  RETURN VALUE
+    number of blocks in the key cache, if successful,
+    0 - otherwise.
+
+  NOTES
+    The function does not block the calls and executions of other functions
+    from the key cache interface. However it assumes that the calls of
+    resize_key_cache itself are serialized.
+
+    Currently the function is called when the values of the variables
+    key_buffer_size and/or key_cache_block_size are being reset for
+    the key cache keycache.
+*/
+
+int resize_key_cache(KEY_CACHE *keycache, uint key_cache_block_size,
+                     size_t use_mem, uint division_limit, uint age_threshold,
+                     uint changed_blocks_hash_size)
+{
+  int blocks;
+
+  /* Nothing to resize (and no op_lock to take) before initialization */
+  if (!keycache->key_cache_inited)
+    return -1;
+
+  pthread_mutex_lock(&keycache->op_lock);
+  if (use_mem && (uint) keycache->param_partitions != keycache->partitions)
+  {
+    /* The requested number of partitions differs: repartition the cache */
+    blocks= repartition_key_cache_internal(keycache,
+                                           key_cache_block_size, use_mem,
+                                           division_limit, age_threshold,
+                                           changed_blocks_hash_size,
+                                           (uint) keycache->param_partitions,
+                                           0);
+  }
+  else
+  {
+    blocks= keycache->interface_funcs->resize(keycache->keycache_cb,
+                                              key_cache_block_size,
+                                              use_mem, division_limit,
+                                              age_threshold,
+                                              changed_blocks_hash_size);
+
+    /* For a partitioned cache re-read the count kept in its control block */
+    if (keycache->partitions)
+      keycache->partitions=
+        ((PARTITIONED_KEY_CACHE_CB *)(keycache->keycache_cb))->partitions;
+  }
+
+  /* Mirror the memory size kept in the control block into the handle */
+  if (keycache->partitions)
+    keycache->key_cache_mem_size=
+      ((PARTITIONED_KEY_CACHE_CB *)(keycache->keycache_cb))->key_cache_mem_size;
+  else
+    keycache->key_cache_mem_size=
+      ((SIMPLE_KEY_CACHE_CB *)(keycache->keycache_cb))->key_cache_mem_size;
+
+  keycache->can_be_used= (blocks >= 0);
+  pthread_mutex_unlock(&keycache->op_lock);
+  return blocks;
+}
+
+
+/*
+  Change key cache parameters of a key cache
+
+  SYNOPSIS
+    change_key_cache_param()
+    keycache            pointer to the key cache to change parameters for
+    division_limit      new division limit (if not zero)
+    age_threshold       new age threshold (if not zero)
+
+  DESCRIPTION
+    The function sets new values of the division limit and the age threshold
+    used when the key cache keycache employs midpoint insertion strategy.
+    The parameters division_limit and age_threshold provide these new values.
+
+  RETURN VALUE
+    none
+
+  NOTES
+    Currently the function is called when the values of the variables
+    key_cache_division_limit and/or key_cache_age_threshold are being reset
+    for the key cache keycache.
+*/ + +void change_key_cache_param(KEY_CACHE *keycache, uint division_limit, + uint age_threshold) +{ + if (keycache->key_cache_inited) + { + pthread_mutex_lock(&keycache->op_lock); + keycache->interface_funcs->change_param(keycache->keycache_cb, + division_limit, + age_threshold); + pthread_mutex_unlock(&keycache->op_lock); + } +} + + +/* + Destroy a key cache : internal + + SYNOPSIS + end_key_cache_internal() + keycache pointer to the key cache to be destroyed + cleanup <=> complete free + use_op_lock if TRUE use keycache->op_lock, otherwise - ignore it + + DESCRIPTION + The function performs the actions required from end_key_cache(). + It has an additional parameter: use_op_lock. When the parameter + is TRUE than the function destroys keycache->op_lock if cleanup is true. + Otherwise the action with the lock is omitted. + + RETURN VALUE + none +*/ + +static +void end_key_cache_internal(KEY_CACHE *keycache, my_bool cleanup, + my_bool use_op_lock) +{ + if (keycache->key_cache_inited) + { + keycache->interface_funcs->end(keycache->keycache_cb, cleanup); + if (cleanup) + { + if (keycache->keycache_cb) + { + my_free(keycache->keycache_cb); + keycache->keycache_cb= 0; + } + /* + We do not destroy op_lock if we are going to reuse the same key cache. + This happens if we are called from repartition_key_cache_internal(). + */ + if (use_op_lock) + pthread_mutex_destroy(&keycache->op_lock); + keycache->key_cache_inited= 0; + } + keycache->can_be_used= 0; + } +} + + +/* + Destroy a key cache + + SYNOPSIS + end_key_cache() + keycache pointer to the key cache to be destroyed + cleanup <=> complete free + + DESCRIPTION + The function frees the memory allocated for the cache blocks and + auxiliary structures used by the key cache keycache. If the value + of the parameter cleanup is TRUE then all resources used by the key + cache are to be freed. + The function calls end_key_cache_internal() to perform all these actions + with the last parameter set to TRUE. 
+
+  RETURN VALUE
+    none
+*/
+
+void end_key_cache(KEY_CACHE *keycache, my_bool cleanup)
+{
+  /* The public entry point also takes care of keycache->op_lock */
+  const my_bool with_op_lock= 1;
+
+  end_key_cache_internal(keycache, cleanup, with_op_lock);
+}
+
+
+/*
+  Read a block of data from a key cache into a buffer
+
+  SYNOPSIS
+
+    key_cache_read()
+    keycache            pointer to the key cache to read data from
+    file                handler for the file for the block of data to be read
+    filepos             position of the block of data in the file
+    level               determines the weight of the data
+    buff                buffer to where the data must be placed
+    length              length of the buffer
+    block_length        length of the data read from a key cache block
+    return_buffer       return pointer to the key cache buffer with the data
+
+  DESCRIPTION
+    The function operates over buffers of the key cache keycache.
+    In a general case the function reads a block of data from the key cache
+    into the buffer buff of the size specified by the parameter length. The
+    beginning of the block of data to be read is specified by the parameters
+    file and filepos. The length of the read data is the same as the length
+    of the buffer.
+    If the parameter return_buffer is not ignored and its value is TRUE, and
+    the data to be read of the specified size block_length can be read from one
+    key cache buffer, then the function returns a pointer to the data in the
+    key cache buffer.
+    The parameter 'level' is used only by the midpoint insertion strategy
+    when the data or its portion cannot be found in the key cache.
+    The function reads data into the buffer directly from file if the control
+    block of the key cache has not been initialized yet.
+
+  RETURN VALUE
+    Returns address from where the data is placed if successful, 0 - otherwise.
+
+  NOTES.
+ Filepos must be a multiple of 'block_length', but it doesn't + have to be a multiple of key_cache_block_size; +*/ + +uchar *key_cache_read(KEY_CACHE *keycache, + File file, my_off_t filepos, int level, + uchar *buff, uint length, + uint block_length, int return_buffer) +{ + if (keycache->can_be_used) + return keycache->interface_funcs->read(keycache->keycache_cb, + file, filepos, level, + buff, length, + block_length, return_buffer); + + /* We can't use mutex here as the key cache may not be initialized */ + + if (my_pread(file, (uchar*) buff, length, filepos, MYF(MY_NABP))) + return (uchar *) 0; + + return buff; +} + + +/* + Insert a block of file data from a buffer into a key cache + + SYNOPSIS + key_cache_insert() + keycache pointer to the key cache to insert data into + file handler for the file to insert data from + filepos position of the block of data in the file to insert + level determines the weight of the data + buff buffer to read data from + length length of the data in the buffer + + DESCRIPTION + The function operates over buffers of the key cache keycache. + The function writes a block of file data from a buffer into the key cache. + The buffer is specified with the parameters buff and length - the pointer + to the beginning of the buffer and its size respectively. It's assumed + that the buffer contains the data from 'file' allocated from the position + filepos. + The parameter level is used to set one characteristic for the key buffers + loaded with the data from buff. The characteristic is used only by the + midpoint insertion strategy. + + RETURN VALUE + 0 if a success, 1 - otherwise. + + NOTES + The function is used by MyISAM to move all blocks from a index file to + the key cache. + It is assumed that it may be performed in parallel with reading the file + data from the key buffers by other threads. 
+*/
+
+int key_cache_insert(KEY_CACHE *keycache,
+                     File file, my_off_t filepos, int level,
+                     uchar *buff, uint length)
+{
+  /* With an unusable cache there is nowhere to insert: report success */
+  if (!keycache->can_be_used)
+    return 0;
+
+  return keycache->interface_funcs->insert(keycache->keycache_cb,
+                                           file, filepos, level,
+                                           buff, length);
+}
+
+
+/*
+  Write data from a buffer into a key cache
+
+  SYNOPSIS
+
+    key_cache_write()
+    keycache            pointer to the key cache to write data to
+    file                handler for the file to write data to
+    filepos             position in the file to write data to
+    level               determines the weight of the data
+    buff                buffer with the data
+    length              length of the buffer
+    dont_write          if is 0 then all dirty pages involved in writing
+                        should have been flushed from key cache
+    file_extra          pointer to optional file attributes
+
+  DESCRIPTION
+    The function operates over buffers of the key cache keycache.
+    In a general case the function writes data from a buffer into the key
+    cache. The buffer is specified with the parameters buff and length -
+    the pointer to the beginning of the buffer and its size respectively.
+    It's assumed the buffer contains the data to be written into 'file'
+    starting from the position filepos.
+    If the value of the parameter dont_write is FALSE then the function
+    also writes the data into file.
+    The parameter level is used to set one characteristic for the key buffers
+    filled with the data from buff. The characteristic is employed only by
+    the midpoint insertion strategy.
+    The parameter file_extra may point to additional file attributes used
+    for optimization or other purposes.
+    The function writes data from the buffer directly into file if the control
+    block of the key cache has not been initialized yet.
+
+  RETURN VALUE
+    0 if a success, 1 - otherwise.
+
+  NOTES
+    This implementation may exploit the fact that the function is called only
+    when a thread has got an exclusive lock for the key file.
+*/ + +int key_cache_write(KEY_CACHE *keycache, + File file, void *file_extra, + my_off_t filepos, int level, + uchar *buff, uint length, + uint block_length, int force_write) +{ + if (keycache->can_be_used) + return keycache->interface_funcs->write(keycache->keycache_cb, + file, file_extra, + filepos, level, + buff, length, + block_length, force_write); + + /* + The key cache can't be used: write directly to the file. + We can't use mutex here as the key cache may not be initialized + */ + if (my_pwrite(file, buff, length, filepos, MYF(MY_NABP | MY_WAIT_IF_FULL))) + return 1; /* The direct write failed */ + + return 0; +} + + +/* + Flush all blocks for a file from key buffers of a key cache + + SYNOPSIS + + flush_key_blocks() + keycache pointer to the key cache whose blocks are to be flushed + file handler for the file to flush to + file_extra maps of key cache (used for partitioned key caches) + type type of the flush operation + + DESCRIPTION + The function operates over buffers of the key cache keycache. + In a general case the function flushes the data from all dirty key + buffers related to the file 'file' into this file. The function does + exactly this if the value of the parameter type is FLUSH_KEEP. If the + value of this parameter is FLUSH_RELEASE, the function additionally + releases the key buffers containing data from 'file' for new usage. + If the value of the parameter type is FLUSH_IGNORE_CHANGED the function + just releases the key buffers containing data from 'file'. + If the value of the parameter type is FLUSH_KEEP the function may use + the value of the parameter file_extra pointing to possibly dirty + partitions to optimize the operation for partitioned key caches. + + RETURN + 0 ok (also when the key cache can't be used and nothing is flushed) + 1 error + + NOTES + Any implementation of the function may exploit the fact that the function + is called only when a thread has got an exclusive lock for the key file.
+*/ + +int flush_key_blocks(KEY_CACHE *keycache, + int file, void *file_extra, + enum flush_type type) +{ + if (keycache->can_be_used) + return keycache->interface_funcs->flush(keycache->keycache_cb, + file, file_extra, type); + return 0; /* Key cache can't be used: nothing to flush */ +} + + +/* + Reset the counters of a key cache + + SYNOPSIS + reset_key_cache_counters() + name the name of a key cache (only passed on, see below) + keycache pointer to the key cache for which to reset counters + unused ignored + + DESCRIPTION + This function resets the values of the statistical counters for the key + cache keycache. Nothing is done if the key cache has not been initialized. + The reset is performed while holding keycache->op_lock. + The parameter name is not interpreted here; it is only passed on to the + reset_counters() function of the key cache implementation. + + RETURN + 0 if the key cache has not been initialized, otherwise the value + returned by the reset_counters() function of the implementation + + NOTES + This procedure is used by process_key_caches() to reset the counters of all + currently used key caches, both the default one and the named ones. +*/ + +int reset_key_cache_counters(const char *name __attribute__((unused)), + KEY_CACHE *keycache, + void *unused __attribute__((unused))) +{ + int rc= 0; + if (keycache->key_cache_inited) + { + pthread_mutex_lock(&keycache->op_lock); + rc= keycache->interface_funcs->reset_counters(name, + keycache->keycache_cb); + pthread_mutex_unlock(&keycache->op_lock); + } + return rc; +} + + +/* + Get statistics for a key cache + + SYNOPSIS + get_key_cache_statistics() + keycache pointer to the key cache to get statistics for + partition_no partition number to get statistics for + key_cache_stats OUT pointer to the structure for the returned statistics + + DESCRIPTION + If the value of the parameter partition_no is equal to 0 then statistics + for the whole key cache keycache (aggregated statistics) is returned in the + fields of the structure key_cache_stats of the type KEY_CACHE_STATISTICS. + Otherwise the value of the parameter partition_no makes sense only for + a partitioned key cache. In this case the function returns statistics + for the partition with the specified number partition_no.
+ + RETURN + none +*/ + +void get_key_cache_statistics(KEY_CACHE *keycache, uint partition_no, + KEY_CACHE_STATISTICS *key_cache_stats) +{ + if (keycache->key_cache_inited) /* Otherwise *key_cache_stats is left untouched */ + { + pthread_mutex_lock(&keycache->op_lock); + keycache->interface_funcs->get_stats(keycache->keycache_cb, + partition_no, key_cache_stats); + pthread_mutex_unlock(&keycache->op_lock); + } +} + + +/* + Repartition a key cache : internal + + SYNOPSIS + repartition_key_cache_internal() + keycache pointer to the key cache to be repartitioned + key_cache_block_size size of blocks to keep cached data + use_mem total memory to use for the new key cache + division_limit new division limit (if not zero) + age_threshold new age threshold (if not zero) + changed_blocks_hash_size passed on to resize() and to + init_key_cache_internal() + partitions new number of partitions in the key cache + use_op_lock if TRUE use keycache->op_lock, otherwise - ignore it + + DESCRIPTION + The function performs the actions required from repartition_key_cache(). + It has an additional parameter: use_op_lock. When the parameter + is TRUE then the function locks keycache->op_lock at start and + unlocks it before the return. Otherwise the actions with the lock + are omitted. + + RETURN VALUE + number of blocks in the key cache, if successful, + 0 - otherwise. + If the key cache has not been initialized nothing is done and the + initial value -1 of the local variable 'blocks' is returned.
+*/ + +static +int repartition_key_cache_internal(KEY_CACHE *keycache, + uint key_cache_block_size, size_t use_mem, + uint division_limit, uint age_threshold, + uint changed_blocks_hash_size, + uint partitions, my_bool use_op_lock) +{ + int blocks= -1; /* Returned as is when the key cache is not initialized */ + if (keycache->key_cache_inited) + { + if (use_op_lock) + pthread_mutex_lock(&keycache->op_lock); + keycache->interface_funcs->resize(keycache->keycache_cb, + key_cache_block_size, 0, + division_limit, age_threshold, + changed_blocks_hash_size); /* NOTE(review): result is ignored */ + end_key_cache_internal(keycache, 1, 0); + blocks= init_key_cache_internal(keycache, key_cache_block_size, use_mem, + division_limit, age_threshold, + changed_blocks_hash_size, partitions, + 0); + if (use_op_lock) + pthread_mutex_unlock(&keycache->op_lock); + } + return blocks; +} + +/* + Repartition a key cache + + SYNOPSIS + repartition_key_cache() + keycache pointer to the key cache to be repartitioned + key_cache_block_size size of blocks to keep cached data + use_mem total memory to use for the new key cache + division_limit new division limit (if not zero) + age_threshold new age threshold (if not zero) + changed_blocks_hash_size passed on to repartition_key_cache_internal() + partitions new number of partitions in the key cache + + DESCRIPTION + The function operates over the key cache keycache. + The parameter partitions specifies the number of partitions in the key + cache after repartitioning. If the value of this parameter is 0 then + a simple key cache must be created instead of the old one. + The parameter key_cache_block_size specifies the new size of the block + buffers in the key cache. The parameters division_limit and age_threshold + determine the new initial values of those characteristics of the key cache + that are used for midpoint insertion strategy. The parameter use_mem + specifies the total amount of memory to be allocated for the new key + cache buffers and for all auxiliary structures. + The function calls repartition_key_cache_internal() to perform all these + actions with the last parameter set to TRUE.
+ + RETURN VALUE + number of blocks in the key cache, if successful, + 0 - otherwise. + + NOTES + Currently the function is called when the value of the variable + key_cache_partitions is being reset for the key cache keycache. +*/ + +int repartition_key_cache(KEY_CACHE *keycache, uint key_cache_block_size, + size_t use_mem, uint division_limit, + uint age_threshold, uint changed_blocks_hash_size, + uint partitions) +{ + return repartition_key_cache_internal(keycache, key_cache_block_size, use_mem, + division_limit, age_threshold, + changed_blocks_hash_size, + partitions, 1); /* 1: use_op_lock, i.e. take keycache->op_lock */ +} + diff --git a/mysys/mf_keycaches.c b/mysys/mf_keycaches.c new file mode 100644 index 00000000..199fb109 --- /dev/null +++ b/mysys/mf_keycaches.c @@ -0,0 +1,107 @@ +/* Copyright (c) 2003, 2011, Oracle and/or its affiliates. + Copyright (c) 2010, 2011, Monty Program Ab + + This program is free software; you can redistribute it and/or modify + it under the terms of the GNU General Public License as published by + the Free Software Foundation; version 2 of the License. + + This program is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + GNU General Public License for more details. + + You should have received a copy of the GNU General Public License + along with this program; if not, write to the Free Software + Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1335 USA */ + +/* + Handling of multiple key caches + + The idea is to have a thread safe hash on the table name, + with a default key cache value that is returned if the table name is not in + the cache.
+*/ + +#include "mysys_priv.h" +#include +#include +#include +#include "my_safehash.h" + +/***************************************************************************** + Functions to handle the key cache objects +*****************************************************************************/ + +/* Variable to store all key cache objects */ +static SAFE_HASH key_cache_hash; + + +my_bool multi_keycache_init(void) +{ + return safe_hash_init(&key_cache_hash, 16, (uchar*) dflt_key_cache); +} + + +void multi_keycache_free(void) +{ + safe_hash_free(&key_cache_hash); +} + +/* + Get a key cache to be used for a specific table. + + SYNOPSIS + multi_key_cache_search() + key key to find (usually table path) + length Length of key. + def Default value if no key cache + + NOTES + This function is coded in such a way that we will return the + default key cache even if one never called multi_keycache_init. + This will ensure that it works with old MyISAM clients. + + RETURN + key cache to use +*/ + +KEY_CACHE *multi_key_cache_search(uchar *key, uint length, + KEY_CACHE *def) +{ + if (!key_cache_hash.hash.records) + return def; /* No entries in the hash (or never initialized) */ + return (KEY_CACHE*) safe_hash_search(&key_cache_hash, key, length, + (void*) def); +} + + +/* + Associate a key cache with a key + + + SYNOPSIS + multi_key_cache_set() + key key (path to table etc..)
+ length Length of key + key_cache cache to associate with the table + + NOTES + This can be used both to insert a new entry and change an existing + entry +*/ + + +my_bool multi_key_cache_set(const uchar *key, uint length, + KEY_CACHE *key_cache) +{ + return safe_hash_set(&key_cache_hash, key, length, (uchar*) key_cache); +} + + +void multi_key_cache_change(KEY_CACHE *old_data, + KEY_CACHE *new_data) +{ + safe_hash_change(&key_cache_hash, (uchar*) old_data, (uchar*) new_data); +} + + diff --git a/mysys/mf_loadpath.c b/mysys/mf_loadpath.c new file mode 100644 index 00000000..0a716aa5 --- /dev/null +++ b/mysys/mf_loadpath.c @@ -0,0 +1,59 @@ +/* + Copyright (c) 2000, 2010, Oracle and/or its affiliates + + This program is free software; you can redistribute it and/or modify + it under the terms of the GNU General Public License as published by + the Free Software Foundation; version 2 of the License. + + This program is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + GNU General Public License for more details. + + You should have received a copy of the GNU General Public License + along with this program; if not, write to the Free Software + Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1335 USA */ + +#include "mysys_priv.h" +#include + + /* Returns full load-path for a file. to may be = path */ + /* if path is a hard-path return path */ + /* if path starts with home-dir return path */ + /* if path starts with current dir or parent-dir unpack path */ + /* if there is no path, prepend with own_path_prefix if given */ + /* else unpack path according to current dir */ + +char * my_load_path(char * to, const char *path, + const char *own_path_prefix) +{ + char buff[FN_REFLEN+1]; + const char *from= buff; + int is_cur; + DBUG_ENTER("my_load_path"); + DBUG_PRINT("enter",("path: %s prefix: %s",path, + own_path_prefix ? 
own_path_prefix : "")); + + if ((path[0] == FN_HOMELIB && path[1] == FN_LIBCHAR) || + test_if_hard_path(path)) + from= path; + else if ((is_cur=(path[0] == FN_CURLIB && path[1] == FN_LIBCHAR)) || + (is_prefix(path,FN_PARENTDIR)) || + ! own_path_prefix) + { + if (is_cur) + is_cur=2; /* Skip the 2 chars of the current-dir prefix of path */ + if (! my_getwd(buff,(uint) (FN_REFLEN-strlen(path)+is_cur),MYF(0))) + { + size_t length= strlen(buff); + (void) strmake(buff + length, path+is_cur, FN_REFLEN - length); + } + else + from= path; /* Can't get cwd: return org file name */ + } + else + (void) strxnmov(buff, FN_REFLEN, own_path_prefix, path, NullS); + strmake(to, from, FN_REFLEN-1); /* from is either path or buff */ + DBUG_PRINT("exit",("to: %s",to)); + DBUG_RETURN(to); +} /* my_load_path */ diff --git a/mysys/mf_pack.c b/mysys/mf_pack.c new file mode 100644 index 00000000..b8722a2f --- /dev/null +++ b/mysys/mf_pack.c @@ -0,0 +1,456 @@ +/* Copyright (c) 2000, 2013, Oracle and/or its affiliates. + Copyright (c) 2012, 2020, MariaDB Corporation. + + This program is free software; you can redistribute it and/or modify + it under the terms of the GNU General Public License as published by + the Free Software Foundation; version 2 of the License. + + This program is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + GNU General Public License for more details. + + You should have received a copy of the GNU General Public License + along with this program; if not, write to the Free Software + Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1335 USA. +*/ + +#include "mysys_priv.h" +#include +#ifdef HAVE_PWD_H +#include +#endif + +static char * expand_tilde(char **path); + + /* Pack a dirname ; Changes HOME to ~/ and current dir to ./ */ + /* from is a dirname (from dirname() ?) 
ending with FN_LIBCHAR */ + /* to may be == from */ + +void pack_dirname(char * to, const char *from) +{ + int cwd_err; + size_t d_length,length,UNINIT_VAR(buff_length); + char * start; + char buff[FN_REFLEN + 1]; + DBUG_ENTER("pack_dirname"); + + (void) intern_filename(to,from); /* Change to intern name */ + +#ifdef FN_DEVCHAR + if ((start=strrchr(to,FN_DEVCHAR)) != 0) /* Skip device part */ + start++; + else +#endif + start=to; + + if (!(cwd_err= my_getwd(buff,FN_REFLEN,MYF(0)))) + { + buff_length= strlen(buff); + d_length= (size_t) (start-to); + if ((start == to || + (buff_length == d_length && !memcmp(buff,start,d_length))) && + *start != FN_LIBCHAR && *start) + { /* Put current dir before */ + bchange((uchar*) to, d_length, (uchar*) buff, buff_length, strlen(to)+1); + } + } + + if ((d_length= cleanup_dirname(to,to)) != 0) + { + length=0; + if (home_dir) + { + length= strlen(home_dir); + /* length may be 0 (empty home_dir): don't read home_dir[-1] */ + if (length && home_dir[length-1] == FN_LIBCHAR) + length--; /* Don't test last '/' */ + } + if (length > 1 && length < d_length) + { /* test if /xx/yy -> ~/yy */ + if (memcmp(to,home_dir,length) == 0 && to[length] == FN_LIBCHAR) + { + to[0]=FN_HOMELIB; /* Filename begins with ~ */ + (void) strmov_overlapp(to+1,to+length); + } + } + if (! cwd_err) + { /* Test if cwd is ~/... */ + if (length > 1 && length < buff_length) + { + if (memcmp(buff,home_dir,length) == 0 && buff[length] == FN_LIBCHAR) + { + buff[0]=FN_HOMELIB; + (void) strmov_overlapp(buff+1,buff+length); + } + } + if (is_prefix(to,buff)) + { + length= strlen(buff); + if (to[length]) + (void) strmov_overlapp(to,to+length); /* Remove everything before */ + else + { + to[0]= FN_CURLIB; /* Put ./ instead of cwd */ + to[1]= FN_LIBCHAR; + to[2]= '\0'; + } + } + } + } + DBUG_PRINT("exit",("to: '%s'",to)); + DBUG_VOID_RETURN; +} /* pack_dirname */ + + +/* + remove unwanted chars from dirname + + SYNOPSIS + cleanup_dirname() + to Store result here + from Dirname to fix.
May be same as to + + IMPLEMENTATION + "/../" removes prev dir + "/~/" removes all before ~ + "//" is same as "/", except on Win32 at start of a file + "/./" is removed + Unpacks home_dir if "~/.." used + Unpacks current dir if "./.." used + + RETURN + # length of new name +*/ + +size_t cleanup_dirname(register char *to, const char *from) +{ + reg5 size_t length; + reg2 char * pos; + reg3 char * from_ptr; + reg4 char * start; + char parent[5], /* for "FN_PARENTDIR" */ + buff[FN_REFLEN + 1],*end_parentdir; +#ifdef BACKSLASH_MBTAIL + CHARSET_INFO *fs= fs_character_set(); +#endif + DBUG_ENTER("cleanup_dirname"); + DBUG_PRINT("enter",("from: '%s'",from)); + + start=buff; + from_ptr=(char *) from; +#ifdef FN_DEVCHAR + if ((pos=strrchr(from_ptr,FN_DEVCHAR)) != 0) + { /* Skip device part */ + length=(size_t) (pos-from_ptr)+1; + start=strnmov(buff,from_ptr,length); from_ptr+=length; + } +#endif + + parent[0]=FN_LIBCHAR; + length=(size_t) (strmov(parent+1,FN_PARENTDIR)-parent); + for (pos=start ; (*pos= *from_ptr++) != 0 ; pos++) + { +#ifdef BACKSLASH_MBTAIL + uint l; + if (my_ci_use_mb(fs) && (l= my_ismbchar(fs, from_ptr - 1, from_ptr + 2))) + { + for (l-- ; l ; *++pos= *from_ptr++, l--); + start= pos + 1; /* Don't look inside multi-byte char */ + continue; + } +#endif + if (*pos == '/') + *pos = FN_LIBCHAR; + if (*pos == FN_LIBCHAR) + { + if ((size_t) (pos-start) > length && memcmp(pos-length,parent,length) == 0) + { /* If .../../; skip prev */ + pos-=length; + if (pos != start) + { /* not /../ */ + pos--; + if (*pos == FN_HOMELIB && (pos == start || pos[-1] == FN_LIBCHAR)) + { + if (!home_dir) + { + pos+=length+1; /* Don't unpack ~/.. */ + continue; + } + pos=strmov(buff,home_dir)-1; /* Unpacks ~/.. */ + if (*pos == FN_LIBCHAR) + pos--; /* home ended with '/' */ + } + if (*pos == FN_CURLIB && (pos == start || pos[-1] == FN_LIBCHAR)) + { + if (my_getwd(curr_dir,FN_REFLEN,MYF(0))) + { + pos+=length+1; /* Don't unpack ./.. 
*/ + continue; + } + pos=strmov(buff,curr_dir)-1; /* Unpacks ./.. */ + if (*pos == FN_LIBCHAR) + pos--; /* cwd ended with '/' */ + } + end_parentdir=pos; + while (pos >= start && *pos != FN_LIBCHAR) /* remove prev dir */ + pos--; + if (pos[1] == FN_HOMELIB || + (pos >= start && memcmp(pos, parent, length) == 0)) + { /* Don't remove ~user/ */ + pos=strmov(end_parentdir+1,parent); + *pos=FN_LIBCHAR; + continue; + } + } + } + else if ((size_t) (pos-start) == length-1 && + !memcmp(start,parent+1,length-1)) + start=pos; /* Starts with "../" */ + else if (pos-start > 0 && pos[-1] == FN_LIBCHAR) + { +#ifdef FN_NETWORK_DRIVES + if (pos-start != 1) +#endif + pos--; /* Remove duplicate '/' */ + } + else if (pos-start > 1 && pos[-1] == FN_CURLIB && pos[-2] == FN_LIBCHAR) + pos-=2; /* Skip /./ */ + } + } + (void) strmov(to,buff); + DBUG_PRINT("exit",("to: '%s'",to)); + DBUG_RETURN((size_t) (pos-buff)); /* pos points at the terminating '\0' in buff */ +} /* cleanup_dirname */ + + +/* + On systems where you don't have symbolic links, the following + code will allow you to create a file: + directory-name.sym that should contain the real path + to the directory.
This will be used if the directory name + doesn't exists +*/ + + +my_bool my_use_symdir=0; /* Set this if you want to use symdirs */ + +#ifdef USE_SYMDIR +void symdirget(char *dir) +{ + char buff[FN_REFLEN + 1]; + char *pos=strend(dir); + if (dir[0] && pos[-1] != FN_DEVCHAR && my_access(dir, F_OK)) + { + File file; + size_t length; + char temp= *(--pos); /* May be "/" or "\" */ + strmov(pos,".sym"); + file= my_open(dir, O_RDONLY, MYF(0)); + *pos++=temp; *pos=0; /* Restore old filename */ + if (file >= 0) + { + if ((length= my_read(file, buff, sizeof(buff) - 1, MYF(0))) > 0) + { + for (pos= buff + length ; + pos > buff && (iscntrl(pos[-1]) || isspace(pos[-1])) ; + pos --); + + /* Ensure that the symlink ends with the directory symbol */ + if (pos == buff || pos[-1] != FN_LIBCHAR) + *pos++=FN_LIBCHAR; + + strmake(dir,buff, (size_t) (pos-buff)); + } + my_close(file, MYF(0)); + } + } +} +#endif /* USE_SYMDIR */ + + +/** + Convert a directory name to a format which can be compared as strings + + @param to result buffer, FN_REFLEN chars in length; may be == from + @param from 'packed' directory name, in whatever format + @returns size of the normalized name + + @details + - Ensures that last char is FN_LIBCHAR, unless it is FN_DEVCHAR + - Uses cleanup_dirname + + It does *not* expand ~/ (although, see cleanup_dirname). Nor does it do + any case folding. All case-insensitive normalization should be done by + the caller. +*/ + +size_t normalize_dirname(char *to, const char *from) +{ + size_t length; + char buff[FN_REFLEN + 1]; + DBUG_ENTER("normalize_dirname"); + + /* + Despite the name, this actually converts the name to the system's + format (TODO: name this properly). 
+ */ + (void) intern_filename(buff, from); + length= strlen(buff); /* Fix that '/' is last */ + if (length && +#ifdef FN_DEVCHAR + buff[length - 1] != FN_DEVCHAR && +#endif + buff[length - 1] != FN_LIBCHAR && buff[length - 1] != '/') + { + /* we need to reserve 2 bytes for the trailing slash and the zero */ + if (length >= sizeof (buff) - 1) + length= sizeof (buff) - 2; + buff[length]= FN_LIBCHAR; + buff[length + 1]= '\0'; + } + + length=cleanup_dirname(to, buff); + + DBUG_RETURN(length); +} + + +/** + Fixes a directory name so that it can be used by open() + + @param to Result buffer, FN_REFLEN characters. May be == from + @param from 'Packed' directory name (may contain ~) + + @details + - Uses normalize_dirname() + - Expands ~/... to home_dir/... + - Resolves MySQL's fake "foo.sym" symbolic directory names (if USE_SYMDIR) + - Changes a UNIX filename to system filename (replaces / with \ on windows) + + @returns + Length of new directory name (= length of to) +*/ + +size_t unpack_dirname(char * to, const char *from) +{ + size_t length, h_length; + char buff[FN_REFLEN+1+4],*suffix,*tilde_expansion; + DBUG_ENTER("unpack_dirname"); + + length= normalize_dirname(buff, from); + + if (buff[0] == FN_HOMELIB) + { + suffix=buff+1; tilde_expansion=expand_tilde(&suffix); + if (tilde_expansion) + { + length-= (size_t) (suffix-buff)-1; /* Bytes from suffix to the end, incl. the '\0' */ + if (length+(h_length= strlen(tilde_expansion)) <= FN_REFLEN) + { + if ((h_length > 0) && (tilde_expansion[h_length-1] == FN_LIBCHAR)) + h_length--; /* suffix supplies the separator */ + if (buff+h_length < suffix) + bmove(buff+h_length,suffix,length); + else + bmove_upp((uchar*) buff+h_length+length, (uchar*) suffix+length, length); + bmove(buff,tilde_expansion,h_length); + } + } + } +#ifdef USE_SYMDIR + if (my_use_symdir) + symdirget(buff); +#endif + DBUG_RETURN(system_filename(to,buff)); /* Fix for open */ +} /* unpack_dirname */ + + + /* Expand tilde to home or user-directory */ + /* Path is reset to point at FN_LIBCHAR after ~xxx */ + +static char * expand_tilde(char **path) +{ + 
if (path[0][0] == FN_LIBCHAR) + return home_dir; /* ~/ expanded to home */ +#ifdef HAVE_GETPWNAM + { + char *str,save; + struct passwd *user_entry; + + if (!(str=strchr(*path,FN_LIBCHAR))) + str=strend(*path); + save= *str; *str= '\0'; /* Temporarily end the string after the user name */ + user_entry=getpwnam(*path); + *str=save; + endpwent(); + if (user_entry) + { + *path=str; + return user_entry->pw_dir; + } + } +#endif + return (char *) 0; /* No expansion found */ +} + + +/* + Fix filename so it can be used by open, create + + SYNOPSIS + unpack_filename() + to Store result here. Must be at least of size FN_REFLEN. + from Filename in unix format (with ~) + + RETURN + # length of to + + NOTES + to may be == from + ~ will only be expanded if total length < FN_REFLEN +*/ + + +size_t unpack_filename(char * to, const char *from) +{ + size_t length, n_length, buff_length; + char buff[FN_REFLEN + 1]; + DBUG_ENTER("unpack_filename"); + + length=dirname_part(buff, from, &buff_length);/* copy & convert dirname */ + n_length=unpack_dirname(buff,buff); + if (n_length+strlen(from+length) < FN_REFLEN) + { + (void) strmov(buff+n_length,from+length); + length= system_filename(to,buff); /* Make the expanded name usable */ + } + else + length= system_filename(to,from); /* Too long: use the name as given */ + DBUG_RETURN(length); +} /* unpack_filename */ + + + /* Convert filename (unix standard) to system standard */ + /* Used before system command's like open(), create() .. 
*/ + /* Returns used length of to; total length should be FN_REFLEN */ + +size_t system_filename(char *to, const char *from) +{ + return (size_t) (strmake(to,from,FN_REFLEN-1)-to); +} + + /* Fix a filename to intern (UNIX format) */ + +char *intern_filename(char *to, const char *from) +{ + size_t length, to_length; + char buff[FN_REFLEN + 1]; + if (from == to) + { /* Dirname may destroy from */ + (void) strmake(buff, from, FN_REFLEN); /* Unlike strnmov(), always terminates buff */ + from=buff; + } + length= dirname_part(to, from, &to_length); /* Copy dirname & fix chars */ + (void) strnmov(to + to_length, from + length, FN_REFLEN - to_length); + return (to); +} /* intern_filename */ diff --git a/mysys/mf_path.c b/mysys/mf_path.c new file mode 100644 index 00000000..cba80599 --- /dev/null +++ b/mysys/mf_path.c @@ -0,0 +1,120 @@ +/* Copyright (c) 2000, 2010, Oracle and/or its affiliates. All rights reserved. + + This program is free software; you can redistribute it and/or modify + it under the terms of the GNU General Public License as published by + the Free Software Foundation; version 2 of the License. + + This program is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + GNU General Public License for more details. + + You should have received a copy of the GNU General Public License + along with this program; if not, write to the Free Software + Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1335 USA */ + +#include "mysys_priv.h" +#include + +static char *find_file_in_path(char *to,const char *name); + + /* Finds where program can find its files. + pre_pathname is found by first looking at progname (argv[0]). + if progname contains path the path is returned. + else if progname is found in path, return it + else if progname is given and POSIX environment variable "_" is set + then path is taken from "_". 
+ If filename doesn't contain a path append MY_BASEDIR_VERSION or + MY_BASEDIR if defined, else append "/my/running". + own_pathname_part is concatenated to result. + my_path puts result in to and returns to */ + +char * my_path(char * to, const char *progname, + const char *own_pathname_part) +{ + char *start, *end, *prog; + size_t to_length; + DBUG_ENTER("my_path"); + + start=to; /* Return this */ + if (progname && (dirname_part(to, progname, &to_length) || + find_file_in_path(to,progname) || + ((prog=getenv("_")) != 0 && + dirname_part(to, prog, &to_length)))) + { + (void) intern_filename(to,to); + if (!test_if_hard_path(to)) + { + if (!my_getwd(curr_dir,FN_REFLEN,MYF(0))) + bchange((uchar*) to, 0, (uchar*) curr_dir, strlen(curr_dir), strlen(to)+1); + } + } + else + { + if ((end = getenv("MY_BASEDIR_VERSION")) == 0 && + (end = getenv("MY_BASEDIR")) == 0) + { +#ifdef DEFAULT_BASEDIR + end= (char*) DEFAULT_BASEDIR; +#else + end= (char*) "/my/"; +#endif + } + (void) intern_filename(to,end); + to=strend(to); + if (to != start && to[-1] != FN_LIBCHAR) + *to++ = FN_LIBCHAR; + (void) strmov(to,own_pathname_part); + } + DBUG_PRINT("exit",("to: '%s'",start)); + DBUG_RETURN(start); +} /* my_path */ + + + /* Test if a file given without a directory part is found in PATH */ + /* Returns to if found and to has dirpart if found, else NullS */ + /* Every PATH entry is tried, including the last (or only) one */ + /* Entries that would not fit are skipped; to is assumed to hold */ + /* FN_REFLEN bytes like the other path buffers - TODO confirm */ + +#if defined(_WIN32) +#define F_OK 0 +#define PATH_SEP ';' +#define PROGRAM_EXTENSION ".exe" +#else +#define PATH_SEP ':' +#endif + +static char *find_file_in_path(char *to, const char *name) +{ + char *path,*pos,dir[2]; + const char *ext=""; + + if (!(path=getenv("PATH"))) + return NullS; + dir[0]=FN_LIBCHAR; dir[1]=0; +#ifdef PROGRAM_EXTENSION + if (!fn_ext(name)[0]) + ext=PROGRAM_EXTENSION; +#endif + + for (;;) + { + size_t dir_length; + if (!(pos=strchr(path,PATH_SEP))) + pos=strend(path); /* Last entry has no separator */ + dir_length=(size_t) (pos-path); + /* Skip empty entries and entries that are too long for to */ + if (dir_length && + dir_length + 1 + strlen(name) + strlen(ext) < FN_REFLEN) + { + strxmov(strnmov(to,path,dir_length),dir,name,ext,NullS); + if (!access(to,F_OK)) + { + to[dir_length+1]=0; /* Return path only */ + return to; + } + 
} + if (!*pos) + break; /* All entries tested */ + path= pos+1; + } +#ifdef _WIN32 + to[0]=FN_CURLIB; + strxmov(to+1,dir,name,ext,NullS); + if (!access(to,F_OK)) /* Test in current dir */ + { + to[2]=0; /* Leave ".\" */ + return to; + } +#endif + return NullS; /* File not found */ +} diff --git a/mysys/mf_qsort.c b/mysys/mf_qsort.c new file mode 100644 index 00000000..b516639a --- /dev/null +++ b/mysys/mf_qsort.c @@ -0,0 +1,217 @@ +/* Copyright (c) 2000-2002, 2007 MySQL AB + Use is subject to license terms + + This program is free software; you can redistribute it and/or modify + it under the terms of the GNU General Public License as published by + the Free Software Foundation; version 2 of the License. + + This program is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + GNU General Public License for more details. + + You should have received a copy of the GNU General Public License + along with this program; if not, write to the Free Software + Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1335 USA */ + +/* + qsort implementation optimized for comparison of pointers + Inspired by the qsort implementations by Douglas C. Schmidt, + and Bentley & McIlroy's "Engineering a Sort Function". 
+*/ + + +#include "mysys_priv.h" +#ifndef SCO +#include +#endif + +/* + We need to use qsort with 2 different compare functions: with + QSORT_EXTRA_CMP_ARGUMENT the compare function gets cmp_argument as an + extra first argument +*/ +#ifdef QSORT_EXTRA_CMP_ARGUMENT +#define CMP(A,B) ((*cmp)(cmp_argument,(A),(B))) +#else +#define CMP(A,B) ((*cmp)((A),(B))) +#endif +/* Exchange two elements: as one pointer if swap_ptrs is set, else byte by byte over size bytes */ +#define SWAP(A, B, size,swap_ptrs) \ +do { \ + if (swap_ptrs) \ + { \ + reg1 char **a = (char**) (A), **b = (char**) (B); \ + char *tmp = *a; *a++ = *b; *b++ = tmp; \ + } \ + else \ + { \ + reg1 char *a = (A), *b = (B); \ + reg3 char *end= a+size; \ + do \ + { \ + char tmp = *a; *a++ = *b; *b++ = tmp; \ + } while (a < end); \ + } \ +} while (0) + +/* + Put the median of the three elements in the middle argument. + Uses cmp (through CMP), size and ptr_cmp of the function it is expanded in. +*/ +#define MEDIAN(low, mid, high) \ +{ \ + if (CMP(high,low) < 0) \ + SWAP(high, low, size, ptr_cmp); \ + if (CMP(mid, low) < 0) \ + SWAP(mid, low, size, ptr_cmp); \ + else if (CMP(high, mid) < 0) \ + SWAP(mid, high, size, ptr_cmp); \ +} + +/* The following node is used to store ranges to avoid recursive calls */ + +typedef struct st_stack +{ + char *low,*high; +} stack_node; + +#define PUSH(LOW,HIGH) {stack_ptr->low = LOW; stack_ptr++->high = HIGH;} +#define POP(LOW,HIGH) {LOW = (--stack_ptr)->low; HIGH = stack_ptr->high;} + +/* The following stack size is enough for ulong ~0 elements */ +#define STACK_SIZE (8 * sizeof(unsigned long int)) +#define THRESHOLD_FOR_INSERT_SORT 10 +#if defined(QSORT_TYPE_IS_VOID) +#define SORT_RETURN return +#else +#define SORT_RETURN return 0 +#endif + +/**************************************************************************** +** 'standard' quicksort with the following extensions: +** +** Can be compiled with the qsort2_cmp compare function +** Store ranges on stack to avoid recursion +** Use insert sort on small ranges +** Optimize for sorting of pointers (used often by MySQL) +** Use median comparison to find partition element +*****************************************************************************/ + +#ifdef QSORT_EXTRA_CMP_ARGUMENT +qsort_t my_qsort2(void *base_ptr, size_t 
count, size_t size, qsort2_cmp cmp, + void *cmp_argument) +#else +qsort_t my_qsort(void *base_ptr, size_t count, size_t size, qsort_cmp cmp) +#endif +{ + char *low, *high, *pivot; + stack_node stack[STACK_SIZE], *stack_ptr; + my_bool ptr_cmp; + /* Handle the simple case first */ + /* This will also make the rest of the code simpler */ + if (count <= 1) + SORT_RETURN; + + low = (char*) base_ptr; + high = low+ size * (count - 1); /* Points at the last element */ + stack_ptr = stack + 1; +#ifdef HAVE_valgrind + /* The first element in the stack will be accessed for the last POP */ + stack[0].low=stack[0].high=0; +#endif + pivot = (char *) my_alloca((int) size); /* Scratch copy of the partition element */ + ptr_cmp= size == sizeof(char*) && (intptr_t)low % sizeof(char*) == 0; /* Elements can be swapped as pointers */ + + /* The following loop sorts elements between high and low */ + do + { + char *low_ptr, *high_ptr, *mid; + + count=((size_t) (high - low) / size)+1; + /* If count is small, then an insert sort is faster than qsort */ + if (count < THRESHOLD_FOR_INSERT_SORT) + { + for (low_ptr = low + size; low_ptr <= high; low_ptr += size) + { + char *ptr; + for (ptr = low_ptr; ptr > low && CMP(ptr - size, ptr) > 0; + ptr -= size) + SWAP(ptr, ptr - size, size, ptr_cmp); + } + POP(low, high); + continue; + } + + /* Try to find a good middle element */ + mid= low + size * (count >> 1); + if (count > 40) /* Must be bigger than 24 */ + { + size_t step = size* (count / 8); + MEDIAN(low, low + step, low+step*2); + MEDIAN(mid - step, mid, mid+step); + MEDIAN(high - 2 * step, high-step, high); + /* Put best median in 'mid' */ + MEDIAN(low+step, mid, high-step); + low_ptr = low; + high_ptr = high; + } + else + { + MEDIAN(low, mid, high); + /* The low and high elements are already in sorted order relative to 'pivot' */ + low_ptr = low + size; + high_ptr = high - size; + } + memcpy(pivot, mid, size); + + do + { + while (CMP(low_ptr, pivot) < 0) + low_ptr += size; + while (CMP(pivot, high_ptr) < 0) + high_ptr -= size; + + if (low_ptr < high_ptr) + { + SWAP(low_ptr, high_ptr, size, ptr_cmp); + low_ptr += 
size; + high_ptr -= size; + } + else + { + if (low_ptr == high_ptr) + { + low_ptr += size; + high_ptr -= size; + } + break; + } + } + while (low_ptr <= high_ptr); + + /* + Prepare for next iteration. + Skip partitions of size 1 as these doesn't have to be sorted + Push the larger partition and sort the smaller one first. + This ensures that the stack is keept small. + */ + + if ((int) (high_ptr - low) <= 0) + { + if ((int) (high - low_ptr) <= 0) + { + POP(low, high); /* Nothing more to sort */ + } + else + low = low_ptr; /* Ignore small left part. */ + } + else if ((int) (high - low_ptr) <= 0) + high = high_ptr; /* Ignore small right part. */ + else if ((high_ptr - low) > (high - low_ptr)) + { + PUSH(low, high_ptr); /* Push larger left part */ + low = low_ptr; + } + else + { + PUSH(low_ptr, high); /* Push larger right part */ + high = high_ptr; + } + } while (stack_ptr > stack); + my_afree(pivot); + SORT_RETURN; +} diff --git a/mysys/mf_qsort2.c b/mysys/mf_qsort2.c new file mode 100644 index 00000000..cee460f8 --- /dev/null +++ b/mysys/mf_qsort2.c @@ -0,0 +1,20 @@ +/* Copyright (C) 2000 MySQL AB + Use is subject to license terms + + This program is free software; you can redistribute it and/or modify + it under the terms of the GNU General Public License as published by + the Free Software Foundation; version 2 of the License. + + This program is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + GNU General Public License for more details. 
+
+   You should have received a copy of the GNU General Public License
+   along with this program; if not, write to the Free Software
+   Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1335 USA */
+
+/* qsort that sends one extra argument to the compare subroutine.
+   Defining QSORT_EXTRA_CMP_ARGUMENT before including mf_qsort.c makes that
+   file define my_qsort2(), whose compare function receives cmp_argument,
+   instead of my_qsort(). */
+
+#define QSORT_EXTRA_CMP_ARGUMENT
+#include "mf_qsort.c"
diff --git a/mysys/mf_radix.c b/mysys/mf_radix.c
new file mode 100644
index 00000000..8f044cf9
--- /dev/null
+++ b/mysys/mf_radix.c
@@ -0,0 +1,60 @@
+/* Copyright (c) 2000, 2007 MySQL AB
+   Use is subject to license terms
+
+   This program is free software; you can redistribute it and/or modify
+   it under the terms of the GNU General Public License as published by
+   the Free Software Foundation; version 2 of the License.
+
+   This program is distributed in the hope that it will be useful,
+   but WITHOUT ANY WARRANTY; without even the implied warranty of
+   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+   GNU General Public License for more details.
+
+   You should have received a copy of the GNU General Public License
+   along with this program; if not, write to the Free Software
+   Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1335 USA */
+
+/*
+  Radixsort for pointers to fixed length strings.
+  A very quick sort for not too long (< 20 char) strings.
+  Needs an extra buffer of number_of_elements pointers but is
+  2-3 times faster than quicksort
+*/
+
+#include "mysys_priv.h"
+/* NOTE(review): the header name was stripped from this directive in the
+   extracted patch; <m_string.h> is assumed - confirm against upstream. */
+#include <m_string.h>
+
+        /* Radixsort */
+
+/*
+  Tell whether the radix sort should be used: elements of at most 20 bytes
+  and at least 1000 but fewer than 100000 items.
+*/
+
+my_bool radixsort_is_appliccable(uint n_items, size_t size_of_element)
+{
+  return size_of_element <= 20 && n_items >= 1000 && n_items < 100000;
+}
+
+/*
+  Sort an array of pointers to strings of size_of_element bytes.
+
+  base                array of number_of_elements string pointers,
+                      sorted in place
+  number_of_elements  number of pointers in base
+  size_of_element     number of bytes compared in every string
+  buffer              work area with room for number_of_elements pointers
+
+  One counting pass is made per byte position, from the last byte to the
+  first. A pass is skipped when all strings have the same byte there.
+*/
+
+void radixsort_for_str_ptr(uchar **base, uint number_of_elements, size_t size_of_element, uchar **buffer)
+{
+  uchar **end,**ptr,**buffer_ptr;
+  uint32 *count_ptr,*count_end,count[256];
+  int pass;
+
+  end=base+number_of_elements; count_end=count+256;
+  for (pass=(int) size_of_element-1 ; pass >= 0 ; pass--)
+  {
+    /* Count how often every byte value occurs at this position */
+    bzero((uchar*) count,sizeof(uint32)*256);
+    for (ptr= base ; ptr < end ; ptr++)
+      count[ptr[0][pass]]++;
+    if (count[0] == number_of_elements)
+      goto next;
+    /* Turn the counts into end positions; stop if one value has it all */
+    for (count_ptr=count+1 ; count_ptr < count_end ; count_ptr++)
+    {
+      if (*count_ptr == number_of_elements)
+        goto next;
+      (*count_ptr)+= *(count_ptr-1);
+    }
+    /* Distribute from the back so that equal bytes keep their order */
+    for (ptr= end ; ptr-- != base ;)
+      buffer[--count[ptr[0][pass]]]= *ptr;
+    for (ptr=base, buffer_ptr=buffer ; ptr < end ;)
+      (*ptr++) = *buffer_ptr++;
+  next:;
+  }
+}
diff --git a/mysys/mf_same.c b/mysys/mf_same.c
new file mode 100644
index 00000000..fda0710c
--- /dev/null
+++ b/mysys/mf_same.c
@@ -0,0 +1,41 @@
+/* Copyright (c) 2000, 2001, 2007 MySQL AB
+   Use is subject to license terms
+
+   This program is free software; you can redistribute it and/or modify
+   it under the terms of the GNU General Public License as published by
+   the Free Software Foundation; version 2 of the License.
+
+   This program is distributed in the hope that it will be useful,
+   but WITHOUT ANY WARRANTY; without even the implied warranty of
+   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+   GNU General Public License for more details.
+
+   You should have received a copy of the GNU General Public License
+   along with this program; if not, write to the Free Software
+   Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1335 USA */
+
+/* Copies the directory structure and the extension from a file name */
+
+#include "mysys_priv.h"
+/* NOTE(review): the header name was stripped from this directive in the
+   extracted patch; <m_string.h> is assumed - confirm against upstream. */
+#include <m_string.h>
+
+        /*
+          Copy directory and/or extension between filenames.
+          (For the meaning of 'flag', check mf_format.c)
+          'to' may be equal to 'name'.
+          Returns 'to'.
+
+          The directory part of 'name' is stored in 'dev'. The extension is
+          searched for from the last FN_EXTCHAR in the rest of 'name' and is
+          empty if there is none. Both are handed to fn_format() together
+          with 'to' as the name to format.
+        */
+
+char * fn_same(char *to, const char *name, int flag)
+{
+  char dev[FN_REFLEN];
+  const char *ext;
+  size_t dev_length;
+  DBUG_ENTER("fn_same");
+  DBUG_PRINT("enter",("to: %s name: %s flag: %d",to,name,flag));
+
+  if ((ext=strrchr(name+dirname_part(dev, name, &dev_length),FN_EXTCHAR)) == 0)
+    ext="";
+
+  DBUG_RETURN(fn_format(to,to,dev,ext,flag));
+} /* fn_same */
diff --git a/mysys/mf_sort.c b/mysys/mf_sort.c
new file mode 100644
index 00000000..24e875b8
--- /dev/null
+++ b/mysys/mf_sort.c
@@ -0,0 +1,42 @@
+/* Copyright (c) 2000, 2010, Oracle and/or its affiliates. All rights reserved.
+
+   This program is free software; you can redistribute it and/or modify
+   it under the terms of the GNU General Public License as published by
+   the Free Software Foundation; version 2 of the License.
+
+   This program is distributed in the hope that it will be useful,
+   but WITHOUT ANY WARRANTY; without even the implied warranty of
+   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+   GNU General Public License for more details.
+
+   You should have received a copy of the GNU General Public License
+   along with this program; if not, write to the Free Software
+   Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1335 USA */
+
+/* Sort of string pointers in string-order with radix or qsort */
+
+#include "mysys_priv.h"
+/* NOTE(review): the header name was stripped from this directive in the
+   extracted patch; <m_string.h> is assumed - confirm against upstream. */
+#include <m_string.h>
+
+/*
+  Sort 'items' string pointers stored at 'base'; 'size' is the length of
+  the strings they point to.
+
+  The radix sort is used when radixsort_is_appliccable() accepts the input
+  and its work buffer can be allocated. In every other case my_qsort2() is
+  used with the compare function returned by get_ptr_compare(size).
+*/
+
+void my_string_ptr_sort(uchar *base, uint items, size_t size)
+{
+#if INT_MAX > 65536L
+  uchar **ptr=0;
+
+  if (radixsort_is_appliccable(items, size) &&
+      (ptr= (uchar**) my_malloc(PSI_NOT_INSTRUMENTED,
+                                items * sizeof(char*),MYF(0))))
+  {
+    radixsort_for_str_ptr((uchar**) base,items,size,ptr);
+    my_free(ptr);
+  }
+  else
+#endif
+  {
+    if (size && items)
+    {
+      my_qsort2(base,items, sizeof(uchar*), get_ptr_compare(size),
+                (void*) &size);
+    }
+  }
+}
diff --git a/mysys/mf_soundex.c b/mysys/mf_soundex.c
new file mode 100644
index 00000000..b686cac8
--- /dev/null
+++ b/mysys/mf_soundex.c
@@ -0,0 +1,106 @@
+/* Copyright (c) 2000, 2002, 2004, 2007 MySQL AB
+   Use is subject to license terms
+
+   This program is free software; you can redistribute it and/or modify
+   it under the terms of the GNU General Public License as published by
+   the Free Software Foundation; version 2 of the License.
+
+   This program is distributed in the hope that it will be useful,
+   but WITHOUT ANY WARRANTY; without even the implied warranty of
+   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+   GNU General Public License for more details.
+
+   You should have received a copy of the GNU General Public License
+   along with this program; if not, write to the Free Software
+   Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1335 USA */
+
+/****************************************************************
+*       SOUNDEX ALGORITHM in C                                  *
+*                                                               *
+*       The basic Algorithm source is taken from EDN Nov.       *
+*       14, 1985 pg. 36.                                        *
+*                                                               *
+*       As a test Those in Illinois will find that the          *
+*       first group of numbers in their drivers license         *
+*       number is the soundex number for their last name.       *
+*                                                               *
+*       RHW PC-IBBS ID. #1230                                   *
+*                                                               *
+*       As an extension if remove_garbage is set then all non-  *
+*       alpha characters are skipped                            *
+*                                                               *
+*       Note, that this implementation corresponds to the       *
+*       original version of the algorithm, not to the more      *
+*       popular "enhanced" version, described by Knuth.         *
+****************************************************************/
+
+#include "mysys_priv.h"
+/* NOTE(review): the header name was stripped from this directive in the
+   extracted patch; <m_ctype.h> is assumed - confirm against upstream. */
+#include <m_ctype.h>
+#include "my_static.h"
+
+static char get_scode(CHARSET_INFO * cs, char **ptr,pbool remove_garbage);
+
+                /* output string is 4 bytes long */
+                /* out_pntr can be == in_pntr */
+
+/*
+  cs              character set used for the upper case map and the
+                  alpha test
+  out_pntr        receives the first letter in upper case, three code
+                  characters (padded with '0') and a terminating 0
+  in_pntr         0-terminated input string
+  remove_garbage  if set, non-alpha characters in the input are skipped
+*/
+
+void soundex(CHARSET_INFO * cs,register char * out_pntr, char * in_pntr,
+             pbool remove_garbage)
+{
+  char ch,last_ch;
+  reg3 char * end;
+  register const uchar *map=cs->to_upper;
+
+  if (remove_garbage)
+  {
+    while (*in_pntr && !my_isalpha(cs,*in_pntr)) /* Skip pre-space */
+      in_pntr++;
+  }
+  *out_pntr++ = map[(uchar)*in_pntr];   /* Copy first letter             */
+  last_ch = get_scode(cs,&in_pntr,0);   /* code of the first letter      */
+                                        /* for the first 'double-letter  */
+                                        /* check.                        */
+  end=out_pntr+3;                       /* Loop on input letters until   */
+                                        /* end of input (null) or output */
+                                        /* letter code count = 3         */
+
+  /*
+    Step over the first letter, but never over the end of the string: the
+    input may be empty, or may be used up by the garbage skip above, and
+    get_scode() would then read behind the terminating 0.
+  */
+  if (*in_pntr)
+    in_pntr++;
+  while (out_pntr < end && (ch = get_scode(cs,&in_pntr,remove_garbage)) != 0)
+  {
+    in_pntr++;
+    if ((ch != '0') && (ch != last_ch)) /* if not skipped or double */
+    {
+      *out_pntr++ = ch;                 /* letter, copy to output */
+    }                                   /* for next double-letter check */
+    last_ch = ch;                       /* save code of last input letter */
+  }
+  while (out_pntr < end)
+    *out_pntr++ = '0';
+  *out_pntr=0;                          /* end string */
+  return;
+} /* soundex */
+
+
+  /*
+    If alpha, map input letter to soundex code.
+    If not alpha and remove_garbage is set then skip to next char
+    else return 0
+  */
+
+static char get_scode(CHARSET_INFO * cs,char **ptr, pbool remove_garbage)
+{
+  uchar ch;
+
+  if (remove_garbage)
+  {
+    while (**ptr && !my_isalpha(cs,**ptr))
+      (*ptr)++;
+  }
+  ch=my_toupper(cs,**ptr);
+  if (ch < 'A' || ch > 'Z')
+  {
+    if (my_isalpha(cs,ch))              /* If extended alpha (country spec) */
+      return '0';                       /* treat as vowel */
+    return 0;                           /* Can't map */
+  }
+  return(soundex_map[ch-'A']);
+} /* get_scode */
diff --git a/mysys/mf_tempdir.c b/mysys/mf_tempdir.c
new file mode 100644
index 00000000..f2b1ea81
--- /dev/null
+++ b/mysys/mf_tempdir.c
@@ -0,0 +1,96 @@
+/* Copyright (c) 2000, 2013, Oracle and/or its affiliates. All rights reserved.
+
+   This program is free software; you can redistribute it and/or modify
+   it under the terms of the GNU General Public License as published by
+   the Free Software Foundation; version 2 of the License.
+
+   This program is distributed in the hope that it will be useful,
+   but WITHOUT ANY WARRANTY; without even the implied warranty of
+   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+   GNU General Public License for more details.
+
+   You should have received a copy of the GNU General Public License
+   along with this program; if not, write to the Free Software
+   Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1335 USA */
+
+#include "mysys_priv.h"
+/* NOTE(review): the header name was stripped from this directive in the
+   extracted patch; <m_string.h> is assumed - confirm against upstream. */
+#include <m_string.h>
+
+#if defined(_WIN32)
+#define DELIM ';'
+#else
+#define DELIM ':'
+#endif
+
+/*
+  Fill 'tmpdir' from a DELIM separated list of directories.
+
+  If 'pathlist' is NULL or empty, the TMPDIR environment variable is used
+  (on Windows also TEMP and TMP), and DEFAULT_TMPDIR if that gives nothing.
+  Every directory is passed through cleanup_dirname() and stored as a copy.
+
+  Returns FALSE on success. Returns TRUE on failure; everything acquired up
+  to then (stored names, the list, the mutex) has been released again.
+*/
+
+my_bool init_tmpdir(MY_TMPDIR *tmpdir, const char *pathlist)
+{
+  char *end, *copy;
+  char buff[FN_REFLEN];
+  size_t i;
+  DBUG_ENTER("init_tmpdir");
+  DBUG_PRINT("enter", ("pathlist: %s", pathlist ? pathlist : "NULL"));
+
+  mysql_mutex_init(key_TMPDIR_mutex, &tmpdir->mutex, MY_MUTEX_INIT_FAST);
+  if (my_init_dynamic_array(key_memory_MY_TMPDIR_full_list, &tmpdir->full_list,
+                            sizeof(char*), 1, 5, MYF(0)))
+    goto err;
+  if (!pathlist || !pathlist[0])
+  {
+    /* Get default temporary directory */
+    pathlist=getenv("TMPDIR");  /* Use this if possible */
+#if defined(_WIN32)
+    if (!pathlist)
+      pathlist=getenv("TEMP");
+    if (!pathlist)
+      pathlist=getenv("TMP");
+#endif
+    if (!pathlist || !pathlist[0])
+      pathlist= DEFAULT_TMPDIR;
+  }
+  do
+  {
+    size_t length;
+    end=strcend(pathlist, DELIM);
+    /* The element and its terminating 0 must fit into buff */
+    if ((size_t) (end-pathlist) >= sizeof(buff))
+      goto err_free;
+    strmake(buff, pathlist, (uint) (end-pathlist));
+    length= cleanup_dirname(buff, buff);
+    if (!(copy= my_strndup(key_memory_MY_TMPDIR_full_list, buff, length, MYF(MY_WME))))
+      goto err_free;
+    /* The list stores the pointer itself, so the address of 'copy' is
+       passed. Written with a space after '&' so that the text cannot be
+       mistaken for a character entity. */
+    if (insert_dynamic(&tmpdir->full_list, (uchar*) & copy))
+    {
+      my_free(copy);                    /* Not owned by the list yet */
+      goto err_free;
+    }
+    pathlist=end+1;
+  }
+  while (*end);
+  freeze_size(&tmpdir->full_list);
+  tmpdir->list=(char **)tmpdir->full_list.buffer;
+  tmpdir->max=tmpdir->full_list.elements-1;
+  tmpdir->cur=0;
+  DBUG_RETURN(FALSE);
+
+err_free:
+  /* Release the directory names that were already stored in the list */
+  for (i= 0; i < tmpdir->full_list.elements; i++)
+    my_free(((char**) tmpdir->full_list.buffer)[i]);
+err:
+  delete_dynamic(&tmpdir->full_list);           /* Safe to free */
+  mysql_mutex_destroy(&tmpdir->mutex);
+  DBUG_RETURN(TRUE);
+}
+
+
+/*
+  Return the next directory of the list. With more than one directory the
+  list is walked in a cycle under tmpdir->mutex; with a single directory
+  (max == 0) it is returned without taking the mutex.
+*/
+
+char *my_tmpdir(MY_TMPDIR *tmpdir)
+{
+  char *dir;
+  if (!tmpdir->max)
+    return tmpdir->list[0];
+  mysql_mutex_lock(&tmpdir->mutex);
+  dir=tmpdir->list[tmpdir->cur];
+  tmpdir->cur= (tmpdir->cur == tmpdir->max) ? 0 : tmpdir->cur+1;
+  mysql_mutex_unlock(&tmpdir->mutex);
+  return dir;
+}
+
+/*
+  Free the directory names, the list and the mutex of 'tmpdir'.
+  Does nothing if the list has no elements.
+*/
+
+void free_tmpdir(MY_TMPDIR *tmpdir)
+{
+  uint i;
+  if (!tmpdir->full_list.elements)
+    return;
+  for (i=0; i<=tmpdir->max; i++)
+    my_free(tmpdir->list[i]);
+  delete_dynamic(&tmpdir->full_list);
+  mysql_mutex_destroy(&tmpdir->mutex);
+}
+
diff --git a/mysys/mf_tempfile.c b/mysys/mf_tempfile.c
new file mode 100644
index 00000000..0f1c6d6b
--- /dev/null
+++ b/mysys/mf_tempfile.c
@@ -0,0 +1,177 @@
+/* Copyright (c) 2000, 2013, Oracle and/or its affiliates. All rights reserved.
+
+   This program is free software; you can redistribute it and/or modify
+   it under the terms of the GNU General Public License as published by
+   the Free Software Foundation; version 2 of the License.
+
+   This program is distributed in the hope that it will be useful,
+   but WITHOUT ANY WARRANTY; without even the implied warranty of
+   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+   GNU General Public License for more details.
+
+   You should have received a copy of the GNU General Public License
+   along with this program; if not, write to the Free Software
+   Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1335 USA */
+
+/* NOTE(review): the names of the three <...> headers below were stripped
+   from their directives in the extracted patch. <paths.h> follows from the
+   HAVE_PATHS_H guard; <m_string.h> and <errno.h> are assumed - confirm
+   against upstream. */
+#include "mysys_priv.h"
+#include <m_string.h>
+#include "my_static.h"
+#include "mysys_err.h"
+#include <errno.h>
+#ifdef HAVE_PATHS_H
+#include <paths.h>
+#endif
+
+/* Where mkostemp() exists, let mkstemp() create the file with O_CLOEXEC */
+#ifdef HAVE_MKOSTEMP
+#define mkstemp(A) mkostemp(A, O_CLOEXEC)
+#endif
+
+/*
+  @brief
+  Create a temporary file with unique name in a given directory
+
+  @details
+  create_temp_file
+    to             pointer to buffer where temporary filename will be stored
+    dir            directory where to create the file
+    prefix         prefix the filename with this
+    mode           Flags to use for my_create/my_open
+    MyFlags        Magic flags
+
+  @return
+    File descriptor of opened file if success
+    -1 and sets errno if fails.
+
+  @note
+    The behaviour of this function differs a lot between
+    implementation, its main use is to generate a file with
+    a name that does not already exist.
+
+    When passing MY_TEMPORARY flag in MyFlags the file is automatically deleted
+
+    "mode" bits that always must be used for newly created files with
+    unique file names (O_EXCL | O_TRUNC | O_CREAT | O_RDWR) are added
+    automatically, and shouldn't be specified by the caller.
+
+    The implementation using mkstemp should be considered the
+    reference implementation when adding a new or modifying an
+    existing one
+
+*/
+
+File create_temp_file(char *to, const char *dir, const char *prefix,
+                      int mode, myf MyFlags)
+{
+  File file= -1;
+
+  DBUG_ENTER("create_temp_file");
+  /* prefix may be NULL (the mkstemp branch below falls back to "tmp."),
+     so it is guarded here in the same way as dir */
+  DBUG_PRINT("enter", ("dir: %s, prefix: %s", dir ? dir : "(null)",
+                       prefix ? prefix : "(null)"));
+  DBUG_ASSERT((mode & (O_EXCL | O_TRUNC | O_CREAT | O_RDWR)) == 0);
+
+  mode|= O_TRUNC | O_CREAT | O_RDWR; /* not O_EXCL, see Windows code below */
+
+#ifdef _WIN32
+  {
+    TCHAR path_buf[MAX_PATH-14];
+    /*
+      Use GetTempPath to determine path for temporary files.
+      This is because the documentation for GetTempFileName
+      has the following to say about this parameter:
+      "If this parameter is NULL, the function fails."
+    */
+    if (!dir)
+    {
+      if(GetTempPath(sizeof(path_buf), path_buf) > 0)
+        dir = path_buf;
+    }
+    /*
+      Use GetTempFileName to generate a unique filename, create
+      the file and release its handle
+       - uses up to the first three letters from prefix
+    */
+    if (GetTempFileName(dir, prefix, 0, to) == 0)
+      DBUG_RETURN(-1);
+
+    DBUG_PRINT("info", ("name: %s", to));
+
+    if (MyFlags & MY_TEMPORARY)
+      mode|= O_SHORT_LIVED | O_TEMPORARY;
+
+    /*
+      Open the file without O_EXCL flag
+      since the file has already been created by GetTempFileName
+    */
+    if ((file= my_open(to, mode, MyFlags)) < 0)
+    {
+      /* Open failed, remove the file created by GetTempFileName */
+      int tmp= my_errno;
+      (void) my_delete(to, MYF(0));
+      my_errno= tmp;
+    }
+  }
+#elif defined(HAVE_MKSTEMP)
+  if (!dir && ! (dir =getenv("TMPDIR")))
+    dir= DEFAULT_TMPDIR;
+#ifdef O_TMPFILE
+  {
+    /* Cleared once open() reports that O_TMPFILE cannot be used */
+    static int O_TMPFILE_works= 1;
+
+    if ((MyFlags & MY_TEMPORARY) && O_TMPFILE_works)
+    {
+      /* explicitly don't use O_EXCL here as it has a different
+         meaning with O_TMPFILE
+      */
+      if ((file= open(dir, (mode & ~O_CREAT) | O_TMPFILE | O_CLOEXEC,
+                      S_IRUSR | S_IWUSR | S_IRGRP | S_IWGRP)) >= 0)
+      {
+        my_snprintf(to, FN_REFLEN, "%s/#sql/fd=%d", dir, file);
+        file=my_register_filename(file, to, FILE_BY_O_TMPFILE,
+                                  EE_CANTCREATEFILE, MyFlags);
+      }
+      else if (errno == EOPNOTSUPP || errno == EINVAL)
+      {
+        my_printf_error(EE_CANTCREATEFILE, "O_TMPFILE is not supported on %s "
+                        "(disabling future attempts)",
+                        MYF(ME_NOTE | ME_ERROR_LOG_ONLY), dir);
+        O_TMPFILE_works= 0;
+      }
+    }
+  }
+  if (file == -1)
+#endif /* O_TMPFILE */
+  {
+    char prefix_buff[30];
+    uint pfx_len;
+    File org_file;
+
+    /* Build "<prefix>XXXXXX"; the prefix is cut so that the six X and the
+       terminating 0 still fit into prefix_buff */
+    pfx_len= (uint) (strmov(strnmov(prefix_buff,
+                                    prefix ? prefix : "tmp.",
+                                    sizeof(prefix_buff)-7),"XXXXXX") -
+                     prefix_buff);
+    if (strlen(dir)+ pfx_len > FN_REFLEN-2)
+    {
+      errno=my_errno= ENAMETOOLONG;
+      DBUG_RETURN(file);
+    }
+    strmov(convert_dirname(to,dir,NullS),prefix_buff);
+    org_file=mkstemp(to);
+    /* A MY_TEMPORARY file is unlinked at once; the descriptor stays open */
+    if (org_file >= 0 && (MyFlags & MY_TEMPORARY))
+      (void) my_delete(to, MYF(MY_WME));
+    file=my_register_filename(org_file, to, FILE_BY_MKSTEMP,
+                              EE_CANTCREATEFILE, MyFlags);
+    /* If we didn't manage to register the name, remove the temp file */
+    if (org_file >= 0 && file < 0)
+    {
+      int tmp=my_errno;
+      close(org_file);
+      (void) my_delete(to, MYF(MY_WME));
+      my_errno=tmp;
+    }
+  }
+#else
+#error No implementation found for create_temp_file
+#endif
+  if (file >= 0)
+    statistic_increment(my_tmp_file_created,&THR_LOCK_open);
+  DBUG_RETURN(file);
+}
diff --git a/mysys/mf_unixpath.c b/mysys/mf_unixpath.c
new file mode 100644
index 00000000..cc2b671a
--- /dev/null
+++ b/mysys/mf_unixpath.c
@@ -0,0 +1,36 @@
+/* Copyright (c) 2000, 2010, Oracle and/or its affiliates. All rights reserved.
+
+   This program is free software; you can redistribute it and/or modify
+   it under the terms of the GNU General Public License as published by
+   the Free Software Foundation; version 2 of the License.
+
+   This program is distributed in the hope that it will be useful,
+   but WITHOUT ANY WARRANTY; without even the implied warranty of
+   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+   GNU General Public License for more details.
+
+   You should have received a copy of the GNU General Public License
+   along with this program; if not, write to the Free Software
+   Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1335 USA */
+
+#include "mysys_priv.h"
+/* NOTE(review): the header name was stripped from this directive in the
+   extracted patch; <m_string.h> is assumed - confirm against upstream. */
+#include <m_string.h>
+
+/**
+  Convert filename to unix style filename.
+
+  @remark On Windows, converts '\' to '/'.
+
+  @remark Every FN_LIBCHAR in the string is replaced in place. Nothing is
+          done when FN_LIBCHAR already is '/'.
+
+  @param to   A pathname.
+*/
+
+void to_unix_path(char *to __attribute__((unused)))
+{
+#if FN_LIBCHAR != '/'
+  {
+    /* Search on from the character after each hit; the pointer is never
+       moved in front of the start of the string */
+    for (; (to= strchr(to, FN_LIBCHAR)) != 0; to++)
+      *to= '/';
+  }
+#endif
+}
diff --git a/mysys/mf_wcomp.c b/mysys/mf_wcomp.c
new file mode 100644
index 00000000..6fb19eba
--- /dev/null
+++ b/mysys/mf_wcomp.c
@@ -0,0 +1,90 @@
+/* Copyright (c) 2000, 2003, 2004 MySQL AB
+   Use is subject to license terms
+
+   This program is free software; you can redistribute it and/or modify
+   it under the terms of the GNU General Public License as published by
+   the Free Software Foundation; version 2 of the License.
+
+   This program is distributed in the hope that it will be useful,
+   but WITHOUT ANY WARRANTY; without even the implied warranty of
+   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+   GNU General Public License for more details.
+
+   You should have received a copy of the GNU General Public License
+   along with this program; if not, write to the Free Software
+   Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1335 USA */
+
+/* Functions for comparing with wild-cards */
+
+#include "mysys_priv.h"
+
+        /* Test if a string is "comparable" to a wild-card string */
+        /* returns 0 if the strings are "comparable" */
+
+char wild_many='*';
+char wild_one='?';
+char wild_prefix=0; /* QQ this can potentially cause a SIGSEGV */
+
+/*
+  str             string to test
+  wildstr         pattern; wild_many matches any number of characters,
+                  wild_one one character, wild_prefix quotes the next one
+  str_is_pattern  if set, 'str' is handled as a pattern too: its own
+                  wild_many and wild_prefix characters get special
+                  treatment in the branches below
+
+  Returns 0 on match and 1 otherwise. The function calls itself for the
+  part of the pattern that follows a wild_many.
+*/
+
+int wild_compare(register const char *str, register const char *wildstr,
+                 pbool str_is_pattern)
+{
+  char cmp;
+  DBUG_ENTER("wild_compare");
+
+  while (*wildstr)
+  {
+    while (*wildstr && *wildstr != wild_many && *wildstr != wild_one)
+    {
+      if (*wildstr == wild_prefix && wildstr[1])
+      {
+        wildstr++;
+        if (str_is_pattern && *str++ != wild_prefix)
+          DBUG_RETURN(1);
+      }
+      if (*wildstr++ != *str++)
+        DBUG_RETURN(1);
+    }
+    if (! *wildstr )
+      DBUG_RETURN(*str != 0);
+    if (*wildstr++ == wild_one)
+    {
+      if (! *str || (str_is_pattern && *str == wild_many))
+        DBUG_RETURN(1);                         /* One char; skip */
+      if (*str++ == wild_prefix && str_is_pattern && *str)
+        str++;
+    }
+    else
+    {                                           /* Found '*' */
+      while (str_is_pattern && *str == wild_many)
+        str++;
+      for (; *wildstr == wild_many || *wildstr == wild_one; wildstr++)
+        if (*wildstr == wild_many)
+        {
+          while (str_is_pattern && *str == wild_many)
+            str++;
+        }
+        else
+        {
+          if (str_is_pattern && *str == wild_prefix && str[1])
+            str+=2;
+          else if (! *str++)
+            DBUG_RETURN (1);
+        }
+      if (!*wildstr)
+        DBUG_RETURN(0);                         /* '*' as last char: OK */
+      if ((cmp= *wildstr) == wild_prefix && wildstr[1] && !str_is_pattern)
+        cmp=wildstr[1];
+      for (;;str++)
+      {
+        while (*str && *str != cmp)
+          str++;
+        if (!*str)
+          DBUG_RETURN (1);
+        if (wild_compare(str,wildstr,str_is_pattern) == 0)
+          DBUG_RETURN (0);
+      }
+      /* We will never come here */
+    }
+  }
+  DBUG_RETURN (*str != 0);
+} /* wild_compare */
diff --git a/mysys/mulalloc.c b/mysys/mulalloc.c
new file mode 100644
index 00000000..51f8d61b
--- /dev/null
+++ b/mysys/mulalloc.c
@@ -0,0 +1,127 @@
+/* Copyright (c) 2000, 2002, 2003, 2007 MySQL AB
+   Use is subject to license terms
+
+   This program is free software; you can redistribute it and/or modify
+   it under the terms of the GNU General Public License as published by
+   the Free Software Foundation; version 2 of the License.
+
+   This program is distributed in the hope that it will be useful,
+   but WITHOUT ANY WARRANTY; without even the implied warranty of
+   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+   GNU General Public License for more details.
+
+   You should have received a copy of the GNU General Public License
+   along with this program; if not, write to the Free Software
+   Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1335 USA */
+
+#include "mysys_priv.h"
+/* NOTE(review): the header name was stripped from this directive in the
+   extracted patch; <stdarg.h> is assumed from the va_list use below. */
+#include <stdarg.h>
+
+#ifndef DBUG_OFF
+/* Put a protected barrier after every element when using my_multi_malloc() */
+#define ALLOC_BARRIER
+#endif
+
+/*
+  Malloc many pointers at the same time
+  Only ptr1 can be free'd, and doing this will free all
+  the memory allocated. ptr2, etc all point inside big allocated
+  memory area.
+
+  SYNOPSIS
+    my_multi_malloc()
+      key                Memory key passed on to my_malloc()
+      myFlags            Flags
+      ptr1, length1      Multiple arguments terminated by null ptr
+      ptr2, length2      ...
+      ...
+      NULL
+
+  NOTES
+    Every length is read from the argument list as uint and is rounded up
+    with ALIGN_SIZE().
+
+  RETURN
+    Start of the allocated area (the value stored in ptr1), or 0 if
+    my_malloc() failed.
+*/
+
+void* my_multi_malloc(PSI_memory_key key, myf myFlags, ...)
+{
+  va_list args;
+  char **ptr,*start,*res;
+  size_t tot_length,length;
+  DBUG_ENTER("my_multi_malloc");
+
+  /* First pass: add up the aligned lengths */
+  va_start(args,myFlags);
+  tot_length=0;
+  while ((ptr=va_arg(args, char **)))
+  {
+    length=va_arg(args,uint);
+    tot_length+=ALIGN_SIZE(length);
+#ifdef ALLOC_BARRIER
+    tot_length+= ALIGN_SIZE(1);
+#endif
+  }
+  va_end(args);
+
+  if (!(start=(char *) my_malloc(key, tot_length,myFlags)))
+    DBUG_RETURN(0); /* purecov: inspected */
+
+  /* Second pass: hand out the parts of the area */
+  va_start(args,myFlags);
+  res=start;
+  while ((ptr=va_arg(args, char **)))
+  {
+    *ptr=res;
+    length=va_arg(args,uint);
+    res+=ALIGN_SIZE(length);
+#ifdef ALLOC_BARRIER
+    TRASH_FREE(res, ALIGN_SIZE(1));
+    res+= ALIGN_SIZE(1);
+#endif
+  }
+  va_end(args);
+  DBUG_RETURN((void*) start);
+}
+
+
+/*
+  Same as my_multi_malloc, but each entry can be over 4G
+
+  SYNOPSIS
+    my_multi_malloc_large()
+      key                Memory key passed on to my_malloc()
+      myFlags            Flags
+      ptr1, length1      Multiple arguments terminated by null ptr
+      ptr2, length2      ...
+      ...
+      NULL
+
+  NOTES
+    Every length is read from the argument list as ulonglong.
+    NOTE(review): the total is cast to size_t for my_malloc(); where size_t
+    is narrower than ulonglong a larger total would be cut off - confirm
+    that callers cannot reach that.
+*/
+
+void *my_multi_malloc_large(PSI_memory_key key, myf myFlags, ...)
+{
+  va_list args;
+  char **ptr,*start,*res;
+  ulonglong tot_length,length;
+  DBUG_ENTER("my_multi_malloc_large");
+
+  va_start(args,myFlags);
+  tot_length=0;
+  while ((ptr=va_arg(args, char **)))
+  {
+    length=va_arg(args,ulonglong);
+    tot_length+=ALIGN_SIZE(length);
+#ifdef ALLOC_BARRIER
+    tot_length+= ALIGN_SIZE(1);
+#endif
+  }
+  va_end(args);
+
+  if (!(start=(char *) my_malloc(key, (size_t) tot_length, myFlags)))
+    DBUG_RETURN(0); /* purecov: inspected */
+
+  va_start(args,myFlags);
+  res=start;
+  while ((ptr=va_arg(args, char **)))
+  {
+    *ptr=res;
+    length=va_arg(args,ulonglong);
+    res+=ALIGN_SIZE(length);
+#ifdef ALLOC_BARRIER
+    TRASH_FREE(res, ALIGN_SIZE(1));
+    res+= ALIGN_SIZE(1);
+#endif
+  }
+  va_end(args);
+  DBUG_RETURN((void*) start);
+}
diff --git a/mysys/my_access.c b/mysys/my_access.c
new file mode 100644
index 00000000..81e635d9
--- /dev/null
+++ b/mysys/my_access.c
@@ -0,0 +1,267 @@
+/* Copyright (c) 2000, 2012, Oracle and/or its affiliates
+   Copyright (c) 2012, 2014, SkySQL Ab
+
+   This program is free software; you can redistribute it and/or modify
+   it under the terms of the GNU General Public License as published by
+   the Free Software Foundation; version 2 of the License.
+
+   This program is distributed in the hope that it will be useful,
+   but WITHOUT ANY WARRANTY; without even the implied warranty of
+   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+   GNU General Public License for more details.
+
+   You should have received a copy of the GNU General Public License
+   along with this program; if not, write to the Free Software
+   Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1335 USA */
+
+#include "mysys_priv.h"
+/* NOTE(review): the header name was stripped from this directive in the
+   extracted patch; <m_string.h> is assumed - confirm against upstream. */
+#include <m_string.h>
+
+#ifdef _WIN32
+
+/*
+  Check a file or path for accessibility.
+
+  SYNOPSIS
+    my_access()
+    path        Path to file
+    amode       Access method
+
+  RETURN VALUES
+  0    ok
+  -1   error  (We use -1 as my_access is mapped to access on other platforms)
+*/
+
+int my_access(const char *path, int amode)
+{
+  DWORD attributes;
+
+  attributes = GetFileAttributes(path);
+  /* Fails if the attributes cannot be read, or if write access is asked
+     for on a read-only file */
+  if (attributes == INVALID_FILE_ATTRIBUTES ||
+      ((attributes & FILE_ATTRIBUTE_READONLY) && (amode & W_OK)))
+  {
+    my_errno= errno= EACCES;
+    return -1;
+  }
+  return 0;
+}
+
+#endif /* _WIN32 */
+
+
+/*
+  List of file names that causes problem on windows
+
+  NOTE that one can also not have file names of type CON.TXT
+
+  NOTE: it is important to keep "CLOCK$" on the first place,
+  we skip it in check_if_legal_tablename.
+*/
+static const char *reserved_names[]=
+{
+  "CLOCK$",
+  "CON", "PRN", "AUX", "NUL",
+  "COM1", "COM2", "COM3", "COM4", "COM5", "COM6", "COM7", "COM8", "COM9",
+  "LPT1", "LPT2", "LPT3", "LPT4", "LPT5", "LPT6", "LPT7", "LPT8", "LPT9",
+  NullS
+};
+
+#define MAX_RESERVED_NAME_LENGTH 6
+
+
+/*
+  Looks up a null-terminated string in a list,
+  case insensitively.
+
+  SYNOPSIS
+    str_list_find()
+    list        list of items
+    str         item to find
+
+  RETURN
+    0  ok
+    1  reserved file name
+*/
+static int str_list_find(const char **list, const char *str)
+{
+  const char **name;
+  for (name= list; *name; name++)
+  {
+    if (!my_strcasecmp(&my_charset_latin1, *name, str))
+      return 1;
+  }
+  return 0;
+}
+
+
+/*
+  A map for faster reserved_names lookup,
+  helps to avoid loops in many cases.
+  1 - can be the first letter
+  2 - can be the second letter
+  4 - can be the third letter
+*/
+static char reserved_map[256]=
+{
+  0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, /* ................ */
+  0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, /* ................ */
+  0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, /*  !"#$%&'()*+,-./ */
+  0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, /* 0123456789:;<=>? */
+  0,1,0,1,0,0,0,0,0,0,0,0,7,4,5,2, /* @ABCDEFGHIJKLMNO */
+  3,0,2,0,4,2,0,0,4,0,0,0,0,0,0,0, /* PQRSTUVWXYZ[\]^_ */
+  0,1,0,1,0,0,0,0,0,0,0,0,7,4,5,2, /* `abcdefghijklmno */
+  3,0,2,0,4,2,0,0,4,0,0,0,0,0,0,0, /* pqrstuvwxyz{|}~. */
+  0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, /* ................ */
+  0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, /* ................ */
+  0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, /* ................ */
+  0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, /* ................ */
+  0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, /* ................ */
+  0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, /* ................ */
+  0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, /* ................ */
+  0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0  /* ................ */
+};
+
+
+/*
+  Check if a table name may cause problems
+
+  SYNOPSIS
+    check_if_legal_tablename
+    name        Table name (without any extensions)
+
+  DESCRIPTION
+    We don't check 'CLOCK$' because dollar sign is encoded as @0024,
+    making table file name 'CLOCK@0024', which is safe.
+    This is why we start lookup from the second element
+    (i.e. &reserved_names[1])
+
+    The three reserved_map tests come first. The map entry of the 0 byte
+    is 0, so the chain of && stops at the end of a short name before any
+    byte behind it is read.
+
+  RETURN
+    0  ok
+    1  reserved file name
+*/
+
+int check_if_legal_tablename(const char *name)
+{
+  DBUG_ENTER("check_if_legal_tablename");
+  DBUG_RETURN((reserved_map[(uchar) name[0]] & 1) &&
+              (reserved_map[(uchar) name[1]] & 2) &&
+              (reserved_map[(uchar) name[2]] & 4) &&
+              str_list_find(&reserved_names[1], name));
+}
+
+
+#ifdef _WIN32
+/**
+  Checks if the drive letter supplied is valid or not. Valid drive
+  letters are A to Z, both lower case and upper case.
+
+  @param drive_letter : The drive letter to validate.
+
+  @return TRUE if the drive exists, FALSE otherwise.
+*/
+static my_bool does_drive_exists(char drive_letter)
+{
+  DWORD drive_mask= GetLogicalDrives();
+  /* toupper() must not be given a negative value, which a plain char
+     can hold */
+  drive_letter= (char) toupper((uchar) drive_letter);
+
+  return (drive_letter >= 'A' && drive_letter <= 'Z') &&
+         (drive_mask & (0x1 << (drive_letter - 'A')));
+}
+
+/**
+  Verifies if the file name supplied is allowed or not. On Windows
+  file names with a colon (:) are not allowed because such file names
+  store data in Alternate Data Streams which can be used to hide
+  the data.
+  Apart from colon, other characters that are not allowed in filenames
+  on Windows are greater/less sign, double quotes, forward slash, backslash,
+  pipe and star characters.
+
+  See MSDN documentation on filename restrictions.
+
+  @param name              contains the file name with or without path
+  @param length            contains the length of file name
+  @param allow_current_dir TRUE if paths like C:foobar are allowed,
+                           FALSE otherwise
+
+  @return TRUE if the file name is allowed, FALSE otherwise.
+*/
+#define ILLEGAL_FILENAME_CHARS "<>:\"/\\|?*"
+
+my_bool is_filename_allowed(const char *name __attribute__((unused)),
+                            size_t length __attribute__((unused)),
+                            my_bool allow_current_dir __attribute__((unused)))
+{
+  /*
+    For Windows, check if the file name contains : character.
+    Start from end of path and search if the file name contains :
+  */
+  const char* ch = NULL;
+  for (ch= name + length - 1; ch >= name; --ch)
+  {
+    /* Only the last path component is examined */
+    if (FN_LIBCHAR == *ch || '/' == *ch)
+      break;
+    else if (':' == *ch)
+    {
+      /*
+        File names like C:foobar.txt are allowed since the syntax means
+        file foobar.txt in current directory of C drive. However file
+        names likes CC:foobar are not allowed since this syntax means ADS
+        foobar in file CC.
+      */
+      return (allow_current_dir && (ch - name == 1) &&
+              does_drive_exists(*name));
+    }
+    else if (strchr(ILLEGAL_FILENAME_CHARS, *ch))
+      return FALSE;
+  }
+  return TRUE;
+} /* is_filename_allowed */
+#endif /* _WIN32 */
+
+#if defined(_WIN32)
+
+/*
+  Check if a path will access a reserved file name that may cause problems
+
+  SYNOPSIS
+    check_if_legal_filename
+    path        Path to file
+
+  RETURN
+    0  ok
+    1  reserved file name
+*/
+
+int check_if_legal_filename(const char *path)
+{
+  const char *end;
+  const char **reserved_name;
+  DBUG_ENTER("check_if_legal_filename");
+
+  if (!is_filename_allowed(path, strlen(path), TRUE))
+    DBUG_RETURN(1);
+
+  path+= dirname_length(path);                  /* To start of filename */
+  /* Only the part in front of the first FN_EXTCHAR is compared */
+  if (!(end= strchr(path, FN_EXTCHAR)))
+    end= strend(path);
+  if (path == end || (uint) (end - path) > MAX_RESERVED_NAME_LENGTH)
+    DBUG_RETURN(0);                             /* Simplify inner loop */
+
+  for (reserved_name= reserved_names; *reserved_name; reserved_name++)
+  {
+    const char *reserved= *reserved_name;       /* never empty */
+    const char *name= path;
+
+    do
+    {
+      if (*reserved != my_toupper(&my_charset_latin1, *name))
+        break;
+      /* Match if the name and the reserved word end at the same place */
+      if (++name == end && !reserved[1])
+        DBUG_RETURN(1);                         /* Found wrong path */
+    } while (*++reserved);
+  }
+  DBUG_RETURN(0);
+}
+
+#endif /* defined(_WIN32) */
diff --git a/mysys/my_addr_resolve.c b/mysys/my_addr_resolve.c
new file mode 100644
index 00000000..376e7368
--- /dev/null
+++ b/mysys/my_addr_resolve.c
@@ -0,0 +1,353 @@
+/* Copyright (C) 2011 Monty Program Ab
+
+   This program is free software; you can redistribute it and/or modify
+   it under the terms of the GNU General Public License as published by
+   the Free Software Foundation; version 2 of the License.
+
+   This program is distributed in the hope that it will be useful,
+   but WITHOUT ANY WARRANTY; without even the implied warranty of
+   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+   GNU General Public License for more details.
/**
  Strip the leading directories from a path, keeping the file name and the
  directory that directly contains it.

  Both '/' and '\\' are treated as separators. A path with fewer than two
  separators is returned unchanged.

  @param s  NUL-terminated path
  @return   pointer into 's' where the last two components start
*/
static const char *strip_path(const char *s) __attribute__((unused));
static const char *strip_path(const char *s)
{
  const char *parent= s;      /* start of the component before 'leaf' */
  const char *leaf= s;        /* start of the component being scanned */

  while (*s)
  {
    const char c= *s++;
    if (c == '/' || c == '\\')
    {
      parent= leaf;
      leaf= s;                /* first character after the separator */
    }
  }
  return parent;
}
+ the file name is stripped of its path, only the two last components are kept + the resolving logic is mostly based on addr2line of binutils-2.17 + + @return 0 on success, 1 on failure +*/ +int my_addr_resolve(void *ptr, my_addr_loc *loc) +{ + bfd_vma addr= (intptr)ptr - offset; + asection *sec; + + for (sec= bfdh->sections; sec; sec= sec->next) + { + bfd_vma start; + + if ((bfd_get_section_flags(bfdh, sec) & SEC_ALLOC) == 0) + continue; + + start = bfd_get_section_vma(bfdh, sec); + if (addr < start || addr >= start + bfd_get_section_size(sec)) + continue; + + if (bfd_find_nearest_line(bfdh, sec, symtable, addr - start, + &loc->file, &loc->func, &loc->line)) + { + if (loc->file) + loc->file= strip_path(loc->file); + else + loc->file= ""; + + if (loc->func) + { + const char *str= bfd_demangle(bfdh, loc->func, 3); + if (str) + loc->func= str; + } + + return 0; + } + } + + return 1; +} + +const char *my_addr_resolve_init() +{ + if (!bfdh) + { + uint unused; + char **matching; + +#if defined(HAVE_LINK_H) && defined(HAVE_DLOPEN) + struct link_map *lm = (struct link_map*) dlopen(0, RTLD_NOW); + if (lm) + offset= lm->l_addr; +#endif + + bfdh= bfd_openr(my_progname, NULL); + if (!bfdh) + goto err; + + if (bfd_check_format(bfdh, bfd_archive)) + goto err; + if (!bfd_check_format_matches (bfdh, bfd_object, &matching)) + goto err; + + if (bfd_read_minisymbols(bfdh, FALSE, (void *)&symtable, &unused) < 0) + goto err; + } + return 0; + +err: + return bfd_errmsg(bfd_get_error()); +} +#elif defined(HAVE_LIBELF_H) +/* + another possible implementation. 
+*/ +#elif defined(MY_ADDR_RESOLVE_FORK) +/* + yet another - just execute addr2line pipe the addresses to it, and parse the + output +*/ + +#include +#include +#include + +#if defined(HAVE_POLL_H) +#include +#elif defined(HAVE_SYS_POLL_H) +#include +#endif /* defined(HAVE_POLL_H) */ + +static int in[2], out[2]; +static pid_t pid; +static char addr2line_binary[1024]; +static char output[1024]; +static struct pollfd poll_fds; +static void *addr_offset; + +int start_addr2line_fork(const char *binary_path) +{ + + if (pid > 0) + { + /* Don't leak FDs */ + close(in[1]); + close(out[0]); + /* Don't create zombie processes. */ + waitpid(pid, NULL, 0); + } + + if (pipe(in) < 0) + return 1; + if (pipe(out) < 0) + return 1; + + pid = fork(); + if (pid == -1) + return 1; + + if (!pid) /* child */ + { + dup2(in[0], 0); + dup2(out[1], 1); + close(in[0]); + close(in[1]); + close(out[0]); + close(out[1]); + execlp("addr2line", "addr2line", "-C", "-f", "-e", binary_path, NULL); + _exit(1); + } + + close(in[0]); + close(out[1]); + + return 0; +} + +static int first_error= 0; + +static int addr_resolve(void *ptr, my_addr_loc *loc) +{ + char input[32]; + size_t len; + + ssize_t total_bytes_read = 0; + ssize_t extra_bytes_read = 0; + ssize_t parsed = 0; + + int ret; + + int filename_start = -1; + int line_number_start = -1; + + poll_fds.fd = out[0]; + poll_fds.events = POLLIN | POLLRDBAND; + + len= my_snprintf(input, sizeof(input), "%p\n", ptr); + if (write(in[1], input, len) <= 0) + { + if (!first_error++) + fputs("Printing to addr2line failed\n", stderr); + return 3; + } + + + /* 5000 ms should be plenty of time for addr2line to issue a response. */ + /* Read in a loop till all the output from addr2line is complete. 
*/ + while (parsed == total_bytes_read && + (ret= poll(&poll_fds, 1, 5000))) + { + /* error during poll */ + if (ret < 0) + return 1; + + extra_bytes_read= read(out[0], output + total_bytes_read, + sizeof(output) - total_bytes_read); + if (extra_bytes_read < 0) + return 4; + /* Timeout or max bytes read. */ + if (extra_bytes_read == 0) + break; + + total_bytes_read += extra_bytes_read; + + /* Go through the addr2line response and get the required data. + The response is structured in 2 lines. The first line contains the function + name, while the second one contains : */ + for (; parsed < total_bytes_read; parsed++) + { + if (output[parsed] == '\n') + { + filename_start = parsed + 1; + output[parsed] = '\0'; + } + if (filename_start != -1 && output[parsed] == ':') + { + line_number_start = parsed + 1; + output[parsed] = '\0'; + break; + } + } + } + + /* Response is malformed. */ + if (filename_start == -1 || line_number_start == -1) + return 5; + + loc->func= output; + loc->file= output + filename_start; + loc->line= atoi(output + line_number_start); + + /* Addr2line was unable to extract any meaningful information. */ + if ((strcmp(loc->file, "??") == 0 || strcmp(loc->file, "") == 0) && + (loc->func[0] == '?' || loc->line == 0)) + return 6; + + loc->file= strip_path(loc->file); + + return 0; +} + + +int my_addr_resolve(void *ptr, my_addr_loc *loc) +{ + Dl_info info; + + if (!dladdr(ptr, &info)) + return 1; + + if (strcmp(addr2line_binary, info.dli_fname)) + { + /* + We use dli_fname in case the path is longer than the length of + our static string. We don't want to allocate anything + dynamically here as we are in a "crashed" state. + */ + if (start_addr2line_fork(info.dli_fname)) + { + if (!first_error++) + fputs("Can't start addr2line\n", stderr); + addr2line_binary[0] = '\0'; + return 2; + } + /* Save result for future comparisons. 
*/ + strnmov(addr2line_binary, info.dli_fname, sizeof(addr2line_binary)); + +#ifdef _AIX + /* + info.dli_fbase is a char on AIX and casting it doesn't fool gcc. + leave backtracing broken on AIX until a real solution can be found. + */ + addr_offset= NULL; +#else + /* + Check if we should use info.dli_fbase as an offset or not + for the base program. This is depending on if the compilation is + done with PIE or not. + */ + addr_offset= info.dli_fbase; +#endif +#ifndef __PIE__ + if (strcmp(info.dli_fname, my_progname) == 0 && + addr_resolve((void*) my_addr_resolve, loc) == 0 && + strcmp(loc->func, "my_addr_resolve") == 0) + addr_offset= 0; +#endif + } + + return addr_resolve((void*) (ptr - addr_offset), loc); +} + + +const char *my_addr_resolve_init() +{ + return 0; +} +#endif diff --git a/mysys/my_alarm.c b/mysys/my_alarm.c new file mode 100644 index 00000000..ee11dc9c --- /dev/null +++ b/mysys/my_alarm.c @@ -0,0 +1,33 @@ +/* Copyright (C) 2000 MySQL AB + Use is subject to license terms + + This program is free software; you can redistribute it and/or modify + it under the terms of the GNU General Public License as published by + the Free Software Foundation; version 2 of the License. + + This program is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + GNU General Public License for more details. 
#ifdef HAVE_ALARM

/*
  Signal handler that records that an alarm has fired by setting
  my_have_got_alarm. The signal number is ignored.
*/
	/* ARGSUSED */
sig_handler my_set_alarm_variable(int signo __attribute__((unused)))
{
  /* Tell program that time expired */
  my_have_got_alarm= 1;
}

#endif /* HAVE_ALARM */
+ + You should have received a copy of the GNU General Public License + along with this program; if not, write to the Free Software + Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1335 USA */ + +/* Routines to handle mallocing of results which will be freed the same time */ + +#include +#include +#include +#include +#ifdef HAVE_SYS_MMAN_H +#include +#endif + +#undef EXTRA_DEBUG +#define EXTRA_DEBUG + +#ifndef DBUG_OFF +/* Put a protected barrier after every element when using multi_alloc_root() */ +#define ALLOC_BARRIER +#endif + +/* data packed in MEM_ROOT -> min_malloc */ + +/* Don't allocate too small blocks */ +#define ROOT_MIN_BLOCK_SIZE 256 + +/* bits in MEM_ROOT->flags */ +#define ROOT_FLAG_THREAD_SPECIFIC 1 +#define ROOT_FLAG_MPROTECT 2 + +#define MALLOC_FLAG(R) MYF((R)->flags & ROOT_FLAG_THREAD_SPECIFIC ? THREAD_SPECIFIC : 0) + +#define TRASH_MEM(X) TRASH_FREE(((char*)(X) + ((X)->size-(X)->left)), (X)->left) + + +/* + Alloc memory through either my_malloc or mmap() +*/ + +static void *root_alloc(MEM_ROOT *root, size_t size, size_t *alloced_size, + myf my_flags) +{ + *alloced_size= size; +#if defined(HAVE_MMAP) && defined(HAVE_MPROTECT) && defined(MAP_ANONYMOUS) + if (root->flags & ROOT_FLAG_MPROTECT) + { + void *res; + *alloced_size= MY_ALIGN(size, my_system_page_size); + res= my_mmap(0, *alloced_size, PROT_READ | PROT_WRITE, + MAP_NORESERVE | MAP_PRIVATE | MAP_ANONYMOUS, -1, 0); + if (res == MAP_FAILED) + res= 0; + return res; + } +#endif /* HAVE_MMAP */ + + return my_malloc(root->psi_key, size, + my_flags | MYF(root->flags & ROOT_FLAG_THREAD_SPECIFIC ? 
+ MY_THREAD_SPECIFIC : 0)); +} + +static void root_free(MEM_ROOT *root, void *ptr, size_t size) +{ +#if defined(HAVE_MMAP) && defined(HAVE_MPROTECT) && defined(MAP_ANONYMOUS) + if (root->flags & ROOT_FLAG_MPROTECT) + my_munmap(ptr, size); + else +#endif + my_free(ptr); +} + + +/* + Calculate block sizes to use + + Sizes will be updated to next power of 2, minus operating system + memory management size. + + The idea is to reduce memory fragmentation as most system memory + allocators are using power of 2 block size internally. +*/ + +static void calculate_block_sizes(MEM_ROOT *mem_root, size_t block_size, + size_t *pre_alloc_size) +{ + size_t pre_alloc= *pre_alloc_size; + + if (mem_root->flags & ROOT_FLAG_MPROTECT) + { + mem_root->block_size= MY_ALIGN(block_size, my_system_page_size); + if (pre_alloc) + pre_alloc= MY_ALIGN(pre_alloc, my_system_page_size); + } + else + { + DBUG_ASSERT(block_size <= UINT_MAX32); + mem_root->block_size= (my_round_up_to_next_power((uint32) block_size - + MALLOC_OVERHEAD)- + MALLOC_OVERHEAD); + if (pre_alloc) + pre_alloc= (my_round_up_to_next_power((uint32) pre_alloc - + MALLOC_OVERHEAD)- + MALLOC_OVERHEAD); + } + *pre_alloc_size= pre_alloc; +} + + +/* + Initialize memory root + + SYNOPSIS + init_alloc_root() + key - key to register instrumented memory + mem_root - memory root to initialize + block_size - size of chunks (blocks) used for memory allocation. + Will be updated to next power of 2, minus + internal and system memory management size. This is + will reduce memory fragmentation as most system memory + allocators are using power of 2 block size internally. + (It is external size of chunk i.e. it should include + memory required for internal structures, thus it + should be no less than ROOT_MIN_BLOCK_SIZE). + pre_alloc_size - if non-0, then size of block that should be + pre-allocated during memory root initialization. 
+ my_flags MY_THREAD_SPECIFIC flag for my_malloc + MY_RROOT_USE_MPROTECT for read only protected memory + + DESCRIPTION + This function prepares memory root for further use, sets initial size of + chunk for memory allocation and pre-allocates first block if specified. + Although error can happen during execution of this function if + pre_alloc_size is non-0 it won't be reported. Instead it will be + reported as error in first alloc_root() on this memory root. +*/ + +void init_alloc_root(PSI_memory_key key, MEM_ROOT *mem_root, size_t block_size, + size_t pre_alloc_size __attribute__((unused)), + myf my_flags) +{ + DBUG_ENTER("init_alloc_root"); + DBUG_PRINT("enter",("root: %p prealloc: %zu", mem_root, pre_alloc_size)); + + mem_root->free= mem_root->used= mem_root->pre_alloc= 0; + mem_root->min_malloc= 32 + REDZONE_SIZE; + mem_root->block_size= MY_MAX(block_size, ROOT_MIN_BLOCK_SIZE); + mem_root->flags= 0; + DBUG_ASSERT(!test_all_bits(mem_root->flags, + (MY_THREAD_SPECIFIC | MY_ROOT_USE_MPROTECT))); + if (my_flags & MY_THREAD_SPECIFIC) + mem_root->flags|= ROOT_FLAG_THREAD_SPECIFIC; + if (my_flags & MY_ROOT_USE_MPROTECT) + mem_root->flags|= ROOT_FLAG_MPROTECT; + + calculate_block_sizes(mem_root, block_size, &pre_alloc_size); + + mem_root->error_handler= 0; + mem_root->block_num= 4; /* We shift this with >>2 */ + mem_root->first_block_usage= 0; + mem_root->psi_key= key; +#ifdef PROTECT_STATEMENT_MEMROOT + mem_root->read_only= 0; +#endif + +#if !(defined(HAVE_valgrind) && defined(EXTRA_DEBUG)) + if (pre_alloc_size) + { + size_t alloced_size; + if ((mem_root->free= mem_root->pre_alloc= + (USED_MEM*) root_alloc(mem_root, pre_alloc_size, &alloced_size, + MYF(0)))) + { + mem_root->free->size= alloced_size; + mem_root->free->left= alloced_size - ALIGN_SIZE(sizeof(USED_MEM)); + mem_root->free->next= 0; + TRASH_MEM(mem_root->free); + } + } +#endif + DBUG_VOID_RETURN; +} + +/* + SYNOPSIS + reset_root_defaults() + mem_root memory root to change defaults of + block_size new value 
of block size. Must be greater or equal + than ALLOC_ROOT_MIN_BLOCK_SIZE (this value is about + 68 bytes and depends on platform and compilation flags) + pre_alloc_size new size of preallocated block. If not zero, + must be equal to or greater than block size, + otherwise means 'no prealloc'. + DESCRIPTION + Function aligns and assigns new value to block size; then it tries to + reuse one of existing blocks as prealloc block, or malloc new one of + requested size. If no blocks can be reused, all unused blocks are freed + before allocation. +*/ + +void reset_root_defaults(MEM_ROOT *mem_root, size_t block_size, + size_t pre_alloc_size __attribute__((unused))) +{ + DBUG_ENTER("reset_root_defaults"); + DBUG_ASSERT(alloc_root_inited(mem_root)); + + calculate_block_sizes(mem_root, block_size, &pre_alloc_size); + +#if !(defined(HAVE_valgrind) && defined(EXTRA_DEBUG)) + if (pre_alloc_size) + { + size_t size= mem_root->block_size, alloced_size; + if (!mem_root->pre_alloc || + mem_root->pre_alloc->size != mem_root->block_size) + { + USED_MEM *mem, **prev= &mem_root->free; + /* + Free unused blocks, so that consequent calls + to reset_root_defaults won't eat away memory. 
+ */ + while (*prev) + { + mem= *prev; + if (mem->size == size) + { + /* We found a suitable block, no need to do anything else */ + mem_root->pre_alloc= mem; + DBUG_VOID_RETURN; + } + if (mem->left + ALIGN_SIZE(sizeof(USED_MEM)) == mem->size) + { + /* remove block from the list and free it */ + *prev= mem->next; + root_free(mem_root, mem, mem->size); + } + else + prev= &mem->next; + } + /* Allocate new prealloc block and add it to the end of free list */ + if ((mem= (USED_MEM *) root_alloc(mem_root, size, &alloced_size, + MYF(MY_WME)))) + { + mem->size= alloced_size; + mem->left= alloced_size - ALIGN_SIZE(sizeof(USED_MEM)); + mem->next= *prev; + *prev= mem_root->pre_alloc= mem; + TRASH_MEM(mem); + } + else + mem_root->pre_alloc= 0; + } + } + else +#endif + mem_root->pre_alloc= 0; + + DBUG_VOID_RETURN; +} + + +void *alloc_root(MEM_ROOT *mem_root, size_t length) +{ + size_t get_size, block_size; + uchar* point; + reg1 USED_MEM *next= 0; + reg2 USED_MEM **prev; + size_t original_length __attribute__((unused)) = length; + DBUG_ENTER("alloc_root"); + DBUG_PRINT("enter",("root: %p", mem_root)); + DBUG_ASSERT(alloc_root_inited(mem_root)); + +#ifdef PROTECT_STATEMENT_MEMROOT + DBUG_ASSERT(mem_root->read_only == 0); +#endif + + DBUG_EXECUTE_IF("simulate_out_of_memory", + { + if (mem_root->error_handler) + (*mem_root->error_handler)(); + DBUG_SET("-d,simulate_out_of_memory"); + DBUG_RETURN((void*) 0); /* purecov: inspected */ + }); + +#if defined(HAVE_valgrind) && defined(EXTRA_DEBUG) + if (!(mem_root->flags & ROOT_FLAG_MPROTECT)) + { + length+= ALIGN_SIZE(sizeof(USED_MEM)); + if (!(next = (USED_MEM*) my_malloc(mem_root->psi_key, length, + MYF(MY_WME | ME_FATAL | + (mem_root->flags & + ROOT_FLAG_THREAD_SPECIFIC ? 
+ MY_THREAD_SPECIFIC : 0))))) + { + if (mem_root->error_handler) + (*mem_root->error_handler)(); + DBUG_RETURN((uchar*) 0); /* purecov: inspected */ + } + next->next= mem_root->used; + next->left= 0; + next->size= length; + mem_root->used= next; + DBUG_PRINT("exit",("ptr: %p", (((char*)next)+ALIGN_SIZE(sizeof(USED_MEM))))); + DBUG_RETURN((((uchar*) next)+ALIGN_SIZE(sizeof(USED_MEM)))); + } +#endif /* defined(HAVE_valgrind) && defined(EXTRA_DEBUG) */ + + length= ALIGN_SIZE(length) + REDZONE_SIZE; + if ((*(prev= &mem_root->free)) != NULL) + { + if ((*prev)->left < length && + mem_root->first_block_usage++ >= ALLOC_MAX_BLOCK_USAGE_BEFORE_DROP && + (*prev)->left < ALLOC_MAX_BLOCK_TO_DROP) + { + next= *prev; + *prev= next->next; /* Remove block from list */ + next->next= mem_root->used; + mem_root->used= next; + mem_root->first_block_usage= 0; + } + for (next= *prev ; next && next->left < length ; next= next->next) + prev= &next->next; + } + if (! next) + { /* Time to alloc new block */ + size_t alloced_length; + + /* Increase block size over time if there is a lot of mallocs */ + block_size= (MY_ALIGN(mem_root->block_size, ROOT_MIN_BLOCK_SIZE) * + (mem_root->block_num >> 2)- MALLOC_OVERHEAD); + get_size= length + ALIGN_SIZE(sizeof(USED_MEM)); + get_size= MY_MAX(get_size, block_size); + + if (!(next= (USED_MEM*) root_alloc(mem_root, get_size, &alloced_length, + MYF(MY_WME | ME_FATAL)))) + { + if (mem_root->error_handler) + (*mem_root->error_handler)(); + DBUG_RETURN((void*) 0); /* purecov: inspected */ + } + mem_root->block_num++; + next->next= *prev; + next->size= alloced_length; + next->left= alloced_length - ALIGN_SIZE(sizeof(USED_MEM)); + *prev=next; + TRASH_MEM(next); + } + + point= (uchar*) ((char*) next+ (next->size-next->left)); + /*TODO: next part may be unneded due to mem_root->first_block_usage counter*/ + if ((next->left-= length) < mem_root->min_malloc) + { /* Full block */ + *prev= next->next; /* Remove block from list */ + next->next= mem_root->used; + 
mem_root->used= next; + mem_root->first_block_usage= 0; + } + point+= REDZONE_SIZE; + TRASH_ALLOC(point, original_length); + DBUG_PRINT("exit",("ptr: %p", point)); + DBUG_RETURN((void*) point); +} + + +/* + Allocate many pointers at the same time. + + DESCRIPTION + ptr1, ptr2, etc all point into big allocated memory area. + + SYNOPSIS + multi_alloc_root() + root Memory root + ptr1, length1 Multiple arguments terminated by a NULL pointer + ptr2, length2 ... + ... + NULL + + RETURN VALUE + A pointer to the beginning of the allocated memory block + in case of success or NULL if out of memory. +*/ + +void *multi_alloc_root(MEM_ROOT *root, ...) +{ + va_list args; + char **ptr, *start, *res; + size_t tot_length, length; + DBUG_ENTER("multi_alloc_root"); + /* + We don't need to do DBUG_PRINT here as it will be done when alloc_root + is called + */ + + va_start(args, root); + tot_length= 0; + while ((ptr= va_arg(args, char **))) + { + length= va_arg(args, uint); + tot_length+= ALIGN_SIZE(length); +#ifdef ALLOC_BARRIER + tot_length+= ALIGN_SIZE(1); +#endif + } + va_end(args); + + if (!(start= (char*) alloc_root(root, tot_length))) + DBUG_RETURN(0); /* purecov: inspected */ + + va_start(args, root); + res= start; + while ((ptr= va_arg(args, char **))) + { + *ptr= res; + length= va_arg(args, uint); + res+= ALIGN_SIZE(length); +#ifdef ALLOC_BARRIER + TRASH_FREE(res, ALIGN_SIZE(1)); + res+= ALIGN_SIZE(1); +#endif + } + va_end(args); + DBUG_RETURN((void*) start); +} + + +#if !(defined(HAVE_valgrind) && defined(EXTRA_DEBUG)) +/** Mark all data in blocks free for reusage */ + +static inline void mark_blocks_free(MEM_ROOT* root) +{ + reg1 USED_MEM *next; + reg2 USED_MEM **last; + + /* iterate through (partially) free blocks, mark them free */ + last= &root->free; + for (next= root->free; next; next= *(last= &next->next)) + { + next->left= next->size - ALIGN_SIZE(sizeof(USED_MEM)); + TRASH_MEM(next); + } + + /* Combine the free and the used list */ + *last= next=root->used; + + /* 
now go through the used blocks and mark them free */ + for (; next; next= next->next) + { + next->left= next->size - ALIGN_SIZE(sizeof(USED_MEM)); + TRASH_MEM(next); + } + + /* Now everything is set; Indicate that nothing is used anymore */ + root->used= 0; + root->first_block_usage= 0; + root->block_num= 4; +} +#endif + + +/* + Deallocate everything used by alloc_root or just move + used blocks to free list if called with MY_USED_TO_FREE + + SYNOPSIS + free_root() + root Memory root + MyFlags Flags for what should be freed: + + MY_MARK_BLOCKS_FREED Don't free blocks, just mark them free + MY_KEEP_PREALLOC If this is not set, then free also the + preallocated block + + NOTES + One can call this function either with root block initialised with + init_alloc_root() or with a bzero()-ed block. + It's also safe to call this multiple times with the same mem_root. +*/ + +void free_root(MEM_ROOT *root, myf MyFlags) +{ + reg1 USED_MEM *next,*old; + DBUG_ENTER("free_root"); + DBUG_PRINT("enter",("root: %p flags: %lu", root, MyFlags)); + +#if !(defined(HAVE_valgrind) && defined(EXTRA_DEBUG)) + /* + There is no point in using mark_blocks_free when using valgrind as + it will not reclaim any memory + */ + if (MyFlags & MY_MARK_BLOCKS_FREE) + { + mark_blocks_free(root); + DBUG_VOID_RETURN; + } +#endif + if (!(MyFlags & MY_KEEP_PREALLOC)) + root->pre_alloc=0; + + for (next=root->used; next ;) + { + old=next; next= next->next ; + if (old != root->pre_alloc) + root_free(root, old, old->size); + } + for (next=root->free ; next ;) + { + old=next; next= next->next; + if (old != root->pre_alloc) + root_free(root, old, old->size); + } + root->used=root->free=0; + if (root->pre_alloc) + { + root->free=root->pre_alloc; + root->free->left=root->pre_alloc->size-ALIGN_SIZE(sizeof(USED_MEM)); + TRASH_MEM(root->pre_alloc); + root->free->next=0; + } + root->block_num= 4; + root->first_block_usage= 0; + DBUG_VOID_RETURN; +} + + +/* + Find block that contains an object and set the pre_alloc to it 
+*/ + +void set_prealloc_root(MEM_ROOT *root, char *ptr) +{ + USED_MEM *next; + for (next=root->used; next ; next=next->next) + { + if ((char*) next <= ptr && (char*) next + next->size > ptr) + { + root->pre_alloc=next; + return; + } + } + for (next=root->free ; next ; next=next->next) + { + if ((char*) next <= ptr && (char*) next + next->size > ptr) + { + root->pre_alloc=next; + return; + } + } +} + + +/* + Remember last MEM_ROOT block. + + This allows one to free all new allocated blocks. +*/ + +USED_MEM *get_last_memroot_block(MEM_ROOT* root) +{ + return root->used ? root->used : root->pre_alloc; +} + +/* + Free all newly allocated blocks +*/ + +void free_all_new_blocks(MEM_ROOT *root, USED_MEM *last_block) +{ + USED_MEM *old, *next; + if (!root->used) + return; /* Nothing allocated */ + return; + /* + Free everying allocated up to, but not including, last_block. + However do not go past pre_alloc as we do not want to free + that one. This should not be a problem as in almost all normal + usage pre_alloc is last in the list. + */ + + for (next= root->used ; + next && next != last_block && next != root->pre_alloc ; ) + { + old= next; next= next->next; + root_free(root, old, old->size); + } + root->used= next; + root->block_num= 4; + root->first_block_usage= 0; +} + +/** + Change protection for all blocks in the mem root +*/ + +#if defined(HAVE_MMAP) && defined(HAVE_MPROTECT) && defined(MAP_ANONYMOUS) +void protect_root(MEM_ROOT *root, int prot) +{ + reg1 USED_MEM *next,*old; + DBUG_ENTER("protect_root"); + DBUG_PRINT("enter",("root: %p prot: %d", root, prot)); + + DBUG_ASSERT(root->flags & ROOT_FLAG_MPROTECT); + + for (next= root->used; next ;) + { + old= next; next= next->next ; + mprotect(old, old->size, prot); + } + for (next= root->free; next ;) + { + old= next; next= next->next ; + mprotect(old, old->size, prot); + } + DBUG_VOID_RETURN; +} +#else +void protect_root(MEM_ROOT *root, int prot) +{ +} +#endif /* defined(HAVE_MMAP) && ... 
*/ + + +char *strdup_root(MEM_ROOT *root, const char *str) +{ + return strmake_root(root, str, strlen(str)); +} + + +char *strmake_root(MEM_ROOT *root, const char *str, size_t len) +{ + char *pos; + if ((pos=alloc_root(root,len+1))) + { + if (len) + memcpy(pos,str,len); + pos[len]=0; + } + return pos; +} + + +void *memdup_root(MEM_ROOT *root, const void *str, size_t len) +{ + char *pos; + if ((pos=alloc_root(root,len)) && len) + memcpy(pos,str,len); + return pos; +} + +LEX_CSTRING safe_lexcstrdup_root(MEM_ROOT *root, const LEX_CSTRING str) +{ + LEX_CSTRING res; + if (str.length) + res.str= strmake_root(root, str.str, str.length); + else + res.str= (const char *)""; + res.length= str.length; + return res; +} diff --git a/mysys/my_atomic_writes.c b/mysys/my_atomic_writes.c new file mode 100644 index 00000000..90d0f1d0 --- /dev/null +++ b/mysys/my_atomic_writes.c @@ -0,0 +1,533 @@ +/* Copyright (c) 2016, 2021, MariaDB Corporation. + + This program is free software; you can redistribute it and/or modify + it under the terms of the GNU General Public License as published by + the Free Software Foundation; version 2 of the License. + + This program is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + GNU General Public License for more details. + + You should have received a copy of the GNU General Public License + along with this program; if not, write to the Free Software + Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA */ + +#include "mysys_priv.h" + +my_bool my_may_have_atomic_write= IF_WIN(1,0); + +#ifdef __linux__ + +my_bool has_shannon_atomic_write, has_fusion_io_atomic_write, + has_sfx_atomic_write; +my_bool has_sfx_card; + +#include + +/* Linux seems to allow up to 15 partitions per block device. +Partition number 0 is the whole block device. 
*/ +# define SAME_DEV(fs_dev, blk_dev) \ + (fs_dev == blk_dev) || ((fs_dev & ~15U) == blk_dev) + +/*********************************************************************** + FUSION_IO +************************************************************************/ + +/** FusionIO atomic write control info */ +#define DFS_IOCTL_ATOMIC_WRITE_SET _IOW(0x95, 2, uint) + + +/** + Check if the system has a funsion_io card + @return TRUE Card exists +*/ + +static my_bool test_if_fusion_io_card_exists() +{ + /* Fusion card requires fallocate to exists */ +#ifndef HAVE_POSIX_FALLOCATE + return 0; +#else + return (access("/dev/fcta", F_OK)) == 0; +#endif +} + + +/** + Check if a file is on a Fusion_IO device and that it supports atomic_write + @param[in] file OS file handle + @param[in] page_size page size + @return TRUE Atomic write supported +*/ + +static my_bool fusion_io_has_atomic_write(File file, int page_size) +{ + int atomic= 1; + if (page_size <= 32768 && + ioctl(file, DFS_IOCTL_ATOMIC_WRITE_SET, &atomic) != -1) + return(TRUE); + return(FALSE); +} + + +/*********************************************************************** + SHANNON +************************************************************************/ + +#define SHANNON_IOMAGIC 'x' +#define SHANNON_IOCQATOMIC_SIZE _IO(SHANNON_IOMAGIC, 22) + +#define SHANNON_MAX_DEVICES 32 +#define SHANNON_NO_ATOMIC_SIZE_YET -2 + +struct shannon_dev +{ + char dev_name[32]; + dev_t st_dev; + int atomic_size; +}; + + +static struct shannon_dev shannon_devices[SHANNON_MAX_DEVICES+1]; + +/** + Check if the system has a Shannon card + If card exists, record device numbers to allow us to later check if + a given file is on this device. + @return TRUE Card exists +*/ + +static my_bool test_if_shannon_card_exists() +{ + uint shannon_found_devices= 0; + char dev_part; + uint dev_no; + + if (access("/dev/scta", F_OK) < 0) + return 0; + + /* + The Shannon devices are /dev/dfX, where X can be from a-z. 
+ We have to check all of them as some may be missing if the user + removed one with the U.2 interface. + */ + + for (dev_part= 'a' ; dev_part < 'z' ; dev_part++) + { + char path[32]; + struct stat stat_buff; + + snprintf(path, sizeof(path), "/dev/df%c", dev_part); +#ifdef TEST_SHANNON + if (stat(path, &stat_buff) < 0) + { + printf("%s(): stat %s failed.\n", __func__, path); + break; + } +#endif + shannon_devices[shannon_found_devices].st_dev= stat_buff.st_rdev; + snprintf(shannon_devices[shannon_found_devices].dev_name, + sizeof(shannon_devices[shannon_found_devices].dev_name), + "/dev/sct%c", + dev_part); + +#ifdef TEST_SHANNON + printf("%s(): i=%d, stat_buff.st_dev=0x%lx, stat_buff.st_rdev=0x%lx, st_rdev=0x%lx, dev_name=%s\n", + __func__, + shannon_found_devices, + (ulong) stat_buff.st_dev, + (ulong) stat_buff.st_rdev, + (ulong) shannon_devices[shannon_found_devices].st_dev, + shannon_devices[shannon_found_devices].dev_name); +#endif + + /* + The atomic size will be checked on first access. This is needed + as a normal user can't open the /dev/scta file + */ + shannon_devices[shannon_found_devices].atomic_size= + SHANNON_NO_ATOMIC_SIZE_YET; + if (++shannon_found_devices== SHANNON_MAX_DEVICES) + goto end; + + for (dev_no= 1 ; dev_no < 9 ; dev_no++) + { + snprintf(path, sizeof(path), "/dev/df%c%d", dev_part, dev_no); + if (stat(path, &stat_buff) < 0) + break; + + shannon_devices[shannon_found_devices].st_dev= stat_buff.st_rdev; + snprintf(shannon_devices[shannon_found_devices].dev_name, + sizeof(shannon_devices[shannon_found_devices].dev_name), + "/dev/sct%c%d", + dev_part, dev_no); + +#ifdef TEST_SHANNON + printf("%s(): i=%d, st_dev=0x%lx, st_rdev=0x%lx, dev_name=%s\n", + __func__, + shannon_found_devices, + (ulong) stat_buff.st_dev, + (ulong) shannon_devices[shannon_found_devices].st_dev, + shannon_devices[shannon_found_devices].dev_name); +#endif + + /* + The atomic size will be checked on first access. 
This is needed + as a normal user can't open the /dev/scta file + */ + shannon_devices[shannon_found_devices].atomic_size= + SHANNON_NO_ATOMIC_SIZE_YET; + if (++shannon_found_devices == SHANNON_MAX_DEVICES) + goto end; + } + } +end: + shannon_devices[shannon_found_devices].st_dev= 0; + return shannon_found_devices > 0; +} + + +static my_bool shannon_dev_has_atomic_write(struct shannon_dev *dev, + int page_size) +{ +#ifdef TEST_SHANNON + printf("%s: enter: page_size=%d, atomic_size=%d, dev_name=%s\n", + __func__, + page_size, + dev->atomic_size, + dev->dev_name); +#endif + if (dev->atomic_size == SHANNON_NO_ATOMIC_SIZE_YET) + { + int fd= open(dev->dev_name, 0); + if (fd < 0) + { + fprintf(stderr, "Unable to determine if atomic writes are supported:" + " open(\"%s\"): %m\n", dev->dev_name); + dev->atomic_size= 0; /* Don't try again */ + return FALSE; + } + dev->atomic_size= ioctl(fd, SHANNON_IOCQATOMIC_SIZE); + close(fd); + } + +#ifdef TEST_SHANNON + printf("%s: exit: page_size=%d, atomic_size=%d, dev_name=%s\n", + __func__, + page_size, + dev->atomic_size, + dev->dev_name); +#endif + return (page_size <= dev->atomic_size); +} + + +/** + Check if a file is on a Shannon device and that it supports atomic_write + @param[in] file OS file handle + @param[in] page_size page size + @return TRUE Atomic write supported + + @notes + This is called only at first open of a file. In this case it doesn't + matter so much that we loop over all cards. + We update the atomic size on first access. 
+*/ + +static my_bool shannon_has_atomic_write(File file, int page_size) +{ + struct shannon_dev *dev; + struct stat stat_buff; + + if (fstat(file, &stat_buff) < 0) + { +#ifdef TEST_SHANNON + printf("%s(): fstat failed\n", __func__); +#endif + return 0; + } + +#ifdef TEST_SHANNON + printf("%s(): st_dev=0x%lx, st_rdev=0x%lx\n", __func__, + (ulong) stat_buff.st_dev, (ulong) stat_buff.st_rdev); +#endif + + for (dev= shannon_devices ; dev->st_dev; dev++) + { +#ifdef TEST_SHANNON + printf("%s(): st_rdev=0x%lx\n", __func__, (ulong) dev->st_dev); +#endif + if (SAME_DEV(stat_buff.st_dev, dev->st_dev)) + return shannon_dev_has_atomic_write(dev, page_size); + } + return 0; +} + + +/*********************************************************************** + ScaleFlux +************************************************************************/ + +#define SFX_GET_ATOMIC_SIZE _IOR('N', 0x243, int) +#define SFX_MAX_DEVICES (32) +#define SFX_UNKNOWN_ATOMIC_WRITE_YET (-2) +#define SFX_MAX_ATOMIC_SIZE (256 * 1024) + +#define SFX_GET_SPACE_RATIO _IO('N', 0x244) +#define SFX_UNKNOWN_PUNCH_HOLE_YET (-3) + +/** + Threshold for logical_space / physical_space + No less than the threshold means we can disable hole punching +*/ +#define SFX_DISABLE_PUNCH_HOLE_RATIO (2) + +struct sfx_dev +{ + char dev_name[32]; + dev_t st_dev; + int atomic_write; + int disable_punch_hole; +}; + +static struct sfx_dev sfx_devices[SFX_MAX_DEVICES + 1]; + +/** + Check if the system has a ScaleFlux card + If card exists, record device numbers to allow us to later check if + a given file is on this device + Variables for atomic_write and disable_punch_hole will be initialized + @return TRUE Card exists +*/ + +static my_bool test_if_sfx_card_exists() +{ + uint sfx_found_devices = 0; + uint dev_num; + + for (dev_num = 0; dev_num < SFX_MAX_DEVICES; dev_num++) + { + struct stat stat_buff; + + sprintf(sfx_devices[sfx_found_devices].dev_name, "/dev/sfdv%dn1", + dev_num); + if (stat(sfx_devices[sfx_found_devices].dev_name, 
&stat_buff) < 0) + break; + + sfx_devices[sfx_found_devices].st_dev= stat_buff.st_rdev; + /* + The atomic size will be checked on first access. This is needed + as a normal user can't open the /dev/sfdvXn1 file + */ + sfx_devices[sfx_found_devices].atomic_write= SFX_UNKNOWN_ATOMIC_WRITE_YET; + sfx_devices[sfx_found_devices].disable_punch_hole= + SFX_UNKNOWN_PUNCH_HOLE_YET; + if (++sfx_found_devices == SFX_MAX_DEVICES) + goto end; + } +end: + sfx_devices[sfx_found_devices].st_dev= 0; + has_sfx_card = (sfx_found_devices > 0); + + return sfx_found_devices > 0; +} + +static my_bool sfx_dev_has_atomic_write(struct sfx_dev *dev, + int page_size) +{ + int result= -1, max_atomic_size= SFX_MAX_ATOMIC_SIZE; + + if (dev->atomic_write == SFX_UNKNOWN_ATOMIC_WRITE_YET) + { + int fd= open(dev->dev_name, 0); + if (fd < 0) + fprintf(stderr, "Unable to determine if atomic writes are supported:" + " open(\"%s\"): %m\n", dev->dev_name); + else + { + result= ioctl(fd, SFX_GET_ATOMIC_SIZE, &max_atomic_size); + close(fd); + } + dev->atomic_write= result == 0 && page_size <= max_atomic_size; + } + + return dev->atomic_write; +} + +/** + Check if a file is on a ScaleFlux device and that it supports atomic_write + @param[in] file OS file handle + @param[in] page_size page size + @return TRUE Atomic write supported + + @notes + This is called only at first open of a file. In this case it doesn't + matter so much that we loop over all cards. + We update the atomic size on first access. 
+*/ + +static my_bool sfx_has_atomic_write(File file, int page_size) +{ + struct sfx_dev *dev; + struct stat stat_buff; + + if (fstat(file, &stat_buff) == 0) + for (dev= sfx_devices; dev->st_dev; dev++) + if (SAME_DEV(stat_buff.st_dev, dev->st_dev)) + return sfx_dev_has_atomic_write(dev, page_size); + return 0; +} + +static my_bool sfx_dev_could_disable_punch_hole(struct sfx_dev *dev, File file) +{ + int result = 0; + + if (dev->disable_punch_hole == SFX_UNKNOWN_PUNCH_HOLE_YET) + { + int fd= open(dev->dev_name, 0); + if (fd < 0) + { + fprintf(stderr, "Unable to determine if thin provisioning is used:" + " open(\"%s\"): %m\n", dev->dev_name); + dev->disable_punch_hole= 0; /* Don't try again */ + return FALSE; + } + + /* + Ratio left-shifts 8 (multiplies 256) inside the ioctl; + will also add 1 to guarantee a round-up integer. + */ + result= ioctl(fd, SFX_GET_SPACE_RATIO); + result+= 1; + dev->disable_punch_hole= (result >= (((double)SFX_DISABLE_PUNCH_HOLE_RATIO) * 256)); + } + + return dev->disable_punch_hole; +} + +/** + Check if a file is on a ScaleFlux device and whether it is possible to + disable hole punch. + @param[in] file OS file handle + @return TRUE Could disable hole punch + + @notes + This is called only at first open of a file. In this case it's doesn't + matter so much that we loop over all cards +*/ + +static my_bool sfx_could_disable_punch_hole(File file) +{ + struct sfx_dev *dev; + struct stat stat_buff; + + if (fstat(file, &stat_buff) == 0) + for (dev = sfx_devices; dev->st_dev; dev++) + if (SAME_DEV(stat_buff.st_dev, dev->st_dev)) + return sfx_dev_could_disable_punch_hole(dev, file); + return 0; +} + +/*********************************************************************** + Generic atomic write code +************************************************************************/ + +/** + Initialize the atomic write subsystem. 
+ Checks if we have any devices that supports atomic write +*/ + +void my_init_atomic_write(void) +{ + has_shannon_atomic_write= test_if_shannon_card_exists(); + has_fusion_io_atomic_write= test_if_fusion_io_card_exists(); + has_sfx_atomic_write= test_if_sfx_card_exists(); + + my_may_have_atomic_write= has_shannon_atomic_write || + has_fusion_io_atomic_write || has_sfx_atomic_write; + +#ifdef TEST_SHANNON + printf("%s(): has_shannon_atomic_write=%d, my_may_have_atomic_write=%d\n", + __func__, + has_shannon_atomic_write, + my_may_have_atomic_write); +#endif +} + + +/** + Check if a file supports atomic write + + @return FALSE No atomic write support + TRUE File supports atomic write +*/ + +my_bool my_test_if_atomic_write(File handle, int page_size) +{ +#ifdef TEST_SHANNON + printf("%s(): has_shannon_atomic_write=%d, my_may_have_atomic_write=%d\n", + __func__, + has_shannon_atomic_write, + my_may_have_atomic_write); +#endif + if (!my_may_have_atomic_write) + return 0; + + if (has_shannon_atomic_write && + shannon_has_atomic_write(handle, page_size)) + return 1; + + if (has_fusion_io_atomic_write && + fusion_io_has_atomic_write(handle, page_size)) + return 1; + + if (has_sfx_atomic_write && + sfx_has_atomic_write(handle, page_size)) + return 1; + + return 0; +} + + +/** + Check if a file resides on thinly provisioned storage. 
+ + @return FALSE File cannot disable hole punch + TRUE File could disable hole punch +*/ + +my_bool my_test_if_thinly_provisioned(File handle) +{ + if (has_sfx_card && sfx_could_disable_punch_hole(handle)) + return 1; + + return 0; +} + +#ifdef TEST_SHANNON +int main() +{ + int fd, ret; + + my_init_atomic_write(); + fd= open("/u01/1.file", O_RDWR); + ret= my_test_if_atomic_write(fd, 4096); + if (ret) + printf("support atomic_write\n"); + else + printf("do not support atomic_write\n"); + close(fd); + return 0; +} +#endif + + +#else /* __linux__ */ + +/* Dummy functions to provide the interfaces for other systems */ + +void my_init_atomic_write(void) +{ +} +#endif /* __linux__ */ diff --git a/mysys/my_basename.c b/mysys/my_basename.c new file mode 100644 index 00000000..54c4b0aa --- /dev/null +++ b/mysys/my_basename.c @@ -0,0 +1,42 @@ +/* Copyright (C) 2011 Monty Program Ab + + This program is free software; you can redistribute it and/or modify + it under the terms of the GNU General Public License as published by + the Free Software Foundation; version 2 of the License. + + This program is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + GNU General Public License for more details. + + You should have received a copy of the GNU General Public License + along with this program; if not, write to the Free Software + Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1335 USA */ + +#include +#include +/** + @brief retrieve last component of the filename. + Loosely based on Single Unix Spec definition. 
/**
  @brief Return a pointer to the last component of a filename.

  Both '/' and '\\' are treated as separators. The returned pointer refers
  into the caller's string, except for a NULL or empty filename, where the
  constant "." is returned. The single-character name "/" is returned as is.
  A name ending in a separator yields an empty string.

  @fn my_basename()
  @param filename  Filename (may be NULL)
*/
const char *my_basename(const char *filename)
{
  const char *component;
  const char *cursor;

  /* Special cases, as per single unix spec */
  if (!filename || *filename == '\0')
    return ".";
  if (filename[0] == '/' && filename[1] == '\0')
    return filename;

  component= filename;
  cursor= filename;
  while (*cursor != '\0')
  {
    const char ch= *cursor++;
    /* cursor now points just past ch, i.e. at the start of the next part */
    if (ch == '/' || ch == '\\')
      component= cursor;
  }
  return component;
}
+ + You should have received a copy of the GNU General Public License + along with this program; if not, write to the Free Software + Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1335 USA */ + +#include + +#include + + +/* + perl -e 'print map{", 0x".unpack H2,pack B8,unpack b8,chr$_}(0..255)' +*/ +const uchar _my_bits_reverse_table[256]={ +0x00, 0x80, 0x40, 0xC0, 0x20, 0xA0, 0x60, 0xE0, 0x10, 0x90, 0x50, 0xD0, 0x30, +0xB0, 0x70, 0xF0, 0x08, 0x88, 0x48, 0xC8, 0x28, 0xA8, 0x68, 0xE8, 0x18, 0x98, +0x58, 0xD8, 0x38, 0xB8, 0x78, 0xF8, 0x04, 0x84, 0x44, 0xC4, 0x24, 0xA4, 0x64, +0xE4, 0x14, 0x94, 0x54, 0xD4, 0x34, 0xB4, 0x74, 0xF4, 0x0C, 0x8C, 0x4C, 0xCC, +0x2C, 0xAC, 0x6C, 0xEC, 0x1C, 0x9C, 0x5C, 0xDC, 0x3C, 0xBC, 0x7C, 0xFC, 0x02, +0x82, 0x42, 0xC2, 0x22, 0xA2, 0x62, 0xE2, 0x12, 0x92, 0x52, 0xD2, 0x32, 0xB2, +0x72, 0xF2, 0x0A, 0x8A, 0x4A, 0xCA, 0x2A, 0xAA, 0x6A, 0xEA, 0x1A, 0x9A, 0x5A, +0xDA, 0x3A, 0xBA, 0x7A, 0xFA, 0x06, 0x86, 0x46, 0xC6, 0x26, 0xA6, 0x66, 0xE6, +0x16, 0x96, 0x56, 0xD6, 0x36, 0xB6, 0x76, 0xF6, 0x0E, 0x8E, 0x4E, 0xCE, 0x2E, +0xAE, 0x6E, 0xEE, 0x1E, 0x9E, 0x5E, 0xDE, 0x3E, 0xBE, 0x7E, 0xFE, 0x01, 0x81, +0x41, 0xC1, 0x21, 0xA1, 0x61, 0xE1, 0x11, 0x91, 0x51, 0xD1, 0x31, 0xB1, 0x71, +0xF1, 0x09, 0x89, 0x49, 0xC9, 0x29, 0xA9, 0x69, 0xE9, 0x19, 0x99, 0x59, 0xD9, +0x39, 0xB9, 0x79, 0xF9, 0x05, 0x85, 0x45, 0xC5, 0x25, 0xA5, 0x65, 0xE5, 0x15, +0x95, 0x55, 0xD5, 0x35, 0xB5, 0x75, 0xF5, 0x0D, 0x8D, 0x4D, 0xCD, 0x2D, 0xAD, +0x6D, 0xED, 0x1D, 0x9D, 0x5D, 0xDD, 0x3D, 0xBD, 0x7D, 0xFD, 0x03, 0x83, 0x43, +0xC3, 0x23, 0xA3, 0x63, 0xE3, 0x13, 0x93, 0x53, 0xD3, 0x33, 0xB3, 0x73, 0xF3, +0x0B, 0x8B, 0x4B, 0xCB, 0x2B, 0xAB, 0x6B, 0xEB, 0x1B, 0x9B, 0x5B, 0xDB, 0x3B, +0xBB, 0x7B, 0xFB, 0x07, 0x87, 0x47, 0xC7, 0x27, 0xA7, 0x67, 0xE7, 0x17, 0x97, +0x57, 0xD7, 0x37, 0xB7, 0x77, 0xF7, 0x0F, 0x8F, 0x4F, 0xCF, 0x2F, 0xAF, 0x6F, +0xEF, 0x1F, 0x9F, 0x5F, 0xDF, 0x3F, 0xBF, 0x7F, 0xFF +}; + diff --git a/mysys/my_bitmap.c b/mysys/my_bitmap.c new file mode 100644 index 
00000000..9893c7e4 --- /dev/null +++ b/mysys/my_bitmap.c @@ -0,0 +1,695 @@ +/* + Copyright (c) 2001, 2011, Oracle and/or its affiliates. + Copyright (C) 2009- 2011 Monty Program Ab + + This program is free software; you can redistribute it and/or modify + it under the terms of the GNU General Public License as published by + the Free Software Foundation; version 2 of the License. + + This program is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + GNU General Public License for more details. + + You should have received a copy of the GNU General Public License + along with this program; if not, write to the Free Software + Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1335 USA */ + +/* + Handling of uchar arrays as large bitmaps. + + API limitations (or, rather asserted safety assumptions, + to encourage correct programming) + + * the internal size is a set of 32 bit words + * the number of bits specified in creation can be any number > 0 + + TODO: + Make assembler thread safe versions of these using test-and-set instructions + + Original version created by Sergei Golubchik 2001 - 2004. + New version written and test program added and some changes to the interface + was made by Mikael Ronstrom 2005, with assistance of Tomas Ulin and Mats + Kindahl. +*/ + +#include "mysys_priv.h" +#include +#include +#include + +/* + Create a mask with the upper 'unused' bits set and the lower 'used' + bits clear. The bits within each byte is stored in big-endian order. +*/ + +static inline uchar invers_last_byte_mask(uint bits) +{ + return last_byte_mask(bits) ^ 255; +} + + +void create_last_word_mask(MY_BITMAP *map) +{ + unsigned char const mask= invers_last_byte_mask(map->n_bits); + + /* + The first bytes are to be set to zero since they represent real bits + in the bitvector. 
The last bytes are set to 0xFF since they represent + bytes not used by the bitvector. Finally the last byte contains bits + as set by the mask above. + */ + unsigned char *ptr= (unsigned char*)&map->last_word_mask; + + map->last_word_ptr= map->bitmap + no_words_in_map(map)-1; + switch (no_bytes_in_map(map) & 3) { + case 1: + map->last_word_mask= ~0U; + ptr[0]= mask; + return; + case 2: + map->last_word_mask= ~0U; + ptr[0]= 0; + ptr[1]= mask; + return; + case 3: + map->last_word_mask= 0U; + ptr[2]= mask; + ptr[3]= 0xFFU; + return; + case 0: + map->last_word_mask= 0U; + ptr[3]= mask; + return; + } +} + + +static inline my_bitmap_map last_word_mask(uint bit) +{ + my_bitmap_map last_word_mask; + uint n_bits= bit + 1; + unsigned char const mask= invers_last_byte_mask(n_bits); + + /* + The first bytes are to be set to zero since they represent real bits + in the bitvector. The last bytes are set to 0xFF since they represent + bytes not used by the bitvector. Finally the last byte contains bits + as set by the mask above. + */ + unsigned char *ptr= (unsigned char*)&last_word_mask; + + switch ((n_bits + 7)/8 & 3) { + case 1: + last_word_mask= ~0U; + ptr[0]= mask; + break; + case 2: + last_word_mask= ~0U; + ptr[0]= 0; + ptr[1]= mask; + break; + case 3: + last_word_mask= 0U; + ptr[2]= mask; + ptr[3]= 0xFFU; + break; + case 0: + last_word_mask= 0U; + ptr[3]= mask; + break; + } + return last_word_mask; +} + + +static inline uint get_first_set(my_bitmap_map value, uint word_pos) +{ + uchar *byte_ptr= (uchar*)&value; + uchar byte_value; + uint byte_pos, bit_pos; + + DBUG_ASSERT(value); + for (byte_pos=0; ; byte_pos++, byte_ptr++) + { + if ((byte_value= *byte_ptr)) + { + for (bit_pos=0; ; bit_pos++) + if (byte_value & (1 << bit_pos)) + return (word_pos*32) + (byte_pos*8) + bit_pos; + } + } + return MY_BIT_NONE; /* Impossible */ +} + +/* + Initialize a bitmap object. 
All bits will be set to zero +*/ + +my_bool my_bitmap_init(MY_BITMAP *map, my_bitmap_map *buf, uint n_bits) +{ + DBUG_ENTER("my_bitmap_init"); + if (!buf) + { + uint size_in_bytes= bitmap_buffer_size(n_bits); + if (!(buf= (my_bitmap_map*) my_malloc(key_memory_MY_BITMAP_bitmap, + size_in_bytes, MYF(MY_WME)))) + DBUG_RETURN(1); + } + + map->bitmap= buf; + map->n_bits= n_bits; + create_last_word_mask(map); + bitmap_clear_all(map); + DBUG_RETURN(0); +} + + +void my_bitmap_free(MY_BITMAP *map) +{ + DBUG_ENTER("my_bitmap_free"); + if (map->bitmap) + { + my_free(map->bitmap); + map->bitmap=0; + } + DBUG_VOID_RETURN; +} + + +/* + test if bit already set and set it if it was not (thread unsafe method) + + SYNOPSIS + bitmap_fast_test_and_set() + MAP bit map struct + BIT bit number + + RETURN + 0 bit was not set + !=0 bit was set +*/ + +my_bool bitmap_fast_test_and_set(MY_BITMAP *map, uint bitmap_bit) +{ + uchar *value= ((uchar*) map->bitmap) + (bitmap_bit / 8); + uchar bit= 1 << ((bitmap_bit) & 7); + uchar res= (*value) & bit; + *value|= bit; + return res; +} + + +/* + test if bit already set and set it if it was not (thread safe method) + + SYNOPSIS + bitmap_fast_test_and_set() + map bit map struct + bitmap_bit bit number + + RETURN + 0 bit was not set + !=0 bit was set +*/ + +my_bool bitmap_test_and_set(MY_BITMAP *map, uint bitmap_bit) +{ + DBUG_ASSERT(map->bitmap); + DBUG_ASSERT(bitmap_bit < map->n_bits); + return bitmap_fast_test_and_set(map, bitmap_bit); +} + +/* + test if bit already set and clear it if it was set(thread unsafe method) + + SYNOPSIS + bitmap_fast_test_and_set() + MAP bit map struct + BIT bit number + + RETURN + 0 bit was not set + !=0 bit was set +*/ + +my_bool bitmap_fast_test_and_clear(MY_BITMAP *map, uint bitmap_bit) +{ + uchar *byte= (uchar*) map->bitmap + (bitmap_bit / 8); + uchar bit= 1 << ((bitmap_bit) & 7); + uchar res= (*byte) & bit; + *byte&= ~bit; + return res; +} + + +my_bool bitmap_test_and_clear(MY_BITMAP *map, uint bitmap_bit) +{ + 
DBUG_ASSERT(map->bitmap); + DBUG_ASSERT(bitmap_bit < map->n_bits); + return bitmap_fast_test_and_clear(map, bitmap_bit); +} + + +uint bitmap_set_next(MY_BITMAP *map) +{ + uint bit_found; + DBUG_ASSERT(map->bitmap); + if ((bit_found= bitmap_get_first(map)) != MY_BIT_NONE) + bitmap_set_bit(map, bit_found); + return bit_found; +} + + +/** + Set the specified number of bits in the bitmap buffer. + + @param map [IN] Bitmap + @param prefix_size [IN] Number of bits to be set +*/ +void bitmap_set_prefix(MY_BITMAP *map, uint prefix_size) +{ + uint prefix_bytes, prefix_bits, d; + uchar *m= (uchar *)map->bitmap; + + DBUG_ASSERT(map->bitmap); + DBUG_ASSERT(prefix_size <= map->n_bits || prefix_size == (uint) ~0); + set_if_smaller(prefix_size, map->n_bits); + if ((prefix_bytes= prefix_size / 8)) + memset(m, 0xff, prefix_bytes); + m+= prefix_bytes; + if ((prefix_bits= prefix_size & 7)) + { + *(m++)= (1 << prefix_bits)-1; + // As the prefix bits are set, lets count this byte too as a prefix byte. + prefix_bytes ++; + } + if ((d= no_bytes_in_map(map)-prefix_bytes)) + memset(m, 0, d); +} + + +my_bool bitmap_is_prefix(const MY_BITMAP *map, uint prefix_size) +{ + uint prefix_mask= last_byte_mask(prefix_size); + uchar *m= (uchar*) map->bitmap; + uchar *end_prefix= m+(prefix_size-1)/8; + uchar *end; + DBUG_ASSERT(m); + DBUG_ASSERT(prefix_size <= map->n_bits); + + /* Empty prefix is always true */ + if (!prefix_size) + return 1; + + while (m < end_prefix) + if (*m++ != 0xff) + return 0; + + end= ((uchar*) map->bitmap) + no_bytes_in_map(map) - 1; + if (m == end) + return ((*m & last_byte_mask(map->n_bits)) == prefix_mask); + + if (*m != prefix_mask) + return 0; + + while (++m < end) + if (*m != 0) + return 0; + return ((*m & last_byte_mask(map->n_bits)) == 0); +} + + +my_bool bitmap_is_set_all(const MY_BITMAP *map) +{ + my_bitmap_map *data_ptr= map->bitmap; + my_bitmap_map *end= map->last_word_ptr; + for (; data_ptr < end; data_ptr++) + if (*data_ptr != 0xFFFFFFFF) + return FALSE; + 
return (*data_ptr | map->last_word_mask) == 0xFFFFFFFF; +} + + +my_bool bitmap_is_clear_all(const MY_BITMAP *map) +{ + my_bitmap_map *data_ptr= map->bitmap; + my_bitmap_map *end= map->last_word_ptr; + + DBUG_ASSERT(map->n_bits > 0); + for (; data_ptr < end; data_ptr++) + if (*data_ptr) + return FALSE; + return (*data_ptr & ~map->last_word_mask) == 0; +} + +/* Return TRUE if map1 is a subset of map2 */ + +my_bool bitmap_is_subset(const MY_BITMAP *map1, const MY_BITMAP *map2) +{ + my_bitmap_map *m1= map1->bitmap, *m2= map2->bitmap, *end; + + DBUG_ASSERT(map1->bitmap && map2->bitmap); + DBUG_ASSERT(map1->n_bits==map2->n_bits); + + end= map1->last_word_ptr; + while (m1 < end) + { + if ((*m1++) & ~(*m2++)) + return 0; + } + /* here both maps have the same number of bits - see assert above */ + return ((*m1 & ~*m2 & ~map1->last_word_mask) ? 0 : 1); +} + +/* True if bitmaps has any common bits */ + +my_bool bitmap_is_overlapping(const MY_BITMAP *map1, const MY_BITMAP *map2) +{ + my_bitmap_map *m1= map1->bitmap, *m2= map2->bitmap, *end; + + DBUG_ASSERT(map1->bitmap); + DBUG_ASSERT(map2->bitmap); + DBUG_ASSERT(map1->n_bits==map2->n_bits); + + end= map1->last_word_ptr; + while (m1 < end) + { + if ((*m1++) & (*m2++)) + return 1; + } + /* here both maps have the same number of bits - see assert above */ + return ((*m1 & *m2 & ~map1->last_word_mask) ? 1 : 0); +} + + +void bitmap_intersect(MY_BITMAP *map, const MY_BITMAP *map2) +{ + my_bitmap_map *to= map->bitmap, *from= map2->bitmap, *end; + uint len= no_words_in_map(map), len2 = no_words_in_map(map2); + + DBUG_ASSERT(map->bitmap); + DBUG_ASSERT(map2->bitmap); + + end= to+MY_MIN(len,len2); + while (to < end) + *to++ &= *from++; + + if (len2 <= len) + { + to[-1]&= ~map2->last_word_mask; /* Clear last not relevant bits */ + end+= len-len2; + while (to < end) + *to++= 0; + } +} + + +/* + Check if there is some bit index between start_bit and end_bit, such that + this is bit is set for all bitmaps in bitmap_list. 
+ + SYNOPSIS + bitmap_exists_intersection() + bitmpap_array [in] a set of MY_BITMAPs + bitmap_count [in] number of elements in bitmpap_array + start_bit [in] beginning (inclusive) of the range of bits to search + end_bit [in] end (inclusive) of the range of bits to search, must be + no bigger than the bits of the shortest bitmap. + + NOTES + This function assumes that for at least one of the bitmaps in bitmap_array all + bits outside the range [start_bit, end_bit] are 0. As a result is not + necessary to take care of the bits outside the range [start_bit, end_bit]. + + RETURN + TRUE if an intersecion exists + FALSE no intersection +*/ + +my_bool bitmap_exists_intersection(const MY_BITMAP **bitmap_array, + uint bitmap_count, + uint start_bit, uint end_bit) +{ + uint i, j, start_idx, end_idx; + my_bitmap_map cur_res; + + DBUG_ASSERT(bitmap_count); + DBUG_ASSERT(end_bit >= start_bit); + for (j= 0; j < bitmap_count; j++) + DBUG_ASSERT(end_bit < bitmap_array[j]->n_bits); + + start_idx= start_bit/8/sizeof(my_bitmap_map); + end_idx= end_bit/8/sizeof(my_bitmap_map); + + for (i= start_idx; i < end_idx; i++) + { + cur_res= ~0; + for (j= 0; cur_res && j < bitmap_count; j++) + cur_res &= bitmap_array[j]->bitmap[i]; + if (cur_res) + return TRUE; + } + cur_res= ~last_word_mask(end_bit); + for (j= 0; cur_res && j < bitmap_count; j++) + cur_res &= bitmap_array[j]->bitmap[end_idx]; + return cur_res != 0; +} + + +/* True if union of bitmaps have all bits set */ + +my_bool bitmap_union_is_set_all(const MY_BITMAP *map1, const MY_BITMAP *map2) +{ + my_bitmap_map *m1= map1->bitmap, *m2= map2->bitmap, *end; + + DBUG_ASSERT(map1->bitmap); + DBUG_ASSERT(map2->bitmap); + DBUG_ASSERT(map1->n_bits==map2->n_bits); + end= map1->last_word_ptr; + while ( m1 < end) + if ((*m1++ | *m2++) != 0xFFFFFFFF) + return FALSE; + /* here both maps have the same number of bits - see assert above */ + return ((*m1 | *m2 | map1->last_word_mask) != 0xFFFFFFFF); +} + + + +/* + Set/clear all bits above a bit. 
+ + SYNOPSIS + bitmap_set_above() + map RETURN The bitmap to change. + from_byte The bitmap buffer byte offset to start with. + use_bit The bit value (1/0) to use for all upper bits. + + NOTE + You can only set/clear full bytes. + The function is meant for the situation that you copy a smaller bitmap + to a bigger bitmap. Bitmap lengths are always multiple of eigth (the + size of a byte). Using 'from_byte' saves multiplication and division + by eight during parameter passing. + + RETURN + void +*/ + +void bitmap_set_above(MY_BITMAP *map, uint from_byte, uint use_bit) +{ + uchar use_byte= use_bit ? 0xff : 0; + uchar *to= (uchar *)map->bitmap + from_byte; + uchar *end= (uchar *)map->bitmap + (map->n_bits+7)/8; + + while (to < end) + *to++= use_byte; +} + + +void bitmap_subtract(MY_BITMAP *map, const MY_BITMAP *map2) +{ + my_bitmap_map *to= map->bitmap, *from= map2->bitmap, *end; + DBUG_ASSERT(map->bitmap); + DBUG_ASSERT(map2->bitmap); + DBUG_ASSERT(map->n_bits==map2->n_bits); + + end= map->last_word_ptr; + + while (to <= end) + *to++ &= ~(*from++); +} + + +void bitmap_union(MY_BITMAP *map, const MY_BITMAP *map2) +{ + my_bitmap_map *to= map->bitmap, *from= map2->bitmap, *end; + + DBUG_ASSERT(map->bitmap); + DBUG_ASSERT(map2->bitmap); + DBUG_ASSERT(map->n_bits == map2->n_bits); + end= map->last_word_ptr; + + while (to <= end) + *to++ |= *from++; +} + + +void bitmap_xor(MY_BITMAP *map, const MY_BITMAP *map2) +{ + my_bitmap_map *to= map->bitmap, *from= map2->bitmap, *end= map->last_word_ptr; + DBUG_ASSERT(map->bitmap); + DBUG_ASSERT(map2->bitmap); + DBUG_ASSERT(map->n_bits == map2->n_bits); + while (to <= end) + *to++ ^= *from++; +} + + +void bitmap_invert(MY_BITMAP *map) +{ + my_bitmap_map *to= map->bitmap, *end; + + DBUG_ASSERT(map->bitmap); + end= map->last_word_ptr; + + while (to <= end) + *to++ ^= 0xFFFFFFFF; +} + + +uint bitmap_bits_set(const MY_BITMAP *map) +{ + my_bitmap_map *data_ptr= map->bitmap; + my_bitmap_map *end= map->last_word_ptr; + uint res= 0; + 
DBUG_ASSERT(map->bitmap); + + for (; data_ptr < end; data_ptr++) + res+= my_count_bits_uint32(*data_ptr); + + /*Reset last bits to zero*/ + res+= my_count_bits_uint32(*map->last_word_ptr & ~map->last_word_mask); + return res; +} + +void bitmap_copy(MY_BITMAP *map, const MY_BITMAP *map2) +{ + my_bitmap_map *to= map->bitmap, *from= map2->bitmap, *end; + + DBUG_ASSERT(map->bitmap); + DBUG_ASSERT(map2->bitmap); + DBUG_ASSERT(map->n_bits == map2->n_bits); + end= map->last_word_ptr; + + while (to <= end) + *to++ = *from++; +} + + +uint bitmap_get_first_set(const MY_BITMAP *map) +{ + uint i; + my_bitmap_map *data_ptr= map->bitmap, *end= map->last_word_ptr; + + DBUG_ASSERT(map->bitmap); + + for (i=0; data_ptr < end; data_ptr++, i++) + if (*data_ptr) + goto found; + if (!(*data_ptr & ~map->last_word_mask)) + return MY_BIT_NONE; + +found: + return get_first_set(*data_ptr, i); +} + + +/** + Get the next set bit. + + @param map Bitmap + @param bitmap_bit Bit to start search from + + @return Index to first bit set after bitmap_bit +*/ + +uint bitmap_get_next_set(const MY_BITMAP *map, uint bitmap_bit) +{ + uint word_pos, byte_to_mask, i; + union { my_bitmap_map bitmap ; uchar bitmap_buff[sizeof(my_bitmap_map)]; } + first_word; + uchar *ptr= &first_word.bitmap_buff[0]; + my_bitmap_map *data_ptr, *end= map->last_word_ptr; + + DBUG_ASSERT(map->bitmap); + + /* Look for the next bit */ + bitmap_bit++; + if (bitmap_bit >= map->n_bits) + return MY_BIT_NONE; + word_pos= bitmap_bit / 32; + data_ptr= map->bitmap + word_pos; + first_word.bitmap= *data_ptr; + + /* Mask out previous bits from first_word */ + byte_to_mask= (bitmap_bit % 32) / 8; + for (i= 0; i < byte_to_mask; i++) + ptr[i]= 0; + ptr[byte_to_mask]&= 0xFFU << (bitmap_bit & 7); + + if (data_ptr == end) + { + if (first_word.bitmap & ~map->last_word_mask) + return get_first_set(first_word.bitmap, word_pos); + else + return MY_BIT_NONE; + } + + if (first_word.bitmap) + return get_first_set(first_word.bitmap, word_pos); + + for 
(data_ptr++, word_pos++; data_ptr < end; data_ptr++, word_pos++) + if (*data_ptr) + return get_first_set(*data_ptr, word_pos); + + if (!(*end & ~map->last_word_mask)) + return MY_BIT_NONE; + return get_first_set(*end, word_pos); +} + + +/* Get first free bit */ + +uint bitmap_get_first(const MY_BITMAP *map) +{ + uchar *byte_ptr; + uint i,j,k; + my_bitmap_map *data_ptr, *end= map->last_word_ptr; + + DBUG_ASSERT(map->bitmap); + data_ptr= map->bitmap; + *map->last_word_ptr|= map->last_word_mask; + + for (i=0; data_ptr < end; data_ptr++, i++) + if (*data_ptr != 0xFFFFFFFF) + goto found; + if ((*data_ptr | map->last_word_mask) == 0xFFFFFFFF) + return MY_BIT_NONE; + +found: + byte_ptr= (uchar*)data_ptr; + for (j=0; ; j++, byte_ptr++) + { + if (*byte_ptr != 0xFF) + { + for (k=0; ; k++) + { + if (!(*byte_ptr & (1 << k))) + return (i*32) + (j*8) + k; + } + } + } + DBUG_ASSERT(0); + return MY_BIT_NONE; /* Impossible */ +} diff --git a/mysys/my_chmod.c b/mysys/my_chmod.c new file mode 100644 index 00000000..3d086393 --- /dev/null +++ b/mysys/my_chmod.c @@ -0,0 +1,48 @@ +/* Copyright (C) 2000 MySQL AB + + This program is free software; you can redistribute it and/or modify + it under the terms of the GNU General Public License as published by + the Free Software Foundation; version 2 of the License. + + This program is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + GNU General Public License for more details. + + You should have received a copy of the GNU General Public License + along with this program; if not, write to the Free Software + Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1335 USA */ + +#include "mysys_priv.h" +#include "mysys_err.h" + +/** + @brief Change mode of file. 
+ + @fn my_chmod() + @param name Filename + @param mode_t Mode + @param my_flags Flags + + @notes + The mode of the file given by path or referenced by fildes is changed + + @retval 0 Ok + @retval # Error +*/ + +int my_chmod(const char *name, mode_t mode, myf my_flags) +{ + DBUG_ENTER("my_chmod"); + DBUG_PRINT("my",("name: %s mode: %lu flags: %lu", name, (ulong) mode, + my_flags)); + + if (chmod(name, mode)) + { + my_errno= errno; + if (my_flags & MY_WME) + my_error(EE_CANT_CHMOD, MYF(0), name, (ulong) mode, my_errno); + DBUG_RETURN(1); + } + DBUG_RETURN(0); +} diff --git a/mysys/my_chsize.c b/mysys/my_chsize.c new file mode 100644 index 00000000..f1affb80 --- /dev/null +++ b/mysys/my_chsize.c @@ -0,0 +1,101 @@ +/* Copyright (c) 2000-2007 MySQL AB, 2009 Sun Microsystems, Inc. + Use is subject to license terms. + + This program is free software; you can redistribute it and/or modify + it under the terms of the GNU General Public License as published by + the Free Software Foundation; version 2 of the License. + + This program is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + GNU General Public License for more details. + + You should have received a copy of the GNU General Public License + along with this program; if not, write to the Free Software + Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1335 USA */ + +#include "mysys_priv.h" +#include "mysys_err.h" +#include "m_string.h" + +/* + Change size of file. + + SYNOPSIS + my_chsize() + fd File descriptor + new_length New file size + filler If we don't have truncate, fill up all bytes after + new_length with this character + MyFlags Flags + + DESCRIPTION + my_chsize() truncates file if shorter else fill with the filler character. + The function also changes the file pointer. Usually it points to the end + of the file after execution. 
/*
  Change the size of a file to newlength.

  The current size is taken from a seek to the end of the file. If it
  already equals newlength nothing is done. A longer file is shortened with
  my_win_chsize() (_WIN32) or ftruncate() (HAVE_FTRUNCATE). A shorter file
  is extended by writing 'filler' bytes in chunks of at most IO_SIZE.

  RETURN VALUE
    0	Ok
    1	Error (reported with EE_CANT_CHSIZE when MyFlags contains MY_WME)
*/
int my_chsize(File fd, my_off_t newlength, int filler, myf MyFlags)
{
  my_off_t oldsize;
  uchar buff[IO_SIZE];
  DBUG_ENTER("my_chsize");
  DBUG_PRINT("my",("fd: %d length: %lu MyFlags: %lu",fd,(ulong) newlength,
		   MyFlags));

  /* Seeking to the end yields the current file size */
  if ((oldsize= my_seek(fd, 0L, MY_SEEK_END, MYF(MY_WME+MY_FAE))) == newlength)
    DBUG_RETURN(0);

  DBUG_PRINT("info",("old_size: %ld", (ulong) oldsize));

  if (oldsize > newlength)
  {
    /* File must shrink; both truncate branches return from here directly */
#ifdef _WIN32
    if (my_win_chsize(fd, newlength))
    {
      my_errno= errno;
      goto err;
    }
    DBUG_RETURN(0);
#elif defined(HAVE_FTRUNCATE)
    if (ftruncate(fd, (off_t) newlength))
    {
      my_errno= errno;
      goto err;
    }
    DBUG_RETURN(0);
#else
    /*
      Fill space between requested length and true length with 'filler'
      We should never come here on any modern machine
    */
    if (my_seek(fd, newlength, MY_SEEK_SET, MYF(MY_WME+MY_FAE))
	== MY_FILEPOS_ERROR)
    {
      goto err;
    }
    /*
      Exchange the two lengths so that the fill loop below writes from the
      requested length up to the old end of file.
    */
    swap_variables(my_off_t, newlength, oldsize);
#endif
  }

  /* Fill file with 'filler' until it's as big as requested */
  bfill(buff, IO_SIZE, filler);
  while (newlength-oldsize > IO_SIZE)
  {
    if (my_write(fd, buff, IO_SIZE, MYF(MY_NABP)))
      goto err;
    oldsize+= IO_SIZE;
  }
  /* Remaining part, at most IO_SIZE bytes */
  if (my_write(fd,buff,(size_t) (newlength-oldsize), MYF(MY_NABP)))
    goto err;
  DBUG_RETURN(0);

err:
  DBUG_PRINT("error", ("errno: %d", errno));
  if (MyFlags & MY_WME)
    my_error(EE_CANT_CHSIZE, MYF(ME_BELL), my_errno);
  DBUG_RETURN(1);
} /* my_chsize */
+
+   This program is distributed in the hope that it will be useful,
+   but WITHOUT ANY WARRANTY; without even the implied warranty of
+   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+   GNU General Public License for more details.
+
+   You should have received a copy of the GNU General Public License
+   along with this program; if not, write to the Free Software
+   Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1335 USA */
+
+#include
+#include
+#include
+#include
+#include
+
+/*
+  Compare two byte strings.
+
+  The common prefix (the shorter of a_length and b_length) is compared
+  byte by byte and the difference of the first unequal pair is returned.
+  If the common prefix is equal:
+    - part_key set and b shorter than a: the keys are regarded as equal (0)
+    - skip_end_space set and the lengths differ: the tail of the longer
+      key is checked against spaces (see the comment in the code)
+    - otherwise the difference a_length - b_length is returned
+*/
+static int compare_bin(const uchar *a, uint a_length,
+                       const uchar *b, uint b_length,
+                       my_bool part_key, my_bool skip_end_space)
+{
+  uint length= MY_MIN(a_length,b_length);
+  const uchar *end= a+ length;
+  int flag;
+
+  while (a < end)
+    if ((flag= (int) *a++ - (int) *b++))
+      return flag;
+  if (part_key && b_length < a_length)
+    return 0;
+  if (skip_end_space && a_length != b_length)
+  {
+    int swap= 1;
+    /*
+      We are using space compression. We have to check if longer key
+      has next character < ' ', in which case it's less than the shorter
+      key that has an implicit space afterwards.
+
+      This code is identical to the one in
+      strings/ctype-simple.c:my_strnncollsp_simple
+    */
+    if (a_length < b_length)
+    {
+      /* b is the longer key: continue scanning its tail through 'a' */
+      a_length= b_length;
+      a= b;
+      swap= -1;                                 /* swap sign of result */
+    }
+    /* Both pointers were advanced by 'length'; scan the remaining tail */
+    for (end= a + a_length-length; a < end ; a++)
+    {
+      if (*a != ' ')
+        return (*a < ' ') ? -swap : swap;
+    }
+    return 0;
+  }
+  return (int) (a_length-b_length);
+}
+
+
+/*
+  Compare two keys
+
+  SYNOPSIS
+    ha_key_cmp()
+    keyseg      Array of key segments of key to compare
+    a           First key to compare, in format from _mi_pack_key()
+                This is always from the row
+    b           Second key to compare. This is from the row or the user
+    key_length  Length of key to compare, based on key b.
This can be shorter
+                than b to just compare sub keys
+    nextflag    How keys should be compared
+                If bit SEARCH_FIND is not set the keys includes the row
+                position and this should also be compared
+                If SEARCH_PAGE_KEY_HAS_TRANSID is set then 'a' has transid
+                If SEARCH_USER_KEY_HAS_TRANSID is set then 'b' has transid
+    diff_pos    OUT Number of first keypart where values differ, counting
+                from one.
+    diff_pos[1] OUT (b + diff_pos[1]) points to first value in tuple b
+                that is different from corresponding value in tuple a.
+
+  EXAMPLES
+   Example1: if the function is called for tuples
+     ('aaa','bbb') and ('eee','fff'), then
+     diff_pos[0] = 1 (as 'aaa' != 'eee')
+     diff_pos[1] = 0 (offset from beginning of tuple b to 'eee' keypart).
+
+   Example2: if the index function is called for tuples
+     ('aaa','bbb') and ('aaa','fff'),
+     diff_pos[0] = 2 (as 'bbb' != 'fff')
+     diff_pos[1] = 3 (offset from beginning of tuple b to 'fff' keypart,
+                      here we assume that first key part is CHAR(3) NOT NULL)
+
+  NOTES
+    Number-keys can't be split
+
+  RETURN VALUES
+    <0  If a < b
+    0   If a == b
+    >0  If a > b
+*/
+
+#define FCMP(A,B) ((int) (A) - (int) (B))
+
+int ha_key_cmp(HA_KEYSEG *keyseg, const uchar *a,
+               const uchar *b, uint key_length, uint32 nextflag,
+               uint *diff_pos)
+{
+  int flag;
+  int16 s_1,s_2;
+  int32 l_1,l_2;
+  uint32 u_1,u_2;
+  float f_1,f_2;
+  double d_1,d_2;
+  uint next_key_length;
+  const uchar *orig_b= b;
+
+  *diff_pos=0;
+  for ( ; (int) key_length >0 ; key_length=next_key_length, keyseg++)
+  {
+    const uchar *end;
+    uint piks=! (keyseg->flag & HA_NO_SORT);    /* 0: segment is skipped, not compared */
+    (*diff_pos)++;
+    diff_pos[1]= (uint)(b - orig_b);
+
+    /* Handle NULL part */
+    if (keyseg->null_bit)
+    {
+      key_length--;                             /* One byte is the NULL indicator */
+      if (*a != *b && piks)
+      {
+        flag = (int) *a - (int) *b;
+        return ((keyseg->flag & HA_REVERSE_SORT) ? -flag : flag);
+      }
+      b++;
+      if (!*a++)                                /* If key was NULL */
+      {
+        if ((nextflag & (SEARCH_FIND | SEARCH_UPDATE | SEARCH_INSERT |
+                         SEARCH_NULL_ARE_EQUAL)) ==
+            (SEARCH_FIND | SEARCH_UPDATE | SEARCH_INSERT))
+        {
+          /* Allow duplicate keys */
+          nextflag= (nextflag & ~(SEARCH_FIND | SEARCH_UPDATE)) | SEARCH_SAME;
+        }
+        else if (nextflag & SEARCH_NULL_ARE_NOT_EQUAL)
+        {
+          /*
+            This is only used from mi_check() to calculate cardinality.
+            It can't be used when searching for a key as this would cause
+            compare of (a,b) and (b,a) to return the same value.
+          */
+          return -1;
+        }
+        next_key_length=key_length;
+        continue;                               /* To next key part */
+      }
+    }
+    end= a+ MY_MIN(keyseg->length,key_length);
+    next_key_length=key_length-keyseg->length;
+
+    switch ((enum ha_base_keytype) keyseg->type) {
+    case HA_KEYTYPE_TEXT:                       /* Ascii; Key is converted */
+      if (keyseg->flag & HA_SPACE_PACK)
+      {
+        int a_length,b_length,pack_length;
+        get_key_length(a_length,a);
+        get_key_pack_length(b_length,pack_length,b);
+        next_key_length=key_length-b_length-pack_length;
+
+        if (piks &&
+            (flag= ha_compare_char_fixed(keyseg->charset,
+                                         a, a_length,
+                                         b, b_length,
+                                         keyseg->length / keyseg->charset->mbmaxlen,
+                                         (my_bool) ((nextflag & SEARCH_PREFIX) &&
+                                                    next_key_length <= 0))))
+          return ((keyseg->flag & HA_REVERSE_SORT) ? -flag : flag);
+        a+=a_length;
+        b+=b_length;
+        break;
+      }
+      else
+      {
+        uint length=(uint) (end-a), a_length=length, b_length=length;
+        if (piks &&
+            (flag= ha_compare_char_fixed(keyseg->charset,
+                                         a, a_length,
+                                         b, b_length,
+                                         keyseg->length / keyseg->charset->mbmaxlen,
+                                         (my_bool) ((nextflag & SEARCH_PREFIX) &&
+                                                    next_key_length <= 0))))
+          return ((keyseg->flag & HA_REVERSE_SORT) ? -flag : flag);
+        a=end;
+        b+=length;
+      }
+      break;
+    case HA_KEYTYPE_BINARY:
+    case HA_KEYTYPE_BIT:
+      if (keyseg->flag & HA_SPACE_PACK)
+      {
+        int a_length,b_length,pack_length;
+        get_key_length(a_length,a);
+        get_key_pack_length(b_length,pack_length,b);
+        next_key_length=key_length-b_length-pack_length;
+
+        if (piks &&
+            (flag=compare_bin(a,a_length,b,b_length,
+                              (my_bool) ((nextflag & SEARCH_PREFIX) &&
+                                         next_key_length <= 0),1)))
+          return ((keyseg->flag & HA_REVERSE_SORT) ? -flag : flag);
+        a+=a_length;
+        b+=b_length;
+        break;
+      }
+      else
+      {
+        uint length=keyseg->length;
+        if (piks &&
+            (flag=compare_bin(a,length,b,length,
+                              (my_bool) ((nextflag & SEARCH_PREFIX) &&
+                                         next_key_length <= 0),0)))
+          return ((keyseg->flag & HA_REVERSE_SORT) ? -flag : flag);
+        a+=length;
+        b+=length;
+      }
+      break;
+    case HA_KEYTYPE_VARTEXT1:
+    case HA_KEYTYPE_VARTEXT2:
+      {
+        int a_length,b_length,pack_length;
+        get_key_length(a_length,a);
+        get_key_pack_length(b_length,pack_length,b);
+        next_key_length=key_length-b_length-pack_length;
+
+        if (piks &&
+            (flag= ha_compare_char_varying(keyseg->charset,
+                                           a, a_length,
+                                           b, b_length,
+                                           (my_bool) ((nextflag & SEARCH_PREFIX) &&
+                                                      next_key_length <= 0))))
+          return ((keyseg->flag & HA_REVERSE_SORT) ? -flag : flag);
+        a+= a_length;
+        b+= b_length;
+        break;
+      }
+      break;
+    case HA_KEYTYPE_VARBINARY1:
+    case HA_KEYTYPE_VARBINARY2:
+      {
+        int a_length,b_length,pack_length;
+        get_key_length(a_length,a);
+        get_key_pack_length(b_length,pack_length,b);
+        next_key_length=key_length-b_length-pack_length;
+
+        if (piks &&
+            (flag=compare_bin(a,a_length,b,b_length,
+                              (my_bool) ((nextflag & SEARCH_PREFIX) &&
+                                         next_key_length <= 0), 0)))
+          return ((keyseg->flag & HA_REVERSE_SORT) ? -flag : flag);
+        a+=a_length;
+        b+=b_length;
+      }
+      break;
+    case HA_KEYTYPE_INT8:
+    {
+      int i_1= (int) *((signed char*) a);
+      int i_2= (int) *((signed char*) b);
+      if (piks && (flag = CMP_NUM(i_1,i_2)))
+        return ((keyseg->flag & HA_REVERSE_SORT) ? -flag : flag);
+      a= end;
+      b++;
+      break;
+    }
+    case HA_KEYTYPE_SHORT_INT:
+      s_1= mi_sint2korr(a);
+      s_2= mi_sint2korr(b);
+      if (piks && (flag = CMP_NUM(s_1,s_2)))
+        return ((keyseg->flag & HA_REVERSE_SORT) ? -flag : flag);
+      a= end;
+      b+= 2; /* sizeof(short int); */
+      break;
+    case HA_KEYTYPE_USHORT_INT:
+      {
+        /*
+          NOTE(review): the values are read with the signed mi_sint2korr()
+          and stored in uint16 variables; presumably the conversion yields
+          the intended unsigned value - confirm against the macro.
+        */
+        uint16 us_1,us_2;
+        us_1= mi_sint2korr(a);
+        us_2= mi_sint2korr(b);
+        if (piks && (flag = CMP_NUM(us_1,us_2)))
+          return ((keyseg->flag & HA_REVERSE_SORT) ? -flag : flag);
+        a= end;
+        b+=2; /* sizeof(short int); */
+        break;
+      }
+    case HA_KEYTYPE_LONG_INT:
+      l_1= mi_sint4korr(a);
+      l_2= mi_sint4korr(b);
+      if (piks && (flag = CMP_NUM(l_1,l_2)))
+        return ((keyseg->flag & HA_REVERSE_SORT) ? -flag : flag);
+      a= end;
+      b+= 4; /* sizeof(long int); */
+      break;
+    case HA_KEYTYPE_ULONG_INT:
+      u_1= mi_sint4korr(a);
+      u_2= mi_sint4korr(b);
+      if (piks && (flag = CMP_NUM(u_1,u_2)))
+        return ((keyseg->flag & HA_REVERSE_SORT) ? -flag : flag);
+      a= end;
+      b+= 4; /* sizeof(long int); */
+      break;
+    case HA_KEYTYPE_INT24:
+      l_1=mi_sint3korr(a);
+      l_2=mi_sint3korr(b);
+      if (piks && (flag = CMP_NUM(l_1,l_2)))
+        return ((keyseg->flag & HA_REVERSE_SORT) ? -flag : flag);
+      a= end;
+      b+= 3;
+      break;
+    case HA_KEYTYPE_UINT24:
+      l_1=mi_uint3korr(a);
+      l_2=mi_uint3korr(b);
+      if (piks && (flag = CMP_NUM(l_1,l_2)))
+        return ((keyseg->flag & HA_REVERSE_SORT) ? -flag : flag);
+      a= end;
+      b+= 3;
+      break;
+    case HA_KEYTYPE_FLOAT:
+      mi_float4get(f_1,a);
+      mi_float4get(f_2,b);
+      /*
+        The following may give a compiler warning about floating point
+        comparison not being safe, but this is ok in this context as
+        we are basically doing sorting
+      */
+      if (piks && (flag = CMP_NUM(f_1,f_2)))
+        return ((keyseg->flag & HA_REVERSE_SORT) ? -flag : flag);
+      a= end;
+      b+= 4; /* sizeof(float); */
+      break;
+    case HA_KEYTYPE_DOUBLE:
+      mi_float8get(d_1,a);
+      mi_float8get(d_2,b);
+      /*
+        The following may give a compiler warning about floating point
+        comparison not being safe, but this is ok in this context as
+        we are basically doing sorting
+      */
+      if (piks && (flag = CMP_NUM(d_1,d_2)))
+        return ((keyseg->flag & HA_REVERSE_SORT) ? -flag : flag);
+      a= end;
+      b+= 8; /* sizeof(double); */
+      break;
+    case HA_KEYTYPE_NUM:                        /* Numeric key */
+    {
+      int swap_flag= 0;
+      int alength,blength;
+
+      if (keyseg->flag & HA_REVERSE_SORT)
+      {
+        swap_variables(const uchar*, a, b);
+        swap_flag=1;                            /* Remember swap of a & b */
+        end= a+ (int) (end-b);
+      }
+      if (keyseg->flag & HA_SPACE_PACK)
+      {
+        alength= *a++; blength= *b++;
+        end=a+alength;
+        next_key_length=key_length-blength-1;
+      }
+      else
+      {
+        alength= (int) (end-a);
+        blength=keyseg->length;
+        /* remove pre space from keys */
+        for ( ; alength && *a == ' ' ; a++, alength--) ;
+        for ( ; blength && *b == ' ' ; b++, blength--) ;
+      }
+      if (piks)
+      {
+        if (*a == '-')
+        {
+          if (*b != '-')
+            return -1;
+          /* Both negative: skip the signs and compare with a and b swapped */
+          a++; b++;
+          swap_variables(const uchar*, a, b);
+          swap_variables(int, alength, blength);
+          swap_flag=1-swap_flag;
+          alength--; blength--;
+          end=a+alength;
+        }
+        else if (*b == '-')
+          return 1;
+        while (alength && (*a == '+' || *a == '0'))
+        {
+          a++; alength--;
+        }
+        while (blength && (*b == '+' || *b == '0'))
+        {
+          b++; blength--;
+        }
+        /* After stripping sign and leading zeros the longer number is bigger */
+        if (alength != blength)
+          return (alength < blength) ? -1 : 1;
+        while (a < end)
+          if (*a++ != *b++)
+            return ((int) a[-1] - (int) b[-1]);
+      }
+      else
+      {
+        b+=(end-a);
+        a=end;
+      }
+
+      if (swap_flag)                            /* Restore pointers */
+        swap_variables(const uchar*, a, b);
+      break;
+    }
+#ifdef HAVE_LONG_LONG
+    case HA_KEYTYPE_LONGLONG:
+    {
+      longlong ll_a,ll_b;
+      ll_a= mi_sint8korr(a);
+      ll_b= mi_sint8korr(b);
+      if (piks && (flag = CMP_NUM(ll_a,ll_b)))
+        return ((keyseg->flag & HA_REVERSE_SORT) ? -flag : flag);
+      a= end;
+      b+= 8;
+      break;
+    }
+    case HA_KEYTYPE_ULONGLONG:
+    {
+      ulonglong ll_a,ll_b;
+      ll_a= mi_uint8korr(a);
+      ll_b= mi_uint8korr(b);
+      if (piks && (flag = CMP_NUM(ll_a,ll_b)))
+        return ((keyseg->flag & HA_REVERSE_SORT) ? -flag : flag);
+      a= end;
+      b+= 8;
+      break;
+    }
+#endif
+    case HA_KEYTYPE_END:                        /* Ready */
+      goto end;                                 /* diff_pos is incremented */
+    }
+  }
+  (*diff_pos)++;
+end:
+  if (!(nextflag & SEARCH_FIND))
+  {
+    /*
+      Compare rowid and possible transid
+      This happens in the following case:
+      - INSERT, UPDATE, DELETE when we have not unique keys or
+        are using versioning
+      - SEARCH_NEXT, SEARCH_PREVIOUS when we need to restart search
+
+      The logic for comparing transid is as follows:
+      Keys that have a transid mark this in the lowest bit of the rowid.
+      This means that if we are comparing a key with a transid with another
+      key that doesn't have a transid, we must reset the lowest bit for
+      both keys.
+
+      When we have transid, the keys are compared in transid order.
+      A key without a transid is regarded to be smaller than a key with
+      a transid.
+    */
+
+    uint i;
+    uchar key_mask, tmp_a, tmp_b;
+
+    if (nextflag & (SEARCH_NO_FIND | SEARCH_LAST)) /* Find record after key */
+      return (nextflag & (SEARCH_BIGGER | SEARCH_LAST)) ? -1 : 1;
+    key_mask= (uchar) 255;
+
+    if (!(nextflag & (SEARCH_USER_KEY_HAS_TRANSID |
+                      SEARCH_PAGE_KEY_HAS_TRANSID)))
+    {
+      /*
+        Neither key has a trid. Only compare row id's and don't
+        try to store rows in trid order
+      */
+      key_length= keyseg->length;
+      nextflag&= ~SEARCH_INSERT;
+    }
+    else
+    {
+      /*
+        Set key_mask so that we reset the last bit in the rowid before
+        we compare it. This is needed as the lowest bit in the rowid is
+        used to mark if the key has a transid or not.
+      */
+      key_mask= (uchar) 254;
+      if (!test_all_bits(nextflag, (SEARCH_USER_KEY_HAS_TRANSID |
+                                    SEARCH_PAGE_KEY_HAS_TRANSID)))
+      {
+        /*
+          No transaction id for user key or for key on page
+          Ignore transid as at least one of the keys are visible for all
+        */
+        key_length= keyseg->length;
+      }
+      else
+      {
+        /*
+          Both keys have trids. No need of special handling of incomplete
+          trids below.
+        */
+        nextflag&= ~SEARCH_INSERT;
+      }
+    }
+    DBUG_ASSERT(key_length > 0);
+
+    /* Compare all bytes except the last one, which is masked below */
+    for (i= key_length-1 ; (int) i-- > 0 ; )
+    {
+      if (*a++ != *b++)
+      {
+        flag= FCMP(a[-1],b[-1]);
+        goto found;
+      }
+    }
+    tmp_a= *a & key_mask;
+    tmp_b= *b & key_mask;
+    flag= FCMP(tmp_a, tmp_b);
+
+    if (flag == 0 && (nextflag & SEARCH_INSERT))
+    {
+      /*
+        Ensure that on insert we get rows stored in trid order.
+        If one of the parts doesn't have a trid, this should be regarded
+        as smaller than the other
+      */
+      return (nextflag & SEARCH_USER_KEY_HAS_TRANSID) ? -1 : 1;
+    }
+found:
+    if (nextflag & SEARCH_SAME)
+      return (flag);                            /* read same */
+    if (nextflag & SEARCH_BIGGER)
+      return (flag <= 0 ? -1 : 1);              /* read next */
+    return (flag < 0 ? -1 : 1);                 /* read previous */
+  }
+  return 0;
+} /* ha_key_cmp */
+
+/*
+  Find the first NULL value in index-suffix values tuple
+
+  SYNOPSIS
+    ha_find_null()
+      keyseg     Array of keyparts for key suffix
+      a          Key suffix value tuple
+
+  DESCRIPTION
+    Find the first NULL value in index-suffix values tuple.
+
+  TODO
+    Consider optimizing this function or its use so we don't search for
+    NULL values in completely NOT NULL index suffixes.
+
+  RETURN
+    First key part that has NULL as value in values tuple, or the last key
+    part (with keyseg->type==HA_KEYTYPE_END) if values tuple doesn't contain
+    NULLs.
+*/
+
+HA_KEYSEG *ha_find_null(HA_KEYSEG *keyseg, const uchar *a)
+{
+  for (; (enum ha_base_keytype) keyseg->type != HA_KEYTYPE_END; keyseg++)
+  {
+    const uchar *end;
+    if (keyseg->null_bit)
+    {
+      if (!*a++)                                /* Indicator byte is 0: found it */
+        return keyseg;
+    }
+    end= a+ keyseg->length;                     /* End of a fixed-length value */
+
+    /* Step over the value; how far depends on how the type is stored */
+    switch ((enum ha_base_keytype) keyseg->type) {
+    case HA_KEYTYPE_TEXT:
+    case HA_KEYTYPE_BINARY:
+    case HA_KEYTYPE_BIT:
+      if (keyseg->flag & HA_SPACE_PACK)
+      {
+        int a_length;
+        get_key_length(a_length, a);
+        a += a_length;
+        break;
+      }
+      else
+        a= end;
+      break;
+    case HA_KEYTYPE_VARTEXT1:
+    case HA_KEYTYPE_VARTEXT2:
+    case HA_KEYTYPE_VARBINARY1:
+    case HA_KEYTYPE_VARBINARY2:
+      {
+        int a_length;
+        get_key_length(a_length, a);
+        a+= a_length;
+        break;
+      }
+    case HA_KEYTYPE_NUM:
+      if (keyseg->flag & HA_SPACE_PACK)
+      {
+        int alength= *a++;                      /* One length byte, then the digits */
+        end= a+alength;
+      }
+      a= end;
+      break;
+    case HA_KEYTYPE_INT8:
+    case HA_KEYTYPE_SHORT_INT:
+    case HA_KEYTYPE_USHORT_INT:
+    case HA_KEYTYPE_LONG_INT:
+    case HA_KEYTYPE_ULONG_INT:
+    case HA_KEYTYPE_INT24:
+    case HA_KEYTYPE_UINT24:
+#ifdef HAVE_LONG_LONG
+    case HA_KEYTYPE_LONGLONG:
+    case HA_KEYTYPE_ULONGLONG:
+#endif
+    case HA_KEYTYPE_FLOAT:
+    case HA_KEYTYPE_DOUBLE:
+      a= end;
+      break;
+    case HA_KEYTYPE_END:                        /* purecov: inspected */
+      /* keep compiler happy; the loop condition excludes this type */
+      DBUG_ASSERT(0);
+      break;
+    }
+  }
+  return keyseg;                                /* No NULL: the terminating segment */
+}
+
diff --git a/mysys/my_compress.c b/mysys/my_compress.c
new file mode 100644
index 00000000..e30d42c2
--- /dev/null
+++ b/mysys/my_compress.c
@@ -0,0 +1,186 @@
+/* Copyright (c) 2000, 2010, Oracle and/or its affiliates. All rights reserved.
+   Copyright (c) 2022, MariaDB Corporation.
+
+   This program is free software; you can redistribute it and/or modify
+   it under the terms of the GNU General Public License as published by
+   the Free Software Foundation; version 2 of the License.
+
+   This program is distributed in the hope that it will be useful,
+   but WITHOUT ANY WARRANTY; without even the implied warranty of
+   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.
See the
+   GNU General Public License for more details.
+
+   You should have received a copy of the GNU General Public License
+   along with this program; if not, write to the Free Software
+   Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1335 USA */
+
+/* Written by Sinisa Milivojevic */
+
+#include
+#ifdef HAVE_COMPRESS
+#include
+#ifndef SCO
+#include
+#endif
+#include
+
+/*
+   This replaces the packet with a compressed packet
+
+   SYNOPSIS
+     my_compress()
+     packet     Data to compress. This is replaced with the compressed data.
+     len        Length of data to compress at 'packet'
+     complen    out: 0 if packet was not compressed
+
+   NOTES
+     Packets shorter than MIN_COMPRESS_LENGTH are left untouched and
+     reported as ok with *complen == 0.
+     When my_compress_alloc() returns no buffer, the result depends on
+     *complen: 0 gives return value 1, non-zero gives return value 0.
+
+   RETURN
+     1   error. 'len' is not changed
+     0   ok. In this case 'len' contains the size of the compressed packet
+*/
+
+my_bool my_compress(uchar *packet, size_t *len, size_t *complen)
+{
+  DBUG_ENTER("my_compress");
+  if (*len < MIN_COMPRESS_LENGTH)
+  {
+    *complen=0;
+    DBUG_PRINT("note",("Packet too short: Not compressed"));
+  }
+  else
+  {
+    uchar *compbuf=my_compress_alloc(packet,len,complen);
+    if (!compbuf)
+      DBUG_RETURN(*complen ? 0 : 1);
+    memcpy(packet,compbuf,*len);                /* *len is now the compressed size */
+    my_free(compbuf);
+  }
+  DBUG_RETURN(0);
+}
+
+/* Allocation callback handed to zlib; 'dummy' is the unused opaque pointer */
+void *my_az_allocator(void *dummy __attribute__((unused)), unsigned int items,
+                      unsigned int size)
+{
+  return my_malloc(key_memory_my_compress_alloc, (size_t)items*(size_t)size,
+                   MYF(0));
+}
+
+/* Matching free callback for my_az_allocator() */
+void my_az_free(void *dummy __attribute__((unused)), void *address)
+{
+  my_free(address);
+}
+
+/*
+  This works like zlib compress(), but using custom memory allocators to work
+  better with my_malloc leak detection and Valgrind.
+*/
+int my_compress_buffer(uchar *dest, size_t *destLen,
+                       const uchar *source, size_t sourceLen)
+{
+  z_stream stream;
+  int err;
+
+  stream.next_in = (Bytef*)source;
+  stream.avail_in = (uInt)sourceLen;
+  stream.next_out = (Bytef*)dest;
+  stream.avail_out = (uInt)*destLen;
+  /*
+    Both lengths are narrowed to uInt above. Refuse lengths that do not
+    survive the conversion instead of silently compressing only a part
+    of the input or overrunning the accounting of the output buffer.
+  */
+  if ((size_t)stream.avail_out != *destLen ||
+      (size_t)stream.avail_in != sourceLen)
+    return Z_BUF_ERROR;
+
+  stream.zalloc = (alloc_func)my_az_allocator;
+  stream.zfree = (free_func)my_az_free;
+  stream.opaque = (voidpf)0;
+
+  err = deflateInit(&stream, Z_DEFAULT_COMPRESSION);
+  if (err != Z_OK) return err;
+
+  /* Single-shot compression: anything but Z_STREAM_END is a failure */
+  err = deflate(&stream, Z_FINISH);
+  if (err != Z_STREAM_END) {
+    deflateEnd(&stream);
+    return err == Z_OK ? Z_BUF_ERROR : err;
+  }
+  *destLen = stream.total_out;
+
+  err = deflateEnd(&stream);
+  return err;
+}
+
+/*
+  Compress 'packet' into a newly allocated buffer.
+
+  SYNOPSIS
+    my_compress_alloc()
+    packet      Data to compress (not modified)
+    len         in:  length of 'packet'
+                out: length of the compressed data, on success only
+    complen     out: original length of the data on success,
+                     0 whenever no compressed buffer is returned
+
+  RETURN
+    Buffer with the compressed data, to be released with my_free(), or
+    0 if memory could not be allocated, compression failed or the data
+    did not get shorter. In these cases 'len' is unchanged.
+*/
+uchar *my_compress_alloc(const uchar *packet, size_t *len, size_t *complen)
+{
+  uchar *compbuf;
+  int res;
+  /*
+    Size of the output buffer: 120% of the input plus 12 bytes.
+    len + len/5 gives the same value as len*120/100 for integers, but
+    without the intermediate product that can overflow for huge lengths.
+  */
+  *complen= *len + *len / 5 + 12;
+
+  if (!(compbuf= (uchar *) my_malloc(key_memory_my_compress_alloc,
+                                     *complen, MYF(MY_WME))))
+  {
+    *complen= 0;                                /* Nothing was compressed */
+    return 0;
+  }
+
+  res= my_compress_buffer(compbuf, complen, packet, *len);
+
+  if (res != Z_OK)
+  {
+    /*
+      Do not leave the buffer size in *complen: my_compress() treats a
+      non-zero value as "packet was compressed".
+    */
+    *complen= 0;
+    my_free(compbuf);
+    return 0;
+  }
+
+  if (*complen >= *len)
+  {
+    *complen= 0;
+    my_free(compbuf);
+    DBUG_PRINT("note",("Packet got longer on compression; Not compressed"));
+    return 0;
+  }
+  /* Store length of compressed packet in *len */
+  swap_variables(size_t, *len, *complen);
+  return compbuf;
+}
+
+
+/*
+  Uncompress packet
+
+   SYNOPSIS
+     my_uncompress()
+     packet     Compressed data. This is is replaced with the original data.
+     len        Length of compressed data
+     complen    Length of the packet buffer (must be enough for the original
+                data)
+
+   RETURN
+     1   error
+     0   ok. In this case 'complen' contains the updated size of the
+         real data.
+*/
+
+my_bool my_uncompress(uchar *packet, size_t len, size_t *complen)
+{
+  uLongf tmp_complen;
+  DBUG_ENTER("my_uncompress");
+
+  if (*complen)                                 /* If compressed */
+  {
+    /* Uncompress into a scratch buffer of *complen bytes, then copy back */
+    uchar *compbuf= (uchar *) my_malloc(key_memory_my_compress_alloc,
+                                        *complen,MYF(MY_WME));
+    int error;
+    if (!compbuf)
+      DBUG_RETURN(1);                           /* Not enough memory */
+
+    tmp_complen= (uLongf) *complen;
+    error= uncompress((Bytef*) compbuf, &tmp_complen, (Bytef*) packet,
+                      (uLong) len);
+    *complen= tmp_complen;                      /* Stored back even if uncompress() failed */
+    if (error != Z_OK)
+    {                                           /* Probably wrong packet */
+      DBUG_PRINT("error",("Can't uncompress packet, error: %d",error));
+      my_free(compbuf);
+      DBUG_RETURN(1);
+    }
+    memcpy(packet, compbuf, *complen);
+    my_free(compbuf);
+  }
+  else
+    *complen= len;                              /* Not compressed: data is used as is */
+  DBUG_RETURN(0);
+}
+
+#endif /* HAVE_COMPRESS */
diff --git a/mysys/my_copy.c b/mysys/my_copy.c
new file mode 100644
index 00000000..3b07dd5f
--- /dev/null
+++ b/mysys/my_copy.c
@@ -0,0 +1,151 @@
+/*
+   Copyright (c) 2000, 2010, Oracle and/or its affiliates
+
+   This program is free software; you can redistribute it and/or modify
+   it under the terms of the GNU General Public License as published by
+   the Free Software Foundation; version 2 of the License.
+
+   This program is distributed in the hope that it will be useful,
+   but WITHOUT ANY WARRANTY; without even the implied warranty of
+   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+   GNU General Public License for more details.
+ + You should have received a copy of the GNU General Public License + along with this program; if not, write to the Free Software + Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1335 USA */ + +#include "mysys_priv.h" +#include "mysys_err.h" +#include /* for stat */ +#include +#if defined(HAVE_UTIME_H) +#include +#elif defined(HAVE_SYS_UTIME_H) +#include +#elif !defined(HPUX10) +#include +struct utimbuf { + time_t actime; + time_t modtime; +}; +#endif + + +/* + int my_copy(const char *from, const char *to, myf MyFlags) + + NOTES + Ordinary ownership and accesstimes are copied from 'from-file' + If MyFlags & MY_HOLD_ORIGINAL_MODES is set and to-file exists then + the modes of to-file isn't changed + If MyFlags & MY_DONT_OVERWRITE_FILE is set, we will give an error + if the file existed. + + WARNING + Don't set MY_FNABP or MY_NABP bits on when calling this function ! + + RETURN + 0 ok + # Error + +*/ + +int my_copy(const char *from, const char *to, myf MyFlags) +{ + size_t Count; + my_bool new_file_stat= 0; /* 1 if we could stat "to" */ + int create_flag; + File from_file,to_file; + uchar buff[IO_SIZE]; + MY_STAT stat_buff,new_stat_buff; + my_bool file_created= 0; + DBUG_ENTER("my_copy"); + DBUG_PRINT("my",("from %s to %s MyFlags %lu", from, to, MyFlags)); + + from_file=to_file= -1; + DBUG_ASSERT(!(MyFlags & (MY_FNABP | MY_NABP))); /* for my_read/my_write */ + if (MyFlags & MY_HOLD_ORIGINAL_MODES) /* Copy stat if possible */ + new_file_stat= MY_TEST(my_stat((char*) to, &new_stat_buff, MYF(0))); + + if ((from_file=my_open(from,O_RDONLY | O_SHARE,MyFlags)) >= 0) + { + if (!my_stat(from, &stat_buff, MyFlags)) + { + my_errno=errno; + goto err; + } + if (MyFlags & MY_HOLD_ORIGINAL_MODES && new_file_stat) + stat_buff=new_stat_buff; + create_flag= (MyFlags & MY_DONT_OVERWRITE_FILE) ? 
O_EXCL : O_TRUNC; + + if ((to_file= my_create(to,(int) stat_buff.st_mode, + O_WRONLY | create_flag | O_BINARY | O_SHARE, + MyFlags)) < 0) + goto err; + + file_created= 1; + while ((Count=my_read(from_file, buff, sizeof(buff), MyFlags)) != 0) + { + if (Count == (uint) -1 || + my_write(to_file,buff,Count,MYF(MyFlags | MY_NABP))) + goto err; + } + + /* sync the destination file */ + if (MyFlags & MY_SYNC) + { + if (my_sync(to_file, MyFlags)) + goto err; + } + + if (my_close(from_file,MyFlags) | my_close(to_file,MyFlags)) + DBUG_RETURN(-1); /* Error on close */ + + from_file=to_file= -1; /* Files are closed */ + + /* Copy modes if possible */ + + if (MyFlags & MY_HOLD_ORIGINAL_MODES && !new_file_stat) + DBUG_RETURN(0); /* File copyed but not stat */ + /* Copy modes */ + if (chmod(to, stat_buff.st_mode & 07777)) + { + my_errno= errno; + if (MyFlags & MY_WME) + my_error(EE_CHANGE_PERMISSIONS, MYF(ME_BELL), to, errno); + if (MyFlags & MY_FAE) + goto err; + } +#if !defined(_WIN32) + /* Copy ownership */ + if (chown(to, stat_buff.st_uid, stat_buff.st_gid)) + { + my_errno= errno; + if (MyFlags & MY_WME) + my_error(EE_CANT_COPY_OWNERSHIP, MYF(ME_BELL), to, errno); + if (MyFlags & MY_FAE) + goto err; + } +#endif + + if (MyFlags & MY_COPYTIME) + { + struct utimbuf timep; + timep.actime = stat_buff.st_atime; + timep.modtime = stat_buff.st_mtime; + (void) utime((char*) to, &timep); /* last accessed and modified times */ + } + + DBUG_RETURN(0); + } + +err: + if (from_file >= 0) (void) my_close(from_file,MyFlags); + if (to_file >= 0) (void) my_close(to_file, MyFlags); + + /* attempt to delete the to-file we've partially written */ + if (file_created) + (void) my_delete(to, MyFlags); + + DBUG_RETURN(-1); +} /* my_copy */ diff --git a/mysys/my_cpu.c b/mysys/my_cpu.c new file mode 100644 index 00000000..52500d78 --- /dev/null +++ b/mysys/my_cpu.c @@ -0,0 +1,79 @@ +/* Copyright (c) 2019, 2020, MariaDB Corporation. 
+
+   This program is free software; you can redistribute it and/or modify
+   it under the terms of the GNU General Public License as published by
+   the Free Software Foundation; version 2 of the License.
+
+   This program is distributed in the hope that it will be useful,
+   but WITHOUT ANY WARRANTY; without even the implied warranty of
+   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+   GNU General Public License for more details.
+
+   You should have received a copy of the GNU General Public License
+   along with this program; if not, write to the Free Software
+   Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA */
+
+#include
+#include
+#include
+
+#ifdef HAVE_PAUSE_INSTRUCTION
+/** How many times to invoke PAUSE in a loop */
+unsigned my_cpu_relax_multiplier = 200;
+
+#define PAUSE4 MY_RELAX_CPU(); MY_RELAX_CPU(); MY_RELAX_CPU(); MY_RELAX_CPU()
+#define PAUSE16 PAUSE4; PAUSE4; PAUSE4; PAUSE4
+
+/**
+  Initialize my_cpu_relax_multiplier.
+
+  Determine the duration of a PAUSE instruction by running an
+  unrolled loop of 16 PAUSE instructions twice, and taking the
+  faster of the two runs. In this way, even if the execution is
+  interrupted by the operating system, it should be extremely
+  unlikely that both loops get interrupted.
+
+  On the Intel Skylake microarchitecture, the PAUSE instruction takes
+  around 140 clock cycles, while on earlier microarchitectures it could
+  be 10 clock cycles or less. Scale the PAUSE loop counter accordingly.
+
+  On a pre-Skylake Intel Xeon CPU E5-2630 v4 @ 2.20GHz running an AMD64
+  executable, the numbers would be between 172 and 220 when all the code
+  is inlined as follows:
+
+  rdtsc,mov,shl,or, 16*pause,
+  rdtsc,mov,shl,or, 16*pause,
+  rdtsc.
+
+  That would yield 11 to 14 cycles per PAUSE instruction even if we
+  (wrongly) ignore the overhead of the other instructions.
+
+  On a Skylake mobile processor Intel Core i7-6500U CPU @ 2.50GHz, the
+  numbers would range from 1896 to 2410 (or 1976 if taking the minimum
+  of two runs), yielding 118 to 151 (or 123) cycles per PAUSE instruction.
+
+  Let us define a threshold at roughly 30 cycles per PAUSE instruction,
+  and use a shorter delay if the PAUSE instruction takes longer than
+  that. In some AMD processors, the PAUSE instruction could take 40 or
+  50 cycles. Let us use a shorter delay multiplier for them as well.
+
+  The 1/2 scaling factor (200/100) was derived experimentally by
+  Steve Shaw from Intel and Sergey Vojtovich from MariaDB Foundation.
+  In an earlier experiment on MySQL code base, a 1/10 scaling factor
+  (200/20) seemed to work best.
+
+  The basic idea of the detection algorithm (run 16 PAUSE instructions
+  between RDTSC) was suggested by Mikhail Sinyavin from Intel.
+*/
+void my_cpu_init(void)
+{
+  ulonglong t0, t1, t2;
+  t0= my_timer_cycles();
+  PAUSE16;
+  t1= my_timer_cycles();
+  PAUSE16;
+  t2= my_timer_cycles();
+  /*
+    Requiring both runs to exceed 30 cycles per PAUSE is the same as
+    testing the faster of the two runs against the threshold.
+  */
+  if (t2 - t1 > 30 * 16 && t1 - t0 > 30 * 16)
+    my_cpu_relax_multiplier= 100;
+}
+#endif
diff --git a/mysys/my_create.c b/mysys/my_create.c
new file mode 100644
index 00000000..6fb817da
--- /dev/null
+++ b/mysys/my_create.c
@@ -0,0 +1,60 @@
+/* Copyright (c) 2000, 2001, 2005-2008 MySQL AB, 2009 Sun Microsystems, Inc.
+   Use is subject to license terms.
+
+   This program is free software; you can redistribute it and/or modify
+   it under the terms of the GNU General Public License as published by
+   the Free Software Foundation; version 2 of the License.
+
+   This program is distributed in the hope that it will be useful,
+   but WITHOUT ANY WARRANTY; without even the implied warranty of
+   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+   GNU General Public License for more details.
+
+   You should have received a copy of the GNU General Public License
+   along with this program; if not, write to the Free Software
+   Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1335 USA */
+
+#include "mysys_priv.h"
+#include
+#include "mysys_err.h"
+#include
+#include
+#if defined(_WIN32)
+#include
+#endif
+
+        /*
+        ** Create a new file
+        ** Arguments:
+        ** FileName      Path-name of file
+        ** CreateFlags   Read | write on file (umask value); my_umask is
+        **               used if this is 0. Not used on Windows.
+        ** access_flags  Read & Write on open file; O_CREAT is always added
+        ** MyFlags       Special flags
+        ** Returns the value of my_register_filename() for the new file
+        */
+
+
+File my_create(const char *FileName, int CreateFlags, int access_flags,
+               myf MyFlags)
+{
+  int fd;
+  DBUG_ENTER("my_create");
+  DBUG_PRINT("my",("Name: '%s' CreateFlags: %d AccessFlags: %d MyFlags: %lu",
+                   FileName, CreateFlags, access_flags, MyFlags));
+#if defined(_WIN32)
+  fd= my_win_open(FileName, access_flags | O_CREAT);
+#else
+  fd= open((char *) FileName, access_flags | O_CREAT | O_CLOEXEC,
+           CreateFlags ? CreateFlags : my_umask);
+#endif
+
+  /* If the directory can't be synced, the create is treated as failed */
+  if ((MyFlags & MY_SYNC_DIR) && (fd >=0) &&
+      my_sync_dir_by_file(FileName, MyFlags))
+  {
+    my_close(fd, MyFlags);
+    fd= -1;
+  }
+
+  /* Called for a failed fd (-1) as well as for a successfully opened one */
+  fd= my_register_filename(fd, FileName, FILE_BY_CREATE,
+                           EE_CANTCREATEFILE, MyFlags);
+  DBUG_RETURN(fd);
+} /* my_create */
diff --git a/mysys/my_default.c b/mysys/my_default.c
new file mode 100644
index 00000000..65a876a9
--- /dev/null
+++ b/mysys/my_default.c
@@ -0,0 +1,1089 @@
+/* Copyright (c) 2000, 2017, Oracle and/or its affiliates. All rights reserved.
+   Copyright (c) 2011, 2018, MariaDB Corporation
+
+   This program is free software; you can redistribute it and/or modify
+   it under the terms of the GNU General Public License as published by
+   the Free Software Foundation; version 2 of the License.
+
+   This program is distributed in the hope that it will be useful,
+   but WITHOUT ANY WARRANTY; without even the implied warranty of
+   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+   GNU General Public License for more details.
+ + You should have received a copy of the GNU General Public License + along with this program; if not, write to the Free Software + Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1335 USA */ + +/**************************************************************************** + Add all options from files named "group".cnf from the default_directories + before the command line arguments. + On Windows defaults will also search in the Windows directory for a file + called 'group'.ini + As long as the program uses the last argument for conflicting + options one only have to add a call to "load_defaults" to enable + use of default values. + pre- and end 'blank space' are removed from options and values. The + following escape sequences are recognized in values: \b \t \n \r \\ + + The following arguments are handled automatically; If used, they must be + first argument on the command line! + --no-defaults ; no options are read. + --defaults-file=full-path-to-default-file ; Only this file will be read. + --defaults-extra-file=full-path-to-default-file ; Read this file before ~/ + --defaults-group-suffix ; Also read groups with concat(group, suffix) + --print-defaults ; Print the modified command line and exit +****************************************************************************/ + +#include "mysys_priv.h" +#include +#include +#include +#include +#ifdef _WIN32 +#include +#endif + +/* + Mark file names in argv[]. File marker is *always* followed by a file name + All options after it come from that file. + Empty file name ("") means command line. 
+*/
+static char *file_marker= (char*)"----file-marker----"; /* sentinel; recognised by address, see is_file_marker() */
+my_bool my_defaults_mark_files= FALSE; /* when set, file markers are inserted into the argument list */
+my_bool is_file_marker(const char* arg)
+{
+  return arg == file_marker; /* pointer identity on purpose, not strcmp() */
+}
+
+my_bool my_no_defaults=FALSE, my_print_defaults= FALSE; /* set by get_defaults_options() */
+const char *my_defaults_file=0; /* value of --defaults-file=, or 0 */
+const char *my_defaults_group_suffix=0; /* value of --defaults-group-suffix= (or $MYSQL_GROUP_SUFFIX), or 0 */
+const char *my_defaults_extra_file=0; /* value of --defaults-extra-file=, or 0 */
+
+/* Which directories are searched for options (and in which order) */
+
+#define MAX_DEFAULT_DIRS 7
+#define DEFAULT_DIRS_SIZE (MAX_DEFAULT_DIRS + 1) /* Terminate with NULL */
+static const char **default_directories = NULL; /* filled in through load_defaults() */
+
+#ifdef _WIN32
+static const char *f_extensions[]= { ".ini", ".cnf", 0 }; /* tried in this order */
+#define NEWLINE "\r\n"
+#else
+static const char *f_extensions[]= { ".cnf", 0 };
+#define NEWLINE "\n"
+#endif
+
+struct handle_option_ctx
+{
+  MEM_ROOT *alloc; /* memory root that option strings are copied into (add_option) */
+  DYNAMIC_ARRAY *args; /* array of char* collecting the options found */
+  TYPELIB *group; /* names of the [groups] whose options are accepted */
+};
+
+static int search_default_file(struct handle_option_ctx *,
+                               const char *, const char *);
+static int search_default_file_with_ext(struct handle_option_ctx *,
+                                        const char *, const char *,
+                                        const char *, int);
+
+
+/**
+  Create the list of default directories.
+
+  @param alloc MEM_ROOT where the list of directories is stored
+
+  @details
+  The directories searched, in order, are:
+  - Windows: GetSystemWindowsDirectory()
+  - Windows: GetWindowsDirectory()
+  - Windows: C:/
+  - Windows: Directory above where the executable is located, and then
+             its "data" subdirectory
+  - Unix: the value of DEFAULT_SYSCONFDIR, if defined and not empty
+  - Unix: /etc/ and /etc/mysql/ unless DEFAULT_SYSCONFDIR is defined
+  - ALL: getenv("MARIADB_HOME"), or getenv("MYSQL_HOME") when the former
+         is not set
+  - ALL: --defaults-extra-file= (run-time option; stored as an empty
+         string placeholder in the list)
+  - Unix: ~/
+
+  On all systems, if a directory is already in the list, it will be moved
+  to the end of the list. This avoids reading defaults files multiple times,
+  while ensuring the correct precedence.
+
+  @retval NULL Failure (out of memory, probably)
+  @retval other Pointer to NULL-terminated array of default directories
+*/
+
+static const char **init_default_directories(MEM_ROOT *alloc);
+
+
+static char *remove_end_comment(char *ptr);
+
+
+/*
+  Process config files in default directories.
+
+  SYNOPSIS
+  my_search_option_files()
+  conf_file                   Basename for configuration file to search for.
+                              If this is a path, then only this file is read.
+  ctx                         Handler context: memory root, array that
+                              receives the options found and the list of
+                              groups to read.
+  default_directories         NULL-terminated list of directories to search.
+                              An empty string entry marks the position where
+                              the --defaults-extra-file file is read.
+
+  DESCRIPTION
+    If my_defaults_group_suffix is set, the group list in ctx is first
+    doubled: every group "name" gets a companion "name<suffix>".
+
+    Then exactly one of the following is done:
+    - my_defaults_file is set: only that file is read; it is an error if
+      it can not be opened.
+    - conf_file contains a directory part: only that file is read.
+    - otherwise conf_file is looked up in every entry of
+      default_directories, in order.
+
+  NOTES
+    The TYPELIB in ctx->group is modified in place when a group suffix is
+    in use (count is doubled and type_names is replaced).
+
+  RETURN
+    0  ok
+    1  a config file could not be read or parsed; a message was printed
+       to stderr
+    2  out of memory while building the suffixed group list
+*/
+
+static int my_search_option_files(const char *conf_file,
+                                  struct handle_option_ctx *ctx,
+                                  const char **default_directories)
+{
+  const char **dirs;
+  int error= 0;
+  DBUG_ENTER("my_search_option_files");
+
+  if (my_defaults_group_suffix)
+  {
+    /* Handle --defaults-group-suffix= */
+    uint i;
+    const char **extra_groups;
+    const size_t instance_len= strlen(my_defaults_group_suffix);
+    char *ptr;
+    TYPELIB *group= ctx->group;
+
+    if (!(extra_groups=
+          (const char**)alloc_root(ctx->alloc,
+                                   (2*group->count+1)*sizeof(char*)))) /* originals + suffixed copies + terminator */
+      DBUG_RETURN(2);
+
+    for (i= 0; i < group->count; i++)
+    {
+      size_t len;
+      extra_groups[i]= group->type_names[i]; /** copy group */
+
+      len= strlen(extra_groups[i]);
+      if (!(ptr= alloc_root(ctx->alloc, (uint) (len+instance_len+1))))
+        DBUG_RETURN(2);
+
+      extra_groups[i+group->count]= ptr;
+
+      /** Construct new group */
+      memcpy(ptr, extra_groups[i], len);
+      memcpy(ptr+len, my_defaults_group_suffix, instance_len+1); /* +1 copies the terminating NUL too */
+    }
+
+    group->count*= 2;
+    group->type_names= extra_groups;
+    group->type_names[group->count]= 0;
+  }
+
+  if (my_defaults_file)
+  {
+    if ((error= search_default_file_with_ext(ctx, "", "",
+                                             my_defaults_file, 0)) < 0)
+      goto err;
+    if (error > 0) /* an explicitly requested file must be readable */
+    {
+      fprintf(stderr, "Could not open required defaults file: %s\n",
+              my_defaults_file);
+      goto err;
+    }
+  }
+  else if (dirname_length(conf_file))
+  {
+    if ((error= search_default_file(ctx, NullS, conf_file)) < 0)
+      goto err;
+  }
+  else
+  {
+    for (dirs= default_directories ; *dirs; dirs++)
+    {
+      if (**dirs)
+      {
+        if (search_default_file(ctx, *dirs, conf_file) < 0)
+          goto err;
+      }
+      else if (my_defaults_extra_file) /* empty entry: slot of --defaults-extra-file */
+      {
+        if ((error= search_default_file_with_ext(ctx, "", "",
+                                                 my_defaults_extra_file, 0)) < 0)
+          goto err; /* Fatal error */
+        if (error > 0)
+        {
+          fprintf(stderr, "Could not open required defaults file: %s\n",
+                  my_defaults_extra_file);
+          goto err;
+        }
+      }
+    }
+  }
+
+  DBUG_RETURN(0);
+
+err:
+  fprintf(stderr,"Fatal error in defaults handling. Program aborted\n");
+  DBUG_RETURN(1);
+}
+
+
+/*
+  adds an option to the array of options
+
+  SYNOPSIS
+    add_option()
+    ctx                     Handler context.
+    option                  The very option to be processed. It is already
+                            prepared to be used in argv (has -- prefix).
+
+  RETURN
+    0 - ok
+    1 - error occurred
+*/
+
+static int add_option(struct handle_option_ctx *ctx, const char *option)
+{
+  char *tmp= strdup_root(ctx->alloc, option); /* the array stores pointers, so keep a private copy */
+  return !tmp || insert_dynamic(ctx->args, (uchar*) &tmp);
+}
+
+
+/*
+  Gets options from the command line
+
+  SYNOPSIS
+    get_defaults_options()
+    argv                    Pointer to argv of original program
+
+  DESCRIPTION
+    Sets my_no_defaults, my_defaults_file, my_defaults_extra_file,
+    my_defaults_group_suffix, my_print_defaults
+
+    All five variables are reset first. Only leading arguments are
+    examined: either a single --no-defaults, or any run of
+    --defaults-file=, --defaults-extra-file= and --defaults-group-suffix=
+    (each accepted once), optionally followed by --print-defaults.
+    Scanning stops at the first argument that is none of these.
+
+    If no group suffix was given, $MYSQL_GROUP_SUFFIX is used.
+    The two file names are passed through my_realpath() into static
+    buffers, so the stored pointers do not point into argv afterwards.
+
+  RETURN
+    # Number of arguments used from *argv
+      (the program name is included in the count)
+*/
+
+int get_defaults_options(char **argv)
+{
+  static char file_buffer[FN_REFLEN];
+  static char extra_file_buffer[FN_REFLEN];
+  char **orig_argv= argv;
+
+  argv++; /* Skip program name */
+
+  my_defaults_file= my_defaults_group_suffix= my_defaults_extra_file= 0;
+  my_no_defaults= my_print_defaults= FALSE;
+
+  if (*argv && !strcmp(*argv, "--no-defaults"))
+  {
+    my_no_defaults= 1;
+    argv++;
+  }
+  else
+    for(; *argv; argv++)
+    {
+      if (!my_defaults_file && is_prefix(*argv, "--defaults-file="))
+        my_defaults_file= *argv + sizeof("--defaults-file=")-1;
+      else
+      if (!my_defaults_extra_file && is_prefix(*argv, "--defaults-extra-file="))
+        my_defaults_extra_file= *argv + sizeof("--defaults-extra-file=")-1;
+      else
+      if (!my_defaults_group_suffix && is_prefix(*argv, "--defaults-group-suffix="))
+        my_defaults_group_suffix= *argv + sizeof("--defaults-group-suffix=")-1;
+      else
+        break; /* first argument that is not a (new) defaults option */
+    }
+
+  if (*argv && !strcmp(*argv, "--print-defaults"))
+  {
+    my_print_defaults= 1;
+    my_defaults_mark_files= FALSE;
+    argv++;
+  }
+
+  if (! my_defaults_group_suffix)
+    my_defaults_group_suffix= getenv("MYSQL_GROUP_SUFFIX");
+
+  if (my_defaults_extra_file && my_defaults_extra_file != extra_file_buffer)
+  {
+    my_realpath(extra_file_buffer, my_defaults_extra_file, MYF(0)); /* result code is not checked */
+    my_defaults_extra_file= extra_file_buffer;
+  }
+
+  if (my_defaults_file && my_defaults_file != file_buffer)
+  {
+    my_realpath(file_buffer, my_defaults_file, MYF(0)); /* result code is not checked */
+    my_defaults_file= file_buffer;
+  }
+
+  return (int)(argv - orig_argv);
+}
+
+/*
+  Wrapper around my_load_defaults() for interface compatibility.
+
+  SYNOPSIS
+    load_defaults()
+    conf_file               Basename for configuration file to search for.
+                            If this is a path, then only this file is read.
+    groups                  Which [group] entries to read.
+                            Points to a null terminated array of pointers
+    argc                    Pointer to argc of original program
+    argv                    Pointer to argv of original program
+
+  NOTES
+
+    This function is NOT thread-safe as it uses a global pointer internally
+    (the file-scope default_directories).
+    See also notes for my_load_defaults().
+
+  RETURN
+    The return value of my_load_defaults()
+*/
+int load_defaults(const char *conf_file, const char **groups,
+                  int *argc, char ***argv)
+{
+  return my_load_defaults(conf_file, groups, argc, argv, &default_directories);
+}
+
+/*
+  Read options from configurations files
+
+  SYNOPSIS
+    my_load_defaults()
+    conf_file               Basename for configuration file to search for.
+                            If this is a path, then only this file is read.
+    groups                  Which [group] entries to read.
+                            Points to a null terminated array of pointers
+    argc                    Pointer to argc of original program
+    argv                    Pointer to argv of original program
+    default_directories     Pointer to a location where a pointer to the list
+                            of default directories will be stored
+
+  IMPLEMENTATION
+
+    Read options from configuration files and put them BEFORE the arguments
+    that are already in argc and argv.
This way the calling program can
+    easily let command line options override options in configuration files
+
+  NOTES
+    In case of fatal error, the function will print a warning and returns 2.
+    Everything allocated by the call has been released again in that case.
+
+    To free used memory one should call free_defaults() with the argument
+    that was put in *argv
+
+  RETURN
+    - If successful, 0 is returned. If 'default_directories' is not NULL,
+      a pointer to the array of default directory paths is stored to a location
+      it points to. That stored value must be passed to my_search_option_files()
+      later.
+
+    - A non-zero value returned by my_search_option_files() is passed on
+      unchanged (1 if a config file could not be read). In this case, the
+      value pointed to by default_directories is undefined.
+
+    - 2 is returned on a fatal error (out of memory).
+
+    - 4 is returned after the argument list has been printed because
+      --print-defaults was given. *argv is valid in this case.
+*/
+
+
+int my_load_defaults(const char *conf_file, const char **groups, int *argc,
+                     char ***argv, const char ***default_directories)
+{
+  DYNAMIC_ARRAY args;
+  my_bool args_inited= FALSE;          /* TRUE once 'args' must be freed */
+  int args_used= 0;
+  int error= 0;
+  MEM_ROOT alloc;
+  char *ptr,**res;
+  const char **dirs;
+  DBUG_ENTER("my_load_defaults");
+
+  init_alloc_root(key_memory_defaults, &alloc, 512, 0, MYF(0));
+  if ((dirs= init_default_directories(&alloc)) == NULL)
+    goto err;
+
+  args_used= get_defaults_options(*argv);
+
+  if (my_init_dynamic_array(key_memory_defaults, &args, sizeof(char*), 128, 64,
+                            MYF(0)))
+    goto err;
+  args_inited= TRUE;
+
+  /* Name MUST be set, even by embedded library */
+  if (insert_dynamic(&args, *argv))
+    goto err;
+
+  *argc-= args_used;
+  *argv+= args_used;
+
+  if (!my_no_defaults)
+  {
+    TYPELIB group; // XXX
+    struct handle_option_ctx ctx;
+
+    group.count=0;
+    group.name= "defaults";
+    group.type_names= groups;
+
+    for (; *groups ; groups++)
+      group.count++;
+
+    ctx.alloc= &alloc;
+    ctx.args= &args;
+    ctx.group= &group;
+
+    if ((error= my_search_option_files(conf_file, &ctx, dirs)))
+    {
+      delete_dynamic(&args);
+      free_root(&alloc,MYF(0));
+      DBUG_RETURN(error);
+    }
+  }
+
+  /*
+    One block holds a copy of the MEM_ROOT followed by the new argv.
+    free_defaults() relies on the MEM_ROOT being stored directly in front
+    of the array that is handed out.
+  */
+  if (!(ptr=(char*) alloc_root(&alloc, sizeof(alloc) +
+                               (args.elements + *argc + 3) * sizeof(char*))))
+    goto err;
+  res= (char**) (ptr+sizeof(alloc));
+
+  /* found arguments + command line arguments to new array */
+  memcpy(res, args.buffer, args.elements * sizeof(char*));
+
+  if (my_defaults_mark_files)
+  {
+    /* An empty file name after the marker means "from the command line" */
+    res[args.elements++]= file_marker;
+    res[args.elements++]= (char*)"";
+  }
+
+  if (*argc)
+    memcpy(res + args.elements, *argv, *argc * sizeof(char*));
+
+  (*argc)+= (int)args.elements;
+  *argv= res;
+  (*argv)[*argc]= 0;
+  *(MEM_ROOT*) ptr= alloc; /* Save alloc root for free */
+  delete_dynamic(&args);
+  if (my_print_defaults)
+  {
+    int i;
+    printf("%s would have been started with the following arguments:\n",
+           **argv);
+    for (i=1 ; i < *argc ; i++)
+      printf("%s ", (*argv)[i]);
+    puts("");
+    DBUG_RETURN(4);
+  }
+
+  if (default_directories)
+    *default_directories= dirs;
+
+  DBUG_RETURN(0);
+
+ err:
+  /* Nothing has been handed to the caller yet: release everything */
+  if (args_inited)
+    delete_dynamic(&args);
+  free_root(&alloc, MYF(0));
+  fprintf(stderr,"Fatal error in defaults handling. Program aborted\n");
+  DBUG_RETURN(2);
+}
+
+
+/*
+  Free the memory allocated by my_load_defaults().
+
+  argv must be the array my_load_defaults() stored in *argv: the MEM_ROOT
+  that owns all the memory is stored directly in front of it.
+*/
+void free_defaults(char **argv)
+{
+  MEM_ROOT ptr;
+  memcpy(&ptr, ((char *) argv) - sizeof(ptr), sizeof(ptr));
+  free_root(&ptr,MYF(0));
+}
+
+
+/*
+  Read config_file from directory dir, trying each default extension
+  unless config_file already has one.
+
+  RETURN
+    0   ok (missing files are not an error here)
+    <0  fatal error reported by search_default_file_with_ext()
+*/
+static int search_default_file(struct handle_option_ctx *ctx, const char *dir,
+                               const char *config_file)
+{
+  char **ext;
+  const char *empty_list[]= { "", 0 };
+  my_bool have_ext= fn_ext(config_file)[0] != 0;
+  const char **exts_to_use= have_ext ? empty_list : f_extensions;
+
+  for (ext= (char**) exts_to_use; *ext; ext++)
+  {
+    int error;
+    if ((error= search_default_file_with_ext(ctx, dir, *ext, config_file, 0)) < 0)
+      return error;
+  }
+  return 0;
+}
+
+
+/*
+  Skip over keyword and get argument after keyword
+
+  SYNOPSIS
+    get_argument()
+    keyword         Include directive keyword
+    kwlen           Length of keyword
+    ptr             Pointer to the keyword in the line under process
+    name            Name of the config file (used in the error message)
+    line            line number
+
+  RETURN
+    0   error
+    #   Returns pointer to the argument after the keyword.
+*/
+
+/*
+  Notes on the implementation of get_argument():
+  - kwlen is expected to be sizeof() of the keyword, i.e. it counts the
+    terminating NUL; hence the "- 1" when skipping the keyword.
+  - The line is modified in place: a NUL is written after the last
+    non-space character of the argument.
+  - The line need not end with a newline (last line of a file).
+*/
+static char *get_argument(const char *keyword, size_t kwlen,
+                          char *ptr, char *name, uint line)
+{
+  char *end;
+
+  /* Skip over "include / includedir keyword" and following whitespace */
+
+  for (ptr+= kwlen - 1;
+       my_isspace(&my_charset_latin1, ptr[0]);
+       ptr++)
+  {}
+
+  /*
+    Trim trailing whitespace from directory name.
+    Note that my_isspace() is true for \r and \n, so this also removes
+    the newline left by fgets(), if there is one.
+  */
+  end= ptr + strlen(ptr);
+  while (end > ptr && my_isspace(&my_charset_latin1, end[-1]))
+    end--;
+  end[0]= 0; /* Cut off end space */
+
+  /* Print error msg if there is nothing after !include* directive */
+  if (end == ptr)
+  {
+    fprintf(stderr,
+            "error: Wrong '!%s' directive in config file: %s at line %u\n",
+            keyword, name, line);
+    return 0;
+  }
+  return ptr;
+}
+
+
+/*
+  Open a configuration file (if exists) and read given options from it
+
+  SYNOPSIS
+    search_default_file_with_ext()
+    ctx                     Pointer to the structure to store actual
+                            parameters of the function.
+    dir                     directory to read; NULL means that config_file
+                            is used as given
+    ext                     Extension for configuration file
+    config_file             Name of configuration file
+    recursion_level         the level of recursion, got while processing
+                            "!include" or "!includedir"
+
+  NOTES
+    The groups to read are taken from ctx->group.
+    Files pulled in through "!include" / "!includedir" are read on a
+    best-effort basis: their result is not checked.
+
+  RETURN
+    0   Success
+    -1  Fatal error, abort
+    1   File not found (Warning)
+*/
+
+static int search_default_file_with_ext(struct handle_option_ctx *ctx,
+                                        const char *dir, const char *ext,
+                                        const char *config_file,
+                                        int recursion_level)
+{
+  char name[FN_REFLEN + 10], buff[4096], curr_gr[4096], *ptr, *end, **tmp_ext;
+  char *value, option[4096+2], tmp[FN_REFLEN];
+  static const char includedir_keyword[]= "includedir";
+  static const char include_keyword[]= "include";
+  const int max_recursion_level= 10;
+  MYSQL_FILE *fp;
+  uint line=0;
+  enum { NONE, PARSE, SKIP } found_group= NONE;
+  size_t i;
+  MY_DIR *search_dir;
+  FILEINFO *search_file;
+
+  if (safe_strlen(dir) + strlen(config_file) >= FN_REFLEN-3)
+    return 0; /* Ignore wrong paths */
+  if (dir)
+  {
+    end=convert_dirname(name, dir, NullS);
+    if (dir[0] == FN_HOMELIB) /* Add . to filenames in home */
+      *end++='.';
+    strxmov(end,config_file,ext,NullS);
+  }
+  else
+  {
+    strmov(name,config_file);
+  }
+  fn_format(name,name,"","",4);
+#if !defined(_WIN32)
+  {
+    MY_STAT stat_info;
+    if (!my_stat(name,&stat_info,MYF(0)))
+      return 1;
+    /*
+      Ignore world-writable regular files (exceptions apply).
+      This is mainly done to protect us to not read a file that may be
+      modified by anyone.
+
+      Also check access so that read only mounted (EROFS)
+      or immutable files (EPERM) that are suitable protections.
+
+      The main case we are allowing is a container readonly volume mount
+      from a filesystem that doesn't have unix permissions. This will
+      have a 0777 permission and access will set errno = EROFS.
+
+      Note if a ROFS has a file with permissions 04n6, access sets errno
+      EACCES, rather the ROFS, so in this case we'll error, even though
+      the ROFS is protecting the file.
+
+      An ideal, race free, implementation would do fstat / fstatvfs / ioctl
+      for permission, read only filesystem, and immutability respectively.
+    */
+    if ((stat_info.st_mode & S_IWOTH) &&
+        (stat_info.st_mode & S_IFMT) == S_IFREG &&
+        (access(name, W_OK) == 0 || (errno != EROFS && errno != EPERM)))
+    {
+      fprintf(stderr, "Warning: World-writable config file '%s' is ignored\n",
+              name);
+      return 0;
+    }
+  }
+#endif
+  if (!(fp= mysql_file_fopen(key_file_cnf, name, O_RDONLY, MYF(0))))
+    return 1; /* Ignore wrong files */
+
+  if (my_defaults_mark_files)
+    if (insert_dynamic(ctx->args, (uchar*) &file_marker) ||
+        add_option(ctx, name))
+      goto err;
+
+  while (mysql_file_fgets(buff, sizeof(buff) - 1, fp))
+  {
+    line++;
+    /* Ignore comment and empty lines */
+    for (ptr= buff; my_isspace(&my_charset_latin1, *ptr); ptr++)
+    {}
+
+    if (*ptr == '#' || *ptr == ';' || !*ptr)
+      continue;
+
+    /* Configuration File Directives */
+    if (*ptr == '!')
+    {
+      if (recursion_level >= max_recursion_level)
+      {
+        /*
+          Strip trailing whitespace (including the newline, if any) so
+          that the directive prints on one line. The lower bound keeps
+          the scan inside the line even when it has no newline.
+        */
+        end= ptr + strlen(ptr);
+        while (end > ptr && my_isspace(&my_charset_latin1, end[-1]))
+          end--;
+        end[0]= 0;
+        fprintf(stderr,
+                "Warning: skipping '%s' directive as maximum include "
+                "recursion level was reached in file %s at line %u\n",
+                ptr, name, line);
+        continue;
+      }
+
+      /* skip over `!' and following whitespace */
+      for (++ptr; my_isspace(&my_charset_latin1, ptr[0]); ptr++)
+      {}
+
+      if ((!strncmp(ptr, includedir_keyword,
+                    sizeof(includedir_keyword) - 1)) &&
+          my_isspace(&my_charset_latin1, ptr[sizeof(includedir_keyword) - 1]))
+      {
+        if (!(ptr= get_argument(includedir_keyword,
+                                sizeof(includedir_keyword),
+                                ptr, name, line)))
+          goto err;
+
+        if (!(search_dir= my_dir(ptr, MYF(MY_WME | MY_WANT_SORT))))
+          goto err;
+
+        for (i= 0; i < search_dir->number_of_files; i++)
+        {
+          search_file= search_dir->dir_entry + i;
+          ext= fn_ext2(search_file->name);
+
+          /* check extension */
+          for (tmp_ext= (char**) f_extensions; *tmp_ext; tmp_ext++)
+          {
+            if (!strcmp(ext, *tmp_ext))
+              break;
+          }
+
+          if (*tmp_ext)
+          {
+            fn_format(tmp, search_file->name, ptr, "",
+                      MY_UNPACK_FILENAME | MY_SAFE_PATH);
+
+            search_default_file_with_ext(ctx, "", "", tmp, recursion_level + 1);
+          }
+        }
+
+        my_dirend(search_dir);
+      }
+      else if ((!strncmp(ptr, include_keyword, sizeof(include_keyword) - 1)) &&
+               my_isspace(&my_charset_latin1, ptr[sizeof(include_keyword)-1]))
+      {
+        if (!(ptr= get_argument(include_keyword,
+                                sizeof(include_keyword), ptr,
+                                name, line)))
+          goto err;
+
+        search_default_file_with_ext(ctx, "", "", ptr, recursion_level + 1);
+      }
+
+      continue;
+    }
+
+    if (*ptr == '[') /* Group name */
+    {
+      if (!(end=(char *) strchr(++ptr,']')))
+      {
+        fprintf(stderr,
+                "error: Wrong group definition in config file: %s at line %u\n",
+                name,line);
+        goto err;
+      }
+      /* Remove end space; the '[' in front of ptr stops the scan */
+      for ( ; my_isspace(&my_charset_latin1,end[-1]) ; end--) ;
+      end[0]=0;
+
+      strmake(curr_gr, ptr, MY_MIN((size_t) (end-ptr), sizeof(curr_gr)-1));
+      found_group= find_type(curr_gr, ctx->group, FIND_TYPE_NO_PREFIX)
+                   ? PARSE : SKIP;
+      continue;
+    }
+    switch (found_group)
+    {
+    case NONE:
+      fprintf(stderr,
+              "error: Found option without preceding group in config file: %s at line: %u\n",
+              name,line);
+      goto err;
+    case PARSE:
+      break;
+    case SKIP:
+      continue;
+    }
+
+    end= remove_end_comment(ptr);
+    if ((value= strchr(ptr, '=')))
+      end= value;
+    /*
+      Trim space after the option name. The name can be empty (line
+      starting with '='), so never step back past its first character.
+    */
+    for ( ; end > ptr && my_isspace(&my_charset_latin1,end[-1]) ; end--) ;
+    ptr= strmake(strmov(option,"--"), ptr, (size_t) (end-ptr));
+    if (value)
+    {
+      /* Remove pre- and end space */
+      char *value_end;
+      for (value++ ; my_isspace(&my_charset_latin1,*value); value++) ;
+      value_end=strend(value);
+      /*
+        We don't have to test for value_end >= value as we know there is
+        an '=' before
+      */
+      for ( ; my_isspace(&my_charset_latin1,value_end[-1]) ; value_end--) ;
+      if (value_end < value) /* Empty string */
+        value_end=value;
+
+      /* remove quotes around argument */
+      if ((*value == '\"' || *value == '\'') && /* First char is quote */
+          (value + 1 < value_end ) && /* String is longer than 1 */
+          *value == value_end[-1] ) /* First char is equal to last char */
+      {
+        value++;
+        value_end--;
+      }
+      *ptr++= '=';
+      for ( ; value != value_end; value++)
+      {
+        if (*value == '\\' && value != value_end-1)
+        {
+          switch(*++value) {
+          case 'n':
+            *ptr++='\n';
+            break;
+          case 't':
+            *ptr++= '\t';
+            break;
+          case 'r':
+            *ptr++ = '\r';
+            break;
+          case 'b':
+            *ptr++ = '\b';
+            break;
+          case 's':
+            *ptr++= ' '; /* space */
+            break;
+          case '\"':
+            *ptr++= '\"';
+            break;
+          case '\'':
+            *ptr++= '\'';
+            break;
+          case '\\':
+            *ptr++= '\\';
+            break;
+          default: /* Unknown; Keep '\' */
+            *ptr++= '\\';
+            *ptr++= *value;
+            break;
+          }
+        }
+        else
+          *ptr++= *value;
+      }
+      *ptr=0;
+    }
+
+    if (add_option(ctx, option))
+      goto err;
+  }
+  mysql_file_fclose(fp, MYF(0));
+  return(0);
+
+ err:
+  mysql_file_fclose(fp, MYF(0));
+  return -1; /* Fatal error */
+}
+
+
+/*
+  Cut the line at the first '#' that is not inside a quoted string.
+
+  Both ' and " open a string; it is closed by the same character unless
+  that character is preceded by a backslash inside the string.
+
+  RETURN
+    Pointer to the new end of the line (the NUL that replaced '#', or the
+    original terminating NUL)
+*/
+static char *remove_end_comment(char *ptr)
+{
+  char quote= 0; /* we are inside quote marks */
+  char escape= 0; /* symbol is protected by escape character */
+
+  for (; *ptr; ptr++)
+  {
+    if ((*ptr == '\'' || *ptr == '\"') && !escape)
+    {
+      if (!quote)
+        quote= *ptr;
+      else if (quote == *ptr)
+        quote= 0;
+    }
+    /* We are not inside a string */
+    if (!quote && *ptr == '#')
+    {
+      *ptr= 0;
+      return ptr;
+    }
+    escape= (quote && *ptr == '\\' && !escape);
+  }
+  return ptr;
+}
+
+
+/*
+  Print to stdout the option files that would be read for conf_file,
+  in the order they are read.
+*/
+void my_print_default_files(const char *conf_file)
+{
+  const char *empty_list[]= { "", 0 };
+  my_bool have_ext= fn_ext(conf_file)[0] != 0;
+  const char **exts_to_use= have_ext ? empty_list : f_extensions;
+  char name[FN_REFLEN], **ext;
+
+  puts("\nDefault options are read from the following files in the given order:");
+  if (my_defaults_file)
+  {
+    puts(my_defaults_file);
+    return;
+  }
+
+  if (dirname_length(conf_file))
+    fputs(conf_file,stdout);
+  else
+  {
+    const char **dirs;
+    MEM_ROOT alloc;
+    init_alloc_root(key_memory_defaults, &alloc, 512, 0, MYF(0));
+
+    if ((dirs= init_default_directories(&alloc)) == NULL)
+    {
+      fputs("Internal error initializing default directories list", stdout);
+    }
+    else
+    {
+      for ( ; *dirs; dirs++)
+      {
+        for (ext= (char**) exts_to_use; *ext; ext++)
+        {
+          const char *pos;
+          char *end;
+          if (**dirs)
+            pos= *dirs;
+          else if (my_defaults_extra_file)
+          {
+            /* Empty entry: the slot of --defaults-extra-file */
+            pos= my_defaults_extra_file;
+            fputs(pos, stdout);
+            fputs(" ", stdout);
+            continue;
+          }
+          else
+            continue;
+          end= convert_dirname(name, pos, NullS);
+          if (name[0] == FN_HOMELIB) /* Add . to filenames in home */
+            *end++= '.';
+          strxmov(end, conf_file, *ext, " ", NullS);
+          fputs(name, stdout);
+        }
+      }
+    }
+
+    free_root(&alloc, MYF(0));
+  }
+  puts("");
+}
+
+/*
+  Print to stdout the option files and groups that are read, followed by
+  the help text for the --defaults-* options.
+*/
+void print_defaults(const char *conf_file, const char **groups)
+{
+  const char **groups_save= groups;
+  my_print_default_files(conf_file);
+
+  fputs("The following groups are read:",stdout);
+  for ( ; *groups ; groups++)
+  {
+    fputc(' ',stdout);
+    fputs(*groups,stdout);
+  }
+
+  if (my_defaults_group_suffix)
+  {
+    /* Second pass: the same groups with the suffix appended */
+    groups= groups_save;
+    for ( ; *groups ; groups++)
+    {
+      fputc(' ',stdout);
+      fputs(*groups,stdout);
+      fputs(my_defaults_group_suffix,stdout);
+    }
+  }
+  puts("\nThe following options may be given as the first argument:\n\
+--print-defaults Print the program argument list and exit.\n\
+--no-defaults Don't read default options from any option file.\n\
+The following specify which files/extra groups are read (specified before remaining options):\n\
+--defaults-file=# Only read default options from the given file #.\n\
+--defaults-extra-file=# Read this file after the global files are read.\n\
+--defaults-group-suffix=# Additionally read default groups with # appended as a suffix.");
+}
+
+
+/*
+  Normalize dir, copy it into alloc and append it to dirs.
+
+  RETURN
+    0  ok
+    1  out of memory
+*/
+static int add_directory(MEM_ROOT *alloc, const char *dir, const char **dirs)
+{
+  char buf[FN_REFLEN];
+  size_t len;
+  char *p;
+  my_bool err __attribute__((unused));
+
+  len= normalize_dirname(buf, dir);
+  if (!(p= strmake_root(alloc, buf, len)))
+    return 1; /* Failure */
+  /* Should never fail if DEFAULT_DIRS_SIZE is correct size */
+  err= array_append_string_unique(p, dirs, DEFAULT_DIRS_SIZE);
+  DBUG_ASSERT(err == FALSE);
+
+  return 0;
+}
+
+#ifdef _WIN32
+/*
+  Store in buf the directory above the one that holds the running
+  executable.
+
+  RETURN
+    buf, or NULL if GetModuleFileName() failed
+*/
+static const char *my_get_module_parent(char *buf, size_t size)
+{
+  char *last= NULL;
+  char *end;
+  if (!GetModuleFileName(NULL, buf, (DWORD) size))
+    return NULL;
+  end= strend(buf);
+
+  /*
+    Look for the second-to-last \ in the filename, but hang on
+    to a pointer after the last \ in case we're in the root of
+    a drive.
+  */
+  for ( ; end > buf; end--)
+  {
+    if (*end == FN_LIBCHAR)
+    {
+      if (last)
+      {
+        /* Keep the last '\' as this works both with D:\ and a directory */
+        end[1]= 0;
+        break;
+      }
+      last= end;
+    }
+  }
+
+  return buf;
+}
+#endif /* _WIN32 */
+
+
+/*
+  Build the NULL-terminated list of default directories in alloc.
+  The entry "" is the placeholder for --defaults-extra-file.
+
+  RETURN
+    The list, or NULL if memory could not be allocated
+*/
+static const char **init_default_directories(MEM_ROOT *alloc)
+{
+  const char **dirs;
+  char *env;
+  int errors= 0;
+  DBUG_ENTER("init_default_directories");
+
+  dirs= (const char **)alloc_root(alloc, DEFAULT_DIRS_SIZE * sizeof(char *));
+  if (dirs == NULL)
+    DBUG_RETURN(NULL);
+  bzero((char *) dirs, DEFAULT_DIRS_SIZE * sizeof(char *));
+
+#ifdef _WIN32
+
+  {
+    char fname_buffer[FN_REFLEN];
+    if (GetSystemWindowsDirectory(fname_buffer, sizeof(fname_buffer)))
+      errors += add_directory(alloc, fname_buffer, dirs);
+
+    if (GetWindowsDirectory(fname_buffer, sizeof(fname_buffer)))
+      errors += add_directory(alloc, fname_buffer, dirs);
+
+    errors += add_directory(alloc, "C:/", dirs);
+
+    if (my_get_module_parent(fname_buffer, sizeof(fname_buffer)) != NULL)
+    {
+      errors += add_directory(alloc, fname_buffer, dirs);
+
+      strcat_s(fname_buffer, sizeof(fname_buffer), "/data");
+      errors += add_directory(alloc, fname_buffer, dirs);
+    }
+  }
+
+#else
+
+#if defined(DEFAULT_SYSCONFDIR)
+  if (DEFAULT_SYSCONFDIR[0])
+    errors += add_directory(alloc, DEFAULT_SYSCONFDIR, dirs);
+#else
+  errors += add_directory(alloc, "/etc/", dirs);
+  errors += add_directory(alloc, "/etc/mysql/", dirs);
+#endif /* DEFAULT_SYSCONFDIR */
+
+#endif
+
+  /*
+    If value of $MARIADB_HOME environment variable name is NULL, check
+    for $MYSQL_HOME
+  */
+  if ((env= getenv("MARIADB_HOME")))
+    errors += add_directory(alloc, env, dirs);
+  else
+  {
+    if ((env= getenv("MYSQL_HOME")))
+      errors += add_directory(alloc, env, dirs);
+  }
+
+  /* Placeholder for --defaults-extra-file= */
+  errors += add_directory(alloc, "", dirs);
+
+#if !defined(_WIN32)
+  errors += add_directory(alloc, "~/", dirs);
+#endif
+
+  DBUG_RETURN(errors > 0 ? NULL : dirs);
+}
diff --git a/mysys/my_delete.c b/mysys/my_delete.c
new file mode 100644
index 00000000..6854033f
--- /dev/null
+++ b/mysys/my_delete.c
@@ -0,0 +1,261 @@
+/* Copyright (c) 2000, 2010, Oracle and/or its affiliates. All rights reserved.
+
+   This program is free software; you can redistribute it and/or modify
+   it under the terms of the GNU General Public License as published by
+   the Free Software Foundation; version 2 of the License.
+
+   This program is distributed in the hope that it will be useful,
+   but WITHOUT ANY WARRANTY; without even the implied warranty of
+   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+   GNU General Public License for more details.
+
+   You should have received a copy of the GNU General Public License
+   along with this program; if not, write to the Free Software
+   Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1335 USA */
+
+#include "mysys_priv.h"
+#include "mysys_err.h"
+#include
+
+#ifdef _WIN32
+#include /* rmdir */
+static int my_win_unlink(const char *name);
+#endif
+
+CREATE_NOSYMLINK_FUNCTION(
+  unlink_nosymlinks(const char *pathname),
+  unlinkat(dfd, filename, 0),
+  unlink(pathname)
+);
+
+/*
+  Delete a file.
+
+  SYNOPSIS
+    my_delete()
+    name      Path of the file to delete
+    MyFlags   MY_NOSYMLINKS     use unlink_nosymlinks() (not on _WIN32)
+              MY_IGNORE_ENOENT  a missing file is not an error
+              MY_FAE / MY_WME   report a failure through my_error()
+              MY_SYNC_DIR       sync the directory after a successful
+                                delete
+
+  RETURN
+    0      ok (also for a missing file with MY_IGNORE_ENOENT)
+    other  the delete failed (my_errno is set), or -1 if syncing the
+           directory failed
+*/
+int my_delete(const char *name, myf MyFlags)
+{
+  int err;
+  DBUG_ENTER("my_delete");
+  DBUG_PRINT("my",("name %s MyFlags %lu", name, MyFlags));
+
+#ifdef _WIN32
+  err = my_win_unlink(name);
+#else
+  if (MyFlags & MY_NOSYMLINKS)
+    err= unlink_nosymlinks(name);
+  else
+    err= unlink(name);
+#endif
+
+  /*
+    errno is only meaningful when the delete failed; after a success it
+    may still hold ENOENT from an earlier call, which must not make us
+    skip the directory sync below.
+  */
+  if (err && (MyFlags & MY_IGNORE_ENOENT) && errno == ENOENT)
+    DBUG_RETURN(0);
+
+  if (err)
+  {
+    my_errno= errno;
+    if (MyFlags & (MY_FAE+MY_WME))
+      my_error(EE_DELETE, MYF(ME_BELL), name, errno);
+  }
+  else if ((MyFlags & MY_SYNC_DIR) && my_sync_dir_by_file(name, MyFlags))
+    err= -1;
+  DBUG_RETURN(err);
+} /* my_delete */
+
+
+#if defined (_WIN32)
+
+/*
+  Delete file.
+
+  The function also makes best effort to minimize number of errors,
+  where another program (or thread in the current program) has the same file
+  open.
+
+  We're using several tricks to prevent the errors, such as
+
+  - Windows 10 "posix semantics" delete
+
+  - Avoid the error by using CreateFile() with FILE_FLAG_DELETE_ON_CLOSE, instead
+  of DeleteFile()
+
+  - If file which is deleted (delete on close) but has not entirely gone,
+  because it is still opened by some app, an attempt to recreate file with the
+  same name would result in yet another error. The workaround here is renaming
+  a file to unique name.
+
+  Symbolic link are deleted without renaming. Directories are not deleted.
+
+  A sharing violation is retried up to FILE_SHARING_VIOLATION_RETRIES
+  times, sleeping FILE_SHARING_VIOLATION_DELAY_MS between attempts.
+
+  RETURN
+    0   ok
+    -1  error; errno is set
+*/
+#include
+
+static int my_win_unlink(const char *name)
+{
+  HANDLE handle= INVALID_HANDLE_VALUE;
+  DWORD attributes;
+  uint last_error;
+  char unique_filename[MAX_PATH + 35];
+  unsigned long long tsc; /* time stamp counter, for unique filename*/
+  int retries;
+  DBUG_ENTER("my_win_unlink");
+
+  DBUG_INJECT_FILE_SHARING_VIOLATION(name);
+
+  for (retries= FILE_SHARING_VIOLATION_RETRIES; ; retries--)
+  {
+    attributes= GetFileAttributes(name);
+    if (attributes == INVALID_FILE_ATTRIBUTES)
+    {
+      last_error= GetLastError();
+      DBUG_PRINT("error",
+                 ("GetFileAttributes(%s) failed with %u\n", name, last_error));
+      goto error;
+    }
+
+    if (attributes & FILE_ATTRIBUTE_DIRECTORY)
+    {
+      DBUG_PRINT("error", ("can't remove %s - it is a directory\n", name));
+      errno= EINVAL;
+      DBUG_RETURN(-1);
+    }
+
+    if (attributes & FILE_ATTRIBUTE_REPARSE_POINT)
+    {
+      /* Symbolic link. Delete link, not the target */
+      if (!DeleteFile(name))
+      {
+        last_error= GetLastError();
+        DBUG_PRINT("error",
+                   ("DeleteFile(%s) failed with %u\n", name, last_error));
+        goto error;
+      }
+      DBUG_RETURN(0);
+    }
+
+    /*
+      Try Windows 10 method, delete with "posix semantics" (file is not
+      visible, and creating a file with the same name won't fail, even if it
+      the file was open)
+    */
+    handle= CreateFile(name, DELETE,
+                       FILE_SHARE_READ | FILE_SHARE_WRITE | FILE_SHARE_DELETE,
+                       NULL, OPEN_EXISTING, 0, NULL);
+    if (handle != INVALID_HANDLE_VALUE)
+    {
+      /* 0x3 = FILE_DISPOSITION_FLAG_DELETE | FILE_DISPOSITION_FLAG_POSIX_SEMANTICS */
+      struct {DWORD _Flags;} disp= {0x3};
+      BOOL ok= SetFileInformationByHandle(
+          handle, (FILE_INFO_BY_HANDLE_CLASS) 21, &disp, sizeof(disp));
+      CloseHandle(handle);
+      if (ok)
+        DBUG_RETURN(0);
+    }
+
+    handle= CreateFile(name, DELETE, 0, NULL, OPEN_EXISTING,
+                       FILE_FLAG_DELETE_ON_CLOSE, NULL);
+    if (handle != INVALID_HANDLE_VALUE)
+    {
+      /*
+        We opened file without sharing flags (exclusive), no one else has this
+        file opened, thus it is safe to close handle to remove it. No renaming
+        is necessary.
+      */
+      CloseHandle(handle);
+      DBUG_RETURN(0);
+    }
+
+    /*
+      Can't open file exclusively, hence the file must be already opened by
+      someone else. Open it for delete (with all FILE_SHARE flags set),
+      rename to unique name, close.
+    */
+    handle= CreateFile(name, DELETE,
+                       FILE_SHARE_READ | FILE_SHARE_WRITE | FILE_SHARE_DELETE,
+                       NULL, OPEN_EXISTING, FILE_FLAG_DELETE_ON_CLOSE, NULL);
+    if (handle == INVALID_HANDLE_VALUE)
+    {
+      last_error= GetLastError();
+      DBUG_PRINT(
+          "error",
+          ("CreateFile(%s) with FILE_FLAG_DELETE_ON_CLOSE failed with %u\n",
+           name, last_error));
+      goto error;
+    }
+
+    tsc= my_timer_cycles();
+    my_snprintf(unique_filename, sizeof(unique_filename), "%s.%llx.deleted",
+                name, tsc);
+    if (!MoveFile(name, unique_filename))
+    {
+      DBUG_PRINT("warning",
+                 ("moving %s to unique filename failed, error %lu\n", name,
+                  GetLastError()));
+    }
+    CloseHandle(handle);
+    DBUG_RETURN(0);
+
+error:
+    if (last_error != ERROR_SHARING_VIOLATION || retries == 0)
+    {
+      my_osmaperr(last_error);
+      DBUG_RETURN(-1);
+    }
+    DBUG_CLEAR_FILE_SHARING_VIOLATION();
+    Sleep(FILE_SHARING_VIOLATION_DELAY_MS);
+  }
+}
+#endif
+
+/*
+  Remove directory recursively.
+
+  SYNOPSIS
+    my_rmtree()
+    dir       Directory to remove
+    MyFlags   Flags passed on to my_delete() for every file
+
+  NOTES
+    Stops at the first entry that can not be removed; entries removed
+    before that stay removed.
+    An entry whose full path does not fit in FN_REFLEN is an error
+    (errno and my_errno are set to ENAMETOOLONG); a truncated path is
+    never passed to my_delete().
+
+  RETURN
+    0      ok
+    other  error
+*/
+int my_rmtree(const char *dir, myf MyFlags)
+{
+  char path[FN_REFLEN];
+  char sep[] = { FN_LIBCHAR, 0 };
+  int err = 0;
+  size_t i;
+
+  MY_DIR *dir_info = my_dir(dir, MYF(MY_DONT_SORT | MY_WANT_STAT));
+  if (!dir_info)
+    return 1;
+
+  for (i = 0; i < dir_info->number_of_files; i++)
+  {
+    FILEINFO *file = dir_info->dir_entry + i;
+    /* Skip "." and ".." */
+    if (!strcmp(file->name, ".") || !strcmp(file->name, ".."))
+      continue;
+
+    /* dir + separator + name + NUL must fit in path */
+    if (strlen(dir) + 1 + strlen(file->name) >= sizeof(path))
+    {
+      my_errno= errno= ENAMETOOLONG;
+      err = 1;
+      break;
+    }
+
+    /*
+      strxnmov() copies at most 'len' characters and then adds the
+      terminating NUL, so the destination must hold len + 1 bytes.
+    */
+    strxnmov(path, sizeof(path) - 1, dir, sep, file->name, NullS);
+
+    if (!MY_S_ISDIR(file->mystat->st_mode))
+    {
+      err = my_delete(path, MyFlags);
+#ifdef _WIN32
+      /*
+        On Windows, check and possible reset readonly attribute.
+        my_delete(), or DeleteFile does not remove these files.
+      */
+      if (err)
+      {
+        DWORD attr = GetFileAttributes(path);
+        if (attr != INVALID_FILE_ATTRIBUTES &&
+            (attr & FILE_ATTRIBUTE_READONLY))
+        {
+          SetFileAttributes(path, attr &~FILE_ATTRIBUTE_READONLY);
+          err = my_delete(path, MyFlags);
+        }
+      }
+#endif
+    }
+    else
+      err = my_rmtree(path, MyFlags);
+
+    if (err)
+      break;
+  }
+
+  my_dirend(dir_info);
+
+  if (!err)
+    err = rmdir(dir);
+
+  return err;
+}
+
+
diff --git a/mysys/my_div.c b/mysys/my_div.c
new file mode 100644
index 00000000..3395d142
--- /dev/null
+++ b/mysys/my_div.c
@@ -0,0 +1,38 @@
+/* Copyright (c) 2000, 2002, 2004, 2007 MySQL AB
+   Use is subject to license terms
+
+   This program is free software; you can redistribute it and/or modify
+   it under the terms of the GNU General Public License as published by
+   the Free Software Foundation; version 2 of the License.
+
+   This program is distributed in the hope that it will be useful,
+   but WITHOUT ANY WARRANTY; without even the implied warranty of
+   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+   GNU General Public License for more details.
+ + You should have received a copy of the GNU General Public License + along with this program; if not, write to the Free Software + Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1335 USA */ + +#include "mysys_priv.h" + +/* + Get filename of file + + SYNOPSIS + my_filename() + fd File descriptor +*/ + +char * my_filename(File fd) +{ + DBUG_ENTER("my_filename"); + if ((uint) fd >= (uint) my_file_limit || !my_file_info[fd].name) + DBUG_RETURN((char*) "UNKNOWN"); + if (fd >= 0 && my_file_info[fd].type != UNOPEN) + { + DBUG_RETURN(my_file_info[fd].name); + } + else + DBUG_RETURN((char*) "UNOPENED"); /* Debug message */ +} diff --git a/mysys/my_dlerror.c b/mysys/my_dlerror.c new file mode 100644 index 00000000..ab34da0e --- /dev/null +++ b/mysys/my_dlerror.c @@ -0,0 +1,31 @@ +/* + Copyright (c) 2017, MariaDB + + This program is free software; you can redistribute it and/or modify + it under the terms of the GNU General Public License as published by + the Free Software Foundation; version 2 of the License. + + This program is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + GNU General Public License for more details. 
+ + You should have received a copy of the GNU General Public License + along with this program; if not, write to the Free Software + Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1335 USA */ + +#include +#include + +const char *my_dlerror(const char *dlpath) +{ + const char *errmsg=dlerror(); + size_t dlpathlen= strlen(dlpath); + if (!strncmp(dlpath, errmsg, dlpathlen)) + { /* if errmsg starts from dlpath, trim this prefix */ + errmsg+=dlpathlen; + if (*errmsg == ':') errmsg++; + if (*errmsg == ' ') errmsg++; + } + return errmsg; +} diff --git a/mysys/my_error.c b/mysys/my_error.c new file mode 100644 index 00000000..106e51de --- /dev/null +++ b/mysys/my_error.c @@ -0,0 +1,329 @@ +/* Copyright (c) 2000, 2013, Oracle and/or its affiliates. All rights reserved. + + This program is free software; you can redistribute it and/or modify + it under the terms of the GNU General Public License as published by + the Free Software Foundation; version 2 of the License. + + This program is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + GNU General Public License for more details. + + You should have received a copy of the GNU General Public License + along with this program; if not, write to the Free Software + Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1335 USA */ + +#include "mysys_priv.h" +#include "mysys_err.h" +#include +#include +#include + +/* Max length of a error message. Should be kept in sync with MYSQL_ERRMSG_SIZE. */ +#define ERRMSGSIZE (512) + +/* Define some external variables for error handling */ + +/* + WARNING! + my_error family functions have to be used according following rules: + - if message have not parameters use my_message(ER_CODE, ER(ER_CODE), MYF(N)) + - if message registered use my_error(ER_CODE, MYF(N), ...). 
+ - With some special text of errror message use: + my_printf_error(ER_CODE, format, MYF(N), ...) +*/ + +/* + Message texts are registered into a linked list of 'my_err_head' structs. + Each struct contains (1.) an array of pointers to C character strings with + '\0' termination, (2.) the error number for the first message in the array + (array index 0) and (3.) the error number for the last message in the array + (array index (last - first)). + The array may contain gaps with NULL pointers and pointers to empty strings. + Both kinds of gaps will be translated to "Unknown error %d.", if my_error() + is called with a respective error number. + The list of header structs is sorted in increasing order of error numbers. + Negative error numbers are allowed. Overlap of error numbers is not allowed. + Not registered error numbers will be translated to "Unknown error %d.". +*/ +static struct my_err_head +{ + struct my_err_head *meh_next; /* chain link */ + const char** (*get_errmsgs)(int nr); /* returns error message format */ + uint meh_first; /* error number matching array slot 0 */ + uint meh_last; /* error number matching last slot */ +} my_errmsgs_globerrs= +{NULL, get_global_errmsgs, EE_ERROR_FIRST, EE_ERROR_LAST}; + +static struct my_err_head *my_errmsgs_list= &my_errmsgs_globerrs; + + +/** + @brief Get an error format string from one of the my_error_register()ed sets + + @note + NULL values are possible even within a registered range. + + @param nr Errno + + @retval NULL if no message is registered for this error number + @retval str C-string +*/ + +const char *my_get_err_msg(uint nr) +{ + const char *format; + struct my_err_head *meh_p; + + /* Search for the range this error is in. */ + for (meh_p= my_errmsgs_list; meh_p; meh_p= meh_p->meh_next) + if (nr <= meh_p->meh_last) + break; + + /* + If we found the range this error number is in, get the format string. + If the string is empty, or a NULL pointer, or if we're out of return, + we return NULL. 
+ */ + if (!(format= (meh_p && (nr >= meh_p->meh_first)) ? + meh_p->get_errmsgs(nr)[nr - meh_p->meh_first] : NULL) || + !*format) + return NULL; + + return format; +} + + +/** + Fill in and print a previously registered error message. + + @note + Goes through the (sole) function registered in error_handler_hook + + @param nr error number + @param MyFlags Flags + @param ... variable list matching that error format string +*/ + +void my_error(uint nr, myf MyFlags, ...) +{ + const char *format; + va_list args; + char ebuff[ERRMSGSIZE]; + DBUG_ENTER("my_error"); + DBUG_PRINT("my", ("nr: %d MyFlags: %lu errno: %d", nr, MyFlags, errno)); + if (!(format = my_get_err_msg(nr))) + (void) my_snprintf(ebuff, sizeof(ebuff), "Unknown error %d", nr); + else + { + va_start(args,MyFlags); + (void) my_vsnprintf_ex(&my_charset_utf8mb3_general_ci, ebuff, + sizeof(ebuff), format, args); + va_end(args); + } + (*error_handler_hook)(nr, ebuff, MyFlags); + DBUG_VOID_RETURN; +} + + +/** + Print an error message. + + @note + Just like my_error, but for cases when the error message is not ER(error) + + @param error error number + @param format format string + @param MyFlags Flags + @param ... variable list matching that error format string +*/ + +void my_printf_error(uint error, const char *format, myf MyFlags, ...) +{ + va_list args; + char ebuff[ERRMSGSIZE]; + DBUG_ENTER("my_printf_error"); + DBUG_PRINT("my", ("nr: %d MyFlags: %lu errno: %d format: %s", + error, MyFlags, errno, format)); + + va_start(args,MyFlags); + (void) my_vsnprintf_ex(&my_charset_utf8mb3_general_ci, ebuff, + sizeof(ebuff), format, args); + va_end(args); + (*error_handler_hook)(error, ebuff, MyFlags); + DBUG_VOID_RETURN; +} + +/** + Print an error message. 
+ + @note + Goes through the (sole) function registered in error_handler_hook + + @param error error number + @param format format string + @param MyFlags Flags + @param ap variable list matching that error format string +*/ + +void my_printv_error(uint error, const char *format, myf MyFlags, va_list ap) +{ + char ebuff[ERRMSGSIZE]; + DBUG_ENTER("my_printv_error"); + DBUG_PRINT("my", ("nr: %d MyFlags: %lu errno: %d format: %s", + error, MyFlags, errno, format)); + + (void) my_vsnprintf(ebuff, sizeof(ebuff), format, ap); + (*error_handler_hook)(error, ebuff, MyFlags); + DBUG_VOID_RETURN; +} + + +/** + Print an error message. + + @note + Goes through the (sole) function registered in error_handler_hook + + @param error error number + @param str error message + @param MyFlags Flags +*/ + +void my_message(uint error, const char *str, register myf MyFlags) +{ + (*error_handler_hook)(error, str, MyFlags); +} + + +/** + Register error messages for use with my_error(). + + @description + + The pointer array is expected to contain addresses to NUL-terminated + C character strings. The array contains (last - first + 1) pointers. + NULL pointers and empty strings ("") are allowed. These will be mapped to + "Unknown error" when my_error() is called with a matching error number. + This function registers the error numbers 'first' to 'last'. + No overlapping with previously registered error numbers is allowed. + + @param errmsgs array of pointers to error messages + @param first error number of first message in the array + @param last error number of last message in the array + + @retval 0 OK + @retval != 0 Error +*/ + +int my_error_register(const char** (*get_errmsgs)(int error), uint first, + uint last) +{ + struct my_err_head *meh_p; + struct my_err_head **search_meh_pp; + + /* Allocate a new header structure. */ + if (! 
(meh_p= (struct my_err_head*) my_malloc(key_memory_my_err_head, + sizeof(struct my_err_head), + MYF(MY_WME)))) + return 1; + meh_p->get_errmsgs= get_errmsgs; + meh_p->meh_first= first; + meh_p->meh_last= last; + + /* Search for the right position in the list. */ + for (search_meh_pp= &my_errmsgs_list; + *search_meh_pp; + search_meh_pp= &(*search_meh_pp)->meh_next) + { + if ((*search_meh_pp)->meh_last > first) + break; + } + + /* Error numbers must be unique. No overlapping is allowed. */ + if (*search_meh_pp && ((*search_meh_pp)->meh_first <= last)) + { + my_free(meh_p); + return 1; + } + + /* Insert header into the chain. */ + meh_p->meh_next= *search_meh_pp; + *search_meh_pp= meh_p; + return 0; +} + + +/** + Unregister formerly registered error messages. + + @description + + This function unregisters the error numbers 'first' to 'last'. + These must have been previously registered by my_error_register(). + 'first' and 'last' must exactly match the registration. + If a matching registration is present, the header is removed from the + list and the pointer to the error messages pointers array is returned. + (The messages themselves are not released here as they may be static.) + Otherwise, NULL is returned. + + @param first error number of first message + @param last error number of last message + + @retval NULL Error, no such number range registered. + @retval non-NULL OK, returns address of error messages pointers array. +*/ + +my_bool my_error_unregister(uint first, uint last) +{ + struct my_err_head *meh_p; + struct my_err_head **search_meh_pp; + + /* Search for the registration in the list. */ + for (search_meh_pp= &my_errmsgs_list; + *search_meh_pp; + search_meh_pp= &(*search_meh_pp)->meh_next) + { + if (((*search_meh_pp)->meh_first == first) && + ((*search_meh_pp)->meh_last == last)) + break; + } + if (! *search_meh_pp) + return TRUE; + + /* Remove header from the chain. 
*/ + meh_p= *search_meh_pp; + *search_meh_pp= meh_p->meh_next; + + my_free(meh_p); + + return FALSE; +} + + +/** + Unregister all formerly registered error messages. + + @description + + This function unregisters all error numbers that previously have + been previously registered by my_error_register(). + All headers are removed from the list; the messages themselves are + not released here as they may be static. +*/ + +void my_error_unregister_all(void) +{ + struct my_err_head *cursor, *saved_next; + + for (cursor= my_errmsgs_globerrs.meh_next; cursor != NULL; cursor= saved_next) + { + /* We need this ptr, but we're about to free its container, so save it. */ + saved_next= cursor->meh_next; + + my_free(cursor); + } + my_errmsgs_globerrs.meh_next= NULL; /* Freed in first iteration above. */ + + my_errmsgs_list= &my_errmsgs_globerrs; +} diff --git a/mysys/my_file.c b/mysys/my_file.c new file mode 100644 index 00000000..c2b358f5 --- /dev/null +++ b/mysys/my_file.c @@ -0,0 +1,136 @@ +/* + Copyright (c) 2000, 2010, Oracle and/or its affiliates + + This program is free software; you can redistribute it and/or modify + it under the terms of the GNU General Public License as published by + the Free Software Foundation; version 2 of the License. + + This program is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + GNU General Public License for more details. 
+
+   You should have received a copy of the GNU General Public License
+   along with this program; if not, write to the Free Software
+   Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1335 USA */
+
+#include "mysys_priv.h"
+#include "my_static.h"
+#include
+
+/*
+  set how many open files we want to be able to handle
+
+  SYNOPSIS
+    set_max_open_files()
+    max_file_limit Files to open
+
+  NOTES
+    The request may not be fulfilled because of system limitations
+
+  RETURN
+    Files available to open.
+    May be more or less than max_file_limit!
+*/
+
+#if defined(HAVE_GETRLIMIT) && defined(RLIMIT_NOFILE)
+
+#ifndef RLIM_INFINITY
+#define RLIM_INFINITY ((uint) 0xffffffff)
+#endif
+
+static uint set_max_open_files(uint max_file_limit)
+{
+  struct rlimit rlimit;
+  uint old_cur;
+  DBUG_ENTER("set_max_open_files");
+  DBUG_PRINT("enter",("files: %u", max_file_limit));
+
+  if (!getrlimit(RLIMIT_NOFILE,&rlimit))
+  {
+    old_cur= (uint) rlimit.rlim_cur;
+    DBUG_PRINT("info", ("rlim_cur: %u rlim_max: %u",
+                        (uint) rlimit.rlim_cur,
+                        (uint) rlimit.rlim_max));
+    /* Nothing to do when the current limit already covers the request. */
+    if ((ulonglong) rlimit.rlim_cur == (ulonglong) RLIM_INFINITY ||
+        rlimit.rlim_cur >= max_file_limit)
+      DBUG_RETURN(max_file_limit);
+    /* Note: both the soft and the hard limit are set to the request. */
+    rlimit.rlim_cur= rlimit.rlim_max= max_file_limit;
+    if (setrlimit(RLIMIT_NOFILE, &rlimit))
+      max_file_limit= old_cur; /* Use original value */
+    else
+    {
+      /* Read the limit back to report what is really in effect now. */
+      rlimit.rlim_cur= 0; /* Safety if next call fails */
+      (void) getrlimit(RLIMIT_NOFILE,&rlimit);
+      DBUG_PRINT("info", ("rlim_cur: %u", (uint) rlimit.rlim_cur));
+      if (rlimit.rlim_cur) /* If call didn't fail */
+        max_file_limit= (uint) rlimit.rlim_cur;
+    }
+  }
+  DBUG_PRINT("exit",("max_file_limit: %u", max_file_limit));
+  DBUG_RETURN(max_file_limit);
+}
+
+#else
+static uint set_max_open_files(uint max_file_limit)
+{
+  /* We don't know the limit. Return best guess */
+  return MY_MIN(max_file_limit, OS_FILE_LIMIT);
+}
+#endif
+
+
+/*
+  Change number of open files
+
+  SYNOPSIS:
+    my_set_max_open_files()
+    files Number of requested files
+
+  NOTES
+    MY_FILE_MIN is added to the request before the limit is changed.
+    When the resulting limit exceeds MY_NFILE, a larger my_file_info
+    table is allocated and the entries of the current table are copied
+    into it.
+
+  RETURN
+    number of files available for open
+*/
+
+uint my_set_max_open_files(uint files)
+{
+  struct st_my_file_info *tmp;
+  DBUG_ENTER("my_set_max_open_files");
+  DBUG_PRINT("enter",("files: %u my_file_limit: %u", files, my_file_limit));
+
+  files+= MY_FILE_MIN;
+  files= set_max_open_files(MY_MIN(files, OS_FILE_LIMIT));
+  /* The default table already has MY_NFILE slots; no reallocation needed. */
+  if (files <= MY_NFILE)
+    DBUG_RETURN(files);
+
+  /* If the bigger table cannot be allocated, report MY_NFILE as the limit. */
+  if (!(tmp= (struct st_my_file_info*) my_malloc(key_memory_my_file_info,
+                                                 sizeof(*tmp) * files,
+                                                 MYF(MY_WME))))
+    DBUG_RETURN(MY_NFILE);
+
+  /* Copy any initialized files */
+  memcpy((char*) tmp, (char*) my_file_info,
+         sizeof(*tmp) * MY_MIN(my_file_limit, files));
+  /* Zero the new tail; the length is 0 when the table does not grow. */
+  bzero((char*) (tmp + my_file_limit),
+        MY_MAX((int) (files- my_file_limit), 0)*sizeof(*tmp));
+  my_free_open_file_info(); /* Free if already allocated */
+  my_file_info= tmp;
+  my_file_limit= files;
+  DBUG_PRINT("exit",("files: %u", files));
+  DBUG_RETURN(files);
+}
+
+
+/*
+  Release a heap-allocated my_file_info table, if one is in use, and fall
+  back to the static default table with MY_NFILE entries.
+*/
+void my_free_open_file_info()
+{
+  DBUG_ENTER("my_free_file_info");
+  if (my_file_info != my_file_info_default)
+  {
+    /* Copy data back for my_print_open_files */
+    memcpy((char*) my_file_info_default, my_file_info,
+           sizeof(*my_file_info_default)* MY_NFILE);
+    my_free(my_file_info);
+    my_file_info= my_file_info_default;
+    my_file_limit= MY_NFILE;
+  }
+  DBUG_VOID_RETURN;
+}
diff --git a/mysys/my_fopen.c b/mysys/my_fopen.c
new file mode 100644
index 00000000..2bc1da52
--- /dev/null
+++ b/mysys/my_fopen.c
@@ -0,0 +1,301 @@
+/* Copyright (c) 2000, 2012, Oracle and/or its affiliates
+   Copyright (c) 1985, 2011, Monty Program Ab
+
+   This program is free software; you can redistribute it and/or modify
+   it under the terms of the GNU General Public License as published by
+   the Free Software Foundation; version 2 of the License.
+ + This program is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + GNU General Public License for more details. + + You should have received a copy of the GNU General Public License + along with this program; if not, write to the Free Software + Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1335 USA */ + +#include "mysys_priv.h" +#include "my_static.h" +#include +#include "mysys_err.h" +#include "my_atomic.h" + +static void make_ftype(char * to,int flag); + +/* + Open a file as stream + + SYNOPSIS + my_fopen() + FileName Path-name of file + Flags Read | write | append | trunc (like for open()) + MyFlags Flags for handling errors + + RETURN + 0 Error + # File handler +*/ + +FILE *my_fopen(const char *filename, int flags, myf MyFlags) +{ + FILE *fd; + char type[10]; + DBUG_ENTER("my_fopen"); + DBUG_PRINT("my",("Name: '%s' flags: %d MyFlags: %lu", + filename, flags, MyFlags)); + + make_ftype(type,flags); + +#ifdef _WIN32 + fd= my_win_fopen(filename, type); +#else + fd= fopen(filename, type); +#endif + if (fd != 0) + { + /* + The test works if MY_NFILE < 128. The problem is that fileno() is char + on some OS (SUNOS). Actually the filename save isn't that important + so we can ignore if this doesn't work. 
+ */ + + int filedesc= my_fileno(fd); + if ((uint)filedesc >= my_file_limit) + { + statistic_increment(my_stream_opened,&THR_LOCK_open); + DBUG_RETURN(fd); /* safeguard */ + } + my_file_info[filedesc].name= my_strdup(key_memory_my_file_info, filename, MyFlags); + statistic_increment(my_stream_opened, &THR_LOCK_open); + statistic_increment(my_file_total_opened, &THR_LOCK_open); + my_file_info[filedesc].type= STREAM_BY_FOPEN; + DBUG_PRINT("exit",("stream: %p", fd)); + DBUG_RETURN(fd); + } + else + my_errno=errno; + DBUG_PRINT("error",("Got error %d on open",my_errno)); + if (MyFlags & (MY_FFNF | MY_FAE | MY_WME)) + my_error((flags & O_RDONLY) ? EE_FILENOTFOUND : EE_CANTCREATEFILE, + MYF(ME_BELL), filename, my_errno); + DBUG_RETURN((FILE*) 0); +} /* my_fopen */ + + +#if defined(_WIN32) + +static FILE *my_win_freopen(const char *path, const char *mode, FILE *stream) +{ + int handle_fd, fd= _fileno(stream); + HANDLE osfh; + + DBUG_ASSERT(path && stream); + DBUG_ASSERT(strchr(mode, 'a')); /* We use FILE_APPEND_DATA below */ + + /* Services don't have stdout/stderr on Windows, so _fileno returns -1. */ + if (fd < 0) + { + if (!freopen(path, mode, stream)) + return NULL; + + fd= _fileno(stream); + } + + if ((osfh= CreateFile(path, GENERIC_READ | FILE_APPEND_DATA, + FILE_SHARE_READ | FILE_SHARE_WRITE | + FILE_SHARE_DELETE, NULL, + OPEN_ALWAYS, FILE_ATTRIBUTE_NORMAL, + NULL)) == INVALID_HANDLE_VALUE) + return NULL; + + if ((handle_fd= _open_osfhandle((intptr_t)osfh, _O_TEXT)) == -1) + { + CloseHandle(osfh); + return NULL; + } + + if (_dup2(handle_fd, fd) < 0) + { + CloseHandle(osfh); + return NULL; + } + + _close(handle_fd); + + return stream; +} + +#endif + + +/** + Change the file associated with a file stream. + + @param path Path to file. + @param mode Mode of the stream. + @param stream File stream. + + @note + This function is used to redirect stdout and stderr to a file and + subsequently to close and reopen that file for log rotation. 
+ + @retval A FILE pointer on success. Otherwise, NULL. +*/ + +FILE *my_freopen(const char *path, const char *mode, FILE *stream) +{ + FILE *result; + +#if defined(_WIN32) + result= my_win_freopen(path, mode, stream); +#else + result= freopen(path, mode, stream); +#endif + + return result; +} + + +/* Close a stream */ +int my_fclose(FILE *fd, myf MyFlags) +{ + int err,file; + char *name= NULL; + DBUG_ENTER("my_fclose"); + DBUG_PRINT("my",("stream: %p MyFlags: %lu", fd, MyFlags)); + + file= my_fileno(fd); + if ((uint) file < my_file_limit && my_file_info[file].type != UNOPEN) + { + name= my_file_info[file].name; + my_file_info[file].name= NULL; + my_file_info[file].type= UNOPEN; + } +#ifndef _WIN32 + err= fclose(fd); +#else + err= my_win_fclose(fd); +#endif + if(err < 0) + { + my_errno=errno; + if (MyFlags & (MY_FAE | MY_WME)) + my_error(EE_BADCLOSE, MYF(ME_BELL), name, errno); + } + else + statistic_decrement(my_stream_opened, &THR_LOCK_open); + + if (name) + { + my_free(name); + } + DBUG_RETURN(err); +} /* my_fclose */ + + + /* Make a stream out of a file handle */ + /* Name may be 0 */ + +FILE *my_fdopen(File Filedes, const char *name, int Flags, myf MyFlags) +{ + FILE *fd; + char type[5]; + DBUG_ENTER("my_fdopen"); + DBUG_PRINT("my",("fd: %d Flags: %d MyFlags: %lu", + Filedes, Flags, MyFlags)); + + make_ftype(type,Flags); +#ifdef _WIN32 + fd= my_win_fdopen(Filedes, type); +#else + fd= fdopen(Filedes, type); +#endif + if (!fd) + { + my_errno=errno; + if (MyFlags & (MY_FAE | MY_WME)) + my_error(EE_CANT_OPEN_STREAM, MYF(ME_BELL), errno); + } + else + { + statistic_increment(my_stream_opened, &THR_LOCK_open); + if ((uint) Filedes < (uint) my_file_limit) + { + if (my_file_info[Filedes].type != UNOPEN) + { + /* File is opened with my_open ! 
*/ + my_atomic_add32_explicit(&my_file_opened, -1, MY_MEMORY_ORDER_RELAXED); + } + else + { + my_file_info[Filedes].name= my_strdup(key_memory_my_file_info, + name, MyFlags); + } + my_file_info[Filedes].type= STREAM_BY_FDOPEN; + } + } + + DBUG_PRINT("exit",("stream: %p", fd)); + DBUG_RETURN(fd); +} /* my_fdopen */ + + +/* + Make a fopen() typestring from a open() type bitmap + + SYNOPSIS + make_ftype() + to String for fopen() is stored here + flag Flag used by open() + + IMPLEMENTATION + This routine attempts to find the best possible match + between a numeric option and a string option that could be + fed to fopen. There is not a 1 to 1 mapping between the two. + + NOTE + On Unix, O_RDONLY is usually 0 + + MAPPING + r == O_RDONLY + w == O_WRONLY|O_TRUNC|O_CREAT + a == O_WRONLY|O_APPEND|O_CREAT + r+ == O_RDWR + w+ == O_RDWR|O_TRUNC|O_CREAT + a+ == O_RDWR|O_APPEND|O_CREAT + b == FILE_BINARY + e == O_CLOEXEC +*/ + +static void make_ftype(register char * to, register int flag) +{ + /* check some possible invalid combinations */ + DBUG_ASSERT((flag & (O_TRUNC | O_APPEND)) != (O_TRUNC | O_APPEND)); + DBUG_ASSERT((flag & (O_WRONLY | O_RDWR)) != (O_WRONLY | O_RDWR)); + + if ((flag & (O_RDONLY|O_WRONLY)) == O_WRONLY) + *to++= (flag & O_APPEND) ? 'a' : 'w'; + else if (flag & O_RDWR) + { + /* Add '+' after theese */ + if (flag & (O_TRUNC | O_CREAT)) + *to++= 'w'; + else if (flag & O_APPEND) + *to++= 'a'; + else + *to++= 'r'; + *to++= '+'; + } + else + *to++= 'r'; + + if (flag & FILE_BINARY) + *to++='b'; + else if (flag & O_TEXT) + *to++= 't'; + + if (O_CLOEXEC) + *to++= 'e'; + + *to='\0'; +} /* make_ftype */ diff --git a/mysys/my_fstream.c b/mysys/my_fstream.c new file mode 100644 index 00000000..a43fe13c --- /dev/null +++ b/mysys/my_fstream.c @@ -0,0 +1,194 @@ +/* Copyright (c) 2000, 2011, Oracle and/or its affiliates. All rights reserved. 
+
+   This program is free software; you can redistribute it and/or modify
+   it under the terms of the GNU General Public License as published by
+   the Free Software Foundation; version 2 of the License.
+
+   This program is distributed in the hope that it will be useful,
+   but WITHOUT ANY WARRANTY; without even the implied warranty of
+   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+   GNU General Public License for more details.
+
+   You should have received a copy of the GNU General Public License
+   along with this program; if not, write to the Free Software
+   Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1335 USA */
+
+/* USE_MY_STREAM isn't set because we can't trust my_fclose! */
+
+#include "mysys_priv.h"
+#include "mysys_err.h"
+#include
+#include
+
+/* Use the large-file aware variants when the platform provides them. */
+#ifdef HAVE_FSEEKO
+#undef ftell
+#undef fseek
+#define ftell(A) ftello(A)
+#define fseek(A,B,C) fseeko((A),(B),(C))
+#endif
+
+/*
+  Read a chunk of bytes from a FILE
+
+  SYNOPSIS
+   my_fread()
+   stream File descriptor
+   Buffer Buffer to read to
+   Count Number of bytes to read
+   MyFlags Flags on what to do on error
+
+  RETURN
+    (size_t) -1 Error
+    # Number of bytes read
+    0 Everything was read and MyFlags contains MY_NABP or MY_FNABP
+ */
+
+size_t my_fread(FILE *stream, uchar *Buffer, size_t Count, myf MyFlags)
+{
+  size_t readbytes;
+  DBUG_ENTER("my_fread");
+  DBUG_PRINT("my",("stream: %p Buffer %p Count: %u MyFlags: %lu",
+                   stream, Buffer, (uint) Count, MyFlags));
+
+  if ((readbytes= fread(Buffer, sizeof(char), Count, stream)) != Count)
+  {
+    DBUG_PRINT("error",("Read only %d bytes", (int) readbytes));
+    if (MyFlags & (MY_WME | MY_FAE | MY_FNABP))
+    {
+      /* A stream error is reported as EE_READ, a short read as EE_EOFERR. */
+      if (ferror(stream))
+        my_error(EE_READ, MYF(ME_BELL),
+                 my_filename(my_fileno(stream)),errno);
+      else
+      if (MyFlags & (MY_NABP | MY_FNABP))
+        my_error(EE_EOFERR, MYF(ME_BELL),
+                 my_filename(my_fileno(stream)),errno);
+    }
+    my_errno=errno ? errno : -1;
+    if (ferror(stream) || MyFlags & (MY_NABP | MY_FNABP))
+      DBUG_RETURN((size_t) -1); /* Return with error */
+  }
+  if (MyFlags & (MY_NABP | MY_FNABP))
+    DBUG_RETURN(0); /* Read ok */
+  DBUG_RETURN(readbytes);
+} /* my_fread */
+
+
+/*
+  Write a chunk of bytes to a stream
+
+   my_fwrite()
+   stream File descriptor
+   Buffer Buffer to write from
+   Count Number of bytes to write
+   MyFlags Flags on what to do on error
+
+  RETURN
+    (size_t) -1 Error
+    # Number of bytes written
+    0 Everything was written and MyFlags contains MY_NABP or MY_FNABP
+*/
+
+size_t my_fwrite(FILE *stream, const uchar *Buffer, size_t Count, myf MyFlags)
+{
+  size_t writtenbytes =0;
+  my_off_t seekptr;
+#if !defined(NO_BACKGROUND) && defined(USE_MY_STREAM)
+  uint errors;
+#endif
+  DBUG_ENTER("my_fwrite");
+  DBUG_PRINT("my",("stream:%p Buffer:%p Count: %u MyFlags: %lu",
+                   stream, Buffer, (uint) Count, MyFlags));
+
+#if !defined(NO_BACKGROUND) && defined(USE_MY_STREAM)
+  errors=0;
+#endif
+  /* Remember where this write starts so that a retry can reposition. */
+  seekptr= ftell(stream);
+  for (;;)
+  {
+    size_t written;
+    if ((written = (size_t) fwrite((char*) Buffer,sizeof(char),
+                                   Count, stream)) != Count)
+    {
+      DBUG_PRINT("error",("Write only %d bytes", (int) writtenbytes));
+      my_errno=errno;
+      /* Account for the part that did get written before retrying. */
+      if (written != (size_t) -1)
+      {
+        seekptr+=written;
+        Buffer+=written;
+        writtenbytes+=written;
+        Count-=written;
+      }
+#ifdef EINTR
+      /* Interrupted by a signal: reposition and write the remainder. */
+      if (errno == EINTR)
+      {
+        (void) my_fseek(stream,seekptr,MY_SEEK_SET,MYF(0));
+        continue;
+      }
+#endif
+#if !defined(NO_BACKGROUND) && defined(USE_MY_STREAM)
+      if (my_thread_var->abort)
+        MyFlags&= ~ MY_WAIT_IF_FULL; /* End if aborted by user */
+
+      if ((errno == ENOSPC || errno == EDQUOT) &&
+          (MyFlags & MY_WAIT_IF_FULL))
+      {
+        wait_for_free_space("[stream]", errors);
+        errors++;
+        (void) my_fseek(stream,seekptr,MY_SEEK_SET,MYF(0));
+        continue;
+      }
+#endif
+      if (ferror(stream) || (MyFlags & (MY_NABP | MY_FNABP)))
+      {
+        if (MyFlags & (MY_WME | MY_FAE | MY_FNABP))
+        {
+          my_error(EE_WRITE, MYF(ME_BELL),
+                   my_filename(my_fileno(stream)), errno);
+        }
+        writtenbytes= (size_t) -1; /* Return that we got error */
+        break;
+      }
+    }
+    if (MyFlags & (MY_NABP | MY_FNABP))
+      writtenbytes= 0; /* Everything OK */
+    else
+      writtenbytes+= written;
+    break;
+  }
+  DBUG_RETURN(writtenbytes);
+} /* my_fwrite */
+
+
+/*
+  Seek to position in file
+
+  RETURN
+    MY_FILEPOS_ERROR Seek failed
+    # Position in the stream after the seek
+*/
+
+my_off_t my_fseek(FILE *stream, my_off_t pos, int whence,
+                  myf MyFlags __attribute__((unused)))
+{
+  DBUG_ENTER("my_fseek");
+  DBUG_PRINT("my",("stream:%p pos: %llu whence: %d MyFlags: %lu",
+                   stream, (ulonglong) pos, whence, MyFlags));
+  DBUG_RETURN(fseek(stream, (off_t) pos, whence) ?
+              MY_FILEPOS_ERROR : (my_off_t) ftell(stream));
+} /* my_seek */
+
+
+/* Tell current position of file */
+
+my_off_t my_ftell(FILE *stream, myf MyFlags __attribute__((unused)))
+{
+  long long pos;
+  DBUG_ENTER("my_ftell");
+  DBUG_PRINT("my",("stream:%p MyFlags: %lu", stream, MyFlags));
+  pos=IF_WIN(_ftelli64(stream),ftell(stream));
+  DBUG_PRINT("exit",("ftell: %lld",pos));
+  DBUG_RETURN((my_off_t) pos);
+} /* my_ftell */
+
+
+/* Get a File corresponding to the stream */
+int my_fileno(FILE *f)
+{
+#ifdef _WIN32
+  return my_win_fileno(f);
+#else
+  return fileno(f);
+#endif
+}
diff --git a/mysys/my_gethwaddr.c b/mysys/my_gethwaddr.c
new file mode 100644
index 00000000..1f344af8
--- /dev/null
+++ b/mysys/my_gethwaddr.c
@@ -0,0 +1,214 @@
+/*
+   Copyright (c) 2004, 2010, Oracle and/or its affiliates
+   Copyright (c) 2011, Monty Program Ab
+
+   This program is free software; you can redistribute it and/or modify
+   it under the terms of the GNU General Public License as published by
+   the Free Software Foundation; version 2 of the License.
+
+   This program is distributed in the hope that it will be useful,
+   but WITHOUT ANY WARRANTY; without even the implied warranty of
+   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+   GNU General Public License for more details.
+ + You should have received a copy of the GNU General Public License + along with this program; if not, write to the Free Software + Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1335 USA */ + +/* get hardware address for an interface */ +/* if there are many available, any non-zero one can be used */ + +#include "mysys_priv.h" +#include + +#ifndef MAIN + +#if defined(_AIX) || defined(__APPLE__) || defined(__FreeBSD__) || defined(__OpenBSD__) || defined(__linux__) || defined(__sun) || defined(_WIN32) +static my_bool memcpy_and_test(uchar *to, uchar *from, uint len) +{ + uint i, res= 1; + + for (i= 0; i < len; i++) + if ((*to++= *from++)) + res= 0; + return res; +} +#endif + +#if defined(__APPLE__) || defined(__FreeBSD__) || defined(__OpenBSD__) +#ifdef __OpenBSD__ +#include +#include +#include +#else +#include +#endif +#include +#include +#include +#include + +my_bool my_gethwaddr(uchar *to) +{ + size_t len; + uchar *buf, *next, *end, *addr; + struct if_msghdr *ifm; + struct sockaddr_dl *sdl; + int res= 1, mib[6]= {CTL_NET, AF_ROUTE, 0, AF_LINK, NET_RT_IFLIST, 0}; + + if (sysctl(mib, 6, NULL, &len, NULL, 0) == -1) + goto err; + if (!(buf = alloca(len))) + goto err; + if (sysctl(mib, 6, buf, &len, NULL, 0) < 0) + goto err; + + end = buf + len; + + for (next = buf ; res && next < end ; next += ifm->ifm_msglen) + { + ifm = (struct if_msghdr *)next; + if (ifm->ifm_type == RTM_IFINFO) + { + sdl = (struct sockaddr_dl *)(ifm + 1); + addr= (uchar *)LLADDR(sdl); + res= memcpy_and_test(to, addr, ETHER_ADDR_LEN); + } + } + +err: + return res; +} + +#elif defined(_AIX) || defined(__linux__) || defined(__sun) +#include +#include +#include +#ifdef HAVE_SYS_SOCKIO_H +#include +#endif + +#define ETHER_ADDR_LEN 6 + +my_bool my_gethwaddr(uchar *to) +{ + int fd, res= 1; +#ifdef _AIX + struct ifhwaddr_req ifr[32]; +#else + struct ifreq ifr[32]; +#endif + struct ifconf ifc; + DBUG_ENTER("my_gethwaddr"); + + ifc.ifc_req= (struct ifreq *) ifr; + ifc.ifc_len= 
sizeof(ifr); + + fd = socket(AF_INET, SOCK_DGRAM, 0); + if (fd < 0) + { + DBUG_PRINT("error", ("socket() call failed with %d", errno)); + goto err; + } + + if (ioctl(fd, SIOCGIFCONF, (char*)&ifc) >= 0) + { + uint i; + for (i= 0; res && i < ifc.ifc_len / sizeof(ifr[0]); i++) + { +#if defined(_AIX) || defined(__linux__) +#if defined(__linux__) +#define HWADDR_DATA ifr[i].ifr_hwaddr.sa_data +#else +#define HWADDR_DATA ifr[i].ifr_hwaddr +#endif + if (ioctl(fd, SIOCGIFHWADDR, &ifr[i]) >= 0) + res= memcpy_and_test(to, (uchar *)&HWADDR_DATA, + ETHER_ADDR_LEN); +#else + /* + A bug in OpenSolaris used to prevent non-root from getting a mac + address: {no url. Oracle killed the old OpenSolaris bug database} + + Thus, we'll use an alternative method and extract the address from the + arp table. + */ + struct arpreq arpr; + arpr.arp_pa= ifr[i].ifr_addr; + + if (ioctl(fd, SIOCGARP, (char*)&arpr) >= 0) + res= memcpy_and_test(to, (uchar *)&arpr.arp_ha.sa_data, + ETHER_ADDR_LEN); +#endif + } + } + + close(fd); +err: + DBUG_RETURN(res); +} + +#elif defined(_WIN32) +#include +#include +#pragma comment(lib, "iphlpapi.lib") + +#define ETHER_ADDR_LEN 6 + +my_bool my_gethwaddr(uchar *to) +{ + my_bool res= 1; + + IP_ADAPTER_INFO *info= NULL; + ULONG info_len= 0; + + if (GetAdaptersInfo(info, &info_len) != ERROR_BUFFER_OVERFLOW) + goto err; + + info= (IP_ADAPTER_INFO *)alloca(info_len); + + if (GetAdaptersInfo(info, &info_len) != NO_ERROR) + goto err; + + while (info && res) + { + if (info->Type == MIB_IF_TYPE_ETHERNET && + info->AddressLength == ETHER_ADDR_LEN) + { + res= memcpy_and_test(to, info->Address, ETHER_ADDR_LEN); + } + info = info->Next; + } + +err: + return res; +} + +#else /* unsupported system */ +/* just fail */ +my_bool my_gethwaddr(uchar *to __attribute__((unused))) +{ + return 1; +} +#endif + +#else /* MAIN */ +int main(int argc __attribute__((unused)),char **argv) +{ + uchar mac[6]; + uint i; + MY_INIT(argv[0]); + if (my_gethwaddr(mac)) + { + printf("my_gethwaddr failed 
with errno %d\n", errno); + exit(1); + } + for (i= 0; i < sizeof(mac); i++) + { + if (i) printf(":"); + printf("%02x", mac[i]); + } + printf("\n"); + return 0; +} +#endif diff --git a/mysys/my_getncpus.c b/mysys/my_getncpus.c new file mode 100644 index 00000000..1f5fa794 --- /dev/null +++ b/mysys/my_getncpus.c @@ -0,0 +1,85 @@ +/* + Copyright (c) 2006, 2010, Oracle and/or its affiliates + + This program is free software; you can redistribute it and/or modify + it under the terms of the GNU General Public License as published by + the Free Software Foundation; version 2 of the License. + + This program is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + GNU General Public License for more details. + + You should have received a copy of the GNU General Public License + along with this program; if not, write to the Free Software + Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1335 USA */ + +/* get the number of (online) CPUs */ + +#include "mysys_priv.h" +#ifdef HAVE_UNISTD_H +#include +#endif + +#if defined(__FreeBSD__) && defined(HAVE_PTHREAD_GETAFFINITY_NP) +#include +#include +#endif + +static int ncpus=0; + +int my_getncpus(void) +{ + if (!ncpus) + { + /* + First attempt to get the total number of available cores. sysconf is + the fallback, but it can return a larger number. It will return the + total number of cores, not the ones available to the process - as + configured via core affinity. + */ +#if (defined(__linux__) || defined(__FreeBSD__)) && defined(HAVE_PTHREAD_GETAFFINITY_NP) +#ifdef __linux__ + cpu_set_t set; +#else + cpuset_t set; +#endif + if (pthread_getaffinity_np(pthread_self(), sizeof(set), &set) == 0) + { +#ifdef CPU_COUNT + /* CPU_COUNT was introduced with glibc 2.6. 
*/ + ncpus= CPU_COUNT(&set); +#else + /* Implementation for platforms with glibc < 2.6 */ + size_t i; + + for (i= 0; i < CPU_SETSIZE; i++) + if (CPU_ISSET(i, &set)) + ncpus++; +#endif + return ncpus; + } +#endif /* (__linux__ || __FreeBSD__) && HAVE_PTHREAD_GETAFFINITY_NP */ + +#ifdef _SC_NPROCESSORS_ONLN + ncpus= sysconf(_SC_NPROCESSORS_ONLN); +#elif defined(_WIN32) + SYSTEM_INFO sysinfo; + + /* + We are not calling GetNativeSystemInfo here because (1) we + don't believe that they return different values for number + of processors and (2) if WOW64 limits processors for Win32 + then we don't want to try to override that. + */ + GetSystemInfo(&sysinfo); + + ncpus= sysinfo.dwNumberOfProcessors; +#else + /* Unknown so play safe: assume SMP and forbid uniprocessor build */ + ncpus= 2; +#endif + } + + return ncpus; +} diff --git a/mysys/my_getopt.c b/mysys/my_getopt.c new file mode 100644 index 00000000..eb665b1e --- /dev/null +++ b/mysys/my_getopt.c @@ -0,0 +1,1768 @@ +/* + Copyright (c) 2002, 2013, Oracle and/or its affiliates + Copyright (c) 2009, 2020, MariaDB + + This program is free software; you can redistribute it and/or modify + it under the terms of the GNU General Public License as published by + the Free Software Foundation; version 2 of the License. + + This program is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + GNU General Public License for more details. 
+
+  You should have received a copy of the GNU General Public License
+  along with this program; if not, write to the Free Software
+  Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1335 USA */
+
+#include
+#include
+#include
+#include
+#include
+#include
+#include
+
+my_bool is_file_marker(const char* arg);
+typedef void (*init_func_p)(const struct my_option *option, void *variable,
+                            longlong value);
+
+static void default_reporter(enum loglevel level, const char *format, ...);
+my_error_reporter my_getopt_error_reporter= &default_reporter; /* replaceable hook; defaults to the stderr reporter */
+
+static int findopt(char *, uint, const struct my_option **, const char **);
+static my_bool getopt_compare_strings(const char *, const char *, uint);
+static longlong getopt_ll(char *arg, const struct my_option *optp, int *err);
+static ulonglong getopt_ull(char *, const struct my_option *, int *);
+static double getopt_double(char *arg, const struct my_option *optp, int *err);
+static void init_variables(const struct my_option *, init_func_p);
+static void init_one_value(const struct my_option *, void *, longlong);
+static void fini_one_value(const struct my_option *, void *, longlong);
+static int setval(const struct my_option *, void *, char *, my_bool, const char *);
+static char *check_struct_option(char *cur_arg, char *key_name);
+
+/*
+  The following three definitions belong to the same group; the number and
+  order of their elements must correspond to each other:
+
+    special_opt_prefix[i]          the prefix text ("skip" is used as --skip-xxx)
+    special_opt_prefix_lengths[i]  the length of special_opt_prefix[i]
+    enum enum_special_opt          symbolic name for index i
+
+  Both arrays end with a 0 entry; handle_options() relies on that entry to
+  stop its scan over the prefixes.
+*/
+static const char *special_opt_prefix[]=
+{"skip", "disable", "enable", "maximum", "loose", "autoset", 0};
+static const uint special_opt_prefix_lengths[]=
+{ 4, 7, 6, 7, 5, 7, 0};
+enum enum_special_opt
+{ OPT_SKIP, OPT_DISABLE, OPT_ENABLE, OPT_MAXIMUM, OPT_LOOSE, OPT_AUTOSET};
+
+char *disabled_my_option= (char*) "0";   /* argument substituted for --skip-xxx / --disable-xxx */
+char *enabled_my_option= (char*) "1";    /* argument substituted for --enable-xxx; setval() also compares against this pointer */
+char *autoset_my_option= (char*) "auto"; /* argument substituted for --autoset-xxx */
+
+/*
+  This is a flag that can be set in client programs.
0 means that + my_getopt will not print error messages, but the client should do + it by itself +*/ + +my_bool my_getopt_print_errors= 1; + +/* + This is a flag that can be set in client programs. 1 means that + my_getopt will skip over options it does not know how to handle. +*/ + +my_bool my_getopt_skip_unknown= 0; + + +/* + This is a flag that can be set in client programs. 1 means that + my_getopt will reconize command line options by their unambiguous + prefixes. 0 means an option must be always specified in full. +*/ +my_bool my_getopt_prefix_matching= 1; + +/* + This is a flag that can be set in client programs. 1 means that + handle_options() will not initialize options to default values. +*/ +my_bool my_handle_options_init_variables = 1; + +my_getopt_value my_getopt_get_addr= 0; + +static void default_reporter(enum loglevel level, const char *format, ...) +{ + va_list args; + DBUG_ENTER("default_reporter"); + + va_start(args, format); + if (level == WARNING_LEVEL) + fprintf(stderr, "%s", "Warning: "); + else if (level == INFORMATION_LEVEL) + fprintf(stderr, "%s", "Info: "); + vfprintf(stderr, format, args); + va_end(args); + fputc('\n', stderr); + fflush(stderr); + DBUG_VOID_RETURN; +} + +union ull_dbl +{ + ulonglong ull; + double dbl; +}; + +/** + Returns an ulonglong value containing a raw + representation of the given double value. +*/ +ulonglong getopt_double2ulonglong(double v) +{ + union ull_dbl u; + u.dbl= v; + compile_time_assert(sizeof(ulonglong) >= sizeof(double)); + return u.ull; +} + +/** + Returns the double value which corresponds to + the given raw representation. +*/ +double getopt_ulonglong2double(ulonglong v) +{ + union ull_dbl u; + u.ull= v; + return u.dbl; +} + +#ifdef _WIN32 +/** + + On Windows, if program is running in UTF8 mode, but some arguments are not UTF8. + + This will mostly likely be a sign of old "ANSI" my.ini, and it is likely that + something will go wrong, e.g file access error. 
+*/ +static void validate_value(const char *key, const char *value, + const char *filename) +{ + MY_STRCOPY_STATUS status; + const struct charset_info_st *cs= &my_charset_utf8mb4_bin; + size_t len; + if (GetACP() != CP_UTF8) + return; + if (!(len= strlen(value))) + return; + cs->cset->well_formed_char_length(cs, value, value + len, len, &status); + if (!status.m_well_formed_error_pos) + return; + if (filename && *filename) + { + my_getopt_error_reporter(WARNING_LEVEL, + "%s: invalid (non-UTF8) characters found for option '%s'" + " in file '%s'", + my_progname, key, filename); + } + else + { + my_getopt_error_reporter( + WARNING_LEVEL, "%s: invalid (non-UTF8) characters for option %s", + my_progname, key); + } +} +#else +#define validate_value(key, value, filename) (void)filename +#endif + +/** + Handle command line options. + Sort options. + Put options first, until special end of options (--), + or until the end of argv. Parse options, check that the given option + matches with one of the options in struct 'my_option'. + Check that option was given an argument if it requires one + Call the 'get_one_option()' function once for each option. + + Note that handle_options() can be invoked multiple times to + parse a command line in several steps. + In this case, use the global flag @c my_getopt_skip_unknown to indicate + that options unknown in the current step should be preserved in the + command line for later parsing in subsequent steps. + + For 'long' options (--a_long_option), @c my_getopt_skip_unknown is + fully supported. Command line parameters such as: + - "--a_long_option" + - "--a_long_option=value" + - "--a_long_option value" + will be preserved as is when the option is not known. + + For 'short' options (-S), support for @c my_getopt_skip_unknown + comes with some limitation, because several short options + can also be specified together in the same command line argument, + as in "-XYZ". + + The first use case supported is: all short options are declared. 
+ handle_options() will be able to interpret "-XYZ" as one of: + - an unknown X option + - "-X -Y -Z", three short options with no arguments + - "-X -YZ", where Y is a short option with argument Z + - "-XYZ", where X is a short option with argument YZ + based on the full short options specifications. + + The second use case supported is: no short option is declared. + handle_options() will reject "-XYZ" as unknown, to be parsed later. + + The use case that is explicitly not supported is to provide + only a partial list of short options to handle_options(). + This function can not be expected to extract some option Y + in the middle of the string "-XYZ" in these conditions, + without knowing if X will be declared an option later. + + Note that this limitation only impacts parsing of several + short options from the same command line argument, + as in "mysqld -anW5". + When each short option is properly separated out in the command line + argument, for example in "mysqld -a -n -w5", the code would actually + work even with partial options specs given at each stage. + + @param [in, out] argc command line options (count) + @param [in, out] argv command line options (values) + @param [in] longopts descriptor of all valid options + @param [in] get_one_option callback function to process each option + @return error in case of ambiguous or unknown options, + 0 on success. 
+*/
+int handle_options(int *argc, char ***argv, const struct my_option *longopts,
+                   my_get_one_option get_one_option)
+{
+  uint UNINIT_VAR(opt_found), argvpos= 0, length; /* argvpos: next free slot in the compacted argv */
+  my_bool end_of_options= 0, must_be_var, set_maximum_value,
+          option_is_loose, option_is_autoset;
+  char **pos, **pos_end, *optend, *opt_str, key_name[FN_REFLEN];
+  char *filename= (char*)""; /* handed to setval() and get_one_option() */
+  const char *UNINIT_VAR(prev_found);
+  const struct my_option *optp;
+  void *value;
+  int error, i;
+  my_bool is_cmdline_arg= 1;
+  DBUG_ENTER("handle_options");
+
+  /* handle_options() assumes arg0 (program name) always exists */
+  DBUG_ASSERT(*argc >= 1);
+  DBUG_ASSERT(*argv);
+  (*argc)--; /* Skip the program name */
+  (*argv)++; /*      --- || ----      */
+  if (my_handle_options_init_variables)
+    init_variables(longopts, init_one_value);
+
+  is_cmdline_arg= !is_file_marker(**argv); /* a leading file marker: following options are not from the command line */
+
+  for (pos= *argv, pos_end=pos+ *argc; pos != pos_end ; pos++)
+  {
+    char **first= pos;
+    char *cur_arg= *pos;
+    opt_found= 0;
+    if (!is_cmdline_arg)
+    {
+      if (is_file_marker(cur_arg))
+      {
+        pos++;
+        filename= *pos;
+        is_cmdline_arg= *filename == 0; /* empty file name = command line */
+        if (my_getopt_skip_unknown)
+        {
+          (*argv)[argvpos++]= cur_arg;
+          (*argv)[argvpos++]= filename;
+        }
+        else
+          (*argc)-= 2;
+        continue;
+      }
+    }
+    if (cur_arg[0] == '-' && cur_arg[1] && !end_of_options) /* must be opt */
+    {
+      char *argument= 0;
+      must_be_var= 0;
+      set_maximum_value= 0;
+      option_is_loose= 0;
+      option_is_autoset= 0;
+
+      cur_arg++;                /* skip '-' */
+      if (*cur_arg == '-')      /* check for long option, */
+      {
+        if (!*++cur_arg)        /* skip the double dash */
+        {
+          /* '--' means end of options, look no further */
+          end_of_options= 1;
+          (*argc)--;
+          continue;
+        }
+        opt_str= check_struct_option(cur_arg, key_name);
+        optend= strcend(opt_str, '=');
+        length= (uint) (optend - opt_str);
+        if (*optend == '=')
+          optend++;
+        else
+          optend= 0;
+
+        /*
+          Find first the right option. Return error in case of an ambiguous,
+          or unknown option.
+          findopt() returns the number of matching options: 0 means no
+          match, more than 1 means that the given prefix is ambiguous.
+        */
+        optp= longopts;
+        if (!(opt_found= findopt(opt_str, length, &optp, &prev_found)))
+        {
+          /*
+            Didn't find any matching option. Let's see if someone called
+            option with a special option prefix
+          */
+          if (!must_be_var)
+          {
+            if (optend)
+              must_be_var= 1; /* option is followed by an argument */
+            for (i= 0; special_opt_prefix[i]; i++)
+            {
+              if (!getopt_compare_strings(special_opt_prefix[i], opt_str,
+                                          special_opt_prefix_lengths[i]) &&
+                  (opt_str[special_opt_prefix_lengths[i]] == '-' ||
+                   opt_str[special_opt_prefix_lengths[i]] == '_'))
+              {
+                /*
+                  We were called with a special prefix, we can reuse opt_found.
+                  Strip the prefix and its separator and look up the rest.
+                */
+                opt_str+= special_opt_prefix_lengths[i] + 1;
+                length-= special_opt_prefix_lengths[i] + 1;
+                if (i == OPT_LOOSE)
+                  option_is_loose= 1;
+                else if (i == OPT_AUTOSET)
+                  option_is_autoset= 1;
+                if ((opt_found= findopt(opt_str, length, &optp, &prev_found)))
+                {
+                  if (opt_found > 1)
+                  {
+                    if (my_getopt_print_errors)
+                      my_getopt_error_reporter(ERROR_LEVEL,
+                                               "%s: ambiguous option '--%s-%s' (--%s-%s)",
+                                               my_progname, special_opt_prefix[i],
+                                               opt_str, special_opt_prefix[i],
+                                               prev_found);
+                    DBUG_RETURN(EXIT_AMBIGUOUS_OPTION);
+                  }
+                  switch (i) {
+                  case OPT_SKIP:
+                  case OPT_DISABLE: /* fall through */
+                    /*
+                      double negation is actually enable again,
+                      for example: --skip-option=0 -> option = TRUE
+                    */
+                    optend= (optend && *optend == '0' && !(*(optend + 1))) ?
+                      enabled_my_option : disabled_my_option;
+                    break;
+                  case OPT_ENABLE:
+                    optend= (optend && *optend == '0' && !(*(optend + 1))) ?
+                      disabled_my_option : enabled_my_option;
+                    break;
+                  case OPT_MAXIMUM:
+                    set_maximum_value= 1;
+                    must_be_var= 1;
+                    break;
+                  }
+                  break; /* break from the inner loop, main loop continues */
+                }
+                i= -1; /* restart the loop */
+              }
+            }
+          }
+          if (!opt_found)
+          {
+            if (my_getopt_skip_unknown)
+            {
+              /* Preserve all the components of this unknown option. */
+              do {
+                (*argv)[argvpos++]= *first++;
+              } while (first <= pos);
+              continue;
+            }
+            if (must_be_var)
+            {
+              if (my_getopt_print_errors)
+                my_getopt_error_reporter(option_is_loose ?
+                                         WARNING_LEVEL : ERROR_LEVEL,
+                                         "%s: unknown variable '%s'",
+                                         my_progname, cur_arg);
+              if (!option_is_loose)
+                DBUG_RETURN(EXIT_UNKNOWN_VARIABLE);
+            }
+            else
+            {
+              if (my_getopt_print_errors)
+                my_getopt_error_reporter(option_is_loose ?
+                                         WARNING_LEVEL : ERROR_LEVEL,
+                                         "%s: unknown option '--%s'",
+                                         my_progname, cur_arg);
+              if (!option_is_loose)
+                DBUG_RETURN(EXIT_UNKNOWN_OPTION);
+            }
+            if (option_is_loose)
+            {
+              (*argc)--;
+              continue;
+            }
+          }
+        }
+        if (opt_found > 1)
+        {
+          if (must_be_var)
+          {
+            if (my_getopt_print_errors)
+              my_getopt_error_reporter(ERROR_LEVEL,
+                                       "%s: variable prefix '%s' is not unique",
+                                       my_progname, opt_str);
+            DBUG_RETURN(EXIT_VAR_PREFIX_NOT_UNIQUE);
+          }
+          else
+          {
+            if (my_getopt_print_errors)
+              my_getopt_error_reporter(ERROR_LEVEL,
+                                       "%s: ambiguous option '--%s' (%s, %s)",
+                                       my_progname, opt_str, prev_found,
+                                       optp->name);
+            DBUG_RETURN(EXIT_AMBIGUOUS_OPTION);
+          }
+        }
+        if ((optp->var_type & GET_TYPE_MASK) == GET_DISABLED)
+        {
+          if (my_getopt_print_errors)
+            fprintf(stderr,
+                    "%s: %s: Option '%s' used, but is disabled\n", my_progname,
+                    option_is_loose ? "WARNING" : "ERROR", opt_str);
+          if (option_is_loose)
+          {
+            (*argc)--;
+            continue;
+          }
+          DBUG_RETURN(EXIT_OPTION_DISABLED);
+        }
+        error= 0;
+        value= optp->var_type & GET_ASK_ADDR
+          ? (*my_getopt_get_addr)(key_name, (uint)strlen(key_name), optp, &error)
+          : optp->value; /* GET_ASK_ADDR: the my_getopt_get_addr callback supplies the storage address */
+        if (error)
+          DBUG_RETURN(error);
+
+        if (optp->arg_type == NO_ARG)
+        {
+          /*
+            Due to historical reasons GET_BOOL var_types still accepts arguments
+            despite the NO_ARG arg_type attribute. This can seem a bit unintuitive
+            and care should be taken when refactoring this code.
+          */
+          if (optend && (optp->var_type & GET_TYPE_MASK) != GET_BOOL)
+          {
+            if (my_getopt_print_errors)
+              my_getopt_error_reporter(ERROR_LEVEL,
+                                       "%s: option '--%s' cannot take an argument",
+                                       my_progname, optp->name);
+            DBUG_RETURN(EXIT_NO_ARGUMENT_ALLOWED);
+          }
+          if ((optp->var_type & GET_TYPE_MASK) == GET_BOOL)
+          {
+            /*
+              Set bool to 1 if no argument or if the user has used
+              --enable-'option-name'.
+              *optend was set to '0' if one used --disable-option
+            */
+            (*argc)--;
+            if (!optend || *optend == '1' ||
+                !my_strcasecmp(&my_charset_latin1, optend, "true") ||
+                !my_strcasecmp(&my_charset_latin1, optend, "on"))
+              *((my_bool*) value)= (my_bool) 1;
+            else if (*optend == '0' ||
+                     !my_strcasecmp(&my_charset_latin1, optend, "false") ||
+                     !my_strcasecmp(&my_charset_latin1, optend, "off"))
+              *((my_bool*) value)= (my_bool) 0;
+            else
+            {
+              my_getopt_error_reporter(WARNING_LEVEL,
+                                       "%s: ignoring option '--%s' "
+                                       "due to invalid value '%s'",
+                                       my_progname, optp->name, optend);
+              continue;
+            }
+            if (get_one_option(optp, *((my_bool*) value) ?
+                               enabled_my_option : disabled_my_option,
+                               filename))
+              DBUG_RETURN(EXIT_ARGUMENT_INVALID);
+            continue;
+          }
+          argument= optend;
+        }
+        else if (option_is_autoset)
+        {
+          if (optend)
+          {
+            my_getopt_error_reporter(ERROR_LEVEL,
+                                     "%s: automatically set "
+                                     "option '--%s' cannot take an argument",
+                                     my_progname, optp->name);
+
+            DBUG_RETURN(EXIT_NO_ARGUMENT_ALLOWED);
+          }
+          if (!(optp->var_type & GET_AUTO))
+          {
+            my_getopt_error_reporter(option_is_loose ?
+                                     WARNING_LEVEL : ERROR_LEVEL,
+                                     "%s: automatic setup request is "
+                                     "unsupported by option '--%s'",
+                                     my_progname, optp->name);
+            if (!option_is_loose)
+              DBUG_RETURN(EXIT_ARGUMENT_INVALID);
+            continue;
+          }
+          else
+            argument= autoset_my_option;
+        }
+        else if (optp->arg_type == REQUIRED_ARG && !optend)
+        {
+          /*
+            Check if there are more arguments after this one,
+            Note: options loaded from config file that requires value
+            should always be in the form '--option=value'.
+          */
+          if (!is_cmdline_arg || !*++pos)
+          {
+            if (my_getopt_print_errors)
+              my_getopt_error_reporter(ERROR_LEVEL,
+                                       "%s: option '--%s' requires an argument",
+                                       my_progname, optp->name);
+            DBUG_RETURN(EXIT_ARGUMENT_REQUIRED);
+          }
+          argument= *pos;
+          (*argc)--;
+        }
+        else
+          argument= optend;
+      }
+      else  /* must be short option */
+      {
+        for (optend= cur_arg; *optend; optend++)
+        {
+          opt_found= 0;
+          for (optp= longopts; optp->name; optp++)
+          {
+            if (optp->id && optp->id == (int) (uchar) *optend)
+            {
+              /* Option recognized. Find next what to do with it */
+              opt_found= 1;
+              if ((optp->var_type & GET_TYPE_MASK) == GET_DISABLED)
+              {
+                if (my_getopt_print_errors)
+                  fprintf(stderr,
+                          "%s: ERROR: Option '-%c' used, but is disabled\n",
+                          my_progname, optp->id);
+                DBUG_RETURN(EXIT_OPTION_DISABLED);
+              }
+              if ((optp->var_type & GET_TYPE_MASK) == GET_BOOL &&
+                  optp->arg_type == NO_ARG)
+              {
+                *((my_bool*) optp->value)= (my_bool) 1;
+                if (get_one_option(optp, argument, filename))
+                  DBUG_RETURN(EXIT_UNSPECIFIED_ERROR);
+                continue; /* resumes the scan over longopts, not the scan over characters */
+              }
+              else if (optp->arg_type == REQUIRED_ARG ||
+                       optp->arg_type == OPT_ARG)
+              {
+                if (*(optend + 1))
+                {
+                  /* The rest of the option is option argument */
+                  argument= optend + 1;
+                  /*
+                    This is in effect a jump out of the outer loop: optend
+                    now points at " ", so the outer loop's optend++ lands on
+                    the terminating NUL and the loop condition fails.
+                  */
+                  optend= (char*) " ";
+                }
+                else
+                {
+                  if (optp->arg_type == OPT_ARG)
+                  {
+                    if (optp->var_type == GET_BOOL)
+                      *((my_bool*) optp->value)= (my_bool) 1;
+                    if (get_one_option(optp, argument, filename))
+                      DBUG_RETURN(EXIT_UNSPECIFIED_ERROR);
+                    continue;
+                  }
+                  /* Check if there are more arguments after this one */
+                  if (!pos[1])
+                  {
+                    if (my_getopt_print_errors)
+                      my_getopt_error_reporter(ERROR_LEVEL,
+                                               "%s: option '-%c' requires an argument",
+                                               my_progname, optp->id);
+                    DBUG_RETURN(EXIT_ARGUMENT_REQUIRED);
+                  }
+                  argument= *++pos;
+                  (*argc)--;
+                  /* the other loop will break, because *optend + 1 == 0 */
+                }
+              }
+              if ((error= setval(optp, optp->value, argument,
+                                 set_maximum_value,filename)))
+                DBUG_RETURN(error);
+              if (get_one_option(optp, argument, filename))
+                DBUG_RETURN(EXIT_UNSPECIFIED_ERROR);
+              break;
+            }
+          }
+          if (!opt_found)
+          {
+            if (my_getopt_skip_unknown)
+            {
+              /*
+                We are currently parsing a single argv[] argument
+                of the form "-XYZ".
+                One of the options found (say Y) is not a known option.
+                Hack the string "-XYZ" to make a "-YZ" substring in it,
+                and push that to the output as an unrecognized parameter.
+              */
+              DBUG_ASSERT(optend > *pos);
+              DBUG_ASSERT(optend >= cur_arg);
+              DBUG_ASSERT(optend <= *pos + strlen(*pos));
+              DBUG_ASSERT(*optend);
+              optend--;
+              optend[0]= '-'; /* replace 'X' or '-' by '-' */
+              (*argv)[argvpos++]= optend;
+              /*
+                Do not continue to parse at the current "-XYZ" argument,
+                skip to the next argv[] argument instead.
+              */
+              optend= (char*) " ";
+            }
+            else
+            {
+              if (my_getopt_print_errors)
+                my_getopt_error_reporter(ERROR_LEVEL,
+                                         "%s: unknown option '-%c'",
+                                         my_progname, *optend);
+              DBUG_RETURN(EXIT_UNKNOWN_OPTION);
+            }
+          }
+        }
+        if (opt_found)
+          (*argc)--; /* option handled (short), decrease argument count */
+        continue;
+      }
+      if ((!option_is_autoset) &&
+          ((error= setval(optp, value, argument, set_maximum_value,filename))) &&
+          !option_is_loose)
+        DBUG_RETURN(error);
+      if (get_one_option(optp, argument, filename))
+        DBUG_RETURN(EXIT_UNSPECIFIED_ERROR);
+
+      (*argc)--; /* option handled (long), decrease argument count */
+    }
+    else /* non-option found */
+      (*argv)[argvpos++]= cur_arg;
+  }
+  /*
+    Destroy the first, already handled option, so that programs that look
+    for arguments in 'argv', without checking 'argc', know when to stop.
+    Items in argv, before the destroyed one, are all non-option arguments
+    to the program, yet to be (possibly) handled.
+  */
+  (*argv)[argvpos]= 0;
+  DBUG_RETURN(0);
+}
+
+
+/*
+  function: check_struct_option
+
+  Arguments: Current argument under processing from argv and a variable
+  where to store the possible key name.
+ + Return value: In case option is a struct option, returns a pointer to + the current argument at the position where the struct option (key_name) + ends, the next character after the dot. In case argument is not a struct + option, returns a pointer to the argument. + + key_name will hold the name of the key, or 0 if not found. +*/ + +static char *check_struct_option(char *cur_arg, char *key_name) +{ + char *ptr, *end; + DBUG_ENTER("check_struct_option"); + + ptr= strcend(cur_arg + 1, '.'); /* Skip the first character */ + end= strcend(cur_arg, '='); + + /* + If the first dot is after an equal sign, then it is part + of a variable value and the option is not a struct option. + Also, if the last character in the string before the ending + NULL, or the character right before equal sign is the first + dot found, the option is not a struct option. + */ + if (end - ptr > 1) + { + uint len= (uint) (ptr - cur_arg); + set_if_smaller(len, FN_REFLEN-1); + strmake(key_name, cur_arg, len); + DBUG_RETURN(++ptr); + } + else + { + key_name[0]= 0; + DBUG_RETURN(cur_arg); + } +} + +/** + Parse a boolean command line argument + + "ON", "TRUE" and "1" will return true, + other values will return false. + + @param[in] argument The value argument + @return boolean value +*/ +static my_bool get_bool_argument(const struct my_option *opts, + const char *argument) +{ + DBUG_ENTER("get_bool_argument"); + + if (!my_strcasecmp(&my_charset_latin1, argument, "true") || + !my_strcasecmp(&my_charset_latin1, argument, "on") || + !my_strcasecmp(&my_charset_latin1, argument, "1")) + DBUG_RETURN(1); + else if (!my_strcasecmp(&my_charset_latin1, argument, "false") || + !my_strcasecmp(&my_charset_latin1, argument, "off") || + !my_strcasecmp(&my_charset_latin1, argument, "0")) + DBUG_RETURN(0); + my_getopt_error_reporter(WARNING_LEVEL, + "option '%s': boolean value '%s' wasn't recognized. 
Set to OFF.", + opts->name, argument); + DBUG_RETURN(0); +} + +/* + function: setval + + Arguments: opts, argument + Will set the option value to given value +*/ + +static int setval(const struct my_option *opts, void *value, char *argument, + my_bool set_maximum_value, const char *option_file) +{ + int err= 0, res= 0; + DBUG_ENTER("setval"); + + if (!argument) + argument= enabled_my_option; + + if (value) + { + if (set_maximum_value && !(value= opts->u_max_value)) + { + my_getopt_error_reporter(ERROR_LEVEL, + "%s: Maximum value of '%s' cannot be set", + my_progname, opts->name); + DBUG_RETURN(EXIT_NO_PTR_TO_VARIABLE); + } + + switch ((opts->var_type & GET_TYPE_MASK)) { + case GET_BOOL: /* If argument differs from 0, enable option, else disable */ + *((my_bool*) value)= get_bool_argument(opts, argument); + break; + case GET_INT: + *((int*) value)= (int) getopt_ll(argument, opts, &err); + break; + case GET_UINT: + *((uint*) value)= (uint) getopt_ull(argument, opts, &err); + break; + case GET_LONG: + *((long*) value)= (long) getopt_ll(argument, opts, &err); + break; + case GET_ULONG: + *((long*) value)= (long) getopt_ull(argument, opts, &err); + break; + case GET_LL: + *((longlong*) value)= getopt_ll(argument, opts, &err); + break; + case GET_ULL: + *((ulonglong*) value)= getopt_ull(argument, opts, &err); + break; + case GET_DOUBLE: + *((double*) value)= getopt_double(argument, opts, &err); + break; + case GET_STR: + /* If no argument or --enable-string-option, set string to "" */ + *((char**) value)= argument == enabled_my_option ? (char*) "" : argument; + break; + case GET_STR_ALLOC: + my_free(*((char**) value)); + if (!(*((char**) value)= my_strdup(key_memory_defaults, + argument == enabled_my_option ? "" : + argument, MYF(MY_WME)))) + { + res= EXIT_OUT_OF_MEMORY; + goto ret; + }; + break; + case GET_ENUM: + { + int type= find_type(argument, opts->typelib, FIND_TYPE_BASIC); + if (type == 0) + { + /* + Accept an integer representation of the enumerated item. 
+ */ + char *endptr; + ulong arg= strtoul(argument, &endptr, 10); + if (*endptr || arg >= opts->typelib->count) + { + res= EXIT_ARGUMENT_INVALID; + goto ret; + } + *(ulong*)value= arg; + } + else if (type < 0) + { + res= EXIT_AMBIGUOUS_OPTION; + goto ret; + } + else + *(ulong*)value= type - 1; + } + break; + case GET_SET: + *((ulonglong*)value)= find_typeset(argument, opts->typelib, &err); + if (err) + { + /* Check if option 'all' is used (to set all bits) */ + if (!my_strcasecmp(&my_charset_latin1, argument, "all")) + *(ulonglong*) value= ((1ULL << opts->typelib->count) - 1); + else + { + /* Accept an integer representation of the set */ + char *endptr; + ulonglong arg= (ulonglong) strtol(argument, &endptr, 10); + if (*endptr || (arg >> 1) >= (1ULL << (opts->typelib->count-1))) + { + res= EXIT_ARGUMENT_INVALID; + goto ret; + }; + *(ulonglong*)value= arg; + } + err= 0; + } + break; + case GET_FLAGSET: + { + char *error; + uint error_len; + + *((ulonglong*)value)= + find_set_from_flags(opts->typelib, opts->typelib->count, + *(ulonglong *)value, opts->def_value, + argument, (uint)strlen(argument), + &error, &error_len); + if (error) + { + res= EXIT_ARGUMENT_INVALID; + goto ret; + }; + } + break; + case GET_BIT: + { + uint tmp; + ulonglong bit= (opts->block_size >= 0 ? + opts->block_size : + -opts->block_size); + /* + This sets a bit stored in a longlong. + The bit to set is stored in block_size. If block_size is positive + then setting the bit means value is true. If block_size is negatitive, + then setting the bit means value is false. 
+ */ + tmp= get_bool_argument(opts, argument); + if (opts->block_size < 0) + tmp= !tmp; + if (tmp) + (*(ulonglong*)value)|= bit; + else + (*(ulonglong*)value)&= ~bit; + break; + } + case GET_NO_ARG: /* get_one_option has taken care of the value already */ + default: /* dummy default to avoid compiler warnings */ + break; + } + if (err) + { + res= EXIT_UNKNOWN_SUFFIX; + goto ret; + }; + } + validate_value(opts->name, argument, option_file); + DBUG_RETURN(0); + +ret: + my_getopt_error_reporter(ERROR_LEVEL, + "%s: Error while setting value '%s' to '%s'", + my_progname, argument, opts->name); + DBUG_RETURN(res); +} + + +/* + Find option + + SYNOPSIS + findopt() + optpat Prefix of option to find (with - or _) + length Length of optpat + opt_res Options + ffname Place for pointer to first found name + + IMPLEMENTATION + Go through all options in the my_option struct. Return number + of options found that match the pattern and in the argument + list the option found, if any. In case of ambiguous option, store + the name in ffname argument + + RETURN + 0 No matching options + # Number of matching options + ffname points to first matching option +*/ + +static int findopt(char *optpat, uint length, + const struct my_option **opt_res, + const char **ffname) +{ + uint count; + const struct my_option *opt= *opt_res; + DBUG_ENTER("findopt"); + + for (count= 0; opt->name; opt++) + { + if (!getopt_compare_strings(opt->name, optpat, length)) /* match found */ + { + (*opt_res)= opt; + if (!opt->name[length]) /* Exact match */ + DBUG_RETURN(1); + + if (!my_getopt_prefix_matching) + continue; + + if (!count) + { + /* We only need to know one prev */ + count= 1; + *ffname= opt->name; + } + else if (strcmp(*ffname, opt->name)) + { + /* + The above test is to not count same option twice + (see mysql.cc, option "help") + */ + count++; + } + } + } + + if (count == 1) + my_getopt_error_reporter(INFORMATION_LEVEL, + "Using unique option prefix '%.*s' is error-prone " + "and can break in the 
future. " + "Please use the full name '%s' instead.", + length, optpat, *ffname); + + DBUG_RETURN(count); +} + + +/* + function: compare_strings + + Works like strncmp, other than 1.) considers '-' and '_' the same. + 2.) Returns -1 if strings differ, 0 if they are equal +*/ + +my_bool getopt_compare_strings(register const char *s, register const char *t, + uint length) +{ + char const *end= s + length; + DBUG_ENTER("getopt_compare_strings"); + + for (;s != end ; s++, t++) + { + if ((*s != '-' ? *s : '_') != (*t != '-' ? *t : '_')) + DBUG_RETURN(1); + } + DBUG_RETURN(0); +} + +/* + function: eval_num_suffix + + Transforms suffix like k/m/g/t/p/e to their real value. +*/ + +static inline ulonglong eval_num_suffix(char *suffix, int *error) +{ + switch (*suffix) { + case '\0': + return 1ULL; + case 'k': + case 'K': + return 1ULL << 10; + case 'm': + case 'M': + return 1ULL << 20; + case 'g': + case 'G': + return 1ULL << 30; + case 't': + case 'T': + return 1ULL << 40; + case 'p': + case 'P': + return 1ULL << 50; + case 'e': + case 'E': + return 1ULL << 60; + default: + *error= 1; + return 0ULL; + } +} + +/* + function: eval_num_suffix_ll + + Transforms a number with a suffix to real number. Suffix can + be k|K for kilo, m|M for mega, etc. +*/ + +static longlong eval_num_suffix_ll(char *argument, + int *error, char *option_name) +{ + char *endchar; + longlong num; + DBUG_ENTER("eval_num_suffix_ll"); + + + *error= 0; + errno= 0; + num= strtoll(argument, &endchar, 10); + if (errno == ERANGE) + { + my_getopt_error_reporter(ERROR_LEVEL, + "Incorrect integer value: '%s'", argument); + *error= 1; + DBUG_RETURN(0); + } + num*= eval_num_suffix(endchar, error); + if (*error) + fprintf(stderr, + "Unknown suffix '%c' used for variable '%s' (value '%s')\n", + *endchar, option_name, argument); + DBUG_RETURN(num); +} + +/* + function: eval_num_suffix_ull + + Transforms a number with a suffix to positive Integer. Suffix can + be k|K for kilo, m|M for mega, etc. 
+*/ + +static ulonglong eval_num_suffix_ull(char *argument, + int *error, char *option_name) +{ + char *endchar; + ulonglong num; + DBUG_ENTER("eval_num_suffix_ull"); + + if (*argument == '-') + { + my_getopt_error_reporter(ERROR_LEVEL, + "Incorrect unsigned value: '%s' for %s", + argument, option_name); + *error= 1; + DBUG_RETURN(0); + } + *error= 0; + errno= 0; + num= strtoull(argument, &endchar, 10); + if (errno == ERANGE) + { + my_getopt_error_reporter(ERROR_LEVEL, + "Incorrect integer value: '%s' for %s", + argument, option_name); + *error= 1; + DBUG_RETURN(0); + } + num*= eval_num_suffix(endchar, error); + if (*error) + my_getopt_error_reporter(ERROR_LEVEL, + "Unknown suffix '%c' used for variable '%s' (value '%s')", + *endchar, option_name, argument); + DBUG_RETURN(num); +} + + +/* + function: getopt_ll + + Evaluates and returns the value that user gave as an argument + to a variable. Recognizes (case insensitive) K as KILO, M as MEGA + and G as GIGA bytes. Some values must be in certain blocks, as + defined in the given my_option struct, this function will check + that those values are honored. + In case of an error, set error value in *err. +*/ + +static longlong getopt_ll(char *arg, const struct my_option *optp, int *err) +{ + longlong num=eval_num_suffix_ll(arg, err, (char*) optp->name); + return getopt_ll_limit_value(num, optp, NULL); +} + +/* + function: getopt_ll_limit_value + + Applies min/max/block_size to a numeric value of an option. + Returns "fixed" value. +*/ + +longlong getopt_ll_limit_value(longlong num, const struct my_option *optp, + my_bool *fix) +{ + longlong old= num; + my_bool adjusted= FALSE; + char buf1[255], buf2[255]; + ulonglong block_size= (optp->block_size ? 
(ulonglong) optp->block_size : 1L); + DBUG_ENTER("getopt_ll_limit_value"); + + if (num > 0 && ((ulonglong) num > (ulonglong) optp->max_value) && + optp->max_value) /* if max value is not set -> no upper limit */ + { + num= (ulonglong) optp->max_value; + adjusted= TRUE; + } + + switch ((optp->var_type & GET_TYPE_MASK)) { + case GET_INT: + if (num > (longlong) INT_MAX) + { + num= ((longlong) INT_MAX); + adjusted= TRUE; + } + break; + case GET_LONG: +#if SIZEOF_LONG < SIZEOF_LONG_LONG + if (num > (longlong) LONG_MAX) + { + num= ((longlong) LONG_MAX); + adjusted= TRUE; + } +#endif + break; + default: + DBUG_ASSERT((optp->var_type & GET_TYPE_MASK) == GET_LL); + break; + } + + num= (num / block_size); + num= (longlong) (num * block_size); + + if (num < optp->min_value) + { + num= optp->min_value; + if (old < optp->min_value) + adjusted= TRUE; + } + + if (fix) + *fix= old != num; + else if (adjusted) + my_getopt_error_reporter(WARNING_LEVEL, + "option '%s': signed value %s adjusted to %s", + optp->name, llstr(old, buf1), llstr(num, buf2)); + DBUG_RETURN(num); +} + +/* + function: getopt_ull + + This is the same as getopt_ll, but is meant for unsigned long long + values. 
+*/ + +static ulonglong getopt_ull(char *arg, const struct my_option *optp, int *err) +{ + ulonglong num= eval_num_suffix_ull(arg, err, (char*) optp->name); + return getopt_ull_limit_value(num, optp, NULL); +} + + +ulonglong getopt_ull_limit_value(ulonglong num, const struct my_option *optp, + my_bool *fix) +{ + my_bool adjusted= FALSE; + ulonglong old= num; + char buf1[255], buf2[255]; + DBUG_ENTER("getopt_ull_limit_value"); + + if ((ulonglong) num > (ulonglong) optp->max_value && + optp->max_value) /* if max value is not set -> no upper limit */ + { + num= (ulonglong) optp->max_value; + adjusted= TRUE; + } + + switch ((optp->var_type & GET_TYPE_MASK)) { + case GET_UINT: + if (num > (ulonglong) UINT_MAX) + { + num= ((ulonglong) UINT_MAX); + adjusted= TRUE; + } + break; + case GET_ULONG: +#if SIZEOF_LONG < SIZEOF_LONG_LONG + if (num > (ulonglong) ULONG_MAX) + { + num= ((ulonglong) ULONG_MAX); + adjusted= TRUE; + } +#endif + break; + default: + DBUG_ASSERT((optp->var_type & GET_TYPE_MASK) == GET_ULL); + break; + } + + if (optp->block_size > 1) + { + num/= (ulonglong) optp->block_size; + num*= (ulonglong) optp->block_size; + } + + if (num < (ulonglong) optp->min_value) + { + num= (ulonglong) optp->min_value; + if (old < (ulonglong) optp->min_value) + adjusted= TRUE; + } + + if (fix) + *fix= old != num; + else if (adjusted) + my_getopt_error_reporter(WARNING_LEVEL, + "option '%s': unsigned value %s adjusted to %s", + optp->name, ullstr(old, buf1), ullstr(num, buf2)); + + DBUG_RETURN(num); +} + +double getopt_double_limit_value(double num, const struct my_option *optp, + my_bool *fix) +{ + my_bool adjusted= FALSE; + double old= num; + double min, max; + DBUG_ENTER("getopt_double_limit_value"); + + max= getopt_ulonglong2double(optp->max_value); + min= getopt_ulonglong2double(optp->min_value); + if (max && num > max) + { + num= max; + adjusted= TRUE; + } + if (num < min) + { + num= min; + adjusted= TRUE; + } + if (fix) + *fix= adjusted; + else if (adjusted) + 
my_getopt_error_reporter(WARNING_LEVEL, + "option '%s': value %g adjusted to %g", + optp->name, old, num); + DBUG_RETURN(num); +} + +/* + Get double value withing ranges + + Evaluates and returns the value that user gave as an argument to a variable. + + RETURN + decimal value of arg + + In case of an error, prints an error message and sets *err to + EXIT_ARGUMENT_INVALID. Otherwise err is not touched +*/ + +static double getopt_double(char *arg, const struct my_option *optp, int *err) +{ + double num; + int error; + char *end= arg + 1000; /* Big enough as *arg is \0 terminated */ + num= my_strtod(arg, &end, &error); + if (end[0] != 0 || error) + { + my_getopt_error_reporter(ERROR_LEVEL, + "Invalid decimal value for option '%s'\n", optp->name); + *err= EXIT_ARGUMENT_INVALID; + return 0.0; + } + return getopt_double_limit_value(num, optp, NULL); +} + +/* + Init one value to it's default values + + SYNOPSIS + init_one_value() + option Option to initialize + value Pointer to variable +*/ + +static void init_one_value(const struct my_option *option, void *variable, + longlong value) +{ + DBUG_ENTER("init_one_value"); + switch ((option->var_type & GET_TYPE_MASK)) { + case GET_BOOL: + *((my_bool*) variable)= (my_bool) value; + break; + case GET_INT: + *((int*) variable)= (int) getopt_ll_limit_value((int) value, option, NULL); + break; + case GET_ENUM: + *((ulong*) variable)= (ulong) value; + break; + case GET_UINT: + *((uint*) variable)= (uint) getopt_ull_limit_value((uint) value, option, NULL); + break; + case GET_LONG: + *((long*) variable)= (long) getopt_ll_limit_value((long) value, option, NULL); + break; + case GET_ULONG: + *((ulong*) variable)= (ulong) getopt_ull_limit_value((ulong) value, option, NULL); + break; + case GET_LL: + *((longlong*) variable)= (longlong) getopt_ll_limit_value((longlong) value, option, NULL); + break; + case GET_ULL: + *((ulonglong*) variable)= (ulonglong) getopt_ull_limit_value((ulonglong) value, option, NULL); + break; + case GET_SET: + 
case GET_FLAGSET: + *((ulonglong*) variable)= (ulonglong) value; + break; + case GET_BIT: + { + ulonglong bit= (option->block_size >= 0 ? + option->block_size : + -option->block_size); + if (option->block_size < 0) + value= !value; + if (value) + (*(ulonglong*)variable)|= bit; + else + (*(ulonglong*)variable)&= ~bit; + break; + } + case GET_DOUBLE: + *((double*) variable)= getopt_ulonglong2double(value); + break; + case GET_STR: + /* + Do not clear variable value if it has no default value. + The default value may already be set. + NOTE: To avoid compiler warnings, we first cast longlong to intptr, + so that the value has the same size as a pointer. + */ + if ((char*) (intptr) value) + *((char**) variable)= (char*) (intptr) value; + break; + case GET_STR_ALLOC: + /* + Do not clear variable value if it has no default value. + The default value may already be set. + NOTE: To avoid compiler warnings, we first cast longlong to intptr, + so that the value has the same size as a pointer. + */ + if ((char*) (intptr) value) + { + char **pstr= (char **) variable; + my_free(*pstr); + *pstr= my_strdup(key_memory_defaults, (char*) (intptr) value, MYF(MY_WME)); + } + break; + default: /* dummy default to avoid compiler warnings */ + break; + } + DBUG_VOID_RETURN; +} + + +/* + Init one value to it's default values + + SYNOPSIS + init_one_value() + option Option to initialize + value Pointer to variable +*/ + +static void fini_one_value(const struct my_option *option, void *variable, + longlong value __attribute__ ((unused))) +{ + DBUG_ENTER("fini_one_value"); + switch ((option->var_type & GET_TYPE_MASK)) { + case GET_STR_ALLOC: + my_free(*((char**) variable)); + *((char**) variable)= NULL; + break; + default: /* dummy default to avoid compiler warnings */ + break; + } + DBUG_VOID_RETURN; +} + + +void my_cleanup_options(const struct my_option *options) +{ + DBUG_ENTER("my_cleanup_options"); + init_variables(options, fini_one_value); + DBUG_VOID_RETURN; +} + + +/* + initialize all 
variables to their default values + + SYNOPSIS + init_variables() + options Array of options + func_init_one_value Call this function to init the variable + + NOTES + We will initialize the value that is pointed to by options->value. + If the value is of type GET_ASK_ADDR, we will ask for the address + for a value and initialize. +*/ + +static void init_variables(const struct my_option *options, + init_func_p func_init_one_value) +{ + DBUG_ENTER("init_variables"); + for (; options->name; options++) + { + void *value; + DBUG_PRINT("options", ("name: '%s'", options->name)); + /* + We must set u_max_value first as for some variables + options->u_max_value == options->value and in this case we want to + set the value to default value. + */ + if (options->u_max_value) + func_init_one_value(options, options->u_max_value, options->max_value); + value= options->var_type & GET_ASK_ADDR ? + (*my_getopt_get_addr)("", 0, options, 0) : options->value; + if (value) + func_init_one_value(options, value, options->def_value); + } + DBUG_VOID_RETURN; +} + +/** Prints variable or option name, replacing _ with - */ +static uint print_name(const struct my_option *optp) +{ + const char *s= optp->name; + + for (;*s;s++) + putchar(*s == '_' ? '-' : *s); + return (uint)(s - optp->name); +} + +/** prints option comment with indentation and wrapping. + + The comment column starts at startpos, and has width of width + Current cursor position is curpos, returns new cursor position + + @note can print one character beyond width! +*/ +static uint print_comment(const char *comment, + int curpos, int startpos, int width) +{ + const char *end= strend(comment); + int endpos= startpos + width; + + for (; curpos < startpos; curpos++) + putchar(' '); + + if (*comment == '.' 
|| *comment == ',') + { + putchar(*comment); + comment++; + curpos++; + } + + while (end - comment > endpos - curpos) + { + const char *line_end; + for (line_end= comment + endpos - curpos; + line_end > comment && *line_end != ' '; + line_end--); + for (; comment < line_end; comment++) + putchar(*comment); + while (*comment == ' ') + comment++; /* skip the space, as a newline will take it's place now */ + putchar('\n'); + for (curpos= 0; curpos < startpos; curpos++) + putchar(' '); + } + printf("%s", comment); + return curpos + (int)(end - comment); +} + + +/* + function: my_print_options + + Print help for all options and variables. +*/ + +void my_print_help(const struct my_option *options) +{ + uint col, name_space= 22, comment_space= 57; + const struct my_option *optp; + DBUG_ENTER("my_print_help"); + + for (optp= options; optp->name; optp++) + { + const char *typelib_help= 0; + if (!optp->comment) + continue; + if (optp->id && optp->id < 256) + { + printf(" -%c%s", optp->id, strlen(optp->name) ? ", " : " "); + col= 6; + } + else + { + printf(" "); + col= 2; + } + if (strlen(optp->name)) + { + printf("--"); + col+= 2 + print_name(optp); + if (optp->arg_type == NO_ARG || + (optp->var_type & GET_TYPE_MASK) == GET_BOOL || + (optp->var_type & GET_TYPE_MASK) == GET_BIT) + { + putchar(' '); + col++; + } + else if ((optp->var_type & GET_TYPE_MASK) == GET_STR || + (optp->var_type & GET_TYPE_MASK) == GET_STR_ALLOC || + (optp->var_type & GET_TYPE_MASK) == GET_ENUM || + (optp->var_type & GET_TYPE_MASK) == GET_SET || + (optp->var_type & GET_TYPE_MASK) == GET_FLAGSET ) + { + printf("%s=name%s ", optp->arg_type == OPT_ARG ? "[" : "", + optp->arg_type == OPT_ARG ? "]" : ""); + col+= (optp->arg_type == OPT_ARG) ? 8 : 6; + } + else + { + printf("%s=#%s ", optp->arg_type == OPT_ARG ? "[" : "", + optp->arg_type == OPT_ARG ? "]" : ""); + col+= (optp->arg_type == OPT_ARG) ? 
5 : 3; + } + } + if (optp->comment && *optp->comment) + { + uint count; + + if (col > name_space) + { + putchar('\n'); + col= 0; + } + + col= print_comment(optp->comment, col, name_space, comment_space); + if (optp->var_type & GET_AUTO) + { + col= print_comment(" (Automatically configured unless set explicitly)", + col, name_space, comment_space); + } + + switch (optp->var_type & GET_TYPE_MASK) { + case GET_ENUM: + typelib_help= ". One of: "; + count= optp->typelib->count; + break; + case GET_SET: + typelib_help= ". Any combination of: "; + count= optp->typelib->count; + break; + case GET_FLAGSET: + typelib_help= ". Takes a comma-separated list of option=value pairs, " + "where value is on, off, or default, and options are: "; + count= optp->typelib->count - 1; + break; + } + if (typelib_help && + strstr(optp->comment, optp->typelib->type_names[0]) == NULL) + { + uint i; + col= print_comment(typelib_help, col, name_space, comment_space); + col= print_comment(optp->typelib->type_names[0], col, name_space, comment_space); + for (i= 1; i < count; i++) + { + col= print_comment(", ", col, name_space, comment_space); + col= print_comment(optp->typelib->type_names[i], col, name_space, comment_space); + } + } + } + putchar('\n'); + if ((optp->var_type & GET_TYPE_MASK) == GET_BOOL || + (optp->var_type & GET_TYPE_MASK) == GET_BIT) + { + if (optp->def_value != 0) + { + printf("%*s(Defaults to on; use --skip-", name_space, ""); + print_name(optp); + printf(" to disable.)\n"); + } + } + else if ((optp->var_type & GET_TYPE_MASK) == GET_SET) + printf(" Use 'ALL' to set all combinations.\n"); + } + DBUG_VOID_RETURN; +} + + +/* + function: my_print_options + + Print variables. 
+*/ + +void my_print_variables(const struct my_option *options) +{ + uint name_space= 34, length, nr; + ulonglong llvalue; + char buff[255]; + const struct my_option *optp; + DBUG_ENTER("my_print_variables"); + + for (optp= options; optp->name; optp++) + { + length= (uint)strlen(optp->name)+1; + if (length > name_space) + name_space= length; + } + + printf("\nVariables (--variable-name=value)\n"); + printf("%-*s%s", name_space, "and boolean options {FALSE|TRUE}", + "Value (after reading options)\n"); + for (length=1; length < 75; length++) + putchar(length == name_space ? ' ' : '-'); + putchar('\n'); + + for (optp= options; optp->name; optp++) + { + void *value= optp->var_type & GET_ASK_ADDR ? + (*my_getopt_get_addr)("", 0, optp, 0) : optp->value; + if (value) + { + length= print_name(optp); + for (; length < name_space; length++) + putchar(' '); + switch ((optp->var_type & GET_TYPE_MASK)) { + case GET_SET: + if (!(llvalue= *(ulonglong*) value)) + printf("%s\n", ""); + else + for (nr= 0; llvalue && nr < optp->typelib->count; nr++, llvalue >>=1) + { + if (llvalue & 1) + printf( llvalue > 1 ? "%s," : "%s\n", get_type(optp->typelib, nr)); + } + break; + case GET_FLAGSET: + llvalue= *(ulonglong*) value; + for (nr= 0; llvalue && nr < optp->typelib->count; nr++, llvalue >>=1) + { + printf("%s%s=", (nr ? "," : ""), get_type(optp->typelib, nr)); + printf(llvalue & 1 ? "on" : "off"); + } + printf("\n"); + break; + case GET_ENUM: + printf("%s\n", get_type(optp->typelib, *(ulong*) value)); + break; + case GET_STR: + case GET_STR_ALLOC: /* fall through */ + printf("%s\n", *((char**) value) ? *((char**) value) : + "(No default value)"); + break; + case GET_BOOL: + printf("%s\n", *((my_bool*) value) ? "TRUE" : "FALSE"); + break; + case GET_BIT: + { + ulonglong bit= (optp->block_size >= 0 ? + optp->block_size : + -optp->block_size); + my_bool reverse= optp->block_size < 0; + printf("%s\n", ((*((ulonglong*) value) & bit) != 0) ^ reverse ? 
+ "TRUE" : "FALSE"); + break; + } + case GET_INT: + printf("%d\n", *((int*) value)); + break; + case GET_UINT: + printf("%u\n", *((uint*) value)); + break; + case GET_LONG: + printf("%ld\n", *((long*) value)); + break; + case GET_ULONG: + printf("%lu\n", *((ulong*) value)); + break; + case GET_LL: + printf("%s\n", llstr(*((longlong*) value), buff)); + break; + case GET_ULL: + longlong10_to_str(*((ulonglong*) value), buff, 10); + printf("%s\n", buff); + break; + case GET_DOUBLE: + printf("%g\n", *(double*) value); + break; + case GET_NO_ARG: + printf("(No default value)\n"); + break; + default: + printf("(Disabled)\n"); + break; + } + } + } + DBUG_VOID_RETURN; +} diff --git a/mysys/my_getpagesize.c b/mysys/my_getpagesize.c new file mode 100644 index 00000000..62d077cc --- /dev/null +++ b/mysys/my_getpagesize.c @@ -0,0 +1,41 @@ +/* Copyright (c) 2000-2003, 2006 MySQL AB + Use is subject to license terms + + This program is free software; you can redistribute it and/or modify + it under the terms of the GNU General Public License as published by + the Free Software Foundation; version 2 of the License. + + This program is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + GNU General Public License for more details. 
+ + You should have received a copy of the GNU General Public License + along with this program; if not, write to the Free Software + Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1335 USA */ + +#include "mysys_priv.h" + +#ifndef HAVE_GETPAGESIZE + +#if defined _WIN32 + +int my_getpagesize(void) +{ + SYSTEM_INFO si; + GetSystemInfo(&si); + return si.dwPageSize; +} + +#else + +/* Default implementation */ +int my_getpagesize(void) +{ + return (int)8192; +} + +#endif + +#endif + diff --git a/mysys/my_getsystime.c b/mysys/my_getsystime.c new file mode 100644 index 00000000..09360078 --- /dev/null +++ b/mysys/my_getsystime.c @@ -0,0 +1,146 @@ +/* Copyright (c) 2004, 2011, Oracle and/or its affiliates. + Copyright (c) 2009-2011 Monty Program Ab + + This program is free software; you can redistribute it and/or modify + it under the terms of the GNU General Public License as published by + the Free Software Foundation; version 2 of the License. + + This program is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + GNU General Public License for more details. 
+ + You should have received a copy of the GNU General Public License + along with this program; if not, write to the Free Software + Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1335 USA */ + + +#include "mysys_priv.h" +#include "my_static.h" + +#ifdef _WIN32 +#define OFFSET_TO_EPOC 116444736000000000LL +static ulonglong query_performance_frequency=1; +#endif + +#ifdef HAVE_LINUX_UNISTD_H +#include +#endif + +/* For CYGWIN */ +#if !defined(CLOCK_THREAD_CPUTIME_ID) && defined(CLOCK_THREAD_CPUTIME) +#define CLOCK_THREAD_CPUTIME_ID CLOCK_THREAD_CPUTIME +#endif + +/* + return number of nanoseconds since unspecified (but always the same) + point in the past + + NOTE: + Thus to get the current time we should use the system function + with the highest possible resolution + + The value is not anchored to any specific point in time (e.g. epoch) nor + is it subject to resetting or drifting by way of adjtime() or settimeofday(), + and thus it is *NOT* appropriate for getting the current timestamp. It can be + used for calculating time intervals, though. 
+*/ + +ulonglong my_interval_timer() +{ +#ifdef HAVE_CLOCK_GETTIME + struct timespec tp; + clock_gettime(CLOCK_MONOTONIC, &tp); + return tp.tv_sec*1000000000ULL+tp.tv_nsec; +#elif defined(HAVE_GETHRTIME) + return gethrtime(); +#elif defined(_WIN32) + DBUG_ASSERT(query_performance_frequency); + LARGE_INTEGER t_cnt; + QueryPerformanceCounter(&t_cnt); + return (t_cnt.QuadPart / query_performance_frequency * 1000000000ULL) + + ((t_cnt.QuadPart % query_performance_frequency) * 1000000000ULL / + query_performance_frequency); +#else + /* TODO: check for other possibilities for hi-res timestamping */ + struct timeval tv; + gettimeofday(&tv,NULL); + return tv.tv_sec*1000000000ULL+tv.tv_usec*1000ULL; +#endif +} + + +/* Return current time in HRTIME_RESOLUTION (microseconds) since epoch */ + +my_hrtime_t my_hrtime() +{ + my_hrtime_t hrtime; +#if defined(_WIN32) + ulonglong newtime; + GetSystemTimePreciseAsFileTime((FILETIME*)&newtime); + hrtime.val= (newtime - OFFSET_TO_EPOC)/10; +#elif defined(HAVE_CLOCK_GETTIME) + struct timespec tp; + clock_gettime(CLOCK_REALTIME, &tp); + hrtime.val= tp.tv_sec*1000000ULL+tp.tv_nsec/1000ULL; +#else + struct timeval t; + /* The following loop is here because gettimeofday may fail */ + while (gettimeofday(&t, NULL) != 0) {} + hrtime.val= t.tv_sec*1000000ULL + t.tv_usec; +#endif + DBUG_EXECUTE_IF("system_time_plus_one_hour", hrtime.val += 3600*1000000ULL;); + DBUG_EXECUTE_IF("system_time_minus_one_hour", hrtime.val -= 3600*1000000ULL;); + return hrtime; +} + +#ifdef _WIN32 + +/* + Low accuracy, "coarse" timer. + Has lower latency than my_hrtime(). Used in situations, where microsecond + precision is not needed, e.g in Windows pthread_cond_timedwait, where POSIX + interface needs nanoseconds, yet the underlying Windows function only + accepts millisecons. 
+*/ +my_hrtime_t my_hrtime_coarse() +{ + my_hrtime_t hrtime; + ulonglong t; + GetSystemTimeAsFileTime((FILETIME*)&t); + hrtime.val= (t - OFFSET_TO_EPOC)/10; + return hrtime; +} + +#endif + +void my_time_init() +{ +#ifdef _WIN32 + compile_time_assert(sizeof(LARGE_INTEGER) == + sizeof(query_performance_frequency)); + QueryPerformanceFrequency((LARGE_INTEGER *)&query_performance_frequency); + DBUG_ASSERT(query_performance_frequency); +#endif +} + + +/* + Return cpu time in 1/10th on a microsecond (1e-7 s) +*/ + +ulonglong my_getcputime() +{ +#ifdef CLOCK_THREAD_CPUTIME_ID + struct timespec tp; + if (clock_gettime(CLOCK_THREAD_CPUTIME_ID, &tp)) + return 0; + return (ulonglong)tp.tv_sec*10000000+(ulonglong)tp.tv_nsec/100; +#elif defined(__NR_clock_gettime) + struct timespec tp; + if (syscall(__NR_clock_gettime, CLOCK_THREAD_CPUTIME_ID, &tp)) + return 0; + return (ulonglong)tp.tv_sec*10000000+(ulonglong)tp.tv_nsec/100; +#endif /* CLOCK_THREAD_CPUTIME_ID */ + return 0; +} diff --git a/mysys/my_getwd.c b/mysys/my_getwd.c new file mode 100644 index 00000000..d74c263d --- /dev/null +++ b/mysys/my_getwd.c @@ -0,0 +1,168 @@ +/* + Copyright (c) 2000, 2010, Oracle and/or its affiliates + + This program is free software; you can redistribute it and/or modify + it under the terms of the GNU General Public License as published by + the Free Software Foundation; version 2 of the License. + + This program is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + GNU General Public License for more details. + + You should have received a copy of the GNU General Public License + along with this program; if not, write to the Free Software + Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1335 USA */ + +/* my_setwd() and my_getwd() works with intern_filenames !! 
*/ + +#include "mysys_priv.h" +#include +#include "mysys_err.h" +#ifdef HAVE_GETWD +#include +#endif +#if defined(_WIN32) +#include +#include +#include +#endif + +/* Gets current working directory in buff. + + SYNPOSIS + my_getwd() + buf Buffer to store result. Can be curr_dir[]. + size Size of buffer + MyFlags Flags + + NOTES + Directory is always ended with FN_LIBCHAR + + RESULT + 0 ok + # error +*/ + +int my_getwd(char * buf, size_t size, myf MyFlags) +{ + char * pos; + DBUG_ENTER("my_getwd"); + DBUG_PRINT("my",("buf:%p size: %u MyFlags %lu", + buf, (uint) size, MyFlags)); + + if (size < 1) + DBUG_RETURN(-1); + + if (curr_dir[0]) /* Current pos is saved here */ + (void) strmake(buf,&curr_dir[0],size-1); + else + { +#if defined(HAVE_GETCWD) + if (size < 2) + DBUG_RETURN(-1); + if (!getcwd(buf,(uint) (size-2)) && MyFlags & MY_WME) + { + my_errno=errno; + my_error(EE_GETWD,MYF(ME_BELL),errno); + DBUG_RETURN(-1); + } +#elif defined(HAVE_GETWD) + { + char pathname[MAXPATHLEN]; + getwd(pathname); + strmake(buf,pathname,size-1); + } +#else +#error "No way to get current directory" +#endif + if (*((pos=strend(buf))-1) != FN_LIBCHAR) /* End with FN_LIBCHAR */ + { + pos[0]= FN_LIBCHAR; + pos[1]=0; + } + (void) strmake(&curr_dir[0],buf, (size_t) (FN_REFLEN-1)); + } + DBUG_RETURN(0); +} /* my_getwd */ + + +/* Set new working directory */ + +int my_setwd(const char *dir, myf MyFlags) +{ + int res; + size_t length; + char *start, *pos; + DBUG_ENTER("my_setwd"); + DBUG_PRINT("my",("dir: '%s' MyFlags %lu", dir, MyFlags)); + + start=(char *) dir; + if (! 
dir[0] || (dir[0] == FN_LIBCHAR && dir[1] == 0)) + dir=FN_ROOTDIR; + if ((res=chdir((char*) dir)) != 0) + { + my_errno=errno; + if (MyFlags & MY_WME) + my_error(EE_SETWD,MYF(ME_BELL),start,errno); + } + else + { + if (test_if_hard_path(start)) + { /* Hard pathname */ + pos= strmake(&curr_dir[0],start,(size_t) FN_REFLEN-1); + if (pos[-1] != FN_LIBCHAR) + { + length=(uint) (pos-(char*) curr_dir); + curr_dir[length]=FN_LIBCHAR; /* must end with '/' */ + curr_dir[length+1]='\0'; + } + } + else + curr_dir[0]='\0'; /* Don't save name */ + } + DBUG_RETURN(res); +} /* my_setwd */ + + + + /* Test if hard pathname */ + /* Returns 1 if dirname is a hard path */ + +int test_if_hard_path(register const char *dir_name) +{ + if (dir_name[0] == FN_HOMELIB && dir_name[1] == FN_LIBCHAR) + return (home_dir != NullS && test_if_hard_path(home_dir)); + if (dir_name[0] == FN_LIBCHAR) + return (TRUE); +#ifdef FN_DEVCHAR + return (strchr(dir_name,FN_DEVCHAR) != 0); +#else + return FALSE; +#endif +} /* test_if_hard_path */ + + +/* + Test if a name contains an (absolute or relative) path. + + SYNOPSIS + has_path() + name The name to test. + + RETURN + TRUE name contains a path. + FALSE name does not contain a path. +*/ + +my_bool has_path(const char *name) +{ + return MY_TEST(strchr(name, FN_LIBCHAR)) +#if FN_LIBCHAR != '/' + || MY_TEST(strchr(name, '/')) +#endif +#ifdef FN_DEVCHAR + || MY_TEST(strchr(name, FN_DEVCHAR)) +#endif + ; +} diff --git a/mysys/my_init.c b/mysys/my_init.c new file mode 100644 index 00000000..44488e58 --- /dev/null +++ b/mysys/my_init.c @@ -0,0 +1,554 @@ +/* + Copyright (c) 2000, 2012, Oracle and/or its affiliates + Copyright (c) 2009, 2011, Monty Program Ab + + This program is free software; you can redistribute it and/or modify + it under the terms of the GNU General Public License as published by + the Free Software Foundation; version 2 of the License. 
+ + This program is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + GNU General Public License for more details. + + You should have received a copy of the GNU General Public License + along with this program; if not, write to the Free Software + Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1335 USA */ + +#include "mysys_priv.h" +#include "my_static.h" +#include "mysys_err.h" +#include +#include +#include +#include +#ifdef HAVE_UNISTD_H +#include +#endif +#ifdef _WIN32 +#ifdef _MSC_VER +#include +#include +/* WSAStartup needs winsock library*/ +#pragma comment(lib, "ws2_32") +#endif +static void my_win_init(void); +static my_bool win32_init_tcp_ip(); +static void setup_codepages(); +#else +#define my_win_init() +#endif + +#if defined(_SC_PAGE_SIZE) && !defined(_SC_PAGESIZE) +#define _SC_PAGESIZE _SC_PAGE_SIZE +#endif + +extern pthread_key(struct st_my_thread_var*, THR_KEY_mysys); + +#define SCALE_SEC 100 +#define SCALE_USEC 10000 + +my_bool my_init_done= 0; +uint mysys_usage_id= 0; /* Incremented for each my_init() */ +size_t my_system_page_size= 8192; /* Default if no sysconf() */ + +ulonglong my_thread_stack_size= (sizeof(void*) <= 4)? 65536: ((256-16)*1024); + +static ulong atoi_octal(const char *str) +{ + long int tmp; + while (*str && my_isspace(&my_charset_latin1, *str)) + str++; + str2int(str, + (*str == '0' ? 8 : 10), /* Octalt or decimalt */ + 0, INT_MAX, &tmp); + return (ulong) tmp; +} + +MYSQL_FILE *mysql_stdin= NULL; +static MYSQL_FILE instrumented_stdin; + +#ifdef _WIN32 +static UINT orig_console_cp, orig_console_output_cp; + +static void reset_console_cp(void) +{ + /* + We try not to call SetConsoleCP unnecessarily, to workaround a bug on + older Windows 10 (1803), which could switch truetype console fonts to + raster, eventhough SetConsoleCP would be a no-op (switch from UTF8 to UTF8). 
+ */ + if (GetConsoleCP() != orig_console_cp) + SetConsoleCP(orig_console_cp); + if (GetConsoleOutputCP() != orig_console_output_cp) + SetConsoleOutputCP(orig_console_output_cp); +} + +/* + The below fixes discrepancies in console output and + command line parameter encoding. command line is in + ANSI codepage, output to console by default is in OEM, but + we like them to be in the same encoding. + + We do this only if current codepage is UTF8, i.e when we + know we're on Windows that can handle UTF8 well. +*/ +static void setup_codepages() +{ + UINT acp; + BOOL is_a_tty= fileno(stdout) >= 0 && isatty(fileno(stdout)); + + if (is_a_tty) + { + /* + Save console codepages, in case we change them, + to restore them on exit. + */ + orig_console_cp= GetConsoleCP(); + orig_console_output_cp= GetConsoleOutputCP(); + if (orig_console_cp && orig_console_output_cp) + atexit(reset_console_cp); + } + + if ((acp= GetACP()) != CP_UTF8) + return; + + /* + Use setlocale to make mbstowcs/mkdir/getcwd behave, see + https://docs.microsoft.com/en-us/cpp/c-runtime-library/reference/setlocale-wsetlocale + */ + setlocale(LC_ALL, "en_US.UTF8"); + + if (is_a_tty && (orig_console_cp != acp || orig_console_output_cp != acp)) + { + /* + If ANSI codepage is UTF8, we actually want to switch console + to it as well. + */ + SetConsoleCP(acp); + SetConsoleOutputCP(acp); + } +} +#endif + +/** + Initialize my_sys functions, resources and variables + + @return Initialization result + @retval 0 Success + @retval 1 Error. 
Couldn't initialize environment +*/ +my_bool my_init(void) +{ + char *str; + + if (my_init_done) + return 0; + + my_init_done= 1; + + mysys_usage_id++; + my_umask= 0660; /* Default umask for new files */ + my_umask_dir= 0700; /* Default umask for new directories */ + my_global_flags= 0; +#ifdef _SC_PAGESIZE + my_system_page_size= sysconf(_SC_PAGESIZE); +#endif + + /* Default creation of new files */ + if ((str= getenv("UMASK")) != 0) + my_umask= (int) (atoi_octal(str) | 0600); + /* Default creation of new dir's */ + if ((str= getenv("UMASK_DIR")) != 0) + my_umask_dir= (int) (atoi_octal(str) | 0700); + + init_glob_errs(); + + instrumented_stdin.m_file= stdin; + instrumented_stdin.m_psi= NULL; /* not yet instrumented */ + mysql_stdin= & instrumented_stdin; + + my_progname_short= "unknown"; + if (my_progname) + my_progname_short= my_progname + dirname_length(my_progname); + + /* Initialize our mutex handling */ + my_mutex_init(); + + if (my_thread_global_init()) + return 1; + +#if defined(SAFEMALLOC) && !defined(DBUG_OFF) + dbug_sanity= sf_sanity; +#endif + + /* $HOME is needed early to parse configuration files located in ~/ */ + if ((home_dir= getenv("HOME")) != 0) + home_dir= intern_filename(home_dir_buff, home_dir); + + { + DBUG_ENTER("my_init"); + DBUG_PROCESS((char*) (my_progname ? 
my_progname : "unknown")); + my_time_init(); + my_win_init(); + DBUG_PRINT("exit", ("home: '%s'", home_dir)); +#ifdef _WIN32 + if (win32_init_tcp_ip()) + DBUG_RETURN(1); +#endif +#ifdef CHECK_UNLIKELY + init_my_likely(); +#endif + DBUG_RETURN(0); + } +} /* my_init */ + + + /* End my_sys */ + +void my_end(int infoflag) +{ + /* + this code is suboptimal to workaround a bug in + Sun CC: Sun C++ 5.6 2004/06/02 for x86, and should not be + optimized until this compiler is not in use anymore + */ + FILE *info_file= DBUG_FILE; + my_bool print_info= (info_file != stderr); + + if (!my_init_done) + return; + + /* + We do not use DBUG_ENTER here, as after cleanup DBUG is no longer + operational, so we cannot use DBUG_RETURN. + */ + DBUG_PRINT("info",("Shutting down: infoflag: %d print_info: %d", + infoflag, print_info)); + if (!info_file) + { + info_file= stderr; + print_info= 0; + } + + if ((infoflag & MY_CHECK_ERROR) || print_info) + { /* Test if some file is left open */ + char ebuff[512]; + uint i, open_files, open_streams; + + for (open_streams= open_files= i= 0 ; i < my_file_limit ; i++) + { + if (my_file_info[i].type == UNOPEN) + continue; + if (my_file_info[i].type == STREAM_BY_FOPEN || + my_file_info[i].type == STREAM_BY_FDOPEN) + open_streams++; + else + open_files++; + +#ifdef EXTRA_DEBUG + fprintf(stderr, EE(EE_FILE_NOT_CLOSED), my_file_info[i].name, i); + fputc('\n', stderr); +#endif + } + if (open_files || open_streams) + { + my_snprintf(ebuff, sizeof(ebuff), EE(EE_OPEN_WARNING), + open_files, open_streams); + my_message_stderr(EE_OPEN_WARNING, ebuff, ME_BELL); + DBUG_PRINT("error", ("%s", ebuff)); + } + +#ifdef CHECK_UNLIKELY + end_my_likely(info_file); +#endif + } + free_charsets(); + my_error_unregister_all(); + my_once_free(); + + if ((infoflag & MY_GIVE_INFO) || print_info) + { +#ifdef HAVE_GETRUSAGE + struct rusage rus; +#ifdef HAVE_valgrind + /* Purify assumes that rus is uninitialized after getrusage call */ + bzero((char*) &rus, sizeof(rus)); +#endif + 
if (!getrusage(RUSAGE_SELF, &rus)) + fprintf(info_file,"\n\ +User time %.2f, System time %.2f\n\ +Maximum resident set size %ld, Integral resident set size %ld\n\ +Non-physical pagefaults %ld, Physical pagefaults %ld, Swaps %ld\n\ +Blocks in %ld out %ld, Messages in %ld out %ld, Signals %ld\n\ +Voluntary context switches %ld, Involuntary context switches %ld\n", + (rus.ru_utime.tv_sec * SCALE_SEC + + rus.ru_utime.tv_usec / SCALE_USEC) / 100.0, + (rus.ru_stime.tv_sec * SCALE_SEC + + rus.ru_stime.tv_usec / SCALE_USEC) / 100.0, + rus.ru_maxrss, rus.ru_idrss, + rus.ru_minflt, rus.ru_majflt, + rus.ru_nswap, rus.ru_inblock, rus.ru_oublock, + rus.ru_msgsnd, rus.ru_msgrcv, rus.ru_nsignals, + rus.ru_nvcsw, rus.ru_nivcsw); +#endif +#if defined(_MSC_VER) + _CrtSetReportMode( _CRT_WARN, _CRTDBG_MODE_FILE ); + _CrtSetReportFile( _CRT_WARN, _CRTDBG_FILE_STDERR ); + _CrtSetReportMode( _CRT_ERROR, _CRTDBG_MODE_FILE ); + _CrtSetReportFile( _CRT_ERROR, _CRTDBG_FILE_STDERR ); + _CrtSetReportMode( _CRT_ASSERT, _CRTDBG_MODE_FILE ); + _CrtSetReportFile( _CRT_ASSERT, _CRTDBG_FILE_STDERR ); + _CrtCheckMemory(); +#endif + } + + my_thread_end(); + my_thread_global_end(); + + if (!(infoflag & MY_DONT_FREE_DBUG)) + DBUG_END(); /* Must be done as late as possible */ + + my_mutex_end(); +#if defined(SAFE_MUTEX) + /* + Check on destroying of mutexes. A few may be left that will get cleaned + up by C++ destructors + */ + safe_mutex_end((infoflag & (MY_GIVE_INFO | MY_CHECK_ERROR)) ? 
stderr : + (FILE *) 0); +#endif /* defined(SAFE_MUTEX) */ + +#ifdef _WIN32 + WSACleanup(); +#endif + + /* At very last, delete mysys key, it is used everywhere including DBUG */ + pthread_key_delete(THR_KEY_mysys); + my_init_done= my_thr_key_mysys_exists= 0; +} /* my_end */ + +#ifdef DBUG_ASSERT_EXISTS +/* Dummy tag function for debugging */ + +void my_debug_put_break_here(void) +{ +} +#endif + +#ifdef _WIN32 + + +/* + my_parameter_handler + + Invalid parameter handler we will use instead of the one "baked" + into the CRT. +*/ + +void my_parameter_handler(const wchar_t * expression, const wchar_t * function, + const wchar_t * file, unsigned int line, + uintptr_t pReserved) +{ + __debugbreak(); +} + + +#ifdef __MSVC_RUNTIME_CHECKS +#include + +/* Turn off runtime checks for 'handle_rtc_failure' */ +#pragma runtime_checks("", off) + +/* + handle_rtc_failure + Catch the RTC error and dump it to stderr +*/ + +int handle_rtc_failure(int err_type, const char *file, int line, + const char* module, const char *format, ...) +{ + va_list args; + va_start(args, format); + fprintf(stderr, "Error:"); + vfprintf(stderr, format, args); + fprintf(stderr, " At %s:%d\n", file, line); + va_end(args); + (void) fflush(stderr); + __debugbreak(); + + return 0; /* Error is handled */ +} +#pragma runtime_checks("", restore) +#endif + + +static void my_win_init(void) +{ + DBUG_ENTER("my_win_init"); + +#if defined(_MSC_VER) + _set_invalid_parameter_handler(my_parameter_handler); +#endif + +#ifdef __MSVC_RUNTIME_CHECKS + /* + Install handler to send RTC (Runtime Error Check) warnings + to log file + */ + _RTC_SetErrorFunc(handle_rtc_failure); +#endif + + _tzset(); + + /* Disable automatic LF->CRLF translation. 
*/ + FILE* stdf[]= {stdin, stdout, stderr}; + for (int i= 0; i < array_elements(stdf); i++) + { + int fd= fileno(stdf[i]); + if (fd >= 0) + (void) _setmode(fd, O_BINARY); + } + _set_fmode(O_BINARY); + setup_codepages(); + DBUG_VOID_RETURN; +} + + +static my_bool win32_init_tcp_ip() +{ + WORD wVersionRequested = MAKEWORD( 2, 2 ); + WSADATA wsaData; + if (WSAStartup(wVersionRequested, &wsaData)) + { + fprintf(stderr, "WSAStartup() failed with error: %d\n", WSAGetLastError()); + return 1; + } + return(0); +} +#endif /* _WIN32 */ + +PSI_stage_info stage_waiting_for_table_level_lock= +{0, "Waiting for table level lock", 0}; + +#ifdef HAVE_PSI_INTERFACE +#if !defined(HAVE_PREAD) && !defined(_WIN32) +PSI_mutex_key key_my_file_info_mutex; +#endif /* !defined(HAVE_PREAD) && !defined(_WIN32) */ + +#if !defined(HAVE_LOCALTIME_R) || !defined(HAVE_GMTIME_R) +PSI_mutex_key key_LOCK_localtime_r; +#endif /* !defined(HAVE_LOCALTIME_R) || !defined(HAVE_GMTIME_R) */ + +PSI_mutex_key key_BITMAP_mutex, key_IO_CACHE_append_buffer_lock, + key_IO_CACHE_SHARE_mutex, key_KEY_CACHE_cache_lock, + key_LOCK_alarm, key_LOCK_timer, + key_my_thread_var_mutex, key_THR_LOCK_charset, key_THR_LOCK_heap, + key_THR_LOCK_lock, key_THR_LOCK_malloc, + key_THR_LOCK_mutex, key_THR_LOCK_myisam, key_THR_LOCK_net, + key_THR_LOCK_open, key_THR_LOCK_threads, + key_TMPDIR_mutex, key_THR_LOCK_myisam_mmap, key_LOCK_uuid_generator; + +static PSI_mutex_info all_mysys_mutexes[]= +{ +#if !defined(HAVE_PREAD) && !defined(_WIN32) + { &key_my_file_info_mutex, "st_my_file_info:mutex", 0}, +#endif /* !defined(HAVE_PREAD) && !defined(_WIN32) */ +#if !defined(HAVE_LOCALTIME_R) || !defined(HAVE_GMTIME_R) + { &key_LOCK_localtime_r, "LOCK_localtime_r", PSI_FLAG_GLOBAL}, +#endif /* !defined(HAVE_LOCALTIME_R) || !defined(HAVE_GMTIME_R) */ + { &key_BITMAP_mutex, "BITMAP::mutex", 0}, + { &key_IO_CACHE_append_buffer_lock, "IO_CACHE::append_buffer_lock", 0}, + { &key_IO_CACHE_SHARE_mutex, "IO_CACHE::SHARE_mutex", 0}, + { 
&key_KEY_CACHE_cache_lock, "KEY_CACHE::cache_lock", 0}, + { &key_LOCK_alarm, "LOCK_alarm", PSI_FLAG_GLOBAL}, + { &key_LOCK_timer, "LOCK_timer", PSI_FLAG_GLOBAL}, + { &key_my_thread_var_mutex, "my_thread_var::mutex", 0}, + { &key_THR_LOCK_charset, "THR_LOCK_charset", PSI_FLAG_GLOBAL}, + { &key_THR_LOCK_heap, "THR_LOCK_heap", PSI_FLAG_GLOBAL}, + { &key_THR_LOCK_lock, "THR_LOCK_lock", PSI_FLAG_GLOBAL}, + { &key_THR_LOCK_malloc, "THR_LOCK_malloc", PSI_FLAG_GLOBAL}, + { &key_THR_LOCK_mutex, "THR_LOCK::mutex", 0}, + { &key_THR_LOCK_myisam, "THR_LOCK_myisam", PSI_FLAG_GLOBAL}, + { &key_THR_LOCK_net, "THR_LOCK_net", PSI_FLAG_GLOBAL}, + { &key_THR_LOCK_open, "THR_LOCK_open", PSI_FLAG_GLOBAL}, + { &key_THR_LOCK_threads, "THR_LOCK_threads", PSI_FLAG_GLOBAL}, + { &key_TMPDIR_mutex, "TMPDIR_mutex", PSI_FLAG_GLOBAL}, + { &key_THR_LOCK_myisam_mmap, "THR_LOCK_myisam_mmap", PSI_FLAG_GLOBAL}, + { &key_LOCK_uuid_generator, "LOCK_uuid_generator", PSI_FLAG_GLOBAL } +}; + +PSI_cond_key key_COND_alarm, key_COND_timer, key_IO_CACHE_SHARE_cond, + key_IO_CACHE_SHARE_cond_writer, key_my_thread_var_suspend, + key_THR_COND_threads, key_WT_RESOURCE_cond; + +static PSI_cond_info all_mysys_conds[]= +{ + { &key_COND_alarm, "COND_alarm", PSI_FLAG_GLOBAL}, + { &key_COND_timer, "COND_timer", PSI_FLAG_GLOBAL}, + { &key_IO_CACHE_SHARE_cond, "IO_CACHE_SHARE::cond", 0}, + { &key_IO_CACHE_SHARE_cond_writer, "IO_CACHE_SHARE::cond_writer", 0}, + { &key_my_thread_var_suspend, "my_thread_var::suspend", 0}, + { &key_THR_COND_threads, "THR_COND_threads", PSI_FLAG_GLOBAL}, + { &key_WT_RESOURCE_cond, "WT_RESOURCE::cond", 0} +}; + +PSI_rwlock_key key_SAFEHASH_mutex; + +static PSI_rwlock_info all_mysys_rwlocks[]= +{ + { &key_SAFEHASH_mutex, "SAFE_HASH::mutex", 0} +}; + +#ifdef USE_ALARM_THREAD +PSI_thread_key key_thread_alarm; +#endif +PSI_thread_key key_thread_timer; + +static PSI_thread_info all_mysys_threads[]= +{ +#ifdef USE_ALARM_THREAD + { &key_thread_alarm, "alarm", PSI_FLAG_GLOBAL}, +#endif + { 
&key_thread_timer, "statement_timer", PSI_FLAG_GLOBAL} +}; + + +PSI_file_key key_file_charset, key_file_cnf; + +static PSI_file_info all_mysys_files[]= +{ + { &key_file_charset, "charset", 0}, + { &key_file_cnf, "cnf", 0} +}; + +PSI_stage_info *all_mysys_stages[]= +{ + & stage_waiting_for_table_level_lock +}; + +void my_init_mysys_psi_keys() +{ + const char* category= "mysys"; + int count; + + count= sizeof(all_mysys_mutexes)/sizeof(all_mysys_mutexes[0]); + mysql_mutex_register(category, all_mysys_mutexes, count); + + count= sizeof(all_mysys_conds)/sizeof(all_mysys_conds[0]); + mysql_cond_register(category, all_mysys_conds, count); + + count= sizeof(all_mysys_rwlocks)/sizeof(all_mysys_rwlocks[0]); + mysql_rwlock_register(category, all_mysys_rwlocks, count); + + count= sizeof(all_mysys_threads)/sizeof(all_mysys_threads[0]); + mysql_thread_register(category, all_mysys_threads, count); + + count= sizeof(all_mysys_files)/sizeof(all_mysys_files[0]); + mysql_file_register(category, all_mysys_files, count); + + count= array_elements(all_mysys_stages); + mysql_stage_register(category, all_mysys_stages, count); +} +#endif /* HAVE_PSI_INTERFACE */ + diff --git a/mysys/my_largepage.c b/mysys/my_largepage.c new file mode 100644 index 00000000..71527a9b --- /dev/null +++ b/mysys/my_largepage.c @@ -0,0 +1,488 @@ +/* Copyright (c) 2004, 2010, Oracle and/or its affiliates. All rights reserved. + Copyright (c) 2019, 2020 IBM. + + This program is free software; you can redistribute it and/or modify + it under the terms of the GNU General Public License as published by + the Free Software Foundation; version 2 of the License. + + This program is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + GNU General Public License for more details. 
+
+   You should have received a copy of the GNU General Public License
+   along with this program; if not, write to the Free Software
+   Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1335 USA */
+
+#include "mysys_priv.h"
+#include <mysys_err.h>
+
+#ifdef __linux__
+#include <dirent.h>
+#endif
+#if defined(__linux__) || defined(MAP_ALIGNED)
+#include "my_bit.h"
+#endif
+#ifdef HAVE_LINUX_MMAN_H
+#include <linux/mman.h>
+#endif
+
+#ifdef HAVE_SOLARIS_LARGE_PAGES
+#if defined(__sun__) && defined(__GNUC__) && defined(__cplusplus) \
+    && defined(_XOPEN_SOURCE)
+/*
+  memcntl exists within sys/mman.h, but the header under-defines what is
+  needed to use it in this configuration, so declare it here.
+*/
+extern int memcntl(caddr_t, size_t, int, caddr_t, int, int);
+#endif /* __sun__ ... */
+#endif /* HAVE_SOLARIS_LARGE_PAGES */
+
+#if defined(_WIN32)
+static size_t my_large_page_size;
+#define HAVE_LARGE_PAGES
+#elif defined(HAVE_MMAP)
+#define HAVE_LARGE_PAGES
+#endif
+
+/*
+  my_use_large_pages is a run-time switch where large pages can be supported
+  and the constant 0 everywhere else (so it must never be assigned to there).
+*/
+#ifdef HAVE_LARGE_PAGES
+static my_bool my_use_large_pages= 0;
+#else
+#define my_use_large_pages 0
+#endif
+
+#if defined(HAVE_GETPAGESIZES) || defined(__linux__)
+/*
+  qsort() comparator for size_t values producing a descending order:
+  returns 1 when *b is larger than *a, -1 when it is smaller, 0 when equal.
+*/
+
+static int size_t_cmp(const void *a, const void *b)
+{
+  const size_t ia= *(const size_t *) a;
+  const size_t ib= *(const size_t *) b;
+  if (ib > ia)
+  {
+    return 1;
+  }
+  else if (ib < ia)
+  {
+    return -1;
+  }
+  return 0;
+}
+#endif /* defined(HAVE_GETPAGESIZES) || defined(__linux__) */
+
+
+#if defined(__linux__) || defined(HAVE_GETPAGESIZES)
+#define my_large_page_sizes_length 8
+static size_t my_large_page_sizes[my_large_page_sizes_length];
+#endif
+
+/**
+  Linux-specific function to determine the sizes of large pages
+
+  Reads the "hugepages-<N>kB" entries of /sys/kernel/mm/hugepages, stores
+  N * 1024 for each of them in sizes[], sorts the result in descending order
+  and terminates the list with a 0 entry when there is room for it.
+*/
+#ifdef __linux__
+/* True only for real powers of two; 0 (e.g. an unparsable name) is rejected */
+static inline my_bool my_is_2pow(size_t n)
+{
+  return n != 0 && !((n) & ((n) - 1));
+}
+
+static void my_get_large_page_sizes(size_t sizes[my_large_page_sizes_length])
+{
+  DIR *dirp;
+  struct dirent *r;
+  int i= 0;
+  DBUG_ENTER("my_get_large_page_sizes");
+
+  dirp= opendir("/sys/kernel/mm/hugepages");
+  if (dirp == NULL)
+  {
+    my_error(EE_DIR, MYF(ME_BELL), "/sys/kernel/mm/hugepages", errno);
+  }
+  else
+  {
+    while (i < my_large_page_sizes_length && (r= readdir(dirp)))
+    {
+      if (strncmp("hugepages-", r->d_name, 10) == 0)
+      {
+        /* The directory name carries the size in kB after the prefix */
+        sizes[i]= strtoull(r->d_name + 10, NULL, 10) * 1024ULL;
+        if (!my_is_2pow(sizes[i]))
+        {
+          my_printf_error(0,
+                          "non-power of 2 large page size (%zu) found,"
+                          " skipping", MYF(ME_NOTE | ME_ERROR_LOG_ONLY),
+                          sizes[i]);
+          sizes[i]= 0;
+          continue;
+        }
+        ++i;
+      }
+    }
+    if (closedir(dirp))
+    {
+      my_error(EE_BADCLOSE, MYF(ME_BELL), "/sys/kernel/mm/hugepages", errno);
+    }
+    qsort(sizes, i, sizeof(size_t), size_t_cmp);
+  }
+  /*
+    Terminate the list explicitly (as the getpagesizes() variant does) so
+    that readers never depend on what the array contained before the call.
+  */
+  if (i < my_large_page_sizes_length)
+  {
+    sizes[i]= 0;
+  }
+  DBUG_VOID_RETURN;
+}
+
+
+#elif defined(HAVE_GETPAGESIZES)
+/**
+  getpagesizes() based variant: fills sizes[] with the page sizes reported by
+  the system, sorted in descending order and 0-terminated when there is room.
+*/
+static void my_get_large_page_sizes(size_t sizes[my_large_page_sizes_length])
+{
+  int nelem;
+
+  /*
+    Ask for at most as many sizes as the array can hold and trust only the
+    number of entries that were really stored. This keeps qsort() inside the
+    array even if the system knows more sizes than we have slots for, and
+    avoids sorting garbage when the call fails and returns -1.
+  */
+  nelem= getpagesizes(sizes, my_large_page_sizes_length);
+  if (nelem <= 0)
+  {
+    sizes[0]= 0;
+    return;
+  }
+  qsort(sizes, (size_t) nelem, sizeof(size_t), size_t_cmp);
+  if (nelem < my_large_page_sizes_length)
+  {
+    sizes[nelem]= 0;
+  }
+}
+
+
+#elif defined(_WIN32)
+/* Windows uses the single my_large_page_size value, there is no list */
+#define my_large_page_sizes_length 0
+#define my_get_large_page_sizes(A) do {} while(0)
+
+#else
+/* No way to enumerate large pages: the list is just the normal page size */
+#define my_large_page_sizes_length 1
+static size_t my_large_page_sizes[my_large_page_sizes_length];
+static void my_get_large_page_sizes(size_t sizes[])
+{
+  sizes[0]= my_getpagesize();
+}
+#endif
+
+
+/**
+  Returns the next large page size smaller or equal to the passed in size.
+
+  The search starts at my_large_page_sizes[*start].
+
+  Assumes my_get_large_page_sizes(my_large_page_sizes) has been called before
+  use.
+
+  For first use, have *start=0. There is no need to increment *start.
+
+  @param[in]     sz size to be searched for.
+  @param[in,out] start ptr to int representing offset in my_large_page_sizes to
+                       start from.
+  *start is updated during search and can be used to search again if 0 isn't
+  returned.
+
+  @returns the next size found. *start will be incremented to the next potential
+  size.
+  @retval a large page size that is valid on this system or 0 if no large page
+  size possible.
+*/
+#if defined(HAVE_MMAP) && !defined(_WIN32)
+static size_t my_next_large_page_size(size_t sz, int *start)
+{
+  DBUG_ENTER("my_next_large_page_size");
+
+  /* A 0 entry terminates the list */
+  while (*start < my_large_page_sizes_length && my_large_page_sizes[*start] > 0)
+  {
+    size_t cur= *start;
+    (*start)++;
+    if (my_large_page_sizes[cur] <= sz)
+    {
+      DBUG_RETURN(my_large_page_sizes[cur]);
+    }
+  }
+  DBUG_RETURN(0);
+}
+#endif /* defined(HAVE_MMAP) && !defined(_WIN32) */
+
+
+/**
+  Enable the use of large pages and collect the available large page sizes.
+
+  On Windows this tries to obtain the SE_LOCK_MEMORY_NAME privilege and reads
+  the minimum large page size. With HAVE_SOLARIS_LARGE_PAGES the kernel is
+  additionally advised to use large pages for the heap and the stack.
+
+  @param[in] super_large_pages only used with HAVE_SOLARIS_LARGE_PAGES:
+                               selects 256MB instead of 4MB as the largest
+                               desired page size.
+
+  @retval 0 always
+*/
+int my_init_large_pages(my_bool super_large_pages)
+{
+#ifdef _WIN32
+  if (!my_obtain_privilege(SE_LOCK_MEMORY_NAME))
+  {
+    my_printf_error(EE_PERM_LOCK_MEMORY,
+                    "Lock Pages in memory access rights required for use with"
+                    " large-pages, see https://mariadb.com/kb/en/library/"
+                    "mariadb-memory-allocation/#huge-pages", MYF(MY_WME));
+  }
+  my_large_page_size= GetLargePageMinimum();
+#endif
+
+#ifdef HAVE_LARGE_PAGES
+  /*
+    Without HAVE_LARGE_PAGES my_use_large_pages is the constant 0 and cannot
+    be assigned to.
+  */
+  my_use_large_pages= 1;
+#endif
+  my_get_large_page_sizes(my_large_page_sizes);
+
+#ifndef HAVE_LARGE_PAGES
+  my_printf_error(EE_OUTOFMEMORY, "No large page support on this platform",
+                  MYF(MY_WME));
+#endif
+
+#ifdef HAVE_SOLARIS_LARGE_PAGES
+  /*
+    tell the kernel that we want to use 4/256MB page for heap storage
+    and also for the stack. We use 4 MByte as default and if the
+    super-large-page is set we increase it to 256 MByte. 256 MByte
+    is for server installations with GBytes of RAM memory where
+    the MySQL Server will have page caches and other memory regions
+    measured in a number of GBytes.
+    We use as big pages as possible which isn't bigger than the above
+    desired page sizes.
+  */
+  int nelem= 0;
+  size_t max_desired_page_size= (super_large_pages ? 256 : 4) * 1024 * 1024;
+  size_t max_page_size= my_next_large_page_size(max_desired_page_size, &nelem);
+
+  if (max_page_size > 0)
+  {
+    struct memcntl_mha mpss;
+
+    mpss.mha_cmd= MHA_MAPSIZE_BSSBRK;
+    mpss.mha_pagesize= max_page_size;
+    mpss.mha_flags= 0;
+    if (memcntl(NULL, 0, MC_HAT_ADVISE, (caddr_t) &mpss, 0, 0))
+    {
+      my_error(EE_MEMCNTL, MYF(ME_WARNING | ME_ERROR_LOG_ONLY), "MC_HAT_ADVISE",
+               "MHA_MAPSIZE_BSSBRK");
+    }
+    mpss.mha_cmd= MHA_MAPSIZE_STACK;
+    if (memcntl(NULL, 0, MC_HAT_ADVISE, (caddr_t) &mpss, 0, 0))
+    {
+      my_error(EE_MEMCNTL, MYF(ME_WARNING | ME_ERROR_LOG_ONLY), "MC_HAT_ADVISE",
+               "MHA_MAPSIZE_STACK");
+    }
+  }
+#endif /* HAVE_SOLARIS_LARGE_PAGES */
+  return 0;
+}
+
+
+/**
+  Large page size helper.
+  This rounds down, if needed, the size parameter to the largest
+  multiple of an available large page size on the system.
+
+  *size is left untouched when large pages are not in use or when no
+  large page size is smaller than or equal to *size.
+*/
+void my_large_page_truncate(size_t *size)
+{
+  if (my_use_large_pages)
+  {
+    size_t large_page_size= 0;
+#ifdef _WIN32
+    large_page_size= my_large_page_size;
+#elif defined(HAVE_MMAP)
+    int page_i= 0;
+    large_page_size= my_next_large_page_size(*size, &page_i);
+#endif
+    if (large_page_size > 0)
+      *size-= *size % large_page_size;
+  }
+}
+
+
+#if defined(HAVE_MMAP) && !defined(_WIN32)
+/* Solaris for example has only MAP_ANON, FreeBSD has MAP_ANONYMOUS and
+MAP_ANON but MAP_ANONYMOUS is marked "for compatibility" */
+#if defined(MAP_ANONYMOUS)
+#define OS_MAP_ANON MAP_ANONYMOUS
+#elif defined(MAP_ANON)
+#define OS_MAP_ANON MAP_ANON
+#else
+#error unsupported mmap - no MAP_ANON{YMOUS}
+#endif
+#endif /* HAVE_MMAP && !_WIN32 */
+
+/**
+  General large pages allocator.
+  Tries to allocate memory from large pages pool and falls back to
+  my_malloc_lock() in case of failure.
+  Every implementation returns a zero filled buffer here.
+*/
+uchar *my_large_malloc(size_t *size, myf my_flags)
+{
+  uchar *ptr= NULL;
+
+#ifdef _WIN32
+  DWORD alloc_type= MEM_COMMIT | MEM_RESERVE;
+  size_t orig_size= *size;
+  DBUG_ENTER("my_large_malloc");
+
+  if (my_use_large_pages)
+  {
+    alloc_type|= MEM_LARGE_PAGES;
+    /* Align block size to my_large_page_size */
+    *size= MY_ALIGN(*size, (size_t) my_large_page_size);
+  }
+  ptr= VirtualAlloc(NULL, *size, alloc_type, PAGE_READWRITE);
+  if (!ptr)
+  {
+    if (my_flags & MY_WME)
+    {
+      if (my_use_large_pages)
+      {
+        my_printf_error(EE_OUTOFMEMORY,
+                        "Couldn't allocate %zu bytes (MEM_LARGE_PAGES page "
+                        "size %zu); Windows error %lu",
+                        MYF(ME_WARNING | ME_ERROR_LOG_ONLY), *size,
+                        my_large_page_size, GetLastError());
+      }
+      else
+      {
+        my_error(EE_OUTOFMEMORY, MYF(ME_BELL+ME_ERROR_LOG), *size);
+      }
+    }
+    if (my_use_large_pages)
+    {
+      /* Retry with normal pages and the size the caller asked for */
+      *size= orig_size;
+      ptr= VirtualAlloc(NULL, *size, MEM_COMMIT | MEM_RESERVE, PAGE_READWRITE);
+      if (!ptr && my_flags & MY_WME)
+      {
+        my_error(EE_OUTOFMEMORY, MYF(ME_BELL+ME_ERROR_LOG), *size);
+      }
+    }
+  }
+#elif defined(HAVE_MMAP)
+  int mapflag;
+  int page_i= 0;
+  int mmap_errno;
+  size_t large_page_size= 0;
+  size_t aligned_size= *size;
+  DBUG_ENTER("my_large_malloc");
+
+  /*
+    Try the large page sizes from the largest fitting one downwards. Once
+    my_next_large_page_size() returns 0 a standard mmap is attempted, which
+    always ends the loop.
+  */
+  while (1)
+  {
+    mapflag= MAP_PRIVATE | OS_MAP_ANON;
+    if (my_use_large_pages)
+    {
+      large_page_size= my_next_large_page_size(*size, &page_i);
+      /* this might be 0, in which case we do a standard mmap */
+      if (large_page_size)
+      {
+#if defined(MAP_HUGETLB) /* linux 2.6.32 */
+        mapflag|= MAP_HUGETLB;
+#if defined(MAP_HUGE_SHIFT) /* Linux-3.8+ */
+        mapflag|= my_bit_log2_size_t(large_page_size) << MAP_HUGE_SHIFT;
+#else
+# warning "No explicit large page (HUGETLB pages) support in Linux < 3.8"
+#endif
+#elif defined(MAP_ALIGNED)
+        mapflag|= MAP_ALIGNED(my_bit_log2_size_t(large_page_size));
+#if defined(MAP_ALIGNED_SUPER)
+        mapflag|= MAP_ALIGNED_SUPER;
+#endif
+#endif
+        aligned_size= MY_ALIGN(*size, (size_t) large_page_size);
+      }
+      else
+      {
+        aligned_size= *size;
+      }
+    }
+    ptr= mmap(NULL, aligned_size, PROT_READ | PROT_WRITE, mapflag, -1, 0);
+    if (ptr != (void*) -1)
+    {
+      /* success */
+      if (large_page_size)
+      {
+        /*
+          we do need to record the adjustment so that munmap gets called with
+          the right size. This is only the case for HUGETLB pages.
+        */
+        *size= aligned_size;
+      }
+      break;
+    }
+
+    /*
+      Save errno before reporting: the error reporting functions may change
+      it, and the retry decision below has to be based on the mmap() result.
+    */
+    mmap_errno= errno;
+    ptr= NULL;
+    if (my_flags & MY_WME)
+    {
+      if (large_page_size && mmap_errno == ENOMEM)
+      {
+        my_printf_error(EE_OUTOFMEMORY,
+                        "Couldn't allocate %zu bytes (Large/HugeTLB memory "
+                        "page size %zu); errno %u; continuing to smaller size",
+                        MYF(ME_WARNING | ME_ERROR_LOG_ONLY),
+                        aligned_size, large_page_size, mmap_errno);
+      }
+      else
+      {
+        my_error(EE_OUTOFMEMORY, MYF(ME_BELL+ME_ERROR_LOG), aligned_size);
+      }
+    }
+    /* try next smaller memory size; other errors are more serious */
+    if (!(large_page_size && mmap_errno == ENOMEM))
+      break;
+  }
+#else
+  DBUG_ENTER("my_large_malloc");
+  DBUG_RETURN(my_malloc_lock(*size, my_flags));
+#endif /* defined(HAVE_MMAP) */
+
+  if (ptr != NULL)
+  {
+    MEM_MAKE_DEFINED(ptr, *size);
+    update_malloc_size(*size, 0);
+  }
+
+  DBUG_RETURN(ptr);
+}
+
+
+/**
+  General large pages deallocator.
+  Tries to deallocate memory as if it was from large pages pool and falls back
+  to my_free_lock() in case of failure
+
+  @param[in] ptr  start of the area returned by my_large_malloc()
+  @param[in] size size of the area as stored in *size by my_large_malloc()
+*/
+void my_large_free(void *ptr, size_t size)
+{
+  DBUG_ENTER("my_large_free");
+
+  /*
+    The following implementations can only fail if ptr was not allocated with
+    my_large_malloc(), i.e. my_malloc_lock() was used so we should free it
+    with my_free_lock()
+
+    For ASAN, we need to explicitly unpoison this memory region because the OS
+    may reuse that memory for some TLS or stack variable. It will remain
+    poisoned if it was explicitly poisoned before release. If this happens,
+    we'll have hard to debug false positives like in MDEV-21239.
+    For valgrind, we mark it as UNDEFINED rather than NOACCESS because of the
+    implicit reuse possibility.
+  */
+#if defined(HAVE_MMAP) && !defined(_WIN32)
+  if (munmap(ptr, size))
+  {
+    my_error(EE_BADMEMORYRELEASE, MYF(ME_ERROR_LOG_ONLY), ptr, size, errno);
+  }
+#if !__has_feature(memory_sanitizer)
+  else
+  {
+    MEM_MAKE_ADDRESSABLE(ptr, size);
+  }
+#endif
+  update_malloc_size(- (longlong) size, 0);
+#elif defined(_WIN32)
+  /*
+    When RELEASE memory, the size parameter must be 0.
+    Do not use MEM_RELEASE with MEM_DECOMMIT.
+  */
+  if (ptr)
+  {
+    if (!VirtualFree(ptr, 0, MEM_RELEASE))
+    {
+      my_error(EE_BADMEMORYRELEASE, MYF(ME_ERROR_LOG_ONLY), ptr, size,
+               GetLastError());
+    }
+#if !__has_feature(memory_sanitizer)
+    else
+    {
+      /* As in the mmap case: only a successfully released area is marked */
+      MEM_MAKE_ADDRESSABLE(ptr, size);
+    }
+#endif /* memory_sanitizer */
+    update_malloc_size(- (longlong) size, 0);
+  }
+#else
+  my_free_lock(ptr);
+#endif /* HAVE_MMAP */
+
+  DBUG_VOID_RETURN;
+}
diff --git a/mysys/my_lib.c b/mysys/my_lib.c
new file mode 100644
index 00000000..fb03f0aa
--- /dev/null
+++ b/mysys/my_lib.c
@@ -0,0 +1,381 @@
+/* Copyright (c) 2000, 2013, Oracle and/or its affiliates. All rights reserved.
+   Copyright (c) 2008, 2022, MariaDB Corporation.
+
+   This program is free software; you can redistribute it and/or modify
+   it under the terms of the GNU General Public License as published by
+   the Free Software Foundation; version 2 of the License.
+
+   This program is distributed in the hope that it will be useful,
+   but WITHOUT ANY WARRANTY; without even the implied warranty of
+   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+   GNU General Public License for more details.
+
+   You should have received a copy of the GNU General Public License
+   along with this program; if not, write to the Free Software
+   Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1335 USA.
+*/
+
+/* TODO: check for overrun of memory for names.
*/
+
+#include "mysys_priv.h"
+#include <m_string.h>
+#include <my_dir.h> /* Structs used by my_dir,includes sys/types */
+#include "mysys_err.h"
+#if defined(HAVE_DIRENT_H)
+# include <dirent.h>
+#else
+# define dirent direct
+# if defined(HAVE_SYS_NDIR_H)
+#  include <sys/ndir.h>
+# endif
+# if defined(HAVE_SYS_DIR_H)
+#  include <sys/dir.h>
+# endif
+# if defined(HAVE_NDIR_H)
+#  include <ndir.h>
+# endif
+# if defined(_WIN32)
+# ifdef __BORLANDC__
+# include <dir.h>
+# endif
+# endif
+#endif
+
+/*
+  READDIR(dir, buffer, result) evaluates to true when there are no more
+  entries to read (or, with readdir_r(), when reading failed).
+*/
+#if defined(HAVE_READDIR_R)
+#define READDIR(A,B,C) ((errno=readdir_r(A,B,&C)) != 0 || !C)
+#else
+#define READDIR(A,B,C) (!(C=readdir(A)))
+#endif
+
+/*
+  We are assuming that the directory we are reading either has fewer than
+  100 files, and so can be read in one initial chunk, or has more than 1000
+  files, so that big increments are suitable.
+*/
+#define ENTRIES_START_SIZE (8192/sizeof(FILEINFO))
+#define ENTRIES_INCREMENT (65536/sizeof(FILEINFO))
+#define NAMES_START_SIZE 32768
+
+
+static int comp_names(struct fileinfo *a,struct fileinfo *b);
+
+/*
+  Everything my_dir() allocates for one directory listing. 'dir' must stay
+  the first member: my_dirend() casts the MY_DIR pointer handed to the caller
+  back to the handle.
+*/
+typedef struct {
+  MY_DIR dir;
+  DYNAMIC_ARRAY array;                  /* The FILEINFO entries */
+  MEM_ROOT root;                        /* Names and stat data */
+} MY_DIR_HANDLE;
+
+/* We need this because the caller doesn't know which malloc we've used */
+
+void my_dirend(MY_DIR *dir)
+{
+  MY_DIR_HANDLE *dirh= (MY_DIR_HANDLE*) dir;
+  DBUG_ENTER("my_dirend");
+  if (dirh)
+  {
+    delete_dynamic(&dirh->array);
+    free_root(&dirh->root, MYF(0));
+    my_free(dirh);
+  }
+  DBUG_VOID_RETURN;
+} /* my_dirend */
+
+
+        /* Compare in sort of filenames */
+
+static int comp_names(struct fileinfo *a, struct fileinfo *b)
+{
+  return (strcmp(a->name,b->name));
+} /* comp_names */
+
+
+#if !defined(_WIN32)
+
+/*
+  Copy src to dst and make sure that the result ends with FN_LIBCHAR.
+  An empty src is treated as ".".
+
+  dst must have room for FN_REFLEN + 2 bytes.
+
+  Returns a pointer to the terminating \0 in dst.
+*/
+static char *directory_file_name (char * dst, const char *src)
+{
+  char *end;
+  DBUG_ASSERT(strlen(src) < (FN_REFLEN + 1));
+
+  if (src[0] == 0)
+    src= (char*) "."; /* Use empty as current */
+  /*
+    Copy at most FN_REFLEN characters so that the separator and the
+    terminating \0 always fit in dst, even if the assert above is compiled
+    out and src is too long.
+  */
+  end= strnmov(dst, src, FN_REFLEN);
+  if (end[-1] != FN_LIBCHAR)
+    *end++= FN_LIBCHAR; /* Add last '/' */
+  *end='\0';
+  return end;
+}
+
+/*
+  Read the entries of a directory, except "." and "..".
+
+  MyFlags:
+    MY_WANT_STAT   also store the stat data of each entry; entries for which
+                   MY_S_IREAD is not set in st_mode after my_stat() are
+                   skipped
+    MY_WANT_SORT   sort the entries by name
+    MY_FAE|MY_WME  report failures with my_error()
+
+  Returns the listing, to be freed with my_dirend(), or NULL on error.
+*/
+MY_DIR *my_dir(const char *path, myf MyFlags)
+{
+  MY_DIR_HANDLE *dirh;
+  FILEINFO finfo;
+  DIR *dirp;
+  struct dirent *dp;
+  char tmp_path[FN_REFLEN + 2], *tmp_file;
+  char dirent_tmp[sizeof(struct dirent)+_POSIX_PATH_MAX+1];
+
+  DBUG_ENTER("my_dir");
+  DBUG_PRINT("my",("path: '%s' MyFlags: %lu",path,MyFlags));
+
+  /* tmp_file points to where a file name can be appended to the path */
+  tmp_file= directory_file_name(tmp_path, path);
+
+  if (!(dirp= opendir(tmp_path)))
+  {
+    my_errno= errno;
+    goto err_open;
+  }
+
+  if (!(dirh= my_malloc(key_memory_MY_DIR, sizeof(*dirh),
+                        MYF(MyFlags | MY_ZEROFILL))))
+    goto err_alloc;
+
+  if (my_init_dynamic_array(key_memory_MY_DIR, &dirh->array, sizeof(FILEINFO),
+                            ENTRIES_START_SIZE, ENTRIES_INCREMENT,
+                            MYF(MyFlags)))
+    goto error;
+
+  init_alloc_root(key_memory_MY_DIR, &dirh->root, NAMES_START_SIZE,
+                  NAMES_START_SIZE, MYF(MyFlags));
+
+  dp= (struct dirent*) dirent_tmp;
+
+  while (!(READDIR(dirp,(struct dirent*) dirent_tmp,dp)))
+  {
+    MY_STAT statbuf, *mystat= 0;
+
+    if (dp->d_name[0] == '.' &&
+        (dp->d_name[1] == '\0' ||
+         (dp->d_name[1] == '.' && dp->d_name[2] == '\0')))
+      continue; /* . or .. */
+
+    if (MyFlags & MY_WANT_STAT)
+    {
+      /*
+        The name is appended to the path inside tmp_path. Skip the entry, like
+        any other entry we cannot stat, if the full name would not fit.
+      */
+      if (strlen(dp->d_name) >=
+          (size_t) (tmp_path + sizeof(tmp_path) - tmp_file))
+        continue;
+      mystat= &statbuf;
+      bzero(mystat, sizeof(*mystat));
+      (void) strmov(tmp_file, dp->d_name);
+      (void) my_stat(tmp_path, mystat, MyFlags);
+      if (!(mystat->st_mode & MY_S_IREAD))
+        continue;
+    }
+
+    if (!(finfo.name= strdup_root(&dirh->root, dp->d_name)))
+      goto error;
+
+    if (mystat &&
+        !((mystat= memdup_root(&dirh->root, mystat, sizeof(*mystat)))))
+      goto error;
+
+    finfo.mystat= mystat;
+
+    if (push_dynamic(&dirh->array, (uchar*)&finfo))
+      goto error;
+  }
+
+  (void) closedir(dirp);
+
+  if (MyFlags & MY_WANT_SORT)
+    sort_dynamic(&dirh->array, (qsort_cmp) comp_names);
+
+  dirh->dir.dir_entry= dynamic_element(&dirh->array, 0, FILEINFO *);
+  dirh->dir.number_of_files= dirh->array.elements;
+
+  DBUG_RETURN(&dirh->dir);
+
+error:
+  my_dirend(&dirh->dir);
+err_alloc:
+  (void) closedir(dirp);
+err_open:
+  if (MyFlags & (MY_FAE | MY_WME))
+    my_error(EE_DIR, MYF(ME_BELL), path, my_errno);
+  DBUG_RETURN(NULL);
+} /* my_dir */
+
+
+#else
+
+/*
+*****************************************************************************
+** Read long filename using windows routines
+*****************************************************************************
+*/
+
+MY_DIR *my_dir(const char *path, myf MyFlags)
+{
+  MY_DIR_HANDLE *dirh= 0;
+  FILEINFO finfo;
+  struct _finddata_t find;
+  ushort mode;
+  char tmp_path[FN_REFLEN], *tmp_file,attrib;
+#ifdef _WIN64
+  __int64 handle= -1;
+#else
+  long handle= -1;
+#endif
+  DBUG_ENTER("my_dir");
+  DBUG_PRINT("my",("path: '%s' MyFlags: %d",path,(int)MyFlags));
+
+  /* Put LIB-CHAR as last path-character if not there */
+  tmp_file=tmp_path;
+  if (!*path)
+    *tmp_file++ ='.'; /* From current dir */
+  /*
+    Up to 6 more bytes may follow the copied path: '.', FN_LIBCHAR and the
+    "*.*" pattern with its terminating \0.
+  */
+  tmp_file= strnmov(tmp_file, path, FN_REFLEN-6);
+  if (tmp_file[-1] == FN_DEVCHAR)
+    *tmp_file++= '.'; /* From current dev-dir */
+  if (tmp_file[-1] != FN_LIBCHAR)
+    *tmp_file++ =FN_LIBCHAR;
+  tmp_file[0]='*'; /* Windows needs this !??? */
+  tmp_file[1]='.';
+  tmp_file[2]='*';
+  tmp_file[3]='\0';
+
+  if (!(dirh= my_malloc(PSI_INSTRUMENT_ME, sizeof(*dirh), MyFlags | MY_ZEROFILL)))
+    goto error;
+
+  if (my_init_dynamic_array(PSI_INSTRUMENT_ME, &dirh->array, sizeof(FILEINFO),
+                            ENTRIES_START_SIZE, ENTRIES_INCREMENT,
+                            MYF(MyFlags)))
+    goto error;
+
+  init_alloc_root(PSI_INSTRUMENT_ME, &dirh->root, NAMES_START_SIZE, NAMES_START_SIZE,
+                  MYF(MyFlags));
+
+  if ((handle=_findfirst(tmp_path,&find)) == -1L)
+  {
+    DBUG_PRINT("info", ("findfirst returned error, errno: %d", errno));
+    if (errno != EINVAL)
+      goto error;
+    /*
+      Could not read the directory, no read access.
+      Probably because by "chmod -r".
+      continue and return zero files in dir
+    */
+  }
+  else
+  {
+    do
+    {
+      attrib= find.attrib;
+      /*
+        Do not show hidden and system files which Windows sometimes creates.
+        Note. Because Borland's findfirst() is called with the third
+        argument = 0 hidden/system files are excluded from the search.
+      */
+      if (attrib & (_A_HIDDEN | _A_SYSTEM))
+        continue;
+
+      if (find.name[0] == '.' &&
+          (find.name[1] == '\0' ||
+           (find.name[1] == '.' && find.name[2] == '\0')))
+        continue; /* . or .. */
+
+      if (!(finfo.name= strdup_root(&dirh->root, find.name)))
+        goto error;
+      if (MyFlags & MY_WANT_STAT)
+      {
+        if (!(finfo.mystat= (MY_STAT*)alloc_root(&dirh->root, sizeof(MY_STAT))))
+          goto error;
+
+        /* Only size, mode and modification time are filled in */
+        bzero(finfo.mystat, sizeof(MY_STAT));
+        finfo.mystat->st_size=find.size;
+        mode= MY_S_IREAD;
+        if (!(attrib & _A_RDONLY))
+          mode|= MY_S_IWRITE;
+        if (attrib & _A_SUBDIR)
+          mode|= MY_S_IFDIR;
+        finfo.mystat->st_mode= mode;
+        finfo.mystat->st_mtime= ((uint32) find.time_write);
+      }
+      else
+        finfo.mystat= NULL;
+
+      if (push_dynamic(&dirh->array, (uchar*)&finfo))
+        goto error;
+    }
+    while (_findnext(handle,&find) == 0);
+    _findclose(handle);
+  }
+
+  if (MyFlags & MY_WANT_SORT)
+    sort_dynamic(&dirh->array, (qsort_cmp) comp_names);
+
+  dirh->dir.dir_entry= dynamic_element(&dirh->array, 0, FILEINFO *);
+  dirh->dir.number_of_files= dirh->array.elements;
+
+  DBUG_PRINT("exit", ("found %d files", dirh->dir.number_of_files));
+  DBUG_RETURN(&dirh->dir);
+error:
+  /* Save errno before the cleanup calls below get a chance to change it */
+  my_errno=errno;
+  if (handle != -1)
+    _findclose(handle);
+  if (dirh) /* The allocation of dirh itself may have failed */
+    my_dirend(&dirh->dir);
+  if (MyFlags & (MY_FAE | MY_WME))
+    my_error(EE_DIR,MYF(ME_BELL), path, my_errno);
+  DBUG_RETURN(NULL);
+} /* my_dir */
+
+#endif /* _WIN32 */
+
+/****************************************************************************
+** File status
+** Note that MY_STAT is assumed to be same as struct stat
+****************************************************************************/
+
+
+/*
+  Get the status of an open file.
+  Returns what the underlying fstat call returns.
+*/
+int my_fstat(File Filedes, MY_STAT *stat_area,
+             myf MyFlags __attribute__((unused)))
+{
+  DBUG_ENTER("my_fstat");
+  DBUG_PRINT("my",("fd: %d MyFlags: %lu", Filedes, MyFlags));
+#ifdef _WIN32
+  DBUG_RETURN(my_win_fstat(Filedes, stat_area));
+#elif defined HAVE_valgrind
+  {
+    int s= fstat(Filedes, stat_area);
+    if (!s)
+      MSAN_STAT_WORKAROUND(stat_area);
+    DBUG_RETURN(s);
+  }
+#else
+  DBUG_RETURN(fstat(Filedes, (struct stat *) stat_area));
+#endif
+}
+
+
+/*
+  Get the status of the file 'path'.
+
+  If stat_area is NULL, the result is stored in a newly allocated MY_STAT
+  that the caller has to free with my_free().
+
+  Returns the area with the result, or NULL on error. On error my_errno is
+  set and, with MY_FAE or MY_WME in my_flags, the error is reported.
+*/
+MY_STAT *my_stat(const char *path, MY_STAT *stat_area, myf my_flags)
+{
+  int m_used;
+  DBUG_ENTER("my_stat");
+  DBUG_PRINT("my", ("path: '%s' stat_area: %p MyFlags: %lu", path,
+                    stat_area, my_flags));
+
+  if ((m_used= (stat_area == NULL)))
+    if (!(stat_area= (MY_STAT *) my_malloc(key_memory_MY_STAT, sizeof(MY_STAT),
+                                           my_flags)))
+      goto error;
+#ifndef _WIN32
+  if (!stat((char *) path, (struct stat *) stat_area))
+  {
+    MSAN_STAT_WORKAROUND(stat_area);
+    DBUG_RETURN(stat_area);
+  }
+#else
+  if (!my_win_stat(path, stat_area))
+    DBUG_RETURN(stat_area);
+#endif
+  /* Save errno first, before anything else can change it */
+  my_errno= errno;
+  DBUG_PRINT("error",("Got errno: %d from stat", my_errno));
+  if (m_used) /* Free if new area */
+    my_free(stat_area);
+
+error:
+  if (my_flags & (MY_FAE+MY_WME))
+    my_error(EE_STAT, MYF(ME_BELL), path, my_errno);
+  DBUG_RETURN((MY_STAT *) NULL);
+} /* my_stat */
diff --git a/mysys/my_libwrap.c b/mysys/my_libwrap.c
new file mode 100644
index 00000000..45ab3a7f
--- /dev/null
+++ b/mysys/my_libwrap.c
@@ -0,0 +1,42 @@
+/* Copyright (c) 2003, 2006 MySQL AB
+   Use is subject to license terms
+
+   This program is free software; you can redistribute it and/or modify
+   it under the terms of the GNU General Public License as published by
+   the Free Software Foundation; version 2 of the License.
+
+   This program is distributed in the hope that it will be useful,
+   but WITHOUT ANY WARRANTY; without even the implied warranty of
+   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+   GNU General Public License for more details.
+
+   You should have received a copy of the GNU General Public License
+   along with this program; if not, write to the Free Software
+   Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1335 USA */
+
+/*
+  This is needed to be able to compile with original libwrap header
+  files that don't have the prototypes
+*/
+
+#include <my_global.h>
+#include <my_libwrap.h>
+
+#ifdef HAVE_LIBWRAP
+
+/* Calls libwrap's fromhost() for req */
+void my_fromhost(struct request_info *req)
+{
+  fromhost(req);
+}
+
+/* Calls libwrap's hosts_access() for req and returns its result */
+int my_hosts_access(struct request_info *req)
+{
+  return hosts_access(req);
+}
+
+/* Calls libwrap's eval_client() for req and returns its result */
+char *my_eval_client(struct request_info *req)
+{
+  return eval_client(req);
+}
+
+#endif /* HAVE_LIBWRAP */
diff --git a/mysys/my_likely.c b/mysys/my_likely.c
new file mode 100644
index 00000000..d52074f0
--- /dev/null
+++ b/mysys/my_likely.c
@@ -0,0 +1,172 @@
+/* Copyright (c) 2018, MariaDB Corporation Ab.
+
+   This program is free software; you can redistribute it and/or modify
+   it under the terms of the GNU General Public License as published by
+   the Free Software Foundation; version 2 of the License.
+
+   This program is distributed in the hope that it will be useful,
+   but WITHOUT ANY WARRANTY; without even the implied warranty of
+   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+   GNU General Public License for more details.
+ + You should have received a copy of the GNU General Public License + along with this program; if not, write to the Free Software + Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA */ + +/* + Checks that my_likely/my_unlikely is correctly used + + Note that we can't use mysql_mutex or my_malloc here as these + uses likely() macros and the likely_mutex would be used twice +*/ + +#include "mysys_priv.h" +#include +#include + +#ifndef CHECK_UNLIKEY +my_bool likely_inited= 0; + +typedef struct st_likely_entry +{ + const char *key; + size_t key_length; + uint line; + ulonglong ok,fail; +} LIKELY_ENTRY; + +static uchar *get_likely_key(LIKELY_ENTRY *part, size_t *length, + my_bool not_used __attribute__((unused))) +{ + *length= part->key_length; + return (uchar*) part->key; +} + +pthread_mutex_t likely_mutex; +HASH likely_hash; + +void init_my_likely() +{ + /* Allocate big enough to avoid malloc calls */ + my_hash_init2(PSI_NOT_INSTRUMENTED, &likely_hash, 10000, &my_charset_bin, + 1024, 0, 0, (my_hash_get_key) get_likely_key, 0, free, + HASH_UNIQUE); + likely_inited= 1; + pthread_mutex_init(&likely_mutex, MY_MUTEX_INIT_FAST); +} + +static int likely_cmp(LIKELY_ENTRY **a, LIKELY_ENTRY **b) +{ + int cmp; + if ((cmp= strcmp((*a)->key, (*b)->key))) + return cmp; + return (int) ((*a)->line - (*b)->line); +} + + +void end_my_likely(FILE *out) +{ + uint i; + FILE *likely_file; + my_bool do_close= 0; + LIKELY_ENTRY **sort_ptr= 0; + + likely_inited= 0; + + if (!(likely_file= out)) + { + char name[80]; + snprintf(name, sizeof(name), "/tmp/unlikely-%lu.out", (ulong) getpid()); + if ((likely_file= my_fopen(name, O_TRUNC | O_WRONLY, MYF(MY_WME)))) + do_close= 1; + else + likely_file= stderr; + } + fflush(likely_file); + fputs("Wrong likely/unlikely usage:\n", likely_file); + if (!(sort_ptr= (LIKELY_ENTRY**) + malloc(sizeof(LIKELY_ENTRY*) *likely_hash.records))) + { + fprintf(stderr, "ERROR: Out of memory in end_my_likely\n"); + goto err; + } + + for (i=0 ; i 
< likely_hash.records ; i++) + sort_ptr[i]= (LIKELY_ENTRY *) my_hash_element(&likely_hash, i); + + my_qsort(sort_ptr, likely_hash.records, sizeof(LIKELY_ENTRY*), + (qsort_cmp) likely_cmp); + + for (i=0 ; i < likely_hash.records ; i++) + { + LIKELY_ENTRY *entry= sort_ptr[i]; + if (entry->fail > entry->ok) + fprintf(likely_file, + "%50s line: %6u ok: %8lld fail: %8lld\n", + entry->key, entry->line, entry->ok, entry->fail); + } + fputs("\n", likely_file); + fflush(likely_file); +err: + free((void*) sort_ptr); + if (do_close) + my_fclose(likely_file, MYF(MY_WME)); + pthread_mutex_destroy(&likely_mutex); + my_hash_free(&likely_hash); +} + + +static LIKELY_ENTRY *my_likely_find(const char *file_name, uint line) +{ + char key[80], *pos; + LIKELY_ENTRY *entry; + size_t length; + + if (!likely_inited) + return 0; + + pos= strnmov(key, file_name, sizeof(key)-4); + int3store(pos+1, line); + length= (size_t) (pos-key)+4; + + pthread_mutex_lock(&likely_mutex); + if (!(entry= (LIKELY_ENTRY*) my_hash_search(&likely_hash, (uchar*) key, + length))) + { + if (!(entry= (LIKELY_ENTRY *) malloc(sizeof(*entry) + length))) + return 0; + entry->key= (char*) (entry+1); + memcpy((void*) entry->key, key, length); + entry->key_length= length; + entry->line= line; + entry->ok= entry->fail= 0; + + if (my_hash_insert(&likely_hash, (void*) entry)) + { + pthread_mutex_unlock(&likely_mutex); + free(entry); + return 0; + } + } + pthread_mutex_unlock(&likely_mutex); + return entry; +} + + +int my_likely_ok(const char *file_name, uint line) +{ + LIKELY_ENTRY *entry= my_likely_find(file_name, line); + if (entry) + entry->ok++; + return 0; +} + + +int my_likely_fail(const char *file_name, uint line) +{ + LIKELY_ENTRY *entry= my_likely_find(file_name, line); + if (entry) + entry->fail++; + return 0; +} +#endif /* CHECK_UNLIKEY */ diff --git a/mysys/my_lock.c b/mysys/my_lock.c new file mode 100644 index 00000000..7597436f --- /dev/null +++ b/mysys/my_lock.c @@ -0,0 +1,228 @@ +/* Copyright (c) 2000, 2010, 
#ifdef _WIN32
#define WIN_LOCK_INFINITE -1
#define WIN_LOCK_SLEEP_MILLIS 100

/*
  Lock or unlock a region of a file with LockFileEx()/UnlockFileEx().

  @param fd           File descriptor
  @param locktype     F_UNLCK, F_RDLCK (shared lock) or anything else
                      (exclusive lock)
  @param start        Offset of the region
  @param length       Length of the region
  @param timeout_sec  Seconds to keep retrying, or WIN_LOCK_INFINITE to
                      block until the lock is granted

  @return 0 on success. -1 on failure; errno is EAGAIN on timeout,
          otherwise it is set through my_osmaperr().
*/
static int win_lock(File fd, int locktype, my_off_t start, my_off_t length,
                    int timeout_sec)
{
  LARGE_INTEGER region_start, region_length;
  DWORD lock_flags;
  OVERLAPPED ov= {0};
  HANDLE hFile= (HANDLE)my_get_osfhandle(fd);
  int waited_millis;
  int timeout_millis;

  DBUG_ENTER("win_lock");

  region_start.QuadPart= start;
  region_length.QuadPart= length;

  ov.Offset= region_start.LowPart;
  ov.OffsetHigh= region_start.HighPart;

  if (locktype == F_UNLCK)
  {
    if (UnlockFileEx(hFile, 0, region_length.LowPart, region_length.HighPart,
                     &ov))
      DBUG_RETURN(0);
    /*
      For compatibility with fcntl implementation, ignore error,
      if region was not locked
    */
    if (GetLastError() != ERROR_NOT_LOCKED)
      goto error;
    SetLastError(0);
    DBUG_RETURN(0);
  }

  /* A read lock maps to a shared lock, a write lock to an exclusive one. */
  lock_flags= (locktype == F_RDLCK) ? 0 : LOCKFILE_EXCLUSIVE_LOCK;

  /*
    Drop old lock first to avoid double locking.
    During analysis of Bug#38133 (Myisamlog test fails on Windows)
    I met the situation that the program myisamlog locked the file
    exclusively, then additionally shared, then did one unlock, and
    then blocked on an attempt to lock it exclusively again.
    Unlocking before every lock fixed the problem.
    Note that this introduces a race condition. When the application
    wants to convert an exclusive lock into a shared one, it will now
    first unlock the file and then lock it shared. A waiting exclusive
    lock could step in here. For reasons described in Bug#38133 and
    Bug#41124 (Server hangs on Windows with --external-locking after
    INSERT...SELECT) and in the review thread at
    http://lists.mysql.com/commits/60721 it seems to be the better
    option than not to unlock here.
    If one day someone notices a way how to do file lock type changes
    on Windows without unlocking before taking the new lock, please
    change this code accordingly to fix the race condition.
  */
  if (!UnlockFileEx(hFile, 0, region_length.LowPart, region_length.HighPart,
                    &ov) &&
      (GetLastError() != ERROR_NOT_LOCKED))
    goto error;

  if (timeout_sec == WIN_LOCK_INFINITE)
  {
    if (LockFileEx(hFile, lock_flags, 0, region_length.LowPart,
                   region_length.HighPart, &ov))
      DBUG_RETURN(0);
    goto error;
  }

  lock_flags|= LOCKFILE_FAIL_IMMEDIATELY;
  timeout_millis= timeout_sec * 1000;
  /* Try lock in a loop, until the lock is acquired or timeout happens */
  waited_millis= 0;
  for (;;)
  {
    if (LockFileEx(hFile, lock_flags, 0, region_length.LowPart,
                   region_length.HighPart, &ov))
      DBUG_RETURN(0);

    if (GetLastError() != ERROR_LOCK_VIOLATION)
      goto error;

    if (waited_millis >= timeout_millis)
      break;
    Sleep(WIN_LOCK_SLEEP_MILLIS);
    waited_millis+= WIN_LOCK_SLEEP_MILLIS;
  }

  /* timeout */
  errno= EAGAIN;
  DBUG_RETURN(-1);

error:
  my_osmaperr(GetLastError());
  DBUG_RETURN(-1);
}
#endif
+*/ + +int my_lock(File fd, int locktype, my_off_t start, my_off_t length, + myf MyFlags) +{ +#ifdef HAVE_FCNTL + int value; + ALARM_VARIABLES; +#endif + + DBUG_ENTER("my_lock"); + DBUG_PRINT("my",("fd: %d Op: %d start: %ld Length: %ld MyFlags: %lu", + fd,locktype,(long) start,(long) length,MyFlags)); + if (my_disable_locking && ! (MyFlags & MY_FORCE_LOCK)) + DBUG_RETURN(0); + +#if defined(_WIN32) + { + int timeout_sec; + if (MyFlags & MY_NO_WAIT) + timeout_sec= 0; + else + timeout_sec= WIN_LOCK_INFINITE; + + if (win_lock(fd, locktype, start, length, timeout_sec) == 0) + DBUG_RETURN(0); + } +#else +#if defined(HAVE_FCNTL) + { + struct flock lock; + + lock.l_type= (short) locktype; + lock.l_whence= SEEK_SET; + lock.l_start= (off_t) start; + lock.l_len= (off_t) length; + + if (MyFlags & (MY_NO_WAIT | MY_SHORT_WAIT)) + { + if (fcntl(fd,F_SETLK,&lock) != -1) /* Check if we can lock */ + DBUG_RETURN(0); /* Ok, file locked */ + if (MyFlags & MY_NO_WAIT) + { + my_errno= (errno == EACCES) ? EAGAIN : errno ? errno : -1; + DBUG_RETURN(-1); + } + + DBUG_PRINT("info",("Was locked, trying with alarm")); + ALARM_INIT; + while ((value=fcntl(fd,F_SETLKW,&lock)) && ! ALARM_TEST && + errno == EINTR) + { /* Setup again so we don`t miss it */ + ALARM_REINIT; + } + ALARM_END; + if (value != -1) + DBUG_RETURN(0); + if (errno == EINTR) + errno=EAGAIN; + } + else if (fcntl(fd,F_SETLKW,&lock) != -1) /* Wait until a lock */ + DBUG_RETURN(0); + } +#else + if (MyFlags & MY_SEEK_NOT_DONE) + { + if (my_seek(fd,start,MY_SEEK_SET,MYF(MyFlags & ~MY_SEEK_NOT_DONE)) + == MY_FILEPOS_ERROR) + { + /* + If an error has occurred in my_seek then we will already + have an error code in my_errno; Just return error code. + */ + DBUG_RETURN(-1); + } + } + if (lockf(fd,locktype,length) != -1) + DBUG_RETURN(0); +#endif /* HAVE_FCNTL */ +#endif /* _WIN32 */ + + /* We got an error. We don't want EACCES errors */ + my_errno=(errno == EACCES) ? EAGAIN : errno ? 
errno : -1; + + if (MyFlags & MY_WME) + { + if (locktype == F_UNLCK) + my_error(EE_CANTUNLOCK,MYF(ME_BELL),my_errno); + else + my_error(EE_CANTLOCK,MYF(ME_BELL),my_errno); + } + DBUG_PRINT("error",("my_errno: %d (%d)",my_errno,errno)); + DBUG_RETURN(-1); +} /* my_lock */ diff --git a/mysys/my_lockmem.c b/mysys/my_lockmem.c new file mode 100644 index 00000000..45f16bac --- /dev/null +++ b/mysys/my_lockmem.c @@ -0,0 +1,101 @@ +/* Copyright (c) 2000, 2013, Oracle and/or its affiliates. All rights reserved. + Copyright (c) 2022, MariaDB Corporation. + + This program is free software; you can redistribute it and/or modify + it under the terms of the GNU General Public License as published by + the Free Software Foundation; version 2 of the License. + + This program is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + GNU General Public License for more details. + + You should have received a copy of the GNU General Public License + along with this program; if not, write to the Free Software + Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1335 USA */ + +/* Alloc a block of locked memory (memory protected against swap) */ + +#include "mysys_priv.h" +#include "mysys_err.h" +#include "aligned.h" +#include + +#ifdef HAVE_MLOCK + +struct st_mem_list +{ + LIST list; + uchar *page; + uint size; +}; + +LIST *mem_list; + +uchar *my_malloc_lock(size_t size, myf MyFlags) +{ + int success; + uint pagesize= my_system_page_size; + uchar *ptr; + struct st_mem_list *element; + DBUG_ENTER("my_malloc_lock"); + + size=((size-1) & ~(pagesize-1))+pagesize; + if (!(ptr=aligned_malloc(size,pagesize))) + { + if (MyFlags & (MY_FAE+MY_WME)) + my_error(EE_OUTOFMEMORY, MYF(ME_BELL+ME_FATAL), size); + DBUG_RETURN(0); + } + success = mlock((uchar*) ptr,size); + if (success != 0 && geteuid() == 0) + { + DBUG_PRINT("warning",("Failed to lock memory. 
errno %d\n", + errno)); + fprintf(stderr, "Warning: Failed to lock memory. errno %d\n", + errno); + } + else + { + /* Add block in a list for munlock */ + if (!(element=(struct st_mem_list*) my_malloc(sizeof(*element),MyFlags))) + { + (void) munlock((uchar*) ptr,size); + free(ptr); + DBUG_RETURN(0); + } + element->list.data=(uchar*) element; + element->page=ptr; + element->size=size; + mysql_mutex_lock(&THR_LOCK_malloc); + mem_list=list_add(mem_list,&element->list); + mysql_mutex_unlock(&THR_LOCK_malloc); + update_malloc_size((longlong) size, 0); + } + DBUG_RETURN(ptr); +} + + +void my_free_lock(uchar *ptr) +{ + LIST *list; + struct st_mem_list *element=0; + + mysql_mutex_lock(&THR_LOCK_malloc); + for (list=mem_list ; list ; list=list->next) + { + element=(struct st_mem_list*) list->data; + if (ptr == element->page) + { /* Found locked mem */ + (void) munlock((uchar*) ptr,element->size); + mem_list=list_delete(mem_list,list); + update_malloc_size(- (longlong) element->size, 0); + break; + } + } + mysql_mutex_unlock(&THR_LOCK_malloc); + my_free(element); + aligned_free(ptr); /* Free even if not locked */ +} + +#endif /* HAVE_MLOCK */ diff --git a/mysys/my_malloc.c b/mysys/my_malloc.c new file mode 100644 index 00000000..5a5ed6c8 --- /dev/null +++ b/mysys/my_malloc.c @@ -0,0 +1,260 @@ +/* + Copyright (c) 2000, 2013, Oracle and/or its affiliates + Copyright (c) 2009, 2014, SkySQL Ab + + This program is free software; you can redistribute it and/or modify + it under the terms of the GNU General Public License as published by + the Free Software Foundation; version 2 of the License. + + This program is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + GNU General Public License for more details. 
+ + You should have received a copy of the GNU General Public License + along with this program; if not, write to the Free Software + Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1335 USA */ + +#include "mysys_priv.h" +#include "mysys_err.h" +#include + +struct my_memory_header +{ + PSI_thread *m_owner; + size_t m_size; + PSI_memory_key m_key; +}; +typedef struct my_memory_header my_memory_header; +#define HEADER_SIZE 24 + +#define USER_TO_HEADER(P) ((my_memory_header*)((char *)(P) - HEADER_SIZE)) +#define HEADER_TO_USER(P) ((char*)(P) + HEADER_SIZE) + +/** + Inform application that memory usage has changed + + @param size Size of memory segment allocated or freed + @param flag 1 if thread specific (allocated by MY_THREAD_SPECIFIC), + 0 if system specific. + + The type os size is long long, to be able to handle negative numbers to + decrement the memory usage + + @return 0 - ok + 1 - failure, abort the allocation +*/ + +static void dummy(long long size __attribute__((unused)), + my_bool is_thread_specific __attribute__((unused))) +{} + +MALLOC_SIZE_CB update_malloc_size= dummy; + +void set_malloc_size_cb(MALLOC_SIZE_CB func) +{ + update_malloc_size= func; +} + + +/** + Allocate a sized block of memory. + + @param key Key to register instrumented memory + @param size The size of the memory block in bytes. + @param flags Failure action modifiers (bitmasks). + + @return A pointer to the allocated memory block, or NULL on failure. 
+*/ +void *my_malloc(PSI_memory_key key, size_t size, myf my_flags) +{ + my_memory_header *mh; + void *point; + DBUG_ENTER("my_malloc"); + DBUG_PRINT("my",("size: %zu flags: %lu", size, my_flags)); + compile_time_assert(sizeof(my_memory_header) <= HEADER_SIZE); + + if (!(my_flags & (MY_WME | MY_FAE))) + my_flags|= my_global_flags; + + /* Safety */ + if (!size) + size=1; + if (size > SIZE_T_MAX - 1024L*1024L*16L) /* Wrong call */ + DBUG_RETURN(0); + + /* We have to align size as we store MY_THREAD_SPECIFIC flag in the LSB */ + size= ALIGN_SIZE(size); + + if (DBUG_IF("simulate_out_of_memory")) + mh= NULL; + else + mh= (my_memory_header*) sf_malloc(size + HEADER_SIZE, my_flags); + + if (mh == NULL) + { + my_errno=errno; + if (my_flags & MY_FAE) + error_handler_hook=fatal_error_handler_hook; + if (my_flags & (MY_FAE+MY_WME)) + my_error(EE_OUTOFMEMORY, MYF(ME_BELL+ME_ERROR_LOG+ME_FATAL),size); + if (my_flags & MY_FAE) + abort(); + point= NULL; + } + else + { + int flag= MY_TEST(my_flags & MY_THREAD_SPECIFIC); + mh->m_size= size | flag; + mh->m_key= PSI_CALL_memory_alloc(key, size, & mh->m_owner); + if (update_malloc_size) + { + mh->m_size|=2; + update_malloc_size(size + HEADER_SIZE, flag); + } + point= HEADER_TO_USER(mh); + if (my_flags & MY_ZEROFILL) + bzero(point, size); + else + TRASH_ALLOC(point, size); + } + DBUG_PRINT("exit",("ptr: %p", point)); + DBUG_RETURN(point); +} + + +/** + @brief wrapper around realloc() + + @param key key to register instrumented memory + @param old_point pointer to currently allocated area + @param size new size requested, must be >0 + @param my_flags flags + + @note if size==0 realloc() may return NULL; my_realloc() treats this as an + error which is not the intention of realloc() +*/ +void *my_realloc(PSI_memory_key key, void *old_point, size_t size, myf my_flags) +{ + my_memory_header *old_mh, *mh; + void *point; + size_t old_size; + my_bool old_flags; + DBUG_ENTER("my_realloc"); + DBUG_PRINT("my",("ptr: %p size: %zu flags: %lu", 
old_point, size, my_flags)); + + DBUG_ASSERT(size > 0); + if (!old_point && (my_flags & MY_ALLOW_ZERO_PTR)) + DBUG_RETURN(my_malloc(key, size, my_flags)); + + old_mh= USER_TO_HEADER(old_point); + old_size= old_mh->m_size & ~3; + old_flags= old_mh->m_size & 3; + + DBUG_ASSERT(old_mh->m_key == key || old_mh->m_key == PSI_NOT_INSTRUMENTED); + DBUG_ASSERT((old_flags & 1) == MY_TEST(my_flags & MY_THREAD_SPECIFIC)); + + size= ALIGN_SIZE(size); + mh= sf_realloc(old_mh, size + HEADER_SIZE, my_flags); + + if (mh == NULL) + { + if (size < old_size) + DBUG_RETURN(old_point); + my_errno=errno; + if (my_flags & MY_FREE_ON_ERROR) + { + /* my_free will take care of size accounting */ + my_free(old_point); + old_point= 0; + } + if (my_flags & (MY_FAE+MY_WME)) + my_error(EE_OUTOFMEMORY, MYF(ME_BELL + ME_FATAL), size); + point= NULL; + } + else + { + mh->m_size= size | old_flags; + mh->m_key= PSI_CALL_memory_realloc(key, old_size, size, & mh->m_owner); + if (update_malloc_size && (old_flags & 2)) + update_malloc_size((longlong)size - (longlong)old_size, old_flags & 1); + point= HEADER_TO_USER(mh); + } + + DBUG_PRINT("exit",("ptr: %p", point)); + DBUG_RETURN(point); +} + + +/** + Free memory allocated with my_malloc. + + @param ptr Pointer to the memory allocated by my_malloc. +*/ +void my_free(void *ptr) +{ + my_memory_header *mh; + size_t old_size; + my_bool old_flags; + DBUG_ENTER("my_free"); + DBUG_PRINT("my",("ptr: %p", ptr)); + + if (ptr == NULL) + DBUG_VOID_RETURN; + + mh= USER_TO_HEADER(ptr); + old_size= mh->m_size & ~3; + old_flags= mh->m_size & 3; + PSI_CALL_memory_free(mh->m_key, old_size, mh->m_owner); + + if (update_malloc_size && (old_flags & 2)) + update_malloc_size(- (longlong) old_size - HEADER_SIZE, old_flags & 1); + +#ifndef SAFEMALLOC + /* + Trash memory if not safemalloc. 
We don't have to do this if safemalloc + is used as safemalloc will also do trashing + */ + TRASH_FREE(ptr, old_size); +#endif + sf_free(mh); + DBUG_VOID_RETURN; +} + + +void *my_memdup(PSI_memory_key key, const void *from, size_t length, myf my_flags) +{ + void *ptr; + DBUG_ENTER("my_memdup"); + + if ((ptr= my_malloc(key, length,my_flags)) != 0) + memcpy(ptr, from, length); + DBUG_RETURN(ptr); +} + + +char *my_strdup(PSI_memory_key key, const char *from, myf my_flags) +{ + char *ptr; + size_t length= strlen(from)+1; + DBUG_ENTER("my_strdup"); + + if ((ptr= (char*) my_malloc(key, length, my_flags))) + memcpy(ptr, from, length); + DBUG_RETURN(ptr); +} + + +char *my_strndup(PSI_memory_key key, const char *from, size_t length, myf my_flags) +{ + char *ptr; + DBUG_ENTER("my_strndup"); + + if ((ptr= (char*) my_malloc(key, length+1, my_flags))) + { + memcpy(ptr, from, length); + ptr[length]= 0; + } + DBUG_RETURN(ptr); +} + diff --git a/mysys/my_memmem.c b/mysys/my_memmem.c new file mode 100644 index 00000000..98b45fed --- /dev/null +++ b/mysys/my_memmem.c @@ -0,0 +1,84 @@ +/* Copyright (c) 2000, 2006, 2007 MySQL AB + Use is subject to license terms + + This program is free software; you can redistribute it and/or modify + it under the terms of the GNU General Public License as published by + the Free Software Foundation; version 2 of the License. + + This program is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + GNU General Public License for more details. + + You should have received a copy of the GNU General Public License + along with this program; if not, write to the Free Software + Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1335 USA */ + +#include +#include + +/* + my_memmem, port of a GNU extension. 
/*
  my_memmem, port of a GNU extension.

  Returns a pointer to the beginning of the first occurrence of needle in
  haystack, or NULL if the substring is not found.

  NULL is also returned when either pointer is NULL, when either length
  is 0, or when needle is longer than haystack.
*/

void *my_memmem(const void *haystack, size_t haystacklen,
                const void *needle, size_t needlelen)
{
  const unsigned char *cursor;
  const unsigned char *last_possible_needle_location;

  /* Easy answers */
  if (haystack == NULL || needle == NULL) return(NULL);
  if (haystacklen == 0 || needlelen == 0) return(NULL);
  if (needlelen > haystacklen) return(NULL);

  /*
    Computed only after the checks above: forming this pointer from a NULL
    haystack, or with needlelen > haystacklen, would point outside the
    buffer.
  */
  last_possible_needle_location=
    (const unsigned char *) haystack + (haystacklen - needlelen);

  for (cursor = haystack; cursor <= last_possible_needle_location; cursor++) {
    if (memcmp(needle, cursor, needlelen) == 0) {
      return((void *) cursor);
    }
  }
  return(NULL);
}



#ifdef MAIN
#include <assert.h>
#include <stdio.h>

/* Self-test, built only when MAIN is defined. */
int main(int argc, char *argv[]) {
  char haystack[10], needle[3];

  memmove(haystack, "0123456789", 10);

  memmove(needle, "no", 2);
  assert(my_memmem(haystack, 10, needle, 2) == NULL);

  memmove(needle, "345", 3);
  assert(my_memmem(haystack, 10, needle, 3) != NULL);

  memmove(needle, "789", 3);
  assert(my_memmem(haystack, 10, needle, 3) != NULL);
  assert(my_memmem(haystack, 9, needle, 3) == NULL);

  memmove(needle, "012", 3);
  assert(my_memmem(haystack, 10, needle, 3) != NULL);

  assert(my_memmem(NULL, 10, needle, 3) == NULL);
  assert(my_memmem(haystack, 0, needle, 3) == NULL);
  assert(my_memmem(haystack, 10, NULL, 3) == NULL);
  assert(my_memmem(haystack, 10, needle, 0) == NULL);

  assert(my_memmem(haystack, 1, needle, 3) == NULL);

  printf("success\n");
  return(0);
}

#endif
+ + This program is free software; you can redistribute it and/or modify + it under the terms of the GNU General Public License as published by + the Free Software Foundation; version 2 of the License. + + This program is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + GNU General Public License for more details. + + You should have received a copy of the GNU General Public License + along with this program; if not, write to the Free Software + Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1335 USA */ + +#include "mysys_priv.h" + +void my_message_stderr(uint error __attribute__((unused)), + const char *str, myf MyFlags) +{ + DBUG_ENTER("my_message_stderr"); + DBUG_PRINT("enter",("message: %s",str)); + (void) fflush(stdout); + if (MyFlags & (ME_NOTE | ME_ERROR_LOG_ONLY)) + DBUG_VOID_RETURN; + if (MyFlags & ME_BELL) + (void) fputc('\007', stderr); + if (my_progname) + { + (void)fputs(my_progname,stderr); (void)fputs(": ",stderr); + } + (void)fputs(str,stderr); + (void)fputc('\n',stderr); + (void)fflush(stderr); + DBUG_VOID_RETURN; +} diff --git a/mysys/my_minidump.cc b/mysys/my_minidump.cc new file mode 100644 index 00000000..d81aab2f --- /dev/null +++ b/mysys/my_minidump.cc @@ -0,0 +1,115 @@ +/* Copyright (c) 2021, MariaDB Corporation + + This program is free software; you can redistribute it and/or modify + it under the terms of the GNU General Public License as published by + the Free Software Foundation; version 2 of the License. + + This program is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + GNU General Public License for more details. 
+ + You should have received a copy of the GNU General Public License + along with this program; if not, write to the Free Software + Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1335 USA */ + +#include +#include +#include +#include + +#define VERBOSE(fmt,...) \ + if (verbose) { fprintf(stderr, "my_create_minidump : " fmt,__VA_ARGS__); } + +extern "C" BOOL my_create_minidump(DWORD pid, BOOL verbose) +{ + HANDLE file = 0; + HANDLE process= 0; + DWORD size= MAX_PATH; + char path[MAX_PATH]; + char working_dir[MAX_PATH]; + char tmpname[MAX_PATH]; + char *filename= 0; + bool ret= FALSE; + process= OpenProcess(PROCESS_QUERY_INFORMATION | PROCESS_VM_READ, FALSE, pid); + if (!process) + { + VERBOSE("cannot open process pid=%lu to create dump, last error %lu\n", + pid, GetLastError()); + goto exit; + } + + if (QueryFullProcessImageName(process, 0, path, &size) == 0) + { + VERBOSE("cannot read process path for pid %lu, last error %lu\n", + pid, GetLastError()); + goto exit; + } + + filename= strrchr(path, '\\'); + if (filename) + { + filename++; + // We are not interested in dump of some proceses (my_safe_process.exe,cmd.exe) + // since they are only used to start up other programs. 
+ // We're interested however in their children; + const char *exclude_programs[] = {"my_safe_process.exe","cmd.exe", 0}; + for(size_t i=0; exclude_programs[i]; i++) + if (_stricmp(filename, exclude_programs[i]) == 0) + goto exit; + } + else + filename= path; + + // Add .dmp extension + char *p; + if ((p= strrchr(filename, '.')) == 0) + p= filename + strlen(filename); + + strncpy(p, ".dmp", path + MAX_PATH - p); + + // Íf file with this name exist, generate unique name with .dmp extension + if (GetFileAttributes(filename) != INVALID_FILE_ATTRIBUTES) + { + if (!GetTempFileName(".", filename, 0, tmpname)) + { + fprintf(stderr, "GetTempFileName failed, last error %lu", GetLastError()); + goto exit; + } + strncat_s(tmpname, ".dmp", sizeof(tmpname)); + filename= tmpname; + } + + if (!GetCurrentDirectory(MAX_PATH, working_dir)) + { + VERBOSE("GetCurrentDirectory failed, last error %lu", GetLastError()); + goto exit; + } + + file= CreateFile(filename, GENERIC_READ | GENERIC_WRITE, + 0, 0, CREATE_ALWAYS, FILE_ATTRIBUTE_NORMAL, 0); + + if (file == INVALID_HANDLE_VALUE) + { + VERBOSE("CreateFile() failed for file %s, working dir %s, last error = %lu\n", + filename, working_dir, GetLastError()); + goto exit; + } + + if (!MiniDumpWriteDump(process, pid, file, MiniDumpNormal, 0, 0, 0)) + { + VERBOSE("Failed to write minidump to %s, working dir %s, last error %lu\n", + filename, working_dir, GetLastError()); + goto exit; + } + + VERBOSE("Minidump written to %s, directory %s\n", filename, working_dir); + ret= TRUE; +exit: + if (process != 0 && process != INVALID_HANDLE_VALUE) + CloseHandle(process); + + if (file != 0 && file != INVALID_HANDLE_VALUE) + CloseHandle(file); + return ret; +} diff --git a/mysys/my_mkdir.c b/mysys/my_mkdir.c new file mode 100644 index 00000000..00bcca77 --- /dev/null +++ b/mysys/my_mkdir.c @@ -0,0 +1,48 @@ +/* Copyright (c) 2000, 2001, 2006 MySQL AB + Use is subject to license terms + + This program is free software; you can redistribute it and/or 
modify + it under the terms of the GNU General Public License as published by + the Free Software Foundation; version 2 of the License. + + This program is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + GNU General Public License for more details. + + You should have received a copy of the GNU General Public License + along with this program; if not, write to the Free Software + Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1335 USA */ + +#include "mysys_priv.h" +#include "mysys_err.h" +#include +#include +#ifdef _WIN32 +#include +#endif + +int my_mkdir(const char *dir, int Flags, myf MyFlags) +{ + DBUG_ENTER("my_dir"); + DBUG_PRINT("enter",("dir: %s",dir)); +#ifdef _WIN32 + LPSECURITY_ATTRIBUTES attr = + my_dir_security_attributes.lpSecurityDescriptor? + &my_dir_security_attributes : NULL; + BOOL ok = CreateDirectory(dir, attr); + if (!ok) + { + my_osmaperr(GetLastError()); +#else + if (mkdir((char*) dir, Flags & my_umask_dir)) + { +#endif + my_errno=errno; + DBUG_PRINT("error",("error %d when creating directory %s",my_errno,dir)); + if (MyFlags & (MY_FFNF | MY_FAE | MY_WME)) + my_error(EE_CANT_MKDIR, MYF(ME_BELL), dir, my_errno); + DBUG_RETURN(-1); + } + DBUG_RETURN(0); +} diff --git a/mysys/my_mmap.c b/mysys/my_mmap.c new file mode 100644 index 00000000..e04f4add --- /dev/null +++ b/mysys/my_mmap.c @@ -0,0 +1,105 @@ +/* Copyright (c) 2000, 2010, Oracle and/or its affiliates. All rights reserved. + + This program is free software; you can redistribute it and/or modify + it under the terms of the GNU General Public License as published by + the Free Software Foundation; version 2 of the License. + + This program is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. 
#ifdef HAVE_SYS_MMAN_H

/*
  system msync() only syncs mmap'ed area to fs cache.
  fsync() is required to really sync to disc

  @return The result of my_sync() on the file descriptor.
*/
int my_msync(int fd, void *addr, size_t len, int flags)
{
  /*
    NOTE(review): the result of msync() is ignored and only my_sync()
    decides the return value - confirm this is intended.
  */
  (void) msync(addr, len, flags);
  return my_sync(fd, MYF(0));
}

#elif defined(_WIN32)

#ifndef FILE_DAX_VOLUME
#define FILE_DAX_VOLUME 0x20000000
#endif

static SECURITY_ATTRIBUTES mmap_security_attributes=
  {sizeof(SECURITY_ATTRIBUTES), 0, TRUE};

/*
  Map a file into memory (Windows implementation).

  @param addr    Not used
  @param len     Number of bytes to map
  @param prot    PROT_WRITE gives a writable view, otherwise read-only
  @param flags   MAP_SYNC makes the call fail unless the volume reports
                 FILE_DAX_VOLUME
  @param fd      File descriptor
  @param offset  Offset in the file where the view starts

  @return Address of the view, or MAP_FAILED.
*/
void *my_mmap(void *addr, size_t len, int prot,
               int flags, File fd, my_off_t offset)
{
  HANDLE hFileMap;
  LPVOID ptr;
  ULONGLONG map_size;
  HANDLE hFile= (HANDLE)my_get_osfhandle(fd);
  DBUG_ENTER("my_mmap");
  DBUG_PRINT("mysys", ("map fd: %d", fd));

  if (hFile == INVALID_HANDLE_VALUE)
    DBUG_RETURN(MAP_FAILED);

  /*
    The mapping object has to cover the view, which ends at offset + len;
    pass that size as a full 64-bit value. A len of 0 keeps the size at 0,
    which makes the object as large as the file.
  */
  map_size= len ? (ULONGLONG) offset + (ULONGLONG) len : 0;
  hFileMap=CreateFileMapping(hFile, &mmap_security_attributes,
                             PAGE_READWRITE,
                             (DWORD) (map_size >> 32), (DWORD) map_size,
                             NULL);
  if (hFileMap == 0)
    DBUG_RETURN(MAP_FAILED);

  ptr=MapViewOfFile(hFileMap,
                    prot & PROT_WRITE ? FILE_MAP_WRITE : FILE_MAP_READ,
                    (DWORD)(offset >> 32), (DWORD)offset, len);

  /*
    MSDN explicitly states that it's possible to close File Mapping Object
    even when a view is not unmapped - then the object will be held open
    implicitly until unmap, as every view stores internally a handler of
    a corresponding File Mapping Object
   */
  CloseHandle(hFileMap);

  /* Nothing to verify or unmap if the view could not be created */
  if (ptr && (flags & MAP_SYNC))
  {
    DWORD filesystemFlags;
    if (!GetVolumeInformationByHandleW(hFile, NULL, 0, NULL, NULL,
                                       &filesystemFlags, NULL, 0) ||
        !(filesystemFlags & FILE_DAX_VOLUME))
    {
      UnmapViewOfFile(ptr);
      ptr= NULL;
    }
  }

  if (ptr)
  {
    DBUG_PRINT("mysys", ("mapped addr: %p", ptr));
    DBUG_RETURN(ptr);
  }

  DBUG_RETURN(MAP_FAILED);
}

/* Unmap a view returned by my_mmap(). Returns 0 on success, -1 on failure. */
int my_munmap(void *addr, size_t len)
{
  DBUG_ENTER("my_munmap");
  DBUG_PRINT("mysys", ("unmap addr: %p", addr));
  DBUG_RETURN(UnmapViewOfFile(addr) ? 0 : -1);
}

/*
  Flush a mapped range and then the file buffers.
  Returns 0 if both calls succeed, -1 otherwise. 'flags' is not used.
*/
int my_msync(int fd, void *addr, size_t len, int flags)
{
  return FlushViewOfFile(addr, len) &&
         FlushFileBuffers((HANDLE) my_get_osfhandle(fd)) ? 0 : -1;
}

#else
#warning "no mmap!"
#endif
#if defined(USE_MYSYS_NEW)

/*
  Common allocation path for all operator new variants below.
  A request for zero bytes is turned into a request for one byte and
  the memory is taken from my_malloc() under key_memory_new.
*/
static inline void *mysys_new_alloc(size_t sz)
{
  if (!sz)
    sz= 1;
  return (void *) my_malloc(key_memory_new, sz, MYF(0));
}

/* Scalar and array forms, with and without std::nothrow_t */

void *operator new (size_t sz)
{
  return mysys_new_alloc(sz);
}

void *operator new[] (size_t sz)
{
  return mysys_new_alloc(sz);
}

void* operator new(std::size_t sz, const std::nothrow_t&) throw()
{
  return mysys_new_alloc(sz);
}

void* operator new[](std::size_t sz, const std::nothrow_t&) throw()
{
  return mysys_new_alloc(sz);
}

/* Every operator delete variant hands the pointer back to my_free() */

void operator delete (void *ptr, std::size_t) throw ()
{
  my_free(ptr);
}

void operator delete (void *ptr) throw ()
{
  my_free(ptr);
}

void operator delete[] (void *ptr) throw ()
{
  my_free(ptr);
}

void operator delete[] (void *ptr, std::size_t) throw ()
{
  my_free(ptr);
}

void operator delete(void* ptr, const std::nothrow_t&) throw()
{
  my_free(ptr);
}

void operator delete[](void* ptr, const std::nothrow_t&) throw()
{
  my_free(ptr);
}

C_MODE_START

/* Handler for a call through a pure virtual slot: assert and return 0 */
int __cxa_pure_virtual()
{
  assert(! "Aborted: pure virtual method called.");
  return 0;
}

C_MODE_END
#else
/*
  Define a dummy symbol, just to avoid compiler/linker warnings
  about compiling an essentially empty file.
*/
int my_new_cc_symbol;
#endif /* USE_MYSYS_NEW */
here to get smaller dbug-startup +*/ + +void* my_once_alloc(size_t Size, myf MyFlags) +{ + size_t get_size, max_left; + uchar* point; + reg1 USED_MEM *next; + reg2 USED_MEM **prev; + + Size= ALIGN_SIZE(Size); + prev= &my_once_root_block; + max_left=0; + for (next=my_once_root_block ; next && next->left < Size ; next= next->next) + { + if (next->left > max_left) + max_left=next->left; + prev= &next->next; + } + if (! next) + { /* Time to alloc new block */ + get_size= Size+ALIGN_SIZE(sizeof(USED_MEM)); + if (max_left*4 < my_once_extra && get_size < my_once_extra) + get_size=my_once_extra; /* Normal alloc */ + + if ((next = (USED_MEM*) malloc(get_size)) == 0) + { + my_errno=errno; + if (MyFlags & (MY_FAE+MY_WME)) + my_error(EE_OUTOFMEMORY, MYF(ME_BELL+ME_FATAL), get_size); + return((uchar*) 0); + } + DBUG_PRINT("test",("my_once_malloc %lu byte malloced", (ulong) get_size)); + next->next= 0; + next->size= get_size; + next->left= get_size-ALIGN_SIZE(sizeof(USED_MEM)); + *prev=next; + } + point= (uchar*) ((char*) next+ (next->size-next->left)); + next->left-= Size; + + if (MyFlags & MY_ZEROFILL) + bzero(point, Size); + return((void*) point); +} /* my_once_alloc */ + + +char *my_once_strdup(const char *src,myf myflags) +{ + size_t len= strlen(src)+1; + uchar *dst= my_once_alloc(len, myflags); + if (dst) + memcpy(dst, src, len); + return (char*) dst; +} + + +void *my_once_memdup(const void *src, size_t len, myf myflags) +{ + uchar *dst= my_once_alloc(len, myflags); + if (dst) + memcpy(dst, src, len); + return dst; +} + + +/* + Deallocate everything that was allocated with my_once_alloc + + SYNOPSIS + my_once_free() +*/ + +void my_once_free(void) +{ + reg1 USED_MEM *next,*old; + DBUG_ENTER("my_once_free"); + + for (next=my_once_root_block ; next ; ) + { + old=next; next= next->next ; + free((uchar*) old); + } + my_once_root_block=0; + + DBUG_VOID_RETURN; +} /* my_once_free */ diff --git a/mysys/my_open.c b/mysys/my_open.c new file mode 100644 index 00000000..4d26a7b2 --- 
/dev/null +++ b/mysys/my_open.c @@ -0,0 +1,158 @@ +/* Copyright (c) 2000, 2011, Oracle and/or its affiliates. All rights reserved. + + This program is free software; you can redistribute it and/or modify + it under the terms of the GNU General Public License as published by + the Free Software Foundation; version 2 of the License. + + This program is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + GNU General Public License for more details. + + You should have received a copy of the GNU General Public License + along with this program; if not, write to the Free Software + Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1335 USA */ + +#include "mysys_priv.h" +#include "mysys_err.h" +#include +#include +#include "my_atomic.h" + +CREATE_NOSYMLINK_FUNCTION( + open_nosymlinks(const char *pathname, int flags, int mode), + openat(dfd, filename, O_NOFOLLOW | flags, mode), + open(pathname, O_NOFOLLOW | flags, mode) +); + +/* + Open a file + + SYNOPSIS + my_open() + FileName Fully qualified file name + Flags Read | write + MyFlags Special flags + + RETURN VALUE + File descriptor +*/ + +File my_open(const char *FileName, int Flags, myf MyFlags) + /* Path-name of file */ + /* Read | write .. 
*/ + /* Special flags */ +{ + File fd; + DBUG_ENTER("my_open"); + DBUG_PRINT("my",("Name: '%s' Flags: %d MyFlags: %lu", + FileName, Flags, MyFlags)); + if (!(MyFlags & (MY_WME | MY_FAE | MY_FFNF))) + MyFlags|= my_global_flags; +#if defined(_WIN32) + fd= my_win_open(FileName, Flags); +#else + if (MyFlags & MY_NOSYMLINKS) + fd = open_nosymlinks(FileName, Flags | O_CLOEXEC, my_umask); + else + fd = open(FileName, Flags | O_CLOEXEC, my_umask); +#endif + + fd= my_register_filename(fd, FileName, FILE_BY_OPEN, + EE_FILENOTFOUND, MyFlags); + DBUG_RETURN(fd); +} /* my_open */ + + +/* + Close a file + + SYNOPSIS + my_close() + fd File sescriptor + myf Special Flags + +*/ + +int my_close(File fd, myf MyFlags) +{ + int err; + char *name= NULL; + DBUG_ENTER("my_close"); + DBUG_PRINT("my",("fd: %d MyFlags: %lu",fd, MyFlags)); + if (!(MyFlags & (MY_WME | MY_FAE))) + MyFlags|= my_global_flags; + + if ((uint) fd < my_file_limit && my_file_info[fd].type != UNOPEN) + { + name= my_file_info[fd].name; + my_file_info[fd].name= NULL; + my_file_info[fd].type= UNOPEN; + } +#ifndef _WIN32 + err= close(fd); +#else + err= my_win_close(fd); +#endif + if (err) + { + DBUG_PRINT("error",("Got error %d on close",err)); + my_errno=errno; + if (MyFlags & (MY_FAE | MY_WME)) + my_error(EE_BADCLOSE, MYF(ME_BELL | (MyFlags & (ME_NOTE | ME_ERROR_LOG))), + name,errno); + } + if (name) + { + my_free(name); + } + my_atomic_add32_explicit(&my_file_opened, -1, MY_MEMORY_ORDER_RELAXED); + DBUG_RETURN(err); +} /* my_close */ + + +/* + Register file in my_file_info[] + + SYNOPSIS + my_register_filename() + fd File number opened, -1 if error on open + FileName File name + type_file_type How file was created + error_message_number Error message number if caller got error (fd == -1) + MyFlags Flags for my_close() + + RETURN + -1 error + # Filenumber + +*/ + +File my_register_filename(File fd, const char *FileName, enum file_type + type_of_file, uint error_message_number, myf MyFlags) +{ + 
DBUG_ENTER("my_register_filename"); + if ((int) fd >= MY_FILE_MIN) + { + my_atomic_add32_explicit(&my_file_opened, 1, MY_MEMORY_ORDER_RELAXED); + if ((uint) fd >= my_file_limit || (MyFlags & MY_NO_REGISTER)) + DBUG_RETURN(fd); + my_file_info[fd].name = my_strdup(key_memory_my_file_info, FileName, MyFlags); + statistic_increment(my_file_total_opened,&THR_LOCK_open); + my_file_info[fd].type = type_of_file; + DBUG_PRINT("exit",("fd: %d",fd)); + DBUG_RETURN(fd); + } + my_errno= errno; + + DBUG_PRINT("error",("Got error %d on open", my_errno)); + if (MyFlags & (MY_FFNF | MY_FAE | MY_WME)) + { + if (my_errno == EMFILE) + error_message_number= EE_OUT_OF_FILERESOURCES; + my_error(error_message_number, + MYF(ME_BELL | (MyFlags & (ME_NOTE | ME_ERROR_LOG))), + FileName, my_errno); + } + DBUG_RETURN(-1); +} diff --git a/mysys/my_port.c b/mysys/my_port.c new file mode 100644 index 00000000..c0e36eaf --- /dev/null +++ b/mysys/my_port.c @@ -0,0 +1,40 @@ +/* Copyright (C) 2002 MySQL AB + + This library is free software; you can redistribute it and/or + modify it under the terms of the GNU Library General Public + License as published by the Free Software Foundation; version 2 + of the License. + + This library is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + Library General Public License for more details. + + You should have received a copy of the GNU Library General Public + License along with this library; if not, write to the Free + Software Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, + MA 02110-1335 USA */ + +/* + Small functions to make code portable +*/ + +#include "mysys_priv.h" + +#ifdef _AIX + +/* + On AIX, at least with gcc 3.1, the expression + '(double) (ulonglong) var' doesn't always work for big unsigned + integers like '18446744073709551615'. The end result is that the + high bit is simply dropped. 
#ifdef _AIX

/*
  Convert an unsigned 64 bit integer to double

  SYNOPSIS
    my_ulonglong2double()
      nr   Value to convert

  NOTES
    Only compiled on AIX. The conversion is done in a function of its
    own, which must not be made inline, because doing the cast directly
    at the call site was seen to drop the high bit of big values there.

  RETURN
    nr cast to double
*/

double my_ulonglong2double(unsigned long long nr)
{
  return (double) nr;
}
#endif /* _AIX */
+ + RETURN + (size_t) -1 Error + # Number of bytes read +*/ + +size_t my_pread(File Filedes, uchar *Buffer, size_t Count, my_off_t offset, + myf MyFlags) +{ + size_t readbytes, save_count= 0; + + DBUG_ENTER("my_pread"); + + DBUG_PRINT("my",("fd: %d Seek: %llu Buffer: %p Count: %lu MyFlags: %lu", + Filedes, (ulonglong)offset, Buffer, (ulong)Count, MyFlags)); + + if (!(MyFlags & (MY_WME | MY_FAE | MY_FNABP))) + MyFlags|= my_global_flags; + + for (;;) + { + errno= 0; /* Linux, Windows don't reset this on EOF/success */ +#ifdef _WIN32 + readbytes= my_win_pread(Filedes, Buffer, Count, offset); +#else + readbytes= pread(Filedes, Buffer, Count, offset); +#endif + + if (readbytes != Count) + { + /* We should never read with wrong file descriptor! */ + DBUG_ASSERT(readbytes != (size_t)-1 || errno != EBADF); + my_errno= errno; + if (errno == 0 || (readbytes != (size_t) -1 && + (MyFlags & (MY_NABP | MY_FNABP)))) + my_errno= HA_ERR_FILE_TOO_SHORT; + DBUG_PRINT("warning",("Read only %d bytes off %u from %d, errno: %d", + (int) readbytes, (uint) Count,Filedes,my_errno)); + if ((readbytes == 0 || readbytes == (size_t) -1) && errno == EINTR) + { + DBUG_PRINT("debug", ("my_pread() was interrupted and returned %d", + (int) readbytes)); + continue; /* Interrupted */ + } + + /* Do a read retry if we didn't get enough data on first read */ + if (readbytes != (size_t) -1 && readbytes != 0 && + (MyFlags & MY_FULL_IO)) + { + Buffer+= readbytes; + Count-= readbytes; + save_count+= readbytes; + offset+= readbytes; + continue; + } + + if (MyFlags & (MY_WME | MY_FAE | MY_FNABP)) + { + if (readbytes == (size_t) -1) + my_error(EE_READ, + MYF(ME_BELL | (MyFlags & (ME_NOTE | ME_ERROR_LOG))), + my_filename(Filedes),my_errno); + else if (MyFlags & (MY_NABP | MY_FNABP)) + my_error(EE_EOFERR, + MYF(ME_BELL | (MyFlags & (ME_NOTE | ME_ERROR_LOG))), + my_filename(Filedes),my_errno); + } + if (readbytes == (size_t) -1 || (MyFlags & (MY_FNABP | MY_NABP))) + DBUG_RETURN(MY_FILE_ERROR); /* Return with error 
*/ + } + if (MyFlags & (MY_NABP | MY_FNABP)) + readbytes= 0; /* Read went ok; Return 0 */ + else + readbytes+= save_count; + DBUG_RETURN(readbytes); + } +} /* my_pread */ + + +/* + Write a chunk of bytes to a file at a given position + + SYNOPSIOS + my_pwrite() + Filedes File descriptor + Buffer Buffer to write data from + Count Number of bytes to write + offset Position to write to + MyFlags Flags + + NOTES + This differs from the normal pwrite() call in that we don't care + to set the position in the file back to the original position + if the system doesn't support pwrite() + + RETURN + (size_t) -1 Error + # Number of bytes read +*/ + +size_t my_pwrite(int Filedes, const uchar *Buffer, size_t Count, + my_off_t offset, myf MyFlags) +{ + size_t writtenbytes, written; + uint errors; + DBUG_ENTER("my_pwrite"); + DBUG_PRINT("my",("fd: %d Seek: %llu Buffer: %p Count: %lu MyFlags: %lu", + Filedes, (ulonglong)offset, Buffer, (ulong)Count, MyFlags)); + errors= 0; + written= 0; + if (!(MyFlags & (MY_WME | MY_FAE | MY_FNABP))) + MyFlags|= my_global_flags; + + for (;;) + { +#ifdef _WIN32 + writtenbytes= my_win_pwrite(Filedes, Buffer, Count,offset); +#else + writtenbytes= pwrite(Filedes, Buffer, Count, offset); +#endif + if (writtenbytes == Count) + break; + my_errno= errno; + if (writtenbytes != (size_t) -1) + { /* Safegueard */ + written+=writtenbytes; + Buffer+=writtenbytes; + Count-=writtenbytes; + offset+=writtenbytes; + } + DBUG_PRINT("error",("Write only %u bytes", (uint) writtenbytes)); +#ifndef NO_BACKGROUND + if (my_thread_var->abort) + MyFlags&= ~ MY_WAIT_IF_FULL; /* End if aborted by user */ + if ((my_errno == ENOSPC || my_errno == EDQUOT) && + (MyFlags & MY_WAIT_IF_FULL)) + { + wait_for_free_space(my_filename(Filedes), errors); + errors++; + continue; + } + if ((writtenbytes && writtenbytes != (size_t) -1) || my_errno == EINTR) + continue; /* Retry */ +#endif + + /* Don't give a warning if it's ok that we only write part of the data */ + if (MyFlags & (MY_NABP | 
MY_FNABP)) + { + if (MyFlags & (MY_WME | MY_FAE | MY_FNABP)) + my_error(EE_WRITE, MYF(ME_BELL | (MyFlags & (ME_NOTE | ME_ERROR_LOG))), + my_filename(Filedes),my_errno); + DBUG_RETURN(MY_FILE_ERROR); /* Error on write */ + } + break; /* Return bytes written */ + } + DBUG_EXECUTE_IF("check", my_seek(Filedes, -1, SEEK_SET, MYF(0));); + if (MyFlags & (MY_NABP | MY_FNABP)) + DBUG_RETURN(0); /* Want only errors */ + DBUG_RETURN(writtenbytes+written); /* purecov: inspected */ +} /* my_pwrite */ diff --git a/mysys/my_pthread.c b/mysys/my_pthread.c new file mode 100644 index 00000000..9bedfe36 --- /dev/null +++ b/mysys/my_pthread.c @@ -0,0 +1,467 @@ +/* Copyright (c) 2000, 2011, Oracle and/or its affiliates. All rights reserved. + + This program is free software; you can redistribute it and/or modify + it under the terms of the GNU General Public License as published by + the Free Software Foundation; version 2 of the License. + + This program is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + GNU General Public License for more details. 
+ + You should have received a copy of the GNU General Public License + along with this program; if not, write to the Free Software + Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1335 USA */ + +/* Functions to get threads more portable */ + +#define DONT_REMAP_PTHREAD_FUNCTIONS + +#include "mysys_priv.h" +#include +#include +#include +#include + +#if (defined(__BSD__) || defined(_BSDI_VERSION)) +#define SCHED_POLICY SCHED_RR +#else +#define SCHED_POLICY SCHED_OTHER +#endif + +/* + Some functions for RTS threads, AIX, Siemens Unix and UnixWare 7 + (and DEC OSF/1 3.2 too) +*/ + +int my_pthread_create_detached=1; + +/* localtime_r for SCO 3.2V4.2 */ + +#if !defined(HAVE_LOCALTIME_R) || !defined(HAVE_GMTIME_R) + +extern mysql_mutex_t LOCK_localtime_r; + +#endif + +#if !defined(HAVE_LOCALTIME_R) +struct tm *localtime_r(const time_t *clock, struct tm *res) +{ + struct tm *tmp; + mysql_mutex_lock(&LOCK_localtime_r); + tmp=localtime(clock); + *res= *tmp; + mysql_mutex_unlock(&LOCK_localtime_r); + return res; +} +#endif + +#if !defined(HAVE_GMTIME_R) +/* + Reentrant version of standard gmtime() function. + Needed on some systems which don't implement it. +*/ + +struct tm *gmtime_r(const time_t *clock, struct tm *res) +{ + struct tm *tmp; + mysql_mutex_lock(&LOCK_localtime_r); + tmp= gmtime(clock); + *res= *tmp; + mysql_mutex_unlock(&LOCK_localtime_r); + return res; +} +#endif + +/**************************************************************************** +** Replacement of sigwait if the system doesn't have one (like BSDI 3.0) +** +** Note: +** This version of sigwait() is assumed to called in a loop so the signalmask +** is permanently modified to reflect the signal set. This is done to get +** a much faster implementation. +** +** This implementation isn't thread safe: It assumes that only one +** thread is using sigwait. +** +** If one later supplies a different signal mask, all old signals that +** was used before are unblocked and set to SIGDFL. 
#if !defined(HAVE_SIGWAIT) && !defined(sigwait) && !defined(_WIN32) && !defined(HAVE_rts_threads)

#if !defined(DONT_USE_SIGSUSPEND)

/*
  sigwait_set      The set given to the latest sigwait_setup() call
  rev_sigwait_set  Mask handed to sigsuspend() while waiting
  px_recd          Signals seen by px_handle_sig() and not yet returned
*/
static sigset_t sigwait_set,rev_sigwait_set,px_recd;

/* Signal handler: remember that 'sig' has arrived */
void px_handle_sig(int sig)
{
  sigaddset(&px_recd, sig);
}


/*
  Install px_handle_sig() for the signals in 'set' and restore the
  default action for signals that were in the previous set but are not
  in this one.

  SYNOPSIS
    sigwait_setup()
      set   Signals that sigwait() should wait for
*/
void sigwait_setup(sigset_t *set)
{
  int i;
  struct sigaction sact,sact1;
  sigset_t unblock_mask;

  sact.sa_flags = 0;
  sact.sa_handler = px_handle_sig;
  memcpy(&sact.sa_mask, set, sizeof(*set));	/* handler isn't thread_safe */
  sigemptyset(&unblock_mask);
  /* With a null set this call only fetches the current mask */
  pthread_sigmask(SIG_UNBLOCK,(sigset_t*) 0,&rev_sigwait_set);

  /*
    NOTE(review): the upper bound is the number of bits in sigset_t, not
    the highest valid signal number - confirm that the sig*() calls are
    harmless for the numbers in between on the target platform.
  */
  for (i = 1; i <= sizeof(sigwait_set)*8; i++)
  {
    if (sigismember(set,i))
    {
      sigdelset(&rev_sigwait_set,i);
      if (!sigismember(&sigwait_set,i))
        sigaction(i, &sact, (struct sigaction*) 0);
    }
    else
    {
      sigdelset(&px_recd,i);			/* Don't handle this */
      if (sigismember(&sigwait_set,i))
      {						/* Remove the old handler */
        sigaddset(&unblock_mask,i);
        sigdelset(&rev_sigwait_set,i);
        sact1.sa_flags = 0;
        sact1.sa_handler = SIG_DFL;
        sigemptyset(&sact1.sa_mask);
        sigaction(i, &sact1, 0);
      }
    }
  }
  memcpy(&sigwait_set, set, sizeof(*set));
  /* Block the new set in this thread, unblock what is no longer wanted */
  pthread_sigmask(SIG_BLOCK,(sigset_t*) set,(sigset_t*) 0);
  pthread_sigmask(SIG_UNBLOCK,&unblock_mask,(sigset_t*) 0);
}


/*
  Wait for one of the signals in *setp (sigsuspend() based version)

  SYNOPSIS
    sigwait()
      setp   Signals to wait for
      sigp   Out: number of the signal that was found

  RETURN
    0   Always; the function only returns when a signal was found
*/
int sigwait(sigset_t *setp, int *sigp)
{
  if (memcmp(setp,&sigwait_set,sizeof(sigwait_set)))
    sigwait_setup(setp);			/* Init or change of set */

  for (;;)
  {
    /*
      This is a fast, not 100% portable implementation to find the signal.
      Because the handler is blocked there should be at most 1 bit set, but
      the specification on this is somewhat shady so we use a set instead a
      single variable.
    */

    ulong *ptr= (ulong*) &px_recd;
    ulong *end=ptr+sizeof(px_recd)/sizeof(ulong);

    for ( ; ptr != end ; ptr++)
    {
      if (*ptr)
      {
        ulong set= *ptr;
        /* Bit 0 of the first word is taken to be signal number 1 */
        int found= (int) ((char*) ptr - (char*) &px_recd)*8+1;
        while (!(set & 1))
        {
          found++;
          set>>=1;
        }
        *sigp=found;
        sigdelset(&px_recd,found);
        return 0;
      }
    }
    /* Nothing recorded yet; sleep until a handler has run */
    sigsuspend(&rev_sigwait_set);
  }
  return 0;
}
#else /* !DONT_USE_SIGSUSPEND */

/****************************************************************************
** Replacement of sigwait if the system doesn't have one (like BSDI 3.0)
**
** Note:
** This version of sigwait() is assumed to be called in a loop so the
** signalmask is permanently modified to reflect the signal set. This is
** done to get a much faster implementation.
**
** This implementation uses an extra thread to handle the signals and one
** must always call sigwait() with the same signal mask!
**
** BSDI 3.0 NOTE:
**
** pthread_kill() doesn't work on a thread in a select() or sleep() loop?
** After adding the sleep to sigwait_thread, all signals are checked and
** delivered every second. This isn't that terrible performance wise, but
** someone should report this to BSDI and ask for a fix!
** Another problem is that when the sleep() ends, every select() in other
** threads are interrupted!
****************************************************************************/

/*
  pending_set   Signals recorded by sigwait_handle_sig(), not yet returned
  inited        Set on the first call to sigwait()
  COND_sigwait  Signalled by the handler when a signal has been recorded
  LOCK_sigwait  Protects pending_set
*/
static sigset_t pending_set;
static bool inited=0;
static pthread_cond_t COND_sigwait;
static pthread_mutex_t LOCK_sigwait;


/*
  Signal handler: record 'sig' in pending_set and wake up sigwait().
  NOTE(review): this takes a mutex and signals a condition from inside a
  signal handler - confirm that this is acceptable on the platforms that
  still use this branch.
*/
void sigwait_handle_sig(int sig)
{
  pthread_mutex_lock(&LOCK_sigwait);
  sigaddset(&pending_set, sig);
  pthread_cond_signal(&COND_sigwait);	/* inform sigwait() about signal */
  pthread_mutex_unlock(&LOCK_sigwait);
}

/*
  Body of the helper thread: install sigwait_handle_sig() for the
  signals in the given set, unblock them in this thread and then sleep
  in a loop.

  SYNOPSIS
    sigwait_thread()
      set_arg   Pointer to the sigset_t given to sigwait(); it is also
                modified here (thr_client_alarm is added to it)
*/
void *sigwait_thread(void *set_arg)
{
  sigset_t *set=(sigset_t*) set_arg;

  int i;
  struct sigaction sact;
  sact.sa_flags = 0;
  sact.sa_handler = sigwait_handle_sig;
  memcpy(&sact.sa_mask, set, sizeof(*set));	/* handler isn't thread_safe */
  sigemptyset(&pending_set);

  for (i = 1; i <= sizeof(pending_set)*8; i++)
  {
    if (sigismember(set,i))
    {
      sigaction(i, &sact, (struct sigaction*) 0);
    }
  }
  /* Ensure that init_thr_alarm() is called */
  DBUG_ASSERT(thr_client_alarm);
  sigaddset(set, thr_client_alarm);
  pthread_sigmask(SIG_UNBLOCK,(sigset_t*) set,(sigset_t*) 0);
  alarm_thread=pthread_self();			/* For thr_alarm */

  for (;;)
  {						/* Wait for signals */
    sleep(1);					/* Because of broken BSDI */
  }
}


/*
  Wait for a signal recorded by the helper thread

  SYNOPSIS
    sigwait()
      setp   Signals to wait for; only used on the first call, when it
             is handed to the helper thread
             NOTE(review): the helper thread keeps the pointer, so the
             set presumably has to stay valid - confirm with callers
      sigp   Out: number of the signal that was found

  RETURN
    0   Always; the function only returns when a signal was found
*/
int sigwait(sigset_t *setp, int *sigp)
{
  if (!inited)
  {
    pthread_attr_t thr_attr;
    pthread_t sigwait_thread_id;
    inited=1;
    sigemptyset(&pending_set);
    pthread_mutex_init(&LOCK_sigwait, MY_MUTEX_INIT_FAST);
    pthread_cond_init(&COND_sigwait, NULL);

    pthread_attr_init(&thr_attr);
    pthread_attr_setscope(&thr_attr,PTHREAD_SCOPE_PROCESS);
    pthread_attr_setdetachstate(&thr_attr,PTHREAD_CREATE_DETACHED);
    pthread_create(&sigwait_thread_id, &thr_attr, sigwait_thread, setp);
    pthread_attr_destroy(&thr_attr);
  }

  pthread_mutex_lock(&LOCK_sigwait);
  for (;;)
  {
    ulong *ptr= (ulong*) &pending_set;
    ulong *end=ptr+sizeof(pending_set)/sizeof(ulong);

    for ( ; ptr != end ; ptr++)
    {
      if (*ptr)
      {
        ulong set= *ptr;
        /* Bit 0 of the first word is taken to be signal number 1 */
        int found= (int) ((char*) ptr - (char*) &pending_set)*8+1;
        while (!(set & 1))
        {
          found++;
          set>>=1;
        }
        *sigp=found;
        sigdelset(&pending_set,found);
        pthread_mutex_unlock(&LOCK_sigwait);
        return 0;
      }
    }
    /* Nothing pending; wait for the handler to signal the condition */
    pthread_cond_wait(&COND_sigwait, &LOCK_sigwait);
  }
  return 0;
}

#endif /* DONT_USE_SIGSUSPEND */
#endif /* HAVE_SIGWAIT */


/****************************************************************************
 The following functions fixes that all pthread functions should work
 according to latest posix standard
****************************************************************************/

/* Undefined wrappers set my_pthread.h so that we call os functions */
#undef pthread_mutex_init
#undef pthread_mutex_lock
#undef pthread_mutex_unlock
#undef pthread_mutex_destroy
#undef pthread_mutex_wait
#undef pthread_mutex_timedwait
#undef pthread_mutex_trylock
#undef pthread_mutex_t
#undef pthread_cond_init
#undef pthread_cond_wait
#undef pthread_cond_timedwait
#undef pthread_cond_t
#undef pthread_attr_getstacksize
#if defined(HPUX10)

/*
  pthread_cond_timedwait() that returns the error as its result

  NOTES
    A result of -1 is replaced by errno, or by ETIMEDOUT when errno is
    0. EAGAIN is reported as ETIMEDOUT.

  RETURN
    0   ok
    #   Error number
*/
int my_pthread_cond_timedwait(pthread_cond_t *cond, pthread_mutex_t *mutex,
                              struct timespec *abstime)
{
  int rc= pthread_cond_timedwait(cond, mutex, abstime);
  if (rc == -1)
  {
    /* The library reported the problem through errno */
    rc= errno;
    if (rc == 0)
      rc= ETIMEDOUT;
  }
  return (rc == EAGAIN) ? ETIMEDOUT : rc;
}

/*
  Store the stack size of *connection_attrib in *stack_size, using the
  HPUX10 form of pthread_attr_getstacksize() that returns the size.
*/
void my_pthread_attr_getstacksize(pthread_attr_t *connection_attrib,
                                  size_t *stack_size)
{
  *stack_size= pthread_attr_getstacksize(*connection_attrib);
}

#endif /* HPUX10 */
#ifdef HAVE_POSIX1003_4a_MUTEX
/*
  Convert pthread_mutex_trylock to return values according to latest POSIX

  RETURN VALUES
    0      The mutex was locked by us (the library returned 1)
    EBUSY  Mutex was locked by another thread (the library returned 0)
    #      errno if the library returned -1, else the library result
*/

int my_pthread_mutex_trylock(pthread_mutex_t *mutex)
{
  int rc= pthread_mutex_trylock(mutex);
  switch (rc) {
  case 1:
    return 0;                           /* Got lock on mutex */
  case 0:
    return EBUSY;                       /* Someone else holds the mutex */
  case -1:
    return errno;                       /* Probably invalid parameter */
  default:
    return rc;
  }
}
#endif /* HAVE_POSIX1003_4a_MUTEX */

/* Some help functions */

/* Returns its argument */
int pthread_dummy(int ret)
{
  return ret;
}


/*
  pthread_attr_setstacksize() without so much platform-dependency

  SYNOPSIS
    my_setstacksize()
      attr       Thread attributes to change
      stacksize  Wanted stack size in bytes

  NOTES
    The guard size, when it can be found, is added to the request and
    taken off again from the size that is read back.
    On IA64 the request is doubled, as half of the stack is used for the
    "register stack", and the result is halved again.

  RETURN
    The actual stack size if possible, else the requested size.
*/

size_t my_setstacksize(pthread_attr_t *attr, size_t stacksize)
{
  size_t guard_bytes __attribute__((unused))= 0;

#if defined(__ia64__) || defined(__ia64)
  /* Ask for twice the size to get the wanted amount of normal stack */
  stacksize= stacksize * 2;
#endif

#ifdef HAVE_PTHREAD_ATTR_GETGUARDSIZE
  if (pthread_attr_getguardsize(attr, &guard_bytes) != 0)
    guard_bytes= 0;                     /* Unknown; treat as 0 */
#endif /* HAVE_PTHREAD_ATTR_GETGUARDSIZE */

  pthread_attr_setstacksize(attr, stacksize + guard_bytes);

#ifdef HAVE_PTHREAD_ATTR_GETSTACKSIZE
  {
    size_t actual= 0;
    /* A size that is not above the guard size (like 0) is ignored */
    if (!pthread_attr_getstacksize(attr, &actual) && actual > guard_bytes)
    {
      size_t usable= actual - guard_bytes;
      if (usable < stacksize)
        stacksize= usable;
    }
  }
#endif /* HAVE_PTHREAD_ATTR_GETSTACKSIZE */

#if defined(__ia64__) || defined(__ia64)
  stacksize= stacksize / 2;
#endif
  return stacksize;
}
Used with my_nosys.h */ + +#include "mysys_priv.h" +#include "my_nosys.h" + + +#ifdef _WIN32 +extern size_t my_win_read(File Filedes,uchar *Buffer,size_t Count); +#endif + +size_t my_quick_read(File Filedes,uchar *Buffer,size_t Count,myf MyFlags) +{ + size_t readbytes; +#ifdef _WIN32 + readbytes= my_win_read(Filedes, Buffer, Count); +#else + readbytes= read(Filedes, Buffer, Count); +#endif + if(readbytes != Count) + { +#ifndef DBUG_OFF + if ((readbytes == 0 || readbytes == (size_t) -1) && errno == EINTR) + { + DBUG_PRINT("error", ("my_quick_read() was interrupted and returned %d" + ". This function does not retry the read!", + (int) readbytes)); + } +#endif + my_errno=errno; + return readbytes; + } + return (MyFlags & (MY_NABP | MY_FNABP)) ? 0 : readbytes; +} + + + +size_t my_quick_write(File Filedes, const uchar *Buffer, size_t Count) +{ +#ifdef _WIN32 + return my_win_write(Filedes, Buffer, Count); +#else + +#ifndef DBUG_OFF + size_t writtenbytes; +#endif + + if (( +#ifndef DBUG_OFF + writtenbytes = +#endif + (size_t) write(Filedes,Buffer,Count)) != Count) + { +#ifndef DBUG_OFF + if ((writtenbytes == 0 || writtenbytes == (size_t) -1) && errno == EINTR) + { + DBUG_PRINT("error", ("my_quick_write() was interrupted and returned %d" + ". This function does not retry the write!", + (int) writtenbytes)); + } +#endif + my_errno=errno; + return (size_t) -1; + } + return 0; +#endif +} diff --git a/mysys/my_rdtsc.c b/mysys/my_rdtsc.c new file mode 100644 index 00000000..e16825ce --- /dev/null +++ b/mysys/my_rdtsc.c @@ -0,0 +1,796 @@ +/* Copyright (c) 2008, 2010, Oracle and/or its affiliates. All rights reserved. + Copyright (c) 2019, MariaDB Corporation. + + This program is free software; you can redistribute it and/or modify + it under the terms of the GNU General Public License as published by + the Free Software Foundation; version 2 of the License. 
+ + This program is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + GNU General Public License for more details. + + You should have received a copy of the GNU General Public License + along with this program; if not, write to the Free Software + Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1335 USA */ + +/* + rdtsc3 -- multi-platform timer code + pgulutzan@mysql.com, 2005-08-29 + modified 2008-11-02 + + Functions: + + my_timer_cycles ulonglong cycles + my_timer_nanoseconds ulonglong nanoseconds + my_timer_microseconds ulonglong "microseconds" + my_timer_milliseconds ulonglong milliseconds + my_timer_ticks ulonglong ticks + my_timer_init initialization / test + + We'll call the first 5 functions (the ones that return + a ulonglong) "my_timer_xxx" functions. + Each my_timer_xxx function returns a 64-bit timing value + since an arbitrary 'epoch' start. Since the only purpose + is to determine elapsed times, wall-clock time-of-day + is not known and not relevant. + + The my_timer_init function is necessary for initializing. + It returns information (underlying routine name, + frequency, resolution, overhead) about all my_timer_xxx + functions. A program should call my_timer_init once, + use the information to decide what my_timer_xxx function + to use, and subsequently call that function by function + pointer. + + A typical use would be: + my_timer_init() ... once, at program start + ... + time1= my_timer_xxx() ... time before start + [code that's timed] + time2= my_timer_xxx() ... 
time after end + elapsed_time= (time2 - time1) - overhead +*/ + +#include "my_global.h" +#include "my_rdtsc.h" + +#if defined(_WIN32) +#include +#include "windows.h" +#else +#include +#endif + +#if !defined(_WIN32) +#if TIME_WITH_SYS_TIME +#include +#include /* for clock_gettime */ +#else +#if HAVE_SYS_TIME_H +#include +#elif defined(HAVE_TIME_H) +#include +#endif +#endif +#endif + +#if defined(HAVE_SYS_TIMEB_H) && defined(HAVE_FTIME) +#include /* for ftime */ +#endif + +#if defined(HAVE_SYS_TIMES_H) && defined(HAVE_TIMES) +#include /* for times */ +#endif + +#if defined(__APPLE__) && defined(__MACH__) +#include +#endif + +/* + For nanoseconds, most platforms have nothing available that + (a) doesn't require bringing in a 40-kb librt.so library + (b) really has nanosecond resolution. + + The value comes from the first source that is compiled in: + read_real_time(), gethrtime(), clock_gettime(CLOCK_REALTIME) or + mach_absolute_time(). When none of them is available the + function returns 0. +*/ + +ulonglong my_timer_nanoseconds(void) +{ +#if defined(HAVE_READ_REAL_TIME) + { + timebasestruct_t tr; + read_real_time(&tr, TIMEBASE_SZ); + /* tb_high is scaled by 1000000000, tb_low is added unscaled */ + return (ulonglong) tr.tb_high * 1000000000 + (ulonglong) tr.tb_low; + } +#elif defined(HAVE_SYS_TIMES_H) && defined(HAVE_GETHRTIME) + /* SunOS 5.10+, Solaris, HP-UX: hrtime_t gethrtime(void) */ + return (ulonglong) gethrtime(); +#elif defined(HAVE_CLOCK_GETTIME) && defined(CLOCK_REALTIME) + { + struct timespec tp; + clock_gettime(CLOCK_REALTIME, &tp); /* return value is not checked */ + return (ulonglong) tp.tv_sec * 1000000000 + (ulonglong) tp.tv_nsec; + } +#elif defined(__APPLE__) && defined(__MACH__) + { + ulonglong tm; + static mach_timebase_info_data_t timebase_info= {0,0}; + if (timebase_info.denom == 0) /* timebase not fetched yet */ + (void) mach_timebase_info(&timebase_info); + tm= mach_absolute_time(); + /* scale the raw reading by numer/denom */ + return (tm * timebase_info.numer) / timebase_info.denom; + } +#else + return 0; +#endif +} + +/* + For microseconds, gettimeofday() is available on + almost all platforms. On Windows we use + QueryPerformanceCounter which will usually tick over + 3.5 million times per second, and we don't throw + away the extra precision. (On Windows Server 2003 + the frequency is same as the cycle frequency.)
+*/ + +ulonglong my_timer_microseconds(void) +{ +#if defined(HAVE_GETTIMEOFDAY) + { + /* Remembered between calls so that a failing call can still return a new value. */ + static ulonglong last_value= 0; + struct timeval tv; + if (gettimeofday(&tv, NULL) == 0) + last_value= (ulonglong) tv.tv_sec * 1000000 + (ulonglong) tv.tv_usec; + else + { + /* + There are reports that gettimeofday(2) can have intermittent failures + on some platform, see for example Bug#36819. + We are not trying again or looping, just returning the best value possible + under the circumstances ... + */ + last_value++; + } + return last_value; + } +#elif defined(_WIN32) + { + /* QueryPerformanceCounter usually works with about 1/3 microsecond. */ + LARGE_INTEGER t_cnt; + + QueryPerformanceCounter(&t_cnt); + return (ulonglong) t_cnt.QuadPart; + } +#else + return 0; +#endif +} + +/* + For milliseconds, we use clock_gettime(MY_CLOCK_ID) if it's + available, then ftime(), then GetSystemTimeAsFileTime() on + Windows, then time()*1000. This is the same order in which + my_timer_init() picks milliseconds.routine, so the reported + routine is always the one that is really called. + With modern versions of + Windows and with HP Itanium, resolution is 10-15 + milliseconds. +*/ + +#if defined(HAVE_CLOCK_GETTIME) +#if defined(CLOCK_MONOTONIC_FAST) +/* FreeBSD */ +#define MY_CLOCK_ID CLOCK_MONOTONIC_FAST +#elif defined(CLOCK_MONOTONIC_COARSE) +/* Linux */ +#define MY_CLOCK_ID CLOCK_MONOTONIC_COARSE +#elif defined(CLOCK_MONOTONIC) +/* POSIX (includes OSX) */ +#define MY_CLOCK_ID CLOCK_MONOTONIC +#elif defined(CLOCK_REALTIME) +/* Solaris (which doesn't seem to have MONOTONIC) */ +#define MY_CLOCK_ID CLOCK_REALTIME +#endif +#endif + +ulonglong my_timer_milliseconds(void) +{ +#if defined(MY_CLOCK_ID) + struct timespec tp; + clock_gettime(MY_CLOCK_ID, &tp); + return (ulonglong)tp.tv_sec * 1000 + (ulonglong)tp.tv_nsec / 1000000; +#elif defined(HAVE_SYS_TIMEB_H) && defined(HAVE_FTIME) + /* ftime() is obsolete but maybe the platform is old */ + struct timeb ft; + ftime(&ft); + return (ulonglong)ft.time * 1000 + (ulonglong)ft.millitm; +#elif defined(_WIN32) + /* Tested before HAVE_TIME, as in my_timer_init() */ + FILETIME ft; + GetSystemTimeAsFileTime( &ft ); + return ((ulonglong)ft.dwLowDateTime +
(((ulonglong)ft.dwHighDateTime) << 32))/10000; +#elif defined(HAVE_TIME) + return (ulonglong) time(NULL) * 1000; +#else + return 0; +#endif +} + +/* + For ticks, which we handle with times(), the frequency + is usually 100/second and the overhead is surprisingly + bad, sometimes even worse than gettimeofday's overhead. +*/ + +ulonglong my_timer_ticks(void) +{ +#if defined(HAVE_SYS_TIMES_H) && defined(HAVE_TIMES) + { + struct tms times_buf; + /* Only the return value of times() is used, times_buf is scratch space. */ + return (ulonglong) times(&times_buf); + } +#elif defined(_WIN32) + return (ulonglong) GetTickCount(); +#else + return 0; +#endif +} + +/* + The my_timer_init() function and its sub-functions + have several loops which call timers. If there's + something wrong with a timer -- which has never + happened in tests -- we want the loop to end after + an arbitrary number of iterations, and my_timer_info + will show a discouraging result. The arbitrary + number is 1,000,000. +*/ +#define MY_TIMER_ITERATIONS 1000000 + +/* + Calculate overhead. Called from my_timer_init(). + Usually best_timer_overhead = cycles.overhead or + nanoseconds.overhead, so returned amount is in + cycles or nanoseconds. We repeat the calculation + twenty times, so that we can disregard effects of + caching or interrupts. Result is quite consistent + for cycles, at least. But remember it's a minimum. + + overhead out: smallest cost seen for one this_timer() + call, less best_timer_overhead, never below 0 + cycle_timer timer used to measure the call + this_timer timer whose call cost is measured + best_timer_overhead cost of calling cycle_timer itself +*/ + +static void my_timer_init_overhead(ulonglong *overhead, + ulonglong (*cycle_timer)(void), + ulonglong (*this_timer)(void), + ulonglong best_timer_overhead) +{ + ulonglong time1, time2; + int i; + + /* *overhead, least of 20 calculations - cycles.overhead */ + for (i= 0, *overhead= 1000000000; i < 20; ++i) + { + time1= cycle_timer(); + this_timer(); /* rather than 'time_tmp= timer();' */ + time2= cycle_timer() - time1; + if (*overhead > time2) + *overhead= time2; + } + /* + The values are unsigned: clamp at 0 instead of wrapping around to a + huge number when the measured minimum is not above the cost of the + measuring timer itself. + */ + if (*overhead > best_timer_overhead) + *overhead-= best_timer_overhead; + else + *overhead= 0; +} + +/* + Calculate Resolution. Called from my_timer_init(). + If a timer goes up by jumps, e.g. 1050, 1075, 1100, ... + then the best resolution is the minimum jump, e.g. 25.
+ If it's always divisible by 1000 then it's just a + result of multiplication of a lower-precision timer + result, e.g. nanoseconds are often microseconds * 1000. + If the minimum jump is less than an arbitrary passed + figure (a guess based on maximum overhead * 2), ignore. + Usually we end up with nanoseconds = 1 because it's too + hard to detect anything <= 100 nanoseconds. + Often GetTickCount() has resolution = 15. + We don't check with ticks because they take too long. + + this_timer timer to probe + overhead_times_2 jumps that are not larger than this are ignored; + 0 means that the first jump seen ends the probe + Returns 1000000 or 1000 if three jumps were seen and all of them + were multiples of that value, else the smallest jump if it is + larger than overhead_times_2, else 1. +*/ +static ulonglong my_timer_init_resolution(ulonglong (*this_timer)(void), + ulonglong overhead_times_2) +{ + ulonglong time1, time2; + ulonglong best_jump; + int i, jumps, divisible_by_1000, divisible_by_1000000; + + divisible_by_1000= divisible_by_1000000= 0; + best_jump= 1000000; /* stays at this value if no jump is ever seen */ + /* Stop after three jumps, or give up after MY_TIMER_ITERATIONS * 10 tries. */ + for (i= jumps= 0; jumps < 3 && i < MY_TIMER_ITERATIONS * 10; ++i) + { + time1= this_timer(); + time2= this_timer(); + time2-= time1; + if (time2) + { + ++jumps; + if (!(time2 % 1000)) + { + ++divisible_by_1000; + if (!(time2 % 1000000)) + ++divisible_by_1000000; + } + if (best_jump > time2) + best_jump= time2; + /* For milliseconds, one jump is enough. */ + if (overhead_times_2 == 0) + break; + } + } + if (jumps == 3) + { + if (jumps == divisible_by_1000000) + return 1000000; + if (jumps == divisible_by_1000) + return 1000; + } + if (best_jump > overhead_times_2) + return best_jump; + return 1; +} + +/* + Calculate cycle frequency by seeing how many cycles pass + in a 200-microsecond period. I tried with 10-microsecond + periods originally, and the result was often very wrong.
+*/ +/* + Returns microseconds.frequency scaled by the number of cycles that + passed per elapsed unit of the microseconds timer, or 0 when the + microseconds timer did not advance at all during the measurement. +*/ +static ulonglong my_timer_init_frequency(MY_TIMER_INFO *mti) +{ + int i; + ulonglong time1, time2, time3, time4; + time1= my_timer_cycles(); + time2= my_timer_microseconds(); + time3= time2; /* Avoids a Microsoft/IBM compiler warning */ + for (i= 0; i < MY_TIMER_ITERATIONS; ++i) + { + time3= my_timer_microseconds(); + if (time3 - time2 > 200) break; + } + time4= my_timer_cycles() - mti->cycles.overhead; + time4-= mti->microseconds.overhead; + /* + If the microseconds timer never moved there is nothing to divide by; + report 0 (frequency unknown) instead of dividing by zero. + */ + if (time3 == time2) + return 0; + return (mti->microseconds.frequency * (time4 - time1)) / (time3 - time2); +} + +/* + Call my_timer_init before the first call to my_timer_xxx(). + If something must be initialized, it happens here. + Set: what routine is being used e.g. "rdtsc" + Set: function, overhead, actual frequency, resolution. + + mti is filled in for all five timers (cycles, nanoseconds, + microseconds, milliseconds, ticks). A timer that has no routine on + this platform, or whose first reading is 0, gets routine, + resolution, frequency and overhead all set to 0. +*/ + +void my_timer_init(MY_TIMER_INFO *mti) +{ + ulonglong (*best_timer)(void); + ulonglong best_timer_overhead; + ulonglong time1, time2; + int i; + + /* cycles */ + mti->cycles.frequency= 1000000000; + mti->cycles.routine= MY_TIMER_ROUTINE_CYCLES; + + if (!mti->cycles.routine || !my_timer_cycles()) + { + mti->cycles.routine= 0; + mti->cycles.resolution= 0; + mti->cycles.frequency= 0; + mti->cycles.overhead= 0; + } + + /* nanoseconds */ + mti->nanoseconds.frequency= 1000000000; /* initial assumption */ +#if defined(HAVE_READ_REAL_TIME) + mti->nanoseconds.routine= MY_TIMER_ROUTINE_READ_REAL_TIME; +#elif defined(HAVE_SYS_TIMES_H) && defined(HAVE_GETHRTIME) + mti->nanoseconds.routine= MY_TIMER_ROUTINE_GETHRTIME; +#elif defined(HAVE_CLOCK_GETTIME) + mti->nanoseconds.routine= MY_TIMER_ROUTINE_CLOCK_GETTIME; +#elif defined(__APPLE__) && defined(__MACH__) + mti->nanoseconds.routine= MY_TIMER_ROUTINE_MACH_ABSOLUTE_TIME; +#else + mti->nanoseconds.routine= 0; +#endif + if (!mti->nanoseconds.routine || !my_timer_nanoseconds()) + { + mti->nanoseconds.routine= 0; + mti->nanoseconds.resolution= 0; + mti->nanoseconds.frequency= 0; + mti->nanoseconds.overhead= 0; + } + + /* microseconds */ + mti->microseconds.frequency= 1000000; /*
initial assumption */ +#if defined(HAVE_GETTIMEOFDAY) + mti->microseconds.routine= MY_TIMER_ROUTINE_GETTIMEOFDAY; +#elif defined(_WIN32) + { + LARGE_INTEGER li; + /* Windows: typical frequency = 3579545, actually 1/3 microsecond. */ + if (!QueryPerformanceFrequency(&li)) + mti->microseconds.routine= 0; + else + { + mti->microseconds.frequency= li.QuadPart; + mti->microseconds.routine= MY_TIMER_ROUTINE_QUERYPERFORMANCECOUNTER; + } + } +#else + mti->microseconds.routine= 0; +#endif + if (!mti->microseconds.routine || !my_timer_microseconds()) + { + mti->microseconds.routine= 0; + mti->microseconds.resolution= 0; + mti->microseconds.frequency= 0; + mti->microseconds.overhead= 0; + } + + /* milliseconds */ + mti->milliseconds.frequency= 1000; /* initial assumption */ +#ifdef MY_CLOCK_ID + mti->milliseconds.routine= MY_TIMER_ROUTINE_CLOCK_GETTIME; +#elif defined(HAVE_SYS_TIMEB_H) && defined(HAVE_FTIME) + mti->milliseconds.routine= MY_TIMER_ROUTINE_FTIME; +#elif defined(_WIN32) + mti->milliseconds.routine= MY_TIMER_ROUTINE_GETSYSTEMTIMEASFILETIME; +#elif defined(HAVE_TIME) + mti->milliseconds.routine= MY_TIMER_ROUTINE_TIME; +#else + mti->milliseconds.routine= 0; +#endif + if (!mti->milliseconds.routine || !my_timer_milliseconds()) + { + mti->milliseconds.routine= 0; + mti->milliseconds.resolution= 0; + mti->milliseconds.frequency= 0; + mti->milliseconds.overhead= 0; + } + + /* ticks */ + mti->ticks.frequency= 100; /* permanent assumption */ +#if defined(HAVE_SYS_TIMES_H) && defined(HAVE_TIMES) + mti->ticks.routine= MY_TIMER_ROUTINE_TIMES; +#elif defined(_WIN32) + mti->ticks.routine= MY_TIMER_ROUTINE_GETTICKCOUNT; +#else + mti->ticks.routine= 0; +#endif + if (!mti->ticks.routine || !my_timer_ticks()) + { + mti->ticks.routine= 0; + mti->ticks.resolution= 0; + mti->ticks.frequency= 0; + mti->ticks.overhead= 0; + } + + /* + Calculate overhead in terms of the timer that + gives the best resolution: cycles or nanoseconds. + I doubt it ever will be as bad as microseconds.
+ */ + if (mti->cycles.routine) + best_timer= &my_timer_cycles; + else + { + if (mti->nanoseconds.routine) + { + best_timer= &my_timer_nanoseconds; + } + else + best_timer= &my_timer_microseconds; + } + + /* best_timer_overhead = least of 20 calculations */ + for (i= 0, best_timer_overhead= 1000000000; i < 20; ++i) + { + time1= best_timer(); + time2= best_timer() - time1; + if (best_timer_overhead > time2) + best_timer_overhead= time2; + } + if (mti->cycles.routine) + my_timer_init_overhead(&mti->cycles.overhead, + best_timer, + &my_timer_cycles, + best_timer_overhead); + if (mti->nanoseconds.routine) + my_timer_init_overhead(&mti->nanoseconds.overhead, + best_timer, + &my_timer_nanoseconds, + best_timer_overhead); + if (mti->microseconds.routine) + my_timer_init_overhead(&mti->microseconds.overhead, + best_timer, + &my_timer_microseconds, + best_timer_overhead); + if (mti->milliseconds.routine) + my_timer_init_overhead(&mti->milliseconds.overhead, + best_timer, + &my_timer_milliseconds, + best_timer_overhead); + if (mti->ticks.routine) + my_timer_init_overhead(&mti->ticks.overhead, + best_timer, + &my_timer_ticks, + best_timer_overhead); + +/* + Calculate resolution for nanoseconds or microseconds + or milliseconds, by seeing if it's always divisible + by 1000, and by noticing how much jumping occurs. + For ticks, just assume the resolution is 1.
+*/ + if (mti->cycles.routine) + mti->cycles.resolution= 1; + if (mti->nanoseconds.routine) + mti->nanoseconds.resolution= + my_timer_init_resolution(&my_timer_nanoseconds, 20000); + if (mti->microseconds.routine) + mti->microseconds.resolution= + my_timer_init_resolution(&my_timer_microseconds, 20); + if (mti->milliseconds.routine) + { + if (mti->milliseconds.routine == MY_TIMER_ROUTINE_TIME) + mti->milliseconds.resolution= 1000; + else + mti->milliseconds.resolution= + my_timer_init_resolution(&my_timer_milliseconds, 0); + } + if (mti->ticks.routine) + mti->ticks.resolution= 1; + +/* + Calculate cycles frequency, + if we have both a cycles routine and a microseconds routine. + In tests, this usually results in a figure within 2% of + what "cat /proc/cpuinfo" says. + If the microseconds routine is QueryPerformanceCounter + (i.e. it's Windows), and the microseconds frequency is > + 500,000,000 (i.e. it's Windows Server so it uses RDTSC) + and the microseconds resolution is > 100 (i.e. dreadful), + then calculate cycles frequency = microseconds frequency. +*/ + if (mti->cycles.routine + && mti->microseconds.routine) + { + if (mti->microseconds.routine == + MY_TIMER_ROUTINE_QUERYPERFORMANCECOUNTER + && mti->microseconds.frequency > 500000000 + && mti->microseconds.resolution > 100) + mti->cycles.frequency= mti->microseconds.frequency; + else + { + time1= my_timer_init_frequency(mti); + /* Repeat once in case there was an interruption. */ + time2= my_timer_init_frequency(mti); + if (time1 < time2) mti->cycles.frequency= time1; + else mti->cycles.frequency= time2; + } + } + +/* + Calculate milliseconds frequency = + (cycles-frequency/#-of-cycles) * #-of-milliseconds, + if we have both a milliseconds routine and a cycles + routine. + This will be inaccurate if milliseconds resolution > 1. + This is probably only useful when testing new platforms.
+*/ + if (mti->milliseconds.routine + && mti->milliseconds.resolution < 1000 + && mti->microseconds.routine + && mti->cycles.routine) + { + ulonglong time3, time4; + time1= my_timer_cycles(); + time2= my_timer_milliseconds(); + time3= time2; /* Avoids a Microsoft/IBM compiler warning */ + for (i= 0; i < MY_TIMER_ITERATIONS * 1000; ++i) + { + time3= my_timer_milliseconds(); + if (time3 - time2 > 10) break; + } + time4= my_timer_cycles(); + /* Keep the assumed frequency if no cycles elapsed (avoids x/0). */ + if (time4 != time1) + mti->milliseconds.frequency= + (mti->cycles.frequency * (time3 - time2)) / (time4 - time1); + } + +/* + Calculate ticks.frequency = + (cycles-frequency/#-of-cycles) * #-of-ticks, + if we have both a ticks routine and a cycles + routine. + This is probably only useful when testing new platforms. +*/ + if (mti->ticks.routine + && mti->microseconds.routine + && mti->cycles.routine) + { + ulonglong time3, time4; + time1= my_timer_cycles(); + time2= my_timer_ticks(); + time3= time2; /* Avoids a Microsoft/IBM compiler warning */ +#if defined(HAVE_SYS_TIMES_H) && defined(HAVE_TIMES) + for (i= 0; i < 1000; ++i) +#else + for (i= 0; i < MY_TIMER_ITERATIONS * 1000; ++i) +#endif + { + time3= my_timer_ticks(); + if (time3 - time2 > 10) break; + } + time4= my_timer_cycles(); + /* Keep the assumed frequency if no cycles elapsed (avoids x/0). */ + if (time4 != time1) + mti->ticks.frequency= + (mti->cycles.frequency * (time3 - time2)) / (time4 - time1); + } +} + +/* + Additional Comments + ------------------- + + This is for timing, i.e. finding out how long a piece of code + takes. If you want time of day matching a wall clock, the + my_timer_xxx functions won't help you. + + The best timer is the one with highest frequency, lowest + overhead, and resolution=1. The my_timer_info() routine will tell + you at runtime which timer that is. Usually it will be + my_timer_cycles() but be aware that, although it's best, + it has possible flaws and dangers. Depending on platform: + - The frequency might change. We don't test for this. It + happens on laptops for power saving, and on blade servers + for avoiding overheating.
+ - The overhead that my_timer_init() returns is the minimum. + In fact it could be slightly greater because of caching or + because you call the routine by address, as recommended. + It could be hugely greater if there's an interrupt. + - The x86 cycle counter, RDTSC doesn't "serialize". That is, + if there is out-of-order execution, rdtsc might be processed + after an instruction that logically follows it. + (We could force serialization, but that would be slower.) + - It is possible to set a flag which renders RDTSC + inoperative. Somebody responsible for the kernel + of the operating system would have to make this + decision. For the platforms we've tested with, there's + no such problem. + - With a multi-processor arrangement, it's possible + to get the cycle count from one processor in + thread X, and the cycle count from another processor + in thread Y. They may not always be in synch. + - You can't depend on a cycle counter being available for + all platforms. On Alphas, the + cycle counter is only 32-bit, so it would overflow quickly, + so we don't bother with it. On platforms that we haven't + tested, there might be some if/endif combination that we + didn't expect, or some assembler routine that we didn't + supply. + + The recommended way to use the timer routines is: + 1. Somewhere near the beginning of the program, call + my_timer_init(). This should only be necessary once, + although you can call it again if you think that the + frequency has changed. + 2. Determine the best timer based on frequency, resolution, + overhead -- all things that my_timer_init() returns. + Preserve the address of the timer and the my_timer_info + results in an easily-accessible place. + 3. Instrument the code section that you're monitoring, thus: + time1= my_timer_xxx(); + Instrumented code; + time2= my_timer_xxx(); + elapsed_time= (time2 - time1) - overhead; + If the timer is always on, then overhead is always there, + so don't subtract it. + 4.
Save the elapsed time, or add it to a totaller. + 5. When all timing processes are complete, transfer the + saved / totalled elapsed time to permanent storage. + Optionally you can convert cycles to microseconds at + this point. (Don't do so every time you calculate + elapsed_time! That would waste time and lose precision!) + For converting cycles to microseconds, use the frequency + that my_timer_init() returns. You'll also need to convert + if the my_timer_microseconds() function is the Windows + function QueryPerformanceCounter(), since that's sometimes + a counter with precision slightly better than microseconds. + + Since we recommend calls by function pointer, we supply + no inline functions. + + Some comments on the many candidate routines for timing ... + + clock() -- We don't use because it would overflow frequently. + + clock_gettime() -- In tests, clock_gettime often had + resolution = 1000. + + ftime() -- A "man ftime" says: "This function is obsolete. + Don't use it." On every platform that we tested, if ftime() + was available, then so was gettimeofday(), and gettimeofday() + overhead was always at least as good as ftime() overhead. + + gettimeofday() -- available on most platforms, though not + on Windows. There is a hardware timer (sometimes a Programmable + Interrupt Timer or "PIT") (sometimes a "HPET") used for + interrupt generation. When it interrupts (a "tick" or "jiffy", + typically 1 centisecond) it sets xtime. For gettimeofday, a + Linux kernel routine usually gets xtime and then gets rdtsc + to get elapsed nanoseconds since the last tick. On Red Hat + Enterprise Linux 3, there was once a bug which caused the + resolution to be 1000, i.e. one centisecond. We never check + for time-zone change. + + getnstimeofday() -- something to watch for in future Linux + + do_gettimeofday() -- exists on Linux but not for "userland" + + get_cycles() -- a multi-platform function, worth watching + in future Linux versions. 
But we found platform-specific + functions which were better documented in operating-system + manuals. And get_cycles() can fail or return a useless + 32-bit number. It might be available on some platforms, + such as arm, which we didn't test. Using + "include " or "include " + can lead to autoconf or compile errors, depending on system. + + __rdtsc(): available for IA-32 and AMD64. + See "possible flaws and dangers" comments. + + times(): what we use for ticks. Should just read the last + (xtime) tick count, therefore should be fast, but usually + isn't. + + GetTickCount(): we use this for my_timer_ticks() on + Windows. Actually it really is a tick counter, so resolution + >= 10 milliseconds unless you have a very old Windows version. + With Windows 95 or 98 or ME, timeGetTime() has better resolution than + GetTickCount (1ms rather than 55ms). But with Windows NT or XP or 2000, + they're both getting from a variable in the Process Environment Block + (PEB), and the variable is set by the programmable interrupt timer, so + the resolution is the same (usually 10-15 milliseconds). Also timeGetTime + is slower on old machines: + http://www.doumo.jp/aon-java/jsp/postgretips/tips.jsp?tips=74. + Also timeGetTime requires linking winmm.lib, + Therefore we use GetTickCount. + It will overflow every 49 days because the return is 32-bit. + There is also a GetTickCount64 but it requires Vista or Windows Server 2008. + (As for GetSystemTimeAsFileTime, its precision is spurious, it + just reads the tick variable like the other functions do. + However, we don't expect it to overflow every 49 days, so we + will prefer it for my_timer_milliseconds().) + + QueryPerformanceCounter() we use this for my_timer_microseconds() + on Windows. 1-PIT-tick (often 1/3-microsecond). Usually reads + the PIT so it's slow. On some Windows variants, uses RDTSC. + + GetLocalTime() this is available on Windows but we don't use it. + + getclock(): documented for Alpha, but not found during tests. 
+ + mach_absolute_time() and UpTime() are recommended for Apple. + Initially they weren't tried, because ppc_get_timebase seems to do the job. + But now we use mach_absolute_time for nanoseconds. + + Any clock-based timer can be affected by NTP (ntpd program), + which means: + - full-second correction can occur for leap second + - tiny corrections can occur approximately every 11 minutes + (but I think they only affect the RTC which isn't the PIT). + + We define "precision" as "frequency" and "high precision" is + "frequency better than 1 microsecond". We define "resolution" + as a synonym for "granularity". We define "accuracy" as + "closeness to the truth" as established by some authoritative + clock, but we can't measure accuracy. + + Do not expect any of our timers to be monotonic; we + won't guarantee that they return constantly-increasing + unique numbers. + + We tested with AIX, Solaris (x86 + Sparc), Linux (x86 + + Itanium), Windows, 64-bit Windows, QNX, FreeBSD, HPUX, + Irix, Mac. We didn't test with SCO. + +*/ + diff --git a/mysys/my_read.c b/mysys/my_read.c new file mode 100644 index 00000000..6b4f17e9 --- /dev/null +++ b/mysys/my_read.c @@ -0,0 +1,112 @@ +/* Copyright (c) 2000, 2013, Oracle and/or its affiliates. All rights reserved. + + This program is free software; you can redistribute it and/or modify + it under the terms of the GNU General Public License as published by + the Free Software Foundation; version 2 of the License. + + This program is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + GNU General Public License for more details.
+ + You should have received a copy of the GNU General Public License + along with this program; if not, write to the Free Software + Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1335 USA */ + +#include "mysys_priv.h" +#include "mysys_err.h" +#include +#include + +/* + Read a chunk of bytes from a file with retry's if needed + + The parameters are: + File descriptor + Buffer to hold at least Count bytes + Bytes to read + Flags on what to do on error + + Return: + -1 on error + 0 if flag has bits MY_NABP or MY_FNABP set + N number of bytes read. +*/ + +size_t my_read(File Filedes, uchar *Buffer, size_t Count, myf MyFlags) +{ + size_t readbytes, save_count= 0; + DBUG_ENTER("my_read"); + DBUG_PRINT("my",("fd: %d Buffer: %p Count: %lu MyFlags: %lu", + Filedes, Buffer, (ulong) Count, MyFlags)); + if (!(MyFlags & (MY_WME | MY_FAE | MY_FNABP))) + MyFlags|= my_global_flags; + + for (;;) + { + errno= 0; /* Linux, Windows don't reset this on EOF/success */ +#ifdef _WIN32 + readbytes= my_win_read(Filedes, Buffer, Count); +#else + readbytes= read(Filedes, Buffer, Count); +#endif + DBUG_EXECUTE_IF ("simulate_file_read_error", + { + errno= ENOSPC; + readbytes= (size_t) -1; + DBUG_SET("-d,simulate_file_read_error"); + DBUG_SET("-d,simulate_my_b_fill_error"); + }); + + if (readbytes != Count) + { + int got_errno= my_errno= errno; + DBUG_PRINT("warning",("Read only %d bytes off %lu from %d, errno: %d", + (int) readbytes, (ulong) Count, Filedes, + got_errno)); + + if (got_errno == 0 || (readbytes != (size_t) -1 && + (MyFlags & (MY_NABP | MY_FNABP)))) + my_errno= HA_ERR_FILE_TOO_SHORT; + + if ((readbytes == 0 || (int) readbytes == -1) && got_errno == EINTR) + { + DBUG_PRINT("debug", ("my_read() was interrupted and returned %ld", + (long) readbytes)); + continue; /* Interrupted */ + } + + /* Do a read retry if we didn't get enough data on first read */ + if (readbytes != (size_t) -1 && readbytes != 0 && + (MyFlags & MY_FULL_IO)) + { + Buffer+= readbytes; + Count-= 
readbytes; + save_count+= readbytes; + continue; + } + + if (MyFlags & (MY_WME | MY_FAE | MY_FNABP)) + { + if (readbytes == (size_t) -1) + my_error(EE_READ, + MYF(ME_BELL | (MyFlags & (ME_NOTE | ME_ERROR_LOG))), + my_filename(Filedes), got_errno); + else if (MyFlags & (MY_NABP | MY_FNABP)) + my_error(EE_EOFERR, + MYF(ME_BELL | (MyFlags & (ME_NOTE | ME_ERROR_LOG))), + my_filename(Filedes), got_errno); + } + if (readbytes == (size_t) -1 || + ((MyFlags & (MY_FNABP | MY_NABP)) && !(MyFlags & MY_FULL_IO))) + DBUG_RETURN(MY_FILE_ERROR); /* Return with error */ + } + + if (MyFlags & (MY_NABP | MY_FNABP)) + readbytes= 0; /* Ok on read */ + else + readbytes+= save_count; + break; + } + DBUG_RETURN(readbytes); +} /* my_read */ diff --git a/mysys/my_redel.c b/mysys/my_redel.c new file mode 100644 index 00000000..3dacfff9 --- /dev/null +++ b/mysys/my_redel.c @@ -0,0 +1,155 @@ +/* Copyright (c) 2000, 2010, Oracle and/or its affiliates + Copyright (c) 2009, 2016, MariaDB + + This program is free software; you can redistribute it and/or modify + it under the terms of the GNU General Public License as published by + the Free Software Foundation; version 2 of the License. + + This program is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + GNU General Public License for more details. 
+ + You should have received a copy of the GNU General Public License + along with this program; if not, write to the Free Software + Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1335 USA */ + +#include "mysys_priv.h" +#include "mysys_err.h" +#include +#include +#include "mysys_err.h" +#if defined(HAVE_UTIME_H) +#include +#elif defined(HAVE_SYS_UTIME_H) +#include +#elif !defined(HPUX10) +struct utimbuf { + time_t actime; + time_t modtime; +}; +#endif + + /* + Rename with copy stat form old file + Copy stats from old file to new file, deletes original and + changes new file name to old file name + + if MY_REDEL_MAKE_COPY is given, then the original file + is renamed to org_name-'current_time'.BAK + */ + +#define REDEL_EXT ".BAK" + +int my_redel(const char *org_name, const char *tmp_name, + time_t backup_time_stamp, myf MyFlags) +{ + int error=1; + DBUG_ENTER("my_redel"); + DBUG_PRINT("my",("org_name: '%s' tmp_name: '%s' MyFlags: %lu", + org_name,tmp_name,MyFlags)); + + if (!my_disable_copystat_in_redel && + my_copystat(org_name,tmp_name,MyFlags) < 0) + goto end; + if (MyFlags & MY_REDEL_MAKE_BACKUP) + { + char name_buff[FN_REFLEN + MY_BACKUP_NAME_EXTRA_LENGTH]; + my_create_backup_name(name_buff, org_name, backup_time_stamp); + if (my_rename(org_name, name_buff, MyFlags)) + goto end; + } + else if (my_delete(org_name, MyFlags)) + goto end; + if (my_rename(tmp_name,org_name,MyFlags)) + goto end; + + error=0; +end: + DBUG_RETURN(error); +} /* my_redel */ + + +/** + Copy stat from one file to another + @fn my_copystat() + @param from Copy stat from this file + @param to Copy stat to this file + @param MyFlags Flags: + MY_WME Give error if something goes wrong + MY_FAE Abort operation if something goes wrong + If MY_FAE is not given, we don't return -1 for + errors from chown (which normally require root + privilege) + + @return 0 ok + -1 if can't get stat, + 1 if wrong type of file +*/ + +int my_copystat(const char *from, const char *to, int MyFlags) 
+{ + MY_STAT statbuf; + + if (my_stat(from, &statbuf, MyFlags) == NULL) + return -1; /* Can't get stat on input file */ + + if ((statbuf.st_mode & S_IFMT) != S_IFREG) + return 1; + + /* Copy modes */ + if (chmod(to, statbuf.st_mode & 07777)) + { + my_errno= errno; + if (MyFlags & (MY_FAE+MY_WME)) + my_error(EE_CHANGE_PERMISSIONS, MYF(ME_BELL), from, errno); + return -1; + } + +#if !defined(_WIN32) + if (statbuf.st_nlink > 1 && MyFlags & MY_LINK_WARNING) + { + if (MyFlags & MY_LINK_WARNING) + my_error(EE_LINK_WARNING,MYF(ME_BELL),from,statbuf.st_nlink); + } + /* Copy ownership */ + if (chown(to, statbuf.st_uid, statbuf.st_gid)) + { + my_errno= errno; + if (MyFlags & MY_WME) + my_error(EE_CHANGE_OWNERSHIP, MYF(ME_BELL), from, errno); + if (MyFlags & MY_FAE) + return -1; + } +#endif /* !_WIN32 */ + + if (MyFlags & MY_COPYTIME) + { + struct utimbuf timep; + timep.actime = statbuf.st_atime; + timep.modtime = statbuf.st_mtime; + (void) utime((char*) to, &timep);/* Update last accessed and modified times */ + } + + return 0; +} /* my_copystat */ + + +/** + Create a backup file name. + @fn my_create_backup_name() + @param to Store new file name here + @param from Original name + + @info + The backup name is made by adding -YYMMDDHHMMSS.BAK to the file name +*/ + +void my_create_backup_name(char *to, const char *from, time_t backup_start) +{ + char ext[MY_BACKUP_NAME_EXTRA_LENGTH+1]; + ext[0]='-'; + get_date(ext+1, GETDATE_SHORT_DATE | GETDATE_HHMMSSTIME, backup_start); + strmov(strend(ext),REDEL_EXT); + strmov(strmov(to, from), ext); +} diff --git a/mysys/my_rename.c b/mysys/my_rename.c new file mode 100644 index 00000000..cc3ed727 --- /dev/null +++ b/mysys/my_rename.c @@ -0,0 +1,104 @@ +/* Copyright (c) 2000, 2010, Oracle and/or its affiliates. All rights reserved. + + This program is free software; you can redistribute it and/or modify + it under the terms of the GNU General Public License as published by + the Free Software Foundation; version 2 of the License. 
+ + This program is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + GNU General Public License for more details. + + You should have received a copy of the GNU General Public License + along with this program; if not, write to the Free Software + Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1335 USA */ + +#include "mysys_priv.h" +#include +#include "mysys_err.h" +#include "m_string.h" +#undef my_rename + + +#ifdef _WIN32 + +#define RENAME_MAX_RETRIES 50 + +/* + On Windows, bad 3rd party programs (backup or anitivirus, or something else) + can have file open with a sharing mode incompatible with renaming, i.e they + won't use FILE_SHARE_DELETE when opening file. + + The following function will do a couple of retries, in case MoveFileEx returns + ERROR_SHARING_VIOLATION. +*/ +static BOOL win_rename_with_retries(const char *from, const char *to) +{ + DBUG_INJECT_FILE_SHARING_VIOLATION(from); + + for (int retry= FILE_SHARING_VIOLATION_RETRIES; retry--;) + { + BOOL ret= MoveFileEx(from, to, + MOVEFILE_COPY_ALLOWED | MOVEFILE_REPLACE_EXISTING); + + DBUG_CLEAR_FILE_SHARING_VIOLATION(); + + if (ret) + return ret; + + DWORD last_error= GetLastError(); + + if (last_error == ERROR_SHARING_VIOLATION || + last_error == ERROR_ACCESS_DENIED) + { + Sleep(FILE_SHARING_VIOLATION_DELAY_MS); + } + else + return ret; + } + return FALSE; +} +#endif + + /* On unix rename deletes to file if it exists */ +int my_rename(const char *from, const char *to, myf MyFlags) +{ + int error = 0; + DBUG_ENTER("my_rename"); + DBUG_PRINT("my",("from %s to %s MyFlags %lu", from, to, MyFlags)); + +#if defined(_WIN32) + if (!win_rename_with_retries(from, to)) + { + my_osmaperr(GetLastError()); +#elif defined(HAVE_RENAME) + if (rename(from,to)) + { +#else + if (link(from, to) || unlink(from)) + { +#endif + if (errno == ENOENT && !access(from, F_OK)) + my_errno= 
ENOTDIR; + else + my_errno= errno; + error = -1; + if (MyFlags & (MY_FAE+MY_WME)) + my_error(EE_LINK, MYF(ME_BELL),from,to,my_errno); + } + else if (MyFlags & MY_SYNC_DIR) + { +#ifdef NEED_EXPLICIT_SYNC_DIR + /* do only the needed amount of syncs: */ + char dir_from[FN_REFLEN], dir_to[FN_REFLEN]; + size_t dir_from_length, dir_to_length; + dirname_part(dir_from, from, &dir_from_length); + dirname_part(dir_to, to, &dir_to_length); + if (my_sync_dir(dir_from, MyFlags) || + (strcmp(dir_from, dir_to) && + my_sync_dir(dir_to, MyFlags))) + error= -1; +#endif + } + DBUG_RETURN(error); +} /* my_rename */ diff --git a/mysys/my_rnd.c b/mysys/my_rnd.c new file mode 100644 index 00000000..b1dd8b1f --- /dev/null +++ b/mysys/my_rnd.c @@ -0,0 +1,66 @@ +/* Copyright (C) 2007 MySQL AB & Michael Widenius + + This program is free software; you can redistribute it and/or modify + it under the terms of the GNU General Public License as published by + the Free Software Foundation; version 2 of the License. + + This program is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + GNU General Public License for more details. + + You should have received a copy of the GNU General Public License + along with this program; if not, write to the Free Software + Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1335 USA */ + +#include "mysys_priv.h" +#include +#include + +/* + Initialize random generator + + NOTES + MySQL's password checks depends on this, so don't do any changes + that changes the random numbers that are generated! 
+*/
+
+void my_rnd_init(struct my_rnd_struct *rand_st, ulong seed1, ulong seed2)
+{
+#ifdef HAVE_valgrind
+  bzero((char*) rand_st,sizeof(*rand_st)); /* Avoid UMC warnings */
+#endif
+  rand_st->max_value= 0x3FFFFFFFL; /* modulus applied to both seeds */
+  rand_st->max_value_dbl=(double) rand_st->max_value; /* divisor used by my_rnd() */
+  rand_st->seed1=seed1%rand_st->max_value ; /* both seeds are kept below max_value */
+  rand_st->seed2=seed2%rand_st->max_value;
+}
+
+
+/*
+  Generate random number.
+
+  SYNOPSIS
+    my_rnd()
+    rand_st    INOUT  Structure used for number generation
+
+  RETURN VALUE
+    generated pseudo random number (the new seed1 divided by max_value_dbl)
+
+  NOTE:
+    This is coded so that it can be called by two threads at the same time
+    with minimum impact.
+    (As the number is supposed to be random, it doesn't matter much if
+    rand->seed1 or rand->seed2 are updated with slightly wrong numbers or
+    if two threads get the same number.)
+*/
+
+double my_rnd(struct my_rnd_struct *rand_st)
+{
+  unsigned long seed1; /* local copy; rand_st->seed1 is written once, below */
+  seed1= (rand_st->seed1*3+rand_st->seed2) % rand_st->max_value;
+  rand_st->seed2=(seed1+rand_st->seed2+33) % rand_st->max_value;
+  rand_st->seed1= seed1;
+  return (((double) seed1)/rand_st->max_value_dbl);
+}
+
diff --git a/mysys/my_safehash.c b/mysys/my_safehash.c
new file mode 100644
index 00000000..7d37b707
--- /dev/null
+++ b/mysys/my_safehash.c
@@ -0,0 +1,298 @@
+/* Copyright (C) 2003-2007 MySQL AB
+
+   This program is free software; you can redistribute it and/or modify
+   it under the terms of the GNU General Public License as published by
+   the Free Software Foundation; either version 2 of the License, or
+   (at your option) any later version.
+
+   This program is distributed in the hope that it will be useful,
+   but WITHOUT ANY WARRANTY; without even the implied warranty of
+   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+   GNU General Public License for more details.
+
+   You should have received a copy of the GNU General Public License
+   along with this program; if not, write to the Free Software
+   Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1335 USA */
+
+/*
+  Handling of multiple key caches
+
+  The idea is to have a thread safe hash on the table name,
+  with a default key cache value that is returned if the table name is not in
+  the cache.
+*/
+
+#include "mysys_priv.h"
+#include
+#include "my_safehash.h"
+
+/*****************************************************************************
+  General functions to handle SAFE_HASH objects.
+
+  A SAFE_HASH object is used to store the hash, the mutex and default value
+  needed by the rest of the key cache code.
+  This is a separate struct to make it easy to later reuse the code for other
+  purposes.
+
+  All entries are linked in a list to allow us to traverse all elements
+  and delete selected ones. (HASH doesn't allow any easy ways to do this).
+*****************************************************************************/
+
+
+/*
+  Free a SAFE_HASH_ENTRY
+
+  SYNOPSIS
+    safe_hash_entry_free()
+    entry    The entry which should be freed
+
+  NOTE
+    This function is called by the hash object on delete; it my_free()s entry
+*/
+
+static void safe_hash_entry_free(SAFE_HASH_ENTRY *entry)
+{
+  DBUG_ENTER("safe_hash_entry_free");
+  my_free(entry);
+  DBUG_VOID_RETURN;
+}
+
+
+/*
+  Get key and length for a SAFE_HASH_ENTRY
+
+  SYNOPSIS
+    safe_hash_entry_get()
+    entry    The entry for which the key should be returned
+    length   OUT  Set to the length of the key (entry->length)
+
+  RETURN
+    #  Pointer to the key stored in the entry (entry->key)
+*/
+
+static uchar *safe_hash_entry_get(SAFE_HASH_ENTRY *entry, size_t *length,
+                                  my_bool not_used __attribute__((unused)))
+{
+  *length= entry->length;
+  return (uchar*) entry->key;
+}
+
+
+/*
+  Init a SAFE_HASH object
+
+  SYNOPSIS
+    safe_hash_init()
+    hash           safe_hash handler
+    elements       Expected max number of elements
+    default_value  default value
+
+  NOTES
+    In case of error we set hash->default_value to 0 to allow one to
call + safe_hash_free on an object that couldn't be initialized. + + RETURN + 0 OK + 1 error +*/ + +my_bool safe_hash_init(SAFE_HASH *hash, uint elements, + uchar *default_value) +{ + DBUG_ENTER("safe_hash_init"); + if (my_hash_init(key_memory_SAFE_HASH_ENTRY, &hash->hash, &my_charset_bin, + elements, 0, 0, (my_hash_get_key) safe_hash_entry_get, + (void (*)(void*)) safe_hash_entry_free, 0)) + { + hash->default_value= 0; + DBUG_RETURN(1); + } + mysql_rwlock_init(key_SAFEHASH_mutex, &hash->mutex); + hash->default_value= default_value; + hash->root= 0; + DBUG_RETURN(0); +} + + +/* + Free a SAFE_HASH object + + SYNOPSIS + safe_hash_free() + hash Hash handle + + NOTES + This is safe to call on any object that has been sent to safe_hash_init() +*/ + +void safe_hash_free(SAFE_HASH *hash) +{ + /* + Test if safe_hash_init succeeded. This will also guard us against multiple + free calls. + */ + if (hash->default_value) + { + my_hash_free(&hash->hash); + mysql_rwlock_destroy(&hash->mutex); + hash->default_value=0; + } +} + + +/* + Return the value stored for a key or default value if no key + + SYNOPSIS + safe_hash_search() + hash Hash handle + key key (path to table etc..) + length Length of key + def Default value of data + + RETURN + # data associated with the key of default value if data was not found +*/ + +uchar *safe_hash_search(SAFE_HASH *hash, const uchar *key, uint length, + uchar *def) +{ + uchar *result; + DBUG_ENTER("safe_hash_search"); + mysql_rwlock_rdlock(&hash->mutex); + result= my_hash_search(&hash->hash, key, length); + mysql_rwlock_unlock(&hash->mutex); + if (!result) + result= def; + else + result= ((SAFE_HASH_ENTRY*) result)->data; + DBUG_PRINT("exit",("data: %p", result)); + DBUG_RETURN(result); +} + + +/* + Associate a key with some data + + SYNOPSIS + safe_hash_set() + hash Hash handle + key key (path to table etc..) 
+    length  Length of key
+    data    data to associate with the key
+
+  NOTES
+    This can be used both to insert a new entry and change an existing
+    entry.
+    If one associates a key with the default key cache, the key is deleted
+
+  RETURN
+    0 OK
+    1 error (Can only be EOM). In this case my_message() is called.
+*/
+
+my_bool safe_hash_set(SAFE_HASH *hash, const uchar *key, uint length,
+                      uchar *data)
+{
+  SAFE_HASH_ENTRY *entry;
+  my_bool error= 0;
+  DBUG_ENTER("safe_hash_set");
+  DBUG_PRINT("enter",("key: %.*s data: %p", length, key, data));
+
+  mysql_rwlock_wrlock(&hash->mutex);
+  entry= (SAFE_HASH_ENTRY*) my_hash_search(&hash->hash, key, length);
+
+  if (data == hash->default_value)
+  {
+    /*
+      The key is to be associated with the default entry. In this case
+      we can just delete the entry (if it existed) from the hash as a
+      search will return the default entry
+    */
+    if (!entry) /* nothing to do */
+      goto end;
+    /* unlink entry from list */
+    if ((*entry->prev= entry->next))
+      entry->next->prev= entry->prev;
+    my_hash_delete(&hash->hash, (uchar*) entry);
+    goto end;
+  }
+  if (entry)
+  {
+    /* Entry existed; Just change the pointer to point at the new data */
+    entry->data= data;
+  }
+  else
+  {
+    if (!(entry= (SAFE_HASH_ENTRY *) my_malloc(key_memory_SAFE_HASH_ENTRY,
+                                               sizeof(*entry) + length,
+                                               MYF(MY_WME))))
+    {
+      error= 1;
+      goto end;
+    }
+    entry->key= (uchar*) (entry +1); /* key bytes are stored right after the struct */
+    memcpy((char*) entry->key, (char*) key, length);
+    entry->length= length;
+    entry->data= data;
+    if (my_hash_insert(&hash->hash, (uchar*) entry))
+    {
+      /* Only on out of memory; entry is not linked yet, so just free it */
+      my_free(entry);
+      error= 1;
+      goto end;
+    }
+    /* Link entry to list (after the insert, so a failure leaves no stale link) */
+    if ((entry->next= hash->root))
+      entry->next->prev= &entry->next;
+    entry->prev= &hash->root;
+    hash->root= entry;
+  }
+
+end:
+  mysql_rwlock_unlock(&hash->mutex);
+  DBUG_RETURN(error);
+}
+
+
+/*
+  Change all entries with one data value to another data value
+
+  SYNOPSIS
+    safe_hash_change()
+    hash      Hash handle
+    
old_data  Old data
+    new_data  Change all 'old_data' to this
+
+  NOTES
+    We use the linked list to traverse all elements in the hash as
+    this allows us to delete elements in the case where 'new_data' is the
+    default value.
+*/
+
+void safe_hash_change(SAFE_HASH *hash, uchar *old_data, uchar *new_data)
+{
+  SAFE_HASH_ENTRY *entry, *next;
+  DBUG_ENTER("safe_hash_change");
+
+  mysql_rwlock_wrlock(&hash->mutex);
+
+  for (entry= hash->root ; entry ; entry= next)
+  {
+    next= entry->next; /* saved first: entry may be deleted below */
+    if (entry->data == old_data)
+    {
+      if (new_data == hash->default_value)
+      {
+        /* unlink from the list, then remove from the hash */
+        if ((*entry->prev= entry->next))
+          entry->next->prev= entry->prev;
+        my_hash_delete(&hash->hash, (uchar*) entry);
+      }
+      else
+        entry->data= new_data;
+    }
+  }
+
+  mysql_rwlock_unlock(&hash->mutex);
+  DBUG_VOID_RETURN;
+}
diff --git a/mysys/my_safehash.h b/mysys/my_safehash.h
new file mode 100644
index 00000000..71aaf5d0
--- /dev/null
+++ b/mysys/my_safehash.h
@@ -0,0 +1,56 @@
+/* Copyright (C) 2003 MySQL AB
+
+   This program is free software; you can redistribute it and/or modify
+   it under the terms of the GNU General Public License as published by
+   the Free Software Foundation; either version 2 of the License, or
+   (at your option) any later version.
+
+   This program is distributed in the hope that it will be useful,
+   but WITHOUT ANY WARRANTY; without even the implied warranty of
+   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+   GNU General Public License for more details.
+
+   You should have received a copy of the GNU General Public License
+   along with this program; if not, write to the Free Software
+   Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1335 USA */
+
+/*
+  Handling of multiple key caches
+
+  The idea is to have a thread safe hash on the table name,
+  with a default key cache value that is returned if the table name is not in
+  the cache.
+*/ + +#include + +/* + Struct to store a key and pointer to object +*/ + +typedef struct st_safe_hash_entry +{ + uchar *key; + uint length; + uchar *data; + struct st_safe_hash_entry *next, **prev; +} SAFE_HASH_ENTRY; + + +typedef struct st_safe_hash_with_default +{ + mysql_rwlock_t mutex; + HASH hash; + uchar *default_value; + SAFE_HASH_ENTRY *root; +} SAFE_HASH; + + +my_bool safe_hash_init(SAFE_HASH *hash, uint elements, + uchar *default_value); +void safe_hash_free(SAFE_HASH *hash); +uchar *safe_hash_search(SAFE_HASH *hash, const uchar *key, uint length, + uchar *def); +my_bool safe_hash_set(SAFE_HASH *hash, const uchar *key, uint length, + uchar *data); +void safe_hash_change(SAFE_HASH *hash, uchar *old_data, uchar *new_data); diff --git a/mysys/my_seek.c b/mysys/my_seek.c new file mode 100644 index 00000000..db364ccd --- /dev/null +++ b/mysys/my_seek.c @@ -0,0 +1,103 @@ +/* + Copyright (c) 2000, 2010, Oracle and/or its affiliates + + This program is free software; you can redistribute it and/or modify + it under the terms of the GNU General Public License as published by + the Free Software Foundation; version 2 of the License. + + This program is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + GNU General Public License for more details. + + You should have received a copy of the GNU General Public License + along with this program; if not, write to the Free Software + Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1335 USA */ + +#include "mysys_priv.h" +#include "mysys_err.h" + +/* + Seek to a position in a file. 
+ + ARGUMENTS + File fd The file descriptor + my_off_t pos The expected position (absolute or relative) + int whence A direction parameter and one of + {SEEK_SET, SEEK_CUR, SEEK_END} + myf MyFlags MY_THREADSAFE must be set in case my_seek may be mixed + with my_pread/my_pwrite calls and fd is shared among + threads. + + DESCRIPTION + The my_seek function is a wrapper around the system call lseek and + repositions the offset of the file descriptor fd to the argument + offset according to the directive whence as follows: + SEEK_SET The offset is set to offset bytes. + SEEK_CUR The offset is set to its current location plus offset bytes + SEEK_END The offset is set to the size of the file plus offset bytes + + RETURN VALUE + my_off_t newpos The new position in the file. + MY_FILEPOS_ERROR An error was encountered while performing + the seek. my_errno is set to indicate the + actual error. +*/ + +my_off_t my_seek(File fd, my_off_t pos, int whence, myf MyFlags) +{ + os_off_t newpos= -1; + DBUG_ENTER("my_seek"); + DBUG_PRINT("my",("fd: %d Pos: %llu Whence: %d MyFlags: %lu", + fd, (ulonglong) pos, whence, MyFlags)); + DBUG_ASSERT(pos != MY_FILEPOS_ERROR); /* safety check */ + + /* + Make sure we are using a valid file descriptor! 
+ */ + DBUG_ASSERT(fd != -1); +#ifdef _WIN32 + newpos= my_win_lseek(fd, pos, whence); +#else + newpos= lseek(fd, pos, whence); +#endif + if (newpos == (os_off_t) -1) + { + my_errno= errno; + if (MyFlags & MY_WME) + my_error(EE_CANT_SEEK, MYF(0), my_filename(fd), my_errno); + DBUG_PRINT("error", ("lseek: %llu errno: %d", (ulonglong) newpos, errno)); + DBUG_RETURN(MY_FILEPOS_ERROR); + } + if ((my_off_t) newpos != pos) + { + DBUG_PRINT("exit",("pos: %llu", (ulonglong) newpos)); + } + DBUG_RETURN((my_off_t) newpos); +} /* my_seek */ + + + /* Tell current position of file */ + /* ARGSUSED */ + +my_off_t my_tell(File fd, myf MyFlags) +{ + os_off_t pos; + DBUG_ENTER("my_tell"); + DBUG_PRINT("my",("fd: %d MyFlags: %lu",fd, MyFlags)); + DBUG_ASSERT(fd >= 0); +#if defined (HAVE_TELL) && !defined (_WIN32) && !defined(_AIX) + pos= tell(fd); +#else + pos= my_seek(fd, 0L, MY_SEEK_CUR,0); +#endif + if (pos == (os_off_t) -1) + { + my_errno= errno; + if (MyFlags & MY_WME) + my_error(EE_CANT_SEEK, MYF(0), my_filename(fd), my_errno); + DBUG_PRINT("error", ("tell: %llu errno: %d", (ulonglong) pos, my_errno)); + } + DBUG_PRINT("exit",("pos: %llu", (ulonglong) pos)); + DBUG_RETURN((my_off_t) pos); +} /* my_tell */ diff --git a/mysys/my_setuser.c b/mysys/my_setuser.c new file mode 100644 index 00000000..e35d6602 --- /dev/null +++ b/mysys/my_setuser.c @@ -0,0 +1,82 @@ +#include +#include +#include +#include +#ifdef HAVE_PWD_H +#include +#endif +#ifdef HAVE_GRP_H +#include +#endif + +struct passwd *my_check_user(const char *user, myf MyFlags) +{ + struct passwd *user_info; + uid_t user_id= geteuid(); + DBUG_ENTER("my_check_user"); + + // Don't bother if we aren't superuser + if (user_id) + { + if (user) + { + /* Don't give a warning, if real user is same as given with --user */ + user_info= getpwnam(user); + if (!user_info || user_id != user_info->pw_uid) + { + my_errno= EPERM; + if (MyFlags & MY_WME) + my_printf_error(my_errno, "One can only use the --user switch if " + "running as root", 
MYF(ME_WARNING|ME_ERROR_LOG)); + } + } + DBUG_RETURN(NULL); + } + if (!user) + { + if (MyFlags & MY_FAE) + { + my_errno= EINVAL; + my_printf_error(my_errno, "Please consult the Knowledge Base to find " + "out how to run mysqld as root!", MYF(ME_ERROR_LOG)); + } + DBUG_RETURN(NULL); + } + if (!strcmp(user,"root")) + DBUG_RETURN(NULL); + + if (!(user_info= getpwnam(user))) + { + // Allow a numeric uid to be used + int err= 0; + user_id= my_strtoll10(user, NULL, &err); + if (err || !(user_info= getpwuid(user_id))) + { + my_errno= EINVAL; + my_printf_error(my_errno, "Can't change to run as user '%s'. Please " + "check that the user exists!", MYF(ME_ERROR_LOG), user); + DBUG_RETURN(NULL); + } + } + DBUG_ASSERT(user_info); + DBUG_RETURN(user_info); +} + +int my_set_user(const char *user, struct passwd *user_info, myf MyFlags) +{ + DBUG_ENTER("my_set_user"); + + DBUG_ASSERT(user_info != 0); +#ifdef HAVE_INITGROUPS + initgroups(user, user_info->pw_gid); +#endif + if (setgid(user_info->pw_gid) == -1 || setuid(user_info->pw_uid) == -1) + { + my_errno= errno; + if (MyFlags & MY_WME) + my_printf_error(errno, "Cannot change uid/gid (errno: %d)", MYF(ME_ERROR_LOG), + errno); + DBUG_RETURN(my_errno); + } + DBUG_RETURN(0); +} diff --git a/mysys/my_sleep.c b/mysys/my_sleep.c new file mode 100644 index 00000000..fff58e4a --- /dev/null +++ b/mysys/my_sleep.c @@ -0,0 +1,35 @@ +/* Copyright (c) 2000, 2010, Oracle and/or its affiliates. All rights reserved. + + This program is free software; you can redistribute it and/or modify + it under the terms of the GNU General Public License as published by + the Free Software Foundation; version 2 of the License. + + This program is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + GNU General Public License for more details. 
+
+   You should have received a copy of the GNU General Public License
+   along with this program; if not, write to the Free Software
+   Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1335 USA */
+
+/* Wait a given number of microseconds (m_seconds is in microseconds) */
+
+#include "mysys_priv.h"
+#include
+
+void my_sleep(ulong m_seconds)
+{
+#if defined(_WIN32)
+  Sleep(m_seconds/1000+1); /* Sleep() has millisecond arg; +1 so it is never 0 */
+#elif defined(HAVE_SELECT)
+  struct timeval t;
+  t.tv_sec= m_seconds / 1000000L; /* whole seconds */
+  t.tv_usec= m_seconds % 1000000L; /* remaining microseconds */
+  select(0,0,0,0,&t); /* sleep: no descriptors, only the timeout is used */
+#else
+  uint sec= (uint) ((m_seconds + 999999L) / 1000000L); /* round up to whole seconds */
+  ulong start= (ulong) time((time_t*) 0);
+  while ((ulong) time((time_t*) 0) < start+sec); /* busy-wait until 'sec' seconds passed */
+#endif
+}
diff --git a/mysys/my_static.c b/mysys/my_static.c
new file mode 100644
index 00000000..d0f20a5c
--- /dev/null
+++ b/mysys/my_static.c
@@ -0,0 +1,139 @@
+/* Copyright (c) 2000, 2011, Oracle and/or its affiliates.
+   Copyright (c) 2009, 2019, MariaDB Corporation.
+
+   This program is free software; you can redistribute it and/or modify
+   it under the terms of the GNU General Public License as published by
+   the Free Software Foundation; version 2 of the License.
+
+   This program is distributed in the hope that it will be useful,
+   but WITHOUT ANY WARRANTY; without even the implied warranty of
+   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+   GNU General Public License for more details.
+
+   You should have received a copy of the GNU General Public License
+   along with this program; if not, write to the Free Software
+   Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1335 USA */
+
+/*
+  Static variables for mysys library.
All definied here for easy making of + a shared library +*/ + +#include "mysys_priv.h" +#include "my_static.h" +#include "my_alarm.h" + + +PSI_memory_key key_memory_DYNAMIC_STRING; +PSI_memory_key key_memory_IO_CACHE; +PSI_memory_key key_memory_KEY_CACHE; +PSI_memory_key key_memory_LIST; +PSI_memory_key key_memory_MY_BITMAP_bitmap; +PSI_memory_key key_memory_MY_DIR; +PSI_memory_key key_memory_MY_STAT; +PSI_memory_key key_memory_MY_TMPDIR_full_list; +PSI_memory_key key_memory_QUEUE; +PSI_memory_key key_memory_SAFE_HASH_ENTRY; +PSI_memory_key key_memory_THD_ALARM; +PSI_memory_key key_memory_TREE; +PSI_memory_key key_memory_charset_file; +PSI_memory_key key_memory_charset_loader; +PSI_memory_key key_memory_defaults; +PSI_memory_key key_memory_lf_dynarray; +PSI_memory_key key_memory_lf_node; +PSI_memory_key key_memory_lf_slist; +PSI_memory_key key_memory_max_alloca; +PSI_memory_key key_memory_my_compress_alloc; +PSI_memory_key key_memory_my_err_head; +PSI_memory_key key_memory_my_file_info; +PSI_memory_key key_memory_pack_frm; +PSI_memory_key key_memory_charsets; +PSI_memory_key key_memory_new= PSI_INSTRUMENT_MEM; + +#ifdef _WIN32 +PSI_memory_key key_memory_win_SECURITY_ATTRIBUTES; +PSI_memory_key key_memory_win_PACL; +PSI_memory_key key_memory_win_IP_ADAPTER_ADDRESSES; +#endif /* _WIN32 */ + + /* from my_init */ +char *home_dir=0; +char *mysql_data_home= (char*) "."; +const char *my_progname= NULL, *my_progname_short= NULL; +char curr_dir[FN_REFLEN]= {0}, + home_dir_buff[FN_REFLEN]= {0}; +ulong my_stream_opened=0,my_tmp_file_created=0; +ulong my_file_total_opened= 0; +int my_umask=0664, my_umask_dir=0777; +#ifdef _WIN32 +SECURITY_ATTRIBUTES my_dir_security_attributes= {sizeof(SECURITY_ATTRIBUTES),NULL,FALSE}; +#endif +myf my_global_flags= 0; +#ifndef DBUG_OFF +my_bool my_assert= 1; +#endif +my_bool my_assert_on_error= 0; +struct st_my_file_info my_file_info_default[MY_NFILE]; +uint my_file_limit= MY_NFILE; +int32 my_file_opened=0; +struct st_my_file_info 
*my_file_info= my_file_info_default; + + /* From mf_brkhant */ +int my_dont_interrupt=0; +volatile int _my_signals=0; +struct st_remember _my_sig_remember[MAX_SIGNALS]={{0,0}}; + + /* from mf_reccache.c */ +ulong my_default_record_cache_size=RECORD_CACHE_SIZE; + + /* from soundex.c */ + /* ABCDEFGHIJKLMNOPQRSTUVWXYZ */ + /* :::::::::::::::::::::::::: */ +const char *soundex_map= "01230120022455012623010202"; + + /* from my_malloc */ +USED_MEM* my_once_root_block=0; /* pointer to first block */ +uint my_once_extra=ONCE_ALLOC_INIT; /* Memory to alloc / block */ + + /* from my_alarm */ +int volatile my_have_got_alarm=0; /* declare variable to reset */ +ulong my_time_to_wait_for_lock=2; /* In seconds */ + + /* from errors.c */ +#ifdef SHARED_LIBRARY +const char *globerrs[GLOBERRS]; /* my_error_messages is here */ +#endif +void (*error_handler_hook)(uint error, const char *str, myf MyFlags)= + my_message_stderr; +void (*fatal_error_handler_hook)(uint error, const char *str, myf MyFlags)= + my_message_stderr; + +static void proc_info_dummy(void *a __attribute__((unused)), + const PSI_stage_info *b __attribute__((unused)), + PSI_stage_info *c __attribute__((unused)), + const char *d __attribute__((unused)), + const char *e __attribute__((unused)), + const unsigned int f __attribute__((unused))) +{ + return; +} + +/* this is to be able to call set_thd_proc_info from the C code */ +void (*proc_info_hook)(void *, const PSI_stage_info *, PSI_stage_info *, + const char *, const char *, const unsigned int)= proc_info_dummy; +void (*debug_sync_C_callback_ptr)(MYSQL_THD, const char *, size_t)= 0; + + /* How to disable options */ +my_bool my_disable_locking=0; +my_bool my_disable_sync=0; +my_bool my_disable_async_io=0; +my_bool my_disable_flush_key_blocks=0; +my_bool my_disable_symlinks=0; +my_bool my_disable_copystat_in_redel=0; + +/* Typelib by all clients */ +const char *sql_protocol_names_lib[] = +{ "TCP", "SOCKET", "PIPE", NullS }; + +TYPELIB sql_protocol_typelib ={ 
array_elements(sql_protocol_names_lib) - 1, "", + sql_protocol_names_lib, NULL }; diff --git a/mysys/my_static.h b/mysys/my_static.h new file mode 100644 index 00000000..b30540b1 --- /dev/null +++ b/mysys/my_static.h @@ -0,0 +1,48 @@ +#ifndef MYSYS_MY_STATIC_INCLUDED +#define MYSYS_MY_STATIC_INCLUDED + +/* Copyright (c) 2000, 2010, Oracle and/or its affiliates. All rights reserved. + + This program is free software; you can redistribute it and/or modify + it under the terms of the GNU General Public License as published by + the Free Software Foundation; version 2 of the License. + + This program is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + GNU General Public License for more details. + + You should have received a copy of the GNU General Public License + along with this program; if not, write to the Free Software + Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1335 USA */ + +/* + Static variables for mysys library. 
All definied here for easy making of + a shared library +*/ + +C_MODE_START +#include + +#define MAX_SIGNALS 10 /* Max signals under a dont-allow */ + +struct st_remember { + int number; + sig_handler (*func)(int number); +}; + +extern char curr_dir[FN_REFLEN], home_dir_buff[FN_REFLEN]; + +extern volatile int _my_signals; +extern struct st_remember _my_sig_remember[MAX_SIGNALS]; + +extern const char *soundex_map; + +extern USED_MEM* my_once_root_block; +extern uint my_once_extra; + +extern struct st_my_file_info my_file_info_default[MY_NFILE]; + +C_MODE_END + +#endif /* MYSYS_MY_STATIC_INCLUDED */ diff --git a/mysys/my_symlink.c b/mysys/my_symlink.c new file mode 100644 index 00000000..8238e501 --- /dev/null +++ b/mysys/my_symlink.c @@ -0,0 +1,266 @@ +/* + Copyright (c) 2001, 2011, Oracle and/or its affiliates + Copyright (c) 2010, 2022, MariaDB + + This program is free software; you can redistribute it and/or modify + it under the terms of the GNU General Public License as published by + the Free Software Foundation; version 2 of the License. + + This program is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + GNU General Public License for more details. + + You should have received a copy of the GNU General Public License + along with this program; if not, write to the Free Software + Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1335 USA */ + +#include "mysys_priv.h" +#include "mysys_err.h" +#include +#include +#ifdef HAVE_REALPATH +#include +#include +#endif + +static int always_valid(const char *filename __attribute__((unused))) +{ + return 0; +} + +int (*mysys_test_invalid_symlink)(const char *filename)= always_valid; + + +/* + Reads the content of a symbolic link + If the file is not a symbolic link, return the original file name in to. 
+
+  RETURN
+    0 If filename was a symlink, (to will be set to value of symlink)
+    1 If filename was a normal file (to will be set to filename)
+    -1 on error.
+*/
+
+int my_readlink(char *to, const char *filename, myf MyFlags)
+{
+#ifndef HAVE_READLINK
+  strmov(to,filename); /* no readlink(): every file is reported as a normal file */
+  return 1;
+#else
+  int result=0;
+  int length;
+  DBUG_ENTER("my_readlink");
+
+  if ((length=readlink(filename, to, FN_REFLEN-1)) < 0)
+  {
+    /* Don't give an error if this wasn't a symlink */
+    if ((my_errno=errno) == EINVAL)
+    {
+      result= 1;
+      strmov(to,filename);
+    }
+    else
+    {
+      if (MyFlags & MY_WME)
+        my_error(EE_CANT_READLINK, MYF(0), filename, errno);
+      result= -1;
+    }
+  }
+  else
+    to[length]=0; /* terminate the string; at most FN_REFLEN-1 bytes were read */
+  DBUG_PRINT("exit" ,("result: %d", result));
+  DBUG_RETURN(result);
+#endif /* HAVE_READLINK */
+}
+
+
+/* Create a symbolic link 'linkname' with content 'content'; 0 = ok, -1 = error */
+
+int my_symlink(const char *content, const char *linkname, myf MyFlags)
+{
+#ifndef HAVE_READLINK
+  return 0; /* nothing is created, but success is reported */
+#else
+  int result;
+  DBUG_ENTER("my_symlink");
+  DBUG_PRINT("enter",("content: %s linkname: %s", content, linkname));
+
+  result= 0;
+  if (symlink(content, linkname))
+  {
+    result= -1;
+    my_errno=errno;
+    if (MyFlags & MY_WME)
+      my_error(EE_CANT_SYMLINK, MYF(0), linkname, content, errno);
+  }
+  else if ((MyFlags & MY_SYNC_DIR) && my_sync_dir_by_file(linkname, MyFlags))
+    result= -1; /* link was created but the directory sync failed */
+  DBUG_RETURN(result);
+#endif /* HAVE_READLINK */
+}
+
+#if defined(SCO)
+#define BUFF_LEN 4097
+#elif defined(MAXPATHLEN)
+#define BUFF_LEN MAXPATHLEN
+#else
+#define BUFF_LEN FN_LEN
+#endif
+
+/* Return non-zero if filename is a symlink; 0 if not, or if it can't be checked */
+
+int my_is_symlink(const char *filename __attribute__((unused)))
+{
+#if defined (HAVE_LSTAT) && defined (S_ISLNK)
+  struct stat stat_buff;
+  if (lstat(filename, &stat_buff))
+    return 0; /* lstat() failed: reported as "not a symlink" */
+  MSAN_STAT_WORKAROUND(&stat_buff);
+  return !!S_ISLNK(stat_buff.st_mode);
+#elif defined (_WIN32)
+  DWORD dwAttr = GetFileAttributes(filename);
+  return (dwAttr != INVALID_FILE_ATTRIBUTES) &&
+         (dwAttr & FILE_ATTRIBUTE_REPARSE_POINT);
+#else /* No symlinks */
+  return 0;
+#endif
+}
+
+/*
+  Resolve
all symbolic links in path
+  'to' may be equal to 'filename'
+
+  to is guaranteed to never set to a string longer than FN_REFLEN
+  (including the end \0)
+
+  On error returns -1, unless error is file not found, in which case it
+  is 1.
+
+  Sets my_errno to specific error number.
+*/
+
+int my_realpath(char *to, const char *filename, myf MyFlags)
+{
+#if defined(HAVE_REALPATH) && !defined(HAVE_BROKEN_REALPATH)
+  int result=0;
+  char buff[BUFF_LEN];
+  char *ptr;
+  DBUG_ENTER("my_realpath");
+
+  DBUG_PRINT("info",("executing realpath"));
+  if ((ptr=realpath(filename,buff)))
+    strmake(to, ptr, FN_REFLEN-1); /* length limit passed is FN_REFLEN-1 */
+  else
+  {
+    /*
+      realpath() didn't work; fall back to my_load_path(), which is a poor
+      substitute but will at least be able to resolve paths that start
+      with '.'.
+    */
+    if (MyFlags)
+      DBUG_PRINT("error",("realpath failed with errno: %d", errno));
+    my_errno=errno;
+    if (MyFlags & MY_WME)
+      my_error(EE_REALPATH, MYF(0), filename, my_errno);
+    my_load_path(to, filename, NullS);
+    if (my_errno == ENOENT)
+      result= 1; /* file not found */
+    else
+      result= -1; /* any other error */
+  }
+  DBUG_RETURN(result);
+#elif defined(_WIN32)
+  int ret= GetFullPathName(filename,FN_REFLEN, to, NULL);
+  if (ret == 0 || ret > FN_REFLEN)
+  {
+    my_errno= (ret > FN_REFLEN) ? ENAMETOOLONG : GetLastError();
+    if (MyFlags & MY_WME)
+      my_error(EE_REALPATH, MYF(0), filename, my_errno);
+    /*
+      GetFullPathName() didn't work: fall back to my_load_path(), which is a
+      poor substitute but will at least be able to resolve
+      paths that start with '.'.
+    */
+    my_load_path(to, filename, NullS);
+    return -1; /* this branch never returns 1, unlike the realpath() branch */
+  }
+#else
+  my_load_path(to, filename, NullS);
+#endif
+  return 0; /* reached only by the _WIN32 and fallback branches */
+}
+
+#ifdef HAVE_OPEN_PARENT_DIR_NOSYMLINKS
+/** opens the parent dir. walks the path, and does not resolve symlinks
+
+   returns the pointer to the file name (basename) within the pathname
+   or NULL in case of an error
+
+   stores the parent dir (dirname) file descriptor in pdfd.
+   It can be -1 even if there was no error!
+ + This is used for symlinked tables for DATA/INDEX DIRECTORY. + The paths there have been realpath()-ed. So, we can assume here that + + * `pathname` is an absolute path + * no '.', '..', and '//' in the path + * file exists +*/ + +const char *my_open_parent_dir_nosymlinks(const char *pathname, int *pdfd) +{ + char buf[FN_REFLEN + 1]; + char *s= buf, *e= buf+1, *end= strnmov(buf, pathname, sizeof(buf)); + int fd, dfd= -1; + + if (*end) + { + errno= ENAMETOOLONG; + return NULL; + } + + if (*s != '/') /* not an absolute path */ + { + errno= ENOENT; + return NULL; + } + + for (;;) + { + if (*e == '/') /* '//' in the path */ + { + errno= ENOENT; + goto err; + } + while (*e && *e != '/') + e++; + *e= 0; + + if (!memcmp(s, ".", 2) || !memcmp(s, "..", 3)) + { + errno= ENOENT; + goto err; + } + + if (++e >= end) + { + *pdfd= dfd; + return pathname + (s - buf); + } + + fd = openat(dfd, s, O_NOFOLLOW | O_PATH | O_CLOEXEC); + if (fd < 0) + goto err; + + if (dfd >= 0) + close(dfd); + + dfd= fd; + s= e; + } +err: + if (dfd >= 0) + close(dfd); + return NULL; +} +#endif diff --git a/mysys/my_symlink2.c b/mysys/my_symlink2.c new file mode 100644 index 00000000..0b580ecd --- /dev/null +++ b/mysys/my_symlink2.c @@ -0,0 +1,191 @@ +/* Copyright (c) 2000, 2001, 2003, 2005-2007 MySQL AB + Use is subject to license terms + + This program is free software; you can redistribute it and/or modify + it under the terms of the GNU General Public License as published by + the Free Software Foundation; version 2 of the License. + + This program is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + GNU General Public License for more details. 
+ + You should have received a copy of the GNU General Public License + along with this program; if not, write to the Free Software + Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1335 USA */ + +/* + Advanced symlink handling. + This is used in MyISAM to let users symlinks tables to different disk. + The main idea with these functions is to automatically create, delete and + rename files and symlinks like they would be one unit. +*/ + +#include "mysys_priv.h" +#include "mysys_err.h" +#include + +File my_create_with_symlink(const char *linkname, const char *filename, + int createflags, int access_flags, myf MyFlags) +{ + File file; + int tmp_errno; + /* Test if we should create a link */ + int create_link; + char abs_linkname[FN_REFLEN]; + DBUG_ENTER("my_create_with_symlink"); + DBUG_PRINT("enter", ("linkname: %s filename: %s", + linkname ? linkname : "(NULL)", + filename ? filename : "(NULL)")); + + if (my_disable_symlinks) + { + DBUG_PRINT("info", ("Symlinks disabled")); + /* Create only the file, not the link and file */ + create_link= 0; + if (linkname) + filename= linkname; + } + else + { + if (linkname) + my_realpath(abs_linkname, linkname, MYF(0)); + create_link= (linkname && strcmp(abs_linkname,filename)); + } + + if (!(MyFlags & MY_DELETE_OLD)) + { + if (!access(filename,F_OK)) + { + my_errno= errno= EEXIST; + my_error(EE_CANTCREATEFILE, MYF(0), filename, EEXIST); + DBUG_RETURN(-1); + } + if (create_link && !access(linkname,F_OK)) + { + my_errno= errno= EEXIST; + my_error(EE_CANTCREATEFILE, MYF(0), linkname, EEXIST); + DBUG_RETURN(-1); + } + } + + if ((file=my_create(filename, createflags, access_flags, MyFlags)) >= 0) + { + if (create_link) + { + /* Delete old link/file */ + if (MyFlags & MY_DELETE_OLD) + my_delete(linkname, MYF(0)); + /* Create link */ + if (my_symlink(filename, linkname, MyFlags)) + { + /* Fail, remove everything we have done */ + tmp_errno=my_errno; + my_close(file,MYF(0)); + my_delete(filename, MYF(0)); + file= 
-1; + my_errno=tmp_errno; + } + } + } + DBUG_RETURN(file); +} + +/* + If the file is a normal file, just rename it. + If the file is a symlink: + - Create a new file with the name 'to' that points at + symlink_dir/basename(to) + - Rename the symlinked file to symlink_dir/basename(to) + - Delete 'from' + If something goes wrong, restore everything. +*/ + +int my_rename_with_symlink(const char *from, const char *to, myf MyFlags) +{ +#ifndef HAVE_READLINK + return my_rename(from, to, MyFlags); +#else + char link_name[FN_REFLEN], tmp_name[FN_REFLEN]; + int was_symlink= (!my_disable_symlinks && + !my_readlink(link_name, from, MYF(0))); + int result=0; + int name_is_different; + DBUG_ENTER("my_rename_with_symlink"); + + if (!was_symlink) + DBUG_RETURN(my_rename(from, to, MyFlags)); + + /* Change filename that symlink pointed to */ + strmov(tmp_name, to); + fn_same(tmp_name,link_name,1); /* Copy dir */ + name_is_different= strcmp(link_name, tmp_name); + if (name_is_different && !access(tmp_name, F_OK)) + { + my_errno= EEXIST; + if (MyFlags & MY_WME) + my_error(EE_CANTCREATEFILE, MYF(0), tmp_name, EEXIST); + DBUG_RETURN(1); + } + + /* Create new symlink */ + if (my_symlink(tmp_name, to, MyFlags)) + DBUG_RETURN(1); + + /* + Rename symlinked file if the base name didn't change. + This can happen if you use this function where 'from' and 'to' has + the same basename and different directories. 
+ */ + + if (name_is_different && my_rename(link_name, tmp_name, MyFlags)) + { + int save_errno=my_errno; + my_delete(to, MyFlags); /* Remove created symlink */ + my_errno=save_errno; + DBUG_RETURN(1); + } + + /* Remove original symlink */ + if (my_delete(from, MyFlags)) + { + int save_errno=my_errno; + /* Remove created link */ + my_delete(to, MyFlags); + /* Rename file back */ + if (strcmp(link_name, tmp_name)) + (void) my_rename(tmp_name, link_name, MyFlags); + my_errno=save_errno; + result= 1; + } + DBUG_RETURN(result); +#endif /* HAVE_READLINK */ +} + +/** delete a - possibly symlinked - table file + + This is used to delete a file that is part of a table (e.g. MYI or MYD + file of MyISAM) when dropping a table. A file might be a symlink - + if the table was created with DATA DIRECTORY or INDEX DIRECTORY - + in this case both the symlink and the symlinked file are deleted, + but only if the symlinked file is not in the datadir. +*/ + +int my_handler_delete_with_symlink(const char *filename, myf sync_dir) +{ + char real[FN_REFLEN]; + int res= 0; + DBUG_ENTER("my_handler_delete_with_symlink"); + + if (my_is_symlink(filename)) + { + /* + Delete the symlinked file only if the symlink is not + pointing into datadir. + */ + if (!(my_realpath(real, filename, MYF(0)) || + mysys_test_invalid_symlink(real))) + res= my_delete(real, MYF(MY_NOSYMLINKS | sync_dir)); + } + DBUG_RETURN(my_delete(filename, sync_dir) || res); +} diff --git a/mysys/my_sync.c b/mysys/my_sync.c new file mode 100644 index 00000000..6f8760c3 --- /dev/null +++ b/mysys/my_sync.c @@ -0,0 +1,188 @@ +/* + Copyright (c) 2003, 2011, Oracle and/or its affiliates + + This program is free software; you can redistribute it and/or modify + it under the terms of the GNU General Public License as published by + the Free Software Foundation; version 2 of the License. 
+ + This program is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + GNU General Public License for more details. + + You should have received a copy of the GNU General Public License + along with this program; if not, write to the Free Software + Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1335 USA */ + +#include "mysys_priv.h" +#include "mysys_err.h" +#include + + +ulong my_sync_count; /* Count number of sync calls */ + +static void (*before_sync_wait)(void)= 0; +static void (*after_sync_wait)(void)= 0; + +void thr_set_sync_wait_callback(void (*before_wait)(void), + void (*after_wait)(void)) +{ + before_sync_wait= before_wait; + after_sync_wait= after_wait; +} + +/* + Sync data in file to disk + + SYNOPSIS + my_sync() + fd File descritor to sync + my_flags Flags (now only MY_WME is supported) + + NOTE + If file system supports its, only file data is synced, not inode data. + + MY_IGNORE_BADFD is useful when fd is "volatile" - not protected by a + mutex. In this case by the time of fsync(), fd may be already closed by + another thread, or even reassigned to a different file. With this flag - + MY_IGNORE_BADFD - such a situation will not be considered an error. + (which is correct behaviour, if we know that the other thread synced the + file before closing) + + RETURN + 0 ok + -1 error +*/ + +int my_sync(File fd, myf my_flags) +{ + int res; + DBUG_ENTER("my_sync"); + DBUG_PRINT("my",("fd: %d my_flags: %lu", fd, my_flags)); + + if (my_disable_sync) + DBUG_RETURN(0); + + statistic_increment(my_sync_count,&THR_LOCK_open); + + if (before_sync_wait) + (*before_sync_wait)(); + + do + { +#if defined(F_FULLFSYNC) + /* + In Mac OS X >= 10.3 this call is safer than fsync() (it forces the + disk's cache and guarantees ordered writes). 
+ */ + if (!(res= fcntl(fd, F_FULLFSYNC, 0))) + break; /* ok */ + /* Some file systems don't support F_FULLFSYNC and fail above: */ + DBUG_PRINT("info",("fcntl(F_FULLFSYNC) failed, falling back")); +#endif +#if defined(HAVE_FDATASYNC) && HAVE_DECL_FDATASYNC + res= fdatasync(fd); +#elif defined(HAVE_FSYNC) + res= fsync(fd); + if (res == -1 && errno == ENOLCK) + res= 0; /* Result Bug in Old FreeBSD */ +#elif defined(_WIN32) + res= my_win_fsync(fd); +#else +#error Cannot find a way to sync a file, durability in danger + res= 0; /* No sync (strange OS) */ +#endif + } while (res == -1 && errno == EINTR); + + if (res) + { + int er= errno; + if (!(my_errno= er)) + my_errno= -1; /* Unknown error */ + if (after_sync_wait) + (*after_sync_wait)(); + if ((my_flags & MY_IGNORE_BADFD) && + (er == EBADF || er == EINVAL || er == EROFS)) + { + DBUG_PRINT("info", ("ignoring errno %d", er)); + res= 0; + } + else if (my_flags & MY_WME) + my_error(EE_SYNC, MYF(ME_BELL), my_filename(fd), my_errno); + } + else + { + if (after_sync_wait) + (*after_sync_wait)(); + } + DBUG_RETURN(res); +} /* my_sync */ + + +/* + Force directory information to disk. + + SYNOPSIS + my_sync_dir() + dir_name the name of the directory + my_flags flags (MY_WME etc) + + RETURN + 0 if ok, !=0 if error +*/ + +int my_sync_dir(const char *dir_name __attribute__((unused)), + myf my_flags __attribute__((unused))) +{ +#ifdef NEED_EXPLICIT_SYNC_DIR + static const char cur_dir_name[]= {FN_CURLIB, 0}; + File dir_fd; + int res= 0; + const char *correct_dir_name; + DBUG_ENTER("my_sync_dir"); + DBUG_PRINT("my",("Dir: '%s' my_flags: %lu", dir_name, my_flags)); + /* Sometimes the path does not contain an explicit directory */ + correct_dir_name= (dir_name[0] == 0) ? cur_dir_name : dir_name; + /* + Syncing a dir may give EINVAL on tmpfs on Linux, which is ok. + EIO on the other hand is very important. Hence MY_IGNORE_BADFD. 
+ */ + if ((dir_fd= my_open(correct_dir_name, O_RDONLY, MYF(my_flags))) >= 0) + { + if (my_sync(dir_fd, MYF(my_flags | MY_IGNORE_BADFD))) + res= 2; + if (my_close(dir_fd, MYF(my_flags))) + res= 3; + } + else + res= 1; + DBUG_RETURN(res); +#else + return 0; +#endif +} + +/* + Force directory information to disk. + + SYNOPSIS + my_sync_dir_by_file() + file_name the name of a file in the directory + my_flags flags (MY_WME etc) + + RETURN + 0 if ok, !=0 if error +*/ + +int my_sync_dir_by_file(const char *file_name __attribute__((unused)), + myf my_flags __attribute__((unused))) +{ +#ifdef NEED_EXPLICIT_SYNC_DIR + char dir_name[FN_REFLEN]; + size_t dir_name_length; + dirname_part(dir_name, file_name, &dir_name_length); + return my_sync_dir(dir_name, my_flags & ~MY_NOSYMLINKS); +#else + return 0; +#endif +} diff --git a/mysys/my_thr_init.c b/mysys/my_thr_init.c new file mode 100644 index 00000000..2e8decd7 --- /dev/null +++ b/mysys/my_thr_init.c @@ -0,0 +1,614 @@ +/* Copyright (c) 2000, 2011 Oracle and/or its affiliates. + Copyright 2008, 2021, MariaDB Corporation. + + This program is free software; you can redistribute it and/or modify + it under the terms of the GNU General Public License as published by + the Free Software Foundation; version 2 of the License. + + This program is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + GNU General Public License for more details. + + You should have received a copy of the GNU General Public License + along with this program; if not, write to the Free Software + Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1335 USA */ + +/* + Functions to handle initializating and allocationg of all mysys & debug + thread variables. 
/*
  These are mutexes not used by safe_mutex or my_thr_init.c

  We want to free these earlier than other mutexes so that safe_mutex
  can detect if all mutexes and memory are freed properly.
*/

/* Initialize the global THR_LOCK_* mutexes (and LOCK_localtime_r if needed) */
static void my_thread_init_common_mutex(void)
{
  mysql_mutex_init(key_THR_LOCK_open, &THR_LOCK_open, MY_MUTEX_INIT_FAST);
  mysql_mutex_init(key_THR_LOCK_lock, &THR_LOCK_lock, MY_MUTEX_INIT_FAST);
  mysql_mutex_init(key_THR_LOCK_myisam, &THR_LOCK_myisam, MY_MUTEX_INIT_SLOW);
  mysql_mutex_init(key_THR_LOCK_myisam_mmap, &THR_LOCK_myisam_mmap, MY_MUTEX_INIT_FAST);
  mysql_mutex_init(key_THR_LOCK_heap, &THR_LOCK_heap, MY_MUTEX_INIT_FAST);
  mysql_mutex_init(key_THR_LOCK_net, &THR_LOCK_net, MY_MUTEX_INIT_FAST);
  mysql_mutex_init(key_THR_LOCK_charset, &THR_LOCK_charset, MY_MUTEX_INIT_FAST);
#if !defined(HAVE_LOCALTIME_R) || !defined(HAVE_GMTIME_R)
  mysql_mutex_init(key_LOCK_localtime_r, &LOCK_localtime_r, MY_MUTEX_INIT_SLOW);
#endif
}

/* Destroy everything set up by my_thread_init_common_mutex() */
void my_thread_destroy_common_mutex(void)
{
  mysql_mutex_destroy(&THR_LOCK_open);
  mysql_mutex_destroy(&THR_LOCK_lock);
  mysql_mutex_destroy(&THR_LOCK_myisam);
  mysql_mutex_destroy(&THR_LOCK_myisam_mmap);
  mysql_mutex_destroy(&THR_LOCK_heap);
  mysql_mutex_destroy(&THR_LOCK_net);
  mysql_mutex_destroy(&THR_LOCK_charset);
#if !defined(HAVE_LOCALTIME_R) || !defined(HAVE_GMTIME_R)
  mysql_mutex_destroy(&LOCK_localtime_r);
#endif
}


/*
  These mutexes are used by my_thread_init() and after
  my_thread_destroy_mutex()
*/

/* Initialize THR_LOCK_threads, THR_LOCK_malloc and THR_COND_threads */
static void my_thread_init_internal_mutex(void)
{
  mysql_mutex_init(key_THR_LOCK_threads, &THR_LOCK_threads, MY_MUTEX_INIT_FAST);
  mysql_mutex_init(key_THR_LOCK_malloc, &THR_LOCK_malloc, MY_MUTEX_INIT_FAST);
  mysql_cond_init(key_THR_COND_threads, &THR_COND_threads, NULL);
}

/* Destroy everything set up by my_thread_init_internal_mutex() */
void my_thread_destroy_internal_mutex(void)
{
  mysql_mutex_destroy(&THR_LOCK_threads);
  mysql_mutex_destroy(&THR_LOCK_malloc);
  mysql_cond_destroy(&THR_COND_threads);
}

/* Initialize the per-thread mutex and 'suspend' condition of 'var' */
static void my_thread_init_thr_mutex(struct st_my_thread_var *var)
{
  mysql_mutex_init(key_my_thread_var_mutex, &var->mutex, MY_MUTEX_INIT_FAST);
  mysql_cond_init(key_my_thread_var_suspend, &var->suspend, NULL);
}

/* Destroy the per-thread mutex and 'suspend' condition of 'var' */
static void my_thread_destory_thr_mutex(struct st_my_thread_var *var)
{
  mysql_mutex_destroy(&var->mutex);
  mysql_cond_destroy(&var->suspend);
}
+*/ +void my_thread_global_reinit(void) +{ + struct st_my_thread_var *tmp; + + DBUG_ASSERT(my_thread_global_init_done); + +#ifdef HAVE_PSI_INTERFACE + my_init_mysys_psi_keys(); +#endif + + my_thread_destroy_common_mutex(); + my_thread_init_common_mutex(); + + my_thread_destroy_internal_mutex(); + my_thread_init_internal_mutex(); + + tmp= my_thread_var; + DBUG_ASSERT(tmp); + + my_thread_destory_thr_mutex(tmp); + my_thread_init_thr_mutex(tmp); +} + +/* + initialize thread environment + + SYNOPSIS + my_thread_global_init() + + RETURN + 0 ok + 1 error (Couldn't create THR_KEY_mysys) +*/ + +my_bool my_thread_global_init(void) +{ + int pth_ret; + + /* Normally this should never be called twice */ + DBUG_ASSERT(my_thread_global_init_done == 0); + if (my_thread_global_init_done) + return 0; + my_thread_global_init_done= 1; + + /* + THR_KEY_mysys is deleted in my_end() as DBUG libraries are using it even + after my_thread_global_end() is called. + my_thr_key_mysys_exist is used to protect against application like QT + that calls my_thread_global_init() + my_thread_global_end() multiple times + without calling my_init() + my_end(). + */ + if (!my_thr_key_mysys_exists && + (pth_ret= pthread_key_create(&THR_KEY_mysys, NULL)) != 0) + { + fprintf(stderr, "Can't initialize threads: error %d\n", pth_ret); + return 1; + } + my_thr_key_mysys_exists= 1; + + /* Mutex used by my_thread_init() and after my_thread_destroy_mutex() */ + my_thread_init_internal_mutex(); + + if (my_thread_init()) + return 1; + + + my_thread_init_common_mutex(); + + return 0; +} + + +/** + End the mysys thread system. 
Called when ending the last thread +*/ + +void my_thread_global_end(void) +{ + struct timespec abstime; + my_bool all_threads_killed= 1; + + set_timespec(abstime, my_thread_end_wait_time); + mysql_mutex_lock(&THR_LOCK_threads); + while (THR_thread_count > 0) + { + int error= mysql_cond_timedwait(&THR_COND_threads, &THR_LOCK_threads, + &abstime); + if (error == ETIMEDOUT || error == ETIME) + { +#ifdef HAVE_PTHREAD_KILL + /* + We shouldn't give an error here, because if we don't have + pthread_kill(), programs like mysqld can't ensure that all threads + are killed when we enter here. + */ + if (THR_thread_count) + fprintf(stderr, + "Error in my_thread_global_end(): %d threads didn't exit\n", + THR_thread_count); +#endif + all_threads_killed= 0; + break; + } + } + mysql_mutex_unlock(&THR_LOCK_threads); + + my_thread_destroy_common_mutex(); + + /* + Only destroy the mutex & conditions if we don't have other threads around + that could use them. + */ + if (all_threads_killed) + { + my_thread_destroy_internal_mutex(); + } + my_thread_global_init_done= 0; +} + +static my_thread_id thread_id= 0; + +/* + Allocate thread specific memory for the thread, used by mysys and dbug + + SYNOPSIS + my_thread_init() + + NOTES + We can't use mutex_locks here if we are using windows as + we may have compiled the program with SAFE_MUTEX, in which + case the checking of mutex_locks will not work until + the pthread_self thread specific variable is initialized. + + This function may called multiple times for a thread, for example + if one uses my_init() followed by mysql_server_init(). 
+ + RETURN + 0 ok + 1 Fatal error; mysys/dbug functions can't be used +*/ + +my_bool my_thread_init(void) +{ + struct st_my_thread_var *tmp; + my_bool error=0; + + if (!my_thread_global_init_done) + return 1; /* cannot proceed with uninitialized library */ + +#ifdef EXTRA_DEBUG_THREADS + fprintf(stderr,"my_thread_init(): pthread_self: %p\n", pthread_self()); +#endif + + if (my_thread_var) + { +#ifdef EXTRA_DEBUG_THREADS + fprintf(stderr,"my_thread_init() called more than once in thread 0x%lx\n", + (long) pthread_self()); +#endif + goto end; + } + +#ifdef _MSC_VER + install_sigabrt_handler(); +#endif + + if (!(tmp= (struct st_my_thread_var *) calloc(1, sizeof(*tmp)))) + { + error= 1; + goto end; + } + set_mysys_var(tmp); + tmp->pthread_self= pthread_self(); + my_thread_init_thr_mutex(tmp); + + tmp->stack_ends_here= (char*)&tmp + + STACK_DIRECTION * (long)my_thread_stack_size; + + mysql_mutex_lock(&THR_LOCK_threads); + tmp->id= tmp->dbug_id= ++thread_id; + ++THR_thread_count; + mysql_mutex_unlock(&THR_LOCK_threads); + tmp->init= 1; +#ifndef DBUG_OFF + /* Generate unique name for thread */ + (void) my_thread_name(); +#endif + +end: + return error; +} + + +/* + Deallocate memory used by the thread for book-keeping + + SYNOPSIS + my_thread_end() + + NOTE + This may be called multiple times for a thread. + This happens for example when one calls 'mysql_server_init()' + mysql_server_end() and then ends with a mysql_end(). +*/ + +void my_thread_end(void) +{ + struct st_my_thread_var *tmp; + tmp= my_thread_var; + +#ifdef EXTRA_DEBUG_THREADS + fprintf(stderr,"my_thread_end(): tmp: %p pthread_self: %p thread_id: %ld\n", + tmp, pthread_self(), tmp ? (long) tmp->id : 0L); +#endif + + /* + Remove the instrumentation for this thread. + This must be done before trashing st_my_thread_var, + because the LF_HASH depends on it. 
/* Return the value stored under THR_KEY_mysys for the calling thread */
struct st_my_thread_var *_my_thread_var(void)
{
  return my_pthread_getspecific(struct st_my_thread_var*,THR_KEY_mysys);
}

/*
  Store 'mysys_var' under THR_KEY_mysys for the calling thread and return
  the result of the underlying set-specific call.
*/
int set_mysys_var(struct st_my_thread_var *mysys_var)
{
  return my_pthread_setspecific_ptr(THR_KEY_mysys, mysys_var);
}
tmp->dbug_id : 0; +} + +#ifdef DBUG_OFF +const char *my_thread_name(void) +{ + return "no_name"; +} + +#else + +const char *my_thread_name(void) +{ + char name_buff[100]; + struct st_my_thread_var *tmp=my_thread_var; + if (!tmp->name[0]) + { + my_thread_id id= my_thread_dbug_id(); + snprintf(name_buff, sizeof(name_buff), "T@%lu", (ulong) id); + strmake_buf(tmp->name, name_buff); + } + return tmp->name; +} + +/* Return pointer to DBUG for holding current state */ + +extern void **my_thread_var_dbug() +{ + struct st_my_thread_var *tmp; + if (!my_thread_global_init_done) + return NULL; + tmp= my_thread_var; + return tmp && tmp->init ? &tmp->dbug : 0; +} +#endif /* DBUG_OFF */ + +/* Return pointer to mutex_in_use */ + +safe_mutex_t **my_thread_var_mutex_in_use() +{ + struct st_my_thread_var *tmp; + if (!my_thread_global_init_done) + return NULL; + tmp= my_thread_var; + return tmp ? &tmp->mutex_in_use : 0; +} + +#ifdef _WIN32 +/* + In Visual Studio 2005 and later, default SIGABRT handler will overwrite + any unhandled exception filter set by the application and will try to + call JIT debugger. This is not what we want, this we calling __debugbreak + to stop in debugger, if process is being debugged or to generate + EXCEPTION_BREAKPOINT and then handle_segfault will do its magic. 
#ifdef HAVE_PSI_MUTEX_INTERFACE
/*
  Instrumented mutex lock: reports the wait to the performance schema
  around the real lock call and returns the lock call's result.
*/
ATTRIBUTE_COLD int psi_mutex_lock(mysql_mutex_t *that,
                                  const char *file, uint line)
{
  PSI_mutex_locker_state wait_state;
  PSI_mutex_locker *waiter;
  int rc;

  waiter= PSI_MUTEX_CALL(start_mutex_wait)
    (&wait_state, that->m_psi, PSI_MUTEX_LOCK, file, line);
# ifdef SAFE_MUTEX
  rc= safe_mutex_lock(&that->m_mutex, FALSE, file, line);
# else
  rc= pthread_mutex_lock(&that->m_mutex);
# endif
  if (waiter)
    PSI_MUTEX_CALL(end_mutex_wait)(waiter, rc);
  return rc;
}

/*
  Instrumented mutex trylock: same as psi_mutex_lock() but uses the
  non-blocking lock call.
*/
ATTRIBUTE_COLD int psi_mutex_trylock(mysql_mutex_t *that,
                                     const char *file, uint line)
{
  PSI_mutex_locker_state wait_state;
  PSI_mutex_locker *waiter;
  int rc;

  waiter= PSI_MUTEX_CALL(start_mutex_wait)
    (&wait_state, that->m_psi, PSI_MUTEX_TRYLOCK, file, line);
# ifdef SAFE_MUTEX
  rc= safe_mutex_lock(&that->m_mutex, TRUE, file, line);
# else
  rc= pthread_mutex_trylock(&that->m_mutex);
# endif
  if (waiter)
    PSI_MUTEX_CALL(end_mutex_wait)(waiter, rc);
  return rc;
}
#endif /* HAVE_PSI_MUTEX_INTERFACE */
PSI_RWLOCK_CALL(start_rwlock_rdwait) + (&state, that->m_psi, PSI_RWLOCK_TRYREADLOCK, file, line); + int result= rw_tryrdlock(&that->m_rwlock); + if (locker) + PSI_RWLOCK_CALL(end_rwlock_rdwait)(locker, result); + return result; +} + +ATTRIBUTE_COLD +int psi_rwlock_trywrlock(mysql_rwlock_t *that, const char *file, uint line) +{ + PSI_rwlock_locker_state state; + PSI_rwlock_locker *locker= PSI_RWLOCK_CALL(start_rwlock_wrwait) + (&state, that->m_psi, PSI_RWLOCK_TRYWRITELOCK, file, line); + int result= rw_trywrlock(&that->m_rwlock); + if (locker) + PSI_RWLOCK_CALL(end_rwlock_wrwait)(locker, result); + return result; +} + +ATTRIBUTE_COLD +int psi_rwlock_wrlock(mysql_rwlock_t *that, const char *file, uint line) +{ + PSI_rwlock_locker_state state; + PSI_rwlock_locker *locker= PSI_RWLOCK_CALL(start_rwlock_wrwait) + (&state, that->m_psi, PSI_RWLOCK_WRITELOCK, file, line); + int result= rw_wrlock(&that->m_rwlock); + if (locker) + PSI_RWLOCK_CALL(end_rwlock_wrwait)(locker, result); + return result; +} + +# ifndef DISABLE_MYSQL_PRLOCK_H +ATTRIBUTE_COLD +int psi_prlock_rdlock(mysql_prlock_t *that, const char *file, uint line) +{ + PSI_rwlock_locker_state state; + PSI_rwlock_locker *locker= PSI_RWLOCK_CALL(start_rwlock_rdwait) + (&state, that->m_psi, PSI_RWLOCK_READLOCK, file, line); + int result= rw_pr_rdlock(&that->m_prlock); + if (locker) + PSI_RWLOCK_CALL(end_rwlock_rdwait)(locker, result); + return result; +} + +ATTRIBUTE_COLD +int psi_prlock_wrlock(mysql_prlock_t *that, const char *file, uint line) +{ + PSI_rwlock_locker_state state; + PSI_rwlock_locker *locker= PSI_RWLOCK_CALL(start_rwlock_wrwait) + (&state, that->m_psi, PSI_RWLOCK_WRITELOCK, file, line); + int result= rw_pr_wrlock(&that->m_prlock); + if (locker) + PSI_RWLOCK_CALL(end_rwlock_wrwait)(locker, result); + return result; +} +# endif /* !DISABLE_MYSQL_PRLOCK_H */ +#endif /* HAVE_PSI_RWLOCK_INTERFACE */ + +#ifdef HAVE_PSI_COND_INTERFACE +ATTRIBUTE_COLD int psi_cond_wait(mysql_cond_t *that, mysql_mutex_t *mutex, 
+ const char *file, uint line) +{ + PSI_cond_locker_state state; + PSI_cond_locker *locker= PSI_COND_CALL(start_cond_wait) + (&state, that->m_psi, mutex->m_psi, PSI_COND_WAIT, file, line); + int result= my_cond_wait(&that->m_cond, &mutex->m_mutex); + if (locker) + PSI_COND_CALL(end_cond_wait)(locker, result); + return result; +} + +ATTRIBUTE_COLD int psi_cond_timedwait(mysql_cond_t *that, mysql_mutex_t *mutex, + const struct timespec *abstime, + const char *file, uint line) +{ + PSI_cond_locker_state state; + PSI_cond_locker *locker= PSI_COND_CALL(start_cond_wait) + (&state, that->m_psi, mutex->m_psi, PSI_COND_TIMEDWAIT, file, line); + int result= my_cond_timedwait(&that->m_cond, &mutex->m_mutex, abstime); + if (psi_likely(locker)) + PSI_COND_CALL(end_cond_wait)(locker, result); + return result; +} +#endif /* HAVE_PSI_COND_INTERFACE */ diff --git a/mysys/my_uuid.c b/mysys/my_uuid.c new file mode 100644 index 00000000..7925f801 --- /dev/null +++ b/mysys/my_uuid.c @@ -0,0 +1,226 @@ +/* Copyright (C) 2007 MySQL AB, Sergei Golubchik & Michael Widenius + + This program is free software; you can redistribute it and/or modify + it under the terms of the GNU General Public License as published by + the Free Software Foundation; version 2 of the License. + + This program is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + GNU General Public License for more details. 
+ + You should have received a copy of the GNU General Public License + along with this program; if not, write to the Free Software + Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1335 USA */ + +/* + implements Universal Unique Identifiers (UUIDs), as in + DCE 1.1: Remote Procedure Call, + Open Group Technical Standard Document Number C706, October 1997, + (supersedes C309 DCE: Remote Procedure Call 8/1994, + which was basis for ISO/IEC 11578:1996 specification) + + A UUID has the following structure: + + Field NDR Data Type Octet # Note + time_low unsigned long 0-3 The low field of the + timestamp. + time_mid unsigned short 4-5 The middle field of + the timestamp. + time_hi_and_version unsigned short 6-7 The high field of the + timestamp multiplexed + with the version number. + clock_seq_hi_and_reserved unsigned small 8 The high field of the + clock sequence multi- + plexed with the variant. + clock_seq_low unsigned small 9 The low field of the + clock sequence. + node character 10-15 The spatially unique node + identifier. 
+*/
+
+#include "mysys_priv.h"
+#include
+#include
+#include /* mi_int2store, mi_int4store */
+
+/* Set once by my_uuid_init(), cleared by my_uuid_end(). */
+static my_bool my_uuid_inited= 0;
+/* Random generator state; seeded in my_uuid_init(). */
+static struct my_rnd_struct uuid_rand;
+/* Ticks "borrowed" when several UUIDs are requested on the same clock tick. */
+static uint nanoseq;
+/* Timestamp used for the most recently generated UUID. */
+static ulonglong uuid_time= 0;
+/* Added to my_interval_timer()/100 to get the UUID timestamp. */
+static longlong interval_timer_offset;
+static uchar uuid_suffix[2+6]; /* clock_seq and node */
+
+/* Protects the generator state while my_uuid() computes a timestamp. */
+static mysql_mutex_t LOCK_uuid_generator;
+
+/*
+  Number of 100-nanosecond intervals between
+  1582-10-15 00:00:00.00 and 1970-01-01 00:00:00.00
+*/
+
+#define UUID_TIME_OFFSET ((ulonglong) 141427 * 24 * 60 * 60 * \
+                          1000 * 1000 * 10)
+#define UUID_VERSION 0x1000
+#define UUID_VARIANT 0x8000
+
+
+/*
+  Helper function
+
+  Draws a new random clock sequence (OR-ed with UUID_VARIANT), stores it in
+  the first two bytes of uuid_suffix and recomputes interval_timer_offset so
+  that, at this instant,
+    my_interval_timer()/100 + interval_timer_offset
+  equals my_hrtime().val * 10 + UUID_TIME_OFFSET.
+  NOTE(review): the scale factors suggest my_hrtime() counts microseconds and
+  my_interval_timer() nanoseconds -- confirm against their declarations.
+*/
+
+static void set_clock_seq()
+{
+  uint16 clock_seq= ((uint)(my_rnd(&uuid_rand)*16383)) | UUID_VARIANT;
+  mi_int2store(uuid_suffix, clock_seq);
+  interval_timer_offset= (my_hrtime().val * 10 - my_interval_timer()/100 +
+                          UUID_TIME_OFFSET);
+}
+
+
+/**
+  Init structures needed for my_uuid
+
+  @func my_uuid_init()
+  @param seed1 Seed for random generator
+  @param seed2 Seed for random generator
+
+  @note
+  Seed1 & seed2 should NOT depend on clock. This is to be able to
+  generate a random mac address according to UUID specs.
+
+  @note
+  Calling this again while already initialized is a no-op.
+*/
+
+void my_uuid_init(ulong seed1, ulong seed2)
+{
+  /* The node part occupies the last 6 bytes of uuid_suffix. */
+  uchar *mac= uuid_suffix+2;
+  ulonglong now;
+
+  if (my_uuid_inited)
+    return;
+  my_uuid_inited= 1;
+  /*
+    interval_timer_offset has not been set by set_clock_seq() yet on the
+    first call, so 'now' is only used as seed material here.
+  */
+  now= my_interval_timer()/100 + interval_timer_offset;
+  nanoseq= 0;
+
+  /* A non-zero return from my_gethwaddr() selects the random-node fallback. */
+  if (my_gethwaddr(mac))
+  {
+    uint i;
+    /*
+      Generating random "hardware addr"
+
+      Specs explicitly specify that node identifier should NOT
+      correlate with a clock_seq value, so we use a separate
+      randominit() here.
+    */
+    /* purecov: begin inspected */
+    my_rnd_init(&uuid_rand, (ulong) (seed2+ now/2), (ulong) (now+rand()));
+    for (i=0; i < array_elements(uuid_suffix) -2 ; i++)
+      mac[i]= (uchar)(my_rnd(&uuid_rand)*255);
+    /* purecov: end */
+  }
+  /* Re-seed with seed1 before drawing the clock sequence. */
+  my_rnd_init(&uuid_rand, (ulong) (seed1 + now), (ulong) (now/2+ getpid()));
+  set_clock_seq();
+  mysql_mutex_init(key_LOCK_uuid_generator, &LOCK_uuid_generator, MY_MUTEX_INIT_FAST);
+}
+
+
+/**
+  Create a global unique identifier (uuid)
+
+  @func my_uuid()
+  @param to Store uuid here. Must be of size MY_UUID_SIZE (16)
+
+  @note
+  my_uuid_init() must have been called (asserted). The timestamp is
+  computed under LOCK_uuid_generator; the output buffer is filled after
+  the lock is released.
+*/
+
+void my_uuid(uchar *to)
+{
+  ulonglong tv;
+  uint32 time_low;
+  uint16 time_mid, time_hi_and_version;
+
+  DBUG_ASSERT(my_uuid_inited);
+
+  mysql_mutex_lock(&LOCK_uuid_generator);
+  tv= my_interval_timer()/100 + interval_timer_offset + nanoseq;
+
+  if (likely(tv > uuid_time))
+  {
+    /*
+      Current time is ahead of last timestamp, as it should be.
+      If we "borrowed time", give it back, just as long as we
+      stay ahead of the previous timestamp.
+    */
+    if (nanoseq)
+    {
+      ulong delta;
+      DBUG_ASSERT((tv > uuid_time) && (nanoseq > 0));
+      /*
+        -1 so we won't make tv= uuid_time for nanoseq >= (tv - uuid_time)
+      */
+      delta= MY_MIN(nanoseq, (ulong)(tv - uuid_time -1));
+      tv-= delta;
+      nanoseq-= delta;
+    }
+  }
+  else
+  {
+    if (unlikely(tv == uuid_time))
+    {
+      /*
+        For low-res system clocks. If several requests for UUIDs
+        end up on the same tick, we add a nano-second to make them
+        different.
+        ( current_timestamp + nanoseq * calls_in_this_period )
+        may end up > next_timestamp; this is OK. Nonetheless, we'll
+        try to unwind nanoseq when we get a chance to.
+        If nanoseq overflows, we'll start over with a new numberspace
+        (so the if() below is needed so we can avoid the ++tv and thus
+        match the follow-up if() if nanoseq overflows!).
+      */
+      if (likely(++nanoseq))
+        ++tv;
+    }
+
+    if (unlikely(tv <= uuid_time))
+    {
+      /*
+        If the admin changes the system clock (or due to Daylight
+        Saving Time), the system clock may be turned *back* so we
+        go through a period once more for which we already gave out
+        UUIDs. To avoid duplicate UUIDs despite potentially identical
+        times, we make a new random component.
+        We also come here if the nanoseq "borrowing" overflows.
+        In either case, we throw away any nanoseq borrowing since it's
+        irrelevant in the new numberspace.
+      */
+      set_clock_seq();
+      tv= my_interval_timer()/100 + interval_timer_offset;
+      nanoseq= 0;
+      DBUG_PRINT("uuid",("making new numberspace"));
+    }
+  }
+
+  /* Remember the timestamp handed out so the next call can compare to it. */
+  uuid_time=tv;
+  mysql_mutex_unlock(&LOCK_uuid_generator);
+
+  /* Split the 64-bit timestamp into the three UUID time fields. */
+  time_low= (uint32) (tv & 0xFFFFFFFF);
+  time_mid= (uint16) ((tv >> 32) & 0xFFFF);
+  time_hi_and_version= (uint16) ((tv >> 48) | UUID_VERSION);
+
+  /*
+    Note, that the standard does NOT specify byte ordering in
+    multi-byte fields. it's implementation defined (but must be
+    the same for all fields).
+    We use big-endian, so we can use memcmp() to compare UUIDs
+    and for straightforward UUID to string conversion.
+  */
+  mi_int4store(to, time_low);
+  mi_int2store(to+4, time_mid);
+  mi_int2store(to+6, time_hi_and_version);
+  /* Bytes 8..15: clock sequence followed by the node identifier. */
+  bmove(to+8, uuid_suffix, sizeof(uuid_suffix));
+}
+
+
+/*
+  Release what my_uuid_init() set up: clears the "initialized" flag and
+  destroys LOCK_uuid_generator. Does nothing if the generator was not
+  initialized.
+*/
+void my_uuid_end()
+{
+  if (my_uuid_inited)
+  {
+    my_uuid_inited= 0;
+    mysql_mutex_destroy(&LOCK_uuid_generator);
+  }
+}
diff --git a/mysys/my_win_popen.cc b/mysys/my_win_popen.cc
new file mode 100644
index 00000000..cceb77e9
--- /dev/null
+++ b/mysys/my_win_popen.cc
@@ -0,0 +1,170 @@
+/* 2019, MariaDB Corporation.
+
+This program is free software; you can redistribute it and/or modify
+it under the terms of the GNU General Public License as published by
+the Free Software Foundation; version 2 of the License.
+ +This program is distributed in the hope that it will be useful, +but WITHOUT ANY WARRANTY; without even the implied warranty of +MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +GNU General Public License for more details. + +You should have received a copy of the GNU General Public License +along with this program; if not, write to the Free Software +Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA */ + +/* + Replacement of the buggy implementations of popen in Windows CRT +*/ +#include +#include +#include +#include +#include +#include + +enum +{ + REDIRECT_STDIN= 'w', + REDIRECT_STDOUT= 'r' +}; + +/** Map from FILE* returned by popen() to corresponding process handle.*/ +static std::unordered_map popen_map; +/* Mutex to protect the map.*/ +static std::mutex popen_mtx; + +/** +Creates a FILE* from HANDLE. +*/ +static FILE *make_fp(HANDLE *handle, const char *mode) +{ + int flags = 0; + + if (mode[0] == REDIRECT_STDOUT) + flags |= O_RDONLY; + switch (mode[1]) + { + case 't': + flags |= _O_TEXT; + break; + case 'b': + flags |= _O_BINARY; + break; + } + + int fd= _open_osfhandle((intptr_t) *handle, flags); + if (fd < 0) + return NULL; + FILE *fp= fdopen(fd, mode); + if (!fp) + { + /* Closing file descriptor also closes underlying handle.*/ + close(fd); + *handle= 0; + } + return fp; +} + +/** A home-backed version of popen(). 
*/
+/*
+  Runs "cmd.exe /c <cmd>" as a child process connected to the caller
+  through an anonymous pipe and returns a FILE* for the caller's pipe end.
+
+  mode[0] selects the direction: 'r' attaches the pipe to the child's
+  standard output, 'w' to its standard input; any other value calls abort().
+  The remaining standard handles of the child are the ones returned by
+  GetStdHandle() in this process.
+
+  On success the child's process handle is recorded in popen_map under the
+  returned FILE*, for my_win_pclose() to pick up. On failure, open pipe
+  handles are closed, a child that was already started is terminated, and
+  NULL is returned.
+*/
+extern "C" FILE *my_win_popen(const char *cmd, const char *mode)
+{
+  FILE *fp(0);
+  char type= mode[0];
+  HANDLE parent_pipe_end(0);
+  HANDLE child_pipe_end(0);
+  PROCESS_INFORMATION pi{};
+  STARTUPINFO si{};
+  std::string command_line;
+
+  /* Create a pipe between this and child process.*/
+  SECURITY_ATTRIBUTES sa_attr{};
+  sa_attr.nLength= sizeof(SECURITY_ATTRIBUTES);
+  sa_attr.bInheritHandle= TRUE;
+  switch (type)
+  {
+  case REDIRECT_STDIN:
+    /* Child reads, parent writes. */
+    if (!CreatePipe(&child_pipe_end, &parent_pipe_end, &sa_attr, 0))
+      goto error;
+    break;
+  case REDIRECT_STDOUT:
+    /* Parent reads, child writes. */
+    if (!CreatePipe(&parent_pipe_end, &child_pipe_end, &sa_attr, 0))
+      goto error;
+    break;
+  default:
+    /* Unknown mode, expected "r", "rt", "w", "wt" */
+    abort();
+  }
+  /* Both ends were created inheritable; the parent's end must not be. */
+  if (!SetHandleInformation(parent_pipe_end, HANDLE_FLAG_INHERIT, 0))
+    goto error;
+
+  /* Start child process with redirected output.*/
+
+  si.cb= sizeof(STARTUPINFO);
+  si.hStdError= GetStdHandle(STD_ERROR_HANDLE);
+  si.hStdOutput= (type == REDIRECT_STDOUT) ? child_pipe_end
+                                           : GetStdHandle(STD_OUTPUT_HANDLE);
+  si.hStdInput= (type == REDIRECT_STDIN) ? child_pipe_end
+                                         : GetStdHandle(STD_INPUT_HANDLE);
+
+  si.dwFlags|= STARTF_USESTDHANDLES;
+  command_line.append("cmd.exe /c ").append(cmd);
+
+  if (!CreateProcess(0, (LPSTR) command_line.c_str(), 0, 0, TRUE, 0, 0, 0, &si,
+                     &pi))
+    goto error;
+
+  /* The thread handle and this process' copy of the child end are not needed. */
+  CloseHandle(pi.hThread);
+  CloseHandle(child_pipe_end);
+  child_pipe_end= 0;
+
+  fp= make_fp(&parent_pipe_end, mode);
+  if (fp)
+  {
+    std::unique_lock lk(popen_mtx);
+    popen_map[fp]= pi.hProcess;
+    return fp;
+  }
+
+error:
+  /* Handles that were never opened, or were already closed, are 0 here. */
+  for (auto handle : { parent_pipe_end, child_pipe_end })
+  {
+    if (handle)
+      CloseHandle(handle);
+  }
+
+  /* pi is zero-initialized, so hProcess is only set if the child started. */
+  if (pi.hProcess)
+  {
+    TerminateProcess(pi.hProcess, 1);
+    CloseHandle(pi.hProcess);
+  }
+  return NULL;
+}
+
+/** A home-baked version of pclose().
*/ + +extern "C" int my_win_pclose(FILE *fp) +{ + /* Find process entry for given file pointer.*/ + std::unique_lock lk(popen_mtx); + HANDLE proc= popen_map[fp]; + if (!proc) + { + errno= EINVAL; + return -1; + } + popen_map.erase(fp); + lk.unlock(); + + fclose(fp); + + /* Wait for process to complete, return its exit code.*/ + DWORD ret; + if (WaitForSingleObject(proc, INFINITE) || !GetExitCodeProcess(proc, &ret)) + { + ret= -1; + errno= EINVAL; + } + CloseHandle(proc); + return ret; +} diff --git a/mysys/my_wincond.c b/mysys/my_wincond.c new file mode 100644 index 00000000..978be9db --- /dev/null +++ b/mysys/my_wincond.c @@ -0,0 +1,114 @@ +/* Copyright (c) 2000, 2014, Oracle and/or its affiliates. + Copyright (c) 2011, 2016, MariaDB + + This program is free software; you can redistribute it and/or modify + it under the terms of the GNU General Public License as published by + the Free Software Foundation; version 2 of the License. + + This program is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + GNU General Public License for more details. 
+ + You should have received a copy of the GNU General Public License + along with this program; if not, write to the Free Software + Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1335 USA */ + +/***************************************************************************** +** The following is a simple implementation of posix conditions +*****************************************************************************/ +#if defined(_WIN32) + +#undef SAFE_MUTEX /* Avoid safe_mutex redefinitions */ +#include "mysys_priv.h" +#include +#include +#include + + +/** + Convert abstime to milliseconds +*/ + +static DWORD get_milliseconds(const struct timespec *abstime) +{ + struct timespec current_time; + long long ms; + + if (abstime == NULL) + return INFINITE; + + set_timespec_nsec(current_time, 0); + ms= (abstime->tv_sec - current_time.tv_sec)*1000LL + + (abstime->tv_nsec - current_time.tv_nsec)/1000000LL; + if(ms < 0 ) + ms= 0; + if(ms > UINT_MAX) + ms= INFINITE; + return (DWORD)ms; +} + +int pthread_cond_init(pthread_cond_t *cond, const pthread_condattr_t *attr) +{ + InitializeConditionVariable(cond); + return 0; +} + + +int pthread_cond_destroy(pthread_cond_t *cond) +{ + return 0; +} + + +int pthread_cond_broadcast(pthread_cond_t *cond) +{ + WakeAllConditionVariable(cond); + return 0; +} + + +int pthread_cond_signal(pthread_cond_t *cond) +{ + WakeConditionVariable(cond); + return 0; +} + + +int pthread_cond_timedwait(pthread_cond_t *cond, pthread_mutex_t *mutex, + const struct timespec *abstime) +{ + DWORD timeout= get_milliseconds(abstime); + if (!SleepConditionVariableCS(cond, mutex, timeout)) + return ETIMEDOUT; + return 0; +} + + +int pthread_cond_wait(pthread_cond_t *cond, pthread_mutex_t *mutex) +{ + return pthread_cond_timedwait(cond, mutex, NULL); +} + + +int pthread_attr_init(pthread_attr_t *connect_att) +{ + connect_att->dwStackSize = 0; + connect_att->dwCreatingFlag = 0; + return 0; +} + +int pthread_attr_setstacksize(pthread_attr_t 
*connect_att,size_t stack) +{ + DBUG_ASSERT(stack < UINT_MAX); + connect_att->dwStackSize=(DWORD)stack; + return 0; +} + +int pthread_attr_destroy(pthread_attr_t *connect_att) +{ + bzero((uchar*) connect_att,sizeof(*connect_att)); + return 0; +} + +#endif /* _WIN32 */ diff --git a/mysys/my_winerr.c b/mysys/my_winerr.c new file mode 100644 index 00000000..dd4156da --- /dev/null +++ b/mysys/my_winerr.c @@ -0,0 +1,126 @@ +/* Copyright (c) 2008 MySQL AB, 2009 Sun Microsystems, Inc. + Use is subject to license terms. + +This program is free software; you can redistribute it and/or modify +it under the terms of the GNU General Public License as published by +the Free Software Foundation; version 2 of the License. + +This program is distributed in the hope that it will be useful, +but WITHOUT ANY WARRANTY; without even the implied warranty of +MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +GNU General Public License for more details. + +You should have received a copy of the GNU General Public License +along with this program; if not, write to the Free Software +Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1335 USA */ + +/* + Convert Windows API error (GetLastError() to Posix equivalent (errno) + The exported function my_osmaperr() is modelled after and borrows + heavily from undocumented _dosmaperr()(found of the static Microsoft C runtime). 
+*/ + +#include +#include + + +struct errentry +{ + unsigned long oscode; /* OS return value */ + int sysv_errno; /* System V error code */ +}; + +static struct errentry errtable[]= { + { ERROR_INVALID_FUNCTION, EINVAL }, /* 1 */ + { ERROR_FILE_NOT_FOUND, ENOENT }, /* 2 */ + { ERROR_PATH_NOT_FOUND, ENOENT }, /* 3 */ + { ERROR_TOO_MANY_OPEN_FILES, EMFILE }, /* 4 */ + { ERROR_ACCESS_DENIED, EACCES }, /* 5 */ + { ERROR_INVALID_HANDLE, EBADF }, /* 6 */ + { ERROR_ARENA_TRASHED, ENOMEM }, /* 7 */ + { ERROR_NOT_ENOUGH_MEMORY, ENOMEM }, /* 8 */ + { ERROR_INVALID_BLOCK, ENOMEM }, /* 9 */ + { ERROR_BAD_ENVIRONMENT, E2BIG }, /* 10 */ + { ERROR_BAD_FORMAT, ENOEXEC }, /* 11 */ + { ERROR_INVALID_ACCESS, EINVAL }, /* 12 */ + { ERROR_INVALID_DATA, EINVAL }, /* 13 */ + { ERROR_INVALID_DRIVE, ENOENT }, /* 15 */ + { ERROR_CURRENT_DIRECTORY, EACCES }, /* 16 */ + { ERROR_NOT_SAME_DEVICE, EXDEV }, /* 17 */ + { ERROR_NO_MORE_FILES, ENOENT }, /* 18 */ + { ERROR_LOCK_VIOLATION, EACCES }, /* 33 */ + { ERROR_BAD_NETPATH, ENOENT }, /* 53 */ + { ERROR_NETWORK_ACCESS_DENIED, EACCES }, /* 65 */ + { ERROR_BAD_NET_NAME, ENOENT }, /* 67 */ + { ERROR_FILE_EXISTS, EEXIST }, /* 80 */ + { ERROR_CANNOT_MAKE, EACCES }, /* 82 */ + { ERROR_FAIL_I24, EACCES }, /* 83 */ + { ERROR_INVALID_PARAMETER, EINVAL }, /* 87 */ + { ERROR_NO_PROC_SLOTS, EAGAIN }, /* 89 */ + { ERROR_DRIVE_LOCKED, EACCES }, /* 108 */ + { ERROR_BROKEN_PIPE, EPIPE }, /* 109 */ + { ERROR_DISK_FULL, ENOSPC }, /* 112 */ + { ERROR_INVALID_TARGET_HANDLE, EBADF }, /* 114 */ + { ERROR_INVALID_HANDLE, EINVAL }, /* 124 */ + { ERROR_WAIT_NO_CHILDREN, ECHILD }, /* 128 */ + { ERROR_CHILD_NOT_COMPLETE, ECHILD }, /* 129 */ + { ERROR_DIRECT_ACCESS_HANDLE, EBADF }, /* 130 */ + { ERROR_NEGATIVE_SEEK, EINVAL }, /* 131 */ + { ERROR_SEEK_ON_DEVICE, EACCES }, /* 132 */ + { ERROR_DIR_NOT_EMPTY, ENOTEMPTY }, /* 145 */ + { ERROR_NOT_LOCKED, EACCES }, /* 158 */ + { ERROR_BAD_PATHNAME, ENOENT }, /* 161 */ + { ERROR_MAX_THRDS_REACHED, EAGAIN }, /* 164 */ + { 
ERROR_LOCK_FAILED, EACCES }, /* 167 */ + { ERROR_ALREADY_EXISTS, EEXIST }, /* 183 */ + { ERROR_FILENAME_EXCED_RANGE, ENOENT }, /* 206 */ + { ERROR_NESTING_NOT_ALLOWED, EAGAIN }, /* 215 */ + { ERROR_FILE_SYSTEM_LIMITATION, EFBIG }, /* 665 */ + { ERROR_NO_SYSTEM_RESOURCES, ENOMEM }, /* 1450 */ + { ERROR_NOT_ENOUGH_QUOTA, ENOMEM } /* 1816 */ +}; + +/* size of the table */ +#define ERRTABLESIZE (sizeof(errtable)/sizeof(errtable[0])) + +/* The following two constants must be the minimum and maximum +values in the (contiguous) range of Exec Failure errors. */ +#define MIN_EXEC_ERROR ERROR_INVALID_STARTING_CODESEG +#define MAX_EXEC_ERROR ERROR_INFLOOP_IN_RELOC_CHAIN + +/* These are the low and high value in the range of errors that are +access violations */ +#define MIN_EACCES_RANGE ERROR_WRITE_PROTECT +#define MAX_EACCES_RANGE ERROR_SHARING_BUFFER_EXCEEDED + + +static int get_errno_from_oserr(unsigned long oserrno) +{ + size_t i; + + /* check the table for the OS error code */ + for (i= 0; i < ERRTABLESIZE; ++i) + { + if (oserrno == errtable[i].oscode) + { + return errtable[i].sysv_errno; + } + } + + /* The error code wasn't in the table. We check for a range of */ + /* EACCES errors or exec failure errors (ENOEXEC). Otherwise */ + /* EINVAL is returned. */ + + if (oserrno >= MIN_EACCES_RANGE && oserrno <= MAX_EACCES_RANGE) + return EACCES; + else if (oserrno >= MIN_EXEC_ERROR && oserrno <= MAX_EXEC_ERROR) + return ENOEXEC; + else + return EINVAL; +} + +/* Set errno corresponding to GetLastError() value */ +void my_osmaperr ( unsigned long oserrno) +{ + errno= get_errno_from_oserr(oserrno); +} diff --git a/mysys/my_winfile.c b/mysys/my_winfile.c new file mode 100644 index 00000000..35bc6b35 --- /dev/null +++ b/mysys/my_winfile.c @@ -0,0 +1,738 @@ +/* Copyright (c) 2008, 2010, Oracle and/or its affiliates. All rights reserved. 
+
+This program is free software; you can redistribute it and/or modify
+it under the terms of the GNU General Public License as published by
+the Free Software Foundation; version 2 of the License.
+
+This program is distributed in the hope that it will be useful,
+but WITHOUT ANY WARRANTY; without even the implied warranty of
+MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+GNU General Public License for more details.
+
+You should have received a copy of the GNU General Public License
+along with this program; if not, write to the Free Software
+Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1335 USA */
+
+/*
+  The purpose of this file is to provide an implementation of file IO routines
+  on Windows that can be thought of as a drop-in replacement for the
+  corresponding C runtime functionality.
+
+  Compared to Windows CRT, this one
+  - does not have the same file descriptor
+  limitation (default is 16384 and can be increased further, whereas CRT poses
+  a hard limit of 2048 file descriptors)
+  - the file operations are not serialized
+  - positional IO pread/pwrite is ported here.
+  - no text mode for files, all IO is "binary"
+
+  Naming convention:
+  All routines are prefixed with my_win_, e.g. Posix open() is implemented with
+  my_win_open()
+
+  Implemented are
+  - POSIX routines (e.g. open, read, lseek ...)
+  - Some ANSI C stream routines (fopen, fdopen, fileno, fclose)
+  - Windows CRT equivalents (my_get_osfhandle, open_osfhandle)
+
+  Worth noting:
+  - File descriptors used here are located in a range that is not compatible
+  with CRT on purpose. Attempt to use a file descriptor from Windows CRT library
+  range in my_win_* function will be punished with DBUG_ASSERT()
+
+  - File streams (FILE *) are actually from the C runtime.
The routines provided + here are useful only in scernarios that use low-level IO with my_win_fileno() +*/ + +#ifdef _WIN32 + +#include "mysys_priv.h" +#include +#include + +/* Associates a file descriptor with an existing operating-system file handle.*/ +File my_open_osfhandle(HANDLE handle, int oflag) +{ + int offset= -1; + uint i; + DBUG_ENTER("my_open_osfhandle"); + + mysql_mutex_lock(&THR_LOCK_open); + for(i= MY_FILE_MIN; i < my_file_limit;i++) + { + if(my_file_info[i].fhandle == 0) + { + struct st_my_file_info *finfo= &(my_file_info[i]); + finfo->type= FILE_BY_OPEN; + finfo->fhandle= handle; + finfo->oflag= oflag; + offset= i; + break; + } + } + mysql_mutex_unlock(&THR_LOCK_open); + if(offset == -1) + errno= EMFILE; /* to many file handles open */ + DBUG_RETURN(offset); +} + + +static void invalidate_fd(File fd) +{ + DBUG_ENTER("invalidate_fd"); + DBUG_ASSERT(fd >= MY_FILE_MIN && fd < (int)my_file_limit); + my_file_info[fd].fhandle= 0; + DBUG_VOID_RETURN; +} + + +/* Get Windows handle for a file descriptor */ +HANDLE my_get_osfhandle(File fd) +{ + DBUG_ENTER("my_get_osfhandle"); + DBUG_ASSERT(fd >= MY_FILE_MIN && fd < (int)my_file_limit); + DBUG_RETURN(my_file_info[fd].fhandle); +} + + +static int my_get_open_flags(File fd) +{ + DBUG_ENTER("my_get_open_flags"); + DBUG_ASSERT(fd >= MY_FILE_MIN && fd < (int)my_file_limit); + DBUG_RETURN(my_file_info[fd].oflag); +} + +/* + CreateFile with retry logic. + + Uses retries, to avoid or reduce CreateFile errors + with ERROR_SHARING_VIOLATION, in case the file is opened + by another process, which used incompatible sharing + flags when opening. + + See Windows' CreateFile() documentation for details. 
+*/
+/*
+  The arguments are passed unchanged to CreateFile(). The call is repeated,
+  with a pause of FILE_SHARING_VIOLATION_DELAY_MS between attempts, for as
+  long as it fails with ERROR_SHARING_VIOLATION, up to
+  FILE_SHARING_VIOLATION_RETRIES attempts in total. Returns whatever the
+  last CreateFile() call returned.
+*/
+static HANDLE my_create_file_with_retries(
+  LPCSTR lpFileName, DWORD dwDesiredAccess,
+  DWORD dwShareMode,
+  LPSECURITY_ATTRIBUTES lpSecurityAttributes,
+  DWORD dwCreationDisposition,
+  DWORD dwFlagsAndAttributes,
+  HANDLE hTemplateFile)
+{
+  int retries;
+  DBUG_INJECT_FILE_SHARING_VIOLATION(lpFileName);
+
+  for (retries = FILE_SHARING_VIOLATION_RETRIES;;)
+  {
+    HANDLE h= CreateFile(lpFileName, dwDesiredAccess, dwShareMode,
+                         lpSecurityAttributes, dwCreationDisposition,
+                         dwFlagsAndAttributes, hTemplateFile);
+    DBUG_CLEAR_FILE_SHARING_VIOLATION();
+
+    /*
+      Stop on success, on any error other than a sharing violation,
+      or when the retry budget is used up.
+    */
+    if (h != INVALID_HANDLE_VALUE ||
+        GetLastError() != ERROR_SHARING_VIOLATION || --retries == 0)
+      return h;
+
+    Sleep(FILE_SHARING_VIOLATION_DELAY_MS);
+  }
+  /* Not reached: the loop above only leaves through its return. */
+  return INVALID_HANDLE_VALUE;
+}
+
+/*
+  Default security attributes for files and directories
+  Usually NULL, but can be set
+  - by either mysqld --bootstrap when started from
+  mysql_install_db.exe, and creating windows service
+  - or by mariabackup --copy-back.
+
+  The objective in both cases is to fix file or directory
+  privileges for those files that are outside of the usual
+  datadir, so that unprivileged service account has full
+  access to the files.
+
+  Returns the address of my_dir_security_attributes when its
+  lpSecurityDescriptor is set, NULL otherwise.
+*/
+LPSECURITY_ATTRIBUTES my_win_file_secattr()
+{
+  return my_dir_security_attributes.lpSecurityDescriptor?
+    &my_dir_security_attributes : NULL;
+}
+
+
+/*
+  Open a file with sharing. Similar to _sopen() from libc, but allows managing
+  share delete on win32
+
+  SYNOPSIS
+    my_win_sopen()
+    path    file name
+    oflag   operation flags
+    shflag  share flag
+    pmode   permission flags
+
+  RETURN VALUE
+    File descriptor of opened file if success
+    -1 and sets errno if fails.
+*/
+
+File my_win_sopen(const char *path, int oflag, int shflag, int pmode)
+{
+  int fh; /* handle of opened file */
+  int mask;
+  HANDLE osfh; /* OS handle of opened file */
+  DWORD fileaccess; /* OS file access (requested) */
+  DWORD fileshare; /* OS file sharing mode */
+  DWORD filecreate; /* OS method of opening/creating */
+  DWORD fileattrib; /* OS file attribute flags */
+
+  DBUG_ENTER("my_win_sopen");
+
+  /* Names rejected by check_if_legal_filename() are refused with EACCES. */
+  if (check_if_legal_filename(path))
+  {
+    errno= EACCES;
+    DBUG_RETURN(-1);
+  }
+
+  /* decode the access flags */
+  switch (oflag & (_O_RDONLY | _O_WRONLY | _O_RDWR)) {
+    case _O_RDONLY: /* read access */
+      fileaccess= GENERIC_READ;
+      break;
+    case _O_WRONLY: /* write access */
+      fileaccess= GENERIC_WRITE;
+      break;
+    case _O_RDWR: /* read and write access */
+      fileaccess= GENERIC_READ | GENERIC_WRITE;
+      break;
+    default: /* error, bad oflag */
+      errno= EINVAL;
+      DBUG_RETURN(-1);
+  }
+
+  /* decode sharing flags */
+  switch (shflag) {
+    case _SH_DENYRW: /* exclusive access except delete */
+      fileshare= FILE_SHARE_DELETE;
+      break;
+    case _SH_DENYWR: /* share read and delete access */
+      fileshare= FILE_SHARE_READ | FILE_SHARE_DELETE;
+      break;
+    case _SH_DENYRD: /* share write and delete access */
+      fileshare= FILE_SHARE_WRITE | FILE_SHARE_DELETE;
+      break;
+    case _SH_DENYNO: /* share read, write and delete access */
+      fileshare= FILE_SHARE_READ | FILE_SHARE_WRITE | FILE_SHARE_DELETE;
+      break;
+    case _SH_DENYRWD: /* exclusive access */
+      fileshare= 0L;
+      break;
+    case _SH_DENYWRD: /* share read access */
+      fileshare= FILE_SHARE_READ;
+      break;
+    case _SH_DENYRDD: /* share write access */
+      fileshare= FILE_SHARE_WRITE;
+      break;
+    case _SH_DENYDEL: /* share read and write access */
+      fileshare= FILE_SHARE_READ | FILE_SHARE_WRITE;
+      break;
+    default: /* error, bad shflag */
+      errno= EINVAL;
+      DBUG_RETURN(-1);
+  }
+
+  /* decode open/create method flags */
+  switch (oflag & (_O_CREAT | _O_EXCL | _O_TRUNC)) {
+    case 0:
+    case _O_EXCL: /* ignore EXCL w/o CREAT */
+      filecreate= OPEN_EXISTING;
+      break;
+
+    case _O_CREAT:
+      filecreate= OPEN_ALWAYS;
+      break;
+
+    case _O_CREAT | _O_EXCL:
+    case _O_CREAT | _O_TRUNC | _O_EXCL:
+      filecreate= CREATE_NEW;
+      break;
+
+    case _O_TRUNC:
+    case _O_TRUNC | _O_EXCL: /* ignore EXCL w/o CREAT */
+      filecreate= TRUNCATE_EXISTING;
+      break;
+
+    case _O_CREAT | _O_TRUNC:
+      filecreate= CREATE_ALWAYS;
+      break;
+
+    default:
+      /* this can't happen ... all cases are covered */
+      errno= EINVAL;
+      DBUG_RETURN(-1);
+  }
+
+  /* decode file attribute flags if _O_CREAT was specified */
+  fileattrib= FILE_ATTRIBUTE_NORMAL; /* default */
+  if (oflag & _O_CREAT)
+  {
+    /*
+      Read the current umask: set it to 0 to obtain the old value,
+      then immediately put the old value back.
+    */
+    _umask((mask= _umask(0)));
+
+    /* No write permission left after applying the umask: read-only file. */
+    if (!((pmode & ~mask) & _S_IWRITE))
+      fileattrib= FILE_ATTRIBUTE_READONLY;
+  }
+
+  /* Set temporary file (delete-on-close) attribute if requested. */
+  if (oflag & _O_TEMPORARY)
+  {
+    fileattrib|= FILE_FLAG_DELETE_ON_CLOSE;
+    fileaccess|= DELETE;
+  }
+
+  /* Set temporary file (delay-flush-to-disk) attribute if requested.*/
+  if (oflag & _O_SHORT_LIVED)
+    fileattrib|= FILE_ATTRIBUTE_TEMPORARY;
+
+  /* Set sequential or random access attribute if requested. */
+  if (oflag & _O_SEQUENTIAL)
+    fileattrib|= FILE_FLAG_SEQUENTIAL_SCAN;
+  else if (oflag & _O_RANDOM)
+    fileattrib|= FILE_FLAG_RANDOM_ACCESS;
+
+  /* try to open/create the file */
+  if ((osfh= my_create_file_with_retries(path, fileaccess, fileshare,my_win_file_secattr(),
+                                         filecreate, fileattrib, NULL)) == INVALID_HANDLE_VALUE)
+  {
+    DWORD last_error= GetLastError();
+    /* Report an over-long path as ENAMETOOLONG instead of "not found". */
+    if (last_error == ERROR_PATH_NOT_FOUND && strlen(path) >= MAX_PATH)
+      errno= ENAMETOOLONG;
+    else
+      my_osmaperr(last_error); /* map error */
+    DBUG_RETURN(-1);
+  }
+
+  /* No descriptor could be assigned: do not leak the OS handle. */
+  if ((fh= my_open_osfhandle(osfh,
+                             oflag & (_O_APPEND | _O_RDONLY | _O_TEXT))) == -1)
+  {
+    CloseHandle(osfh);
+  }
+
+  DBUG_RETURN(fh); /* return handle */
+}
+
+
+/*
+  Open a file through my_win_sopen() in binary mode, with the _SH_DENYNO
+  share flag and read/write permission bits.
+*/
+File my_win_open(const char *path, int flags)
+{
+  DBUG_ENTER("my_win_open");
+  DBUG_RETURN(my_win_sopen((char *) path, flags | _O_BINARY, _SH_DENYNO,
+                           _S_IREAD | S_IWRITE));
+}
+
+
+/*
+  Close the OS handle behind fd and free its descriptor slot.
+  Returns 0 on success; on failure sets errno from GetLastError() and
+  returns -1, leaving the slot untouched.
+*/
+int my_win_close(File fd)
+{
+  DBUG_ENTER("my_win_close");
+  if(CloseHandle(my_get_osfhandle(fd)))
+  {
+    invalidate_fd(fd);
+    DBUG_RETURN(0);
+  }
+  my_osmaperr(GetLastError());
+  DBUG_RETURN(-1);
+}
+
+
+/*
+  Read up to Count bytes at the given file offset.
+  Returns the number of bytes read, 0 at end of file, or (size_t)-1 with
+  errno set on error. A single call reads at most UINT_MAX bytes.
+*/
+size_t my_win_pread(File Filedes, uchar *Buffer, size_t Count, my_off_t offset)
+{
+  DWORD nBytesRead;
+  HANDLE hFile;
+  OVERLAPPED ov= {0};
+  LARGE_INTEGER li;
+
+  DBUG_ENTER("my_win_pread");
+
+  if(!Count)
+    DBUG_RETURN(0);
+#ifdef _WIN64
+  /* ReadFile() takes a DWORD length. */
+  if(Count > UINT_MAX)
+    Count= UINT_MAX;
+#endif
+
+  hFile= (HANDLE)my_get_osfhandle(Filedes);
+  /* The read position is passed to ReadFile() through the OVERLAPPED struct. */
+  li.QuadPart= offset;
+  ov.Offset= li.LowPart;
+  ov.OffsetHigh= li.HighPart;
+
+  if(!ReadFile(hFile, Buffer, (DWORD)Count, &nBytesRead, &ov))
+  {
+    DWORD lastError= GetLastError();
+    /*
+      ERROR_BROKEN_PIPE is returned when no more data coming
+      through e.g. a command pipe in windows : see MSDN on ReadFile.
+ */ + if(lastError == ERROR_HANDLE_EOF || lastError == ERROR_BROKEN_PIPE) + DBUG_RETURN(0); /*return 0 at EOF*/ + my_osmaperr(lastError); + DBUG_RETURN((size_t)-1); + } + DBUG_RETURN(nBytesRead); +} + + +size_t my_win_read(File Filedes, uchar *Buffer, size_t Count) +{ + DWORD nBytesRead; + HANDLE hFile; + + DBUG_ENTER("my_win_read"); + if(!Count) + DBUG_RETURN(0); +#ifdef _WIN64 + if(Count > UINT_MAX) + Count= UINT_MAX; +#endif + + hFile= (HANDLE)my_get_osfhandle(Filedes); + + if(!ReadFile(hFile, Buffer, (DWORD)Count, &nBytesRead, NULL)) + { + DWORD lastError= GetLastError(); + /* + ERROR_BROKEN_PIPE is returned when no more data coming + through e.g. a command pipe in windows : see MSDN on ReadFile. + */ + if(lastError == ERROR_HANDLE_EOF || lastError == ERROR_BROKEN_PIPE) + DBUG_RETURN(0); /*return 0 at EOF*/ + my_osmaperr(lastError); + DBUG_RETURN((size_t)-1); + } + DBUG_RETURN(nBytesRead); +} + + +size_t my_win_pwrite(File Filedes, const uchar *Buffer, size_t Count, + my_off_t offset) +{ + DWORD nBytesWritten; + HANDLE hFile; + OVERLAPPED ov= {0}; + LARGE_INTEGER li; + + DBUG_ENTER("my_win_pwrite"); + DBUG_PRINT("my",("Filedes: %d, Buffer: %p, Count: %llu, offset: %llu", + Filedes, Buffer, (ulonglong)Count, (ulonglong)offset)); + + if(!Count) + DBUG_RETURN(0); + +#ifdef _WIN64 + if(Count > UINT_MAX) + Count= UINT_MAX; +#endif + + hFile= (HANDLE)my_get_osfhandle(Filedes); + li.QuadPart= offset; + ov.Offset= li.LowPart; + ov.OffsetHigh= li.HighPart; + + if(!WriteFile(hFile, Buffer, (DWORD)Count, &nBytesWritten, &ov)) + { + my_osmaperr(GetLastError()); + DBUG_RETURN((size_t)-1); + } + else + DBUG_RETURN(nBytesWritten); +} + + +my_off_t my_win_lseek(File fd, my_off_t pos, int whence) +{ + LARGE_INTEGER offset; + LARGE_INTEGER newpos; + + DBUG_ENTER("my_win_lseek"); + + /* Check compatibility of Windows and Posix seek constants */ + compile_time_assert(FILE_BEGIN == SEEK_SET && FILE_CURRENT == SEEK_CUR + && FILE_END == SEEK_END); + + offset.QuadPart= pos; + 
if(!SetFilePointerEx(my_get_osfhandle(fd), offset, &newpos, whence))
+  {
+    my_osmaperr(GetLastError());
+    newpos.QuadPart= -1;
+  }
+  DBUG_RETURN(newpos.QuadPart);
+}
+
+
+#ifndef FILE_WRITE_TO_END_OF_FILE
+#define FILE_WRITE_TO_END_OF_FILE 0xffffffff
+#endif
+/*
+  Write up to Count bytes at the current position of the file, or at the
+  end of the file when the descriptor was registered with _O_APPEND.
+  Returns the number of bytes written, or (size_t)-1 with errno set on
+  error. A single call writes at most UINT_MAX bytes.
+*/
+size_t my_win_write(File fd, const uchar *Buffer, size_t Count)
+{
+  DWORD nWritten;
+  OVERLAPPED ov;
+  OVERLAPPED *pov= NULL;
+  HANDLE hFile;
+
+  DBUG_ENTER("my_win_write");
+  DBUG_PRINT("my",("Filedes: %d, Buffer: %p, Count %llu", fd, Buffer,
+                   (ulonglong)Count));
+
+  if(!Count)
+    DBUG_RETURN(0);
+
+#ifdef _WIN64
+  /* WriteFile() takes a DWORD length. */
+  if(Count > UINT_MAX)
+    Count= UINT_MAX;
+#endif
+
+  if(my_get_open_flags(fd) & _O_APPEND)
+  {
+    /*
+      Atomic append to the end of file is done by special initialization of
+      the OVERLAPPED structure. See MSDN WriteFile documentation for more info.
+    */
+    memset(&ov, 0, sizeof(ov));
+    ov.Offset= FILE_WRITE_TO_END_OF_FILE;
+    ov.OffsetHigh= -1;
+    pov= &ov;
+  }
+
+  hFile= my_get_osfhandle(fd);
+  /* pov stays NULL for a non-append descriptor. */
+  if(!WriteFile(hFile, Buffer, (DWORD)Count, &nWritten, pov))
+  {
+    my_osmaperr(GetLastError());
+    DBUG_RETURN((size_t)-1);
+  }
+  DBUG_RETURN(nWritten);
+}
+
+
+/*
+  Set the size of the file to newlength by moving the file pointer there
+  and marking that position as end of file.
+  Returns 0 on success; on failure sets errno and my_errno and returns -1.
+  Note that the file pointer is left at newlength.
+*/
+int my_win_chsize(File fd, my_off_t newlength)
+{
+  HANDLE hFile;
+  LARGE_INTEGER length;
+  DBUG_ENTER("my_win_chsize");
+
+  hFile= (HANDLE) my_get_osfhandle(fd);
+  length.QuadPart= newlength;
+  if (!SetFilePointerEx(hFile, length , NULL , FILE_BEGIN))
+    goto err;
+  if (!SetEndOfFile(hFile))
+    goto err;
+  DBUG_RETURN(0);
+err:
+  my_osmaperr(GetLastError());
+  my_errno= errno;
+  DBUG_RETURN(-1);
+}
+
+
+/* Get the file descriptor for stdin,stdout or stderr */
+static File my_get_stdfile_descriptor(FILE *stream)
+{
+  HANDLE hFile;
+  DWORD nStdHandle;
+  DBUG_ENTER("my_get_stdfile_descriptor");
+
+  if(stream == stdin)
+    nStdHandle= STD_INPUT_HANDLE;
+  else if(stream == stdout)
+    nStdHandle= STD_OUTPUT_HANDLE;
+  else if(stream == stderr)
+    nStdHandle= STD_ERROR_HANDLE;
+  else
+    DBUG_RETURN(-1);
+
+  hFile= GetStdHandle(nStdHandle);
+  if(hFile !=
INVALID_HANDLE_VALUE) + DBUG_RETURN(my_open_osfhandle(hFile, 0)); + DBUG_RETURN(-1); +} + + +File my_win_handle2File(HANDLE hFile) +{ + int retval= -1; + uint i; + + DBUG_ENTER("my_win_handle2File"); + + for(i= MY_FILE_MIN; i < my_file_limit; i++) + { + if(my_file_info[i].fhandle == hFile) + { + retval= i; + break; + } + } + DBUG_RETURN(retval); +} + + +File my_win_fileno(FILE *file) +{ + DBUG_ENTER("my_win_fileno"); + int retval= my_win_handle2File((HANDLE) _get_osfhandle(fileno(file))); + if(retval == -1) + /* try std stream */ + DBUG_RETURN(my_get_stdfile_descriptor(file)); + DBUG_RETURN(retval); +} + + +FILE *my_win_fopen(const char *filename, const char *type) +{ + FILE *file; + int flags= 0; + DBUG_ENTER("my_win_fopen"); + + /* + If we are not creating, then we need to use my_access to make sure + the file exists since Windows doesn't handle files like "com1.sym" + very well + */ + if (check_if_legal_filename(filename)) + { + errno= EACCES; + DBUG_RETURN(NULL); + } + + file= fopen(filename, type); + if(!file) + DBUG_RETURN(NULL); + + if(strchr(type,'a') != NULL) + flags= O_APPEND; + + /* + Register file handle in my_table_info. 
+    Necessary for my_fileno()
+  */
+  if(my_open_osfhandle((HANDLE)_get_osfhandle(fileno(file)), flags) < 0)
+  {
+    fclose(file);
+    DBUG_RETURN(NULL);
+  }
+  DBUG_RETURN(file);
+}
+
+
+/*
+  Create a C runtime stream on top of an already open descriptor.
+  Returns NULL when no CRT descriptor can be created for the handle or
+  when fdopen() fails.
+*/
+FILE * my_win_fdopen(File fd, const char *type)
+{
+  FILE *file;
+  int crt_fd;
+  int flags= 0;
+
+  DBUG_ENTER("my_win_fdopen");
+
+  if(strchr(type,'a') != NULL)
+    flags= O_APPEND;
+  /* Convert OS file handle to CRT file descriptor and then call fdopen*/
+  crt_fd= _open_osfhandle((intptr_t)my_get_osfhandle(fd), flags);
+  if(crt_fd < 0)
+    file= NULL;
+  else
+    file= fdopen(crt_fd, type);
+  DBUG_RETURN(file);
+}
+
+
+/*
+  Close a stream and free the descriptor slot registered for it.
+  Returns 0 on success and -1 when the stream has no descriptor or
+  fclose() fails.
+*/
+int my_win_fclose(FILE *file)
+{
+  File fd;
+
+  DBUG_ENTER("my_win_fclose");
+  fd= my_fileno(file);
+  if(fd < 0)
+    DBUG_RETURN(-1);
+  if(fclose(file) < 0)
+    DBUG_RETURN(-1);
+  invalidate_fd(fd);
+  DBUG_RETURN(0);
+}
+
+
+
+/*
+  Quick and dirty my_fstat() implementation for Windows.
+  Use CRT fstat on temporarily allocated file descriptor.
+  Patch file size, because size that fstat returns is not
+  reliable (may be outdated)
+
+  Returns the result of _fstati64(), or -1 when the handle cannot be
+  duplicated or no temporary CRT descriptor can be created for it.
+*/
+int my_win_fstat(File fd, struct _stati64 *buf)
+{
+  int crt_fd;
+  int retval;
+  HANDLE hFile, hDup;
+
+  DBUG_ENTER("my_win_fstat");
+
+  /*
+    Work on a duplicate, so that closing the temporary CRT descriptor
+    below does not close the caller's handle.
+  */
+  hFile= my_get_osfhandle(fd);
+  if(!DuplicateHandle( GetCurrentProcess(), hFile, GetCurrentProcess(),
+                       &hDup ,0,FALSE,DUPLICATE_SAME_ACCESS))
+  {
+    my_osmaperr(GetLastError());
+    DBUG_RETURN(-1);
+  }
+  if ((crt_fd= _open_osfhandle((intptr_t)hDup,0)) < 0)
+  {
+    /*
+      The duplicate was not taken over by a CRT descriptor, so it has to
+      be closed here or it leaks. Keep the errno of the failed call.
+    */
+    int save_errno= errno;
+    CloseHandle(hDup);
+    errno= save_errno;
+    DBUG_RETURN(-1);
+  }
+
+  retval= _fstati64(crt_fd, buf);
+  if(retval == 0)
+  {
+    /* File size returned by stat is not accurate (may be outdated), fix it*/
+    GetFileSizeEx(hDup, (PLARGE_INTEGER) (&(buf->st_size)));
+  }
+  /* Closing the CRT descriptor also closes hDup. */
+  _close(crt_fd);
+  DBUG_RETURN(retval);
+}
+
+
+
+int my_win_stat( const char *path, struct _stati64 *buf)
+{
+  DBUG_ENTER("my_win_stat");
+  if(_stati64( path, buf) == 0)
+  {
+    /* File size returned by stat is not accurate (may be outdated), fix it*/
+    WIN32_FILE_ATTRIBUTE_DATA data;
+    if (GetFileAttributesEx(path, GetFileExInfoStandard, &data))
+    {
+
LARGE_INTEGER li; + li.LowPart= data.nFileSizeLow; + li.HighPart= data.nFileSizeHigh; + buf->st_size= li.QuadPart; + } + DBUG_RETURN(0); + } + DBUG_RETURN(-1); +} + + + +int my_win_fsync(File fd) +{ + DBUG_ENTER("my_win_fsync"); + if(FlushFileBuffers(my_get_osfhandle(fd))) + DBUG_RETURN(0); + my_osmaperr(GetLastError()); + DBUG_RETURN(-1); +} + + + +int my_win_dup(File fd) +{ + HANDLE hDup; + DBUG_ENTER("my_win_dup"); + if (DuplicateHandle(GetCurrentProcess(), my_get_osfhandle(fd), + GetCurrentProcess(), &hDup, 0, FALSE, DUPLICATE_SAME_ACCESS)) + { + DBUG_RETURN(my_open_osfhandle(hDup, my_get_open_flags(fd))); + } + my_osmaperr(GetLastError()); + DBUG_RETURN(-1); +} + +#endif /*_WIN32*/ diff --git a/mysys/my_winthread.c b/mysys/my_winthread.c new file mode 100644 index 00000000..b74804fb --- /dev/null +++ b/mysys/my_winthread.c @@ -0,0 +1,179 @@ +/* Copyright (c) 2000, 2013, Oracle and/or its affiliates. All rights reserved. + + This program is free software; you can redistribute it and/or modify + it under the terms of the GNU General Public License as published by + the Free Software Foundation; version 2 of the License. + + This program is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + GNU General Public License for more details. 
+
+   You should have received a copy of the GNU General Public License
+   along with this program; if not, write to the Free Software
+   Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1335  USA */
+
+/*****************************************************************************
+** Simulation of posix threads calls for Windows
+*****************************************************************************/
+#if defined (_WIN32)
+/* SAFE_MUTEX will not work until the thread structure is up to date */
+
+#undef SAFE_MUTEX
+#include "mysys_priv.h"
+/*
+  NOTE(review): the header names of the next two directives were lost when
+  this patch was extracted. <process.h> is what declares _beginthreadex()
+  and _endthreadex(); <signal.h> is believed to be the second upstream
+  header - confirm against the upstream file.
+*/
+#include <process.h>
+#include <signal.h>
+
+/* Heap-allocated start routine + argument handed to pthread_start() */
+struct thread_start_parameter
+{
+  pthread_handler func;
+  void *arg;
+};
+
+/**
+   Adapter to @c pthread_mutex_trylock()
+
+   @retval 0      Mutex was acquired
+   @retval EBUSY  Mutex was already locked by a thread
+ */
+int
+win_pthread_mutex_trylock(pthread_mutex_t *mutex)
+{
+  if (TryEnterCriticalSection(mutex))
+  {
+    /* Don't allow recursive lock */
+    if (mutex->RecursionCount > 1){
+      LeaveCriticalSection(mutex);
+      return EBUSY;
+    }
+    return 0;
+  }
+  return EBUSY;
+}
+
+/*
+  Thread entry point passed to _beginthreadex(): takes ownership of the
+  thread_start_parameter, frees it and runs the user routine. The routine's
+  return value is discarded; the thread always exits with code 0.
+*/
+static unsigned int __stdcall pthread_start(void *p)
+{
+  struct thread_start_parameter *par= (struct thread_start_parameter *)p;
+  pthread_handler func= par->func;
+  void *arg= par->arg;
+  free(p);
+  (*func)(arg);
+  return 0;
+}
+
+
+/*
+  Create a thread running func(param).
+
+  Only the stack size (dwStackSize) is taken from attr; a NULL attr means
+  stack size 0 is passed to _beginthreadex(). The thread handle is closed
+  right away, the thread id is stored in *thread_id.
+
+  @return 0 on success, otherwise the errno value observed after the
+          failing malloc() or _beginthreadex() call
+*/
+int pthread_create(pthread_t *thread_id, const pthread_attr_t *attr,
+                   pthread_handler func, void *param)
+{
+  uintptr_t handle;
+  struct thread_start_parameter *par;
+  unsigned int  stack_size;
+  int error_no;
+  DBUG_ENTER("pthread_create");
+
+  par= (struct thread_start_parameter *)malloc(sizeof(*par));
+  if (!par)
+   goto error_return;
+
+  par->func= func;
+  par->arg= param;
+  stack_size= attr?attr->dwStackSize:0;
+
+  handle= _beginthreadex(NULL, stack_size , pthread_start, par, 0, (uint *)thread_id);
+  if (!handle)
+    goto error_return;
+  DBUG_PRINT("info", ("thread id=%lu",*thread_id));
+
+  /* Do not need thread handle, close it */
+  CloseHandle((HANDLE)handle);
+  DBUG_RETURN(0);
+
+error_return:
+  /* Read errno first: free() below must not be able to disturb it */
+  error_no= errno;
+  /*
+    The thread was never started, so pthread_start() will not release the
+    parameter block. free(NULL) is a no-op when malloc() itself failed.
+  */
+  free(par);
+  DBUG_PRINT("error",
+         ("Can't create thread to handle request (error %d)",error_no));
+  DBUG_RETURN(error_no);
+}
+
+
+/* Terminate the calling thread. The exit value `a` is ignored; code is 0. */
+void pthread_exit(void *a)
+{
+  _endthreadex(0);
+}
+
+/*
+  Wait for a thread to finish and fetch its exit code.
+
+  @param thread     thread id, as stored by pthread_create()
+  @param value_ptr  if not NULL, receives the thread exit code
+
+  @return 0 on success, -1 on error with errno= EINVAL
+*/
+int pthread_join(pthread_t thread, void **value_ptr)
+{
+  DWORD  ret;
+  HANDLE handle;
+
+  /*
+    SYNCHRONIZE is needed for the wait; GetExitCodeThread() additionally
+    requires a query access right on the handle.
+  */
+  handle= OpenThread(SYNCHRONIZE | THREAD_QUERY_LIMITED_INFORMATION,
+                     FALSE, thread);
+  if (!handle)
+  {
+    errno= EINVAL;
+    goto error_return;
+  }
+
+  ret= WaitForSingleObject(handle, INFINITE);
+
+  if(ret != WAIT_OBJECT_0)
+  {
+    errno= EINVAL;
+    goto error_return;
+  }
+
+  if (!GetExitCodeThread(handle, &ret))
+  {
+    errno= EINVAL;
+    goto error_return;
+  }
+
+  if (value_ptr)
+    *value_ptr= (void *)(size_t)ret;
+
+  CloseHandle(handle);
+  return 0;
+
+error_return:
+  if(handle)
+    CloseHandle(handle);
+  return -1;
+}
+
+/*
+  Forcibly terminate a thread with TerminateThread().
+
+  @return 0 on success, -1 with errno= EINVAL if the thread cannot be
+          opened or terminated
+*/
+int pthread_cancel(pthread_t thread)
+{
+
+  HANDLE handle= 0;
+  BOOL ok= FALSE;
+
+  handle= OpenThread(THREAD_TERMINATE, FALSE, thread);
+  if (handle)
+  {
+     ok= TerminateThread(handle,0);
+     CloseHandle(handle);
+  }
+  if (ok)
+    return 0;
+
+  errno= EINVAL;
+  return -1;
+}
+
+
+
+/*
+ One time initialization.
+*/
+
+/* InitOnceExecuteOnce() callback: param carries the user's void(void) routine */
+static BOOL CALLBACK init_once_callback(my_pthread_once_t *once_control, PVOID param, PVOID *context)
+{
+  typedef void(*void_f)(void);
+  ((void_f)param)();
+  return TRUE;
+}
+
+/*
+  Run func exactly once per once_control. Always returns 0; the result of
+  InitOnceExecuteOnce() is not inspected.
+*/
+int my_pthread_once(my_pthread_once_t *once_control, void (*func)(void))
+{
+  InitOnceExecuteOnce(once_control, init_once_callback, func, NULL);
+  return 0;
+}
+#endif
diff --git a/mysys/my_wintoken.c b/mysys/my_wintoken.c
new file mode 100644
index 00000000..78c4bc9d
--- /dev/null
+++ b/mysys/my_wintoken.c
@@ -0,0 +1,42 @@
+/* Copyright (c) 2019, IBM.
+
+   This program is free software; you can redistribute it and/or modify
+   it under the terms of the GNU General Public License as published by
+   the Free Software Foundation; version 2 of the License.
+
+   This program is distributed in the hope that it will be useful,
+   but WITHOUT ANY WARRANTY; without even the implied warranty of
+   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+   GNU General Public License for more details.
+
+   You should have received a copy of the GNU General Public License
+   along with this program; if not, write to the Free Software
+   Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1335  USA */
+
+#include "mysys_priv.h"
+
+/*
+  Enable a privilege in the access token of the current process.
+
+  @param lpPrivilege  privilege name passed to LookupPrivilegeValue()
+
+  @return TRUE if the privilege was enabled, FALSE otherwise
+*/
+BOOL my_obtain_privilege(LPCSTR lpPrivilege)
+{
+  HANDLE hAccessToken;
+  TOKEN_PRIVILEGES token;
+  BOOL ret_value= FALSE;
+
+  if (!OpenProcessToken(GetCurrentProcess(), TOKEN_ADJUST_PRIVILEGES, &hAccessToken))
+  {
+    return FALSE;
+  }
+
+  if (LookupPrivilegeValue(NULL, lpPrivilege, &token.Privileges[0].Luid))
+  {
+    token.PrivilegeCount= 1;
+    token.Privileges[0].Attributes = SE_PRIVILEGE_ENABLED;
+
+    /*
+      A successful call is not enough: the privilege counts as obtained
+      only if GetLastError() also reports ERROR_SUCCESS afterwards.
+    */
+    ret_value= AdjustTokenPrivileges(hAccessToken, FALSE, &token, 0, NULL, NULL) &&
+               GetLastError() == ERROR_SUCCESS;
+  }
+
+  /* Release the token on every path once it has been opened */
+  CloseHandle(hAccessToken);
+  return ret_value;
+}
diff --git a/mysys/my_write.c b/mysys/my_write.c
new file mode 100644
index 00000000..06914b25
--- /dev/null
+++ b/mysys/my_write.c
@@ -0,0 +1,120 @@
+/* Copyright (c) 2000, 2016, Oracle and/or its affiliates.
+   Copyright (c) 2011, 2016, MariaDB
+
+   This program is free software; you can redistribute it and/or modify
+   it under the terms of the GNU General Public License as published by
+   the Free Software Foundation; version 2 of the License.
+
+   This program is distributed in the hope that it will be useful,
+   but WITHOUT ANY WARRANTY; without even the implied warranty of
+   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+   GNU General Public License for more details.
+
+   You should have received a copy of the GNU General Public License
+   along with this program; if not, write to the Free Software
+   Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1335  USA */
+
+#include "mysys_priv.h"
+#include "mysys_err.h"
+/*
+  NOTE(review): the header name of this directive was lost when the patch
+  was extracted; <errno.h> is restored because the code below uses errno,
+  ENOSPC, EINTR and EFBIG - confirm against the upstream file.
+*/
+#include <errno.h>
+
+
+/*
+  Write a chunk of bytes to a file
+
+  Keeps calling the OS write until Count bytes are written, retrying after
+  partial writes and EINTR, and waiting for free space on ENOSPC/EDQUOT
+  when MY_WAIT_IF_FULL is set. If none of MY_WME, MY_FAE and MY_FNABP is
+  given, my_global_flags is OR-ed into MyFlags.
+
+  @param Filedes  file descriptor to write to
+  @param Buffer   data to write
+  @param Count    number of bytes to write; 0 returns 0 without writing
+  @param MyFlags  MY_NABP / MY_FNABP / MY_WME / MY_FAE / MY_WAIT_IF_FULL ...
+
+  @return with MY_NABP or MY_FNABP: 0 on success, MY_FILE_ERROR on error;
+          otherwise the value computed by the final return statement
+          (bytes written on success)
+
+  NOTE(review): without MY_NABP/MY_FNABP, a failed write that follows a
+  partial one returns written + (size_t) -1, i.e. written - 1, not
+  MY_FILE_ERROR - confirm whether callers rely on this.
+*/
+
+size_t my_write(File Filedes, const uchar *Buffer, size_t Count, myf MyFlags)
+{
+  size_t writtenbytes, written;
+  uint errors;
+  DBUG_ENTER("my_write");
+  DBUG_PRINT("my",("fd: %d  Buffer: %p  Count: %lu  MyFlags: %lu",
+		   Filedes, Buffer, (ulong) Count, MyFlags));
+  errors= 0; written= 0;
+  if (!(MyFlags & (MY_WME | MY_FAE | MY_FNABP)))
+    MyFlags|= my_global_flags;
+
+  /* The behavior of write(fd, buf, 0) is not portable */
+  if (unlikely(!Count))
+    DBUG_RETURN(0);
+
+  for (;;)
+  {
+#ifdef _WIN32
+    if(Filedes < 0)
+    {
+      my_errno= errno= EBADF;
+      DBUG_RETURN((size_t)-1);
+    }
+    writtenbytes= my_win_write(Filedes, Buffer, Count);
+#else
+    writtenbytes= write(Filedes, Buffer, Count);
+#endif
+
+    /**
+       To simulate the write error set the errno = error code
+       and the number of written bytes to -1.
+     */
+    DBUG_EXECUTE_IF ("simulate_file_write_error",
+                     if (!errors) {
+                       errno= ENOSPC;
+                       writtenbytes= (size_t) -1;
+                     });
+
+    if (writtenbytes == Count)
+      break;
+    if (writtenbytes != (size_t) -1)
+    {						/* Safeguard */
+      written+= writtenbytes;
+      Buffer+= writtenbytes;
+      Count-= writtenbytes;
+    }
+    my_errno= errno;
+    DBUG_PRINT("error",("Write only %ld bytes, error: %d",
+			(long) writtenbytes, my_errno));
+#ifndef NO_BACKGROUND
+    if (my_thread_var->abort)
+      MyFlags&= ~ MY_WAIT_IF_FULL;		/* End if aborted by user */
+
+    if ((my_errno == ENOSPC || my_errno == EDQUOT) &&
+        (MyFlags & MY_WAIT_IF_FULL))
+    {
+      wait_for_free_space(my_filename(Filedes), errors);
+      errors++;
+      continue;
+    }
+
+    if ((writtenbytes == 0 || writtenbytes == (size_t) -1))
+    {
+      if (my_errno == EINTR)
+      {
+        DBUG_PRINT("debug", ("my_write() was interrupted and returned %ld",
+                             (long) writtenbytes));
+        continue;                               /* Interrupted */
+      }
+
+      if (!writtenbytes && !errors++)		/* Retry once */
+      {
+        /* We may come here if the file quota is exceeded */
+        errno= EFBIG;				/* Assume this is the error */
+        continue;
+      }
+    }
+    else
+      continue;					/* Retry */
+#endif
+
+    /* Don't give a warning if it's ok that we only write part of the data */
+    if (MyFlags & (MY_NABP | MY_FNABP))
+    {
+      if (MyFlags & (MY_WME | MY_FAE | MY_FNABP))
+      {
+	my_error(EE_WRITE, MYF(ME_BELL | (MyFlags & (ME_NOTE | ME_ERROR_LOG))),
+		 my_filename(Filedes),my_errno);
+      }
+      DBUG_RETURN(MY_FILE_ERROR);		/* Error on write */
+    }
+    break;                                      /* Return bytes written */
+  }
+  if (MyFlags & (MY_NABP | MY_FNABP))
+    DBUG_RETURN(0);			/* Want only errors */
+  DBUG_RETURN(writtenbytes+written);
+} /* my_write */
diff --git a/mysys/mysys_priv.h b/mysys/mysys_priv.h
new file mode 100644
index 00000000..e795dbe2
--- /dev/null
+++ b/mysys/mysys_priv.h
@@ -0,0 +1,251 @@
+/* Copyright (c) 2000, 2011, Oracle and/or its affiliates. All rights reserved.
+
+   This program is free software; you can redistribute it and/or modify
+   it under the terms of the GNU General Public License as published by
+   the Free Software Foundation; version 2 of the License.
+
+   This program is distributed in the hope that it will be useful,
+   but WITHOUT ANY WARRANTY; without even the implied warranty of
+   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+   GNU General Public License for more details.
+
+   You should have received a copy of the GNU General Public License
+   along with this program; if not, write to the Free Software
+   Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1335  USA */
+
+#ifndef MYSYS_PRIV_INCLUDED
+#define MYSYS_PRIV_INCLUDED
+
+/*
+  NOTE(review): every bare "#include" in this header lost its <header>
+  name when the patch was extracted; the names must be restored from the
+  upstream file before this text can be compiled.
+*/
+#include
+#include
+#include
+
+C_MODE_START
+
+#ifdef USE_SYSTEM_WRAPPERS
+#include "system_wrappers.h"
+#endif
+
+#ifdef HAVE_GETRUSAGE
+#include
+#endif
+
+#include
+
+/* Instrumentation keys that exist only when the PSI interface is built in */
+#ifdef HAVE_PSI_INTERFACE
+
+#if !defined(HAVE_PREAD) && !defined(_WIN32)
+extern PSI_mutex_key key_my_file_info_mutex;
+#endif /* !defined(HAVE_PREAD) && !defined(_WIN32) */
+
+#if !defined(HAVE_LOCALTIME_R) || !defined(HAVE_GMTIME_R)
+extern PSI_mutex_key key_LOCK_localtime_r;
+#endif /* !defined(HAVE_LOCALTIME_R) || !defined(HAVE_GMTIME_R) */
+
+extern PSI_mutex_key key_BITMAP_mutex, key_IO_CACHE_append_buffer_lock,
+  key_IO_CACHE_SHARE_mutex, key_KEY_CACHE_cache_lock, key_LOCK_alarm,
+  key_my_thread_var_mutex, key_THR_LOCK_charset, key_THR_LOCK_heap,
+  key_THR_LOCK_lock, key_THR_LOCK_malloc,
+  key_THR_LOCK_mutex, key_THR_LOCK_myisam, key_THR_LOCK_net,
+  key_THR_LOCK_open, key_THR_LOCK_threads, key_LOCK_uuid_generator,
+  key_TMPDIR_mutex, key_THR_LOCK_myisam_mmap, key_LOCK_timer;
+
+extern PSI_cond_key key_COND_alarm, key_COND_timer, key_IO_CACHE_SHARE_cond,
+  key_IO_CACHE_SHARE_cond_writer, key_my_thread_var_suspend,
+  key_THR_COND_threads;
+
+#ifdef USE_ALARM_THREAD
+extern PSI_thread_key key_thread_alarm;
+#endif /* USE_ALARM_THREAD */
+extern PSI_thread_key key_thread_timer;
+extern PSI_rwlock_key key_SAFEHASH_mutex;
+
+#endif /* HAVE_PSI_INTERFACE */
+
+extern PSI_stage_info stage_waiting_for_table_level_lock;
+
+/* These keys are always defined. */
+
+extern PSI_memory_key key_memory_DYNAMIC_STRING;
+extern PSI_memory_key key_memory_IO_CACHE;
+extern PSI_memory_key key_memory_KEY_CACHE;
+extern PSI_memory_key key_memory_LIST;
+extern PSI_memory_key key_memory_MY_BITMAP_bitmap;
+extern PSI_memory_key key_memory_MY_DIR;
+extern PSI_memory_key key_memory_MY_STAT;
+extern PSI_memory_key key_memory_MY_TMPDIR_full_list;
+extern PSI_memory_key key_memory_QUEUE;
+extern PSI_memory_key key_memory_SAFE_HASH_ENTRY;
+extern PSI_memory_key key_memory_TREE;
+extern PSI_memory_key key_memory_charset_file;
+extern PSI_memory_key key_memory_charset_loader;
+extern PSI_memory_key key_memory_defaults;
+extern PSI_memory_key key_memory_lf_dynarray;
+extern PSI_memory_key key_memory_lf_node;
+extern PSI_memory_key key_memory_lf_slist;
+extern PSI_memory_key key_memory_my_compress_alloc;
+extern PSI_memory_key key_memory_my_err_head;
+extern PSI_memory_key key_memory_my_file_info;
+extern PSI_memory_key key_memory_pack_frm;
+extern PSI_memory_key key_memory_charsets;
+extern PSI_memory_key key_memory_new;
+
+#ifdef _WIN32
+extern PSI_memory_key key_memory_win_SECURITY_ATTRIBUTES;
+extern PSI_memory_key key_memory_win_PACL;
+extern PSI_memory_key key_memory_win_IP_ADAPTER_ADDRESSES;
+#endif
+
+extern mysql_mutex_t THR_LOCK_malloc, THR_LOCK_open, THR_LOCK_keycache;
+extern mysql_mutex_t THR_LOCK_lock, THR_LOCK_net;
+extern mysql_mutex_t THR_LOCK_charset;
+
+#include
+
+#ifdef HAVE_PSI_INTERFACE
+extern PSI_file_key key_file_charset, key_file_cnf;
+#endif /* HAVE_PSI_INTERFACE */
+
+/* Per-cache state used together with the _my_b_encr_read/write hooks below */
+typedef struct {
+  ulonglong counter;
+  uint block_length, last_block_length;
+  uchar key[MY_AES_BLOCK_SIZE];
+  ulonglong inbuf_counter;
+} IO_CACHE_CRYPT;
+
+extern int (*_my_b_encr_read)(IO_CACHE *info,uchar *Buffer,size_t Count);
+extern int (*_my_b_encr_write)(IO_CACHE *info,const uchar *Buffer,size_t Count);
+
+/*
+  Without SAFEMALLOC the sf_* names map straight onto the C allocator and
+  the flags argument is dropped.
+*/
+#ifdef SAFEMALLOC
+void *sf_malloc(size_t size, myf my_flags);
+void *sf_realloc(void *ptr, size_t size, myf my_flags);
+void sf_free(void *ptr);
+size_t sf_malloc_usable_size(void *ptr, my_bool *is_thread_specific);
+#else
+#define sf_malloc(X,Y)    malloc(X)
+#define sf_realloc(X,Y,Z) realloc(X,Y)
+#define sf_free(X)      free(X)
+#endif
+
+/*
+  EDQUOT is used in only 3 C files, all of them in mysys/. If it does not
+  exist on the system, we set it to some value which can never happen.
+*/
+#ifndef EDQUOT
+#define EDQUOT (-1)
+#endif
+
+void my_error_unregister_all(void);
+
+/* Fall back to an equivalent open() flag where O_PATH itself is missing */
+#ifndef O_PATH        /* not Linux */
+#if defined(O_SEARCH) /* Illumos */
+#define O_PATH O_SEARCH
+#elif defined(O_EXEC) /* FreeBSD */
+#define O_PATH O_EXEC
+#endif
+#endif
+
+/*
+  NOSYMLINK_FUNCTION_BODY(AT,NOAT) expands to the body of a function that
+  has a `pathname` parameter. Depending on what the platform offers it
+  either
+   - opens the parent directory with my_open_parent_dir_nosymlinks() and
+     evaluates AT (which may use `dfd` and `filename`), or
+   - evaluates NOAT after checking that realpath(pathname) equals pathname
+     (otherwise errno= ENOTDIR and -1 is returned), or
+   - evaluates NOAT without any check.
+*/
+#ifdef O_PATH
+#define HAVE_OPEN_PARENT_DIR_NOSYMLINKS
+const char *my_open_parent_dir_nosymlinks(const char *pathname, int *pdfd);
+#define NOSYMLINK_FUNCTION_BODY(AT,NOAT)                                \
+  int dfd, res;                                                         \
+  const char *filename= my_open_parent_dir_nosymlinks(pathname, &dfd);  \
+  if (filename == NULL) return -1;                                      \
+  res= AT;                                                              \
+  if (dfd >= 0) close(dfd);                                             \
+  return res;
+#elif defined(HAVE_REALPATH) && defined(PATH_MAX)
+#define NOSYMLINK_FUNCTION_BODY(AT,NOAT)                                \
+  char buf[PATH_MAX+1];                                                 \
+  if (realpath(pathname, buf) == NULL) return -1;                       \
+  if (strcmp(pathname, buf)) { errno= ENOTDIR; return -1; }             \
+  return NOAT;
+#elif defined(HAVE_REALPATH)
+#define NOSYMLINK_FUNCTION_BODY(AT,NOAT)                                \
+  char *buf= realpath(pathname, NULL);                                  \
+  int res;                                                              \
+  if (buf == NULL) return -1;                                           \
+  if (strcmp(pathname, buf)) { errno= ENOTDIR; res= -1; }               \
+  else res= NOAT;                                                       \
+  free(buf);                                                            \
+  return res;
+#else
+#define NOSYMLINK_FUNCTION_BODY(AT,NOAT)                                \
+  return NOAT;
+#endif
+
+/* Defines `static int PROTO` with the body above; expands to nothing on Windows */
+#ifndef _WIN32
+#define CREATE_NOSYMLINK_FUNCTION(PROTO,AT,NOAT)                        \
+static int PROTO { NOSYMLINK_FUNCTION_BODY(AT,NOAT) }
+#else
+#define CREATE_NOSYMLINK_FUNCTION(PROTO,AT,NOAT)
+#endif
+
+#ifdef _WIN32
+#include
+/* my_winfile.c exports, should not be used outside mysys */
+extern File     my_win_open(const char *path, int oflag);
+extern int      my_win_close(File fd);
+extern size_t   my_win_read(File fd, uchar *buffer, size_t  count);
+extern size_t   my_win_write(File fd, const uchar *buffer, size_t count);
+extern size_t   my_win_pread(File fd, uchar *buffer, size_t count,
+                             my_off_t offset);
+extern size_t   my_win_pwrite(File fd, const uchar *buffer, size_t count,
+                              my_off_t offset);
+extern my_off_t my_win_lseek(File fd, my_off_t pos, int whence);
+extern int      my_win_chsize(File fd,  my_off_t newlength);
+extern FILE*    my_win_fopen(const char *filename, const char *type);
+extern File     my_win_fclose(FILE *file);
+extern File     my_win_fileno(FILE *file);
+extern FILE*    my_win_fdopen(File Filedes, const char *type);
+extern int      my_win_stat(const char *path, struct _stati64 *buf);
+extern int      my_win_fstat(File fd, struct _stati64 *buf);
+extern int      my_win_fsync(File fd);
+extern File     my_win_dup(File fd);
+extern File     my_win_sopen(const char *path, int oflag, int shflag, int perm);
+extern File     my_open_osfhandle(HANDLE handle, int oflag);
+
+
+/*
+  The following constants are related to retries when file operation fails with
+  ERROR_FILE_SHARING_VIOLATION
+*/
+#define FILE_SHARING_VIOLATION_RETRIES 50
+#define FILE_SHARING_VIOLATION_DELAY_MS 10
+
+
+/* DBUG injecting of ERROR_FILE_SHARING_VIOLATION */
+#ifndef DBUG_OFF
+/*
+  Open the file without sharing if the "file_sharing_violation" DBUG
+  keyword is set. Declares a local `fp`, so it must be used where a
+  declaration is allowed, and at most once per scope.
+*/
+#define DBUG_INJECT_FILE_SHARING_VIOLATION(filename) \
+  FILE *fp= NULL;                                        \
+  do                                                     \
+  {                                                      \
+    DBUG_EXECUTE_IF("file_sharing_violation",            \
+                    fp= _fsopen(filename, "r", _SH_DENYRW);); \
+  } while (0)
+
+/*
+  Close the file that causes ERROR_FILE_SHARING_VIOLATION, keeping the
+  thread's last-error value intact across fclose().
+*/
+#define DBUG_CLEAR_FILE_SHARING_VIOLATION() \
+  do                                        \
+  {                                         \
+    if (fp)                                 \
+    {                                       \
+      DWORD tmp_err= GetLastError();        \
+      fclose(fp);                           \
+      SetLastError(tmp_err);                \
+      fp= NULL;                             \
+    }                                       \
+  } while (0)
+
+#else
+#define DBUG_INJECT_FILE_SHARING_VIOLATION(filename) do {} while (0)
+#define DBUG_CLEAR_FILE_SHARING_VIOLATION() do {} while (0)
+#endif
+
+#endif
+
+C_MODE_END
+
+#endif
diff --git a/mysys/psi_noop.c b/mysys/psi_noop.c
new file mode 100644
index 00000000..403be3c9
--- /dev/null
+++ b/mysys/psi_noop.c
@@ -0,0 +1,1074 @@
+/* Copyright (c) 2011, 2022, Oracle and/or its affiliates.
+
+  This program is free software; you can redistribute it and/or modify
+  it under the terms of the GNU General Public License, version 2.0,
+  as published by the Free Software Foundation.
+
+  This program is also distributed with certain software (including
+  but not limited to OpenSSL) that is licensed under separate terms,
+  as designated in a particular file or component or in included license
+  documentation.  The authors of MySQL hereby grant you an additional
+  permission to link the program and your derivative works with the
+  separately licensed software that they have included with MySQL.
+
+  Without limiting anything contained in the foregoing, this file,
+  which is part of C Driver for MySQL (Connector/C), is also subject to the
+  Universal FOSS Exception, version 1.0, a copy of which can be found at
+  http://oss.oracle.com/licenses/universal-foss-exception.
+
+  This program is distributed in the hope that it will be useful,
+  but WITHOUT ANY WARRANTY; without even the implied warranty of
+  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.
See the
+  GNU General Public License, version 2.0, for more details.
+
+  You should have received a copy of the GNU General Public License
+  along with this program; if not, write to the Free Software Foundation,
+  51 Franklin Street, Suite 500, Boston, MA 02110-1335 USA */
+
+/*
+  Always provide the noop performance interface, for plugins.
+*/
+
+#define USE_PSI_V1
+#define HAVE_PSI_INTERFACE
+
+#include "my_global.h"
+#include "my_pthread.h"
+#include "my_sys.h"
+#include "mysql/psi/psi.h"
+
+C_MODE_START
+
+/* Marks a parameter as intentionally unused */
+#define NNN __attribute__((unused))
+
+/* Instrument registration stubs: all arguments are ignored */
+
+static void register_mutex_noop(const char *category NNN,
+                                PSI_mutex_info *info NNN,
+                                int count NNN)
+{
+  return;
+}
+
+static void register_rwlock_noop(const char *category NNN,
+                                 PSI_rwlock_info *info NNN,
+                                 int count NNN)
+{
+  return;
+}
+
+static void register_cond_noop(const char *category NNN,
+                               PSI_cond_info *info NNN,
+                               int count NNN)
+{
+  return;
+}
+
+static void register_thread_noop(const char *category NNN,
+                                 PSI_thread_info *info NNN,
+                                 int count NNN)
+{
+  return;
+}
+
+static void register_file_noop(const char *category NNN,
+                               PSI_file_info *info NNN,
+                               int count NNN)
+{
+  return;
+}
+
+static void register_stage_noop(const char *category NNN,
+                                PSI_stage_info **info_array NNN,
+                                int count NNN)
+{
+  return;
+}
+
+static void register_statement_noop(const char *category NNN,
+                                    PSI_statement_info *info NNN,
+                                    int count NNN)
+{
+  return;
+}
+
+static void register_socket_noop(const char *category NNN,
+                                 PSI_socket_info *info NNN,
+                                 int count NNN)
+{
+  return;
+}
+
+/* Object init/destroy stubs: init_* return NULL, destroy_* do nothing */
+
+static PSI_mutex*
+init_mutex_noop(PSI_mutex_key key NNN, void *identity NNN)
+{
+  return NULL;
+}
+
+static void destroy_mutex_noop(PSI_mutex* mutex NNN)
+{
+  return;
+}
+
+static PSI_rwlock*
+init_rwlock_noop(PSI_rwlock_key key NNN, void *identity NNN)
+{
+  return NULL;
+}
+
+static void destroy_rwlock_noop(PSI_rwlock* rwlock NNN)
+{
+  return;
+}
+
+static PSI_cond*
+init_cond_noop(PSI_cond_key key NNN, void *identity NNN)
+{
+  return NULL;
+}
+
+static void destroy_cond_noop(PSI_cond* cond NNN)
+{
+  return;
+}
+
+static PSI_socket*
+init_socket_noop(PSI_socket_key key NNN, const my_socket *fd NNN,
+                 const struct sockaddr *addr NNN, socklen_t addr_len NNN)
+{
+  return NULL;
+}
+
+static void destroy_socket_noop(PSI_socket* socket NNN)
+{
+  return;
+}
+
+/* Table share / table handle stubs */
+
+static PSI_table_share*
+get_table_share_noop(my_bool temporary NNN, struct TABLE_SHARE *share NNN)
+{
+  return NULL;
+}
+
+static void release_table_share_noop(PSI_table_share* share NNN)
+{
+  return;
+}
+
+static void
+drop_table_share_noop(my_bool temporary NNN, const char *schema_name NNN,
+                      int schema_name_length NNN, const char *table_name NNN,
+                      int table_name_length NNN)
+{
+  return;
+}
+
+static PSI_table*
+open_table_noop(PSI_table_share *share NNN, const void *identity NNN)
+{
+  return NULL;
+}
+
+static void unbind_table_noop(PSI_table *table NNN)
+{
+  return;
+}
+
+static PSI_table*
+rebind_table_noop(PSI_table_share *share NNN,
+                  const void *identity NNN,
+                  PSI_table *table NNN)
+{
+  return NULL;
+}
+
+static void close_table_noop(struct TABLE_SHARE *share NNN,
+                             PSI_table *table NNN)
+{
+  return;
+}
+
+static void create_file_noop(PSI_file_key key NNN,
+                             const char *name NNN, File file NNN)
+{
+  return;
+}
+
+/*
+  Not a pure no-op: the thread is still created, directly through
+  pthread_create(); only the instrumentation key is ignored.
+*/
+static int spawn_thread_noop(PSI_thread_key key NNN,
+                             pthread_t *thread NNN,
+                             const pthread_attr_t *attr NNN,
+                             void *(*start_routine)(void*) NNN, void *arg NNN)
+{
+  return pthread_create(thread, attr, start_routine, arg);
+}
+
+/* Thread instrumentation stubs */
+
+static PSI_thread*
+new_thread_noop(PSI_thread_key key NNN,
+                const void *identity NNN, ulonglong thread_id NNN)
+{
+  return NULL;
+}
+
+static void set_thread_id_noop(PSI_thread *thread NNN, ulonglong id NNN)
+{
+  return;
+}
+
+static void set_thread_THD_noop(PSI_thread *thread NNN, THD *thd NNN)
+{
+  return;
+}
+
+static void set_thread_os_id_noop(PSI_thread *thread NNN)
+{
+  return;
+}
+
+static PSI_thread*
+get_thread_noop(void NNN)
+{
+  return NULL;
+}
+
+static void set_thread_user_noop(const char *user NNN, int user_len NNN)
+{
+  return;
+}
+
+static void set_thread_user_host_noop(const char *user NNN, int user_len NNN,
+                                      const char *host NNN, int host_len NNN)
+{
+  return;
+}
+
+static void set_thread_db_noop(const char* db NNN, int db_len NNN)
+{
+  return;
+}
+
+static void set_thread_command_noop(int command NNN)
+{
+  return;
+}
+
+static void set_connection_type_noop(opaque_vio_type conn_type NNN)
+{
+  return;
+}
+
+static void set_thread_start_time_noop(time_t start_time NNN)
+{
+  return;
+}
+
+static void set_thread_state_noop(const char* state NNN)
+{
+  return;
+}
+
+static void set_thread_info_noop(const char* info NNN, uint info_len NNN)
+{
+  return;
+}
+
+static void set_thread_noop(PSI_thread* thread NNN)
+{
+  return;
+}
+
+static void set_thread_peer_port_noop(PSI_thread * thread NNN, uint port NNN)
+{
+  return;
+}
+
+static void delete_current_thread_noop(void)
+{
+  return;
+}
+
+static void delete_thread_noop(PSI_thread *thread NNN)
+{
+  return;
+}
+
+/* File locker lookup stubs: no locker is ever handed out */
+
+static PSI_file_locker*
+get_thread_file_name_locker_noop(PSI_file_locker_state *state NNN,
+                                 PSI_file_key key NNN,
+                                 enum PSI_file_operation op NNN,
+                                 const char *name NNN, const void *identity NNN)
+{
+  return NULL;
+}
+
+static PSI_file_locker*
+get_thread_file_stream_locker_noop(PSI_file_locker_state *state NNN,
+                                   PSI_file *file NNN,
+                                   enum PSI_file_operation op NNN)
+{
+  return NULL;
+}
+
+
+static PSI_file_locker*
+get_thread_file_descriptor_locker_noop(PSI_file_locker_state *state NNN,
+                                       File file NNN,
+                                       enum PSI_file_operation op NNN)
+{
+  return NULL;
+}
+
+/* Synchronization event stubs */
+
+static void unlock_mutex_noop(PSI_mutex *mutex NNN)
+{
+  return;
+}
+
+static void unlock_rwlock_noop(PSI_rwlock *rwlock NNN)
+{
+  return;
+}
+
+static void signal_cond_noop(PSI_cond* cond NNN)
+{
+  return;
+}
+
+static void broadcast_cond_noop(PSI_cond* cond NNN)
+{
+  return;
+}
+
+/* Wait start/end stubs: start_* return NULL, end_* do nothing */
+
+static PSI_idle_locker*
+start_idle_wait_noop(PSI_idle_locker_state* state NNN,
+                     const char *src_file NNN, uint src_line NNN)
+{
+  return NULL;
+}
+
+static void end_idle_wait_noop(PSI_idle_locker* locker NNN)
+{
+  return;
+}
+
+static PSI_mutex_locker*
+start_mutex_wait_noop(PSI_mutex_locker_state *state NNN,
+                      PSI_mutex *mutex NNN,
+                      PSI_mutex_operation op NNN,
+                      const char *src_file NNN, uint src_line NNN)
+{
+  return NULL;
+}
+
+static void end_mutex_wait_noop(PSI_mutex_locker* locker NNN, int rc NNN)
+{
+  return;
+}
+
+
+static PSI_rwlock_locker*
+start_rwlock_rdwait_noop(struct PSI_rwlock_locker_state_v1 *state NNN,
+                         struct PSI_rwlock *rwlock NNN,
+                         enum PSI_rwlock_operation op NNN,
+                         const char *src_file NNN, uint src_line NNN)
+{
+  return NULL;
+}
+
+static void end_rwlock_rdwait_noop(PSI_rwlock_locker* locker NNN, int rc NNN)
+{
+  return;
+}
+
+static struct PSI_rwlock_locker*
+start_rwlock_wrwait_noop(struct PSI_rwlock_locker_state_v1 *state NNN,
+                         struct PSI_rwlock *rwlock NNN,
+                         enum PSI_rwlock_operation op NNN,
+                         const char *src_file NNN, uint src_line NNN)
+{
+  return NULL;
+}
+
+static void end_rwlock_wrwait_noop(PSI_rwlock_locker* locker NNN, int rc NNN)
+{
+  return;
+}
+
+static struct PSI_cond_locker*
+start_cond_wait_noop(struct PSI_cond_locker_state_v1 *state NNN,
+                     struct PSI_cond *cond NNN,
+                     struct PSI_mutex *mutex NNN,
+                     enum PSI_cond_operation op NNN,
+                     const char *src_file NNN, uint src_line NNN)
+{
+  return NULL;
+}
+
+static void end_cond_wait_noop(PSI_cond_locker* locker NNN, int rc NNN)
+{
+  return;
+}
+
+static struct PSI_table_locker*
+start_table_io_wait_noop(struct PSI_table_locker_state *state NNN,
+                         struct PSI_table *table NNN,
+                         enum PSI_table_io_operation op NNN,
+                         uint index NNN,
+                         const char *src_file NNN, uint src_line NNN)
+{
+  return NULL;
+}
+
+static void end_table_io_wait_noop(PSI_table_locker* locker NNN,
+                                   ulonglong numrows NNN)
+{
+  return;
+}
+
+static struct PSI_table_locker*
+start_table_lock_wait_noop(struct PSI_table_locker_state *state NNN,
+                           struct PSI_table *table NNN,
+                           enum PSI_table_lock_operation op NNN,
+                           ulong flags NNN,
+                           const char *src_file NNN, uint src_line NNN)
+{
+  return NULL;
+}
+
+static void end_table_lock_wait_noop(PSI_table_locker* locker NNN)
+{
+  return;
+}
+
+/* File wait stubs */
+
+static void start_file_open_wait_noop(PSI_file_locker *locker NNN,
+                                      const char *src_file NNN,
+                                      uint src_line NNN)
+{
+  return;
+}
+
+static PSI_file* end_file_open_wait_noop(PSI_file_locker *locker NNN,
+                                         void *result NNN)
+{
+  return NULL;
+}
+
+static void end_file_open_wait_and_bind_to_descriptor_noop
+  (PSI_file_locker *locker NNN, File file NNN)
+{
+  return;
+}
+
+/* NOTE: the parameter name "filaneme" is a typo for "filename"; it is unused */
+static void end_temp_file_open_wait_and_bind_to_descriptor_noop
+  (PSI_file_locker *locker NNN, File file NNN, const char *filaneme NNN)
+{
+  return;
+}
+
+static void start_file_wait_noop(PSI_file_locker *locker NNN,
+                                 size_t count NNN,
+                                 const char *src_file NNN,
+                                 uint src_line NNN)
+{
+  return;
+}
+
+static void end_file_wait_noop(PSI_file_locker *locker NNN,
+                               size_t count NNN)
+{
+  return;
+}
+
+static void start_file_close_wait_noop(PSI_file_locker *locker NNN,
+                                       const char *src_file NNN,
+                                       uint src_line NNN)
+{
+  return;
+}
+
+static void end_file_close_wait_noop(PSI_file_locker *locker NNN,
+                                     int result NNN)
+{
+  return;
+}
+
+static void end_file_rename_wait_noop(PSI_file_locker *locker NNN,
+                                      const char *old_name NNN,
+                                      const char *new_name NNN,
+                                      int result NNN)
+{
+  return;
+}
+
+/* Stage stubs */
+
+static PSI_stage_progress*
+start_stage_noop(PSI_stage_key key NNN,
+                 const char *src_file NNN, int src_line NNN)
+{
+  return NULL;
+}
+
+static PSI_stage_progress*
+get_current_stage_progress_noop(void)
+{
+  return NULL;
+}
+
+static void end_stage_noop(void)
+{
+  return;
+}
+
+/* Statement stubs */
+
+static PSI_statement_locker*
+get_thread_statement_locker_noop(PSI_statement_locker_state *state NNN,
+                                 PSI_statement_key key NNN,
+                                 const void *charset NNN,
+                                 PSI_sp_share *sp_share NNN)
+{
+  return NULL;
+}
+
+static PSI_statement_locker*
+refine_statement_noop(PSI_statement_locker *locker NNN,
+                      PSI_statement_key key NNN)
+{
+  return NULL;
+}
+
+static void start_statement_noop(PSI_statement_locker *locker NNN,
+                                 const char *db NNN, uint db_len NNN,
+                                 const char *src_file NNN, uint src_line NNN)
+{
+  return;
+}
+
+static void set_statement_text_noop(PSI_statement_locker *locker NNN,
+                                    const char *text NNN, uint text_len NNN)
+{
+  return;
+}
+
+static void set_statement_lock_time_noop(PSI_statement_locker *locker NNN,
+                                         ulonglong count NNN)
+{
+  return;
+}
+
+static void set_statement_rows_sent_noop(PSI_statement_locker *locker NNN,
+                                         ulonglong count NNN)
+{
+  return;
+}
+
+static void set_statement_rows_examined_noop(PSI_statement_locker *locker NNN,
+                                             ulonglong count NNN)
+{
+  return;
+}
+
+static void inc_statement_created_tmp_disk_tables_noop(PSI_statement_locker *locker NNN,
+                                                       ulong count NNN)
+{
+  return;
+}
+
+static void inc_statement_created_tmp_tables_noop(PSI_statement_locker *locker NNN,
+                                                  ulong count NNN)
+{
+  return;
+}
+
+static void inc_statement_select_full_join_noop(PSI_statement_locker *locker NNN,
+                                                ulong count NNN)
+{
+  return;
+}
+
+static void inc_statement_select_full_range_join_noop(PSI_statement_locker *locker NNN,
+                                                      ulong count NNN)
+{
+  return;
+}
+
+static void inc_statement_select_range_noop(PSI_statement_locker *locker NNN,
+                                            ulong count NNN)
+{
+  return;
+}
+
+static void inc_statement_select_range_check_noop(PSI_statement_locker *locker NNN,
+                                                  ulong count NNN)
+{
+  return;
+}
+
+static void inc_statement_select_scan_noop(PSI_statement_locker *locker NNN,
+                                           ulong count NNN)
+{
+  return;
+}
+
+static void inc_statement_sort_merge_passes_noop(PSI_statement_locker *locker NNN,
+                                                 ulong count NNN)
+{
+  return;
+}
+
+static void inc_statement_sort_range_noop(PSI_statement_locker *locker NNN,
+                                          ulong count NNN)
+{
+  return;
+}
+
+static void inc_statement_sort_rows_noop(PSI_statement_locker *locker NNN,
+                                         ulong count NNN)
+{
+  return;
+}
+
+static void inc_statement_sort_scan_noop(PSI_statement_locker *locker NNN,
+                                         ulong count NNN)
+{
+  return;
+}
+
+static void set_statement_no_index_used_noop(PSI_statement_locker *locker NNN)
+{
+  return;
+}
+
+static void set_statement_no_good_index_used_noop(PSI_statement_locker *locker NNN) +{ + return; +} + +static void end_statement_noop(PSI_statement_locker *locker NNN, + void *stmt_da NNN) +{ + return; +} +/* Transaction stubs: nothing is recorded and the locker getter always returns NULL. */ +static PSI_transaction_locker* +get_thread_transaction_locker_noop(PSI_transaction_locker_state *state NNN, + const void *xid NNN, + ulonglong trxid NNN, + int isolation_level NNN, + my_bool read_only NNN, + my_bool autocommit NNN) +{ + return NULL; +} + +static void start_transaction_noop(PSI_transaction_locker *locker NNN, + const char *src_file NNN, uint src_line NNN) +{ + return; +} + +static void set_transaction_xid_noop(PSI_transaction_locker *locker NNN, + const void *xid NNN, + int xa_state NNN) +{ + return; +} + +static void set_transaction_xa_state_noop(PSI_transaction_locker *locker NNN, + int xa_state NNN) +{ + return; +} + +static void set_transaction_gtid_noop(PSI_transaction_locker *locker NNN, + const void *sid NNN, + const void *gtid_spec NNN) +{ + return; +} + +static void set_transaction_trxid_noop(PSI_transaction_locker *locker NNN, + const ulonglong *trxid NNN) +{ + return; +} + +static void inc_transaction_savepoints_noop(PSI_transaction_locker *locker NNN, + ulong count NNN) +{ + return; +} + +static void inc_transaction_rollback_to_savepoint_noop(PSI_transaction_locker *locker NNN, + ulong count NNN) +{ + return; +} + +static void inc_transaction_release_savepoint_noop(PSI_transaction_locker *locker NNN, + ulong count NNN) +{ + return; +} + +static void end_transaction_noop(PSI_transaction_locker *locker NNN, + my_bool commit NNN) +{ + return; +} +/* Socket stubs: start_socket_wait_noop always returns NULL, the others do nothing. */ +static PSI_socket_locker* +start_socket_wait_noop(PSI_socket_locker_state *state NNN, + PSI_socket *socket NNN, + PSI_socket_operation op NNN, + size_t count NNN, + const char *src_file NNN, + uint src_line NNN) +{ + return NULL; +} + +static void end_socket_wait_noop(PSI_socket_locker *locker NNN, + size_t count NNN) +{ + return; +} + +static void set_socket_state_noop(PSI_socket *socket 
NNN, + enum PSI_socket_state state NNN) +{ + return; +} + +static void set_socket_info_noop(PSI_socket *socket NNN, + const my_socket *fd NNN, + const struct sockaddr *addr NNN, + socklen_t addr_len NNN) +{ + return; +} + +static void set_socket_thread_owner_noop(PSI_socket *socket NNN) +{ + return; +} +/* Prepared-statement stubs: creation always returns NULL, the others do nothing. */ +static PSI_prepared_stmt* +create_prepare_stmt_noop(void *identity NNN, uint stmt_id NNN, + PSI_statement_locker *locker NNN, + const char *stmt_name NNN, size_t stmt_name_length NNN) +{ + return NULL; +} + +static void +execute_prepare_stmt_noop(PSI_statement_locker *locker NNN, + PSI_prepared_stmt *prepared_stmt NNN) +{ + return; +} + +static void set_prepared_stmt_text_noop(PSI_prepared_stmt *prepared_stmt NNN, + const char *text NNN, uint text_len NNN) +{ + return; +} +/* NOTE(review): this stub and reprepare_prepared_stmt_noop below are not 'static', unlike the neighbouring stubs - confirm nothing links against them before changing that. */ +void +destroy_prepared_stmt_noop(PSI_prepared_stmt *prepared_stmt NNN) +{ + return; +} + +void +reprepare_prepared_stmt_noop(PSI_prepared_stmt *prepared_stmt NNN) +{ + return; +} +/* Digest stubs: digest_start_noop always returns NULL. */ +static struct PSI_digest_locker* +digest_start_noop(PSI_statement_locker *locker NNN) +{ + return NULL; +} + +static void +digest_end_noop(PSI_digest_locker *locker NNN, + const struct sql_digest_storage *digest NNN) +{ + return; +} +/* Ignores all of its arguments and always returns 0. */ +static int +set_thread_connect_attrs_noop(const char *buffer __attribute__ ((unused)), + uint length __attribute__ ((unused)), + const void *from_cs __attribute__ ((unused))) +{ + return 0; +} +/* PSI_sp_* stubs: the functions returning a pointer always return NULL, the others do nothing. */ +static PSI_sp_locker* +pfs_start_sp_noop(PSI_sp_locker_state *state NNN, PSI_sp_share *sp_share NNN) +{ + return NULL; +} + +static void pfs_end_sp_noop(PSI_sp_locker *locker NNN) +{ + return; +} + +static void +pfs_drop_sp_noop(uint object_type NNN, + const char *schema_name NNN, uint schema_name_length NNN, + const char *object_name NNN, uint object_name_length NNN) +{ + return; +} + +static PSI_sp_share* +pfs_get_sp_share_noop(uint object_type NNN, + const char *schema_name NNN, uint schema_name_length NNN, + const char *object_name NNN, uint object_name_length NNN) +{ + return NULL; +} + 
+static void +pfs_release_sp_share_noop(PSI_sp_share *sp_share NNN) +{ + return; +} + +static void register_memory_noop(const char *category NNN, + PSI_memory_info *info NNN, + int count NNN) +{ + return; +} +/* The memory_alloc/realloc/claim stubs store NULL into *owner (so owner must be non-NULL) and return PSI_NOT_INSTRUMENTED. */ +static PSI_memory_key memory_alloc_noop(PSI_memory_key key NNN, size_t size NNN, struct PSI_thread ** owner NNN) +{ + *owner= NULL; + return PSI_NOT_INSTRUMENTED; +} + +static PSI_memory_key memory_realloc_noop(PSI_memory_key key NNN, size_t old_size NNN, size_t new_size NNN, struct PSI_thread ** owner NNN) +{ + *owner= NULL; + return PSI_NOT_INSTRUMENTED; +} + +static PSI_memory_key memory_claim_noop(PSI_memory_key key NNN, size_t size NNN, struct PSI_thread ** owner) +{ + *owner= NULL; + return PSI_NOT_INSTRUMENTED; +} + +static void memory_free_noop(PSI_memory_key key NNN, size_t size NNN, struct PSI_thread * owner NNN) +{ + return; +} + +static void unlock_table_noop(PSI_table *table NNN) +{ + return; +} +/* Metadata-lock stubs: the functions returning a pointer always return NULL, the others do nothing. */ +static PSI_metadata_lock * +create_metadata_lock_noop(void *identity NNN, + const MDL_key *mdl_key NNN, + opaque_mdl_type mdl_type NNN, + opaque_mdl_duration mdl_duration NNN, + opaque_mdl_status mdl_status NNN, + const char *src_file NNN, + uint src_line NNN) +{ + return NULL; +} + +static void +set_metadata_lock_status_noop(PSI_metadata_lock* lock NNN, + opaque_mdl_status mdl_status NNN) +{ +} + +static void +destroy_metadata_lock_noop(PSI_metadata_lock* lock NNN) +{ +} + +static PSI_metadata_locker * +start_metadata_wait_noop(PSI_metadata_locker_state *state NNN, + PSI_metadata_lock *mdl NNN, + const char *src_file NNN, + uint src_line NNN) +{ + return NULL; +} + +static void +end_metadata_wait_noop(PSI_metadata_locker *locker NNN, + int rc NNN) +{ +} +/* Table of the no-op stubs. The initializers are positional, so their order must follow the member order of the PSI struct (declared outside this chunk); do not reorder. */ +static PSI PSI_noop= +{ + register_mutex_noop, + register_rwlock_noop, + register_cond_noop, + register_thread_noop, + register_file_noop, + register_stage_noop, + register_statement_noop, + register_socket_noop, + init_mutex_noop, + destroy_mutex_noop, + init_rwlock_noop, + destroy_rwlock_noop, + 
init_cond_noop, + destroy_cond_noop, + init_socket_noop, + destroy_socket_noop, + get_table_share_noop, + release_table_share_noop, + drop_table_share_noop, + open_table_noop, + unbind_table_noop, + rebind_table_noop, + close_table_noop, + create_file_noop, + spawn_thread_noop, + new_thread_noop, + set_thread_id_noop, + set_thread_THD_noop, + set_thread_os_id_noop, + get_thread_noop, + set_thread_user_noop, + set_thread_user_host_noop, + set_thread_db_noop, + set_thread_command_noop, + set_connection_type_noop, + set_thread_start_time_noop, + set_thread_state_noop, + set_thread_info_noop, + set_thread_noop, + delete_current_thread_noop, + delete_thread_noop, + get_thread_file_name_locker_noop, + get_thread_file_stream_locker_noop, + get_thread_file_descriptor_locker_noop, + unlock_mutex_noop, + unlock_rwlock_noop, + signal_cond_noop, + broadcast_cond_noop, + start_idle_wait_noop, + end_idle_wait_noop, + start_mutex_wait_noop, + end_mutex_wait_noop, + start_rwlock_rdwait_noop, + end_rwlock_rdwait_noop, + start_rwlock_wrwait_noop, + end_rwlock_wrwait_noop, + start_cond_wait_noop, + end_cond_wait_noop, + start_table_io_wait_noop, + end_table_io_wait_noop, + start_table_lock_wait_noop, + end_table_lock_wait_noop, + start_file_open_wait_noop, + end_file_open_wait_noop, + end_file_open_wait_and_bind_to_descriptor_noop, + end_temp_file_open_wait_and_bind_to_descriptor_noop, + start_file_wait_noop, + end_file_wait_noop, + start_file_close_wait_noop, + end_file_close_wait_noop, + end_file_rename_wait_noop, + start_stage_noop, + get_current_stage_progress_noop, + end_stage_noop, + get_thread_statement_locker_noop, + refine_statement_noop, + start_statement_noop, + set_statement_text_noop, + set_statement_lock_time_noop, + set_statement_rows_sent_noop, + set_statement_rows_examined_noop, + inc_statement_created_tmp_disk_tables_noop, + inc_statement_created_tmp_tables_noop, + inc_statement_select_full_join_noop, + inc_statement_select_full_range_join_noop, + 
inc_statement_select_range_noop, + inc_statement_select_range_check_noop, + inc_statement_select_scan_noop, + inc_statement_sort_merge_passes_noop, + inc_statement_sort_range_noop, + inc_statement_sort_rows_noop, + inc_statement_sort_scan_noop, + set_statement_no_index_used_noop, + set_statement_no_good_index_used_noop, + end_statement_noop, + get_thread_transaction_locker_noop, + start_transaction_noop, + set_transaction_xid_noop, + set_transaction_xa_state_noop, + set_transaction_gtid_noop, + set_transaction_trxid_noop, + inc_transaction_savepoints_noop, + inc_transaction_rollback_to_savepoint_noop, + inc_transaction_release_savepoint_noop, + end_transaction_noop, + start_socket_wait_noop, + end_socket_wait_noop, + set_socket_state_noop, + set_socket_info_noop, + set_socket_thread_owner_noop, + create_prepare_stmt_noop, + destroy_prepared_stmt_noop, + reprepare_prepared_stmt_noop, + execute_prepare_stmt_noop, + set_prepared_stmt_text_noop, + digest_start_noop, + digest_end_noop, + set_thread_connect_attrs_noop, + pfs_start_sp_noop, + pfs_end_sp_noop, + pfs_drop_sp_noop, + pfs_get_sp_share_noop, + pfs_release_sp_share_noop, + register_memory_noop, + memory_alloc_noop, + memory_realloc_noop, + memory_claim_noop, + memory_free_noop, + + unlock_table_noop, + create_metadata_lock_noop, + set_metadata_lock_status_noop, + destroy_metadata_lock_noop, + start_metadata_wait_noop, + end_metadata_wait_noop, + + set_thread_peer_port_noop +}; + +/** + Hook for the instrumentation interface. + Code implementing the instrumentation interface should register here. +*/ +struct PSI_bootstrap *PSI_hook= NULL; + +/** + Instance of the instrumentation interface for the MySQL server. + @todo This is currently a global variable, which is handy when + compiling instrumented code that is bundled with the server. 
+ When dynamic plugin are truly supported, this variable will need + to be replaced by a macro, so that each XYZ plugin can have it's own + xyz_psi_server variable, obtained from PSI_bootstrap::get_interface() + with the version used at compile time for plugin XYZ. +*/ + +PSI *PSI_server= & PSI_noop; + +void set_psi_server(PSI *psi) +{ + PSI_server= psi; +} + +C_MODE_END + diff --git a/mysys/ptr_cmp.c b/mysys/ptr_cmp.c new file mode 100644 index 00000000..7ea15baf --- /dev/null +++ b/mysys/ptr_cmp.c @@ -0,0 +1,231 @@ +/* Copyright (c) 2000, 2015, Oracle and/or its affiliates. All rights reserved. + + This program is free software; you can redistribute it and/or modify + it under the terms of the GNU General Public License as published by + the Free Software Foundation; version 2 of the License. + + This program is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + GNU General Public License for more details. + + You should have received a copy of the GNU General Public License + along with this program; if not, write to the Free Software + Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1335 USA */ + +/* + get_ptr_compare(len) returns a pointer to a optimal byte-compare function + for a array of stringpointer where all strings have size len. + The bytes are compare as unsigned chars. + */ + +#include "mysys_priv.h" +#include +/* + * On some platforms, memcmp() is faster than the unrolled ptr_compare_N + * functions, as memcmp() is usually a platform-specific implementation + * written in assembler. for example one in /usr/lib/libc/libc_hwcap*.so.1. + * on Solaris, or on Windows inside C runtime linrary. + * + * On Solaris, native implementation is also usually faster than the + * built-in memcmp supplied by GCC, so it is recommended to build + * with "-fno-builtin-memcmp"in CFLAGS if building with GCC on Solaris. 
+ */ + +/* + Daniel Blacks tests shows that libc memcmp is generally faster than + ptr_cmp() at least of x86 and power8 platforms, so we use the libc + code as deafult for now +*/ + +#define USE_NATIVE_MEMCMP 1 + +#ifdef USE_NATIVE_MEMCMP + +#include + +static int native_compare(size_t *length, unsigned char **a, unsigned char **b) +{ + return memcmp(*a, *b, *length); +} + +qsort2_cmp get_ptr_compare (size_t size __attribute__((unused))) +{ + return (qsort2_cmp) native_compare; +} + +#else /* USE_NATIVE_MEMCMP */ + +static int ptr_compare(size_t *compare_length, uchar **a, uchar **b); +static int ptr_compare_0(size_t *compare_length, uchar **a, uchar **b); +static int ptr_compare_1(size_t *compare_length, uchar **a, uchar **b); +static int ptr_compare_2(size_t *compare_length, uchar **a, uchar **b); +static int ptr_compare_3(size_t *compare_length, uchar **a, uchar **b); +static int degenerate_compare_func(size_t *compare_length, uchar **a, uchar **b) +{ + DBUG_ASSERT(*compare_length == 0); + return 0; +} + +qsort2_cmp get_ptr_compare (size_t size) +{ + if (size == 0) + return (qsort2_cmp) degenerate_compare_func; + if (size < 4) + return (qsort2_cmp) ptr_compare; + switch (size & 3) { + case 0: return (qsort2_cmp) ptr_compare_0; + case 1: return (qsort2_cmp) ptr_compare_1; + case 2: return (qsort2_cmp) ptr_compare_2; + case 3: return (qsort2_cmp) ptr_compare_3; + } + return 0; /* Impossible */ +} + /* + Compare to keys to see witch is smaller. + Loop unrolled to make it quick !! 
+ */ +/* cmp(N): leave the enclosing function with the difference of the bytes at offset N when they differ. */ +#define cmp(N) if (first[N] != last[N]) return (int) first[N] - (int) last[N] +/* Plain byte loop; asserts that *compare_length is greater than 0. */ +static int ptr_compare(size_t *compare_length, uchar **a, uchar **b) +{ + size_t length= *compare_length; + uchar *first,*last; + + DBUG_ASSERT(length > 0); + first= *a; last= *b; + while (--length) + { + if (*first++ != *last++) + return (int) first[-1] - (int) last[-1]; + } + return (int) first[0] - (int) last[0]; +} + +/* Compares in groups of 4 bytes; the loop only ends when length reaches exactly 0, so *compare_length must be a non-zero multiple of 4. */ +static int ptr_compare_0(size_t *compare_length,uchar **a, uchar **b) +{ + size_t length= *compare_length; + uchar *first,*last; + + first= *a; last= *b; + loop: + cmp(0); + cmp(1); + cmp(2); + cmp(3); + if ((length-=4)) + { + first+=4; + last+=4; + goto loop; + } + return (0); +} + +/* ptr_compare_1/_2/_3 compare the first 1/2/3 bytes, then continue in groups of 4 like ptr_compare_0. */ +static int ptr_compare_1(size_t *compare_length,uchar **a, uchar **b) +{ + size_t length= *compare_length-1; + uchar *first,*last; + + first= *a+1; last= *b+1; + cmp(-1); + loop: + cmp(0); + cmp(1); + cmp(2); + cmp(3); + if ((length-=4)) + { + first+=4; + last+=4; + goto loop; + } + return (0); +} + +static int ptr_compare_2(size_t *compare_length,uchar **a, uchar **b) +{ + size_t length= *compare_length-2; + uchar *first,*last; + + first= *a +2 ; last= *b +2; + cmp(-2); + cmp(-1); + loop: + cmp(0); + cmp(1); + cmp(2); + cmp(3); + if ((length-=4)) + { + first+=4; + last+=4; + goto loop; + } + return (0); +} + +static int ptr_compare_3(size_t *compare_length,uchar **a, uchar **b) +{ + size_t length= *compare_length-3; + uchar *first,*last; + + first= *a +3 ; last= *b +3; + cmp(-3); + cmp(-2); + cmp(-1); + loop: + cmp(0); + cmp(1); + cmp(2); + cmp(3); + if ((length-=4)) + { + first+=4; + last+=4; + goto loop; + } + return (0); +} + +#endif /* USE_NATIVE_MEMCMP */ +/* Store pos in buff using pack_length bytes (1..4, or up to 8 when SIZEOF_OFF_T > 4) through the mi_intNstore macros; any other length trips DBUG_ASSERT(0). */ +void my_store_ptr(uchar *buff, size_t pack_length, my_off_t pos) +{ + switch (pack_length) { +#if SIZEOF_OFF_T > 4 + case 8: mi_int8store(buff,pos); break; + case 7: mi_int7store(buff,pos); break; + case 6: mi_int6store(buff,pos); break; + case 5: mi_int5store(buff,pos); break; +#endif + case 4: 
mi_int4store(buff,pos); break; + case 3: mi_int3store(buff,pos); break; + case 2: mi_int2store(buff,pos); break; + case 1: buff[0]= (uchar) pos; break; + default: DBUG_ASSERT(0); + } + return; +} +/* Counterpart of my_store_ptr(): read a pack_length-byte value from ptr; an unsupported length trips DBUG_ASSERT(0) and yields 0. */ +my_off_t my_get_ptr(uchar *ptr, size_t pack_length) +{ + my_off_t pos; + switch (pack_length) { +#if SIZEOF_OFF_T > 4 + case 8: pos= (my_off_t) mi_uint8korr(ptr); break; + case 7: pos= (my_off_t) mi_uint7korr(ptr); break; + case 6: pos= (my_off_t) mi_uint6korr(ptr); break; + case 5: pos= (my_off_t) mi_uint5korr(ptr); break; +#endif + case 4: pos= (my_off_t) mi_uint4korr(ptr); break; + case 3: pos= (my_off_t) mi_uint3korr(ptr); break; + case 2: pos= (my_off_t) mi_uint2korr(ptr); break; + case 1: pos= (my_off_t) *(uchar*) ptr; break; + default: DBUG_ASSERT(0); return 0; + } + return pos; +} diff --git a/mysys/queues.c b/mysys/queues.c new file mode 100644 index 00000000..0a1149bf --- /dev/null +++ b/mysys/queues.c @@ -0,0 +1,386 @@ +/* Copyright (C) 2010 Monty Program Ab + All Rights reserved + + Redistribution and use in source and binary forms, with or without + modification, are permitted provided that the following conditions are met: + * Redistributions of source code must retain the above copyright + notice, this list of conditions and the following disclaimer. + * Redistributions in binary form must reproduce the following disclaimer + in the documentation and/or other materials provided with the + distribution. + + THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS + FOR A PARTICULAR PURPOSE ARE DISCLAIMED. 
IN NO EVENT SHALL <COPYRIGHT + HOLDER> BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, + SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT + LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF + USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND + ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, + OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT + OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF + SUCH DAMAGE. +*/ + +/* + This code originates from the Unireg project. + + Code for general handling of priority Queues. + Implementation of queues from "Algorithms in C" by Robert Sedgewick. + + The queue can optionally store the position in queue in the element + that is in the queue. This allows one to remove any element from the queue + in O(1) time. + + Optimisation of _downheap() and queue_fix() is inspired by code done + by Mikael Ronström, based on an optimisation of _downheap from + Exercise 7.51 in "Data Structures & Algorithms in C++" by Mark Allen + Weiss, Second Edition. +*/ + +#include "mysys_priv.h" +#include "mysys_err.h" +#include <queues.h> + + +/* + Init queue + + SYNOPSIS + init_queue() + queue Queue to initialise + max_elements Max elements that will be put in queue + offset_to_key Offset to key in element stored in queue + Used when sending pointers to compare function + max_at_top Set to 1 if you want biggest element on top. + compare Compare function for elements, takes 3 arguments. + first_cmp_arg First argument to compare function + offset_to_queue_pos If <> 0, then offset+1 in element to store position + in queue (for fast delete of element in queue) + auto_extent When the queue is full and there is insert operation + extend the queue. 
+ + NOTES + Will allocate max_elements + 1 pointers for queue array; elements are + stored from index 1, slot 0 is not used. + The slot count is computed as size_t so that max_elements + 1 cannot + wrap around in unsigned int arithmetic. + + RETURN + 0 ok + 1 Could not allocate memory +*/ + +int init_queue(QUEUE *queue, uint max_elements, uint offset_to_key, + my_bool max_at_top, int (*compare) (void *, uchar *, uchar *), + void *first_cmp_arg, uint offset_to_queue_pos, + uint auto_extent) +{ + DBUG_ENTER("init_queue"); + if ((queue->root= (uchar **) my_malloc(key_memory_QUEUE, + ((size_t) max_elements + 1) * sizeof(void*), + MYF(MY_WME))) == 0) + DBUG_RETURN(1); + queue->elements= 0; + queue->compare= compare; + queue->first_cmp_arg= first_cmp_arg; + queue->max_elements= max_elements; + queue->offset_to_key= offset_to_key; + queue->offset_to_queue_pos= offset_to_queue_pos; + queue->auto_extent= auto_extent; + queue_set_max_at_top(queue, max_at_top); + DBUG_RETURN(0); +} + + +/* + Reinitialize queue for other usage + + SYNOPSIS + reinit_queue() + queue Queue to initialise + For rest of arguments, see init_queue() above + + NOTES + This will delete all elements from the queue. If you don't want this, + use resize_queue() instead. + + RETURN + 0 ok + 1 Wrong max_elements; Queue has old size +*/ + +int reinit_queue(QUEUE *queue, uint max_elements, uint offset_to_key, + my_bool max_at_top, int (*compare) (void *, uchar *, uchar *), + void *first_cmp_arg, uint offset_to_queue_pos, + uint auto_extent) +{ + DBUG_ENTER("reinit_queue"); + queue->elements= 0; + queue->compare= compare; + queue->first_cmp_arg= first_cmp_arg; + queue->offset_to_key= offset_to_key; + queue->offset_to_queue_pos= offset_to_queue_pos; + queue->auto_extent= auto_extent; + queue_set_max_at_top(queue, max_at_top); + DBUG_RETURN(resize_queue(queue, max_elements)); +} + + +/* + Resize queue + + SYNOPSIS + resize_queue() + queue Queue + max_elements New max size for queue + + NOTES + If you resize queue to be less than the elements you have in it, + the extra elements will be deleted + + RETURN + 0 ok + 1 Error. 
 In this case the queue is unchanged +*/ + +int resize_queue(QUEUE *queue, uint max_elements) +{ + uchar **new_root; + DBUG_ENTER("resize_queue"); + if (queue->max_elements == max_elements) + DBUG_RETURN(0); + /* size_t arithmetic: max_elements + 1 must not wrap around as unsigned int */ + if ((new_root= (uchar **) my_realloc(key_memory_QUEUE, (void *)queue->root, + ((size_t) max_elements + 1) * sizeof(void*), + MYF(MY_WME))) == 0) + DBUG_RETURN(1); + set_if_smaller(queue->elements, max_elements); + queue->max_elements= max_elements; + queue->root= new_root; + DBUG_RETURN(0); +} + + +/* + Delete queue + + SYNOPSIS + delete_queue() + queue Queue to delete + + IMPLEMENTATION + Just free allocated memory. + + NOTES + Can be called safely multiple times +*/ + +void delete_queue(QUEUE *queue) +{ + DBUG_ENTER("delete_queue"); + my_free(queue->root); + queue->root=0; /* Allow multiple calls */ + DBUG_VOID_RETURN; +} + +/* Sift element up from slot idx: parents that compare after it are moved down one level, then element is stored (and its queue position recorded when offset_to_queue_pos is set). */ +static void insert_at(QUEUE *queue, uchar *element, uint idx) +{ + uint next_index, offset_to_key= queue->offset_to_key; + uint offset_to_queue_pos= queue->offset_to_queue_pos; + /* max_at_top swaps the comparison if we want to order by desc */ + while ((next_index= idx >> 1) > 0 && + queue->compare(queue->first_cmp_arg, + element + offset_to_key, + queue->root[next_index] + offset_to_key) * + queue->max_at_top < 0) + { + queue->root[idx]= queue->root[next_index]; + if (offset_to_queue_pos) + (*(uint*) (queue->root[idx] + offset_to_queue_pos-1))= idx; + idx= next_index; + } + queue->root[idx]= element; + if (offset_to_queue_pos) + (*(uint*) (element + offset_to_queue_pos-1))= idx; +} + + +/* + Insert element in queue + + SYNOPSIS + queue_insert() + queue Queue to use + element Element to insert + + NOTES + The queue must not be full; this is only checked by DBUG_ASSERT. +*/ + +void queue_insert(QUEUE *queue, uchar *element) +{ + DBUG_ASSERT(queue->elements < queue->max_elements); + insert_at(queue, element, ++queue->elements); +} + + +/* + Like queue_insert, but resize queue if queue is full + + SYNOPSIS + queue_insert_safe() + queue Queue to use + element Element to insert + + RETURN + 0 OK + 1 Cannot allocate more 
memory + 2 auto_extent is 0; No insertion done +*/ + +int queue_insert_safe(QUEUE *queue, uchar *element) +{ + + if (queue->elements == queue->max_elements) + { + if (!queue->auto_extent) + return 2; + if (resize_queue(queue, queue->max_elements + queue->auto_extent)) + return 1; + } + + queue_insert(queue, element); + return 0; +} + + +/* + Remove item from queue + + SYNOPSIS + queue_remove() + queue Queue to use + idx Index of element to remove (1 <= idx <= queue->elements). + First element in queue is 'queue_first_element(queue)' + + RETURN + pointer to removed element +*/ + +uchar *queue_remove(QUEUE *queue, uint idx) +{ + uchar *element; + DBUG_ASSERT(idx >= 1); + DBUG_ASSERT(idx <= queue->elements); + element= queue->root[idx]; + queue->root[idx]= queue->root[queue->elements--]; + queue_replace(queue, idx); + return element; +} + + +/* + Restores the heap property from idx down the heap + + SYNOPSIS + _downheap() + queue Queue to use + idx Index of element to change +*/ + +void _downheap(QUEUE *queue, uint idx) +{ + uchar *element= queue->root[idx]; + uint next_index, + elements= queue->elements, + half_queue= elements >> 1, + offset_to_key= queue->offset_to_key, + offset_to_queue_pos= queue->offset_to_queue_pos; + + while (idx <= half_queue) + { + next_index= idx+idx; + if (next_index < elements && + (queue->compare(queue->first_cmp_arg, + queue->root[next_index]+offset_to_key, + queue->root[next_index+1]+offset_to_key) * + queue->max_at_top) > 0) + next_index++; + if ((queue->compare(queue->first_cmp_arg, + queue->root[next_index]+offset_to_key, + element+offset_to_key) * queue->max_at_top) >= 0) + break; + queue->root[idx]= queue->root[next_index]; + if (offset_to_queue_pos) + (*(uint*) (queue->root[idx] + offset_to_queue_pos-1))= idx; + idx= next_index; + } + queue->root[idx]=element; + if (offset_to_queue_pos) + (*(uint*) (element + offset_to_queue_pos-1))= idx; +} + + +/* + Fix heap when every element was changed. 
+ + SYNOPSIS + queue_fix() + queue Queue to use + + NOTES + Calls _downheap() for every index from elements/2 down to 1. +*/ + +void queue_fix(QUEUE *queue) +{ + uint i; + for (i= queue->elements >> 1; i > 0; i--) + _downheap(queue, i); +} + + +/* + Change element at fixed position + + SYNOPSIS + queue_replace() + queue Queue to use + idx Index of element to change + + NOTE + optimized for the case when the new position is close to the end of the + heap (typical for queue_remove() replacements). + Only the first level is compared against the element itself; after that + the hole is moved down along the preferred child and insert_at() sifts + the element back up from where the hole ended. +*/ + +void queue_replace(QUEUE *queue, uint idx) +{ + uchar *element= queue->root[idx]; + uint next_index, + elements= queue->elements, + half_queue= elements>>1, + offset_to_key= queue->offset_to_key, + offset_to_queue_pos= queue->offset_to_queue_pos; + my_bool first= TRUE; + + while (idx <= half_queue) + { + next_index= idx + idx; + if (next_index < elements && + queue->compare(queue->first_cmp_arg, + queue->root[next_index]+offset_to_key, + queue->root[next_index+1]+offset_to_key) * + queue->max_at_top > 0) + next_index++; + if (first && + queue->compare(queue->first_cmp_arg, + queue->root[next_index]+offset_to_key, + element+offset_to_key) * queue->max_at_top >= 0) + { + queue->root[idx]= element; + if (offset_to_queue_pos) + (*(uint*) (element + offset_to_queue_pos-1))= idx; + break; + } + first= FALSE; + queue->root[idx]= queue->root[next_index]; + if (offset_to_queue_pos) + (*(uint*) (queue->root[idx] + offset_to_queue_pos-1))= idx; + idx=next_index; + } + + insert_at(queue, element, idx); +} diff --git a/mysys/safemalloc.c b/mysys/safemalloc.c new file mode 100644 index 00000000..edfe3b18 --- /dev/null +++ b/mysys/safemalloc.c @@ -0,0 +1,421 @@ +/* Copyright (C) 2000 MySQL AB, 2011 Monty Program Ab + + This program is free software; you can redistribute it and/or modify + it under the terms of the GNU General Public License as published by + the Free Software Foundation; version 2 of the License. 
+ + This program is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + GNU General Public License for more details. + + You should have received a copy of the GNU General Public License + along with this program; if not, write to the Free Software + Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1335 USA */ + +/******************************************************************** + memory debugger + based on safemalloc, memory sub-system, written by Bjorn Benson +********************************************************************/ + + +#include "mysys_priv.h" +#include <my_stacktrace.h> /* my_addr_resolve */ + +#if HAVE_EXECINFO_H +#include <execinfo.h> +#endif + +/* + this can be set to 1 if we leak memory and know it + (to disable memory leak tests on exit) +*/ +int sf_leaking_memory= 0; + +#ifdef SAFEMALLOC + +/* this mutex protects all sf_* variables, and nothing else*/ +static pthread_mutex_t sf_mutex; +static int init_done= 0; + +#ifndef SF_REMEMBER_FRAMES +#ifdef USE_MYSYS_NEW +#define SF_REMEMBER_FRAMES 14 +#else +#define SF_REMEMBER_FRAMES 8 +#endif /* USE_MYSYS_NEW */ +#endif /* SF_REMEMBER_FRAMES */ + +/* ignore the first two frames (sf_malloc itself, and my_malloc) */ +#define SF_FRAMES_SKIP 2 + +/* + Structure that stores information of an allocated memory block + The data is at &struct_adr+sizeof(struct irem) + Note that sizeof(struct st_irem) % sizeof(double) == 0 +*/ +struct st_irem +{ + struct st_irem *next; /* Linked list of structures */ + struct st_irem *prev; /* Other link */ + size_t datasize; /* Size requested */ +#if SIZEOF_SIZE_T == 4 + size_t pad; /* Compensate 32bit datasize */ +#endif +#ifdef HAVE_BACKTRACE + void *frame[SF_REMEMBER_FRAMES]; /* call stack */ +#endif + uint32 flags; /* Flags passed to malloc */ + my_thread_id thread_id; /* Which thread did the allocation */ + uint32 marker; /* Underrun marker value */ +}; + 
+static int sf_malloc_count= 0; /* Number of allocated chunks */ + +static void *sf_min_adress= (void*) (intptr)~0ULL, + *sf_max_adress= 0; + +static struct st_irem *sf_malloc_root = 0; + +#define MAGICSTART 0x14235296 /* A magic value for underrun key */ +#define MAGICEND 0x12345678 /* Value for freed block */ + +#define MAGICEND0 0x68 /* Magic values for overrun keys */ +#define MAGICEND1 0x34 /* " */ +#define MAGICEND2 0x7A /* " */ +#define MAGICEND3 0x15 /* " */ + +static int bad_ptr(const char *where, void *ptr); +static void free_memory(void *ptr); +static void sf_terminate(); + +/* Setup default call to get a thread id for the memory */ + +my_thread_id default_sf_malloc_dbug_id(void) +{ + return my_thread_dbug_id(); +} + +my_thread_id (*sf_malloc_dbug_id)(void)= default_sf_malloc_dbug_id; + + +/** + allocates memory + + The block is prefixed with a struct st_irem header and followed by four + overrun marker bytes, and is linked into the list headed by sf_malloc_root. + + @return pointer to the data area, or 0 if the request is too large or + malloc() fails +*/ + +void *sf_malloc(size_t size, myf my_flags) +{ + struct st_irem *irem; + uchar *data; + + /* + this style of initialization looks like race condition prone, + but it is safe under the assumption that a program does + at least one malloc() while still being single threaded. 
+ */ + if (!init_done) + { + pthread_mutex_init(&sf_mutex, NULL); + atexit(sf_terminate); + init_done= 1; + } + + if (size > SIZE_T_MAX - 1024L*1024L*16L) /* Wrong call */ + return 0; + + if (!(irem= (struct st_irem *) malloc (sizeof(struct st_irem) + size + 4))) + return 0; + + /* we guarantee the alignment */ + compile_time_assert(sizeof(struct st_irem) % sizeof(double) == 0); + + /* Fill up the structure */ + data= (uchar*) (irem + 1); + irem->datasize= size; + irem->prev= 0; + irem->flags= my_flags; + irem->marker= MAGICSTART; + irem->thread_id= sf_malloc_dbug_id(); + data[size + 0]= MAGICEND0; + data[size + 1]= MAGICEND1; + data[size + 2]= MAGICEND2; + data[size + 3]= MAGICEND3; + +#ifdef HAVE_BACKTRACE + { + void *frame[SF_REMEMBER_FRAMES + SF_FRAMES_SKIP]; + int frames= backtrace(frame, array_elements(frame)); + if (frames < SF_FRAMES_SKIP) + frames= 0; + else + { + frames-= SF_FRAMES_SKIP; + memcpy(irem->frame, frame + SF_FRAMES_SKIP, sizeof(void*)*frames); + } + if (frames < SF_REMEMBER_FRAMES) + irem->frame[frames]= 0; + } +#endif + + pthread_mutex_lock(&sf_mutex); + + /* Add this structure to the linked list */ + if ((irem->next= sf_malloc_root)) + sf_malloc_root->prev= irem; + sf_malloc_root= irem; + + /* Keep the statistics */ + sf_malloc_count++; + set_if_smaller(sf_min_adress, (void*)data); + set_if_bigger(sf_max_adress, (void*)data); + + pthread_mutex_unlock(&sf_mutex); + + TRASH_ALLOC(data, size); + return data; +} +/* Reallocate by allocating a new block, copying min(size, old size) bytes and freeing the old one. A NULL ptr behaves like sf_malloc(); returns 0 for a pointer rejected by bad_ptr() or when allocation fails (the old block is then kept). */ +void *sf_realloc(void *ptr, size_t size, myf my_flags) +{ + char *data; + + if (!ptr) + return sf_malloc(size, my_flags); + + if (bad_ptr("Reallocating", ptr)) + return 0; + + if ((data= sf_malloc(size, my_flags))) + { + struct st_irem *irem= (struct st_irem *)ptr - 1; + set_if_smaller(size, irem->datasize); + memcpy(data, ptr, size); + free_memory(ptr); + } + return data; +} +/* Free a block; NULL pointers and pointers rejected by bad_ptr() are ignored. */ +void sf_free(void *ptr) +{ + if (!ptr || bad_ptr("Freeing", ptr)) + return; + + free_memory(ptr); +} + +/** + Return size of memory block and if block is 
thread specific + + sf_malloc_usable_size() + @param ptr Pointer to malloced block + @param is_thread_specific Set to 1 if block is marked as + MY_THREAD_SPECIFIC, otherwise 0 + + @return Size of block +*/ + +size_t sf_malloc_usable_size(void *ptr, my_bool *is_thread_specific) +{ + struct st_irem *irem= (struct st_irem *)ptr - 1; + DBUG_ENTER("sf_malloc_usable_size"); + *is_thread_specific= MY_TEST(irem->flags & MY_THREAD_SPECIFIC); + DBUG_PRINT("exit", ("size: %lu flags: %lu", (ulong) irem->datasize, + (ulong)irem->flags)); + DBUG_RETURN(irem->datasize); +} + +#ifdef HAVE_BACKTRACE +/* Print up to SF_REMEMBER_FRAMES entries of frame to stderr, stopping at the first NULL entry. */ +static void print_stack(void **frame) +{ + const char *err; + int i; + + if ((err= my_addr_resolve_init())) + { + fprintf(stderr, "(my_addr_resolve failure: %s)\n", err); + return; + } + + for (i=0; i < SF_REMEMBER_FRAMES && frame[i]; i++) + { + my_addr_loc loc; + if (i) + fprintf(stderr, ", "); + + if (my_addr_resolve(frame[i], &loc)) + fprintf(stderr, "%p", frame[i]); + else + fprintf(stderr, "%s:%u", loc.file, loc.line); + } + fprintf(stderr, "\n"); +} +#else +#define print_stack(X) fprintf(stderr, "???\n") +#endif +/* Unlink the block from the allocation list, mark it as freed, trash its contents and release it. The caller has already validated ptr with bad_ptr(). */ +static void free_memory(void *ptr) +{ + struct st_irem *irem= (struct st_irem *)ptr - 1; + size_t end_offset; + + if ((irem->flags & MY_THREAD_SPECIFIC) && irem->thread_id && + irem->thread_id != sf_malloc_dbug_id()) + { + fprintf(stderr, "Warning: %4lu bytes freed by T@%lu, allocated by T@%lu at ", + (ulong) irem->datasize, + (ulong) sf_malloc_dbug_id(), (ulong) irem->thread_id); + print_stack(irem->frame); + } + + pthread_mutex_lock(&sf_mutex); + /* Protect against double free at same time */ + if (irem->marker != MAGICSTART) + { + pthread_mutex_unlock(&sf_mutex); /* Allow stack trace alloc mem */ + DBUG_ASSERT(irem->marker == MAGICSTART); /* Crash */ + pthread_mutex_lock(&sf_mutex); /* Impossible, but safer */ + } + + /* Remove this structure from the linked list */ + if (irem->prev) + irem->prev->next= irem->next; + else + sf_malloc_root= irem->next; + + if 
(irem->next) + irem->next->prev= irem->prev; + + /* Handle the statistics */ + sf_malloc_count--; + + irem->marker= MAGICEND; /* Double free detection */ + pthread_mutex_unlock(&sf_mutex); + + /* Trash the data and magic values, but keep the stack trace */ + end_offset= sizeof(*irem) - ((char*) &irem->marker - (char*) irem); + TRASH_FREE((uchar*)(irem + 1) - end_offset, irem->datasize + 4 + end_offset); + free(irem); + return; +} +/* Print a printf-style message to stderr, followed by the caller's stack trace when HAVE_BACKTRACE is defined. */ +static void warn(const char *format,...) +{ + va_list args; + DBUG_PRINT("error", ("%s", format)); + va_start(args,format); + vfprintf(stderr, format, args); + fflush(stderr); + va_end(args); + +#ifdef HAVE_BACKTRACE + { + void *frame[SF_REMEMBER_FRAMES + SF_FRAMES_SKIP]; + int frames= backtrace(frame, array_elements(frame)); + fprintf(stderr, " at "); + /* + A trace shorter than the skipped prefix would leave + frame[SF_FRAMES_SKIP] unset; terminate the list there so that + print_stack() sees an empty trace instead of uninitialised slots. + */ + if (frames < SF_FRAMES_SKIP) + frames= SF_FRAMES_SKIP; + if (frames < SF_REMEMBER_FRAMES + SF_FRAMES_SKIP) + frame[frames]= 0; + print_stack(frame + SF_FRAMES_SKIP); + } +#endif +} +/* Return 1 (after printing a warning) if ptr is misaligned, outside the range of addresses handed out so far, or has a damaged underrun/overrun marker; return 0 if the block looks intact. */ +static int bad_ptr(const char *where, void *ptr) +{ + struct st_irem *irem= (struct st_irem *)ptr - 1; + const uchar *magicend; + + if (((intptr) ptr) % sizeof(double)) + { + warn("Error: %s wrong aligned pointer", where); + return 1; + } + if (ptr < sf_min_adress || ptr > sf_max_adress) + { + warn("Error: %s pointer out of range", where); + return 1; + } + if (irem->marker != MAGICSTART) + { + DBUG_PRINT("error",("Unallocated data or underrun buffer %p", ptr)); + warn("Error: %s unallocated data or underrun buffer %p", where, ptr); + return 1; + } + + magicend= (uchar*)ptr + irem->datasize; + if (magicend[0] != MAGICEND0 || + magicend[1] != MAGICEND1 || + magicend[2] != MAGICEND2 || + magicend[3] != MAGICEND3) + { + DBUG_PRINT("error",("Overrun buffer %p", ptr)); + warn("Error: %s overrun buffer %p", where, ptr); + fprintf(stderr, "Allocated at "); + print_stack(irem->frame); + return 1; + } + + return 0; +} + +/* check all allocated memory list for consistency; returns non-zero if any block or the list itself is damaged */ +int sf_sanity() +{ + struct st_irem *irem; + int flag= 0; + int count= 0; + + 
pthread_mutex_lock(&sf_mutex); + count= sf_malloc_count; + for (irem= sf_malloc_root; irem && count > 0; count--, irem= irem->next) + flag+= bad_ptr("Safemalloc", irem + 1); + pthread_mutex_unlock(&sf_mutex); + if (count || irem) + { + warn("Error: Safemalloc link list destroyed"); + flag= 1; + } + return flag; +} + +/** + report on all the memory pieces that have not been free'd + + The final summary counts only the blocks that were actually reported, + so it stays consistent with the byte total when a thread id is given. + + @param id Id of thread to report. 0 if all +*/ + +void sf_report_leaked_memory(my_thread_id id) +{ + size_t total= 0; + int chunks= 0; /* Number of blocks reported below */ + struct st_irem *irem; + + sf_sanity(); + + /* Report on all the memory that was allocated but not free'd */ + + for (irem= sf_malloc_root; irem; irem= irem->next) + { + if (!id || (irem->thread_id == id && irem->flags & MY_THREAD_SPECIFIC)) + { + my_thread_id tid = irem->thread_id && irem->flags & MY_THREAD_SPECIFIC ? + irem->thread_id : 0; + fprintf(stderr, "Warning: %4lu bytes lost at %p, allocated by T@%llu at ", + (ulong) irem->datasize, (char*) (irem + 1), tid); + print_stack(irem->frame); + total+= irem->datasize; + chunks++; + } + } + if (total) + fprintf(stderr, "Memory lost: %lu bytes in %d chunks\n", + (ulong) total, chunks); + return; +} +/* atexit() handler registered by sf_malloc(): report leaks unless sf_leaking_memory is set, then destroy the mutex. */ +static void sf_terminate() +{ + if (!sf_leaking_memory) + sf_report_leaked_memory(0); + + pthread_mutex_destroy(&sf_mutex); +} + +#endif diff --git a/mysys/stacktrace.c b/mysys/stacktrace.c new file mode 100644 index 00000000..f203bba4 --- /dev/null +++ b/mysys/stacktrace.c @@ -0,0 +1,739 @@ +/* + Copyright (c) 2001, 2011, Oracle and/or its affiliates + Copyright (c) 2020, MariaDB Corporation. + + This program is free software; you can redistribute it and/or modify + it under the terms of the GNU General Public License as published by + the Free Software Foundation; version 2 of the License. + + This program is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. 
See the
+   GNU General Public License for more details.
+
+   You should have received a copy of the GNU General Public License
+   along with this program; if not, write to the Free Software
+   Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1335 USA */
+
+#include "mysys_priv.h"
+#include
+
+#ifndef _WIN32
+#include
+#include
+#ifdef HAVE_STACKTRACE
+#include
+#include
+
+#ifdef __linux__
+#include /* isprint */
+#include /* SYS_gettid */
+#endif
+
+#if HAVE_EXECINFO_H
+#include
+#endif
+
+#ifdef HAVE_gcov
+#include
+#endif
+/**
+  Default handler for printing stacktrace
+
+  Prints the program name and the signal number, dumps a stack trace with
+  my_print_stacktrace(), then restores the default disposition of the
+  signal and re-sends it to this process with kill().
+
+  @param sig  Number of the signal being handled
+*/
+
+static sig_handler default_handle_fatal_signal(int sig)
+{
+  my_safe_printf_stderr("%s: Got signal %d. Attempting backtrace\n",
+                        my_progname_short, sig);
+  my_print_stacktrace(0,0,1);
+#ifndef _WIN32
+  signal(sig, SIG_DFL);
+  kill(getpid(), sig);
+#endif /* _WIN32 */
+  return;
+}
+
+
+/**
+  Initialize printing of stacktrace at signal
+
+  Installs default_handle_fatal_signal() for SIGSEGV, SIGABRT, SIGBUS
+  (where defined), SIGILL and SIGFPE, using the SA_RESETHAND and
+  SA_NODEFER flags and an empty signal mask.
+*/
+
+void my_setup_stacktrace(void)
+{
+  struct sigaction sa;
+  sa.sa_flags = SA_RESETHAND | SA_NODEFER;
+  sigemptyset(&sa.sa_mask);
+  sa.sa_handler= default_handle_fatal_signal;
+  sigaction(SIGSEGV, &sa, NULL);
+  sigaction(SIGABRT, &sa, NULL);
+#ifdef SIGBUS
+  sigaction(SIGBUS, &sa, NULL);
+#endif
+  sigaction(SIGILL, &sa, NULL);
+  sigaction(SIGFPE, &sa, NULL);
+}
+
+
+/*
+  Attempt to print a char * pointer as a string.
+
+  SYNOPSIS
+    Prints either until the end of string ('\0'), or max_len characters have
+    been printed.
+
+  RETURN VALUE
+    0  Pointer was within the heap address space.
+       The string was printed fully, or until the end of the heap address space.
+    1  Pointer is outside the heap address space. Printed as invalid.
+
+  NOTE
+    On some systems, we can have valid pointers outside the heap address space.
+    This is through the use of mmap inside malloc calls. When this function
+    returns 1, it does not mean 100% that the pointer is corrupted. 
+*/
+
+int my_safe_print_str(const char* val, size_t max_len)
+{
+  const char *orig_val= val;
+  if (!val)
+  {
+    my_safe_printf_stderr("%s", "(null)");
+    return 1;
+  }
+
+  /*
+    NOTE(review): the loop below does not test for '\0'; it ends only when
+    max_len bytes have been written or a write fails.
+  */
+  for (; max_len; --max_len)
+  {
+    if (my_write_stderr((val++), 1) != 1)
+    {
+      if ((errno == EFAULT) &&(val == orig_val + 1))
+      {
+        // We can not read the address from very beginning
+        my_safe_printf_stderr("Can't access address %p", orig_val);
+      }
+      break;
+    }
+  }
+  my_safe_printf_stderr("%s", "\n");
+
+  return 0;
+}
+
+#if defined(HAVE_PRINTSTACK)
+
+/* Use Solaris' symbolic stack trace routine. */
+#include
+/*
+  Print the stack with printstack(); both stack arguments are unused here.
+  Unless 'silent' is set, a hint on how to resolve the trace is printed.
+*/
+void my_print_stacktrace(uchar* stack_bottom __attribute__((unused)),
+                         ulong thread_stack __attribute__((unused)),
+                         my_bool silent)
+{
+  if (printstack(fileno(stderr)) == -1)
+    my_safe_printf_stderr("%s",
+      "Error when traversing the stack, stack appears corrupt.\n");
+  else if (!silent)
+    my_safe_printf_stderr("%s",
+      "Please read "
+      "http://dev.mysql.com/doc/refman/5.1/en/resolve-stack-dump.html\n"
+      "and follow instructions on how to resolve the stack trace.\n"
+      "Resolved stack trace is much more helpful in diagnosing the\n"
+      "problem, so please do resolve it\n");
+}
+
+#elif HAVE_BACKTRACE && (HAVE_BACKTRACE_SYMBOLS || HAVE_BACKTRACE_SYMBOLS_FD)
+
+#if BACKTRACE_DEMANGLE
+/*
+  Weak definition that does no demangling: it always returns NULL.
+  Presumably a strong my_demangle() is linked in elsewhere - TODO confirm.
+*/
+char __attribute__ ((weak)) *
+my_demangle(const char *mangled_name __attribute__((unused)),
+            int *status __attribute__((unused)))
+{
+  return NULL;
+}
+/*
+  Print n backtrace_symbols() strings, replacing the text between '(' and
+  '+' with the demangled name whenever my_demangle() succeeds.
+*/
+static void my_demangle_symbols(char **addrs, int n)
+{
+  int status, i;
+  char *begin, *end, *demangled;
+
+  for (i= 0; i < n; i++)
+  {
+    demangled= NULL;
+    begin= strchr(addrs[i], '(');
+    end= begin ? strchr(begin, '+') : NULL;
+
+    if (begin && end)
+    {
+      *begin++= *end++= '\0'; /* split the string into three parts in place */
+      demangled= my_demangle(begin, &status);
+      if (!demangled || status)
+      {
+        demangled= NULL;
+        begin[-1]= '('; /* restore the original string */
+        end[-1]= '+';
+      }
+    }
+
+    if (demangled)
+      my_safe_printf_stderr("%s(%s+%s\n", addrs[i], demangled, end);
+    else
+      my_safe_printf_stderr("%s\n", addrs[i]);
+  }
+}
+
+#endif /* BACKTRACE_DEMANGLE */
+
+#if HAVE_MY_ADDR_RESOLVE
+/*
+  Print n frames as file:line(function)[address]. Frames that cannot be
+  resolved are printed with backtrace_symbols_fd() instead.
+  Returns 0 if my_addr_resolve_init() failed (nothing printed but the
+  error), 1 otherwise.
+*/
+static int print_with_addr_resolve(void **addrs, int n)
+{
+  int i;
+  const char *err;
+
+  if ((err= my_addr_resolve_init()))
+  {
+    my_safe_printf_stderr("(my_addr_resolve failure: %s)\n", err);
+    return 0;
+  }
+
+  for (i= 0; i < n; i++)
+  {
+    my_addr_loc loc;
+    if (my_addr_resolve(addrs[i], &loc))
+      backtrace_symbols_fd(addrs+i, 1, fileno(stderr));
+    else
+      my_safe_printf_stderr("%s:%u(%s)[%p]\n",
+                            loc.file, loc.line, loc.func, addrs[i]);
+  }
+  return 1;
+}
+#endif
+/*
+  Print up to 128 frames obtained from backtrace(). Tries, in this order:
+  my_addr_resolve, demangled backtrace_symbols(), backtrace_symbols_fd();
+  the first method that works ends the function.
+*/
+void my_print_stacktrace(uchar* stack_bottom, ulong thread_stack,
+                         my_bool silent __attribute__((unused)))
+{
+  void *addrs[128];
+  char **strings __attribute__((unused)) = NULL;
+  int n = backtrace(addrs, array_elements(addrs));
+  my_safe_printf_stderr("stack_bottom = %p thread_stack 0x%lx\n",
+                        stack_bottom, thread_stack);
+#if HAVE_MY_ADDR_RESOLVE
+  if (print_with_addr_resolve(addrs, n))
+    return;
+#endif
+#if BACKTRACE_DEMANGLE
+  if ((strings= backtrace_symbols(addrs, n)))
+  {
+    my_demangle_symbols(strings, n);
+    free(strings);
+    return;
+  }
+#endif
+#if HAVE_BACKTRACE_SYMBOLS_FD
+  backtrace_symbols_fd(addrs, n, fileno(stderr));
+#endif
+}
+
+#elif defined(TARGET_OS_LINUX)
+
+#ifdef __i386__
+#define SIGRETURN_FRAME_OFFSET 17
+#endif
+
+#ifdef __x86_64__
+#define SIGRETURN_FRAME_OFFSET 23
+#endif
+
+#if defined(__alpha__) && defined(__GNUC__)
+/*
+  The only way to backtrace without a symbol table on alpha
+  is to find stq fp,N(sp), and the first byte
+  of the instruction opcode will give us the value of N. 
From this
+  we can find where the old value of fp is stored
+*/
+
+#define MAX_INSTR_IN_FUNC 10000
+
+inline uchar** find_prev_fp(uint32* pc, uchar** fp)
+{
+  int i;
+  /* Scan backwards from pc, at most MAX_INSTR_IN_FUNC instructions */
+  for (i = 0; i < MAX_INSTR_IN_FUNC; ++i,--pc)
+  {
+    uchar* p = (uchar*)pc;
+    if (p[2] == 222 && p[3] == 35)
+    {
+      return (uchar**)((uchar*)fp - *(short int*)p);
+    }
+  }
+  return 0;
+}
+
+inline uint32* find_prev_pc(uint32* pc, uchar** fp)
+{
+  int i;
+  /* Scan backwards from pc, at most MAX_INSTR_IN_FUNC instructions */
+  for (i = 0; i < MAX_INSTR_IN_FUNC; ++i,--pc)
+  {
+    char* p = (char*)pc;
+    if (p[1] == 0 && p[2] == 94 && p[3] == -73)
+    {
+      uint32* prev_pc = (uint32*)*((fp+p[0]/sizeof(fp)));
+      return prev_pc;
+    }
+  }
+  return 0;
+}
+#endif /* defined(__alpha__) && defined(__GNUC__) */
+/*
+  Walk the chain of saved frame pointers from the current frame towards
+  stack_bottom and print one address per frame. If stack_bottom is not
+  usable it is guessed from the address of a local variable.
+*/
+void my_print_stacktrace(uchar* stack_bottom, ulong thread_stack,
+                         my_bool silent)
+{
+  uchar** UNINIT_VAR(fp);
+  uint frame_count = 0, sigreturn_frame_count;
+#if defined(__alpha__) && defined(__GNUC__)
+  uint32* pc;
+#endif
+
+
+#ifdef __i386__
+  __asm __volatile__ ("movl %%ebp,%0"
+                      :"=r"(fp)
+                      :"r"(fp));
+#endif
+#ifdef __x86_64__
+  __asm __volatile__ ("movq %%rbp,%0"
+                      :"=r"(fp)
+                      :"r"(fp));
+#endif
+#if defined(__alpha__) && defined(__GNUC__)
+  __asm __volatile__ ("mov $30,%0"
+                      :"=r"(fp)
+                      :"r"(fp));
+#endif
+  if (!fp)
+  {
+    my_safe_printf_stderr("%s",
+      "frame pointer is NULL, did you compile with\n"
+      "-fomit-frame-pointer? Aborting backtrace!\n");
+    return;
+  }
+
+  if (!stack_bottom || (uchar*) stack_bottom > (uchar*) &fp)
+  {
+    ulong tmp= MY_MIN(0x10000,thread_stack);
+    /* Assume that the stack starts at the previous even 65K */
+    stack_bottom= (uchar*) (((ulong) &fp + tmp) & ~(ulong) 0xFFFF);
+    my_safe_printf_stderr("Cannot determine thread, fp=%p, "
+                          "backtrace may not be correct.\n", fp);
+  }
+  /*
+    NOTE(review): the subtraction below is done on a uchar** value, so
+    thread_stack is scaled by the pointer size - confirm this is intended.
+  */
+  if (fp > (uchar**) stack_bottom ||
+      fp < (uchar**) stack_bottom - thread_stack)
+  {
+    my_safe_printf_stderr("Bogus stack limit or frame pointer, "
+                          "fp=%p, stack_bottom=%p, thread_stack=%ld, "
+                          "aborting backtrace.\n",
+                          fp, stack_bottom, thread_stack);
+    return;
+  }
+
+  my_safe_printf_stderr("%s",
+    "Stack range sanity check OK, backtrace follows:\n");
+#if defined(__alpha__) && defined(__GNUC__)
+  my_safe_printf_stderr("%s",
+    "Warning: Alpha stacks are difficult -"
+    "will be taking some wild guesses, stack trace may be incorrect or "
+    "terminate abruptly\n");
+
+  /* On Alpha, we need to get pc */
+  __asm __volatile__ ("bsr %0, do_next; do_next: "
+                      :"=r"(pc)
+                      :"r"(pc));
+#endif /* __alpha__ */
+
+  /* We are 1 frame above signal frame with NPTL */
+  sigreturn_frame_count = 1;
+
+  while (fp < (uchar**) stack_bottom)
+  {
+#if defined(__i386__) || defined(__x86_64__)
+    uchar** new_fp = (uchar**)*fp;
+    my_safe_printf_stderr("%p\n",
+                          frame_count == sigreturn_frame_count ?
+                          *(fp + SIGRETURN_FRAME_OFFSET) : *(fp + 1));
+#endif /* defined(__386__) || defined(__x86_64__) */
+
+#if defined(__alpha__) && defined(__GNUC__)
+    uchar** new_fp = find_prev_fp(pc, fp);
+    if (frame_count == sigreturn_frame_count - 1)
+    {
+      new_fp += 90;
+    }
+
+    if (fp && pc)
+    {
+      pc = find_prev_pc(pc, fp);
+      if (pc)
+        my_safe_printf_stderr("%p\n", pc);
+      else
+      {
+        my_safe_printf_stderr("%s",
+          "Not smart enough to deal with the rest of this stack\n");
+        goto end;
+      }
+    }
+    else
+    {
+      my_safe_printf_stderr("%s",
+        "Not smart enough to deal with the rest of this stack\n");
+      goto end;
+    }
+#endif /* defined(__alpha__) && defined(__GNUC__) */
+    if (new_fp <= fp ) /* frames must move strictly upwards, else stop */
+    {
+      my_safe_printf_stderr("New value of fp=%p failed sanity check, "
+                            "terminating stack trace!\n", new_fp);
+      goto end;
+    }
+    fp = new_fp;
+    ++frame_count;
+  }
+  my_safe_printf_stderr("%s",
+                        "Stack trace seems successful - bottom reached\n");
+
+end:
+  if (!silent)
+    my_safe_printf_stderr("%s",
+      "Please read "
+      "http://dev.mysql.com/doc/refman/5.1/en/resolve-stack-dump.html\n"
+      "and follow instructions on how to resolve the stack trace.\n"
+      "Resolved stack trace is much more helpful in diagnosing the\n"
+      "problem, so please do resolve it\n");
+}
+#endif /* TARGET_OS_LINUX */
+#endif /* HAVE_STACKTRACE */
+
+/*
+  Produce a core for the thread
+
+  Resets the disposition of 'sig' to the default and sends the signal to
+  the calling thread with pthread_kill().
+*/
+void my_write_core(int sig)
+{
+  signal(sig, SIG_DFL);
+#ifdef HAVE_gcov
+  /*
+    For GCOV build, crashing will prevent the writing of code coverage
+    information from this process, causing gcov output to be incomplete.
+    So we force the writing of coverage information here before terminating.
+  */
+  __gcov_dump();
+#endif
+  pthread_kill(pthread_self(), sig);
+#if defined(P_MYID) && !defined(SCO)
+  /* On Solaris, the above kill is not enough */
+  sigsend(P_PID,P_MYID,sig);
+#endif
+}
+
+#else /* _WIN32*/
+
+#ifdef _MSC_VER
+/* Silence warning in OS header dbghelp.h */
+#pragma warning(push)
+#pragma warning(disable : 4091)
+#endif
+
+#include
+
+#ifdef _MSC_VER
+#pragma warning(pop)
+#endif
+
+#include
+#include
+#if _MSC_VER
+#pragma comment(lib, "dbghelp")
+#endif
+
+static EXCEPTION_POINTERS *exception_ptrs;
+
+#define MODULE64_SIZE_WINXP 576
+#define STACKWALK_MAX_FRAMES 64
+/*
+  Remember the exception context; my_print_stacktrace() and my_write_core()
+  below do nothing until this has been set.
+*/
+void my_set_exception_pointers(EXCEPTION_POINTERS *ep)
+{
+  exception_ptrs = ep;
+}
+
+/*
+  Appends directory to symbol path.
+
+  A ';' is appended to 'dir' itself (the caller's buffer is modified)
+  and the result is added to 'path' only if 'path' does not already
+  contain it.
+*/
+static void add_to_symbol_path(char *path, size_t path_buffer_size,
+                               char *dir, size_t dir_buffer_size)
+{
+  strcat_s(dir, dir_buffer_size, ";");
+  if (!strstr(path, dir))
+  {
+    strcat_s(path, path_buffer_size, dir);
+  }
+}
+
+/*
+  Get symbol path - semicolon-separated list of directories to search
+  for debug symbols. We expect PDB in the same directory as
+  corresponding exe or dll, so the path is build from directories of
+  the loaded modules. If environment variable _NT_SYMBOL_PATH is set,
+  it's value appended to the symbol search path
+*/
+static void get_symbol_path(char *path, size_t size)
+{
+  HANDLE hSnap;
+  char *envvar;
+  char *p;
+#ifndef DBUG_OFF
+  static char pdb_debug_dir[MAX_PATH + 7];
+#endif
+
+  path[0]= '\0';
+
+#ifndef DBUG_OFF
+  /*
+    Add "debug" subdirectory of the application directory, sometimes PDB will
+    placed here by installation.
+  */
+  GetModuleFileName(NULL, pdb_debug_dir, MAX_PATH);
+  p= strrchr(pdb_debug_dir, '\\');
+  if(p)
+  {
+    *p= 0;
+    strcat_s(pdb_debug_dir, sizeof(pdb_debug_dir), "\\debug;");
+    add_to_symbol_path(path, size, pdb_debug_dir, sizeof(pdb_debug_dir));
+  }
+#endif
+
+  /*
+    Enumerate all modules, and add their directories to the path.
+    Avoid duplicate entries.
+  */
+  hSnap= CreateToolhelp32Snapshot(TH32CS_SNAPMODULE, GetCurrentProcessId());
+  if (hSnap != INVALID_HANDLE_VALUE)
+  {
+    BOOL ret;
+    MODULEENTRY32 mod;
+    mod.dwSize= sizeof(MODULEENTRY32);
+    for (ret= Module32First(hSnap, &mod); ret; ret= Module32Next(hSnap, &mod))
+    {
+      char *module_dir= mod.szExePath;
+      p= strrchr(module_dir,'\\');
+      if (!p)
+      {
+        /*
+          Path separator was not found. Not known to happen, if ever happens,
+          will indicate current directory.
+        */
+        module_dir[0]= '.';
+        module_dir[1]= '\0';
+      }
+      else
+      {
+        *p= '\0';
+      }
+      add_to_symbol_path(path, size, module_dir,sizeof(mod.szExePath));
+    }
+    CloseHandle(hSnap);
+  }
+
+
+  /* Add _NT_SYMBOL_PATH, if present. */
+  envvar= getenv("_NT_SYMBOL_PATH");
+  if(envvar)
+  {
+    strcat_s(path, size, envvar);
+  }
+}
+
+#define MAX_SYMBOL_PATH 32768
+
+/* Platform SDK in VS2003 does not have definition for SYMOPT_NO_PROMPTS*/
+#ifndef SYMOPT_NO_PROMPTS
+#define SYMOPT_NO_PROMPTS 0
+#endif
+/*
+  Print up to STACKWALK_MAX_FRAMES frames of the context saved by
+  my_set_exception_pointers(), one line per frame in the form
+  module!function()[file:line], using whatever parts can be resolved.
+*/
+void my_print_stacktrace(uchar* unused1, ulong unused2, my_bool silent)
+{
+  HANDLE hProcess= GetCurrentProcess();
+  HANDLE hThread= GetCurrentThread();
+  static IMAGEHLP_MODULE64 module= {sizeof(module)};
+  static IMAGEHLP_SYMBOL64_PACKAGE package;
+  DWORD64 addr;
+  DWORD machine;
+  int i;
+  CONTEXT context;
+  STACKFRAME64 frame={0};
+  static char symbol_path[MAX_SYMBOL_PATH];
+
+  if(!exception_ptrs)
+    return;
+
+  /* Copy context, as stackwalking on original will unwind the stack */
+  context = *(exception_ptrs->ContextRecord);
+  /*Initialize symbols.*/
+  SymSetOptions(SYMOPT_LOAD_LINES|SYMOPT_NO_PROMPTS|SYMOPT_DEFERRED_LOADS|SYMOPT_DEBUG);
+  get_symbol_path(symbol_path, sizeof(symbol_path));
+  SymInitialize(hProcess, symbol_path, TRUE);
+
+  /*Prepare stackframe for the first StackWalk64 call*/
+  frame.AddrFrame.Mode= frame.AddrPC.Mode= frame.AddrStack.Mode= AddrModeFlat;
+#if (defined _M_IX86)
+  machine= IMAGE_FILE_MACHINE_I386;
+  frame.AddrFrame.Offset= context.Ebp;
+  frame.AddrPC.Offset= context.Eip;
+  frame.AddrStack.Offset= context.Esp;
+#elif (defined _M_X64)
+  machine = IMAGE_FILE_MACHINE_AMD64;
+  frame.AddrFrame.Offset= context.Rbp;
+  frame.AddrPC.Offset= context.Rip;
+  frame.AddrStack.Offset= context.Rsp;
+#elif defined(_M_ARM64)
+  machine= IMAGE_FILE_MACHINE_ARM64;
+  frame.AddrFrame.Offset= context.Fp;
+  frame.AddrPC.Offset= context.Pc;
+  frame.AddrStack.Offset= context.Sp;
+#else /* NOTE(review): "#pragma error" looks like it was meant to be "#error" - confirm */
+#pragma error ("unsupported architecture")
+#endif
+
+  package.sym.SizeOfStruct= sizeof(package.sym);
+  package.sym.MaxNameLength= sizeof(package.name);
+
+  /*Walk the stack, output useful information*/
+  for(i= 0; i< STACKWALK_MAX_FRAMES;i++)
+  {
+    DWORD64 function_offset= 0;
+    DWORD line_offset= 0;
+    IMAGEHLP_LINE64 line= {sizeof(line)};
+    BOOL have_module= FALSE;
+    BOOL have_symbol= FALSE;
+    BOOL have_source= FALSE;
+
+    if(!StackWalk64(machine, hProcess, hThread, &frame, &context, 0, 0, 0 ,0))
+      break;
+    addr= frame.AddrPC.Offset;
+
+    have_module= SymGetModuleInfo64(hProcess,addr,&module);
+#ifdef _M_IX86
+    if(!have_module)
+    {
+      /*
+        ModuleInfo structure has been "compatibly" extended in
+        releases after XP, and its size was increased. To make XP
+        dbghelp.dll function happy, pretend passing the old structure.
+      */
+      module.SizeOfStruct= MODULE64_SIZE_WINXP;
+      have_module= SymGetModuleInfo64(hProcess, addr, &module);
+    }
+#endif
+
+    have_symbol= SymGetSymFromAddr64(hProcess, addr, &function_offset,
+                                     &(package.sym));
+    have_source= SymGetLineFromAddr64(hProcess, addr, &line_offset, &line);
+
+    if(have_module)
+    {
+      const char *base_image_name= my_basename(module.ImageName);
+      my_safe_printf_stderr("%s!", base_image_name);
+    }
+    if(have_symbol)
+      my_safe_printf_stderr("%s()", package.sym.Name);
+
+    else if(have_module)
+      my_safe_printf_stderr("%s", "???");
+
+    if(have_source)
+    {
+      const char *base_file_name= my_basename(line.FileName);
+      my_safe_printf_stderr("[%s:%lu]",
+                            base_file_name, line.LineNumber);
+    }
+    my_safe_printf_stderr("%s", "\n");
+  }
+}
+
+
+/*
+  Write dump. 
The dump is created in current directory, + file name is constructed from executable name plus + ".dmp" extension +*/ +void my_write_core(int unused) +{ + char path[MAX_PATH]; + char dump_fname[MAX_PATH]= "core.dmp"; + MINIDUMP_EXCEPTION_INFORMATION info; + HANDLE hFile; + + if(!exception_ptrs) + return; + + info.ExceptionPointers= exception_ptrs; + info.ClientPointers= FALSE; + info.ThreadId= GetCurrentThreadId(); + + if(GetModuleFileName(NULL, path, sizeof(path))) + { + _splitpath(path, NULL, NULL,dump_fname,NULL); + strcat_s(dump_fname, sizeof(dump_fname), ".dmp"); + } + + hFile= CreateFile(dump_fname, GENERIC_WRITE, 0, 0, CREATE_ALWAYS, + FILE_ATTRIBUTE_NORMAL, 0); + if(hFile) + { + /* Create minidump */ + if(MiniDumpWriteDump(GetCurrentProcess(), GetCurrentProcessId(), + hFile, MiniDumpNormal, &info, 0, 0)) + { + my_safe_printf_stderr("Minidump written to %s\n", + _fullpath(path, dump_fname, sizeof(path)) ? + path : dump_fname); + } + else + { + my_safe_printf_stderr("MiniDumpWriteDump() failed, last error %u\n", + (uint) GetLastError()); + } + CloseHandle(hFile); + } + else + { + my_safe_printf_stderr("CreateFile(%s) failed, last error %u\n", + dump_fname, (uint) GetLastError()); + } +} + + +int my_safe_print_str(const char *val, size_t len) +{ + __try + { + my_write_stderr(val, len); + } + __except(EXCEPTION_EXECUTE_HANDLER) + { + my_safe_printf_stderr("%s", "is an invalid string pointer"); + } + return 0; +} +#endif /*_WIN32*/ + + +size_t my_write_stderr(const void *buf, size_t count) +{ + return (size_t) write(fileno(stderr), buf, (uint)count); +} + + +size_t my_safe_printf_stderr(const char* fmt, ...) 
+{ + char to[512]; + size_t result; + va_list args; + va_start(args,fmt); + result= my_vsnprintf(to, sizeof(to), fmt, args); + va_end(args); + my_write_stderr(to, result); + return result; +} diff --git a/mysys/string.c b/mysys/string.c new file mode 100644 index 00000000..91e4306c --- /dev/null +++ b/mysys/string.c @@ -0,0 +1,229 @@ +/* Copyright (c) 2000, 2010, Oracle and/or its affiliates. All rights reserved. + + This program is free software; you can redistribute it and/or modify + it under the terms of the GNU General Public License as published by + the Free Software Foundation; version 2 of the License. + + This program is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + GNU General Public License for more details. + + You should have received a copy of the GNU General Public License + along with this program; if not, write to the Free Software + Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1335 USA */ + +/* + Code for handling strings with can grow dynamicly. + Copyright Monty Program KB. + By monty. 
+*/
+
+#include "mysys_priv.h"
+#include
+
+/*
+  Initialize a dynamic string, optionally with a copy of init_str.
+
+  @param str              String to initialize
+  @param init_str         Initial contents, or NULL for an empty string
+  @param init_alloc       Requested initial buffer size (0 = use increment)
+  @param alloc_increment  Growth step (0 = 128)
+
+  The buffer is always made large enough for init_str and its terminating
+  zero, whatever init_alloc says.
+
+  @return FALSE on success, TRUE if the allocation failed
+*/
+my_bool init_dynamic_string(DYNAMIC_STRING *str, const char *init_str,
+                            size_t init_alloc, size_t alloc_increment)
+{
+  size_t length;
+  DBUG_ENTER("init_dynamic_string");
+
+  if (!alloc_increment)
+    alloc_increment=128;
+  length=1;
+  if (init_str && (length= strlen(init_str)+1) < init_alloc)
+    init_alloc=((length+alloc_increment-1)/alloc_increment)*alloc_increment;
+  if (!init_alloc)
+    init_alloc=alloc_increment;
+  /* Never allocate less than what the memcpy() below is going to write */
+  if (init_alloc < length)
+    init_alloc=((length+alloc_increment-1)/alloc_increment)*alloc_increment;
+
+  if (!(str->str=(char*) my_malloc(key_memory_DYNAMIC_STRING,
+                                   init_alloc, MYF(MY_WME))))
+    DBUG_RETURN(TRUE);
+  str->length=length-1;
+  if (init_str)
+    memcpy(str->str,init_str,length);
+  str->max_length=init_alloc;
+  str->alloc_increment=alloc_increment;
+  DBUG_RETURN(FALSE);
+}
+
+
+/*
+  Replace the contents of a dynamic string with a copy of init_str
+  (NULL gives length 0).
+
+  On allocation failure the string is left untouched: the old buffer
+  pointer and max_length stay valid.
+
+  @return FALSE on success, TRUE if the buffer could not be enlarged
+*/
+my_bool dynstr_set(DYNAMIC_STRING *str, const char *init_str)
+{
+  size_t length=0;
+  DBUG_ENTER("dynstr_set");
+
+  if (init_str && (length= strlen(init_str)+1) > str->max_length)
+  {
+    char *new_ptr;
+    size_t new_length=((length+str->alloc_increment-1)/str->alloc_increment)*
+                      str->alloc_increment;
+    if (!new_length)
+      new_length=str->alloc_increment;
+    /* Keep the old pointer until the reallocation is known to have worked */
+    if (!(new_ptr=(char*) my_realloc(key_memory_DYNAMIC_STRING,
+                                     str->str, new_length, MYF(MY_WME))))
+      DBUG_RETURN(TRUE);
+    str->str=new_ptr;
+    str->max_length=new_length;
+  }
+  if (init_str)
+  {
+    str->length=length-1;
+    memcpy(str->str,init_str,length);
+  }
+  else
+    str->length=0;
+  DBUG_RETURN(FALSE);
+}
+
+
+/*
+  Make sure at least additional_size bytes can be stored after the
+  current end of the string.
+
+  On allocation failure the string is left untouched.
+
+  @return FALSE on success (or nothing to do), TRUE on allocation failure
+*/
+my_bool dynstr_realloc(DYNAMIC_STRING *str, size_t additional_size)
+{
+  DBUG_ENTER("dynstr_realloc");
+
+  if (!additional_size) DBUG_RETURN(FALSE);
+  if (str->length + additional_size > str->max_length)
+  {
+    char *new_ptr;
+    size_t new_length=((str->length + additional_size+str->alloc_increment-1)/
+                       str->alloc_increment)*str->alloc_increment;
+    if (!(new_ptr=(char*) my_realloc(key_memory_DYNAMIC_STRING, str->str,
+                                     new_length, MYF(MY_WME))))
+      DBUG_RETURN(TRUE);
+    str->str=new_ptr;
+    str->max_length=new_length;
+  }
+  DBUG_RETURN(FALSE);
+}
+
+
+/*
+  Append a zero terminated string.
+
+  @return result of dynstr_append_mem()
+*/
+my_bool dynstr_append(DYNAMIC_STRING *str, const char *append)
+{
+  /* dynstr_append_mem() takes a size_t, so no narrowing cast is needed */
+  return dynstr_append_mem(str,append,strlen(append));
+}
+
+
+my_bool dynstr_append_mem(DYNAMIC_STRING *str, const char *append, + size_t length) +{ + char *new_ptr; + DBUG_ENTER("dynstr_append_mem"); + if (str->length+length >= str->max_length) + { + size_t new_length=(str->length+length+str->alloc_increment)/ + str->alloc_increment; + new_length*=str->alloc_increment; + if (!(new_ptr=(char*) my_realloc(key_memory_DYNAMIC_STRING, str->str, + new_length, MYF(MY_WME)))) + DBUG_RETURN(TRUE); + str->str=new_ptr; + str->max_length=new_length; + } + memcpy(str->str + str->length,append,length); + str->length+=length; + str->str[str->length]=0; /* Safety for C programs */ + DBUG_RETURN(FALSE); +} + + +my_bool dynstr_trunc(DYNAMIC_STRING *str, size_t n) +{ + str->length-=n; + str->str[str->length]= '\0'; + return FALSE; +} + +/* + Concatenates any number of strings, escapes any OS quote in the result then + surround the whole affair in another set of quotes which is finally appended + to specified DYNAMIC_STRING. This function is especially useful when + building strings to be executed with the system() function. + + @param str Dynamic String which will have addtional strings appended. + @param append String to be appended. + @param ... Optional. Additional string(s) to be appended. + + @note The final argument in the list must be NullS even if no additional + options are passed. + + @return True = Success. +*/ + +my_bool dynstr_append_os_quoted(DYNAMIC_STRING *str, const char *append, ...) 
+{ +#ifdef _WIN32 + LEX_CSTRING quote= { C_STRING_WITH_LEN("\"") }; + LEX_CSTRING replace= { C_STRING_WITH_LEN("\\\"") }; +#else + LEX_CSTRING quote= { C_STRING_WITH_LEN("\'") }; + LEX_CSTRING replace= { C_STRING_WITH_LEN("'\"'\"'") }; +#endif /* _WIN32 */ + my_bool ret= TRUE; + va_list dirty_text; + + ret&= dynstr_append_mem(str, quote.str, quote.length); /* Leading quote */ + va_start(dirty_text, append); + while (append != NullS) + { + const char *cur_pos= append; + const char *next_pos= cur_pos; + + /* Search for quote in each string and replace with escaped quote */ + while(*(next_pos= strcend(cur_pos, quote.str[0])) != '\0') + { + ret&= dynstr_append_mem(str, cur_pos, (uint) (next_pos - cur_pos)); + ret&= dynstr_append_mem(str, replace.str, replace.length); + cur_pos= next_pos + 1; + } + ret&= dynstr_append_mem(str, cur_pos, (uint) (next_pos - cur_pos)); + append= va_arg(dirty_text, char *); + } + va_end(dirty_text); + ret&= dynstr_append_mem(str, quote.str, quote.length); /* Trailing quote */ + + return ret; +} + +my_bool dynstr_append_quoted(DYNAMIC_STRING *str, + const char *append, size_t len, + char quote) +{ + size_t additional= (str->alloc_increment ? 
str->alloc_increment : 10); + size_t lim= additional; + size_t i; + if (dynstr_realloc(str, len + additional + 2)) + return TRUE; + str->str[str->length++]= quote; + for (i= 0; i < len; i++) + { + register char c= append[i]; + if (c == quote || c == '\\') + { + if (!lim) + { + if (dynstr_realloc(str, additional)) + return TRUE; + lim= additional; + } + lim--; + str->str[str->length++]= '\\'; + } + str->str[str->length++]= c; + } + str->str[str->length++]= quote; + return FALSE; +} + + +void dynstr_free(DYNAMIC_STRING *str) +{ + if (str->str) /* Safety to allow double free */ + my_free(str->str); + str->str= NULL; +} + + +/* Give over the control of the dynamic string to caller */ + +void dynstr_reassociate(DYNAMIC_STRING *str, char **ptr, size_t *length, + size_t *alloc_length) +{ + *ptr= str->str; + *length= str->length; + *alloc_length= str->max_length; + str->str=0; +} diff --git a/mysys/test_charset.c b/mysys/test_charset.c new file mode 100644 index 00000000..133a5ca1 --- /dev/null +++ b/mysys/test_charset.c @@ -0,0 +1,81 @@ +/* Copyright (c) 2000, 2010, Oracle and/or its affiliates. All rights reserved. + + This program is free software; you can redistribute it and/or modify + it under the terms of the GNU General Public License as published by + the Free Software Foundation; version 2 of the License. + + This program is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + GNU General Public License for more details. 
+ + You should have received a copy of the GNU General Public License + along with this program; if not, write to the Free Software + Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1335 USA */ + +#include +#include +#include +#include + +#include + +static void _print_array(uint8 *data, uint size) +{ + uint i; + for (i = 0; i < size; ++i) + { + if (i == 0 || i % 16 == size % 16) printf(" "); + printf(" %02x", data[i]); + if ((i+1) % 16 == size % 16) printf("\n"); + } +} + +static void _print_csinfo(CHARSET_INFO *cs) +{ + printf("%s #%d\n", cs->name, cs->number); + printf("ctype:\n"); _print_array(cs->ctype, 257); + printf("to_lower:\n"); _print_array(cs->to_lower, 256); + printf("to_upper:\n"); _print_array(cs->to_upper, 256); + printf("sort_order:\n"); _print_array(cs->sort_order, 256); + printf("collate: %3s (%d, %p, %p, %p)\n", + cs->strxfrm_multiply ? "yes" : "no", + cs->strxfrm_multiply, + cs->strnncoll, + cs->strnxfrm, + cs->like_range); + printf("multi-byte: %3s (%d, %p, %p, %p)\n", + cs->mbmaxlen > 1 ? 
"yes" : "no", + cs->mbmaxlen, + cs->ismbchar, + cs->ismbhead, + cs->mbcharlen); +} + + +int main(int argc, char **argv) { + const char *the_set = MYSQL_CHARSET; + char *cs_list; + int argcnt = 1; + CHARSET_INFO *cs; + + my_init(); + + if (argc > argcnt && argv[argcnt][0] == '-' && argv[argcnt][1] == '#') + DBUG_PUSH(argv[argcnt++]+2); + + if (argc > argcnt) + the_set = argv[argcnt++]; + + if (argc > argcnt) + charsets_dir = argv[argcnt++]; + + if (!(cs= get_charset_by_name(the_set, MYF(MY_UTF8_IS_UTF8MB3 | MY_WME)))) + return 1; + + puts("CHARSET INFO:"); + _print_csinfo(cs); + fflush(stdout); + + return 0; +} diff --git a/mysys/test_dir.c b/mysys/test_dir.c new file mode 100644 index 00000000..00b0c778 --- /dev/null +++ b/mysys/test_dir.c @@ -0,0 +1,48 @@ +/* Copyright (c) 2000, 2006 MySQL AB + Use is subject to license terms + + This program is free software; you can redistribute it and/or modify + it under the terms of the GNU General Public License as published by + the Free Software Foundation; version 2 of the License. + + This program is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + GNU General Public License for more details. 
+ + You should have received a copy of the GNU General Public License + along with this program; if not, write to the Free Software + Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1335 USA */ + +/* TODO: Test all functions */ + +#include "mysys_priv.h" +#include "my_dir.h" + +int main(int argc, char *argv[]) +{ + MY_DIR *a; + uint f; + DBUG_ENTER ("main"); + DBUG_PROCESS (argv[0]); + + if (--argc > 0 && (*(++argv))[0] == '-' && (*argv)[1] == '#' ) + DBUG_PUSH (*argv+2); + + a = my_dir("./", 0); + for (f = 0; f < a->number_off_files; f++) + { + printf("%s\n", a->dir_entry[f].name); + } + + a = my_dir("./", MY_WANT_STAT); + for (f = 0; f < a->number_off_files; f++) + { + printf("%s %d %d %d %s\n", a->dir_entry[f].name, + (int) a->dir_entry[f].mystat.st_size, + (int) a->dir_entry[f].mystat.st_uid, + (int) a->dir_entry[f].mystat.st_gid, + S_ISDIR(a->dir_entry[f].mystat.st_mode) ? "dir" : ""); + } + return 0; +} diff --git a/mysys/test_thr_mutex.c b/mysys/test_thr_mutex.c new file mode 100644 index 00000000..00b1f5f4 --- /dev/null +++ b/mysys/test_thr_mutex.c @@ -0,0 +1,162 @@ +/* Copyright (C) 2008 Sun Microsystems, Inc + + This program is free software; you can redistribute it and/or modify + it under the terms of the GNU General Public License as published by + the Free Software Foundation; version 2 of the License. + + This program is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + GNU General Public License for more details. 
+ + You should have received a copy of the GNU General Public License + along with this program; if not, write to the Free Software + Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1335 USA */ + +/* Testing of deadlock detector */ + +#include +#include + + +int main(int argc __attribute__((unused)), char** argv) +{ + pthread_mutex_t LOCK_A, LOCK_B, LOCK_C, LOCK_D, LOCK_E, LOCK_F, LOCK_G; + pthread_mutex_t LOCK_H, LOCK_I; + MY_INIT(argv[0]); + DBUG_ENTER("main"); + + DBUG_PUSH("d:t:O,/tmp/trace"); + printf("This program is testing the mutex deadlock detection.\n" + "It should print out different failures of wrong mutex usage" + "on stderr\n\n"); + + safe_mutex_deadlock_detector= 1; + pthread_mutex_init(&LOCK_A, MY_MUTEX_INIT_FAST); + pthread_mutex_init(&LOCK_B, MY_MUTEX_INIT_FAST); + pthread_mutex_init(&LOCK_C, MY_MUTEX_INIT_FAST); + pthread_mutex_init(&LOCK_D, MY_MUTEX_INIT_FAST); + pthread_mutex_init(&LOCK_E, MY_MUTEX_INIT_FAST); + pthread_mutex_init(&LOCK_F, MY_MUTEX_INIT_FAST); + pthread_mutex_init(&LOCK_G, MY_MUTEX_INIT_FAST); + pthread_mutex_init(&LOCK_H, MY_MUTEX_INIT_FAST); + pthread_mutex_init(&LOCK_I, MY_MUTEX_INIT_FAST); + + printf("Testing A->B and B->A\n"); + fflush(stdout); + pthread_mutex_lock(&LOCK_A); + pthread_mutex_lock(&LOCK_B); + pthread_mutex_unlock(&LOCK_A); + pthread_mutex_unlock(&LOCK_B); + + /* Test different (wrong) lock order */ + pthread_mutex_lock(&LOCK_B); + pthread_mutex_lock(&LOCK_A); /* Should give warning */ + + pthread_mutex_unlock(&LOCK_A); + pthread_mutex_unlock(&LOCK_B); + + /* Check that we don't get another warning for same lock */ + printf("Testing A->B and B->A again (should not give a warning)\n"); + pthread_mutex_lock(&LOCK_B); + pthread_mutex_lock(&LOCK_A); + pthread_mutex_unlock(&LOCK_A); + pthread_mutex_unlock(&LOCK_B); + + /* + Test of ring with many mutex + We also unlock mutex in different orders to get the unlock code properly + tested. 
+ */ + printf("Testing A->C and C->D and D->A\n"); + pthread_mutex_lock(&LOCK_A); + pthread_mutex_lock(&LOCK_C); + pthread_mutex_unlock(&LOCK_A); + pthread_mutex_unlock(&LOCK_C); + pthread_mutex_lock(&LOCK_C); + pthread_mutex_lock(&LOCK_D); + pthread_mutex_unlock(&LOCK_D); + pthread_mutex_unlock(&LOCK_C); + + pthread_mutex_lock(&LOCK_D); + pthread_mutex_lock(&LOCK_A); /* Should give warning */ + + pthread_mutex_unlock(&LOCK_A); + pthread_mutex_unlock(&LOCK_D); + + printf("Testing E -> F ; H -> I ; F -> H ; H -> I -> E\n"); + fflush(stdout); + + pthread_mutex_lock(&LOCK_E); + pthread_mutex_lock(&LOCK_F); + pthread_mutex_unlock(&LOCK_E); + pthread_mutex_unlock(&LOCK_F); + pthread_mutex_lock(&LOCK_H); + pthread_mutex_lock(&LOCK_I); + pthread_mutex_unlock(&LOCK_I); + pthread_mutex_unlock(&LOCK_H); + pthread_mutex_lock(&LOCK_F); + pthread_mutex_lock(&LOCK_H); + pthread_mutex_unlock(&LOCK_H); + pthread_mutex_unlock(&LOCK_F); + + pthread_mutex_lock(&LOCK_H); + pthread_mutex_lock(&LOCK_I); + pthread_mutex_lock(&LOCK_E); /* Should give warning */ + + pthread_mutex_unlock(&LOCK_E); + pthread_mutex_unlock(&LOCK_I); + pthread_mutex_unlock(&LOCK_H); + + printf("\nFollowing shouldn't give any warnings\n"); + printf("Testing A->B and B->A without deadlock detection\n"); + fflush(stdout); + + /* Reinitialize mutex to get rid of old wrong usage markers */ + pthread_mutex_destroy(&LOCK_A); + pthread_mutex_destroy(&LOCK_B); + pthread_mutex_init(&LOCK_A, MY_MUTEX_INIT_FAST); + pthread_mutex_init(&LOCK_B, MY_MUTEX_INIT_FAST); + + /* Start testing */ + my_pthread_mutex_lock(&LOCK_A, MYF(MYF_NO_DEADLOCK_DETECTION)); + pthread_mutex_lock(&LOCK_B); + pthread_mutex_unlock(&LOCK_A); + pthread_mutex_unlock(&LOCK_B); + + pthread_mutex_lock(&LOCK_A); + my_pthread_mutex_lock(&LOCK_B, MYF(MYF_NO_DEADLOCK_DETECTION)); + pthread_mutex_unlock(&LOCK_A); + pthread_mutex_unlock(&LOCK_B); + + printf("Testing A -> C ; B -> C ; A->B\n"); + fflush(stdout); + pthread_mutex_lock(&LOCK_A); + 
pthread_mutex_lock(&LOCK_C); + pthread_mutex_unlock(&LOCK_C); + pthread_mutex_unlock(&LOCK_A); + + pthread_mutex_lock(&LOCK_B); + pthread_mutex_lock(&LOCK_C); + pthread_mutex_unlock(&LOCK_C); + pthread_mutex_unlock(&LOCK_B); + + pthread_mutex_lock(&LOCK_A); + pthread_mutex_lock(&LOCK_B); + pthread_mutex_unlock(&LOCK_B); + pthread_mutex_unlock(&LOCK_A); + + /* Cleanup */ + pthread_mutex_destroy(&LOCK_A); + pthread_mutex_destroy(&LOCK_B); + pthread_mutex_destroy(&LOCK_C); + pthread_mutex_destroy(&LOCK_D); + pthread_mutex_destroy(&LOCK_E); + pthread_mutex_destroy(&LOCK_F); + pthread_mutex_destroy(&LOCK_G); + pthread_mutex_destroy(&LOCK_H); + pthread_mutex_destroy(&LOCK_I); + + my_end(MY_DONT_FREE_DBUG); + exit(0); +} diff --git a/mysys/test_xml.c b/mysys/test_xml.c new file mode 100644 index 00000000..ac106da6 --- /dev/null +++ b/mysys/test_xml.c @@ -0,0 +1,105 @@ +/* Copyright (c) 2000, 2002 MySQL AB + Use is subject to license terms + + This program is free software; you can redistribute it and/or modify + it under the terms of the GNU General Public License as published by + the Free Software Foundation; version 2 of the License. + + This program is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + GNU General Public License for more details. 
+ + You should have received a copy of the GNU General Public License + along with this program; if not, write to the Free Software + Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1335 USA */ + +#include +#include +#include +#include +#include "my_xml.h" + +static void mstr(char *str,const char *src,uint l1,uint l2) +{ + l1 = l1 +#include +#include +#include + +#define MAX_RECORDS 100000 +#define MAX_KEYS 3 + +static int get_options(int argc, char *argv[]); +static int do_test(); +static int rnd(int max_value); + +static uint testflag=0,recant=10000,reclength=37; +static uint16 key1[1000]; + +#ifdef DBUG_OFF +#define hash_check(A) 0 +#else +my_bool hash_check(HASH *hash); +#endif + +void free_record(void *record); + +static uchar *hash2_key(const uchar *rec,uint *length, + my_bool not_used __attribute__((unused))) +{ + *length=(uint) (uchar) rec[reclength-1]; + return (uchar*) rec; +} + +/* main program */ + +int main(int argc,char *argv[]) +{ + MY_INIT(argv[0]); + DBUG_PROCESS(argv[0]); + + get_options(argc,argv); + + exit(do_test()); +} + +static int do_test() +{ + register uint i,j; + uint n1,n2,n3; + uint write_count,update,delete; + ulong pos; + unsigned long key_check; + char *record,*recpos,oldrecord[120],key[10]; + HASH hash,hash2; + DBUG_ENTER("do_test"); + + write_count=update=delete=0; + key_check=0; + bzero((char*) key1,sizeof(key1[0])*1000); + + printf("- Creating hash\n"); + if (hash_init(&hash, default_charset_info, recant/2, 0, 6, 0, free_record, 0)) + goto err; + printf("- Writing records:\n"); + + for (i=0 ; i < recant ; i++) + { + n1=rnd(1000); n2=rnd(100); n3=rnd(MY_MIN(recant*5,MAX_RECORDS)); + record= (char*) my_malloc(reclength,MYF(MY_FAE)); + sprintf(record,"%6d:%4d:%8d:Pos: %4d ",n1,n2,n3,write_count); + if (my_hash_insert(&hash,record)) + { + printf("Error: %d in write at record: %d\n",my_errno,i); + goto err; + } + key1[n1]++; + key_check+=n1; + write_count++; + } + if (hash_check(&hash)) + { + puts("Heap keys 
crashed"); + goto err; + } + printf("- Delete\n"); + for (i=0 ; i < write_count/10 ; i++) + { + for (j=rnd(1000) ; j>0 && key1[j] == 0 ; j--) ; + if (j != 0) + { + sprintf(key,"%6d",j); + if (!(recpos=hash_search(&hash,key,0))) + { + printf("can't find key1: \"%s\"\n",key); + goto err; + } + key1[atoi(recpos)]--; + key_check-=atoi(recpos); + memcpy(oldrecord,recpos,reclength); + if (hash_delete(&hash,recpos)) + { + printf("error: %d; can't delete record: \"%s\"\n", my_errno,oldrecord); + goto err; + } + delete++; + if (testflag == 2 && hash_check(&hash)) + { + puts("Heap keys crashed"); + goto err; + } + } + } + if (hash_check(&hash)) + { + puts("Hash keys crashed"); + goto err; + } + + printf("- Update\n"); + for (i=0 ; i < write_count/10 ; i++) + { + n1=rnd(1000); n2=rnd(100); n3=rnd(MY_MIN(recant*2,MAX_RECORDS)); + for (j=rnd(1000) ; j>0 && key1[j] == 0 ; j--) ; + if (j) + { + sprintf(key,"%6d",j); + if (!(recpos=hash_search(&hash,key,0))) + { + printf("can't find key1: \"%s\"\n",key); + goto err; + } + key1[atoi(recpos)]--; + key_check=key_check-atoi(recpos)+n1; + key1[n1]++; + sprintf(recpos,"%6d:%4d:%8d:XXX: %4d ",n1,n2,n3,update); + update++; + if (hash_update(&hash,recpos,key,0)) + { + printf("can't update key1: \"%s\"\n",key); + goto err; + } + if (testflag == 3 && hash_check(&hash)) + { + printf("Heap keys crashed for %d update\n",update); + goto err; + } + } + } + if (hash_check(&hash)) + { + puts("Heap keys crashed"); + goto err; + } + + for (j=0 ; j < 1000 ; j++) + if (key1[j] > 1) + break; + // j will be 1000 only if we have no keys in the hash. This only happens + // when the parameter recant is set to 0 via command line argument. 
+ if (j < 1000 && key1[j] > 1) + { + HASH_SEARCH_STATE state; + printf("- Testing identical read\n"); + sprintf(key,"%6d",j); + pos=1; + if (!(recpos= hash_first(&hash, key, 0, &state))) + { + printf("can't find key1: \"%s\"\n",key); + goto err; + } + while (hash_next(&hash, key, 0, &state) && pos < (ulong) (key1[j]+10)) + pos++; + if (pos != (ulong) key1[j]) + { + printf("Found %ld copies of key: %s. Should be %d",pos,key,key1[j]); + goto err; + } + } + printf("- Creating output heap-file 2\n"); + if (hash_init(&hash2, default_charset_info, hash.records, 0, 0, hash2_key, free_record,0)) + goto err; + + printf("- Copying and removing records\n"); + pos=0; + while ((recpos=hash_element(&hash,0))) + { + record=(uchar*) my_malloc(reclength,MYF(MY_FAE)); + memcpy(record,recpos,reclength); + record[reclength-1]=rnd(5)+1; + if (my_hash_insert(&hash2,record)) + { + printf("Got error when inserting record: %*s",reclength,record); + goto err; + } + key_check-=atoi(record); + write_count++; + if (hash_delete(&hash,recpos)) + { + printf("Got error when deleting record: %*s",reclength,recpos); + goto err; + } + if (testflag==4) + { + if (hash_check(&hash) || hash_check(&hash2)) + { + puts("Hash keys crashed"); + goto err; + } + } + pos++; + } + if (hash_check(&hash) || hash_check(&hash2)) + { + puts("Hash keys crashed"); + goto err; + } + if (key_check != 0) + { + printf("Key check didn't get to 0 (%ld)\n",key_check); + } + + printf("\nFollowing test have been made:\n"); + printf("Write records: %d\nUpdate records: %d\nDelete records: %d\n", write_count, + update,delete); + hash_free(&hash); hash_free(&hash2); + my_end(MY_GIVE_INFO); + DBUG_RETURN(0); +err: + printf("Got error: %d when using hashing\n",my_errno); + DBUG_RETURN(-1); +} /* main */ + + +/* read options */ +/* NOTE! DBUG not initialised - no debugging here! 
*/ + +static int get_options(int argc, char **argv) +{ + char *pos,*progname; + + progname= argv[0]; + + while (--argc >0 && *(pos = *(++argv)) == '-' ) { + switch(*++pos) { + case 'm': /* records */ + recant=atoi(++pos); + break; + case 't': + testflag=atoi(++pos); /* testmod */ + break; + case 'V': + case 'I': + case '?': + printf("%s Ver 1.0 for %s at %s\n",progname,SYSTEM_TYPE,MACHINE_TYPE); + printf("MySQL AB, by Monty\n\n"); + printf("Usage: %s [-?ABIKLWv] [-m#] [-t#]\n",progname); + exit(0); + case '#': + DBUG_PUSH (++pos); + break; + } + } + return 0; +} /* get_options */ + + +/* Get a random number in the interval 0 <= x <= n */ + +static int rnd(int max_value) +{ + return (int) ((rand() & 32767)/32767.0*max_value); +} /* rnd */ + + +void free_record(void *record) +{ + my_free(record); +} diff --git a/mysys/thr_alarm.c b/mysys/thr_alarm.c new file mode 100644 index 00000000..b98775e1 --- /dev/null +++ b/mysys/thr_alarm.c @@ -0,0 +1,845 @@ +/* Copyright (c) 2000, 2013, Oracle and/or its affiliates + Copyright (c) 2012, 2014, SkySQL Ab + + This program is free software; you can redistribute it and/or modify + it under the terms of the GNU General Public License as published by + the Free Software Foundation; version 2 of the License. + + This program is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + GNU General Public License for more details. 
+ + You should have received a copy of the GNU General Public License + along with this program; if not, write to the Free Software + Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1335 USA */ + +/* To avoid problems with alarms in debug code, we disable DBUG here */ +#define FORCE_DBUG_OFF +#include "mysys_priv.h" +#include + +#if !defined(DONT_USE_THR_ALARM) +#include +#include +#include +#include +#include +#include +#include "thr_alarm.h" + +#ifdef HAVE_SYS_SELECT_H +#include /* AIX needs this for fd_set */ +#endif + +#ifndef ETIME +#define ETIME ETIMEDOUT +#endif + +#ifdef DBUG_OFF +#define reset_index_in_queue(alarm_data) +#else +#define reset_index_in_queue(alarm_data) alarm_data->index_in_queue= 0; +#endif /* DBUG_OFF */ + +#ifndef USE_ONE_SIGNAL_HAND +#define one_signal_hand_sigmask(A,B,C) pthread_sigmask((A), (B), (C)) +#else +#define one_signal_hand_sigmask(A,B,C) +#endif + +my_bool thr_alarm_inited= 0, my_disable_thr_alarm= 0; + +#if !defined(_WIN32) + +uint thr_client_alarm; +static int alarm_aborted=1; /* No alarm thread */ +volatile my_bool alarm_thread_running= 0; +time_t next_alarm_expire_time= ~ (time_t) 0; +static sig_handler process_alarm_part2(int sig); + +static mysql_mutex_t LOCK_alarm; +static mysql_cond_t COND_alarm; +static sigset_t full_signal_set; +static QUEUE alarm_queue; +static uint max_used_alarms=0; +pthread_t alarm_thread; + +#define MY_THR_ALARM_QUEUE_EXTENT 10 + +#ifdef USE_ALARM_THREAD +static void *alarm_handler(void *arg); +#define reschedule_alarms() mysql_cond_signal(&COND_alarm) +#else +#define reschedule_alarms() pthread_kill(alarm_thread,THR_SERVER_ALARM) +#endif + +static sig_handler thread_alarm(int sig __attribute__((unused))); + +static int compare_ulong(void *not_used __attribute__((unused)), + uchar *a_ptr,uchar* b_ptr) +{ + ulong a=*((ulong*) a_ptr),b= *((ulong*) b_ptr); + return (a < b) ? -1 : (a == b) ? 
0 : 1; +} + +void init_thr_alarm(uint max_alarms) +{ + sigset_t s; + DBUG_ENTER("init_thr_alarm"); + alarm_aborted=0; + next_alarm_expire_time= ~ (time_t) 0; + init_queue(&alarm_queue, max_alarms+1, offsetof(ALARM,expire_time), 0, + compare_ulong, NullS, offsetof(ALARM, index_in_queue)+1, + MY_THR_ALARM_QUEUE_EXTENT); + sigfillset(&full_signal_set); /* Neaded to block signals */ + mysql_mutex_init(key_LOCK_alarm, &LOCK_alarm, MY_MUTEX_INIT_FAST); + mysql_cond_init(key_COND_alarm, &COND_alarm, NULL); + thr_client_alarm= SIGUSR1; + my_sigset(thr_client_alarm, thread_alarm); + sigemptyset(&s); + sigaddset(&s, THR_SERVER_ALARM); + alarm_thread=pthread_self(); +#if defined(USE_ALARM_THREAD) + { + pthread_attr_t thr_attr; + pthread_attr_init(&thr_attr); + pthread_attr_setscope(&thr_attr,PTHREAD_SCOPE_PROCESS); + pthread_attr_setdetachstate(&thr_attr,PTHREAD_CREATE_DETACHED); + mysql_thread_create(key_thread_alarm, + &alarm_thread, &thr_attr, alarm_handler, NULL); + pthread_attr_destroy(&thr_attr); + } +#elif defined(USE_ONE_SIGNAL_HAND) + pthread_sigmask(SIG_BLOCK, &s, NULL); /* used with sigwait() */ +#else + my_sigset(THR_SERVER_ALARM, process_alarm); + pthread_sigmask(SIG_UNBLOCK, &s, NULL); +#endif + DBUG_VOID_RETURN; +} + + +void resize_thr_alarm(uint max_alarms) +{ + mysql_mutex_lock(&LOCK_alarm); + /* + It's ok not to shrink the queue as there may be more pending alarms than + than max_alarms + */ + if (alarm_queue.elements < max_alarms) + { + resize_queue(&alarm_queue,max_alarms+1); + max_used_alarms= alarm_queue.elements; + } + mysql_mutex_unlock(&LOCK_alarm); +} + + +/* + Request alarm after sec seconds. + + SYNOPSIS + thr_alarm() + alrm Pointer to alarm detection + alarm_data Structure to store in alarm queue + + NOTES + This function can't be called from the alarm-handling thread. 
+ + RETURN VALUES + 0 ok + 1 If no more alarms are allowed (aborted by process) + + Stores in first argument a pointer to a non-zero int which is set to 0 + when the alarm has been given +*/ + +my_bool thr_alarm(thr_alarm_t *alrm, uint sec, ALARM *alarm_data) +{ + time_t now, next; +#ifndef USE_ONE_SIGNAL_HAND + sigset_t old_mask; +#endif + my_bool reschedule; + struct st_my_thread_var *current_my_thread_var= my_thread_var; + DBUG_ENTER("thr_alarm"); + DBUG_PRINT("enter",("thread: %s sec: %d",my_thread_name(),sec)); + + if (my_disable_thr_alarm) + { + (*alrm)= &alarm_data->alarmed; + alarm_data->alarmed= 1; /* Abort if interrupted */ + DBUG_RETURN(0); + } + + if (unlikely(alarm_aborted)) + { /* No signal thread */ + DBUG_PRINT("info", ("alarm aborted")); + if (alarm_aborted > 0) + goto abort_no_unlock; + sec= 1; /* Abort mode */ + } + + now= my_time(0); + if (!alarm_data) + { + if (!(alarm_data=(ALARM*) my_malloc(PSI_INSTRUMENT_ME, sizeof(ALARM), + MYF(MY_WME)))) + goto abort_no_unlock; + alarm_data->malloced= 1; + } + else + alarm_data->malloced= 0; + next= now + sec; + alarm_data->expire_time= next; + alarm_data->alarmed= 0; + alarm_data->thread= current_my_thread_var->pthread_self; + alarm_data->thread_id= current_my_thread_var->id; + + one_signal_hand_sigmask(SIG_BLOCK,&full_signal_set,&old_mask); + mysql_mutex_lock(&LOCK_alarm); /* Lock from threads & alarms */ + if (alarm_queue.elements >= max_used_alarms) + { + max_used_alarms=alarm_queue.elements+1; + } + reschedule= (ulong) next_alarm_expire_time > (ulong) next; + queue_insert_safe(&alarm_queue, (uchar*) alarm_data); + assert(alarm_data->index_in_queue > 0); + + /* Reschedule alarm if the current one has more than sec left */ + if (unlikely(reschedule)) + { + DBUG_PRINT("info", ("reschedule")); + if (pthread_equal(pthread_self(),alarm_thread)) + { + alarm(sec); /* purecov: inspected */ + next_alarm_expire_time= next; + } + else + reschedule_alarms(); /* Reschedule alarms */ + } + 
mysql_mutex_unlock(&LOCK_alarm); + one_signal_hand_sigmask(SIG_SETMASK,&old_mask,NULL); + (*alrm)= &alarm_data->alarmed; + DBUG_RETURN(0); + +abort_no_unlock: + *alrm= 0; /* No alarm */ + DBUG_RETURN(1); +} + + +/* + Remove alarm from list of alarms +*/ + +void thr_end_alarm(thr_alarm_t *alarmed) +{ + ALARM *alarm_data; +#ifndef USE_ONE_SIGNAL_HAND + sigset_t old_mask; +#endif + DBUG_ENTER("thr_end_alarm"); + + if (my_disable_thr_alarm) + DBUG_VOID_RETURN; + one_signal_hand_sigmask(SIG_BLOCK,&full_signal_set,&old_mask); + alarm_data= (ALARM*) ((uchar*) *alarmed - offsetof(ALARM,alarmed)); + mysql_mutex_lock(&LOCK_alarm); + DBUG_ASSERT(alarm_data->index_in_queue != 0); + DBUG_ASSERT((ALARM*) queue_element(&alarm_queue, + alarm_data->index_in_queue) == + alarm_data); + queue_remove(&alarm_queue, alarm_data->index_in_queue); + mysql_mutex_unlock(&LOCK_alarm); + one_signal_hand_sigmask(SIG_SETMASK,&old_mask,NULL); + reset_index_in_queue(alarm_data); + DBUG_VOID_RETURN; +} + +/* + Come here when some alarm in queue is due. + Mark all alarms with are finnished in list. + Schedule alarms to be sent again after 1-10 sec (many alarms at once) + If alarm_aborted is set then all alarms are given and resent + every second. +*/ + +sig_handler process_alarm(int sig __attribute__((unused))) +{ + sigset_t old_mask; +/* + This must be first as we can't call DBUG inside an alarm for a normal thread +*/ + + /* + We have to do do the handling of the alarm in a sub function, + because otherwise we would get problems with two threads calling + DBUG_... 
functions at the same time (as two threads may call + process_alarm() at the same time + */ + +#ifndef USE_ALARM_THREAD + pthread_sigmask(SIG_SETMASK,&full_signal_set,&old_mask); + mysql_mutex_lock(&LOCK_alarm); +#endif + process_alarm_part2(sig); +#ifndef USE_ALARM_THREAD +#if defined(SIGNAL_HANDLER_RESET_ON_DELIVERY) && !defined(USE_ONE_SIGNAL_HAND) + my_sigset(THR_SERVER_ALARM,process_alarm); +#endif + mysql_mutex_unlock(&LOCK_alarm); + pthread_sigmask(SIG_SETMASK,&old_mask,NULL); +#endif + return; +} + + +static sig_handler process_alarm_part2(int sig __attribute__((unused))) +{ + ALARM *alarm_data; + DBUG_ENTER("process_alarm"); + DBUG_PRINT("info",("sig: %d active alarms: %d",sig,alarm_queue.elements)); + +#if defined(MAIN) && !defined(__bsdi__) + printf("process_alarm\n"); fflush(stdout); +#endif + if (likely(alarm_queue.elements)) + { + if (unlikely(alarm_aborted)) + { + uint i; + for (i= queue_first_element(&alarm_queue) ; + i <= queue_last_element(&alarm_queue) ;) + { + alarm_data=(ALARM*) queue_element(&alarm_queue,i); + alarm_data->alarmed=1; /* Info to thread */ + if (pthread_equal(alarm_data->thread,alarm_thread) || + pthread_kill(alarm_data->thread, thr_client_alarm)) + { +#ifdef MAIN + printf("Warning: pthread_kill couldn't find thread!!!\n"); +#endif + queue_remove(&alarm_queue,i); /* No thread. 
Remove alarm */ + reset_index_in_queue(alarm_data); + } + else + i++; /* Signal next thread */ + } +#ifndef USE_ALARM_THREAD + if (alarm_queue.elements) + alarm(1); /* Signal soon again */ +#endif + } + else + { + time_t now= my_time(0); + time_t next= now+10-(now%10); + while ((alarm_data=(ALARM*) queue_top(&alarm_queue))->expire_time <= now) + { + alarm_data->alarmed=1; /* Info to thread */ + DBUG_PRINT("info",("sending signal to waiting thread")); + if (pthread_equal(alarm_data->thread,alarm_thread) || + pthread_kill(alarm_data->thread, thr_client_alarm)) + { +#ifdef MAIN + printf("Warning: pthread_kill couldn't find thread!!!\n"); +#endif /* MAIN */ + queue_remove_top(&alarm_queue); /* No thread. Remove alarm */ + reset_index_in_queue(alarm_data); + if (!alarm_queue.elements) + break; + } + else + { + alarm_data->expire_time=next; + queue_replace_top(&alarm_queue); + } + } +#ifndef USE_ALARM_THREAD + if (alarm_queue.elements) + { +#ifdef __bsdi__ + alarm(0); /* Remove old alarm */ +#endif + alarm((uint) (alarm_data->expire_time-now)); + next_alarm_expire_time= alarm_data->expire_time; + } +#endif + } + } + else + { + /* + Ensure that next time we call thr_alarm(), we will schedule a new alarm + */ + next_alarm_expire_time= ~(time_t) 0; + } + DBUG_VOID_RETURN; +} + + +/* + Schedule all alarms now and optionally free all structures + + SYNPOSIS + end_thr_alarm() + free_structures Set to 1 if we should free memory used for + the alarm queue. 
+ When we call this we should KNOW that there + is no active alarms + IMPLEMENTATION + Set alarm_abort to -1 which will change the behavior of alarms as follows: + - All old alarms will be rescheduled at once + - All new alarms will be rescheduled to one second +*/ + +void end_thr_alarm(my_bool free_structures) +{ + DBUG_ENTER("end_thr_alarm"); + if (alarm_aborted != 1) /* If memory not freed */ + { + mysql_mutex_lock(&LOCK_alarm); + DBUG_PRINT("info",("Rescheduling %d waiting alarms",alarm_queue.elements)); + alarm_aborted= -1; /* mark aborted */ + if (alarm_queue.elements || (alarm_thread_running && free_structures)) + { + if (pthread_equal(pthread_self(),alarm_thread)) + alarm(1); /* Shut down everything soon */ + else + reschedule_alarms(); + } + if (free_structures) + { + struct timespec abstime; + + DBUG_ASSERT(!alarm_queue.elements); + + /* Wait until alarm thread dies */ + set_timespec(abstime, 10); /* Wait up to 10 seconds */ + while (alarm_thread_running) + { + int error= mysql_cond_timedwait(&COND_alarm, &LOCK_alarm, &abstime); + if (error == ETIME || error == ETIMEDOUT) + break; /* Don't wait forever */ + } + delete_queue(&alarm_queue); + alarm_aborted= 1; + mysql_mutex_unlock(&LOCK_alarm); + if (!alarm_thread_running) /* Safety */ + { + mysql_mutex_destroy(&LOCK_alarm); + mysql_cond_destroy(&COND_alarm); + } + } + else + mysql_mutex_unlock(&LOCK_alarm); + } + DBUG_VOID_RETURN; +} + + +/* + Remove another thread from the alarm +*/ + +void thr_alarm_kill(my_thread_id thread_id) +{ + uint i; + DBUG_ENTER("thr_alarm_kill"); + + if (alarm_aborted) + return; + mysql_mutex_lock(&LOCK_alarm); + for (i= queue_first_element(&alarm_queue) ; + i <= queue_last_element(&alarm_queue); + i++) + { + ALARM *element= (ALARM*) queue_element(&alarm_queue,i); + if (element->thread_id == thread_id) + { + DBUG_PRINT("info", ("found thread; Killing it")); + element->expire_time= 0; + queue_replace(&alarm_queue, i); + reschedule_alarms(); + break; + } + } + 
mysql_mutex_unlock(&LOCK_alarm); + DBUG_VOID_RETURN; +} + + +void thr_alarm_info(ALARM_INFO *info) +{ + mysql_mutex_lock(&LOCK_alarm); + info->next_alarm_time= 0; + info->max_used_alarms= max_used_alarms; + if ((info->active_alarms= alarm_queue.elements)) + { + time_t now= my_time(0); + long time_diff; + ALARM *alarm_data= (ALARM*) queue_top(&alarm_queue); + time_diff= (long) (alarm_data->expire_time - now); + info->next_alarm_time= (ulong) (time_diff < 0 ? 0 : time_diff); + } + mysql_mutex_unlock(&LOCK_alarm); +} + +/* + This is here for thread to get interruptet from read/write/fcntl + ARGSUSED +*/ + + +static sig_handler thread_alarm(int sig __attribute__((unused))) +{ +#ifdef MAIN + printf("thread_alarm\n"); fflush(stdout); +#endif +#ifdef SIGNAL_HANDLER_RESET_ON_DELIVERY + my_sigset(sig,thread_alarm); /* int. thread system calls */ +#endif +} + + +#ifdef HAVE_TIMESPEC_TS_SEC +#define tv_sec ts_sec +#define tv_nsec ts_nsec +#endif + +/* set up a alarm thread with uses 'sleep' to sleep between alarms */ + +#ifdef USE_ALARM_THREAD +static void *alarm_handler(void *arg __attribute__((unused))) +{ + int error; + struct timespec abstime; +#ifdef MAIN + puts("Starting alarm thread"); +#endif + my_thread_init(); + alarm_thread_running= 1; + mysql_mutex_lock(&LOCK_alarm); + for (;;) + { + if (alarm_queue.elements) + { + time_t sleep_time,now= my_time(0); + if (alarm_aborted) + sleep_time=now+1; + else + sleep_time= ((ALARM*) queue_top(&alarm_queue))->expire_time; + if (sleep_time > now) + { + abstime.tv_sec=sleep_time; + abstime.tv_nsec=0; + next_alarm_expire_time= sleep_time; + if ((error= mysql_cond_timedwait(&COND_alarm, &LOCK_alarm, &abstime)) && + error != ETIME && error != ETIMEDOUT) + { +#ifdef MAIN + printf("Got error: %d from ptread_cond_timedwait (errno: %d)\n", + error,errno); +#endif + } + } + } + else if (alarm_aborted == -1) + break; + else + { + next_alarm_expire_time= ~ (time_t) 0; + if ((error= mysql_cond_wait(&COND_alarm, &LOCK_alarm))) + { +#ifdef 
MAIN + printf("Got error: %d from ptread_cond_wait (errno: %d)\n", + error,errno); +#endif + } + } + process_alarm(0); + } + bzero((char*) &alarm_thread,sizeof(alarm_thread)); /* For easy debugging */ + alarm_thread_running= 0; + mysql_cond_signal(&COND_alarm); + mysql_mutex_unlock(&LOCK_alarm); + pthread_exit(0); + return 0; /* Impossible */ +} +#endif /* USE_ALARM_THREAD */ +#endif + +/**************************************************************************** + Handling of test case (when compiled with -DMAIN) +***************************************************************************/ + +#ifdef MAIN +#if !defined(DONT_USE_THR_ALARM) + +static mysql_cond_t COND_thread_count; +static mysql_mutex_t LOCK_thread_count; +static uint thread_count; + +#ifdef HPUX10 +typedef int * fd_set_ptr; +#else +typedef fd_set * fd_set_ptr; +#endif /* HPUX10 */ + +static void *test_thread(void *arg) +{ + int i,param=*((int*) arg),wait_time,retry; + time_t start_time; + thr_alarm_t got_alarm; + fd_set fd; + FD_ZERO(&fd); + my_thread_init(); + printf("Thread %d (%s) started\n",param,my_thread_name()); fflush(stdout); + for (i=1 ; i <= 10 ; i++) + { + wait_time=param ? 11-i : i; + start_time= my_time(0); + if (thr_alarm(&got_alarm,wait_time,0)) + { + printf("Thread: %s Alarms aborted\n",my_thread_name()); + break; + } + if (wait_time == 3) + { + printf("Thread: %s Simulation of no alarm needed\n",my_thread_name()); + fflush(stdout); + } + else + { + for (retry=0 ; !thr_got_alarm(&got_alarm) && retry < 10 ; retry++) + { + printf("Thread: %s Waiting %d sec\n",my_thread_name(),wait_time); + select(0,(fd_set_ptr) &fd,0,0,0); + } + if (!thr_got_alarm(&got_alarm)) + { + printf("Thread: %s didn't get an alarm. 
Aborting!\n", + my_thread_name()); + break; + } + if (wait_time == 7) + { /* Simulate alarm-miss */ + fd_set readFDs; + uint max_connection=fileno(stdin); + FD_ZERO(&readFDs); + FD_SET(max_connection,&readFDs); + retry=0; + for (;;) + { + printf("Thread: %s Simulating alarm miss\n",my_thread_name()); + fflush(stdout); + if (select(max_connection+1, (fd_set_ptr) &readFDs,0,0,0) < 0) + { + if (errno == EINTR) + break; /* Got new interrupt */ + printf("Got errno: %d from select. Retrying..\n",errno); + if (retry++ >= 3) + { + printf("Warning: Interrupt of select() doesn't set errno!\n"); + break; + } + } + else /* This shouldn't happen */ + { + if (!FD_ISSET(max_connection,&readFDs)) + { + printf("Select interrupted, but errno not set\n"); + fflush(stdout); + if (retry++ >= 3) + break; + continue; + } + (void) getchar(); /* Somebody was playing */ + } + } + } + } + printf("Thread: %s Slept for %d (%d) sec\n",my_thread_name(), + (int) (my_time(0)-start_time), wait_time); fflush(stdout); + thr_end_alarm(&got_alarm); + fflush(stdout); + } + mysql_mutex_lock(&LOCK_thread_count); + thread_count--; + mysql_cond_signal(&COND_thread_count); /* Tell main we are ready */ + mysql_mutex_unlock(&LOCK_thread_count); + my_thread_end(); + return 0; +} + + +static void *signal_hand(void *arg __attribute__((unused))) +{ + sigset_t set; + int sig,error,err_count=0;; + + my_thread_init(); + pthread_detach_this_thread(); + init_thr_alarm(10); /* Setup alarm handler */ + mysql_mutex_lock(&LOCK_thread_count); /* Required by bsdi */ + mysql_cond_signal(&COND_thread_count); /* Tell main we are ready */ + mysql_mutex_unlock(&LOCK_thread_count); + + sigemptyset(&set); /* Catch all signals */ + sigaddset(&set,SIGINT); + sigaddset(&set,SIGQUIT); + sigaddset(&set,SIGTERM); + sigaddset(&set,SIGHUP); +#ifdef SIGTSTP + sigaddset(&set,SIGTSTP); +#endif +#ifdef USE_ONE_SIGNAL_HAND + sigaddset(&set,THR_SERVER_ALARM); /* For alarms */ + puts("Starting signal and alarm handling thread"); +#else + 
puts("Starting signal handling thread"); +#endif + printf("server alarm: %d thread alarm: %d\n", + THR_SERVER_ALARM, thr_client_alarm); + DBUG_PRINT("info",("Starting signal and alarm handling thread")); + for(;;) + { + int code; + while ((error=my_sigwait(&set,&sig,&code)) == EINTR) + printf("sigwait restarted\n"); + if (error) + { + fprintf(stderr,"Got error %d from sigwait\n",error); + if (err_count++ > 5) + exit(1); /* Too many errors in test */ + continue; + } +#ifdef USE_ONE_SIGNAL_HAND + if (sig != THR_SERVER_ALARM) +#endif + printf("Main thread: Got signal %d\n",sig); + switch (sig) { + case SIGINT: + case SIGQUIT: + case SIGTERM: + case SIGHUP: + printf("Aborting nicely\n"); + end_thr_alarm(0); + break; +#ifdef SIGTSTP + case SIGTSTP: + printf("Aborting\n"); + exit(1); + return 0; /* Keep some compilers happy */ +#endif +#ifdef USE_ONE_SIGNAL_HAND + case THR_SERVER_ALARM: + process_alarm(sig); + break; +#endif + } + } +} + + +int main(int argc __attribute__((unused)),char **argv __attribute__((unused))) +{ + pthread_t tid; + pthread_attr_t thr_attr; + int i, param[2], error; + sigset_t set; + ALARM_INFO alarm_info; + MY_INIT(argv[0]); + + if (argc > 1 && argv[1][0] == '-' && argv[1][1] == '#') + { + DBUG_PUSH(argv[1]+2); + } + mysql_mutex_init(0, &LOCK_thread_count, MY_MUTEX_INIT_FAST); + mysql_cond_init(0, &COND_thread_count, NULL); + + /* Start a alarm handling thread */ + sigemptyset(&set); + sigaddset(&set,SIGINT); + sigaddset(&set,SIGQUIT); + sigaddset(&set,SIGTERM); + sigaddset(&set,SIGHUP); + signal(SIGTERM,SIG_DFL); /* If it's blocked by parent */ +#ifdef SIGTSTP + sigaddset(&set,SIGTSTP); +#endif + sigaddset(&set,THR_SERVER_ALARM); + sigdelset(&set, thr_client_alarm); + (void) pthread_sigmask(SIG_SETMASK,&set,NULL); + + pthread_attr_init(&thr_attr); + pthread_attr_setscope(&thr_attr,PTHREAD_SCOPE_PROCESS); + pthread_attr_setdetachstate(&thr_attr,PTHREAD_CREATE_DETACHED); + pthread_attr_setstacksize(&thr_attr,65536L); + + /* Start signal thread and 
wait for it to start */ + mysql_mutex_lock(&LOCK_thread_count); + mysql_thread_create(0, &tid, &thr_attr, signal_hand, NULL); + mysql_cond_wait(&COND_thread_count, &LOCK_thread_count); + mysql_mutex_unlock(&LOCK_thread_count); + DBUG_PRINT("info",("signal thread created")); + + thr_setconcurrency(3); + pthread_attr_setscope(&thr_attr,PTHREAD_SCOPE_PROCESS); + printf("Main thread: %s\n",my_thread_name()); + for (i=0 ; i < 2 ; i++) + { + param[i]= i; + mysql_mutex_lock(&LOCK_thread_count); + if ((error= mysql_thread_create(0, + &tid, &thr_attr, test_thread, + (void*) ¶m[i]))) + { + printf("Can't create thread %d, error: %d\n",i,error); + exit(1); + } + thread_count++; + mysql_mutex_unlock(&LOCK_thread_count); + } + + pthread_attr_destroy(&thr_attr); + mysql_mutex_lock(&LOCK_thread_count); + thr_alarm_info(&alarm_info); + printf("Main_thread: Alarms: %u max_alarms: %u next_alarm_time: %lu\n", + alarm_info.active_alarms, alarm_info.max_used_alarms, + alarm_info.next_alarm_time); + while (thread_count) + { + mysql_cond_wait(&COND_thread_count, &LOCK_thread_count); + if (thread_count == 1) + { + printf("Calling end_thr_alarm. 
This should cancel the last thread\n"); + end_thr_alarm(0); + } + } + mysql_mutex_unlock(&LOCK_thread_count); + thr_alarm_info(&alarm_info); + end_thr_alarm(1); + printf("Main_thread: Alarms: %u max_alarms: %u next_alarm_time: %lu\n", + alarm_info.active_alarms, alarm_info.max_used_alarms, + alarm_info.next_alarm_time); + printf("Test succeeded\n"); + mysql_cond_destroy(&COND_thread_count); + mysql_mutex_destroy(&LOCK_thread_count); + my_end(MY_CHECK_ERROR); + return 0; +} + +#else /* !defined(DONT_USE_ALARM_THREAD) */ + +int main(int argc __attribute__((unused)),char **argv __attribute__((unused))) +{ + printf("thr_alarm disabled with DONT_USE_THR_ALARM\n"); + exit(1); +} + +#endif /* !defined(DONT_USE_ALARM_THREAD) */ +#endif /* WIN */ +#endif /* MAIN */ diff --git a/mysys/thr_lock.c b/mysys/thr_lock.c new file mode 100644 index 00000000..c1ec0623 --- /dev/null +++ b/mysys/thr_lock.c @@ -0,0 +1,1837 @@ +/* Copyright (c) 2000, 2011, Oracle and/or its affiliates. + Copyright (c) 2012, Monty Program Ab. + + This program is free software; you can redistribute it and/or modify + it under the terms of the GNU General Public License as published by + the Free Software Foundation; version 2 of the License. + + This program is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + GNU General Public License for more details. + + You should have received a copy of the GNU General Public License + along with this program; if not, write to the Free Software + Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1335 USA */ + +/* +Read and write locks for Posix threads. All tread must acquire +all locks it needs through thr_multi_lock() to avoid dead-locks. +A lock consists of a master lock (THR_LOCK), and lock instances +(THR_LOCK_DATA). +Any thread can have any number of lock instances (read and write:s) on +any lock. 
All lock instances must be freed. +Locks are prioritized according to: + +The current lock types are: + +TL_READ # Low priority read +TL_READ_WITH_SHARED_LOCKS +TL_READ_HIGH_PRIORITY # High priority read +TL_READ_NO_INSERT # Read without concurrent inserts +TL_WRITE_ALLOW_WRITE # Write lock that allows other writers +TL_WRITE_CONCURRENT_INSERT + # Insert that can be mixed with selects +TL_WRITE_DELAYED # Used by delayed insert + # Allows lower locks to take over +TL_WRITE_LOW_PRIORITY # Low priority write +TL_WRITE # High priority write +TL_WRITE_ONLY # High priority write + # Abort all new lock requests with an error + +Locks are prioritized according to: + +WRITE_ALLOW_WRITE, WRITE_CONCURRENT_INSERT, WRITE_DELAYED, +WRITE_LOW_PRIORITY, READ, WRITE, READ_HIGH_PRIORITY and WRITE_ONLY + +Locks in the same priority level are scheduled in first-in-first-out order. + +To allow concurrent read/write locks, with 'WRITE_CONCURRENT_INSERT' one +should put a pointer to the following functions in the lock structure: +(If the pointer is zero (default), the function is not called) + +check_status: + Before giving a lock of type TL_WRITE_CONCURRENT_INSERT, + we check if this function exists and returns 0. + If not, then the lock is upgraded to TL_WRITE. + In MyISAM this is a simple check if the insert can be done + at the end of the datafile. +update_status: + Called in thr_reschedule_write_lock(), when an insert delayed thread + downgrades TL_WRITE lock to TL_WRITE_DELAYED, to allow SELECT + threads to proceed. + A storage engine should also call update_status internally + in the ::external_lock(F_UNLCK) method. + In MyISAM and CSV this function updates the length of the datafile. + MySQL does in some exceptional cases (when doing DDL statements on + open tables) call thr_unlock() followed by thr_lock() without calling + ::external_lock() in between. 
In this case thr_unlock() is called with + the THR_UNLOCK_UPDATE_STATUS flag and thr_unlock() will call + update_status for write locks. +get_status: + When one gets a lock this functions is called. + In MyISAM this stores the number of rows and size of the datafile + for concurrent reads. + +The lock algorithm allows one to have one TL_WRITE_CONCURRENT_INSERT or +one TL_WRITE_DELAYED lock at the same time as multiple read locks. + +In addition, if lock->allow_multiple_concurrent_insert is set then there can +be any number of TL_WRITE_CONCURRENT_INSERT locks aktive at the same time. +*/ + +#if !defined(MAIN) && !defined(DBUG_OFF) && !defined(EXTRA_DEBUG) +#define FORCE_DBUG_OFF +#endif + +#include "mysys_priv.h" + +#include "thr_lock.h" +#include "mysql/psi/mysql_table.h" +#include +#include + +my_bool thr_lock_inited=0; /* Set to 1 by init_thr_lock() */ +ulong locks_immediate = 0L, locks_waited = 0L; /* Statistics: requests served without waiting / requests that entered wait_for_lock() */ +enum thr_lock_type thr_upgraded_concurrent_insert_lock = TL_WRITE; /* Type that thr_lock() substitutes for TL_WRITE_CONCURRENT_INSERT when check_status is missing or returns non-zero */ + +/* The following constants are only for debug output */ +#define MAX_THREADS 1000 +#define MAX_LOCKS 1000 + + +LIST *thr_lock_thread_list; /* List of all THR_LOCK objects registered by thr_lock_init() */ +ulong max_write_lock_count= ~(ulong) 0L; /* wake_up_waiters() releases the waiting read locks once write_lock_count of a lock exceeds this */ +/* Optional hooks called by wait_for_lock() around the wait; installed with thr_set_lock_wait_callback() */ +static void (*before_lock_wait)(void)= 0; +static void (*after_lock_wait)(void)= 0; +/* Install the wait hooks. wait_for_lock() calls before_wait once before it starts waiting and after_wait once after the wait loop, the latter only if before_wait was called. A zero pointer disables the hook. */ +void thr_set_lock_wait_callback(void (*before_wait)(void), + void (*after_wait)(void)) +{ + before_lock_wait= before_wait; + after_lock_wait= after_wait; +} +/* Return the address of the suspend condition of the calling thread (my_thread_var->suspend). */ +static inline mysql_cond_t *get_cond(void) +{ + return &my_thread_var->suspend; +} + + +/* + Sort locks in priority order + + LOCK_CMP() + A First lock + B Second lock + + Return: + 0 if A >= B + 1 if A < B + + Priority for locks (decides in which order locks are locked) + We want all write locks to be first, followed by read locks. + Locks from MERGE tables has a little lower priority than other + locks, to allow one to release merge tables without having + to unlock and re-lock other locks. + The lower the number, the higher the priority for the lock. 
+ For MERGE tables we add 2 (THR_LOCK_MERGE_PRIV) to the lock priority. + THR_LOCK_LATE_PRIV (1) is used when one locks other tables to be merged + with existing locks. This way we prioritize the original locks over the + new locks. +*/ + +/* Returns 1 when a sorts before b, else 0. Entries are ordered by THR_LOCK address (lower first), then by lock type (higher type first), then by priority value (lower first). */ +static inline int LOCK_CMP(THR_LOCK_DATA *a, THR_LOCK_DATA *b) +{ + if (a->lock != b->lock) + return a->lock < b->lock; + + if (a->type != b->type) + return a->type > b->type; + + return a->priority < b->priority; +} + + +/* + For the future (now the thread specific cond is alloced by my_pthread.c) +*/ +/* Marks the module as initialized by setting thr_lock_inited; always returns 0. */ +my_bool init_thr_lock() +{ + thr_lock_inited=1; + return 0; +} +/* Two owners are equal only when they are the same THR_LOCK_INFO object (pointer comparison). */ +static inline my_bool +thr_lock_owner_equal(THR_LOCK_INFO *rhs, THR_LOCK_INFO *lhs) +{ + return rhs == lhs; +} + + +#ifdef EXTRA_DEBUG +#define MAX_FOUND_ERRORS 10 /* Report 10 first errors */ +static uint found_errors=0; +/* Debug check of one lock list. Walks at most MAX_LOCKS entries and verifies that no entry is TL_UNLOCK, that every entry is a read type (below TL_FIRST_WRITE) when read_lock is set and a write type otherwise, that the prev links and list->last are consistent, that all entries have the owner of the first entry when same_owner is set (not enforced while every entry seen so far has the same type and that type is TL_WRITE_ALLOW_WRITE or TL_WRITE_CONCURRENT_INSERT), and that no entry has cond set when no_cond is set. lock_type and where only label the messages. Prints a warning to stderr and returns 1 at the first problem found; returns 0 when the list is consistent. */ +static int check_lock(struct st_lock_list *list, const char* lock_type, + const char *where, my_bool same_owner, my_bool no_cond, + my_bool read_lock) +{ + THR_LOCK_DATA *data,**prev; + uint count=0; + + prev= &list->data; + if (list->data) + { + enum thr_lock_type last_lock_type= list->data->type; + THR_LOCK_INFO *first_owner= list->data->owner; + + for (data=list->data; data && count++ < MAX_LOCKS ; data=data->next) + { + if (data->type == TL_UNLOCK) + { + fprintf(stderr, + "Warning: Found unlocked lock at %s: %s\n", + lock_type, where); + return 1; + } + if ((read_lock && data->type >= TL_FIRST_WRITE) || + (!read_lock && data->type < TL_FIRST_WRITE)) + { + fprintf(stderr, + "Warning: Found %s lock in %s queue at %s: %s\n", + read_lock ? "write" : "read", + read_lock ? 
"read" : "write", + lock_type, where); + return 1; + } + if (data->type != last_lock_type) + last_lock_type=TL_IGNORE; + if (data->prev != prev) + { + fprintf(stderr, + "Warning: prev link %d didn't point at previous lock at %s: %s\n", + count, lock_type, where); + return 1; + } + if (same_owner && + !thr_lock_owner_equal(data->owner, first_owner) && + last_lock_type != TL_WRITE_ALLOW_WRITE && + last_lock_type != TL_WRITE_CONCURRENT_INSERT) + { + fprintf(stderr, + "Warning: Found locks from different threads for lock '%s' in '%s' at '%s'. org_lock_type: %d last_lock_type: %d new_lock_type: %d\n", + data->lock->name ? data->lock->name : "", + lock_type, where, list->data->type, last_lock_type, + data->type); + return 1; + } + if (no_cond && data->cond) + { + fprintf(stderr, + "Warning: Found active lock with not reset cond %s: %s\n", + lock_type,where); + return 1; + } + prev= &data->next; + } + if (data) + { + fprintf(stderr,"Warning: found too many locks at %s: %s\n", + lock_type,where); + return 1; + } + } + if (prev != list->last) + { + fprintf(stderr,"Warning: last didn't point at last lock at %s: %s\n", + lock_type, where); + return 1; + } + return 0; +} + +/* EXTRA_DEBUG consistency check of all four queues of a THR_LOCK. Runs check_lock() on write, write_wait, read and read_wait, compares read_no_write_count with the number of TL_READ_NO_INSERT entries in the read list, and checks that the combination of granted and waiting lock types is legal. Problems are printed to stderr and counted in found_errors; nothing is checked once found_errors has reached MAX_FOUND_ERRORS. where names the call site and type is only used in messages. A non-zero allow_no_locks disables the checks that complain about requests waiting although nothing blocks them. */ +static void check_locks(THR_LOCK *lock, const char *where, + enum thr_lock_type type, + my_bool allow_no_locks) +{ + uint old_found_errors=found_errors; + DBUG_ENTER("check_locks"); + + if (found_errors < MAX_FOUND_ERRORS) + { /* Bitwise OR below, so all four lists are always checked */ + if (check_lock(&lock->write,"write",where,1,1,0) | + check_lock(&lock->write_wait,"write_wait",where,0,0,0) | + check_lock(&lock->read,"read",where,0,1,1) | + check_lock(&lock->read_wait,"read_wait",where,0,0,1)) + { + DBUG_ASSERT(my_assert_on_error == 0); + found_errors++; + } + + if (found_errors < MAX_FOUND_ERRORS) + { + uint count=0, count2= 0; + THR_LOCK_DATA *data; + for (data=lock->read.data ; data ; data=data->next) + { + count2++; + if (data->type == TL_READ_NO_INSERT) + count++; + /* Protect against infinite loop. 
*/ + DBUG_ASSERT(count <= lock->read_no_write_count && + count2 <= MAX_LOCKS); + } + if (count != lock->read_no_write_count) + { + found_errors++; + fprintf(stderr, + "Warning at '%s': Locks read_no_write_count was %u when it should have been %u\n", where, lock->read_no_write_count,count); + } + + if (!lock->write.data) + { + if (!allow_no_locks && !lock->read.data && + (lock->write_wait.data || lock->read_wait.data)) + { + found_errors++; + fprintf(stderr, + "Warning at '%s': No locks in use but locks are in wait queue\n", + where); + } + if (!lock->write_wait.data) + { + if (!allow_no_locks && lock->read_wait.data) + { + found_errors++; + fprintf(stderr, + "Warning at '%s': No write locks and waiting read locks\n", + where); + } + } + else + { + if (!allow_no_locks && + (((lock->write_wait.data->type == TL_WRITE_CONCURRENT_INSERT || + lock->write_wait.data->type == TL_WRITE_ALLOW_WRITE) && + !lock->read_no_write_count) || + (lock->write_wait.data->type == TL_WRITE_DELAYED && + !lock->read.data))) + { + found_errors++; + fprintf(stderr, + "Warning at '%s': Write lock %d waiting while no exclusive read locks\n",where,(int) lock->write_wait.data->type); + DBUG_PRINT("warning", ("Warning at '%s': Write lock %d waiting while no exclusive read locks",where,(int) lock->write_wait.data->type)); + } + } + } + else + { + /* We have at least one write lock */ + if (lock->write.data->type == TL_WRITE_CONCURRENT_INSERT) + { + count= 0; /* NOTE(review): count is not incremented in the loop below, so its MAX_LOCKS bound never triggers */ + for (data=lock->write.data->next; + data && count < MAX_LOCKS; + data=data->next) + { + if (data->type != TL_WRITE_CONCURRENT_INSERT && + data->type != TL_WRITE_ALLOW_WRITE) + { + fprintf(stderr, + "Warning at '%s': Found TL_WRITE_CONCURRENT_INSERT lock mixed with other write lock: %d\n", + where, data->type); + DBUG_PRINT("warning", ("Warning at '%s': Found TL_WRITE_CONCURRENT_INSERT lock mixed with other write lock: %d", + where, data->type)); + break; + } + } + } + if (lock->write_wait.data) + { + if (!allow_no_locks && + 
lock->write.data->type == TL_WRITE_ALLOW_WRITE && + lock->write_wait.data->type == TL_WRITE_ALLOW_WRITE) + { + found_errors++; + fprintf(stderr, + "Warning at '%s': Found WRITE_ALLOW_WRITE lock waiting for WRITE_ALLOW_WRITE lock\n", + where); + DBUG_PRINT("warning", ("Warning at '%s': Found WRITE_ALLOW_WRITE lock waiting for WRITE_ALLOW_WRITE lock", + where)); + + } + } + if (lock->read.data) + { + for (data=lock->read.data ; data ; data=data->next) + { + if (!thr_lock_owner_equal(lock->write.data->owner, + data->owner) && + ((lock->write.data->type > TL_WRITE_DELAYED && + lock->write.data->type != TL_WRITE_ONLY) || + ((lock->write.data->type == TL_WRITE_CONCURRENT_INSERT || + lock->write.data->type == TL_WRITE_ALLOW_WRITE) && + data->type == TL_READ_NO_INSERT))) + { + found_errors++; + fprintf(stderr, + "Warning at '%s' for lock: %d: Found lock of type %d that is write and read locked. Read_no_write_count: %d\n", + where, (int) type, lock->write.data->type, + lock->read_no_write_count); + DBUG_PRINT("warning",("At '%s' for lock %d: Found lock of type %d that is write and read locked", + where, (int) type, + lock->write.data->type)); + } + } + } + if (lock->read_wait.data) + { + if (!allow_no_locks && lock->write.data->type <= TL_WRITE_DELAYED && + lock->read_wait.data->type <= TL_READ_HIGH_PRIORITY) + { + found_errors++; + fprintf(stderr, + "Warning at '%s': Found read lock of type %d waiting for write lock of type %d\n", + where, + (int) lock->read_wait.data->type, + (int) lock->write.data->type); + } + } + } + } + if (found_errors != old_found_errors) + { + DBUG_PRINT("error",("Found wrong lock")); + } + } + DBUG_VOID_RETURN; +} + +#else /* EXTRA_DEBUG */ +#define check_locks(A,B,C,D) +#endif + + + /* Initialize a lock: zero the structure, create its mutex, make all four queues empty and register the lock in thr_lock_thread_list */ + +void thr_lock_init(THR_LOCK *lock) +{ + DBUG_ENTER("thr_lock_init"); + bzero((char*) lock,sizeof(*lock)); + mysql_mutex_init(key_THR_LOCK_mutex, &lock->mutex, MY_MUTEX_INIT_FAST); + lock->read.last= 
&lock->read_wait.data; + lock->write_wait.last= &lock->write_wait.data; + lock->write.last= &lock->write.data; + + mysql_mutex_lock(&THR_LOCK_lock); /* Add to locks in use */ + lock->list.data=(void*) lock; + thr_lock_thread_list=list_add(thr_lock_thread_list,&lock->list); + mysql_mutex_unlock(&THR_LOCK_lock); + DBUG_VOID_RETURN; +} + +/* Remove the lock from thr_lock_thread_list (under THR_LOCK_lock) and destroy its mutex. */ +void thr_lock_delete(THR_LOCK *lock) +{ + DBUG_ENTER("thr_lock_delete"); + mysql_mutex_lock(&THR_LOCK_lock); + thr_lock_thread_list=list_delete(thr_lock_thread_list,&lock->list); + mysql_mutex_unlock(&THR_LOCK_lock); + mysql_mutex_destroy(&lock->mutex); + DBUG_VOID_RETURN; +} + +/* Fill info with the pthread handle and the id stored in tmp; when tmp is 0 the my_thread_var of the calling thread is used instead. */ +void thr_lock_info_init(THR_LOCK_INFO *info, struct st_my_thread_var *tmp) +{ + if (!tmp) + tmp= my_thread_var; + info->thread= tmp->pthread_self; + info->thread_id= tmp->id; +} + + /* Initialize a lock instance: bind it to lock, mark it TL_UNLOCK with no owner and no cond, and store param as the status_param handed to the status callbacks */ + +void thr_lock_data_init(THR_LOCK *lock,THR_LOCK_DATA *data, void *param) +{ + data->lock=lock; + data->type=TL_UNLOCK; + data->owner= 0; /* no owner yet */ + data->status_param=param; + data->cond=0; + data->priority= 0; + data->debug_print_param= 0; +} + +/* Return 1 if any entry of the list starting at data is owned by owner, else 0. */ +static inline my_bool +has_old_lock(THR_LOCK_DATA *data, THR_LOCK_INFO *owner) +{ + for ( ; data ; data=data->next) + { + if (thr_lock_owner_equal(data->owner, owner)) + return 1; /* Already locked by thread */ + } + return 0; +} + +static void wake_up_waiters(THR_LOCK *lock); + +/* Block the calling thread until data is granted, the wait is aborted, or the timeout built from lock_wait_timeout expires. Unless in_wait_list is set, data is first appended to the wait queue. Entered with data->lock->mutex held (it is the mutex passed to the timed condition wait); the mutex is released before returning. Returns THR_LOCK_SUCCESS when granted, THR_LOCK_WAIT_TIMEOUT on timeout, and THR_LOCK_ABORTED otherwise, including when get_status returns non-zero after a grant. */ +static enum enum_thr_lock_result +wait_for_lock(struct st_lock_list *wait, THR_LOCK_DATA *data, + my_bool in_wait_list, ulong lock_wait_timeout) +{ + struct st_my_thread_var *thread_var= my_thread_var; + mysql_cond_t *cond= &thread_var->suspend; + struct timespec wait_timeout; + enum enum_thr_lock_result result= THR_LOCK_ABORTED; + PSI_stage_info old_stage; + my_bool use_wait_callbacks= FALSE; + DBUG_ENTER("wait_for_lock"); + + /* + One can use this to signal when a thread is going to wait for a lock. + See debug_sync.cc. + + Beware of waiting for a signal here. The lock has acquired its mutex. 
+ While waiting on a signal here, the locking thread could not acquire + the mutex to release the lock. One could lock up the table + completely. + + In detail it works so: When thr_lock() tries to acquire a table + lock, it locks the lock->mutex, checks if it can have the lock, and + if not, it calls wait_for_lock(). Here it unlocks the table lock + while waiting on a condition. The sync point is located before this + wait for condition. If we have a waiting action here, we hold the + the table locks mutex all the time. Any attempt to look at the table + lock by another thread blocks it immediately on lock->mutex. This + can easily become an unexpected and unobvious blockage. So be + warned: Do not request a WAIT_FOR action for the 'wait_for_lock' + sync point unless you really know what you do. + */ + DEBUG_SYNC_C("wait_for_lock"); + + if (!in_wait_list) + { + (*wait->last)=data; /* Wait for lock */ + data->prev= wait->last; + wait->last= &data->next; + } + + statistic_increment(locks_waited, &THR_LOCK_lock); + + /* Set up control struct to allow others to abort locks */ + thread_var->current_mutex= &data->lock->mutex; + thread_var->current_cond= cond; + data->cond= cond; + + proc_info_hook(NULL, &stage_waiting_for_table_level_lock, + &old_stage, + __func__, __FILE__, __LINE__); + + /* + Since before_lock_wait potentially can create more threads to + scheduler work for, we don't want to call the before_lock_wait + callback unless it will really start to wait. + + For similar reasons, we do not want to call before_lock_wait and + after_lock_wait for each lap around the loop, so we restrict + ourselves to call it before_lock_wait once before starting to wait + and once after the thread has exited the wait loop. 
+ */ + if ((!thread_var->abort || in_wait_list) && before_lock_wait) + { + use_wait_callbacks= TRUE; + (*before_lock_wait)(); + } + + set_timespec(wait_timeout, lock_wait_timeout); + while (!thread_var->abort || in_wait_list) + { + int rc= mysql_cond_timedwait(cond, &data->lock->mutex, &wait_timeout); + /* + We must break the wait if one of the following occurs: + - the connection has been aborted (thread_var->abort is set), but + this is not a delayed insert thread (in_wait_list is 0). For a delayed + insert thread the proper action at shutdown is, apparently, to + acquire the lock and complete the insert. + - the lock has been granted (data->cond is set to NULL by the granter), + or the waiting has been aborted (additionally data->type is set to + TL_UNLOCK). + - the wait has timed out (rc == ETIMEDOUT) + Order of checks below is important to not report about timeout + if the predicate is true. + */ + if (data->cond == 0) + { + DBUG_PRINT("thr_lock", ("lock granted/aborted")); + break; + } + if (rc == ETIMEDOUT || rc == ETIME) + { + /* purecov: begin inspected */ + DBUG_PRINT("thr_lock", ("lock timed out")); + result= THR_LOCK_WAIT_TIMEOUT; + break; + /* purecov: end */ + } + } + + /* + We call the after_lock_wait callback once the wait loop has + finished. 
+ */ + if (after_lock_wait && use_wait_callbacks) + (*after_lock_wait)(); + + DBUG_PRINT("thr_lock", ("aborted: %d in_wait_list: %d", + thread_var->abort, in_wait_list)); + + if (data->cond || data->type == TL_UNLOCK) + { /* Lock was not granted: still queued (cond set) or aborted (type reset to TL_UNLOCK) */ + if (data->cond) /* aborted or timed out */ + { + if (((*data->prev)=data->next)) /* remove from wait-list */ + data->next->prev= data->prev; + else + wait->last=data->prev; + data->type= TL_UNLOCK; /* No lock */ + check_locks(data->lock, "killed or timed out wait_for_lock", data->type, + 1); + wake_up_waiters(data->lock); + } + else + { + DBUG_PRINT("thr_lock", ("lock aborted")); + check_locks(data->lock, "aborted wait_for_lock", data->type, 0); + } + } + else + { + result= THR_LOCK_SUCCESS; + if (data->lock->get_status && + (*data->lock->get_status)(data->status_param, + data->type == TL_WRITE_CONCURRENT_INSERT)) + result= THR_LOCK_ABORTED; + check_locks(data->lock,"got wait_for_lock", data->type, 0); + } + mysql_mutex_unlock(&data->lock->mutex); + + /* The following must be done after unlock of lock->mutex */ + mysql_mutex_lock(&thread_var->mutex); + thread_var->current_mutex= 0; + thread_var->current_cond= 0; + mysql_mutex_unlock(&thread_var->mutex); + + proc_info_hook(NULL, &old_stage, NULL, __func__, __FILE__, __LINE__); + + DBUG_RETURN(result); +} +/* Request the lock of type data->type on data->lock for owner. The request is granted at once (data is appended to lock->read or lock->write) when the rules below allow it; otherwise wait_for_lock() is called on read_wait or write_wait. A TL_WRITE_DELAYED request that meets existing locks is only queued on write_wait and returns without waiting. A request refused because of a TL_WRITE_ONLY lock sets data->type to TL_UNLOCK and returns THR_LOCK_ABORTED. THR_LOCK_ABORTED is also returned when get_status returns non-zero. */ +static enum enum_thr_lock_result +thr_lock(THR_LOCK_DATA *data, THR_LOCK_INFO *owner, ulong lock_wait_timeout) +{ + THR_LOCK *lock=data->lock; + enum enum_thr_lock_result result= THR_LOCK_SUCCESS; + struct st_lock_list *wait_queue; + enum thr_lock_type lock_type= data->type; + MYSQL_TABLE_WAIT_VARIABLES(locker, state) /* no ';' */ + DBUG_ENTER("thr_lock"); + + data->next=0; + data->cond=0; /* safety */ + data->owner= owner; /* Must be reset ! 
*/ + data->priority&= ~THR_LOCK_LATE_PRIV; + + MYSQL_START_TABLE_LOCK_WAIT(locker, &state, data->m_psi, + PSI_TABLE_LOCK, lock_type); + + mysql_mutex_lock(&lock->mutex); + DBUG_PRINT("lock",("data:%p thread:%lu lock:%p type: %d", + data, (ulong) data->owner->thread_id, + lock, (int) lock_type)); + check_locks(lock,(uint) lock_type < (uint) TL_FIRST_WRITE ? + "enter read_lock" : "enter write_lock", lock_type, 0); + if ((int) lock_type < (int) TL_FIRST_WRITE) + { + /* Request for READ lock */ + if (lock->write.data) + { + /* + We can allow a read lock even if there is already a + write lock on the table if they are owned by the same + thread or if they satisfy the following lock + compatibility matrix: + + Request + /------- + H|++++ WRITE_ALLOW_WRITE + e|+++- WRITE_CONCURRENT_INSERT + l|++++ WRITE_DELAYED + d |||| + |||\= READ_NO_INSERT + ||\ = READ_HIGH_PRIORITY + |\ = READ_WITH_SHARED_LOCKS + \ = READ + + + + = Request can be satisfied. + - = Request cannot be satisfied. + + READ_NO_INSERT and WRITE_ALLOW_WRITE should in principle + be incompatible. However this will cause starvation of + LOCK TABLE READ in InnoDB under high write load. + See Bug#42147 for more information. 
+ */ + + DBUG_PRINT("lock",("write locked 1 by thread:%lu", + (ulong) lock->write.data->owner->thread_id)); + if (thr_lock_owner_equal(data->owner, lock->write.data->owner) || + (lock->write.data->type <= TL_WRITE_DELAYED && + (((int) lock_type <= (int) TL_READ_HIGH_PRIORITY) || + (lock->write.data->type != TL_WRITE_CONCURRENT_INSERT)))) + { /* Already got a write lock */ + (*lock->read.last)=data; /* Add to running FIFO */ + data->prev=lock->read.last; + lock->read.last= &data->next; + if (lock_type == TL_READ_NO_INSERT) + lock->read_no_write_count++; + check_locks(lock,"read lock with old write lock", lock_type, 0); + if ((lock->get_status) && (*lock->get_status)(data->status_param, 0)) + result= THR_LOCK_ABORTED; + statistic_increment(locks_immediate,&THR_LOCK_lock); + goto end; + } + if (lock->write.data->type == TL_WRITE_ONLY) + { + /* We are not allowed to get a READ lock in this case */ + data->type=TL_UNLOCK; + result= THR_LOCK_ABORTED; /* Can't wait for this one */ + goto end; + } + } + else if (!lock->write_wait.data || + lock->write_wait.data->type <= TL_WRITE_LOW_PRIORITY || + lock_type == TL_READ_HIGH_PRIORITY || + has_old_lock(lock->read.data, data->owner)) /* Has old read lock */ + { /* No important write-locks */ + (*lock->read.last)=data; /* Add to running FIFO */ + data->prev=lock->read.last; + lock->read.last= &data->next; + if (lock_type == TL_READ_NO_INSERT) + lock->read_no_write_count++; + check_locks(lock,"read lock with no write locks", lock_type, 0); + if ((lock->get_status) && (*lock->get_status)(data->status_param, 0)) + result= THR_LOCK_ABORTED; + statistic_increment(locks_immediate,&THR_LOCK_lock); + goto end; + } + /* + We're here if there is an active write lock or no write + lock but a high priority write waiting in the write_wait queue. + In the latter case we should yield the lock to the writer. 
+ */ + wait_queue= &lock->read_wait; + } + else /* Request for WRITE lock */ + { + if (lock_type == TL_WRITE_DELAYED) + { + if (lock->write.data && lock->write.data->type == TL_WRITE_ONLY) + { + data->type=TL_UNLOCK; + result= THR_LOCK_ABORTED; /* Can't wait for this one */ + goto end; + } + if (lock->write.data || lock->read.data) + { + /* Add delayed write lock to write_wait queue, and return at once */ + (*lock->write_wait.last)=data; + data->prev=lock->write_wait.last; + lock->write_wait.last= &data->next; + data->cond=get_cond(); + /* + We don't have to do get_status here as we will do it when we change + the delayed lock to a real write lock + */ + statistic_increment(locks_immediate,&THR_LOCK_lock); + goto end; + } + } + else if (lock_type == TL_WRITE_CONCURRENT_INSERT && ! lock->check_status) + data->type=lock_type= thr_upgraded_concurrent_insert_lock; + + if (lock->write.data) /* If there is a write lock */ + { + if (lock->write.data->type == TL_WRITE_ONLY) + { + /* purecov: begin tested */ + /* Allow lock owner to bypass TL_WRITE_ONLY. */ + if (!thr_lock_owner_equal(data->owner, lock->write.data->owner)) + { + /* We are not allowed to get a lock in this case */ + data->type=TL_UNLOCK; + result= THR_LOCK_ABORTED; /* Can't wait for this one */ + goto end; + } + /* purecov: end */ + } + + /* + The idea is to allow us to get a lock at once if we already have + a write lock or if there is no pending write locks and if all + write locks are of the same type and are either + TL_WRITE_ALLOW_WRITE or TL_WRITE_CONCURRENT_INSERT and + there is no TL_READ_NO_INSERT lock. + + Note that, since lock requests for the same table are sorted in + such way that requests with higher thr_lock_type value come first + (with one exception (*)), lock being requested usually (**) has + equal or "weaker" type than one which thread might have already + acquired. 
+ *) The only exception to this rule is case when type of old lock + is TL_WRITE_LOW_PRIORITY and type of new lock is changed inside + of thr_lock() from TL_WRITE_CONCURRENT_INSERT to TL_WRITE since + engine turns out to be not supporting concurrent inserts. + Note that since TL_WRITE has the same compatibility rules as + TL_WRITE_LOW_PRIORITY (their only difference is priority), + it is OK to grant new lock without additional checks in such + situation. + **) The exceptions are situations when: + - when old lock type is TL_WRITE_DELAYED + But these should never happen within MariaDB. + Therefore it is OK to allow acquiring write lock on the table if + this thread already holds some write lock on it. + + (INSERT INTO t1 VALUES (f1()), where f1() is stored function which + tries to update t1, is an example of statement which requests two + different types of write lock on the same table). + */ + DBUG_ASSERT(! has_old_lock(lock->write.data, data->owner) || + ((lock_type <= lock->write.data->type || + (lock_type == TL_WRITE && + lock->write.data->type == TL_WRITE_LOW_PRIORITY)) && + lock->write.data->type != TL_WRITE_DELAYED)); + + if (((lock_type == TL_WRITE_ALLOW_WRITE || + (lock_type == TL_WRITE_CONCURRENT_INSERT && + lock->allow_multiple_concurrent_insert && + !lock->read_no_write_count)) && + ! lock->write_wait.data && + lock->write.data->type == lock_type && + ! 
lock->read_no_write_count) || + has_old_lock(lock->write.data, data->owner)) + { + DBUG_PRINT("info", ("write_wait.data: %p old_type: %d", + lock->write_wait.data, + lock->write.data->type)); + + (*lock->write.last)=data; /* Add to running fifo */ + data->prev=lock->write.last; + lock->write.last= &data->next; + check_locks(lock,"second write lock", lock_type, 0); + if ((lock->get_status) && + (*lock->get_status)(data->status_param, + lock_type == TL_WRITE_CONCURRENT_INSERT)) + result= THR_LOCK_ABORTED; + statistic_increment(locks_immediate,&THR_LOCK_lock); + goto end; + } + DBUG_PRINT("lock",("write locked 2 by thread: %lu", + (ulong) lock->write.data->owner->thread_id)); + } + else + { + DBUG_PRINT("info", ("write_wait.data:%p", + lock->write_wait.data)); + if (!lock->write_wait.data) + { /* no scheduled write locks */ + my_bool concurrent_insert= 0; + if (lock_type == TL_WRITE_CONCURRENT_INSERT) + { + concurrent_insert= 1; + if ((*lock->check_status)(data->status_param)) + { + concurrent_insert= 0; + data->type=lock_type= thr_upgraded_concurrent_insert_lock; + } + } + + if (!lock->read.data || + (lock_type <= TL_WRITE_DELAYED && + ((lock_type != TL_WRITE_CONCURRENT_INSERT && + lock_type != TL_WRITE_ALLOW_WRITE) || + !lock->read_no_write_count))) + { + (*lock->write.last)=data; /* Add as current write lock */ + data->prev=lock->write.last; + lock->write.last= &data->next; + if ((lock->get_status) && + (*lock->get_status)(data->status_param, concurrent_insert)) + result= THR_LOCK_ABORTED; + check_locks(lock,"only write lock", lock_type, 0); + statistic_increment(locks_immediate,&THR_LOCK_lock); + goto end; + } + } /* NOTE(review): the print below dereferences lock->read.data and is also reached when only write_wait is non-empty; confirm read.data cannot be NULL on that path */ + DBUG_PRINT("lock",("write locked 3 by thread:%lu type: %d", + (ulong) lock->read.data->owner->thread_id, + data->type)); + } + wait_queue= &lock->write_wait; + } + result= wait_for_lock(wait_queue, data, 0, lock_wait_timeout); + MYSQL_END_TABLE_LOCK_WAIT(locker); + DBUG_RETURN(result); +end: + mysql_mutex_unlock(&lock->mutex); + 
MYSQL_END_TABLE_LOCK_WAIT(locker); + DBUG_RETURN(result); +} + +/* Grant the waiting read locks: splice the whole read_wait list onto the end of the read list and signal each waiter. When using_concurrent_insert is set, TL_READ_NO_INSERT requests are not granted but linked back into read_wait; otherwise they are granted and counted in read_no_write_count. write_lock_count is reset when read_wait ends up empty. The head of read_wait is dereferenced unconditionally, so callers must only call this with a non-empty read_wait. */ +static inline void free_all_read_locks(THR_LOCK *lock, + my_bool using_concurrent_insert) +{ + THR_LOCK_DATA *data=lock->read_wait.data; + + check_locks(lock,"before freeing read locks", TL_UNLOCK, 1); + + /* move all locks from read_wait list to read list */ + (*lock->read.last)=data; + data->prev=lock->read.last; + lock->read.last=lock->read_wait.last; + + /* Clear read_wait list */ + lock->read_wait.last= &lock->read_wait.data; + + do + { + mysql_cond_t *cond= data->cond; + if ((int) data->type == (int) TL_READ_NO_INSERT) + { + if (using_concurrent_insert) + { + /* + We can't free this lock; + Link lock away from read chain back into read_wait chain + */ + if (((*data->prev)=data->next)) + data->next->prev=data->prev; + else + lock->read.last=data->prev; + *lock->read_wait.last= data; + data->prev= lock->read_wait.last; + lock->read_wait.last= &data->next; + continue; /* Jumps to the loop condition, which advances through the unchanged data->next */ + } + lock->read_no_write_count++; + } + /* purecov: begin inspected */ + DBUG_PRINT("lock",("giving read lock to thread: %lu", + (ulong)data->owner->thread_id)); + /* purecov: end */ + data->cond=0; /* Mark thread free */ + mysql_cond_signal(cond); + } while ((data=data->next)); + *lock->read_wait.last=0; /* Terminate the rebuilt read_wait list */ + if (!lock->read_wait.data) + lock->write_lock_count=0; + check_locks(lock,"after giving read locks", TL_UNLOCK, 0); +} + + /* Unlock lock and free next thread on same lock. Unlinks data from the list it is on; when THR_UNLOCK_UPDATE_STATUS is set in unlock_flags calls update_status for lock types >= TL_WRITE_CONCURRENT_INSERT and restore_status for the others; then marks data TL_UNLOCK and calls wake_up_waiters(). */ + +void thr_unlock(THR_LOCK_DATA *data, uint unlock_flags) +{ + THR_LOCK *lock=data->lock; + enum thr_lock_type lock_type=data->type; + DBUG_ENTER("thr_unlock"); + DBUG_PRINT("lock",("data: %p thread: %lu lock: %p", + data, (ulong) data->owner->thread_id, + lock)); + mysql_mutex_lock(&lock->mutex); + check_locks(lock,"start of release lock", lock_type, 0); + + if (((*data->prev)=data->next)) /* remove from lock-list */ + data->next->prev= data->prev; + else if (lock_type <= TL_READ_NO_INSERT) + lock->read.last=data->prev; + else if (lock_type == 
TL_WRITE_DELAYED && data->cond) + { + /* + This only happens in extreme circumstances when a + write delayed lock that is waiting for a lock + */ + lock->write_wait.last=data->prev; /* Put it on wait queue */ + } + else + lock->write.last=data->prev; + + if (unlock_flags & THR_UNLOCK_UPDATE_STATUS) + { + /* External lock was not called; Update or restore status */ + if (lock_type >= TL_WRITE_CONCURRENT_INSERT) + { + if (lock->update_status) + (*lock->update_status)(data->status_param); + } + else + { + if (lock->restore_status) + (*lock->restore_status)(data->status_param); + } + } + if (lock_type == TL_READ_NO_INSERT) + lock->read_no_write_count--; + data->type=TL_UNLOCK; /* Mark unlocked */ + wake_up_waiters(lock); + mysql_mutex_unlock(&lock->mutex); + DBUG_VOID_RETURN; +} + + +/** + @brief Wake up all threads whose pending requests for the lock + can be satisfied. + + @param lock Lock for which threads should be woken up + @note Both callers in this file (thr_unlock, wait_for_lock) call this while holding lock->mutex. +*/ + +static void wake_up_waiters(THR_LOCK *lock) +{ + THR_LOCK_DATA *data; + enum thr_lock_type lock_type; + DBUG_ENTER("wake_up_waiters"); + + check_locks(lock, "before waking up waiters", TL_UNLOCK, 1); + if (!lock->write.data) /* If no active write locks */ + { + data=lock->write_wait.data; + if (!lock->read.data) /* If no more locks in use */ + { + /* Release write-locks with TL_WRITE or TL_WRITE_ONLY priority first */ + if (data && + (data->type != TL_WRITE_LOW_PRIORITY || !lock->read_wait.data || + lock->read_wait.data->type < TL_READ_HIGH_PRIORITY)) + { + if (lock->write_lock_count++ > max_write_lock_count) + { + /* Too many write locks in a row; Release all waiting read locks */ + lock->write_lock_count=0; + if (lock->read_wait.data) + { + DBUG_PRINT("info",("Freeing all read_locks because of max_write_lock_count")); + free_all_read_locks(lock,0); + goto end; + } + } + for (;;) + { + if (((*data->prev)=data->next)) /* remove from wait-list */ + data->next->prev= data->prev; + else + lock->write_wait.last=data->prev; + 
(*lock->write.last)=data; /* Put in execute list */ + data->prev=lock->write.last; + data->next=0; + lock->write.last= &data->next; + if (data->type == TL_WRITE_CONCURRENT_INSERT && + (*lock->check_status)(data->status_param)) + data->type=TL_WRITE; /* Upgrade lock */ + /* purecov: begin inspected */ + DBUG_PRINT("lock",("giving write lock of type %d to thread: %lu", + data->type, (ulong) data->owner->thread_id)); + /* purecov: end */ + { + mysql_cond_t *cond= data->cond; + data->cond=0; /* Mark thread free */ + mysql_cond_signal(cond); /* Start waiting thread */ + } + if (data->type != TL_WRITE_ALLOW_WRITE || + !lock->write_wait.data || + lock->write_wait.data->type != TL_WRITE_ALLOW_WRITE) + break; + data=lock->write_wait.data; /* Free this too */ + } + if (data->type >= TL_WRITE_LOW_PRIORITY) + goto end; + /* Release possible read locks together with the write lock */ + } + if (lock->read_wait.data) + free_all_read_locks(lock, + data && + (data->type == TL_WRITE_CONCURRENT_INSERT || + data->type == TL_WRITE_ALLOW_WRITE)); + else + { + DBUG_PRINT("lock",("No waiting read locks to free")); + } + } + else if (data && + (lock_type=data->type) <= TL_WRITE_DELAYED && + ((lock_type != TL_WRITE_CONCURRENT_INSERT && + lock_type != TL_WRITE_ALLOW_WRITE) || + !lock->read_no_write_count)) + { + /* + For DELAYED, ALLOW_READ, WRITE_ALLOW_WRITE or CONCURRENT_INSERT locks + start WRITE locks together with the READ locks + */ + if (lock_type == TL_WRITE_CONCURRENT_INSERT && + (*lock->check_status)(data->status_param)) + { + data->type=TL_WRITE; /* Upgrade lock */ + if (lock->read_wait.data) + free_all_read_locks(lock,0); + goto end; + } + do { + mysql_cond_t *cond= data->cond; + if (((*data->prev)=data->next)) /* remove from wait-list */ + data->next->prev= data->prev; + else + lock->write_wait.last=data->prev; + (*lock->write.last)=data; /* Put in execute list */ + data->prev=lock->write.last; + lock->write.last= &data->next; + data->next=0; /* Only one write lock */ + 
data->cond=0; /* Mark thread free */ + mysql_cond_signal(cond); /* Start waiting thread */ + } while (lock_type == TL_WRITE_ALLOW_WRITE && + (data=lock->write_wait.data) && + data->type == TL_WRITE_ALLOW_WRITE); + if (lock->read_wait.data) + free_all_read_locks(lock, + (lock_type == TL_WRITE_CONCURRENT_INSERT || + lock_type == TL_WRITE_ALLOW_WRITE)); + } + else if (!data && lock->read_wait.data) + free_all_read_locks(lock,0); + } +end: + check_locks(lock, "after waking up waiters", TL_UNLOCK, 0); + DBUG_VOID_RETURN; +} + + +/* + Get all locks in a specific order to avoid dead-locks + Sort according to lock position and put write_locks before read_locks if + lock on same lock. Locks on MERGE tables have lower priority than other + locks of the same type. See the comment above LOCK_CMP(). +*/ +/* Sorts data[0..count-1] in place into the order defined by LOCK_CMP(). */ +static void sort_locks(THR_LOCK_DATA **data,uint count) +{ + THR_LOCK_DATA **pos,**end,**prev,*tmp; + + /* Sort locks with insertion sort (fast because almost always few locks) */ + + for (pos=data+1,end=data+count; pos < end ; pos++) + { + tmp= *pos; + if (LOCK_CMP(tmp,pos[-1])) + { + prev=pos; + do { + prev[0]=prev[-1]; + } while (--prev != data && LOCK_CMP(tmp,prev[-1])); + prev[0]=tmp; + } + } +} + + +enum enum_thr_lock_result +thr_multi_lock(THR_LOCK_DATA **data, uint count, THR_LOCK_INFO *owner, + ulong lock_wait_timeout) +{ + THR_LOCK_DATA **pos, **end, **first_lock; + DBUG_ENTER("thr_multi_lock"); + DBUG_PRINT("lock",("data: %p count: %d", data, count)); + + if (count > 1) + sort_locks(data,count); + else if (count == 0) + DBUG_RETURN(THR_LOCK_SUCCESS); + + /* lock everything */ + DEBUG_SYNC_C("thr_multi_lock_before_thr_lock"); + for (pos=data,end=data+count; pos < end ; pos++) + { + enum enum_thr_lock_result result= thr_lock(*pos, owner, lock_wait_timeout); + if (result != THR_LOCK_SUCCESS) + { /* Aborted */ + thr_multi_unlock(data,(uint) (pos-data), 0); + /* Mark all requested locks as TL_UNLOCK (to simplify lock checking) */ + for ( ; pos < end ; pos++) + (*pos)->type= 
TL_UNLOCK; + DBUG_RETURN(result); + } +#ifdef MAIN + printf("Thread: %s Got lock:%p type: %d\n",my_thread_name(), + pos[0]->lock, pos[0]->type); fflush(stdout); +#endif + } + DEBUG_SYNC_C("thr_multi_lock_after_thr_lock"); + + /* + Call start_trans for all locks. + If we lock the same table multiple times, we must use the same + status_param; We ensure this by calling copy_status() for all + copies of the same tables. + */ + if ((*data)->lock->start_trans) + ((*data)->lock->start_trans)((*data)->status_param); + for (first_lock=data, pos= data+1 ; pos < end ; pos++) + { + /* Get the current status (row count, checksum, trid etc) */ + if ((*pos)->lock->start_trans) + (*(*pos)->lock->start_trans)((*pos)->status_param); + /* + If same table as previous table use pointer to previous status + information to ensure that all read/write tables shares same + state. + */ + if (pos[0]->lock == pos[-1]->lock && pos[0]->lock->copy_status) + (pos[0]->lock->copy_status)((*pos)->status_param, + (*first_lock)->status_param); + else + { + /* Different lock, use this as base for next lock */ + first_lock= pos; + } + } + DBUG_RETURN(THR_LOCK_SUCCESS); +} + + +/** + Merge two sets of locks. + + @param data All locks. First old locks, then new locks. + @param old_count Original number of locks. These are first in 'data'. + @param new_count How many new locks + + The merge is needed if the new locks contains same tables as the old + locks, in which case we have to ensure that same tables shares the + same status (as after a thr_multi_lock()). 
+*/ + +void thr_merge_locks(THR_LOCK_DATA **data, uint old_count, uint new_count) +{ + THR_LOCK_DATA **pos, **end, **first_lock= 0; + DBUG_ENTER("thr_merge_lock"); + + /* Remove marks on old locks to make them sort before new ones */ + for (pos=data, end= pos + old_count; pos < end ; pos++) + (*pos)->priority&= ~THR_LOCK_LATE_PRIV; + + /* Mark new locks with LATE_PRIV to make them sort after org ones */ + for (pos=data + old_count, end= pos + new_count; pos < end ; pos++) + (*pos)->priority|= THR_LOCK_LATE_PRIV; + + sort_locks(data, old_count + new_count); + + for (pos=data ; pos < end ; pos++) + { + /* Check if lock was unlocked before */ + if (pos[0]->type == TL_UNLOCK || ! pos[0]->lock->fix_status) + { + DBUG_PRINT("info", ("lock skipped. unlocked: %d fix_status: %d", + pos[0]->type == TL_UNLOCK, + pos[0]->lock->fix_status == 0)); + continue; + } + + /* + If same table as previous table use pointer to previous status + information to ensure that all read/write tables shares same + state. 
+ */ + if (first_lock && pos[0]->lock == first_lock[0]->lock) + (pos[0]->lock->fix_status)((*first_lock)->status_param, + (*pos)->status_param); + else + { + /* Different lock, use this as base for next lock */ + first_lock= pos; + (pos[0]->lock->fix_status)((*first_lock)->status_param, 0); + } + } + DBUG_VOID_RETURN; +} + + +/* Unlock all locks */ + +void thr_multi_unlock(THR_LOCK_DATA **data,uint count, uint unlock_flags) +{ + THR_LOCK_DATA **pos,**end; + DBUG_ENTER("thr_multi_unlock"); + DBUG_PRINT("lock",("data: %p count: %d flags: %u", data, count, + unlock_flags)); + + for (pos=data,end=data+count; pos < end ; pos++) + { +#ifdef MAIN + printf("Thread: %s Rel lock: %p type: %d\n", + my_thread_name(), pos[0]->lock, pos[0]->type); + fflush(stdout); +#endif + if ((*pos)->type != TL_UNLOCK) + thr_unlock(*pos, unlock_flags); + else + { + DBUG_PRINT("lock",("Free lock: data: %p thread:%lu lock: %p", + *pos, (ulong) (*pos)->owner->thread_id, + (*pos)->lock)); + } + } + DBUG_VOID_RETURN; +} + +/* + Abort all threads waiting for a lock. The lock will be upgraded to + TL_WRITE_ONLY to abort any new accesses to the lock +*/ + +void thr_abort_locks(THR_LOCK *lock, my_bool upgrade_lock) +{ + THR_LOCK_DATA *data; + DBUG_ENTER("thr_abort_locks"); + mysql_mutex_lock(&lock->mutex); + + for (data=lock->read_wait.data; data ; data=data->next) + { + data->type=TL_UNLOCK; /* Mark killed */ + /* It's safe to signal the cond first: we're still holding the mutex. 
*/ + mysql_cond_signal(data->cond); + data->cond=0; /* Removed from list */ + } + for (data=lock->write_wait.data; data ; data=data->next) + { + data->type=TL_UNLOCK; + mysql_cond_signal(data->cond); + data->cond=0; + } + lock->read_wait.last= &lock->read_wait.data; + lock->write_wait.last= &lock->write_wait.data; + lock->read_wait.data=lock->write_wait.data=0; + if (upgrade_lock && lock->write.data) + lock->write.data->type=TL_WRITE_ONLY; + mysql_mutex_unlock(&lock->mutex); + DBUG_VOID_RETURN; +} + + +/* + Abort all locks for specific table/thread combination + + This is used to abort all locks for a specific thread +*/ + +my_bool thr_abort_locks_for_thread(THR_LOCK *lock, my_thread_id thread_id) +{ + THR_LOCK_DATA *data; + my_bool found= FALSE; + DBUG_ENTER("thr_abort_locks_for_thread"); + + mysql_mutex_lock(&lock->mutex); + for (data= lock->read_wait.data; data ; data= data->next) + { + if (data->owner->thread_id == thread_id) /* purecov: tested */ + { + DBUG_PRINT("info",("Aborting read-wait lock")); + data->type= TL_UNLOCK; /* Mark killed */ + /* It's safe to signal the cond first: we're still holding the mutex. 
*/ + found= TRUE; + mysql_cond_signal(data->cond); + data->cond= 0; /* Removed from list */ + + if (((*data->prev)= data->next)) + data->next->prev= data->prev; + else + lock->read_wait.last= data->prev; + } + } + for (data= lock->write_wait.data; data ; data= data->next) + { + if (data->owner->thread_id == thread_id) /* purecov: tested */ + { + DBUG_PRINT("info",("Aborting write-wait lock")); + data->type= TL_UNLOCK; + found= TRUE; + mysql_cond_signal(data->cond); + data->cond= 0; + + if (((*data->prev)= data->next)) + data->next->prev= data->prev; + else + lock->write_wait.last= data->prev; + } + } + wake_up_waiters(lock); + mysql_mutex_unlock(&lock->mutex); + DBUG_RETURN(found); +} + + +/* + Downgrade a WRITE_* to a lower WRITE level + SYNOPSIS + thr_downgrade_write_lock() + in_data Lock data of thread downgrading its lock + new_lock_type New write lock type + RETURN VALUE + NONE + DESCRIPTION + This can be used to downgrade a lock already owned. When the downgrade + occurs also other waiters, both readers and writers can be allowed to + start. + The previous lock is often TL_WRITE_ONLY but can also be + TL_WRITE. The normal downgrade variants are: + TL_WRITE_ONLY => TL_WRITE after a short exclusive lock while holding a + write table lock + TL_WRITE_ONLY => TL_WRITE_ALLOW_WRITE After a short exclusive lock after + already earlier having dongraded lock to TL_WRITE_ALLOW_WRITE + The implementation is conservative and rather don't start rather than + go on unknown paths to start, the common cases are handled. + + NOTE: + In its current implementation it is only allowed to downgrade from + TL_WRITE_ONLY. In this case there are no waiters. Thus no wake up + logic is required. 
+*/ + +void thr_downgrade_write_lock(THR_LOCK_DATA *in_data, + enum thr_lock_type new_lock_type) +{ + THR_LOCK *lock=in_data->lock; +#ifdef DBUG_ASSERT_EXISTS + enum thr_lock_type old_lock_type= in_data->type; +#endif + DBUG_ENTER("thr_downgrade_write_only_lock"); + + mysql_mutex_lock(&lock->mutex); + DBUG_ASSERT(old_lock_type == TL_WRITE_ONLY); + DBUG_ASSERT(old_lock_type > new_lock_type); + in_data->type= new_lock_type; + check_locks(lock,"after downgrading lock", old_lock_type, 0); + + mysql_mutex_unlock(&lock->mutex); + DBUG_VOID_RETURN; +} + +/* Upgrade a WRITE_DELAY lock to a WRITE_LOCK */ + +my_bool thr_upgrade_write_delay_lock(THR_LOCK_DATA *data, + enum thr_lock_type new_lock_type, + ulong lock_wait_timeout) +{ + THR_LOCK *lock=data->lock; + enum enum_thr_lock_result res; + DBUG_ENTER("thr_upgrade_write_delay_lock"); + + mysql_mutex_lock(&lock->mutex); + if (data->type == TL_UNLOCK || data->type >= TL_WRITE_LOW_PRIORITY) + { + mysql_mutex_unlock(&lock->mutex); + DBUG_RETURN(data->type == TL_UNLOCK); /* Test if Aborted */ + } + check_locks(lock,"before upgrading lock", data->type, 0); + /* TODO: Upgrade to TL_WRITE_CONCURRENT_INSERT in some cases */ + data->type= new_lock_type; /* Upgrade lock */ + + /* Check if someone has given us the lock */ + if (!data->cond) + { + if (!lock->read.data) /* No read locks */ + { /* We have the lock */ + /* For this function, get_status is not allowed to fail */ + if (data->lock->get_status) + (*data->lock->get_status)(data->status_param, 0); + mysql_mutex_unlock(&lock->mutex); + if (lock->start_trans) + (*lock->start_trans)(data->status_param); + DBUG_RETURN(0); + } + + if (((*data->prev)=data->next)) /* remove from lock-list */ + data->next->prev= data->prev; + else + lock->write.last=data->prev; + + if ((data->next=lock->write_wait.data)) /* Put first in lock_list */ + data->next->prev= &data->next; + else + lock->write_wait.last= &data->next; + data->prev= &lock->write_wait.data; + lock->write_wait.data=data; + 
check_locks(lock,"upgrading lock", new_lock_type, 0); + } + else + { + check_locks(lock,"waiting for lock", new_lock_type, 0); + } + res= wait_for_lock(&lock->write_wait, data, 1, lock_wait_timeout); + if (res == THR_LOCK_SUCCESS && lock->start_trans) + DBUG_RETURN((*lock->start_trans)(data->status_param)); + DBUG_RETURN(0); +} + + +/* downgrade a WRITE lock to a WRITE_DELAY lock if there is pending locks */ + +my_bool thr_reschedule_write_lock(THR_LOCK_DATA *data, + ulong lock_wait_timeout) +{ + THR_LOCK *lock=data->lock; + enum thr_lock_type write_lock_type; + DBUG_ENTER("thr_reschedule_write_lock"); + + mysql_mutex_lock(&lock->mutex); + if (!lock->read_wait.data) /* No waiting read locks */ + { + mysql_mutex_unlock(&lock->mutex); + DBUG_RETURN(0); + } + + write_lock_type= data->type; + data->type=TL_WRITE_DELAYED; + if (lock->update_status) + (*lock->update_status)(data->status_param); + if (((*data->prev)=data->next)) /* remove from lock-list */ + data->next->prev= data->prev; + else + lock->write.last=data->prev; + + if ((data->next=lock->write_wait.data)) /* Put first in lock_list */ + data->next->prev= &data->next; + else + lock->write_wait.last= &data->next; + data->prev= &lock->write_wait.data; + data->cond=get_cond(); /* This was zero */ + lock->write_wait.data=data; + free_all_read_locks(lock,0); + + mysql_mutex_unlock(&lock->mutex); + DBUG_RETURN(thr_upgrade_write_delay_lock(data, write_lock_type, + lock_wait_timeout)); +} + + +#include + +static void thr_print_lock(const char* name,struct st_lock_list *list) +{ + THR_LOCK_DATA *data,**prev; + uint count=0; + + if (list->data) + { + printf("%-10s: ",name); + prev= &list->data; + for (data=list->data; data && count++ < MAX_LOCKS ; data=data->next) + { + printf("%p (%lu:%d); ", data, (ulong) data->owner->thread_id, + (int) data->type); + if (data->prev != prev) + printf("\nWarning: prev didn't point at previous lock\n"); + prev= &data->next; + } + puts(""); + if (prev != list->last) + printf("Warning: 
last didn't point at last lock\n"); + } +} + +void thr_print_locks(void) +{ + LIST *list; + uint count=0; + + mysql_mutex_lock(&THR_LOCK_lock); + puts("Current active THR (table level locks):"); + for (list= thr_lock_thread_list; list && count++ < MAX_THREADS; + list= list_rest(list)) + { + THR_LOCK *lock=(THR_LOCK*) list->data; + mysql_mutex_lock(&lock->mutex); + if ((lock->write.data || lock->read.data || + lock->write_wait.data || lock->read_wait.data)) + { + printf("lock: %p:", lock); + if ((lock->write_wait.data || lock->read_wait.data) && + (! lock->read.data && ! lock->write.data)) + printf(" WARNING: "); + if (lock->write.data) + printf(" write"); + if (lock->write_wait.data) + printf(" write_wait"); + if (lock->read.data) + printf(" read"); + if (lock->read_wait.data) + printf(" read_wait"); + puts(""); + thr_print_lock("write",&lock->write); + thr_print_lock("write_wait",&lock->write_wait); + thr_print_lock("read",&lock->read); + thr_print_lock("read_wait",&lock->read_wait); + puts(""); + } + mysql_mutex_unlock(&lock->mutex); + } + fflush(stdout); + mysql_mutex_unlock(&THR_LOCK_lock); +} + + +/***************************************************************************** +** Test of thread locks +****************************************************************************/ + +#ifdef MAIN + +struct st_test { + uint lock_nr; + enum thr_lock_type lock_type; +}; + +THR_LOCK locks[6]; /* Number of locks +1 */ + +struct st_test test_0[] = {{0,TL_READ}}; /* One lock */ +struct st_test test_1[] = {{0,TL_READ},{0,TL_WRITE}}; /* Read and write lock of lock 0 */ +struct st_test test_2[] = {{1,TL_WRITE},{0,TL_READ},{2,TL_READ}}; +struct st_test test_3[] = {{2,TL_WRITE},{1,TL_READ},{0,TL_READ}}; /* Deadlock with test_2 ? 
*/ +struct st_test test_4[] = {{0,TL_WRITE},{0,TL_READ},{0,TL_WRITE},{0,TL_READ}}; +struct st_test test_5[] = {{0,TL_READ},{1,TL_READ},{2,TL_READ},{3,TL_READ}}; /* Many reads */ +struct st_test test_6[] = {{0,TL_WRITE},{1,TL_WRITE},{2,TL_WRITE},{3,TL_WRITE}}; /* Many writes */ +struct st_test test_7[] = {{3,TL_READ}}; +struct st_test test_8[] = {{1,TL_READ_NO_INSERT},{2,TL_READ_NO_INSERT},{3,TL_READ_NO_INSERT}}; /* Should be quick */ +struct st_test test_9[] = {{4,TL_READ_HIGH_PRIORITY}}; +struct st_test test_10[] ={{4,TL_WRITE}}; +struct st_test test_11[] = {{0,TL_WRITE_LOW_PRIORITY},{1,TL_WRITE_LOW_PRIORITY},{2,TL_WRITE_LOW_PRIORITY},{3,TL_WRITE_LOW_PRIORITY}}; /* Many writes */ +struct st_test test_12[] = {{0,TL_WRITE_CONCURRENT_INSERT},{1,TL_WRITE_CONCURRENT_INSERT},{2,TL_WRITE_CONCURRENT_INSERT},{3,TL_WRITE_CONCURRENT_INSERT}}; +struct st_test test_13[] = {{0,TL_WRITE_CONCURRENT_INSERT},{1,TL_READ}}; +struct st_test test_14[] = {{0,TL_WRITE_ALLOW_WRITE},{1,TL_READ}}; +struct st_test test_15[] = {{0,TL_WRITE_ALLOW_WRITE},{1,TL_WRITE_ALLOW_WRITE}}; + +struct st_test *tests[] = {test_0,test_1,test_2,test_3,test_4,test_5,test_6, + test_7,test_8,test_9,test_10,test_11,test_12, + test_13,test_14,test_15}; +int lock_counts[]= {sizeof(test_0)/sizeof(struct st_test), + sizeof(test_1)/sizeof(struct st_test), + sizeof(test_2)/sizeof(struct st_test), + sizeof(test_3)/sizeof(struct st_test), + sizeof(test_4)/sizeof(struct st_test), + sizeof(test_5)/sizeof(struct st_test), + sizeof(test_6)/sizeof(struct st_test), + sizeof(test_7)/sizeof(struct st_test), + sizeof(test_8)/sizeof(struct st_test), + sizeof(test_9)/sizeof(struct st_test), + sizeof(test_10)/sizeof(struct st_test), + sizeof(test_11)/sizeof(struct st_test), + sizeof(test_12)/sizeof(struct st_test), + sizeof(test_13)/sizeof(struct st_test), + sizeof(test_14)/sizeof(struct st_test), + sizeof(test_15)/sizeof(struct st_test) +}; + + +static mysql_cond_t COND_thread_count; +static mysql_mutex_t LOCK_thread_count; 
+static uint thread_count; +static ulong sum=0; + +#define MAX_LOCK_COUNT 8 +#define TEST_TIMEOUT 100000 + +/* The following functions is for WRITE_CONCURRENT_INSERT */ + +static my_bool test_get_status(void* param __attribute__((unused)), + my_bool concurrent_insert __attribute__((unused))) +{ + return 0; +} + +static void test_update_status(void* param __attribute__((unused))) +{ +} + +static void test_copy_status(void* to __attribute__((unused)) , + void *from __attribute__((unused))) +{ +} + +static my_bool test_check_status(void* param __attribute__((unused))) +{ + return 0; +} + + +static void *test_thread(void *arg) +{ + int i,j,param=*((int*) arg); + THR_LOCK_DATA data[MAX_LOCK_COUNT]; + THR_LOCK_INFO lock_info; + THR_LOCK_DATA *multi_locks[MAX_LOCK_COUNT]; + my_thread_init(); + + printf("Thread %s (%d) started\n",my_thread_name(),param); fflush(stdout); + + thr_lock_info_init(&lock_info, 0); + for (i=0; i < lock_counts[param] ; i++) + thr_lock_data_init(locks+tests[param][i].lock_nr,data+i,NULL); + for (j=1 ; j < 10 ; j++) /* try locking 10 times */ + { + for (i=0; i < lock_counts[param] ; i++) + { /* Init multi locks */ + multi_locks[i]= &data[i]; + data[i].type= tests[param][i].lock_type; + } + thr_multi_lock(multi_locks, lock_counts[param], &lock_info, TEST_TIMEOUT); + mysql_mutex_lock(&LOCK_thread_count); + { + int tmp=rand() & 7; /* Do something from 0-2 sec */ + if (tmp == 0) + sleep(1); + else if (tmp == 1) + sleep(2); + else + { + ulong k; + for (k=0 ; k < (ulong) (tmp-2)*100000L ; k++) + sum+=k; + } + } + mysql_mutex_unlock(&LOCK_thread_count); + thr_multi_unlock(multi_locks,lock_counts[param], THR_UNLOCK_UPDATE_STATUS); + } + + printf("Thread %s (%d) ended\n",my_thread_name(),param); fflush(stdout); + thr_print_locks(); + mysql_mutex_lock(&LOCK_thread_count); + thread_count--; + mysql_cond_signal(&COND_thread_count); /* Tell main we are ready */ + mysql_mutex_unlock(&LOCK_thread_count); + my_thread_end(); + return 0; +} + + +int main(int argc 
__attribute__((unused)),char **argv __attribute__((unused))) +{ + pthread_t tid; + pthread_attr_t thr_attr; + int param[array_elements(lock_counts)], error; + uint i; + MY_INIT(argv[0]); + if (argc > 1 && argv[1][0] == '-' && argv[1][1] == '#') + DBUG_PUSH(argv[1]+2); + + printf("Main thread: %s\n",my_thread_name()); + + if ((error= mysql_cond_init(0, &COND_thread_count, NULL))) + { + fprintf(stderr, "Got error: %d from mysql_cond_init (errno: %d)", + error,errno); + exit(1); + } + if ((error= mysql_mutex_init(0, &LOCK_thread_count, MY_MUTEX_INIT_FAST))) + { + fprintf(stderr, "Got error: %d from mysql_cond_init (errno: %d)", + error,errno); + exit(1); + } + + for (i=0 ; i < array_elements(locks) ; i++) + { + thr_lock_init(locks+i); + locks[i].check_status= test_check_status; + locks[i].update_status=test_update_status; + locks[i].copy_status= test_copy_status; + locks[i].get_status= test_get_status; + locks[i].allow_multiple_concurrent_insert= 1; + } + if ((error=pthread_attr_init(&thr_attr))) + { + fprintf(stderr,"Got error: %d from pthread_attr_init (errno: %d)", + error,errno); + exit(1); + } + if ((error=pthread_attr_setdetachstate(&thr_attr,PTHREAD_CREATE_DETACHED))) + { + fprintf(stderr, + "Got error: %d from pthread_attr_setdetachstate (errno: %d)", + error,errno); + exit(1); + } +#ifndef pthread_attr_setstacksize /* void return value */ + if ((error=pthread_attr_setstacksize(&thr_attr,65536L))) + { + fprintf(stderr,"Got error: %d from pthread_attr_setstacksize (errno: %d)", + error,errno); + exit(1); + } +#endif +#ifdef HAVE_THR_SETCONCURRENCY + (void) thr_setconcurrency(2); +#endif + for (i=0 ; i < array_elements(lock_counts) ; i++) + { + param[i]= i; + + if ((error= mysql_mutex_lock(&LOCK_thread_count))) + { + fprintf(stderr, "Got error: %d from mysql_mutex_lock (errno: %d)", + error, errno); + exit(1); + } + if ((error= mysql_thread_create(0, + &tid, &thr_attr, test_thread, + (void*) ¶m[i]))) + { + fprintf(stderr, "Got error: %d from mysql_thread_create 
(errno: %d)\n", + error, errno); + mysql_mutex_unlock(&LOCK_thread_count); + exit(1); + } + thread_count++; + mysql_mutex_unlock(&LOCK_thread_count); + } + + pthread_attr_destroy(&thr_attr); + if ((error= mysql_mutex_lock(&LOCK_thread_count))) + fprintf(stderr, "Got error: %d from mysql_mutex_lock\n", error); + while (thread_count) + { + if ((error= mysql_cond_wait(&COND_thread_count, &LOCK_thread_count))) + fprintf(stderr, "Got error: %d from mysql_cond_wait\n", error); + } + if ((error= mysql_mutex_unlock(&LOCK_thread_count))) + fprintf(stderr, "Got error: %d from mysql_mutex_unlock\n", error); + for (i=0 ; i < array_elements(locks) ; i++) + thr_lock_delete(locks+i); +#ifdef EXTRA_DEBUG + if (found_errors) + printf("Got %d warnings\n",found_errors); + else +#endif + printf("Test succeeded\n"); + mysql_cond_destroy(&COND_thread_count); + mysql_mutex_destroy(&LOCK_thread_count); + my_end(MY_CHECK_ERROR); + return 0; +} + +#endif /* MAIN */ diff --git a/mysys/thr_mutex.c b/mysys/thr_mutex.c new file mode 100644 index 00000000..aca1c1f7 --- /dev/null +++ b/mysys/thr_mutex.c @@ -0,0 +1,843 @@ +/* + Copyright (c) 2000, 2011, Oracle and/or its affiliates. + Copyright (c) 2010, 2011, Monty Program Ab + + This program is free software; you can redistribute it and/or modify + it under the terms of the GNU General Public License as published by + the Free Software Foundation; version 2 of the License. + + This program is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + GNU General Public License for more details. 
+ + You should have received a copy of the GNU General Public License + along with this program; if not, write to the Free Software + Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1335 USA */ + +/* This makes a wrapper for mutex handling to make it easier to debug mutex */ + +#include +#if defined(TARGET_OS_LINUX) && !defined (__USE_UNIX98) +#define __USE_UNIX98 /* To get rw locks under Linux */ +#endif + +#ifdef SAFE_MUTEX +#define SAFE_MUTEX_DEFINED +#undef SAFE_MUTEX /* Avoid safe_mutex redefinitions */ +#endif + +#include "mysys_priv.h" +#include "my_static.h" +#include +#include + +#ifndef DO_NOT_REMOVE_THREAD_WRAPPERS +/* Remove wrappers */ +#undef pthread_mutex_t +#undef pthread_mutex_init +#undef pthread_mutex_lock +#undef pthread_mutex_unlock +#undef pthread_mutex_trylock +#undef pthread_mutex_destroy +#undef pthread_cond_wait +#undef pthread_cond_timedwait +#undef safe_mutex_free_deadlock_data +#endif /* DO_NOT_REMOVE_THREAD_WRAPPERS */ + +#ifdef PTHREAD_ADAPTIVE_MUTEX_INITIALIZER_NP +pthread_mutexattr_t my_fast_mutexattr; +#endif +#ifdef PTHREAD_ERRORCHECK_MUTEX_INITIALIZER_NP +pthread_mutexattr_t my_errorcheck_mutexattr; +#endif + +#ifdef SAFE_MUTEX_DEFINED +static pthread_mutex_t THR_LOCK_mutex; +static ulong safe_mutex_count= 0; /* Number of mutexes created */ +static ulong safe_mutex_id= 0; +my_bool safe_mutex_deadlock_detector= 1; /* On by default */ + +#ifdef SAFE_MUTEX_DETECT_DESTROY +static struct st_safe_mutex_create_info_t *safe_mutex_create_root= NULL; +#endif + +static my_bool add_used_to_locked_mutex(safe_mutex_t *used_mutex, + safe_mutex_deadlock_t *locked_mutex); +static my_bool add_to_locked_mutex(safe_mutex_deadlock_t *locked_mutex, + safe_mutex_t *current_mutex); +static my_bool remove_from_locked_mutex(safe_mutex_t *mp, + safe_mutex_t *delete_mutex); +static my_bool remove_from_used_mutex(safe_mutex_deadlock_t *locked_mutex, + safe_mutex_t *mutex); +static void print_deadlock_warning(safe_mutex_t *new_mutex, + 
safe_mutex_t *conflicting_mutex); +#endif + + +/* Initialize all mutex handling */ + +void my_mutex_init() +{ + /* Initialize mutex attributes */ +#ifdef PTHREAD_ADAPTIVE_MUTEX_INITIALIZER_NP + /* + Set mutex type to "fast" a.k.a "adaptive" + + In this case the thread may steal the mutex from some other thread + that is waiting for the same mutex. This will save us some + context switches but may cause a thread to 'starve forever' while + waiting for the mutex (not likely if the code within the mutex is + short). + */ + pthread_mutexattr_init(&my_fast_mutexattr); + pthread_mutexattr_settype(&my_fast_mutexattr, + PTHREAD_MUTEX_ADAPTIVE_NP); +#endif +#ifdef PTHREAD_ERRORCHECK_MUTEX_INITIALIZER_NP + /* + Set mutex type to "errorcheck" + */ + pthread_mutexattr_init(&my_errorcheck_mutexattr); + pthread_mutexattr_settype(&my_errorcheck_mutexattr, + PTHREAD_MUTEX_ERRORCHECK); +#endif + +#if defined(SAFE_MUTEX_DEFINED) + safe_mutex_global_init(); +#endif +} + +void my_mutex_end() +{ +#ifdef PTHREAD_ADAPTIVE_MUTEX_INITIALIZER_NP + pthread_mutexattr_destroy(&my_fast_mutexattr); +#endif +#ifdef PTHREAD_ERRORCHECK_MUTEX_INITIALIZER_NP + pthread_mutexattr_destroy(&my_errorcheck_mutexattr); +#endif +} + + +/* Initialize safe_mutex handling */ + +#ifdef SAFE_MUTEX_DEFINED +void safe_mutex_global_init(void) +{ + pthread_mutex_init(&THR_LOCK_mutex,MY_MUTEX_INIT_FAST); + safe_mutex_id= safe_mutex_count= 0; + safe_mutex_deadlock_detector= 1; + +#ifdef SAFE_MUTEX_DETECT_DESTROY + safe_mutex_create_root= 0; +#endif /* SAFE_MUTEX_DETECT_DESTROY */ +} + +static inline void remove_from_active_list(safe_mutex_t *mp) +{ + if (!(mp->active_flags & (MYF_NO_DEADLOCK_DETECTION | MYF_TRY_LOCK))) + { + /* Remove mutex from active mutex linked list */ + if (mp->next) + mp->next->prev= mp->prev; + if (mp->prev) + mp->prev->next= mp->next; + else + *my_thread_var_mutex_in_use()= mp->next; + } + mp->prev= mp->next= 0; +} + +/* + We initialize the hashes for deadlock detection lazily. 
+ This greatly helps with performance when lots of mutexes are initialized but + only a few of them are actually used (eg. InnoDB). +*/ + +static int safe_mutex_lazy_init_deadlock_detection(safe_mutex_t *mp) +{ + if (!my_multi_malloc(PSI_NOT_INSTRUMENTED, MY_FAE | MY_WME, + &mp->locked_mutex, sizeof(*mp->locked_mutex), + &mp->used_mutex, sizeof(*mp->used_mutex), NullS)) + { + /* Disable deadlock handling for this mutex */ + mp->create_flags|= MYF_NO_DEADLOCK_DETECTION; + mp->active_flags|= MYF_NO_DEADLOCK_DETECTION; + return 1; /* Error */ + } + + pthread_mutex_lock(&THR_LOCK_mutex); + mp->id= ++safe_mutex_id; + pthread_mutex_unlock(&THR_LOCK_mutex); + my_hash_init2(PSI_NOT_INSTRUMENTED, mp->locked_mutex, 64, &my_charset_bin, 128, + offsetof(safe_mutex_deadlock_t, id), sizeof(mp->id), 0, 0, 0, + HASH_UNIQUE); + my_hash_init2(PSI_NOT_INSTRUMENTED, mp->used_mutex, 64, &my_charset_bin, 128, + offsetof(safe_mutex_t, id), sizeof(mp->id), 0, 0, 0, + HASH_UNIQUE); + return 0; +} + +int safe_mutex_init(safe_mutex_t *mp, + const pthread_mutexattr_t *attr __attribute__((unused)), + const char *name, const char *file, uint line) +{ + DBUG_ENTER("safe_mutex_init"); + DBUG_PRINT("enter",("mutex: 0x%lx name: %s", (ulong) mp, name)); + bzero((char*) mp,sizeof(*mp)); + pthread_mutex_init(&mp->global,MY_MUTEX_INIT_ERRCHK); + pthread_mutex_init(&mp->mutex,attr); + /* Mark that mutex is initialized */ + mp->file= file; + mp->line= line; + /* Skip the very common '&' prefix from the autogenerated name */ + mp->name= name[0] == '&' ? name + 1 : name; + + /* Deadlock detection is initialised only lazily, on first use. */ + + mp->create_flags= safe_mutex_deadlock_detector ? 0 : MYF_NO_DEADLOCK_DETECTION; + +#ifdef SAFE_MUTEX_DETECT_DESTROY + /* + Monitor the freeing of mutexes. 
This code depends on single thread init + and destroy + */ + if ((mp->info= (safe_mutex_info_t *) malloc(sizeof(safe_mutex_info_t)))) + { + struct st_safe_mutex_info_t *info= mp->info; + + info->init_file= file; + info->init_line= line; + info->prev= NULL; + info->next= NULL; + + pthread_mutex_lock(&THR_LOCK_mutex); + if ((info->next= safe_mutex_create_root)) + safe_mutex_create_root->prev= info; + safe_mutex_create_root= info; + safe_mutex_count++; + pthread_mutex_unlock(&THR_LOCK_mutex); + } +#else + pthread_mutex_lock(&THR_LOCK_mutex); + safe_mutex_count++; + pthread_mutex_unlock(&THR_LOCK_mutex); +#endif /* SAFE_MUTEX_DETECT_DESTROY */ + DBUG_RETURN(0); +} + + +int safe_mutex_lock(safe_mutex_t *mp, myf my_flags, const char *file, + uint line) +{ + int error; + DBUG_PRINT("mutex", ("%s (0x%lx) locking", mp->name ? mp->name : "Null", + (ulong) mp)); + DBUG_PUSH_EMPTY; + + pthread_mutex_lock(&mp->global); + if (!mp->file) + { + fprintf(stderr, + "safe_mutex: Trying to lock uninitialized mutex at %s, line %d\n", + file, line); + fflush(stderr); + abort(); + } + if (mp->count > 0) + { + /* + Check that we are not trying to lock mutex twice. This is an error + even if we are using 'try_lock' as it's not portably what happens + if you lock the mutex many times and this is in any case bad + behaviour that should not be encouraged + */ + if (pthread_equal(pthread_self(),mp->thread)) + { + fprintf(stderr, + "safe_mutex: Trying to lock mutex at %s, line %d, when the" + " mutex was already locked at %s, line %d in thread %s\n", + file,line,mp->file, mp->line, my_thread_name()); + fflush(stderr); + abort(); + } + } + pthread_mutex_unlock(&mp->global); + + /* + If we are imitating trylock(), we need to take special + precautions. + + - We cannot use pthread_mutex_lock() only since another thread can + overtake this thread and take the lock before this thread + causing pthread_mutex_trylock() to hang. In this case, we should + just return EBUSY. 
Hence, we use pthread_mutex_trylock() to be + able to return immediately. + + - We cannot just use trylock() and continue execution below, since + this would generate an error and abort execution if the thread + was overtaken and trylock() returned EBUSY . In this case, we + instead just return EBUSY, since this is the expected behaviour + of trylock(). + */ + if (my_flags & MYF_TRY_LOCK) + { + error= pthread_mutex_trylock(&mp->mutex); + if (error == EBUSY) + goto end; + } + else + error= pthread_mutex_lock(&mp->mutex); + + if (error || (error=pthread_mutex_lock(&mp->global))) + { + fprintf(stderr,"Got error %d when trying to lock mutex %s at %s, line %d\n", + error, mp->name, file, line); + fflush(stderr); + abort(); + } + mp->thread= pthread_self(); + if (mp->count++) + { + fprintf(stderr,"safe_mutex: Error in thread libray: Got mutex %s at %s, " + "line %d more than 1 time\n", mp->name, file,line); + fflush(stderr); + abort(); + } + mp->file= file; + mp->line= line; + mp->active_flags= mp->create_flags | my_flags; + pthread_mutex_unlock(&mp->global); + + /* Deadlock detection */ + + mp->prev= mp->next= 0; + if (!(mp->active_flags & (MYF_TRY_LOCK | MYF_NO_DEADLOCK_DETECTION)) && + (mp->used_mutex != NULL || !safe_mutex_lazy_init_deadlock_detection(mp))) + { + safe_mutex_t **mutex_in_use= my_thread_var_mutex_in_use(); + + if (!mutex_in_use) + { + /* thread has not called my_thread_init() */ + mp->active_flags|= MYF_NO_DEADLOCK_DETECTION; + } + else + { + safe_mutex_t *mutex_root; + if ((mutex_root= *mutex_in_use)) /* If not first locked */ + { + /* + Protect locked_mutex against changes if a mutex is deleted + */ + pthread_mutex_lock(&THR_LOCK_mutex); + + if (!my_hash_search(mutex_root->locked_mutex, (uchar*) &mp->id, + sizeof(mp->id))) + { + safe_mutex_deadlock_t *deadlock; + safe_mutex_t *mutex; + + /* Create object to store mutex info */ + if (!(deadlock= my_malloc(PSI_NOT_INSTRUMENTED, sizeof(*deadlock), + MYF(MY_ZEROFILL | MY_WME | MY_FAE)))) + goto 
abort_loop; + deadlock->name= mp->name; + deadlock->id= mp->id; + deadlock->mutex= mp; + /* The following is useful for debugging wrong mutex usage */ + deadlock->file= file; + deadlock->line= line; + + /* Check if potential deadlock */ + mutex= mutex_root; + do + { + if (my_hash_search(mp->locked_mutex, (uchar*) &mutex->id, + sizeof(mutex->id))) + { + print_deadlock_warning(mp, mutex); + /* Mark wrong usage to avoid future warnings for same error */ + deadlock->warning_only= 1; + add_to_locked_mutex(deadlock, mutex_root); + DBUG_ASSERT(deadlock->count > 0); + goto abort_loop; + } + } + while ((mutex= mutex->next)); + + /* + Copy current mutex and all mutex that has been locked + after current mutex (mp->locked_mutex) to all mutex that + was locked before previous mutex (mutex_root->used_mutex) + + For example if A->B would have been done before and we + are now locking (C) in B->C, then we would add C into + B->locked_mutex and A->locked_mutex + */ + my_hash_iterate(mutex_root->used_mutex, + (my_hash_walk_action) add_used_to_locked_mutex, + deadlock); + + /* + Copy all current mutex and all mutex locked after current one + into the prev mutex + */ + add_used_to_locked_mutex(mutex_root, deadlock); + DBUG_ASSERT(deadlock->count > 0); + } + abort_loop: + pthread_mutex_unlock(&THR_LOCK_mutex); + } + /* Link mutex into mutex_in_use list */ + if ((mp->next= *mutex_in_use)) + (*mutex_in_use)->prev= mp; + *mutex_in_use= mp; + } + } + +end: + DBUG_POP_EMPTY; + if (!error) + DBUG_PRINT("mutex", ("%s (0x%lx) locked", mp->name, (ulong) mp)); + return error; +} + + +int safe_mutex_unlock(safe_mutex_t *mp,const char *file, uint line) +{ + int error; + DBUG_PRINT("mutex", ("%s (0x%lx) unlocking", mp->name, (ulong) mp)); + pthread_mutex_lock(&mp->global); + if (mp->count == 0) + { + fprintf(stderr, + "safe_mutex: Trying to unlock mutex %s that wasn't locked at " + "%s, line %d\n" + "Last used at %s, line: %d\n", + mp->name ? mp->name : "Null", file, line, + mp->file ? 
mp->file : "Null", mp->line); + fflush(stderr); + abort(); + } + if (!pthread_equal(pthread_self(),mp->thread)) + { + fprintf(stderr, + "safe_mutex: Trying to unlock mutex %s at %s, line %d that was " + "locked by " + "another thread at: %s, line: %d\n", + mp->name, file, line, mp->file, mp->line); + fflush(stderr); + abort(); + } + mp->thread= 0; + mp->count--; + + remove_from_active_list(mp); + +#ifdef _WIN32 + pthread_mutex_unlock(&mp->mutex); + error=0; +#else + error=pthread_mutex_unlock(&mp->mutex); + if (error) + { + fprintf(stderr, + "safe_mutex: Got error: %d (%d) when trying to unlock mutex " + "%s at %s, line %d\n", error, errno, mp->name, file, line); + fflush(stderr); + abort(); + } +#endif /* _WIN32 */ + pthread_mutex_unlock(&mp->global); + return error; +} + + +int safe_cond_wait(pthread_cond_t *cond, safe_mutex_t *mp, const char *file, + uint line) +{ + int error; + safe_mutex_t save_state; + + pthread_mutex_lock(&mp->global); + if (mp->count == 0) + { + fprintf(stderr, + "safe_mutex: Trying to cond_wait on a unlocked mutex %s at %s, " + "line %d\n", + mp->name ? 
mp->name : "Null", file, line); + fflush(stderr); + abort(); + } + if (!pthread_equal(pthread_self(),mp->thread)) + { + fprintf(stderr, + "safe_mutex: Trying to cond_wait on a mutex %s at %s, line %d " + "that was locked by another thread at: %s, line: %d\n", + mp->name, file, line, mp->file, mp->line); + fflush(stderr); + abort(); + } + + if (mp->count-- != 1) + { + fprintf(stderr, + "safe_mutex: Count was %d on locked mutex %s at %s, line %d\n", + mp->count+1, mp->name, file, line); + fflush(stderr); + abort(); + } + save_state= *mp; + remove_from_active_list(mp); + pthread_mutex_unlock(&mp->global); + error=pthread_cond_wait(cond,&mp->mutex); + pthread_mutex_lock(&mp->global); + + if (error) + { + fprintf(stderr, + "safe_mutex: Got error: %d (%d) when doing a safe_mutex_wait on " + "%s at %s, line %d\n", error, errno, mp->name, file, line); + fflush(stderr); + abort(); + } + /* Restore state as it was before */ + mp->thread= save_state.thread; + mp->active_flags= save_state.active_flags; + mp->next= save_state.next; + mp->prev= save_state.prev; + + if (mp->count++) + { + fprintf(stderr, + "safe_mutex: Count was %d in thread 0x%lx when locking mutex %s " + "at %s, line %d\n", + mp->count-1, (ulong) my_thread_dbug_id(), mp->name, file, line); + fflush(stderr); + abort(); + } + mp->file= file; + mp->line=line; + pthread_mutex_unlock(&mp->global); + return error; +} + + +int safe_cond_timedwait(pthread_cond_t *cond, safe_mutex_t *mp, + const struct timespec *abstime, + const char *file, uint line) +{ + int error; + safe_mutex_t save_state; + + pthread_mutex_lock(&mp->global); + if (mp->count != 1 || !pthread_equal(pthread_self(),mp->thread)) + { + fprintf(stderr, + "safe_mutex: Trying to cond_wait at %s, line %d on a not hold " + "mutex %s\n", + file, line, mp->name ? 
mp->name : "Null"); + fflush(stderr); + abort(); + } + mp->count--; /* Mutex will be released */ + save_state= *mp; + remove_from_active_list(mp); + pthread_mutex_unlock(&mp->global); + error=pthread_cond_timedwait(cond,&mp->mutex,abstime); +#ifdef EXTRA_DEBUG + if (error && (error != EINTR && error != ETIMEDOUT && error != ETIME)) + { + fprintf(stderr, + "safe_mutex: Got error: %d (%d) when doing a safe_mutex_timedwait " + "on %s at %s, line %d\n", + error, errno, mp->name, file, line); + } +#endif /* EXTRA_DEBUG */ + pthread_mutex_lock(&mp->global); + /* Restore state as it was before */ + mp->thread= save_state.thread; + mp->active_flags= save_state.active_flags; + mp->next= save_state.next; + mp->prev= save_state.prev; + + if (mp->count++) + { + fprintf(stderr, + "safe_mutex: Count was %d in thread 0x%lx when locking mutex " + "%s at %s, line %d (error: %d (%d))\n", + mp->count-1, (ulong) my_thread_dbug_id(), mp->name, file, line, + error, error); + fflush(stderr); + abort(); + } + mp->file= file; + mp->line=line; + pthread_mutex_unlock(&mp->global); + return error; +} + + +int safe_mutex_destroy(safe_mutex_t *mp, const char *file, uint line) +{ + int error=0; + DBUG_ENTER("safe_mutex_destroy"); + DBUG_PRINT("enter", ("mutex: 0x%lx name: %s", (ulong) mp, mp->name)); + if (!mp->file) + { + fprintf(stderr, + "safe_mutex: Trying to destroy uninitialized mutex at %s, line %d\n", + file, line); + fflush(stderr); + abort(); + } + if (mp->count != 0) + { + fprintf(stderr, + "safe_mutex: Trying to destroy a mutex %s that was locked at %s, " + "line %d at %s, line %d\n", + mp->name, mp->file, mp->line, file, line); + fflush(stderr); + abort(); + } + + /* Free all entries that points to this one */ + safe_mutex_free_deadlock_data(mp); + +#ifdef _WIN32 + pthread_mutex_destroy(&mp->global); + pthread_mutex_destroy(&mp->mutex); +#else + if (pthread_mutex_destroy(&mp->global)) + error=1; + if (pthread_mutex_destroy(&mp->mutex)) + error=1; +#endif /* _WIN32 */ + mp->file= 0; 
/* Mark destroyed */ + +#ifdef SAFE_MUTEX_DETECT_DESTROY + if (mp->info) + { + struct st_safe_mutex_info_t *info= mp->info; + pthread_mutex_lock(&THR_LOCK_mutex); + + if (info->prev) + info->prev->next = info->next; + else + safe_mutex_create_root = info->next; + if (info->next) + info->next->prev = info->prev; + safe_mutex_count--; + + pthread_mutex_unlock(&THR_LOCK_mutex); + free(info); + mp->info= NULL; /* Get crash if double free */ + } +#else + pthread_mutex_lock(&THR_LOCK_mutex); + safe_mutex_count--; + pthread_mutex_unlock(&THR_LOCK_mutex); +#endif /* SAFE_MUTEX_DETECT_DESTROY */ + DBUG_RETURN(error); +} + + +/** + Free all data related to deadlock detection + + This is also useful together with safemalloc when you don't want to + have reports of not freed memory for mysys mutexes. +*/ + +void safe_mutex_free_deadlock_data(safe_mutex_t *mp) +{ + /* Free all entries that points to this one */ + if (!(mp->create_flags & MYF_NO_DEADLOCK_DETECTION) && mp->used_mutex != NULL) + { + pthread_mutex_lock(&THR_LOCK_mutex); + my_hash_iterate(mp->used_mutex, + (my_hash_walk_action) remove_from_locked_mutex, + mp); + my_hash_iterate(mp->locked_mutex, + (my_hash_walk_action) remove_from_used_mutex, + mp); + pthread_mutex_unlock(&THR_LOCK_mutex); + + my_hash_free(mp->used_mutex); + my_hash_free(mp->locked_mutex); + my_free(mp->locked_mutex); + mp->create_flags|= MYF_NO_DEADLOCK_DETECTION; + } +} + +/* + Free global resources and check that all mutex has been destroyed + + SYNOPSIS + safe_mutex_end() + file Print errors on this file + + NOTES + We can't use DBUG_PRINT() here as we have in my_end() disabled + DBUG handling before calling this function. + + In MySQL one may get one warning for a mutex created in my_thr_init.c + This is ok, as this thread may not yet have been exited. 
+*/ + +void safe_mutex_end(FILE *file __attribute__((unused))) +{ + if (!safe_mutex_count) /* safetly */ + pthread_mutex_destroy(&THR_LOCK_mutex); +#ifdef SAFE_MUTEX_DETECT_DESTROY + if (!file) + return; + + if (safe_mutex_count) + { + fprintf(file, "Warning: Not destroyed mutex: %lu\n", safe_mutex_count); + (void) fflush(file); + } + { + struct st_safe_mutex_info_t *ptr; + for (ptr= safe_mutex_create_root ; ptr ; ptr= ptr->next) + { + fprintf(file, "\tMutex %s initiated at line %4u in '%s'\n", + ptr->name, ptr->init_line, ptr->init_file); + (void) fflush(file); + } + } +#endif /* SAFE_MUTEX_DETECT_DESTROY */ +} + +static my_bool add_used_to_locked_mutex(safe_mutex_t *used_mutex, + safe_mutex_deadlock_t *locked_mutex) +{ + /* Add mutex to all parent of the current mutex */ + if (!locked_mutex->warning_only) + { + (void) my_hash_iterate(locked_mutex->mutex->locked_mutex, + (my_hash_walk_action) add_to_locked_mutex, + used_mutex); + /* mark that locked_mutex is locked after used_mutex */ + (void) add_to_locked_mutex(locked_mutex, used_mutex); + } + return 0; +} + + +/** + register that locked_mutex was locked after current_mutex +*/ + +static my_bool add_to_locked_mutex(safe_mutex_deadlock_t *locked_mutex, + safe_mutex_t *current_mutex) +{ + DBUG_ENTER("add_to_locked_mutex"); + DBUG_PRINT("info", ("inserting 0x%lx into 0x%lx (id: %lu -> %lu)", + (ulong) locked_mutex, (long) current_mutex, + locked_mutex->id, current_mutex->id)); + if (my_hash_insert(current_mutex->locked_mutex, (uchar*) locked_mutex)) + { + /* Got mutex through two paths; ignore */ + DBUG_RETURN(0); + } + locked_mutex->count++; + if (my_hash_insert(locked_mutex->mutex->used_mutex, + (uchar*) current_mutex)) + { + DBUG_ASSERT(0); + } + DBUG_RETURN(0); +} + + +/** + Remove mutex from the locked mutex hash + @fn remove_from_used_mutex() + @param mp Mutex that has delete_mutex in it's locked_mutex hash + @param delete_mutex Mutex should be removed from the hash + + @notes + safe_mutex_deadlock_t entries 
in the locked hash are shared. + When counter goes to 0, we delete the safe_mutex_deadlock_t entry. +*/ + +static my_bool remove_from_locked_mutex(safe_mutex_t *mp, + safe_mutex_t *delete_mutex) +{ + safe_mutex_deadlock_t *found; + DBUG_ENTER("remove_from_locked_mutex"); + DBUG_PRINT("enter", ("delete_mutex: 0x%lx mutex: 0x%lx (id: %lu <- %lu)", + (ulong) delete_mutex, (ulong) mp, + delete_mutex->id, mp->id)); + + found= (safe_mutex_deadlock_t *) my_hash_search(mp->locked_mutex, + (uchar*) &delete_mutex->id, + sizeof(delete_mutex->id)); + DBUG_ASSERT(found); + if (found) + { + if (my_hash_delete(mp->locked_mutex, (uchar*) found)) + { + DBUG_ASSERT(0); + } + if (!--found->count) + my_free(found); + } + DBUG_RETURN(0); +} + +static my_bool remove_from_used_mutex(safe_mutex_deadlock_t *locked_mutex, + safe_mutex_t *mutex) +{ + DBUG_ENTER("remove_from_used_mutex"); + DBUG_PRINT("enter", ("delete_mutex: 0x%lx mutex: 0x%lx (id: %lu <- %lu)", + (ulong) mutex, (ulong) locked_mutex, + mutex->id, locked_mutex->id)); + if (my_hash_delete(locked_mutex->mutex->used_mutex, (uchar*) mutex)) + { + DBUG_ASSERT(0); + } + if (!--locked_mutex->count) + my_free(locked_mutex); + DBUG_RETURN(0); +} + + +static void print_deadlock_warning(safe_mutex_t *new_mutex, + safe_mutex_t *parent_mutex) +{ + safe_mutex_t *mutex_root; + DBUG_ENTER("print_deadlock_warning"); + DBUG_PRINT("enter", ("mutex: %s parent: %s", + new_mutex->name, parent_mutex->name)); + + fprintf(stderr, "safe_mutex: Found wrong usage of mutex " + "'%s' and '%s'\n", + parent_mutex->name, new_mutex->name); + DBUG_PRINT("info", ("safe_mutex: Found wrong usage of mutex " + "'%s' and '%s'", + parent_mutex->name, new_mutex->name)); + fprintf(stderr, "Mutex currently locked (in reverse order):\n"); + DBUG_PRINT("info", ("Mutex currently locked (in reverse order):")); + fprintf(stderr, "%-32.32s %s line %u\n", new_mutex->name, new_mutex->file, + new_mutex->line); + DBUG_PRINT("info", ("%-32.32s %s line %u\n", new_mutex->name, + 
new_mutex->file, new_mutex->line)); + for (mutex_root= *my_thread_var_mutex_in_use() ; + mutex_root; + mutex_root= mutex_root->next) + { + fprintf(stderr, "%-32.32s %s line %u\n", mutex_root->name, + mutex_root->file, mutex_root->line); + DBUG_PRINT("info", ("%-32.32s %s line %u", mutex_root->name, + mutex_root->file, mutex_root->line)); + } + fflush(stderr); + DBUG_ASSERT(my_assert_on_error == 0); + DBUG_VOID_RETURN; +} + +#endif diff --git a/mysys/thr_rwlock.c b/mysys/thr_rwlock.c new file mode 100644 index 00000000..a8711d51 --- /dev/null +++ b/mysys/thr_rwlock.c @@ -0,0 +1,377 @@ +/* Copyright (c) 2000, 2011, Oracle and/or its affiliates. All rights reserved. + + This program is free software; you can redistribute it and/or modify + it under the terms of the GNU General Public License as published by + the Free Software Foundation; version 2 of the License. + + This program is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + GNU General Public License for more details. 
+ + You should have received a copy of the GNU General Public License + along with this program; if not, write to the Free Software + Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1335 USA */ + +/* Synchronization - readers / writer thread locks */ + +#include "mysys_priv.h" +#if defined(NEED_MY_RW_LOCK) +#include + +#ifdef _WIN32 + +int my_rw_init(my_rw_lock_t *rwp) +{ + InitializeSRWLock(&rwp->srwlock); + rwp->have_exclusive_srwlock = FALSE; + return 0; +} + + +int my_rw_rdlock(my_rw_lock_t *rwp) +{ + AcquireSRWLockShared(&rwp->srwlock); + return 0; +} + + +int my_rw_tryrdlock(my_rw_lock_t *rwp) +{ + if (!TryAcquireSRWLockShared(&rwp->srwlock)) + return EBUSY; + return 0; +} + + +int my_rw_wrlock(my_rw_lock_t *rwp) +{ + AcquireSRWLockExclusive(&rwp->srwlock); + rwp->have_exclusive_srwlock= TRUE; + return 0; +} + +int my_rw_trywrlock(my_rw_lock_t *rwp) +{ + if (!TryAcquireSRWLockExclusive(&rwp->srwlock)) + return EBUSY; + rwp->have_exclusive_srwlock= TRUE; + return 0; +} + + +int my_rw_unlock(my_rw_lock_t *rwp) +{ + if (rwp->have_exclusive_srwlock) + { + rwp->have_exclusive_srwlock= FALSE; + ReleaseSRWLockExclusive(&rwp->srwlock); + } + else + { + ReleaseSRWLockShared(&rwp->srwlock); + } + return 0; +} + +int my_rw_destroy(my_rw_lock_t* rwp) +{ + DBUG_ASSERT(!rwp->have_exclusive_srwlock); + return 0; +} + +#else + +/* + Source base from Sun Microsystems SPILT, simplified for MySQL use + -- Joshua Chamas + Some cleanup and additional code by Monty +*/ + +/* +* Multithreaded Demo Source +* +* Copyright (C) 1995 by Sun Microsystems, Inc. +* +* +* This file is a product of SunSoft, Inc. and is provided for +* unrestricted use provided that this legend is included on all +* media and as a part of the software program in whole or part. +* Users may copy, modify or distribute this file at will. 
+* +* THIS FILE IS PROVIDED AS IS WITH NO WARRANTIES OF ANY KIND INCLUDING +* THE WARRANTIES OF DESIGN, MERCHANTIBILITY AND FITNESS FOR A PARTICULAR +* PURPOSE, OR ARISING FROM A COURSE OF DEALING, USAGE OR TRADE PRACTICE. +* +* This file is provided with no support and without any obligation on the +* part of SunSoft, Inc. to assist in its use, correction, modification or +* enhancement. +* +* SUNSOFT AND SUN MICROSYSTEMS, INC. SHALL HAVE NO LIABILITY WITH RESPECT +* TO THE INFRINGEMENT OF COPYRIGHTS, TRADE SECRETS OR ANY PATENTS BY THIS +* FILE OR ANY PART THEREOF. +* +* IN NO EVENT WILL SUNSOFT OR SUN MICROSYSTEMS, INC. BE LIABLE FOR ANY +* LOST REVENUE OR PROFITS OR OTHER SPECIAL, INDIRECT AND CONSEQUENTIAL +* DAMAGES, EVEN IF THEY HAVE BEEN ADVISED OF THE POSSIBILITY OF SUCH +* DAMAGES. +* +* SunSoft, Inc. +* 2550 Garcia Avenue +* Mountain View, California 94043 +*/ + +int my_rw_init(my_rw_lock_t *rwp) +{ + pthread_condattr_t cond_attr; + + pthread_mutex_init( &rwp->lock, MY_MUTEX_INIT_FAST); + pthread_condattr_init( &cond_attr ); + pthread_cond_init( &rwp->readers, &cond_attr ); + pthread_cond_init( &rwp->writers, &cond_attr ); + pthread_condattr_destroy(&cond_attr); + + rwp->state = 0; + rwp->waiters = 0; +#ifdef SAFE_MUTEX + rwp->write_thread = 0; +#endif + + return(0); +} + + +int my_rw_destroy(my_rw_lock_t *rwp) +{ + DBUG_ASSERT(rwp->state == 0); + pthread_mutex_destroy( &rwp->lock ); + pthread_cond_destroy( &rwp->readers ); + pthread_cond_destroy( &rwp->writers ); + return(0); +} + + +int my_rw_rdlock(my_rw_lock_t *rwp) +{ + pthread_mutex_lock(&rwp->lock); + + /* active or queued writers */ + while (( rwp->state < 0 ) || rwp->waiters) + pthread_cond_wait( &rwp->readers, &rwp->lock); + + rwp->state++; + pthread_mutex_unlock(&rwp->lock); + return(0); +} + +int my_rw_tryrdlock(my_rw_lock_t *rwp) +{ + int res; + pthread_mutex_lock(&rwp->lock); + if ((rwp->state < 0 ) || rwp->waiters) + res= EBUSY; /* Can't get lock */ + else + { + res=0; + rwp->state++; + } 
+ pthread_mutex_unlock(&rwp->lock); + return(res); +} + + +int my_rw_wrlock(my_rw_lock_t *rwp) +{ + pthread_mutex_lock(&rwp->lock); + rwp->waiters++; /* another writer queued */ + + my_rw_lock_assert_not_write_owner(rwp); + + while (rwp->state) + pthread_cond_wait(&rwp->writers, &rwp->lock); + rwp->state = -1; + rwp->waiters--; +#ifdef SAFE_MUTEX + rwp->write_thread= pthread_self(); +#endif + pthread_mutex_unlock(&rwp->lock); + return(0); +} + + +int my_rw_trywrlock(my_rw_lock_t *rwp) +{ + int res; + + pthread_mutex_lock(&rwp->lock); + if (rwp->state) + res= EBUSY; /* Can't get lock */ + else + { + res=0; + rwp->state = -1; +#ifdef SAFE_MUTEX + rwp->write_thread= pthread_self(); +#endif + } + pthread_mutex_unlock(&rwp->lock); + return(res); +} + + +int my_rw_unlock(my_rw_lock_t *rwp) +{ + DBUG_PRINT("rw_unlock", + ("state: %d waiters: %d", rwp->state, rwp->waiters)); + pthread_mutex_lock(&rwp->lock); + + DBUG_ASSERT(rwp->state != 0); + + if (rwp->state == -1) /* writer releasing */ + { + my_rw_lock_assert_write_owner(rwp); + rwp->state= 0; /* mark as available */ +#ifdef SAFE_MUTEX + rwp->write_thread= 0; +#endif + + if ( rwp->waiters ) /* writers queued */ + pthread_cond_signal( &rwp->writers ); + else + pthread_cond_broadcast( &rwp->readers ); + } + else + { + if ( --rwp->state == 0 && /* no more readers */ + rwp->waiters) + pthread_cond_signal( &rwp->writers ); + } + + pthread_mutex_unlock( &rwp->lock ); + return(0); +} + +#endif /* !defined _WIN32 */ +#endif /* NEED_MY_RW_LOCK*/ + + +int rw_pr_init(rw_pr_lock_t *rwlock) +{ + pthread_mutex_init(&rwlock->lock, NULL); + pthread_cond_init(&rwlock->no_active_readers, NULL); + rwlock->active_readers= 0; + rwlock->writers_waiting_readers= 0; + rwlock->active_writer= FALSE; +#ifdef SAFE_MUTEX + rwlock->writer_thread= 0; +#endif + return 0; +} + + +int rw_pr_destroy(rw_pr_lock_t *rwlock) +{ + pthread_cond_destroy(&rwlock->no_active_readers); + pthread_mutex_destroy(&rwlock->lock); + return 0; +} + + +int 
rw_pr_rdlock(rw_pr_lock_t *rwlock) +{ + pthread_mutex_lock(&rwlock->lock); + /* + The fact that we were able to acquire 'lock' mutex means + that there are no active writers and we can acquire rd-lock. + Increment active readers counter to prevent requests for + wr-lock from succeeding and unlock mutex. + */ + rwlock->active_readers++; + pthread_mutex_unlock(&rwlock->lock); + return 0; +} + + +int rw_pr_wrlock(rw_pr_lock_t *rwlock) +{ + pthread_mutex_lock(&rwlock->lock); + + if (rwlock->active_readers != 0) + { + /* There are active readers. We have to wait until they are gone. */ + rwlock->writers_waiting_readers++; + + while (rwlock->active_readers != 0) + pthread_cond_wait(&rwlock->no_active_readers, &rwlock->lock); + + rwlock->writers_waiting_readers--; + } + + /* + We own 'lock' mutex so there is no active writers. + Also there are no active readers. + This means that we can grant wr-lock. + Not releasing 'lock' mutex until unlock will block + both requests for rd and wr-locks. + Set 'active_writer' flag to simplify unlock. + + Thanks to the fact wr-lock/unlock in the absence of + contention from readers is essentially mutex lock/unlock + with a few simple checks make this rwlock implementation + wr-lock optimized. + */ + rwlock->active_writer= TRUE; +#ifdef SAFE_MUTEX + rwlock->writer_thread= pthread_self(); +#endif + return 0; +} + + +int rw_pr_unlock(rw_pr_lock_t *rwlock) +{ + if (rwlock->active_writer) + { + /* We are unlocking wr-lock. */ +#ifdef SAFE_MUTEX + rwlock->writer_thread= 0; +#endif + rwlock->active_writer= FALSE; + if (rwlock->writers_waiting_readers) + { + /* + Avoid expensive cond signal in case when there is no contention + or it is wr-only. + + Note that from view point of performance it would be better to + signal on the condition variable after unlocking mutex (as it + reduces number of contex switches). 
+ + Unfortunately this would mean that such rwlock can't be safely + used by MDL subsystem, which relies on the fact that it is OK + to destroy rwlock once it is in unlocked state. + */ + pthread_cond_signal(&rwlock->no_active_readers); + } + pthread_mutex_unlock(&rwlock->lock); + } + else + { + /* We are unlocking rd-lock. */ + pthread_mutex_lock(&rwlock->lock); + rwlock->active_readers--; + if (rwlock->active_readers == 0 && + rwlock->writers_waiting_readers) + { + /* + If we are last reader and there are waiting + writers wake them up. + */ + pthread_cond_signal(&rwlock->no_active_readers); + } + pthread_mutex_unlock(&rwlock->lock); + } + return 0; +} + + diff --git a/mysys/thr_timer.c b/mysys/thr_timer.c new file mode 100644 index 00000000..f87c1f75 --- /dev/null +++ b/mysys/thr_timer.c @@ -0,0 +1,599 @@ +/* + Copyright (c) 2012 Monty Program Ab + + This program is free software; you can redistribute it and/or modify + it under the terms of the GNU General Public License as published by + the Free Software Foundation; version 2 or later of the License. + + This program is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + GNU General Public License for more details. + + You should have received a copy of the GNU General Public License + along with this program; if not, write to the Free Software + Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1335 USA */ + +/* + Implementation if OS independent timers. 
+ This is done based on pthread primitives, especially pthread_cond_timedwait() +*/ + +#include "mysys_priv.h" +#include "thr_timer.h" +#include +#include +#ifdef HAVE_TIMER_CREATE +#include +#endif + +struct timespec next_timer_expire_time; + +static my_bool thr_timer_inited= 0; +static mysql_mutex_t LOCK_timer; +static mysql_cond_t COND_timer; +static QUEUE timer_queue; +pthread_t timer_thread; + +#define set_max_time(abs_time) \ + { (abs_time)->MY_tv_sec= INT_MAX32; (abs_time)->MY_tv_nsec= 0; } + + +static void *timer_handler(void *arg __attribute__((unused))); + +/* + Compare two timespecs +*/ + +static int compare_timespec(void *not_used __attribute__((unused)), + uchar *a_ptr, uchar *b_ptr) +{ + return cmp_timespec((*(struct timespec*) a_ptr), + (*(struct timespec*) b_ptr)); +} + + +/** + Initialize timer variables and create timer thread + + @param alloc_timers Init allocation of timers. Will be autoextended + if needed + @return 0 ok + @return 1 error; Can't create thread +*/ + +static thr_timer_t max_timer_data; + +my_bool init_thr_timer(uint alloc_timers) +{ + pthread_attr_t thr_attr; + my_bool res= 0; + DBUG_ENTER("init_thr_timer"); + + init_queue(&timer_queue, alloc_timers+2, offsetof(thr_timer_t,expire_time), + 0, compare_timespec, NullS, + offsetof(thr_timer_t, index_in_queue)+1, 1); + mysql_mutex_init(key_LOCK_timer, &LOCK_timer, MY_MUTEX_INIT_FAST); + mysql_cond_init(key_COND_timer, &COND_timer, NULL); + + /* Set dummy element with max time into the queue to simplify usage */ + bzero(&max_timer_data, sizeof(max_timer_data)); + set_max_time(&max_timer_data.expire_time); + queue_insert(&timer_queue, (uchar*) &max_timer_data); + next_timer_expire_time= max_timer_data.expire_time; + + /* Create a thread to handle timers */ + pthread_attr_init(&thr_attr); + pthread_attr_setscope(&thr_attr,PTHREAD_SCOPE_PROCESS); + pthread_attr_setstacksize(&thr_attr,64*1024); + thr_timer_inited= 1; + if (mysql_thread_create(key_thread_timer, &timer_thread, &thr_attr, + 
timer_handler, NULL)) + { + thr_timer_inited= 0; + res= 1; + mysql_mutex_destroy(&LOCK_timer); + mysql_cond_destroy(&COND_timer); + delete_queue(&timer_queue); + } + pthread_attr_destroy(&thr_attr); + + DBUG_RETURN(res); +} + + +void end_thr_timer(void) +{ + DBUG_ENTER("end_thr_timer"); + + if (!thr_timer_inited) + DBUG_VOID_RETURN; + + mysql_mutex_lock(&LOCK_timer); + thr_timer_inited= 0; /* Signal abort */ + mysql_cond_signal(&COND_timer); + mysql_mutex_unlock(&LOCK_timer); + pthread_join(timer_thread, NULL); + + mysql_mutex_destroy(&LOCK_timer); + mysql_cond_destroy(&COND_timer); + delete_queue(&timer_queue); + DBUG_VOID_RETURN; +} + + +/* + Initialize a timer object + + @param timer_data Timer structure + @param function Function to be called when getting timeout + @param argument Argument for function +*/ + +void thr_timer_init(thr_timer_t *timer_data, void(*function)(void*), + void *arg) +{ + DBUG_ENTER("thr_timer_init"); + bzero(timer_data, sizeof(*timer_data)); + timer_data->func= function; + timer_data->func_arg= arg; + timer_data->expired= 1; /* Not active */ + DBUG_VOID_RETURN; +} + +/* + Make timer periodic + + @param timer_data Timer structure + @param micro_seconds Period +*/ +void thr_timer_set_period(thr_timer_t* timer_data, ulonglong micro_seconds) +{ + DBUG_ENTER("thr_timer_set_period"); + timer_data->period= micro_seconds; + DBUG_VOID_RETURN; +} + +/* + Request timer after X milliseconds + + SYNOPSIS + thr_timer() + timer_data Pointer to timer structure + micro_seconds; Number of microseconds until timer + + RETURN VALUES + 0 ok + 1 If no more timers are allowed (aborted by process) + + Stores in first argument a pointer to a non-zero int which is set to 0 + when the timer has been given +*/ + +my_bool thr_timer_settime(thr_timer_t *timer_data, ulonglong micro_seconds) +{ + int reschedule; + DBUG_ENTER("thr_timer_settime"); + DBUG_PRINT("enter",("thread: %s micro_seconds: %llu",my_thread_name(), + micro_seconds)); + + 
DBUG_ASSERT(timer_data->expired == 1); + + set_timespec_nsec(timer_data->expire_time, micro_seconds*1000); + timer_data->expired= 0; + + mysql_mutex_lock(&LOCK_timer); /* Lock from threads & timers */ + if (queue_insert_safe(&timer_queue,(uchar*) timer_data)) + { + DBUG_PRINT("info", ("timer queue full")); + fprintf(stderr,"Warning: thr_timer queue is full\n"); + timer_data->expired= 1; + mysql_mutex_unlock(&LOCK_timer); + DBUG_RETURN(1); + } + + /* Reschedule timer if the current one has more time left than new one */ + reschedule= cmp_timespec(next_timer_expire_time, timer_data->expire_time); + mysql_mutex_unlock(&LOCK_timer); + if (reschedule > 0) + { +#if defined(MAIN) + printf("reschedule\n"); fflush(stdout); +#endif + DBUG_PRINT("info", ("reschedule")); + mysql_cond_signal(&COND_timer); + } + + DBUG_RETURN(0); +} + + +/* + Remove timer from list of timers + + notes: Timer will be marked as expired +*/ + +void thr_timer_end(thr_timer_t *timer_data) +{ + DBUG_ENTER("thr_timer_end"); + + mysql_mutex_lock(&LOCK_timer); + if (!timer_data->expired) + { + DBUG_ASSERT(timer_data->index_in_queue != 0); + DBUG_ASSERT(queue_element(&timer_queue, timer_data->index_in_queue) == + (uchar*) timer_data); + queue_remove(&timer_queue, timer_data->index_in_queue); + /* Mark as expired for asserts to work */ + timer_data->expired= 1; + } + mysql_mutex_unlock(&LOCK_timer); + DBUG_VOID_RETURN; +} + + +/* + Come here when some timer in queue is due. 
+*/ + +static sig_handler process_timers(struct timespec *now) +{ + thr_timer_t *timer_data; + DBUG_ENTER("process_timers"); + DBUG_PRINT("info",("active timers: %d", timer_queue.elements - 1)); + +#if defined(MAIN) + printf("process_timer\n"); fflush(stdout); +#endif + + /* We can safely remove the first one as it has already expired */ + for (;;) + { + void (*function)(void*); + void *func_arg; + my_bool is_periodic; + + timer_data= (thr_timer_t*) queue_top(&timer_queue); + function= timer_data->func; + func_arg= timer_data->func_arg; + is_periodic= timer_data->period != 0; + timer_data->expired= 1; /* Mark expired */ + /* + We remove timer before calling timer function to allow thread to + delete it's timer data any time. + + Deleting timer inside the callback would not work + for periodic timers, they need to be removed from + queue prior to destroying timer_data. + */ + queue_remove_top(&timer_queue); /* Remove timer */ + (*function)(func_arg); /* Inform thread of timeout */ + + /* + If timer is periodic, recalculate next expiration time, and + reinsert it into the queue. + */ + if (is_periodic && timer_data->period) + { + set_timespec_nsec(timer_data->expire_time, timer_data->period * 1000); + timer_data->expired= 0; + queue_insert(&timer_queue, (uchar*)timer_data); + } + + /* Check if next one has also expired */ + timer_data= (thr_timer_t*) queue_top(&timer_queue); + if (cmp_timespec(timer_data->expire_time, (*now)) > 0) + break; /* All data processed */ + } + DBUG_VOID_RETURN; +} + + +/* + set up a timer thread to handle timeouts + This will be killed when thr_timer_inited is set to false. 
+*/ + +static void *timer_handler(void *arg __attribute__((unused))) +{ + my_thread_init(); + + mysql_mutex_lock(&LOCK_timer); + while (likely(thr_timer_inited)) + { + int error; + struct timespec *top_time; + struct timespec now, abstime; + + set_timespec(now, 0); + + top_time= &(((thr_timer_t*) queue_top(&timer_queue))->expire_time); + + if (cmp_timespec((*top_time), now) <= 0) + { + process_timers(&now); + top_time= &(((thr_timer_t*) queue_top(&timer_queue))->expire_time); + } + + abstime= *top_time; + next_timer_expire_time= *top_time; + if ((error= mysql_cond_timedwait(&COND_timer, &LOCK_timer, &abstime)) && + error != ETIME && error != ETIMEDOUT) + { +#ifdef MAIN + printf("Got error: %d from ptread_cond_timedwait (errno: %d)\n", + error,errno); +#endif + } + } + mysql_mutex_unlock(&LOCK_timer); + my_thread_end(); + pthread_exit(0); + return 0; /* Impossible */ +} + + +/**************************************************************************** + Testing of thr_timer (when compiled with -DMAIN) +***************************************************************************/ + +#ifdef MAIN + +static mysql_cond_t COND_thread_count; +static mysql_mutex_t LOCK_thread_count; +static uint thread_count, benchmark_runs, test_to_run= 1; + +static void send_signal(void *arg) +{ + struct st_my_thread_var *current_my_thread_var= arg; +#if defined(MAIN) + printf("sending signal\n"); fflush(stdout); +#endif + mysql_mutex_lock(¤t_my_thread_var->mutex); + mysql_cond_signal(¤t_my_thread_var->suspend); + mysql_mutex_unlock(¤t_my_thread_var->mutex); +} + + +static void run_thread_test(int param) +{ + int i,wait_time,retry; + my_hrtime_t start_time; + thr_timer_t timer_data; + struct st_my_thread_var *current_my_thread_var; + DBUG_ENTER("run_thread_test"); + + current_my_thread_var= my_thread_var; + thr_timer_init(&timer_data, send_signal, current_my_thread_var); + + for (i=1 ; i <= 10 ; i++) + { + wait_time=param ? 
11-i : i; + start_time= my_hrtime(); + + mysql_mutex_lock(¤t_my_thread_var->mutex); + if (thr_timer_settime(&timer_data, wait_time * 1000000)) + { + printf("Thread: %s timers aborted\n",my_thread_name()); + break; + } + if (wait_time == 3) + { + printf("Thread: %s Simulation of no timer needed\n",my_thread_name()); + fflush(stdout); + } + else + { + for (retry=0 ; !timer_data.expired && retry < 10 ; retry++) + { + printf("Thread: %s Waiting %d sec\n",my_thread_name(),wait_time); + mysql_cond_wait(¤t_my_thread_var->suspend, + ¤t_my_thread_var->mutex); + + } + if (!timer_data.expired) + { + printf("Thread: %s didn't get an timer. Aborting!\n", + my_thread_name()); + break; + } + } + mysql_mutex_unlock(¤t_my_thread_var->mutex); + printf("Thread: %s Slept for %g (%d) sec\n",my_thread_name(), + (int) (my_hrtime().val-start_time.val)/1000000.0, wait_time); + fflush(stdout); + thr_timer_end(&timer_data); + fflush(stdout); + } + DBUG_VOID_RETURN; +} + + +static void run_thread_benchmark(int param) +{ + int i; + struct st_my_thread_var *current_my_thread_var; + thr_timer_t timer_data; + DBUG_ENTER("run_thread_benchmark"); + + current_my_thread_var= my_thread_var; + thr_timer_init(&timer_data, send_signal, current_my_thread_var); + + for (i=1 ; i <= param ; i++) + { + if (thr_timer_settime(&timer_data, 1000000)) + { + printf("Thread: %s timers aborted\n",my_thread_name()); + break; + } + thr_timer_end(&timer_data); + } + DBUG_VOID_RETURN; +} + + +#ifdef HAVE_TIMER_CREATE + +/* Test for benchmarking posix timers against thr_timer */ + +#ifndef sigev_notify_thread_id +#define sigev_notify_thread_id _sigev_un._tid +#endif + +static void run_timer_benchmark(int param) +{ + int i; + timer_t timerid; + struct sigevent sigev; + pid_t thread_id= (pid_t) syscall(SYS_gettid); + DBUG_ENTER("run_timer_benchmark"); + + /* Setup a signal that will never be signaled */ + sigev.sigev_value.sival_ptr= 0; + sigev.sigev_signo= SIGRTMIN; /* First free signal */ + sigev.sigev_notify= 
SIGEV_SIGNAL | SIGEV_THREAD_ID; + sigev.sigev_notify_thread_id= thread_id; + + if (timer_create(CLOCK_MONOTONIC, &sigev, &timerid)) + { + printf("Could not create timer\n"); + exit(1); + } + + for (i=1 ; i <= param ; i++) + { + struct itimerspec abstime; + abstime.it_interval.tv_sec= 0; + abstime.it_interval.tv_nsec= 0; + abstime.it_value.tv_sec= 1; + abstime.it_value.tv_nsec= 0; + + if (timer_settime(timerid, 0, &abstime, NULL)) + { + printf("Thread: %s timers aborted\n",my_thread_name()); + break; + } + abstime.it_interval.tv_sec= 0; + abstime.it_interval.tv_nsec= 0; + abstime.it_value.tv_sec= 0; + abstime.it_value.tv_nsec= 0; + timer_settime(timerid, 0, &abstime, NULL); + } + timer_delete(timerid); + DBUG_VOID_RETURN; +} +#endif /* HAVE_TIMER_CREATE */ + + +static void *start_thread(void *arg) +{ + my_thread_init(); + printf("Thread %d (%s) started\n",*((int*) arg),my_thread_name()); + fflush(stdout); + + switch (test_to_run) { + case 1: + run_thread_test(*((int*) arg)); + break; + case 2: + run_thread_benchmark(benchmark_runs); + break; + case 3: +#ifdef HAVE_TIMER_CREATE + run_timer_benchmark(benchmark_runs); +#endif + break; + } + free((uchar*) arg); + mysql_mutex_lock(&LOCK_thread_count); + thread_count--; + mysql_cond_signal(&COND_thread_count); /* Tell main we are ready */ + mysql_mutex_unlock(&LOCK_thread_count); + my_thread_end(); + return 0; +} + + +/* Start a lot of threads that will run with timers */ + +static void run_test() +{ + pthread_t tid; + pthread_attr_t thr_attr; + int i,*param,error; + DBUG_ENTER("run_test"); + + if (init_thr_timer(5)) + { + printf("Can't initialize timers\n"); + exit(1); + } + + mysql_mutex_init(0, &LOCK_thread_count, MY_MUTEX_INIT_FAST); + mysql_cond_init(0, &COND_thread_count, NULL); + + thr_setconcurrency(3); + pthread_attr_init(&thr_attr); + pthread_attr_setscope(&thr_attr,PTHREAD_SCOPE_PROCESS); + printf("Main thread: %s\n",my_thread_name()); + for (i=0 ; i < 2 ; i++) + { + param=(int*) malloc(sizeof(int)); + *param= 
i; + mysql_mutex_lock(&LOCK_thread_count); + if ((error= mysql_thread_create(0, + &tid, &thr_attr, start_thread, + (void*) param))) + { + printf("Can't create thread %d, error: %d\n",i,error); + exit(1); + } + thread_count++; + mysql_mutex_unlock(&LOCK_thread_count); + } + + pthread_attr_destroy(&thr_attr); + mysql_mutex_lock(&LOCK_thread_count); + while (thread_count) + { + mysql_cond_wait(&COND_thread_count, &LOCK_thread_count); + } + mysql_mutex_unlock(&LOCK_thread_count); + DBUG_ASSERT(timer_queue.elements == 1); + end_thr_timer(); + printf("Test succeeded\n"); + DBUG_VOID_RETURN; +} + + +int main(int argc __attribute__((unused)),char **argv __attribute__((unused))) +{ + MY_INIT(argv[0]); + + if (argc > 1 && argv[1][0] == '-') + { + switch (argv[1][1]) { + case '#': + test_to_run= 1; + DBUG_PUSH(argv[1]+2); + break; + case 'b': + test_to_run= 2; + benchmark_runs= atoi(argv[1]+2); + break; + case 't': + test_to_run= 3; + benchmark_runs= atoi(argv[1]+2); + break; + } + } + if (!benchmark_runs) + benchmark_runs= 1000000; + + run_test(); + my_end(1); + return 0; +} + +#endif /* MAIN */ diff --git a/mysys/tree.c b/mysys/tree.c new file mode 100644 index 00000000..cd44f779 --- /dev/null +++ b/mysys/tree.c @@ -0,0 +1,804 @@ +/* Copyright (c) 2000, 2016, Oracle and/or its affiliates. + Copyright (c) 2010, 2016, MariaDB + + This program is free software; you can redistribute it and/or modify + it under the terms of the GNU General Public License as published by + the Free Software Foundation; version 2 of the License. + + This program is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + GNU General Public License for more details. 
+ + You should have received a copy of the GNU General Public License + along with this program; if not, write to the Free Software + Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1335 USA */ + +/* + Code for handling red-black (balanced) binary trees. + key in tree is allocated according to the following: + + 1) If size < 0 then tree will not allocate keys and only a pointer to + each key is saved in tree. + compare and search functions uses and returns key-pointer + + 2) If size == 0 then there are two options: + - key_size != 0 to tree_insert: The key will be stored in the tree. + - key_size == 0 to tree_insert: A pointer to the key is stored. + compare and search functions uses and returns key-pointer. + + 3) if key_size is given to init_tree then each node will continue the + key and calls to insert_key may increase length of key. + if key_size > sizeof(pointer) and key_size is a multiple of 8 (double + align) then key will be put on an 8 aligned address. Else + the key will be on address (element+1). This is transparent for user + compare and search functions uses a pointer to given key-argument. + + - If you use a free function for tree-elements and you are freeing + the element itself, you should use key_size = 0 to init_tree and + tree_search + + The actual key in TREE_ELEMENT is saved as a pointer or after the + TREE_ELEMENT struct. + If one uses only pointers in tree one can use tree_set_pointer() to + change address of data. + + Implemented by monty. +*/ + +/* + NOTE: + tree->compare function should be ALWAYS called as + (*tree->compare)(custom_arg, ELEMENT_KEY(tree,element), key) + and not other way around, as + (*tree->compare)(custom_arg, key, ELEMENT_KEY(tree,element)) + + ft_boolean_search.c (at least) relies on that. 
+*/ + +#include "mysys_priv.h" +#include <m_string.h> +#include <my_tree.h> +#include "my_base.h" + +#define BLACK 1 +#define RED 0 +#define DEFAULT_ALLOC_SIZE 8192 +#define DEFAULT_ALIGN_SIZE 8192 + +static int delete_tree_element(TREE *,TREE_ELEMENT *, my_bool abort); +static int tree_walk_left_root_right(TREE *,TREE_ELEMENT *, + tree_walk_action,void *); +static int tree_walk_right_root_left(TREE *,TREE_ELEMENT *, + tree_walk_action,void *); +static void left_rotate(TREE_ELEMENT **parent,TREE_ELEMENT *leaf); +static void right_rotate(TREE_ELEMENT **parent, TREE_ELEMENT *leaf); +static void rb_insert(TREE *tree,TREE_ELEMENT ***parent, + TREE_ELEMENT *leaf); +static void rb_delete_fixup(TREE *tree,TREE_ELEMENT ***parent); + +/* Shared sentinel: missing children and the root of an empty tree point here; its own child pointers are NULL */ +static TREE_ELEMENT null_element= { NULL, NULL, 0, BLACK }; + +/* The actual code for handling binary trees */ + +#ifndef DBUG_OFF +static int test_rb_tree(TREE_ELEMENT *element); +#endif + +/* + Initialise an empty tree. + default_alloc_size is raised to at least DEFAULT_ALLOC_SIZE and aligned + to DEFAULT_ALIGN_SIZE. Keys are stored directly after the TREE_ELEMENT + when no free function is given and size is <= sizeof(void*) or not a + multiple of sizeof(void*); otherwise they are reached through a pointer. + A mem_root is only set up when MY_TREE_WITH_DELETE is not in my_flags. +*/ +void init_tree(TREE *tree, size_t default_alloc_size, size_t memory_limit, + int size, qsort_cmp2 compare, + tree_element_free free_element, void *custom_arg, + myf my_flags) +{ + DBUG_ENTER("init_tree"); + DBUG_PRINT("enter",("tree: %p size: %d", tree, size)); + + if (default_alloc_size < DEFAULT_ALLOC_SIZE) + default_alloc_size= DEFAULT_ALLOC_SIZE; + default_alloc_size= MY_ALIGN(default_alloc_size, DEFAULT_ALIGN_SIZE); + tree->root= &null_element; + tree->compare=compare; + tree->size_of_element= size > 0 ? (uint) size : 0; + tree->memory_limit=memory_limit; + tree->free=free_element; + tree->allocated=0; + tree->elements_in_tree=0; + tree->custom_arg = custom_arg; + tree->my_flags= my_flags; + tree->flag= 0; + if (!free_element && size >= 0 && + ((uint) size <= sizeof(void*) || ((uint) size & (sizeof(void*)-1)))) + { + /* + We know that the data doesn't have to be aligned (like if the key + contains a double), so we can store the data combined with the + TREE_ELEMENT. 
+ */ + tree->offset_to_key=sizeof(TREE_ELEMENT); /* Put key after element */ + /* Fix allocation size so that we don't lose any memory */ + default_alloc_size/=(sizeof(TREE_ELEMENT)+size); + if (!default_alloc_size) + default_alloc_size=1; + default_alloc_size*=(sizeof(TREE_ELEMENT)+size); + } + else + { + tree->offset_to_key=0; /* use key through pointer */ + tree->size_of_element+=sizeof(void*); + } + if (!(tree->with_delete= MY_TEST(my_flags & MY_TREE_WITH_DELETE))) + { + init_alloc_root(key_memory_TREE, &tree->mem_root, default_alloc_size, 0, + MYF(my_flags)); + tree->mem_root.min_malloc= sizeof(TREE_ELEMENT)+tree->size_of_element; + } + DBUG_VOID_RETURN; +} + +/* + Common worker for delete_tree() and reset_tree(): free all elements, + pass free_flags to free_root() for trees without delete support, and + leave the tree empty. Returns the first non-zero result reported while + deleting elements, else 0. +*/ +static int free_tree(TREE *tree, my_bool abort, myf free_flags) +{ + int error, first_error= 0; + DBUG_ENTER("free_tree"); + DBUG_PRINT("enter",("tree: %p", tree)); + + if (tree->root) /* If initialized */ + { + if (tree->with_delete) + { + if ((error= delete_tree_element(tree, tree->root, abort))) + { + first_error= first_error ? first_error : error; + abort= 1; + } + } + else + { + if (tree->free) + { + if (tree->memory_limit) + (*tree->free)(NULL, free_init, tree->custom_arg); + if ((error= delete_tree_element(tree, tree->root, abort))) + first_error= first_error ? first_error : error; + if (tree->memory_limit) + (*tree->free)(NULL, free_end, tree->custom_arg); + } + free_root(&tree->mem_root, free_flags); + } + } + tree->root= &null_element; + tree->elements_in_tree=0; + tree->allocated=0; + + DBUG_RETURN(first_error); +} + + +/** + Delete tree. + + @param tree Tree + @param abort 0 if normal, 1 if tree->free should not be called. + + @return 0 ok + <> 0 Returns first <> 0 from (tree->free)(*,free_free,*) + + @Notes + If one (tree->free)(,free_free,) returns <> 0, no future + tree->free(*,free_free,*) will be called. 
+ Other tree->free operations (free_init, free_end) will be called +*/ + + +int delete_tree(TREE* tree, my_bool abort) +{ + return free_tree(tree, abort, MYF(0)); /* my_free() mem_root if applicable */ +} + +/* Empty the tree (abort is 0, so tree->free is called for the elements) */ +int reset_tree(TREE* tree) +{ + /* do not free mem_root, just mark blocks as free */ + return free_tree(tree, 0, MYF(MY_MARK_BLOCKS_FREE)); +} + + +/* + Recursively release the subtree rooted at element (left, node, right). + tree->free is called for a key only while abort is 0; the first non-zero + result turns abort on for the rest of the walk and is returned. + Elements themselves are my_free()'d only for trees with delete support. +*/ +static int delete_tree_element(TREE *tree, TREE_ELEMENT *element, + my_bool abort) +{ + int error, first_error= 0; + if (element != &null_element) + { + if ((first_error= delete_tree_element(tree, element->left, abort))) + abort= 1; + if (!abort && tree->free) + { + if ((error= (*tree->free)(ELEMENT_KEY(tree,element), free_free, + tree->custom_arg))) + { + first_error= first_error ? first_error : error; + abort= 1; + } + } + if ((error= delete_tree_element(tree, element->right, abort))) + first_error= first_error ? first_error : error; + if (tree->with_delete) + my_free(element); + } + return first_error; +} + + +/* + insert, search and delete of elements + + The following should be true: + parent[0] = & parent[-1][0]->left || + parent[0] = & parent[-1][0]->right +*/ + +/* + Insert key, or bump the count of an existing equal element. + Returns the element, NULL when allocation fails or when the key exists + and TREE_NO_DUPS is set, or TREE_ELEMENT_UNIQUE when the key is new and + TREE_ONLY_DUPS is set. When memory_limit would be exceeded the tree is + reset first and the insert is retried. +*/ +TREE_ELEMENT *tree_insert(TREE *tree, void *key, uint key_size, + void* custom_arg) +{ + int cmp; + TREE_ELEMENT *element,***parent; + + parent= tree->parents; + *parent = &tree->root; element= tree->root; + for (;;) + { + if (element == &null_element || + (cmp = (*tree->compare)(custom_arg, ELEMENT_KEY(tree,element), + key)) == 0) + break; + if (cmp < 0) + { + *++parent= &element->right; element= element->right; + } + else + { + *++parent = &element->left; element= element->left; + } + } + if (element == &null_element) + { + uint alloc_size; + if (tree->flag & TREE_ONLY_DUPS) + return TREE_ELEMENT_UNIQUE; + alloc_size=sizeof(TREE_ELEMENT)+key_size+tree->size_of_element; + tree->allocated+=alloc_size; + + if (tree->memory_limit && tree->elements_in_tree + && tree->allocated > tree->memory_limit) + { + reset_tree(tree); + return tree_insert(tree, key, 
key_size, custom_arg); + } + + key_size+=tree->size_of_element; + if (tree->with_delete) + element=(TREE_ELEMENT *) my_malloc(key_memory_TREE, alloc_size, + MYF(tree->my_flags | MY_WME)); + else + element=(TREE_ELEMENT *) alloc_root(&tree->mem_root,alloc_size); + if (!element) + { + tree->allocated-= alloc_size; /* nothing was stored: undo the accounting above */ + return(NULL); + } + **parent=element; + element->left=element->right= &null_element; + if (!tree->offset_to_key) + { + if (key_size == sizeof(void*)) /* no length, save pointer */ + *((void**) (element+1))=key; + else + { + *((void**) (element+1))= (void*) ((void **) (element+1)+1); + memcpy((uchar*) *((void **) (element+1)),key, + (size_t) (key_size-sizeof(void*))); + } + } + else + memcpy((uchar*) element+tree->offset_to_key,key,(size_t) key_size); + element->count=1; /* May give warning in purify */ + tree->elements_in_tree++; + rb_insert(tree,parent,element); /* rebalance tree */ + } + else + { + if (tree->flag & TREE_NO_DUPS) + return(NULL); + element->count++; + /* Avoid a wrap over of the count. */ + if (! element->count) + element->count--; + } + DBUG_EXECUTE("check_tree", test_rb_tree(tree->root);); + return element; +} + +/* + Remove the element equal to key. + Returns 0 on success and 1 when the tree was not created with delete + support or the key is not in the tree. +*/ +int tree_delete(TREE *tree, void *key, uint key_size, void *custom_arg) +{ + int cmp,remove_colour; + TREE_ELEMENT *element,***parent, ***org_parent, *nod; + if (!tree->with_delete) + return 1; /* not allowed */ + + parent= tree->parents; + *parent= &tree->root; element= tree->root; + for (;;) + { + if (element == &null_element) + return 1; /* Was not in tree */ + if ((cmp = (*tree->compare)(custom_arg, ELEMENT_KEY(tree,element), + key)) == 0) + break; + if (cmp < 0) + { + *++parent= &element->right; element= element->right; + } + else + { + *++parent = &element->left; element= element->left; + } + } + if (element->left == &null_element) + { + (**parent)=element->right; + remove_colour= element->colour; + } + else if (element->right == &null_element) + { + (**parent)=element->left; + remove_colour= element->colour; + } + else + { + /* Two children: replace element with the leftmost node of its right subtree */ + org_parent= parent; + *++parent= 
&element->right; nod= element->right; + while (nod->left != &null_element) + { + *++parent= &nod->left; nod= nod->left; + } + (**parent)=nod->right; /* unlink nod from tree */ + remove_colour= nod->colour; + org_parent[0][0]=nod; /* put y in place of element */ + org_parent[1]= &nod->right; + nod->left=element->left; + nod->right=element->right; + nod->colour=element->colour; + } + if (remove_colour == BLACK) + rb_delete_fixup(tree,parent); + if (tree->free) + (*tree->free)(ELEMENT_KEY(tree,element), free_free, tree->custom_arg); + tree->allocated-= sizeof(TREE_ELEMENT) + tree->size_of_element + key_size; + my_free(element); + tree->elements_in_tree--; + return 0; +} + + +/* Return the stored key that compares equal to key, or NULL if there is none */ +void *tree_search(TREE *tree, void *key, void *custom_arg) +{ + int cmp; + TREE_ELEMENT *element=tree->root; + + for (;;) + { + if (element == &null_element) + return (void*) 0; + if ((cmp = (*tree->compare)(custom_arg, ELEMENT_KEY(tree,element), + key)) == 0) + return ELEMENT_KEY(tree,element); + if (cmp < 0) + element=element->right; + else + element=element->left; + } +} + +/* + Search for key according to flag and record the path from the root in + parents[] (parents[0] is set to the sentinel). *last_pos is set to the + path slot of the found element. Returns the found key, or NULL when no + element qualifies or flag is not handled. +*/ +void *tree_search_key(TREE *tree, const void *key, + TREE_ELEMENT **parents, TREE_ELEMENT ***last_pos, + enum ha_rkey_function flag, void *custom_arg) +{ + int cmp; + TREE_ELEMENT *element= tree->root; + TREE_ELEMENT **last_left_step_parent= NULL, **last_right_step_parent= NULL; + TREE_ELEMENT **last_equal_element= NULL; + +/* + TODO: support for HA_READ_KEY_OR_PREV, HA_READ_PREFIX flags if needed. 
+*/ + + *parents = &null_element; + while (element != &null_element) + { + *++parents= element; + if ((cmp= (*tree->compare)(custom_arg, ELEMENT_KEY(tree, element), + key)) == 0) + { + /* On an equal key, pick the direction in which the search continues */ + switch (flag) { + case HA_READ_KEY_EXACT: + case HA_READ_KEY_OR_NEXT: + case HA_READ_BEFORE_KEY: + case HA_READ_KEY_OR_PREV: + last_equal_element= parents; + cmp= 1; + break; + case HA_READ_AFTER_KEY: + cmp= -1; + break; + case HA_READ_PREFIX_LAST: + case HA_READ_PREFIX_LAST_OR_PREV: + last_equal_element= parents; + cmp= -1; + break; + default: + return NULL; + } + } + if (cmp < 0) /* element < key */ + { + last_right_step_parent= parents; + element= element->right; + } + else + { + last_left_step_parent= parents; + element= element->left; + } + } + switch (flag) { + case HA_READ_KEY_EXACT: + case HA_READ_PREFIX_LAST: + *last_pos= last_equal_element; + break; + case HA_READ_KEY_OR_NEXT: + *last_pos= last_equal_element ? last_equal_element : last_left_step_parent; + break; + case HA_READ_AFTER_KEY: + *last_pos= last_left_step_parent; + break; + case HA_READ_PREFIX_LAST_OR_PREV: + *last_pos= last_equal_element ? last_equal_element : last_right_step_parent; + break; + case HA_READ_BEFORE_KEY: + *last_pos= last_right_step_parent; + break; + case HA_READ_KEY_OR_PREV: + *last_pos= last_equal_element ? last_equal_element : last_right_step_parent; + break; + default: + return NULL; + } + return *last_pos ? ELEMENT_KEY(tree, **last_pos) : NULL; +} + +/* + Search first (the most left) or last (the most right) tree element +*/ +void *tree_search_edge(TREE *tree, TREE_ELEMENT **parents, + TREE_ELEMENT ***last_pos, int child_offs) +{ + TREE_ELEMENT *element= tree->root; + + *parents= &null_element; + while (element != &null_element) + { + *++parents= element; + element= ELEMENT_CHILD(element, child_offs); + } + *last_pos= parents; + return **last_pos != &null_element ? 
+ ELEMENT_KEY(tree, **last_pos) : NULL; +} + +/* + Step from the element at *last_pos to its neighbour, using the recorded + path; l_offs and r_offs select the child pointers and thereby the + direction. Returns the neighbour's key, or NULL when the walk reaches + the sentinel at the start of the path. +*/ +void *tree_search_next(TREE *tree, TREE_ELEMENT ***last_pos, int l_offs, + int r_offs) +{ + TREE_ELEMENT *x= **last_pos; + + if (ELEMENT_CHILD(x, r_offs) != &null_element) + { + x= ELEMENT_CHILD(x, r_offs); + *++*last_pos= x; + while (ELEMENT_CHILD(x, l_offs) != &null_element) + { + x= ELEMENT_CHILD(x, l_offs); + *++*last_pos= x; + } + return ELEMENT_KEY(tree, x); + } + else + { + TREE_ELEMENT *y= *--*last_pos; + while (y != &null_element && x == ELEMENT_CHILD(y, r_offs)) + { + x= y; + y= *--*last_pos; + } + return y == &null_element ? NULL : ELEMENT_KEY(tree, y); + } +} + +/* + Expected that tree is fully balanced + (each path from root to leaf has the same length) +*/ +ha_rows tree_record_pos(TREE *tree, const void *key, + enum ha_rkey_function flag, void *custom_arg) +{ + int cmp; + TREE_ELEMENT *element= tree->root; + double left= 1; + double right= tree->elements_in_tree; + + while (element != &null_element) + { + if ((cmp= (*tree->compare)(custom_arg, ELEMENT_KEY(tree, element), + key)) == 0) + { + switch (flag) { + case HA_READ_KEY_EXACT: + case HA_READ_BEFORE_KEY: + cmp= 1; + break; + case HA_READ_AFTER_KEY: + cmp= -1; + break; + default: + return HA_POS_ERROR; + } + } + /* Halve the [left, right] position estimate on every step down */ + if (cmp < 0) /* element < key */ + { + element= element->right; + left= (left + right) / 2; + } + else + { + element= element->left; + right= (left + right) / 2; + } + } + switch (flag) { + case HA_READ_KEY_EXACT: + case HA_READ_BEFORE_KEY: + return (ha_rows) right; + case HA_READ_AFTER_KEY: + return (ha_rows) left; + default: + return HA_POS_ERROR; + } +} + +/* + Call action for every element in the order given by visit. The walk + stops at the first non-zero result from action and returns it. +*/ +int tree_walk(TREE *tree, tree_walk_action action, void *argument, TREE_WALK visit) +{ + switch (visit) { + case left_root_right: + return tree_walk_left_root_right(tree,tree->root,action,argument); + case right_root_left: + return tree_walk_right_root_left(tree,tree->root,action,argument); + } + return 0; /* Keep gcc happy */ +} + +/* In-order walk; relies on null_element being the only element with NULL child pointers */ +static int tree_walk_left_root_right(TREE *tree, 
TREE_ELEMENT *element, tree_walk_action action, void *argument) +{ + int error; + if (element->left) /* Not null_element */ + { + if ((error=tree_walk_left_root_right(tree,element->left,action, + argument)) == 0 && + (error=(*action)(ELEMENT_KEY(tree,element), + (element_count) element->count, + argument)) == 0) + error=tree_walk_left_root_right(tree,element->right,action,argument); + return error; + } + return 0; +} + +/* Mirror image of tree_walk_left_root_right(): right subtree, element, left subtree */ +static int tree_walk_right_root_left(TREE *tree, TREE_ELEMENT *element, tree_walk_action action, void *argument) +{ + int error; + if (element->right) /* Not null_element */ + { + if ((error=tree_walk_right_root_left(tree,element->right,action, + argument)) == 0 && + (error=(*action)(ELEMENT_KEY(tree,element), + (element_count) element->count, + argument)) == 0) + error=tree_walk_right_root_left(tree,element->left,action,argument); + return error; + } + return 0; +} + + + /* Functions to fix up the tree after insert and delete */ + +/* Rotate left around leaf: its right child takes its place in *parent */ +static void left_rotate(TREE_ELEMENT **parent, TREE_ELEMENT *leaf) +{ + TREE_ELEMENT *y; + + y=leaf->right; + leaf->right=y->left; + parent[0]=y; + y->left=leaf; +} + +/* Rotate right around leaf: its left child takes its place in *parent */ +static void right_rotate(TREE_ELEMENT **parent, TREE_ELEMENT *leaf) +{ + TREE_ELEMENT *x; + + x=leaf->left; + leaf->left=x->right; + parent[0]=x; + x->right=leaf; +} + +/* + Restore the red-black properties after leaf has been linked in. + parent points into the path of link pointers built by tree_insert(). +*/ +static void rb_insert(TREE *tree, TREE_ELEMENT ***parent, TREE_ELEMENT *leaf) +{ + TREE_ELEMENT *y,*par,*par2; + + leaf->colour=RED; + while (leaf != tree->root && (par=parent[-1][0])->colour == RED) + { + if (par == (par2=parent[-2][0])->left) + { + y= par2->right; + if (y->colour == RED) + { + par->colour=BLACK; + y->colour=BLACK; + leaf=par2; + parent-=2; + leaf->colour=RED; /* And the loop continues */ + } + else + { + if (leaf == par->right) + { + left_rotate(parent[-1],par); + par=leaf; /* leaf is now parent to old leaf */ + } + par->colour=BLACK; + par2->colour=RED; + right_rotate(parent[-2],par2); + break; + } + } + else + { + y= par2->left; + if (y->colour == RED) + { + 
par->colour=BLACK; + y->colour=BLACK; + leaf=par2; + parent-=2; + leaf->colour=RED; /* And the loop continues */ + } + else + { + if (leaf == par->left) + { + right_rotate(parent[-1],par); + par=leaf; + } + par->colour=BLACK; + par2->colour=RED; + left_rotate(parent[-2],par2); + break; + } + } + } + tree->root->colour=BLACK; +} + +/* + Restore the red-black properties after a BLACK element was unlinked. + parent points at the path entry of the link that now holds the + replacement node. +*/ +static void rb_delete_fixup(TREE *tree, TREE_ELEMENT ***parent) +{ + TREE_ELEMENT *x,*w,*par; + + x= **parent; + while (x != tree->root && x->colour == BLACK) + { + if (x == (par=parent[-1][0])->left) + { + w=par->right; + if (w->colour == RED) + { + w->colour=BLACK; + par->colour=RED; + left_rotate(parent[-1],par); + parent[0]= &w->left; + *++parent= &par->left; + w=par->right; + } + if (w->left->colour == BLACK && w->right->colour == BLACK) + { + w->colour=RED; + x=par; + parent--; + } + else + { + if (w->right->colour == BLACK) + { + w->left->colour=BLACK; + w->colour=RED; + right_rotate(&par->right,w); + w=par->right; + } + w->colour=par->colour; + par->colour=BLACK; + w->right->colour=BLACK; + left_rotate(parent[-1],par); + x=tree->root; + break; + } + } + else + { + w=par->left; + if (w->colour == RED) + { + w->colour=BLACK; + par->colour=RED; + right_rotate(parent[-1],par); + parent[0]= &w->right; + *++parent= &par->right; + w=par->left; + } + if (w->right->colour == BLACK && w->left->colour == BLACK) + { + w->colour=RED; + x=par; + parent--; + } + else + { + if (w->left->colour == BLACK) + { + w->right->colour=BLACK; + w->colour=RED; + left_rotate(&par->left,w); + w=par->left; + } + w->colour=par->colour; + par->colour=BLACK; + w->left->colour=BLACK; + right_rotate(parent[-1],par); + x=tree->root; + break; + } + } + } + x->colour=BLACK; +} + +#ifndef DBUG_OFF + + /* Test that the properties for a red-black tree holds */ + +/* Returns the black count of the subtree below element, or -1 (after printing why) when a property is violated */ +static int test_rb_tree(TREE_ELEMENT *element) +{ + int count_l,count_r; + + if (!element->left) + return 0; /* Found end of tree */ + if (element->colour == RED && + (element->left->colour == RED || 
element->right->colour == RED)) + { + printf("Wrong tree: Found two red in a row\n"); + return -1; + } + count_l=test_rb_tree(element->left); + count_r=test_rb_tree(element->right); + if (count_l >= 0 && count_r >= 0) + { + if (count_l == count_r) + return count_l+(element->colour == BLACK); + printf("Wrong tree: Incorrect black-count: %d - %d\n",count_l,count_r); + } + return -1; +} +#endif diff --git a/mysys/typelib.c b/mysys/typelib.c new file mode 100644 index 00000000..03f3609b --- /dev/null +++ b/mysys/typelib.c @@ -0,0 +1,399 @@ +/* Copyright (c) 2000, 2011, Oracle and/or its affiliates. All rights reserved. + Copyright (c) 2009, 2013, Monty Program Ab. + + This program is free software; you can redistribute it and/or modify + it under the terms of the GNU General Public License as published by + the Free Software Foundation; version 2 of the License. + + This program is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + GNU General Public License for more details. 
+ + You should have received a copy of the GNU General Public License + along with this program; if not, write to the Free Software + Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1335 USA */ + +/* Functions to handle typelib */ + +#include "mysys_priv.h" +#include <m_string.h> +#include <m_ctype.h> + + +#define is_field_separator(F, X) \ + ((F & FIND_TYPE_COMMA_TERM) && ((X) == ',' || (X) == '=')) + +/* + Look up x in typelib with find_type(); when the result is <= 0, print + a message naming 'option' and the list of alternatives to stderr. + Returns the find_type() result unchanged. +*/ +int find_type_with_warning(const char *x, TYPELIB *typelib, const char *option) +{ + int res; + const char **ptr; + + if ((res= find_type((char *) x, typelib, FIND_TYPE_BASIC)) <= 0) + { + ptr= typelib->type_names; + if (!*x) + fprintf(stderr, "No option given to %s\n", option); + else + fprintf(stderr, "Unknown option to %s: %s\n", option, x); + if (*ptr) /* an empty typelib has no alternatives to list */ + { + fprintf(stderr, "Alternatives are: '%s'", *ptr); + while (*++ptr) + fprintf(stderr, ",'%s'", *ptr); + fprintf(stderr, "\n"); + } + } + return res; +} + + +/** + Search after a string in a list of strings. Endspace in x is not compared. + + @param x pointer to string to find + (not necessarily zero-terminated). + by return it'll be advanced to point to the terminator. + @param typelib TYPELIB (struct of pointer to values + count) + @param flags flags to tune behaviour: a combination of + FIND_TYPE_NO_PREFIX + FIND_TYPE_COMMA_TERM. + @param eol a pointer to the end of the string. 
+ + @retval + -1 Too many matching values + @retval + 0 No matching value + @retval + >0 Offset+1 in typelib for matched string +*/ + + +static int find_type_eol(const char **x, const TYPELIB *typelib, uint flags, + const char *eol) +{ + int find,pos; + int UNINIT_VAR(findpos); /* guarded by find */ + const char *UNINIT_VAR(termptr); + const char *i; + const char *j; + CHARSET_INFO *cs= &my_charset_latin1; + DBUG_ENTER("find_type_eol"); + DBUG_PRINT("enter",("x: '%s' lib: %p", *x, typelib)); + + DBUG_ASSERT(!(flags & ~(FIND_TYPE_NO_PREFIX | FIND_TYPE_COMMA_TERM))); + + if (!typelib->count) + { + DBUG_PRINT("exit",("no count")); + DBUG_RETURN(0); + } + find=0; + for (pos=0 ; (j=typelib->type_names[pos]) ; pos++) + { + /* Case-insensitive compare of the input against name number pos */ + for (i=*x ; + i < eol && !is_field_separator(flags, *i) && + my_toupper(cs, *i) == my_toupper(cs, *j) ; i++, j++) ; + if (! *j) + { + while (i < eol && *i == ' ') + i++; /* skip_end_space */ + if (i >= eol || is_field_separator(flags, *i)) + { + *x= i; + DBUG_RETURN(pos+1); + } + } + /* + Input exhausted inside the name: remember it as a prefix match. + *i is deliberately not read here; i >= eol is outside the input, + which does not have to be zero-terminated. + */ + if (i >= eol && + (!*j || !(flags & FIND_TYPE_NO_PREFIX))) + { + find++; + findpos=pos; + termptr=i; + } + } + if (find == 0 || *x == eol) + { + DBUG_PRINT("exit",("Couldn't find type")); + DBUG_RETURN(0); + } + else if (find != 1 || (flags & FIND_TYPE_NO_PREFIX)) + { + DBUG_PRINT("exit",("Too many possibilities")); + DBUG_RETURN(-1); + } + *x= termptr; + DBUG_RETURN(findpos+1); +} /* find_type_eol */ + + +/** + Search after a string in a list of strings. Endspace in x is not compared. + + Same as find_type_eol, but for zero-terminated strings, + and without advancing the pointer. 
+*/ +int find_type(const char *x, const TYPELIB *typelib, uint flags) +{ + return find_type_eol(&x, typelib, flags, x + strlen(x)); +} + +/** + Get name of type nr + + @note + first type is 1, 0 = empty field +*/ + +void make_type(register char * to, register uint nr, + register TYPELIB *typelib) +{ + DBUG_ENTER("make_type"); + if (!nr) + to[0]=0; + else + (void) strmov(to,get_type(typelib,nr-1)); + DBUG_VOID_RETURN; +} /* make_type */ + + +/** + Get type + + @note + first type is 0 +*/ + +const char *get_type(TYPELIB *typelib, uint nr) +{ + if (nr < (uint) typelib->count && typelib->type_names) + return(typelib->type_names[nr]); + return "?"; +} + + +/** + Create an integer value to represent the supplied comma-separated + string where each string in the TYPELIB denotes a bit position. + + @param x string to decompose + @param lib TYPELIB (struct of pointer to values + count) + @param err index (not char position) of string element which was not + found or 0 if there was no error + + @retval + a integer representation of the supplied string +*/ + +my_ulonglong find_typeset(const char *x, TYPELIB *lib, int *err) +{ + my_ulonglong result; + int find; + const char *i; + DBUG_ENTER("find_set"); + DBUG_PRINT("enter",("x: '%s' lib: %p", x, lib)); + + *err= 0; /* also defined on the early return below */ + if (!lib->count) + { + DBUG_PRINT("exit",("no count")); + DBUG_RETURN(0); + } + result= 0; + while (*x) + { + (*err)++; + i= x; + while (*x && *x != ',') + x++; + if (x[0] && x[1]) /* skip separator if found */ + x++; + if ((find= find_type(i, lib, FIND_TYPE_COMMA_TERM) - 1) < 0) + DBUG_RETURN(0); + result|= (1ULL << find); + } + *err= 0; + DBUG_RETURN(result); +} /* find_set */ + + +/** + Create a copy of a specified TYPELIB structure. 
+ + @param root pointer to a MEM_ROOT object for allocations + @param from pointer to a source TYPELIB structure + + @retval + pointer to the new TYPELIB structure on successful copy + @retval + NULL otherwise +*/ + +TYPELIB *copy_typelib(MEM_ROOT *root, const TYPELIB *from) +{ + TYPELIB *to; + uint i; + + if (!from) + return NULL; + + if (!(to= (TYPELIB*) alloc_root(root, sizeof(TYPELIB)))) + return NULL; + + /* One allocation holds count+1 name pointers followed by count+1 lengths */ + if (!(to->type_names= (const char **) + alloc_root(root, (sizeof(char *) + sizeof(int)) * (from->count + 1)))) + return NULL; + to->type_lengths= (unsigned int *)(to->type_names + from->count + 1); + to->count= from->count; + if (from->name) + { + if (!(to->name= strdup_root(root, from->name))) + return NULL; + } + else + to->name= NULL; + + for (i= 0; i < from->count; i++) + { + if (!(to->type_names[i]= strmake_root(root, from->type_names[i], + from->type_lengths[i]))) + return NULL; + to->type_lengths[i]= from->type_lengths[i]; + } + to->type_names[to->count]= NULL; + to->type_lengths[to->count]= 0; + + return to; +} + + +static const char *on_off_default_names[]= { "off","on","default", 0}; +static TYPELIB on_off_default_typelib= {array_elements(on_off_default_names)-1, + "", on_off_default_names, 0}; + +/** + Parse a TYPELIB name from the buffer + + @param lib Set of names to scan for. + @param strpos INOUT Start of the buffer (updated to point to the next + character after the name) + @param end End of the buffer + + @note + The buffer is assumed to contain one of the names specified in the TYPELIB, + followed by comma, '=', or end of the buffer. 
+ + @retval + 0 No matching name + @retval + >0 Offset+1 in typelib for matched name +*/ + +static uint parse_name(const TYPELIB *lib, const char **pos, const char *end) +{ + uint find= find_type_eol(pos, lib, + FIND_TYPE_COMMA_TERM | FIND_TYPE_NO_PREFIX, end); + return find; +} + +/** + Parse and apply a set of flag assignments + + @param lib Flag names + @param default_name Number of "default" in the typelib + @param cur_set Current set of flags (start from this state) + @param default_set Default set of flags (use this for assign-default + keyword and flag=default assignments) + @param str String to be parsed + @param length Length of the string + @param err_pos OUT If error, set to point to start of wrong set string + NULL on success + @param err_len OUT If error, set to the length of wrong set string + + @details + Parse a set of flag assignments, that is, parse a string in form: + + param_name1=value1,param_name2=value2,... + + where the names are specified in the TYPELIB, and each value can be + either 'on','off', or 'default'. Setting the same name twice is not + allowed. + + Besides param=val assignments, we support the "default" keyword (keyword + #default_name in the typelib). It can be used one time, if specified it + causes us to build the new set over the default_set rather than cur_set + value. 
+ + @note + it's not charset aware + + @retval + Parsed set value if (*errpos == NULL), otherwise undefined +*/ + +my_ulonglong find_set_from_flags(const TYPELIB *lib, uint default_name, + my_ulonglong cur_set, my_ulonglong default_set, + const char *str, uint length, + char **err_pos, uint *err_len) +{ + const char *end= str + length; + my_ulonglong flags_to_set= 0, flags_to_clear= 0, res; + my_bool set_defaults= 0; + + *err_pos= 0; /* No error yet */ + if (str != end) + { + const char *start= str; + for (;;) + { + const char *pos= start; + uint flag_no, value; + + if (!(flag_no= parse_name(lib, &pos, end))) + goto err; + + if (flag_no == default_name) + { + /* Using 'default' twice isn't allowed. */ + if (set_defaults) + goto err; + set_defaults= TRUE; + } + else + { + my_ulonglong bit= (1ULL << (flag_no - 1)); + /* parse the '=on|off|default' */ + if ((flags_to_clear | flags_to_set) & bit || + pos >= end || *pos++ != '=' || + !(value= parse_name(&on_off_default_typelib, &pos, end))) + goto err; + + if (value == 1) /* this is '=off' */ + flags_to_clear|= bit; + else if (value == 2) /* this is '=on' */ + flags_to_set|= bit; + else /* this is '=default' */ + { + if (default_set & bit) + flags_to_set|= bit; + else + flags_to_clear|= bit; + } + } + if (pos >= end) + break; + + if (*pos++ != ',') + goto err; + + start=pos; + continue; + err: + *err_pos= (char*)start; + *err_len= (uint)(end - start); + break; + } + } + res= set_defaults? default_set : cur_set; + res|= flags_to_set; + res&= ~flags_to_clear; + return res; +} + diff --git a/mysys/waiting_threads.c b/mysys/waiting_threads.c new file mode 100644 index 00000000..a03f8da3 --- /dev/null +++ b/mysys/waiting_threads.c @@ -0,0 +1,1143 @@ +/* Copyright (C) 2008 MySQL AB, 2008-2009 Sun Microsystems, Inc. + Copyright (c) 2011, 2013, Monty Program Ab. 
+ + This program is free software; you can redistribute it and/or modify + it under the terms of the GNU General Public License as published by + the Free Software Foundation; version 2 of the License. + + This program is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + GNU General Public License for more details. + + You should have received a copy of the GNU General Public License + along with this program; if not, write to the Free Software + Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1335 USA */ + +/** + @file + + "waiting threads" subsystem - a unified interface for threads to wait + on each other, with built-in deadlock detection. + + Main concepts + ^^^^^^^^^^^^^ + a thread - is represented by a WT_THD structure. One physical thread + can have only one WT_THD descriptor at any given moment. + + a resource - a thread does not wait for other threads directly, + instead it waits for a "resource", which is "owned" by other threads. + It waits, exactly, for all "owners" to "release" a resource. + It does not have to correspond to a physical resource. For example, it + may be convenient in certain cases to force resource == thread. + A resource is represented by a WT_RESOURCE structure. + + a resource identifier - a pair of {resource type, value}. A value is + an ulonglong number. Represented by a WT_RESOURCE_ID structure. + + a resource type - a pointer to a statically defined instance of + WT_RESOURCE_TYPE structure. This structure contains a pointer to + a function that knows how to compare values of this resource type. + In the simple case it could be wt_resource_id_memcmp(). + + a wait-for graph - a graph, that represenst "wait-for" relationships. + It has two types of nodes - threads and resources. 
There are directed + edges from a thread to a resource it is waiting for (WT_THD::waiting_for), + from a thread to resources that it "owns" (WT_THD::my_resources), + and from a resource to threads that "own" it (WT_RESOURCE::owners) + + Graph completeness + ^^^^^^^^^^^^^^^^^^ + + For flawless deadlock detection wait-for graph must be complete. + It means that when a thread starts waiting it needs to know *all* its + blockers, and call wt_thd_will_wait_for() for every one of them. + Otherwise two phenomena should be expected: + + 1. Fuzzy timeouts: + + thread A needs to get a lock, and is blocked by a thread B. + it waits. + Just before the timeout thread B releases the lock. + thread A is ready to grab the lock but discovers that it is also + blocked by a thread C. + It waits and times out. + + As a result thread A has waited two timeout intervals, instead of one. + + 2. Unreliable cycle detection: + + Thread A waits for threads B and C + Thread C waits for D + Thread D wants to start waiting for A + + one can see immediately that thread D creates a cycle, and thus + a deadlock is detected. + + But if thread A would only wait for B, and start waiting for C + when B would unlock, thread D would be allowed to wait, a deadlock + would be only detected when B unlocks or somebody times out. + + These two phenomena don't affect correctness, and strictly speaking, + the caller is not required to call wt_thd_will_wait_for() for *all* + blockers - it may optimize wt_thd_will_wait_for() calls. But they + may be perceived as bugs by users, it must be understood that such + an optimization comes with its price. + + Usage + ^^^^^ + + First, the wt* subsystem must be initialized by calling + wt_init(). In the server you don't need to do it, it's done + in mysqld.cc. + + Similarly, wt_end() frees wt* structures, should be called + at the end, but in the server mysqld.cc takes care of that. + + Every WT_THD should be initialized with wt_thd_lazy_init(). 
+ After that they can be used in other wt_thd_* calls. + Before discarding, WT_THD should be free'd with + wt_thd_destroy(). In the server both are handled in sql_class.cc, + it's an error to try to do it manually. + + To use the deadlock detection one needs to use this thread's WT_THD, + call wt_thd_will_wait_for() for every thread it needs to wait on, + then call wt_thd_cond_timedwait(). When thread releases a resource + it should call wt_thd_release() (or wt_thd_release_all()) - it will + notify (send a signal) threads waiting in wt_thd_cond_timedwait(), + if appropriate. + + Just like with pthread's cond_wait, there could be spurious + wake-ups from wt_thd_cond_timedwait(). A caller is expected to + handle that (that is, to re-check the blocking criteria). + + wt_thd_will_wait_for() and wt_thd_cond_timedwait() return either + WT_OK or WT_DEADLOCK. Additionally wt_thd_cond_timedwait() can return + WT_TIMEOUT. Out of memory and other fatal errors are reported as + WT_DEADLOCK - and a transaction must be aborted just the same. + + Configuration + ^^^^^^^^^^^^^ + There are four config variables. Two deadlock search depths - short and + long - and two timeouts. Deadlock search is performed with the short + depth on every wt_thd_will_wait_for() call. wt_thd_cond_timedwait() + waits with a short timeout, performs a deadlock search with the long + depth, and waits with a long timeout. As most deadlock cycles are supposed + to be short, most deadlocks will be detected at once, and waits will + rarely be necessary. + + These config variables are thread-local. Different threads may have + different search depth and timeout values. + + Also, deadlock detector supports different killing strategies, the victim + in a deadlock cycle is selected based on the "weight". See "weight" + description in waiting_threads.h for details. It's up to the caller to + set weights accordingly. 
+ + Status + ^^^^^^ + We calculate the number of successful waits (WT_OK returned from + wt_thd_cond_timedwait()), a number of timeouts, a deadlock cycle + length distribution - number of deadlocks with every length from + 1 to WT_CYCLE_STATS, and a wait time distribution - number + of waits with a time from 1 us to 1 min in WT_WAIT_STATS + intervals on a log e scale. +*/ + +/* + Note that if your lock system satisfies the following condition: + + there exist four lock levels A, B, C, D, such that + A is compatible with B + A is not compatible with C + D is not compatible with B + + (example A=IX, B=IS, C=S, D=X) + + you need to include lock level in the resource identifier - a + thread waiting for lock of the type A on resource R and another + thread waiting for lock of the type B on resource R should wait on + different WT_RESOURCE structures, on different {lock, resource} + pairs. Otherwise the following is possible: + + thread1> take S-lock on R + thread2> take IS-lock on R + thread3> wants X-lock on R, starts waiting for threads 1 and 2 on R. + thread3 is killed (or timeout or whatever) + WT_RESOURCE structure for R is still in the hash, as it has two owners + thread4> wants an IX-lock on R + WT_RESOURCE for R is found in the hash, thread4 starts waiting on it. + !! now thread4 is waiting for both thread1 and thread2 + !! while, in fact, IX-lock and IS-lock are compatible and + !! thread4 should not wait for thread2. 
+*/ + +#include +#include +#include +#include "my_cpu.h" + +/* status variables */ + +/** + preset table of wait intervals +*/ +ulonglong wt_wait_table[WT_WAIT_STATS]; +/** + wait time distribution (log e scale) +*/ +uint32 wt_wait_stats[WT_WAIT_STATS+1]; +/** + distribution of cycle lengths + first column tells whether this was during short or long detection +*/ +uint32 wt_cycle_stats[2][WT_CYCLE_STATS+1]; +uint32 wt_success_stats; + +#ifdef HAVE_PSI_INTERFACE +extern PSI_cond_key key_WT_RESOURCE_cond; +#endif + +#ifdef SAFE_STATISTICS +#define incr(VAR, LOCK) do { my_atomic_add32(&(VAR), 1); } while(0) +#else +#define incr(VAR,LOCK) do { (VAR)++; } while(0) +#endif + +static void increment_success_stats() +{ + incr(wt_success_stats, success_stats_lock); +} + +static void increment_cycle_stats(uint depth, uint slot) +{ + if (depth >= WT_CYCLE_STATS) + depth= WT_CYCLE_STATS; + incr(wt_cycle_stats[slot][depth], cycle_stats_lock); +} + +static void increment_wait_stats(ulonglong waited,int ret) +{ + uint i; + if ((ret) == ETIMEDOUT) + i= WT_WAIT_STATS; + else + for (i= 0; i < WT_WAIT_STATS && waited/10 > wt_wait_table[i]; i++) ; + incr(wt_wait_stats[i], wait_stats_lock); +} + +/* + 'lock' protects 'owners', 'state', and 'waiter_count' + 'id' is read-only + + a resource is picked up from a hash in a lock-free manner + it's returned pinned, so it cannot be freed at once + but it may be freed right after the pin is removed + to free a resource it should + 1. have no owners + 2. have no waiters + + two ways to access a resource: + 1. find it in a hash + - it's returned pinned. + a) take a lock in exclusive mode + b) check the state, it should be ACTIVE to be usable + c) unpin + 2. by a direct reference + - could only used if a resource cannot be freed + e.g. 
accessing a resource by thd->waiting_for is safe, + a resource cannot be freed as there's a thread waiting for it +*/ +struct st_wt_resource { + WT_RESOURCE_ID id; + uint waiter_count; + enum { ACTIVE, FREE } state; +#ifndef DBUG_OFF + mysql_mutex_t *cond_mutex; /* a mutex for the 'cond' below */ +#endif + +#ifdef WT_RWLOCKS_USE_MUTEXES + /* + we need a special rwlock-like 'lock' to allow readers to bypass + waiting writers, otherwise readers can deadlock. For example: + + A waits on resource x, owned by B, B waits on resource y, owned + by A, we have a cycle (A->x->B->y->A) + Both A and B start deadlock detection: + + A locks x B locks y + A goes deeper B goes deeper + A locks y B locks x + + with mutexes it would deadlock. With rwlocks it won't, as long + as both A and B are taking read locks (and they do). + But other threads may take write locks. Assume there's + C who wants to start waiting on x, and D who wants to start + waiting on y. + + A read-locks x B read-locks y + A goes deeper B goes deeper + => C write-locks x (to add a new edge) D write-locks y + .. C is blocked D is blocked + A read-locks y B read-locks x + + Now, if a read lock can bypass a pending write lock request, we're fine. + If it can not, we have a deadlock. + + writer starvation is technically possible, but unlikely, because + the contention is expected to be low. 
+ */ + struct { + pthread_cond_t cond; + pthread_mutex_t mutex; + uint readers: 16; + uint pending_writers: 15; + uint write_locked: 1; + } lock; +#else + rw_lock_t lock; +#endif + mysql_cond_t cond; /* the corresponding mutex is provided by the caller */ + DYNAMIC_ARRAY owners; +}; + +#ifdef WT_RWLOCKS_USE_MUTEXES +static void rc_rwlock_init(WT_RESOURCE *rc) +{ + pthread_cond_init(&rc->lock.cond, 0); + pthread_mutex_init(&rc->lock.mutex, MY_MUTEX_INIT_FAST); +} +static void rc_rwlock_destroy(WT_RESOURCE *rc) +{ + DBUG_ASSERT(rc->lock.write_locked == 0); + DBUG_ASSERT(rc->lock.readers == 0); + pthread_cond_destroy(&rc->lock.cond); + pthread_mutex_destroy(&rc->lock.mutex); +} +static void rc_rdlock(WT_RESOURCE *rc) +{ + DBUG_PRINT("wt", ("TRYLOCK resid=%ld for READ", (ulong)rc->id.value)); + pthread_mutex_lock(&rc->lock.mutex); + while (rc->lock.write_locked) + pthread_cond_wait(&rc->lock.cond, &rc->lock.mutex); + rc->lock.readers++; + pthread_mutex_unlock(&rc->lock.mutex); + DBUG_PRINT("wt", ("LOCK resid=%ld for READ", (ulong)rc->id.value)); +} +static void rc_wrlock(WT_RESOURCE *rc) +{ + DBUG_PRINT("wt", ("TRYLOCK resid=%ld for WRITE", (ulong)rc->id.value)); + pthread_mutex_lock(&rc->lock.mutex); + while (rc->lock.write_locked || rc->lock.readers) + pthread_cond_wait(&rc->lock.cond, &rc->lock.mutex); + rc->lock.write_locked= 1; + pthread_mutex_unlock(&rc->lock.mutex); + DBUG_PRINT("wt", ("LOCK resid=%ld for WRITE", (ulong)rc->id.value)); +} +static void rc_unlock(WT_RESOURCE *rc) +{ + DBUG_PRINT("wt", ("UNLOCK resid=%ld", (ulong)rc->id.value)); + pthread_mutex_lock(&rc->lock.mutex); + if (rc->lock.write_locked) + { + rc->lock.write_locked= 0; + pthread_cond_broadcast(&rc->lock.cond); + } + else if (--rc->lock.readers == 0) + pthread_cond_broadcast(&rc->lock.cond); + pthread_mutex_unlock(&rc->lock.mutex); +} +#else +static void rc_rwlock_init(WT_RESOURCE *rc) +{ + my_rwlock_init(&rc->lock, 0); +} +static void rc_rwlock_destroy(WT_RESOURCE *rc) +{ + 
rwlock_destroy(&rc->lock); +} +static void rc_rdlock(WT_RESOURCE *rc) +{ + DBUG_PRINT("wt", ("TRYLOCK resid=%ld for READ", (ulong)rc->id.value)); + rw_rdlock(&rc->lock); + DBUG_PRINT("wt", ("LOCK resid=%ld for READ", (ulong)rc->id.value)); +} +static void rc_wrlock(WT_RESOURCE *rc) +{ + DBUG_PRINT("wt", ("TRYLOCK resid=%ld for WRITE", (ulong)rc->id.value)); + rw_wrlock(&rc->lock); + DBUG_PRINT("wt", ("LOCK resid=%ld for WRITE", (ulong)rc->id.value)); +} +static void rc_unlock(WT_RESOURCE *rc) +{ + DBUG_PRINT("wt", ("UNLOCK resid=%ld", (ulong)rc->id.value)); + rw_unlock(&rc->lock); +} +#endif + +/* + All resources are stored in a lock-free hash. Different threads + may add new resources and perform deadlock detection concurrently. +*/ +static LF_HASH reshash; + +/** + WT_RESOURCE constructor + + It's called from lf_hash and takes a pointer to an LF_SLIST instance. + WT_RESOURCE is located at arg+sizeof(LF_SLIST) +*/ +static void wt_resource_create(uchar *arg) +{ + WT_RESOURCE *rc= (WT_RESOURCE*)(arg+LF_HASH_OVERHEAD); + DBUG_ENTER("wt_resource_create"); + + bzero(rc, sizeof(*rc)); + rc_rwlock_init(rc); + mysql_cond_init(key_WT_RESOURCE_cond, &rc->cond, 0); + my_init_dynamic_array(PSI_INSTRUMENT_ME, &rc->owners, + sizeof(WT_THD *), 0, 5, MYF(0)); + DBUG_VOID_RETURN; +} + +/** + WT_RESOURCE destructor + + It's called from lf_hash and takes a pointer to an LF_SLIST instance. + WT_RESOURCE is located at arg+sizeof(LF_SLIST) +*/ +static void wt_resource_destroy(uchar *arg) +{ + WT_RESOURCE *rc= (WT_RESOURCE*)(arg+LF_HASH_OVERHEAD); + DBUG_ENTER("wt_resource_destroy"); + + DBUG_ASSERT(rc->owners.elements == 0); + rc_rwlock_destroy(rc); + mysql_cond_destroy(&rc->cond); + delete_dynamic(&rc->owners); + DBUG_VOID_RETURN; +} + +/** + WT_RESOURCE initializer + + It's called from lf_hash when an element is inserted. 
+*/ +static void wt_resource_init(LF_HASH *hash __attribute__((unused)), + WT_RESOURCE *rc, WT_RESOURCE_ID *id) +{ + DBUG_ENTER("wt_resource_init"); + rc->id= *id; + rc->waiter_count= 0; + rc->state= ACTIVE; +#ifndef DBUG_OFF + rc->cond_mutex= 0; +#endif + DBUG_VOID_RETURN; +} + +static int wt_init_done; + +void wt_init() +{ + DBUG_ENTER("wt_init"); + DBUG_ASSERT(reshash.alloc.constructor != wt_resource_create); + + lf_hash_init(&reshash, sizeof(WT_RESOURCE), LF_HASH_UNIQUE, 0, + sizeof_WT_RESOURCE_ID, 0, 0); + reshash.alloc.constructor= wt_resource_create; + reshash.alloc.destructor= wt_resource_destroy; + reshash.initializer= (lf_hash_initializer) wt_resource_init; + + bzero(wt_wait_stats, sizeof(wt_wait_stats)); + bzero(wt_cycle_stats, sizeof(wt_cycle_stats)); + wt_success_stats= 0; + { /* initialize wt_wait_table[]. from 1 us to 1 min, log e scale */ + int i; + double from= log(1); /* 1 us */ + double to= log(60e6); /* 1 min */ + for (i= 0; i < WT_WAIT_STATS; i++) + { + wt_wait_table[i]= (ulonglong)exp((to-from)/(WT_WAIT_STATS-1)*i+from); + DBUG_ASSERT(i == 0 || wt_wait_table[i-1] != wt_wait_table[i]); + } + } + wt_init_done= 1; + DBUG_VOID_RETURN; +} + +void wt_end() +{ + DBUG_ENTER("wt_end"); + if (!wt_init_done) + DBUG_VOID_RETURN; + + DBUG_ASSERT(reshash.count == 0); + lf_hash_destroy(&reshash); + reshash.alloc.constructor= NULL; + wt_init_done= 0; + DBUG_VOID_RETURN; +} + +/** + Lazy WT_THD initialization + + Cheap initialization of WT_THD. Only initialize fields that don't require + memory allocations - basically, it only does assignments. The rest of the + WT_THD structure will be initialized on demand, on the first use. + This allows one to initialize lazily all WT_THD structures, even if some + (or even most) of them will never be used for deadlock detection. 
+ + @param ds a pointer to deadlock search depth short value + @param ts a pointer to deadlock timeout short value (microseconds) + @param dl a pointer to deadlock search depth long value + @param tl a pointer to deadlock timeout long value (microseconds) + + @note these are pointers to values, and WT_THD stores them as pointers. + It allows one later to change search depths and timeouts for existing + threads. It also means that the pointers must stay valid for the lifetime + of WT_THD. +*/ +void wt_thd_lazy_init(WT_THD *thd, const ulong *ds, const ulong *ts, + const ulong *dl, const ulong *tl) +{ + DBUG_ENTER("wt_thd_lazy_init"); + thd->waiting_for= 0; + thd->weight= 0; + thd->deadlock_search_depth_short= ds; + thd->timeout_short= ts; + thd->deadlock_search_depth_long= dl; + thd->timeout_long= tl; + /* dynamic array is also initialized lazily - without memory allocations */ + my_init_dynamic_array(PSI_INSTRUMENT_ME, &thd->my_resources, + sizeof(WT_RESOURCE *), 0, 5, MYF(0)); +#ifndef DBUG_OFF + thd->name= my_thread_name(); +#endif + DBUG_VOID_RETURN; +} + +/** + Finalize WT_THD initialization + + After lazy WT_THD initialization, parts of the structure are still + uninitialized. This function completes the initialization, allocating + memory, if necessary. It's called automatically on demand, when WT_THD + is about to be used. +*/ +static int fix_thd_pins(WT_THD *thd) +{ + if (unlikely(thd->pins == 0)) + { + thd->pins= lf_hash_get_pins(&reshash); +#ifndef DBUG_OFF + thd->name= my_thread_name(); +#endif + } + return thd->pins == 0; +} + +void wt_thd_destroy(WT_THD *thd) +{ + DBUG_ENTER("wt_thd_destroy"); + + DBUG_ASSERT(thd->my_resources.elements == 0); + DBUG_ASSERT(thd->waiting_for == 0); + + if (thd->pins != 0) + lf_hash_put_pins(thd->pins); + + delete_dynamic(&thd->my_resources); + DBUG_VOID_RETURN; +} +/** + Trivial resource id comparison function - bytewise memcmp. 
+ + It can be used in WT_RESOURCE_TYPE structures where bytewise + comparison of values is sufficient. +*/ +my_bool wt_resource_id_memcmp(const void *a, const void *b) +{ + /* we use the fact that there's no padding in the middle of WT_RESOURCE_ID */ + compile_time_assert(offsetof(WT_RESOURCE_ID, type) == sizeof(ulonglong)); + return MY_TEST(memcmp(a, b, sizeof_WT_RESOURCE_ID)); +} + +/** + arguments for the recursive deadlock_search function +*/ +struct deadlock_arg { + WT_THD * const thd; /**< starting point of a search */ + uint const max_depth; /**< search depth limit */ + WT_THD *victim; /**< a thread to be killed to resolve a deadlock */ + WT_RESOURCE *last_locked_rc; /**< see comment at the end of deadlock_search() */ +}; + +/** + helper function to change the victim, according to the weight +*/ +static void change_victim(WT_THD* found, struct deadlock_arg *arg) +{ + if (found->weight < arg->victim->weight) + { + if (arg->victim != arg->thd) + { + rc_unlock(arg->victim->waiting_for); /* release the previous victim */ + DBUG_ASSERT(arg->last_locked_rc == found->waiting_for); + } + arg->victim= found; + arg->last_locked_rc= 0; + } +} + +/** + recursive loop detection in a wait-for graph with a limited search depth +*/ +static int deadlock_search(struct deadlock_arg *arg, WT_THD *blocker, + uint depth) +{ + WT_RESOURCE *rc, *volatile *shared_ptr= &blocker->waiting_for; + WT_THD *cursor; + size_t i; + int ret= WT_OK; + DBUG_ENTER("deadlock_search"); + DBUG_PRINT("wt", ("enter: thd=%s, blocker=%s, depth=%u", + arg->thd->name, blocker->name, depth)); + + arg->last_locked_rc= 0; + + if (depth > arg->max_depth) + { + DBUG_PRINT("wt", ("exit: WT_DEPTH_EXCEEDED (early)")); + DBUG_RETURN(WT_DEPTH_EXCEEDED); + } + +retry: + /* + safe dereference as explained in lf_alloc-pin.c + (in short: protects against lf_alloc_free() in lf_hash_delete()) + */ + do + { + rc= *shared_ptr; + lf_pin(arg->thd->pins, 0, rc); + } while (rc != *shared_ptr && LF_BACKOFF()); + + if (rc == 0) 
+ { + DBUG_PRINT("wt", ("exit: OK (early)")); + DBUG_RETURN(0); + } + + rc_rdlock(rc); + if (rc->state != ACTIVE || *shared_ptr != rc) + { + /* blocker is not waiting on this resource anymore */ + rc_unlock(rc); + lf_unpin(arg->thd->pins, 0); + goto retry; + } + /* as the state is locked, we can unpin now */ + lf_unpin(arg->thd->pins, 0); + + /* + Below is not a pure depth-first search. It's a depth-first with a + slightest hint of breadth-first. Depth-first is: + + check(element, X): + foreach current in element->nodes[] do: + if current == X return error; + check(current, X); + + while we do + + check(element, X): + foreach current in element->nodes[] do: + if current == X return error; + foreach current in element->nodes[] do: + check(current, X); + + preferring shorter deadlocks over longer ones. + */ + for (i= 0; i < rc->owners.elements; i++) + { + cursor= *dynamic_element(&rc->owners, i, WT_THD**); + /* + We're only looking for (and detecting) cycles that include 'arg->thd'. + That is, only deadlocks that *we* have created. For example, + thd->A->B->thd + (thd waits for A, A waits for B, while B is waiting for thd). + While walking the graph we can encounter other cycles, e.g. + thd->A->B->C->A + This will not be detected. Instead we will walk it in circles until + the search depth limit is reached (the latter guarantees that an + infinite loop is impossible). We expect the thread that has created + the cycle (one of A, B, and C) to detect its deadlock. 
+ */ + if (cursor == arg->thd) + { + ret= WT_DEADLOCK; + increment_cycle_stats(depth, arg->max_depth == + *arg->thd->deadlock_search_depth_long); + arg->victim= cursor; + goto end; + } + } + for (i= 0; i < rc->owners.elements; i++) + { + cursor= *dynamic_element(&rc->owners, i, WT_THD**); + switch (deadlock_search(arg, cursor, depth+1)) { + case WT_OK: + break; + case WT_DEPTH_EXCEEDED: + ret= WT_DEPTH_EXCEEDED; + break; + case WT_DEADLOCK: + ret= WT_DEADLOCK; + change_victim(cursor, arg); /* also sets arg->last_locked_rc to 0 */ + i= rc->owners.elements; /* jump out of the loop */ + break; + default: + DBUG_ASSERT(0); + } + if (arg->last_locked_rc) + rc_unlock(arg->last_locked_rc); + } +end: + /* + Note that 'rc' is locked in this function, but it's never unlocked here. + Instead it's saved in arg->last_locked_rc and the *caller* is + expected to unlock it. It's done to support different killing + strategies. This is how it works: + Assuming a graph + + thd->A->B->C->thd + + deadlock_search() function starts from thd, locks it (in fact it locks not + a thd, but a resource it is waiting on, but below, for simplicity, I'll + talk about "locking a thd"). Then it goes down recursively, locks A, and so + on. Goes down recursively, locks B. Goes down recursively, locks C. + Notices that C is waiting on thd. Deadlock detected. Sets arg->victim=thd. + Returns from the last deadlock_search() call. C stays locked! + Now it checks whether C is a more appropriate victim than 'thd'. + If yes - arg->victim=C, otherwise C is unlocked. Returns. B stays locked. + Now it checks whether B is a more appropriate victim than arg->victim. + If yes - old arg->victim is unlocked and arg->victim=B, + otherwise B is unlocked. Return. + And so on. + + In short, a resource is locked in a frame. But it's not unlocked in the + same frame, it's unlocked by the caller, and only after the caller checks + that it doesn't need to use current WT_THD as a victim. 
If it does - the + lock is kept and the old victim's resource is unlocked. When the recursion + is unrolled and we are back to deadlock() function, there are only two + locks left - on thd and on the victim. + */ + arg->last_locked_rc= rc; + DBUG_PRINT("wt", ("exit: %s", + ret == WT_DEPTH_EXCEEDED ? "WT_DEPTH_EXCEEDED" : + ret ? "WT_DEADLOCK" : "OK")); + DBUG_RETURN(ret); +} + +/** + Deadlock detection in a wait-for graph + + A wrapper for recursive deadlock_search() - prepares deadlock_arg structure, + invokes deadlock_search(), increments statistics, notifies the victim. + + @param thd thread that is going to wait. Deadlock is detected + if, while walking the graph, we reach a thread that + is waiting on thd + @param blocker starting point of a search. In wt_thd_cond_timedwait() + it's thd, in wt_thd_will_wait_for() it's a thread that + thd is going to wait for + @param depth starting search depth. In general it's the number of + edges in the wait-for graph between thd and the + blocker. Practically only two values are used (and + supported) - when thd == blocker it's 0, when thd + waits directly for blocker, it's 1 + @param max_depth search depth limit +*/ +static int deadlock(WT_THD *thd, WT_THD *blocker, uint depth, + uint max_depth) +{ + struct deadlock_arg arg= {thd, max_depth, 0, 0}; + int ret; + DBUG_ENTER("deadlock"); + DBUG_ASSERT(depth < 2); + ret= deadlock_search(&arg, blocker, depth); + if (ret == WT_DEPTH_EXCEEDED) + { + increment_cycle_stats(WT_CYCLE_STATS, max_depth == + *thd->deadlock_search_depth_long); + ret= WT_OK; + } + /* + if we started with depth==1, blocker was never considered for a victim + in deadlock_search(). Do it here. + */ + if (ret == WT_DEADLOCK && depth) + change_victim(blocker, &arg); + if (arg.last_locked_rc) + { + /* + Special return code if there's nobody to wait for. + + depth == 0 means that we start the search from thd (thd == blocker). 
+ ret == WT_OK means that no cycle was found and + arg.last_locked_rc == thd->waiting_for. + and arg.last_locked_rc->owners.elements == 0 means that + (applying the rule above) thd->waiting_for->owners.elements == 0, + and thd doesn't have anybody to wait for. + */ + if (depth == 0 && ret == WT_OK && arg.last_locked_rc->owners.elements == 0) + { + DBUG_ASSERT(thd == blocker); + DBUG_ASSERT(arg.last_locked_rc == thd->waiting_for); + ret= WT_FREE_TO_GO; + } + rc_unlock(arg.last_locked_rc); + } + /* notify the victim, if appropriate */ + if (ret == WT_DEADLOCK && arg.victim != thd) + { + DBUG_PRINT("wt", ("killing %s", arg.victim->name)); + arg.victim->killed= 1; + mysql_cond_broadcast(&arg.victim->waiting_for->cond); + rc_unlock(arg.victim->waiting_for); + ret= WT_OK; + } + DBUG_RETURN(ret); +} + + +/** + Delete an element from reshash if it has no waiters or owners + + rc->lock must be locked by the caller and it's unlocked on return. +*/ +static int unlock_lock_and_free_resource(WT_THD *thd, WT_RESOURCE *rc) +{ + uint keylen; + const void *key; + DBUG_ENTER("unlock_lock_and_free_resource"); + + DBUG_ASSERT(rc->state == ACTIVE); + + if (rc->owners.elements || rc->waiter_count) + { + DBUG_PRINT("wt", ("nothing to do, %u owners, %u waiters", + rc->owners.elements, rc->waiter_count)); + rc_unlock(rc); + DBUG_RETURN(0); + } + + if (fix_thd_pins(thd)) + { + rc_unlock(rc); + DBUG_RETURN(1); + } + + /* XXX if (rc->id.type->make_key) key= rc->id.type->make_key(&rc->id, &keylen); else */ + { + key= &rc->id; + keylen= sizeof_WT_RESOURCE_ID; + } + + /* + To free the element correctly we need to: + 1. take its lock (already done). + 2. set the state to FREE + 3. release the lock + 4. remove from the hash + */ + rc->state= FREE; + rc_unlock(rc); + DBUG_RETURN(lf_hash_delete(&reshash, thd->pins, key, keylen) == -1); +} + + +/** + register the fact that thd is not waiting anymore + + decrease waiter_count, clear waiting_for, free the resource if appropriate. 
+ thd->waiting_for must be locked! +*/ +static int stop_waiting_locked(WT_THD *thd) +{ + int ret; + WT_RESOURCE *rc= thd->waiting_for; + DBUG_ENTER("stop_waiting_locked"); + + DBUG_ASSERT(rc->waiter_count); + DBUG_ASSERT(rc->state == ACTIVE); + rc->waiter_count--; + thd->waiting_for= 0; + ret= unlock_lock_and_free_resource(thd, rc); + DBUG_RETURN((thd->killed || ret) ? WT_DEADLOCK : WT_OK); +} + +/** + register the fact that thd is not waiting anymore + + locks thd->waiting_for and calls stop_waiting_locked(). +*/ +static int stop_waiting(WT_THD *thd) +{ + int ret; + WT_RESOURCE *rc= thd->waiting_for; + DBUG_ENTER("stop_waiting"); + + if (!rc) + DBUG_RETURN(WT_OK); + /* + nobody's trying to free the resource now, + as its waiter_count is guaranteed to be non-zero + */ + rc_wrlock(rc); + ret= stop_waiting_locked(thd); + DBUG_RETURN(ret); +} + +/** + notify the system that a thread needs to wait for another thread + + called by a *waiter* to declare that it (thd) will wait for another + thread (blocker) on a specific resource (resid). + can be called many times, if many blockers own a blocking resource. + but must always be called with the same resource id - a thread cannot + wait for more than one resource at a time. + + @return WT_OK or WT_DEADLOCK + + As a new edge is added to the wait-for graph, a deadlock detection is + performed for this new edge. 
+*/ +int wt_thd_will_wait_for(WT_THD *thd, WT_THD *blocker, + const WT_RESOURCE_ID *resid) +{ + uint i; + WT_RESOURCE *rc; + DBUG_ENTER("wt_thd_will_wait_for"); + + DBUG_PRINT("wt", ("enter: thd=%s, blocker=%s, resid=%lu", + thd->name, blocker->name, (ulong)resid->value)); + + if (fix_thd_pins(thd)) + DBUG_RETURN(WT_DEADLOCK); + + if (thd->waiting_for == 0) + { + uint keylen; + const void *key; + /* XXX if (restype->make_key) key= restype->make_key(resid, &keylen); else */ + { + key= resid; + keylen= sizeof_WT_RESOURCE_ID; + } + + DBUG_PRINT("wt", ("first blocker")); + +retry: + while ((rc= lf_hash_search(&reshash, thd->pins, key, keylen)) == 0) + { + DBUG_PRINT("wt", ("failed to find rc in hash, inserting")); + + if (lf_hash_insert(&reshash, thd->pins, resid) == -1) /* if OOM */ + DBUG_RETURN(WT_DEADLOCK); + /* + Two cases: either lf_hash_insert() failed - because another thread + has just inserted a resource with the same id - and we need to retry. + Or lf_hash_insert() succeeded, and then we need to repeat + lf_hash_search() to find a real address of the newly inserted element. + That is, we don't care what lf_hash_insert() has returned. + And we need to repeat the loop anyway. 
+ */ + } + if (rc == MY_ERRPTR) + DBUG_RETURN(WT_DEADLOCK); + + DBUG_PRINT("wt", ("found in hash rc=%p", rc)); + + rc_wrlock(rc); + if (rc->state != ACTIVE) + { + DBUG_PRINT("wt", ("but it's not active, retrying")); + /* Somebody has freed the element while we weren't looking */ + rc_unlock(rc); + lf_hash_search_unpin(thd->pins); + goto retry; + } + + lf_hash_search_unpin(thd->pins); /* the element cannot go away anymore */ + thd->waiting_for= rc; + rc->waiter_count++; + thd->killed= 0; + } + else + { + DBUG_ASSERT(thd->waiting_for->id.type == resid->type); + DBUG_ASSERT(resid->type->compare(&thd->waiting_for->id, resid) == 0); + DBUG_PRINT("wt", ("adding another blocker")); + + /* + we can safely access the resource here, it's in the hash as it has + non-zero waiter_count + */ + rc= thd->waiting_for; + rc_wrlock(rc); + DBUG_ASSERT(rc->waiter_count); + DBUG_ASSERT(rc->state == ACTIVE); + + if (thd->killed) + { + stop_waiting_locked(thd); + DBUG_RETURN(WT_DEADLOCK); + } + } + /* + Another thread could be waiting on this resource for this very 'blocker'. + In this case we should not add it to the list for the second time. + */ + for (i= 0; i < rc->owners.elements; i++) + if (*dynamic_element(&rc->owners, i, WT_THD**) == blocker) + break; + if (i >= rc->owners.elements) + { + if (push_dynamic(&blocker->my_resources, (void*)&rc)) + { + stop_waiting_locked(thd); + DBUG_RETURN(WT_DEADLOCK); /* deadlock and OOM use the same error code */ + } + if (push_dynamic(&rc->owners, (void*)&blocker)) + { + pop_dynamic(&blocker->my_resources); + stop_waiting_locked(thd); + DBUG_RETURN(WT_DEADLOCK); + } + } + rc_unlock(rc); + + if (deadlock(thd, blocker, 1, *thd->deadlock_search_depth_short) != WT_OK) + { + stop_waiting(thd); + DBUG_RETURN(WT_DEADLOCK); + } + DBUG_RETURN(WT_OK); +} + +/** + called by a *waiter* (thd) to start waiting + + It's supposed to be a drop-in replacement for + mysql_cond_timedwait(), and it takes mutex as an argument. 
+ + @return one of WT_TIMEOUT, WT_DEADLOCK, WT_OK +*/ +int wt_thd_cond_timedwait(WT_THD *thd, mysql_mutex_t *mutex) +{ + int ret= WT_TIMEOUT; + struct timespec timeout; + my_hrtime_t before, after, starttime; + WT_RESOURCE *rc= thd->waiting_for; + ulonglong end_wait_time; + DBUG_ENTER("wt_thd_cond_timedwait"); + DBUG_PRINT("wt", ("enter: thd=%s, rc=%p", thd->name, rc)); + +#ifndef DBUG_OFF + if (rc->cond_mutex) + DBUG_ASSERT(rc->cond_mutex == mutex); + else + rc->cond_mutex= mutex; + mysql_mutex_assert_owner(mutex); +#endif + + before= starttime= my_hrtime(); + + rc_wrlock(rc); + if (rc->owners.elements == 0) + ret= WT_OK; + rc_unlock(rc); + + end_wait_time= starttime.val *1000 + (*thd->timeout_short)*1000000ULL; + set_timespec_time_nsec(timeout, end_wait_time); + if (ret == WT_TIMEOUT && !thd->killed) + ret= mysql_cond_timedwait(&rc->cond, mutex, &timeout); + if (ret == WT_TIMEOUT && !thd->killed) + { + int r= deadlock(thd, thd, 0, *thd->deadlock_search_depth_long); + if (r == WT_FREE_TO_GO) + ret= WT_OK; + else if (r != WT_OK) + ret= WT_DEADLOCK; + else if (*thd->timeout_long > *thd->timeout_short) + { + end_wait_time= starttime.val *1000 + (*thd->timeout_long)*1000000ULL; + set_timespec_time_nsec(timeout, end_wait_time); + if (!thd->killed) + ret= mysql_cond_timedwait(&rc->cond, mutex, &timeout); + } + } + after= my_hrtime(); + if (stop_waiting(thd) == WT_DEADLOCK) /* if we're killed */ + ret= WT_DEADLOCK; + increment_wait_stats(after.val-before.val, ret); + if (ret == WT_OK) + increment_success_stats(); + DBUG_RETURN(ret); +} + +/** + called by a *blocker* when it releases a resource + + it's conceptually similar to pthread_cond_broadcast, and must be done + under the same mutex as wt_thd_cond_timedwait(). + + @param resid a resource to release. 
0 to release all resources +*/ + +void wt_thd_release(WT_THD *thd, const WT_RESOURCE_ID *resid) +{ + uint i; + DBUG_ENTER("wt_thd_release"); + + /* walk thd->my_resources: handle the entry matching resid, or every entry when resid is 0 */ for (i= 0; i < thd->my_resources.elements; i++) + { + WT_RESOURCE *rc= *dynamic_element(&thd->my_resources, i, WT_RESOURCE**); + if (!resid || (resid->type->compare(&rc->id, resid) == 0)) + { + uint j; + + rc_wrlock(rc); + /* + nobody's trying to free the resource now, + as its owners[] array is not empty (at least thd must be there) + */ + DBUG_ASSERT(rc->state == ACTIVE); + /* locate thd in rc->owners and remove it from there */ for (j= 0; j < rc->owners.elements; j++) + if (*dynamic_element(&rc->owners, j, WT_THD**) == thd) + break; + DBUG_ASSERT(j < rc->owners.elements); + delete_dynamic_element(&rc->owners, j); + if (rc->owners.elements == 0) + { + /* thd was the last owner: broadcast on rc->cond, the condition wt_thd_cond_timedwait() waits on */ mysql_cond_broadcast(&rc->cond); +#ifndef DBUG_OFF + if (rc->cond_mutex) + mysql_mutex_assert_owner(rc->cond_mutex); +#endif + } + unlock_lock_and_free_resource(thd, rc); + if (resid) + { + /* a single resource was requested: drop it from my_resources and return at the first match */ delete_dynamic_element(&thd->my_resources, i); + DBUG_VOID_RETURN; + } + } + } + /* release-all case: every entry was processed by the loop above, now empty the array itself */ if (!resid) + reset_dynamic(&thd->my_resources); + DBUG_VOID_RETURN; +} + diff --git a/mysys/wqueue.c b/mysys/wqueue.c new file mode 100644 index 00000000..84fac01f --- /dev/null +++ b/mysys/wqueue.c @@ -0,0 +1,242 @@ +/* + Copyright (c) 2007, 2008, Sun Microsystems, Inc, + Copyright (c) 2011, 2012, Monty Program Ab + + This program is free software; you can redistribute it and/or modify + it under the terms of the GNU General Public License as published by + the Free Software Foundation; version 2 of the License. + + This program is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + GNU General Public License for more details. 
+ + You should have received a copy of the GNU General Public License + along with this program; if not, write to the Free Software + Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1335 USA */ + +#include /* NOTE(review): the header name is missing here (an angle-bracketed name looks to have been stripped during extraction); upstream presumably includes <my_global.h> - confirm and restore */ +#include /* NOTE(review): header name missing here as well; presumably <wqueue.h>, which would declare WQUEUE - confirm and restore */ + +#define STRUCT_PTR(TYPE, MEMBER, a) \ + (TYPE *) ((char *) (a) - offsetof(TYPE, MEMBER)) /* STRUCT_PTR: given a, the address of field MEMBER inside an object of type TYPE, yield the address of that enclosing object */ +/* + Link a thread into double-linked queue of waiting threads. + + SYNOPSIS + wqueue_link_into_queue() + wqueue pointer to the queue structure + thread pointer to the thread to be added to the queue + + RETURN VALUE + none + + NOTES. + Queue is represented by a circular list of the thread structures + The list is double-linked of the type (**prev,*next), accessed by + a pointer to the last element. +*/ + +void wqueue_link_into_queue(WQUEUE *wqueue, struct st_my_thread_var *thread) +{ + struct st_my_thread_var *last; + if (!(last= wqueue->last_thread)) + { + /* Queue is empty */ + /* the only element links to itself: next is the thread, prev is the address of its own next field */ thread->next= thread; + thread->prev= &thread->next; + } + else + { + /* splice thread in between last and last->next; last->next->prev is read here before the next statement overwrites it, so the order of these four assignments matters */ thread->prev= last->next->prev; + last->next->prev= &thread->next; + thread->next= last->next; + last->next= thread; + } + /* in both cases the newly linked thread becomes the last element */ wqueue->last_thread= thread; +} + + +/* + Add a thread to single-linked queue of waiting threads + + SYNOPSIS + wqueue_add_to_queue() + wqueue pointer to the queue structure + thread pointer to the thread to be added to the queue + + RETURN VALUE + none + + NOTES. + Queue is represented by a circular list of the thread structures + The list is single-linked of the type (*next), accessed by a pointer + to the last element. 
+*/ + +void wqueue_add_to_queue(WQUEUE *wqueue, struct st_my_thread_var *thread) +{ + struct st_my_thread_var *last; + if (!(last= wqueue->last_thread)) + thread->next= thread; /* empty queue: the single element points to itself */ + else + { + /* insert thread after the current last element, in front of last->next */ thread->next= last->next; + last->next= thread; + } +#ifndef DBUG_OFF + thread->prev= NULL; /* force segfault if used */ +#endif + wqueue->last_thread= thread; +} + +/* + Unlink a thread from double-linked queue of waiting threads + + SYNOPSIS + wqueue_unlink_from_queue() + wqueue pointer to the queue structure + thread pointer to the thread to be removed from the queue + + RETURN VALUE + none + + NOTES. + See NOTES for wqueue_link_into_queue(). + On return thread->next is set to NULL. +*/ + +void wqueue_unlink_from_queue(WQUEUE *wqueue, struct st_my_thread_var *thread) +{ + if (thread->next == thread) + /* The queue contains only one member */ + wqueue->last_thread= NULL; + else + { + /* bypass thread in both directions: its successor inherits its prev link, and the next field that pointed at thread now points at the successor */ thread->next->prev= thread->prev; + *thread->prev= thread->next; + /* thread was the last element: the new last element is the structure whose next field thread->prev points at */ if (wqueue->last_thread == thread) + wqueue->last_thread= STRUCT_PTR(struct st_my_thread_var, next, + thread->prev); + } + thread->next= NULL; +} + + +/* + Remove all threads from queue signaling them to proceed + + SYNOPSIS + wqueue_release_queue() + wqueue pointer to the queue structure + + RETURN VALUE + none + + NOTES. + See notes for add_to_queue + When removed from the queue each thread is signaled via condition + variable thread->suspend. + The queue must not be empty: wqueue->last_thread is dereferenced + without a NULL check. +*/ + +void wqueue_release_queue(WQUEUE *wqueue) +{ + struct st_my_thread_var *last= wqueue->last_thread; + struct st_my_thread_var *next= last->next; + struct st_my_thread_var *thread; + do + { + thread= next; + mysql_cond_signal(&thread->suspend); + next= thread->next; /* fetch the successor before the link is cleared */ + thread->next= NULL; /* a NULL next is what the wait loop in wqueue_add_and_wait() tests to stop waiting */ + } + while (thread != last); + wqueue->last_thread= NULL; +} + + +/** + @brief Removes all threads waiting for read or first one waiting for write. 
+ + @param wqueue pointer to the queue structure + @param thread pointer to the thread to be added to the queue + + @note This function is applicable only to single linked lists. +*/ + +void wqueue_release_one_locktype_from_queue(WQUEUE *wqueue) +{ + /* NOTE(review): the header comment lists a 'thread' parameter, but this function takes only wqueue. The queue must not be empty: last->next and next->lock_type are read without a NULL check. */ struct st_my_thread_var *last= wqueue->last_thread; + struct st_my_thread_var *next= last->next; + struct st_my_thread_var *thread; + struct st_my_thread_var *new_list= NULL; /* last element of the rebuilt list of threads that stay queued */ + uint first_type= next->lock_type; + if (first_type == MY_PTHREAD_LOCK_WRITE) + { + /* release first waiting for write lock */ + mysql_cond_signal(&next->suspend); + /* unlink just that first element; the rest of the queue is left untouched */ if (next == last) + wqueue->last_thread= NULL; + else + last->next= next->next; + next->next= NULL; + return; + } + /* the first waiter is not a writer: signal every non-writer and re-link the writers, in their original order, into a new circular list */ do + { + thread= next; + next= thread->next; /* fetched up front because thread->next is rewritten below */ + if (thread->lock_type == MY_PTHREAD_LOCK_WRITE) + { + /* skip waiting for write lock */ + if (new_list) + { + /* append after the current tail and make thread the new tail */ thread->next= new_list->next; + new_list= new_list->next= thread; + } + else + new_list= thread->next= thread; + } + else + { + /* release waiting for read lock */ + mysql_cond_signal(&thread->suspend); + thread->next= NULL; + } + } while (thread != last); + wqueue->last_thread= new_list; /* stays NULL when no writer was found in the queue */ +} + + +/* + Add thread and wait + + SYNOPSIS + wqueue_add_and_wait() + wqueue queue to add to + thread thread which is waiting + lock mutex needed for the operation; it is passed to + mysql_cond_wait() together with thread->suspend + + NOTES. + The thread is queued with wqueue_add_to_queue() and the wait is + repeated until thread->next becomes NULL. +*/ + +void wqueue_add_and_wait(WQUEUE *wqueue, + struct st_my_thread_var *thread, + mysql_mutex_t *lock) +{ + DBUG_ENTER("wqueue_add_and_wait"); + DBUG_PRINT("enter", + ("thread: %p cond: %p mutex: %p", + thread, &thread->suspend, lock)); + wqueue_add_to_queue(wqueue, thread); + do + { + DBUG_PRINT("info", ("wait... cond: %p mutex: %p", + &thread->suspend, lock)); + mysql_cond_wait(&thread->suspend, lock); + DBUG_PRINT("info", ("wait done cond: %p mutex: %p next: %p", + &thread->suspend, lock, + thread->next)); + } + while (thread->next); /* keep waiting while the thread is still linked: the release functions in this file set next to NULL when they signal it */ + DBUG_VOID_RETURN; +} -- cgit v1.2.3