author	Daniel Baumann <daniel.baumann@progress-linux.org>	2024-05-04 18:07:14 +0000
committer	Daniel Baumann <daniel.baumann@progress-linux.org>	2024-05-04 18:07:14 +0000
commit	a175314c3e5827eb193872241446f2f8f5c9d33c (patch)
tree	cd3d60ca99ae00829c52a6ca79150a5b6e62528b /storage/innobase/sync
parent	Initial commit. (diff)
Adding upstream version 1:10.5.12. (upstream/1%10.5.12, upstream)
Signed-off-by: Daniel Baumann <daniel.baumann@progress-linux.org>
Diffstat (limited to 'storage/innobase/sync')
-rw-r--r--	storage/innobase/sync/sync0arr.cc	1296
-rw-r--r--	storage/innobase/sync/sync0debug.cc	1423
-rw-r--r--	storage/innobase/sync/sync0rw.cc	1216
-rw-r--r--	storage/innobase/sync/sync0sync.cc	246
4 files changed, 4181 insertions, 0 deletions
diff --git a/storage/innobase/sync/sync0arr.cc b/storage/innobase/sync/sync0arr.cc
new file mode 100644
index 00000000..5f39325d
--- /dev/null
+++ b/storage/innobase/sync/sync0arr.cc
@@ -0,0 +1,1296 @@
+/*****************************************************************************
+
+Copyright (c) 1995, 2016, Oracle and/or its affiliates. All Rights Reserved.
+Copyright (c) 2008, Google Inc.
+Copyright (c) 2013, 2020, MariaDB Corporation.
+
+Portions of this file contain modifications contributed and copyrighted by
+Google, Inc. Those modifications are gratefully acknowledged and are described
+briefly in the InnoDB documentation. The contributions by Google are
+incorporated with their permission, and subject to the conditions contained in
+the file COPYING.Google.
+
+This program is free software; you can redistribute it and/or modify it under
+the terms of the GNU General Public License as published by the Free Software
+Foundation; version 2 of the License.
+
+This program is distributed in the hope that it will be useful, but WITHOUT
+ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
+FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details.
+
+You should have received a copy of the GNU General Public License along with
+this program; if not, write to the Free Software Foundation, Inc.,
+51 Franklin Street, Fifth Floor, Boston, MA 02110-1335 USA
+
+*****************************************************************************/
+
+/**************************************************//**
+@file sync/sync0arr.cc
+The wait array used in synchronization primitives
+
+Created 9/5/1995 Heikki Tuuri
+*******************************************************/
+
+#include "sync0arr.h"
+#include <mysqld_error.h>
+#include <mysql/plugin.h>
+#include <hash.h>
+#include <myisampack.h>
+#include <sql_acl.h>
+#include <mysys_err.h>
+#include <my_sys.h>
+#include "srv0srv.h"
+#include "srv0start.h"
+#include "i_s.h"
+#include <sql_plugin.h>
+#include <innodb_priv.h>
+
+#include "lock0lock.h"
+#include "sync0rw.h"
+
+/*
+ WAIT ARRAY
+ ==========
+
+The wait array consists of cells, each of which has an event object created
+for it. The threads waiting for a mutex, for example, can reserve a cell
+in the array and suspend themselves to wait for the event to become signaled.
+When using the wait array, remember to make sure that some thread holding
+the synchronization object will eventually know that there is a waiter in
+the array and signal the object, to prevent an infinite wait. Why did we
+choose to implement a wait array? First, to make mutexes fast, we had to
+code our own implementation of them, which resorts to slow operating
+system primitives only in uncommon cases. Then we had the choice of
+assigning a unique OS event for each mutex, which would be simpler, or
+using a global wait array. In some operating systems, the global wait
+array solution is more efficient and flexible, because we can do with
+a very small number of OS events, say 200. In NT 3.51, allocating events
+seems to be a quadratic algorithm: 10 000 events are created quickly,
+but creating 100 000 events takes a couple of minutes.
+
+As of 5.0.30 the above-mentioned design was changed. Since the OS can now
+handle millions of wait events efficiently, we no longer have the concept of
+each wait array cell having its own event. Instead, the event that a thread
+wants to wait on is embedded in the wait object (mutex or rw_lock). We still
+keep the global wait array for the sake of diagnostics and also to avoid
+infinite waits. The error_monitor thread scans the global wait array to signal
+any waiting threads that have missed the signal. */
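For illustration, a minimal sketch (not from the upstream sources) of how a latch implementation might suspend on this wait array after failing to acquire a mutex. The helper my_try_lock() and the fixed use of sync_wait_array[0] are hypothetical placeholders; sync_array_reserve_cell(), sync_array_free_cell() and sync_array_wait_event() are the functions defined later in this file.

/* Illustrative sketch only; my_try_lock() is a hypothetical placeholder
for the latch's own try-lock operation. */
static void my_mutex_wait(WaitMutex* mutex, const char* file, unsigned line)
{
	for (;;) {
		sync_array_t*	arr = sync_wait_array[0];
		sync_cell_t*	cell = sync_array_reserve_cell(
			arr, mutex, SYNC_MUTEX, file, line);

		if (cell == NULL) {
			/* The array is full; a real caller would try
			another sync array instance. */
			continue;
		}

		if (my_try_lock(mutex)) {
			/* Acquired after all: give the cell back. */
			sync_array_free_cell(arr, cell);
			return;
		}

		/* Suspend until the holder signals the mutex event;
		sync_array_wait_event() frees the cell for us. */
		sync_array_wait_event(arr, cell);
	}
}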
+
+typedef TTASEventMutex<GenericPolicy> WaitMutex;
+
+/** The latch types that use the sync array. */
+union sync_object_t {
+
+ /** RW lock instance */
+ rw_lock_t* lock;
+
+ /** Mutex instance */
+ WaitMutex* mutex;
+};
+
+/** A cell where an individual thread may wait suspended until a resource
+is released. The suspending is implemented using an operating system
+event semaphore. */
+
+struct sync_cell_t {
+ sync_object_t latch; /*!< pointer to the object the
+ thread is waiting for; if NULL
+ the cell is free for use */
+ ulint request_type; /*!< lock type requested on the
+ object */
+ const char* file; /*!< in debug version file where
+ requested */
+ ulint line; /*!< in debug version line where
+ requested, or ULINT_UNDEFINED */
+ os_thread_id_t thread_id; /*!< thread id of this waiting
+ thread */
+ bool waiting; /*!< TRUE if the thread has already
+					called sync_array_wait_event
+ on this cell */
+ int64_t signal_count; /*!< We capture the signal_count
+ of the latch when we
+ reset the event. This value is
+ then passed on to os_event_wait
+ and we wait only if the event
+ has not been signalled in the
+ period between the reset and
+ wait call. */
+ /** time(NULL) when the wait cell was reserved.
+ FIXME: sync_array_print_long_waits_low() may display bogus
+ warnings when the system time is adjusted to the past! */
+ time_t reservation_time;
+};
+
+/* NOTE: It is allowed for a thread to wait for an event allocated for
+the array without owning the protecting mutex (depending on the case:
+OS or database mutex), but all changes (set or reset) to the state of
+the event must be made while owning the mutex. */
+
+/** Synchronization array */
+struct sync_array_t {
+
+ /** Constructor
+ Creates a synchronization wait array. It is protected by a mutex
+ which is automatically reserved when the functions operating on it
+ are called.
+ @param[in] num_cells Number of cells to create */
+ sync_array_t(ulint num_cells)
+ UNIV_NOTHROW;
+
+ /** Destructor */
+ ~sync_array_t()
+ UNIV_NOTHROW;
+
+ ulint n_reserved; /*!< number of currently reserved
+ cells in the wait array */
+ ulint n_cells; /*!< number of cells in the
+ wait array */
+ sync_cell_t* array; /*!< pointer to wait array */
+ SysMutex mutex; /*!< System mutex protecting the
+ data structure. As this data
+ structure is used in constructing
+ the database mutex, to prevent
+ infinite recursion in implementation,
+ we fall back to an OS mutex. */
+ ulint res_count; /*!< count of cell reservations
+ since creation of the array */
+ ulint next_free_slot; /*!< the next free cell in the array */
+ ulint first_free_slot;/*!< the last slot that was freed */
+};
+
+/** User configured sync array size */
+ulong srv_sync_array_size = 1;
+
+/** Locally stored copy of srv_sync_array_size */
+ulint sync_array_size;
+
+/** The global array of wait cells for implementation of the database's own
+mutexes and read-write locks */
+sync_array_t** sync_wait_array;
+
+/** count of how many times an object has been signalled */
+ulint sg_count;
+
+#define sync_array_exit(a) mutex_exit(&(a)->mutex)
+#define sync_array_enter(a) mutex_enter(&(a)->mutex)
+
+#ifdef UNIV_DEBUG
+/******************************************************************//**
+This function is called only in the debug version. Detects a deadlock
+of one or more threads because of waits on semaphores.
+@return TRUE if deadlock detected */
+static
+bool
+sync_array_detect_deadlock(
+/*=======================*/
+ sync_array_t* arr, /*!< in: wait array; NOTE! the caller must
+				own the mutex protecting the array */
+ sync_cell_t* start, /*!< in: cell where recursive search started */
+ sync_cell_t* cell, /*!< in: cell to search */
+ ulint depth); /*!< in: recursion depth */
+#endif /* UNIV_DEBUG */
+
+/** Constructor
+Creates a synchronization wait array. It is protected by a mutex
+which is automatically reserved when the functions operating on it
+are called.
+@param[in] num_cells Number of cells to create */
+sync_array_t::sync_array_t(ulint num_cells)
+ UNIV_NOTHROW
+ :
+ n_reserved(),
+ n_cells(num_cells),
+ array(UT_NEW_ARRAY_NOKEY(sync_cell_t, num_cells)),
+ mutex(),
+ res_count(),
+ next_free_slot(),
+ first_free_slot(ULINT_UNDEFINED)
+{
+ ut_a(num_cells > 0);
+
+ memset(array, 0x0, sizeof(sync_cell_t) * n_cells);
+
+ /* Then create the mutex to protect the wait array */
+ mutex_create(LATCH_ID_SYNC_ARRAY_MUTEX, &mutex);
+}
+
+/** Validate the integrity of the wait array. Check
+that the number of reserved cells equals the count variable.
+@param[in,out] arr sync wait array */
+static
+void
+sync_array_validate(sync_array_t* arr)
+{
+ ulint i;
+ ulint count = 0;
+
+ sync_array_enter(arr);
+
+ for (i = 0; i < arr->n_cells; i++) {
+ sync_cell_t* cell;
+
+ cell = sync_array_get_nth_cell(arr, i);
+
+ if (cell->latch.mutex != NULL) {
+ count++;
+ }
+ }
+
+ ut_a(count == arr->n_reserved);
+
+ sync_array_exit(arr);
+}
+
+/** Destructor */
+sync_array_t::~sync_array_t()
+ UNIV_NOTHROW
+{
+ ut_a(n_reserved == 0);
+
+ sync_array_validate(this);
+
+ /* Release the mutex protecting the wait array */
+
+ mutex_free(&mutex);
+
+ UT_DELETE_ARRAY(array);
+}
+
+/*****************************************************************//**
+Gets the nth cell in array.
+@return cell */
+UNIV_INTERN
+sync_cell_t*
+sync_array_get_nth_cell(
+/*====================*/
+ sync_array_t* arr, /*!< in: sync array */
+ ulint n) /*!< in: index */
+{
+ ut_a(n < arr->n_cells);
+
+ return(arr->array + n);
+}
+
+/******************************************************************//**
+Frees the resources in a wait array. */
+static
+void
+sync_array_free(
+/*============*/
+ sync_array_t* arr) /*!< in, own: sync wait array */
+{
+ UT_DELETE(arr);
+}
+
+/*******************************************************************//**
+Returns the event that the thread owning the cell waits for. */
+static
+os_event_t
+sync_cell_get_event(
+/*================*/
+ sync_cell_t* cell) /*!< in: non-empty sync array cell */
+{
+ switch(cell->request_type) {
+ case SYNC_MUTEX:
+ return(cell->latch.mutex->event());
+ case RW_LOCK_X_WAIT:
+ return(cell->latch.lock->wait_ex_event);
+ default:
+ return(cell->latch.lock->event);
+ }
+}
+
+/******************************************************************//**
+Reserves a wait array cell for waiting for an object.
+The event of the cell is reset to nonsignalled state.
+@return sync cell to wait on */
+sync_cell_t*
+sync_array_reserve_cell(
+/*====================*/
+ sync_array_t* arr, /*!< in: wait array */
+ void* object, /*!< in: pointer to the object to wait for */
+ ulint type, /*!< in: lock request type */
+ const char* file, /*!< in: file where requested */
+ unsigned line) /*!< in: line where requested */
+{
+ sync_cell_t* cell;
+
+ sync_array_enter(arr);
+
+ if (arr->first_free_slot != ULINT_UNDEFINED) {
+ /* Try and find a slot in the free list */
+ ut_ad(arr->first_free_slot < arr->next_free_slot);
+ cell = sync_array_get_nth_cell(arr, arr->first_free_slot);
+ arr->first_free_slot = cell->line;
+ } else if (arr->next_free_slot < arr->n_cells) {
+ /* Try and find a slot after the currently allocated slots */
+ cell = sync_array_get_nth_cell(arr, arr->next_free_slot);
+ ++arr->next_free_slot;
+ } else {
+ sync_array_exit(arr);
+
+		// We return NULL here; if there is more than one sync
+		// array, the caller should try another sync array instance.
+ return(NULL);
+ }
+
+ ++arr->res_count;
+
+ ut_ad(arr->n_reserved < arr->n_cells);
+ ut_ad(arr->next_free_slot <= arr->n_cells);
+
+ ++arr->n_reserved;
+
+ /* Reserve the cell. */
+ ut_ad(cell->latch.mutex == NULL);
+
+ cell->request_type = type;
+
+ if (cell->request_type == SYNC_MUTEX) {
+ cell->latch.mutex = reinterpret_cast<WaitMutex*>(object);
+ } else {
+ cell->latch.lock = reinterpret_cast<rw_lock_t*>(object);
+ }
+
+ cell->waiting = false;
+
+ cell->file = file;
+ cell->line = line;
+
+ sync_array_exit(arr);
+
+ cell->thread_id = os_thread_get_curr_id();
+
+ cell->reservation_time = time(NULL);
+
+ /* Make sure the event is reset and also store the value of
+ signal_count at which the event was reset. */
+ os_event_t event = sync_cell_get_event(cell);
+ cell->signal_count = os_event_reset(event);
+
+ return(cell);
+}
+
+/******************************************************************//**
+Frees the cell. NOTE! sync_array_wait_event frees the cell
+automatically! */
+void
+sync_array_free_cell(
+/*=================*/
+ sync_array_t* arr, /*!< in: wait array */
+ sync_cell_t*& cell) /*!< in/out: the cell in the array */
+{
+ sync_array_enter(arr);
+
+ ut_a(cell->latch.mutex != NULL);
+
+ cell->waiting = false;
+ cell->signal_count = 0;
+ cell->latch.mutex = NULL;
+
+ /* Setup the list of free slots in the array */
+ cell->line = arr->first_free_slot;
+
+ arr->first_free_slot = cell - arr->array;
+
+ ut_a(arr->n_reserved > 0);
+ arr->n_reserved--;
+
+ if (arr->next_free_slot > arr->n_cells / 2 && arr->n_reserved == 0) {
+#ifdef UNIV_DEBUG
+ for (ulint i = 0; i < arr->next_free_slot; ++i) {
+ cell = sync_array_get_nth_cell(arr, i);
+
+ ut_ad(!cell->waiting);
+ ut_ad(cell->latch.mutex == 0);
+ ut_ad(cell->signal_count == 0);
+ }
+#endif /* UNIV_DEBUG */
+ arr->next_free_slot = 0;
+ arr->first_free_slot = ULINT_UNDEFINED;
+ }
+ sync_array_exit(arr);
+
+ cell = 0;
+}
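A small worked example of the LIFO free list that sync_array_free_cell() threads through the otherwise unused line field (the cell indexes below are arbitrary):

/* Illustrative example only, with arbitrary cell indexes:
   initially:    first_free_slot == ULINT_UNDEFINED
   free cell 3:  array[3].line = ULINT_UNDEFINED; first_free_slot = 3
   free cell 7:  array[7].line = 3;               first_free_slot = 7
   The next sync_array_reserve_cell() pops cell 7 and restores
   first_free_slot to array[7].line == 3, so the list behaves as a
   stack of the most recently freed cells. */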
+
+/******************************************************************//**
+This function should be called when a thread starts to wait on
+a wait array cell. In the debug version this function checks
+if the wait for a semaphore will result in a deadlock, in which
+case it prints info and asserts. */
+void
+sync_array_wait_event(
+/*==================*/
+ sync_array_t* arr, /*!< in: wait array */
+ sync_cell_t*& cell) /*!< in: index of the reserved cell */
+{
+ sync_array_enter(arr);
+
+ ut_ad(!cell->waiting);
+ ut_ad(cell->latch.mutex);
+ ut_ad(os_thread_get_curr_id() == cell->thread_id);
+
+ cell->waiting = true;
+
+#ifdef UNIV_DEBUG
+
+	/* We use a simple enter to the mutex below, because if
+	we cannot acquire it at once, mutex_enter would recursively
+	call sync_array routines, leading to trouble.
+	rw_lock_debug_mutex freezes the debug lists. */
+
+ rw_lock_debug_mutex_enter();
+
+ if (sync_array_detect_deadlock(arr, cell, cell, 0)) {
+
+ ib::fatal() << "########################################"
+ " Deadlock Detected!";
+ }
+
+ rw_lock_debug_mutex_exit();
+#endif /* UNIV_DEBUG */
+ sync_array_exit(arr);
+
+ tpool::tpool_wait_begin();
+ os_event_wait_low(sync_cell_get_event(cell), cell->signal_count);
+ tpool::tpool_wait_end();
+
+ sync_array_free_cell(arr, cell);
+
+ cell = 0;
+}
+
+/******************************************************************//**
+Reports info of a wait array cell. */
+static
+void
+sync_array_cell_print(
+/*==================*/
+ FILE* file, /*!< in: file where to print */
+ sync_cell_t* cell) /*!< in: sync cell */
+{
+ rw_lock_t* rwlock;
+ ulint type;
+ ulint writer;
+
+ type = cell->request_type;
+
+ fprintf(file,
+ "--Thread " ULINTPF " has waited at %s line " ULINTPF
+ " for %.2f seconds the semaphore:\n",
+ ulint(cell->thread_id),
+ innobase_basename(cell->file), cell->line,
+ difftime(time(NULL), cell->reservation_time));
+
+ switch (type) {
+ default:
+ ut_error;
+ case RW_LOCK_X:
+ case RW_LOCK_X_WAIT:
+ case RW_LOCK_SX:
+ case RW_LOCK_S:
+ fputs(type == RW_LOCK_X ? "X-lock on"
+ : type == RW_LOCK_X_WAIT ? "X-lock (wait_ex) on"
+ : type == RW_LOCK_SX ? "SX-lock on"
+ : "S-lock on", file);
+
+ rwlock = cell->latch.lock;
+
+ if (rwlock) {
+ fprintf(file,
+ " RW-latch at %p created in file %s line %u\n",
+ (void*) rwlock, innobase_basename(rwlock->cfile_name),
+ rwlock->cline);
+
+ writer = rw_lock_get_writer(rwlock);
+
+ if (writer != RW_LOCK_NOT_LOCKED) {
+
+ fprintf(file,
+ "a writer (thread id " ULINTPF ") has"
+ " reserved it in mode %s",
+ ulint(rwlock->writer_thread),
+ writer == RW_LOCK_X ? " exclusive\n"
+ : writer == RW_LOCK_SX ? " SX\n"
+ : " wait exclusive\n");
+ }
+
+ fprintf(file,
+ "number of readers " ULINTPF
+ ", waiters flag %d, "
+ "lock_word: %x\n"
+ "Last time write locked in file %s line %u"
+#if 0 /* JAN: TODO: FIX LATER */
+ "\nHolder thread " ULINTPF
+ " file %s line " ULINTPF
+#endif
+ "\n",
+ rw_lock_get_reader_count(rwlock),
+ uint32_t{rwlock->waiters},
+ int32_t{rwlock->lock_word},
+ innobase_basename(rwlock->last_x_file_name),
+ rwlock->last_x_line
+#if 0 /* JAN: TODO: FIX LATER */
+ , ulint(rwlock->thread_id),
+ innobase_basename(rwlock->file_name),
+ rwlock->line
+#endif
+ );
+ }
+ break;
+ case SYNC_MUTEX:
+ WaitMutex* mutex = cell->latch.mutex;
+ const WaitMutex::MutexPolicy& policy = mutex->policy();
+#ifdef UNIV_DEBUG
+ const char* name = policy.context.get_enter_filename();
+ if (name == NULL) {
+ /* The mutex might have been released. */
+ name = "NULL";
+ }
+#endif /* UNIV_DEBUG */
+
+ if (mutex) {
+ fprintf(file,
+ "Mutex at %p, %s, lock var %x\n"
+#ifdef UNIV_DEBUG
+ "Last time reserved in file %s line %u"
+#endif /* UNIV_DEBUG */
+ "\n",
+ (void*) mutex,
+ policy.to_string().c_str(),
+ mutex->state()
+#ifdef UNIV_DEBUG
+ ,name,
+ policy.context.get_enter_line()
+#endif /* UNIV_DEBUG */
+ );
+ }
+ break;
+ }
+
+ if (!cell->waiting) {
+ fputs("wait has ended\n", file);
+ }
+}
+
+#ifdef UNIV_DEBUG
+/******************************************************************//**
+Looks for a cell with the given thread id.
+@return pointer to cell or NULL if not found */
+static
+sync_cell_t*
+sync_array_find_thread(
+/*===================*/
+ sync_array_t* arr, /*!< in: wait array */
+ os_thread_id_t thread) /*!< in: thread id */
+{
+ ulint i;
+
+ for (i = 0; i < arr->n_cells; i++) {
+ sync_cell_t* cell;
+
+ cell = sync_array_get_nth_cell(arr, i);
+
+ if (cell->latch.mutex != NULL
+ && os_thread_eq(cell->thread_id, thread)) {
+
+ return(cell); /* Found */
+ }
+ }
+
+ return(NULL); /* Not found */
+}
+
+/******************************************************************//**
+Recursion step for deadlock detection.
+@return TRUE if deadlock detected */
+static
+ibool
+sync_array_deadlock_step(
+/*=====================*/
+ sync_array_t* arr, /*!< in: wait array; NOTE! the caller must
+				own the mutex protecting the array */
+ sync_cell_t* start, /*!< in: cell where recursive search
+ started */
+ os_thread_id_t thread, /*!< in: thread to look at */
+ ulint pass, /*!< in: pass value */
+ ulint depth) /*!< in: recursion depth */
+{
+ sync_cell_t* new_cell;
+
+ if (pass != 0) {
+ /* If pass != 0, then we do not know which threads are
+		responsible for releasing the lock, and no deadlock can
+ be detected. */
+
+ return(FALSE);
+ }
+
+ new_cell = sync_array_find_thread(arr, thread);
+
+ if (new_cell == start) {
+ /* Deadlock */
+ fputs("########################################\n"
+ "DEADLOCK of threads detected!\n", stderr);
+
+ return(TRUE);
+
+ } else if (new_cell) {
+ return(sync_array_detect_deadlock(
+ arr, start, new_cell, depth + 1));
+ }
+ return(FALSE);
+}
+
+/**
+Report an error to stderr.
+@param lock rw-lock instance
+@param debug rw-lock debug information
+@param cell thread context */
+static
+void
+sync_array_report_error(
+ rw_lock_t* lock,
+ rw_lock_debug_t* debug,
+ sync_cell_t* cell)
+{
+ fprintf(stderr, "rw-lock %p ", (void*) lock);
+ sync_array_cell_print(stderr, cell);
+ rw_lock_debug_print(stderr, debug);
+}
+
+/******************************************************************//**
+This function is called only in the debug version. Detects a deadlock
+of one or more threads because of waits on semaphores.
+@return TRUE if deadlock detected */
+static
+bool
+sync_array_detect_deadlock(
+/*=======================*/
+ sync_array_t* arr, /*!< in: wait array; NOTE! the caller must
+				own the mutex protecting the array */
+ sync_cell_t* start, /*!< in: cell where recursive search started */
+ sync_cell_t* cell, /*!< in: cell to search */
+ ulint depth) /*!< in: recursion depth */
+{
+ rw_lock_t* lock;
+ os_thread_id_t thread;
+ ibool ret;
+ rw_lock_debug_t*debug;
+
+ ut_a(arr);
+ ut_a(start);
+ ut_a(cell);
+ ut_ad(cell->latch.mutex != 0);
+ ut_ad(os_thread_get_curr_id() == start->thread_id);
+ ut_ad(depth < 100);
+
+ depth++;
+
+ if (!cell->waiting) {
+ /* No deadlock here */
+ return(false);
+ }
+
+ switch (cell->request_type) {
+ case SYNC_MUTEX: {
+
+ WaitMutex* mutex = cell->latch.mutex;
+ const WaitMutex::MutexPolicy& policy = mutex->policy();
+
+ if (mutex->state() != MUTEX_STATE_UNLOCKED) {
+ thread = policy.context.get_thread_id();
+
+			/* Note that mutex->thread_id above may also
+			be OS_THREAD_ID_UNDEFINED, because the
+			thread which held the mutex may not have
+			updated the value yet, or it has already
+			released the mutex: in this case no deadlock
+			can occur, as the wait array cannot contain
+			a thread with an ID_UNDEFINED value. */
+ ret = sync_array_deadlock_step(
+ arr, start, thread, 0, depth);
+
+ if (ret) {
+ const char* name;
+
+ name = policy.context.get_enter_filename();
+
+ if (name == NULL) {
+ /* The mutex might have been
+ released. */
+ name = "NULL";
+ }
+
+ ib::info()
+ << "Mutex " << mutex << " owned by"
+ " thread " << thread
+ << " file " << name << " line "
+ << policy.context.get_enter_line();
+
+ sync_array_cell_print(stderr, cell);
+
+ return(true);
+ }
+ }
+
+ /* No deadlock */
+ return(false);
+ }
+
+ case RW_LOCK_X:
+ case RW_LOCK_X_WAIT:
+
+ lock = cell->latch.lock;
+
+ for (debug = UT_LIST_GET_FIRST(lock->debug_list);
+ debug != NULL;
+ debug = UT_LIST_GET_NEXT(list, debug)) {
+
+ thread = debug->thread_id;
+
+ switch (debug->lock_type) {
+ case RW_LOCK_X:
+ case RW_LOCK_SX:
+ case RW_LOCK_X_WAIT:
+ if (os_thread_eq(thread, cell->thread_id)) {
+ break;
+ }
+ /* fall through */
+ case RW_LOCK_S:
+
+			/* The (wait) x-lock request can block
+			infinitely only if someone (possibly the cell
+			thread) is holding an s-lock, or someone else
+			(not the cell thread) is holding a (wait) x-lock
+			or sx-lock, and that holder is blocked by the
+			start thread */
+
+ ret = sync_array_deadlock_step(
+ arr, start, thread, debug->pass,
+ depth);
+
+ if (ret) {
+ sync_array_report_error(
+ lock, debug, cell);
+ rw_lock_debug_print(stderr, debug);
+ return(TRUE);
+ }
+ }
+ }
+
+ return(false);
+
+ case RW_LOCK_SX:
+
+ lock = cell->latch.lock;
+
+ for (debug = UT_LIST_GET_FIRST(lock->debug_list);
+ debug != 0;
+ debug = UT_LIST_GET_NEXT(list, debug)) {
+
+ thread = debug->thread_id;
+
+ switch (debug->lock_type) {
+ case RW_LOCK_X:
+ case RW_LOCK_SX:
+ case RW_LOCK_X_WAIT:
+
+ if (os_thread_eq(thread, cell->thread_id)) {
+ break;
+ }
+
+			/* The sx-lock request can block infinitely
+			only if someone (possibly the cell thread) is
+			holding a (wait) x-lock or sx-lock, and that
+			holder is blocked by the start thread */
+
+ ret = sync_array_deadlock_step(
+ arr, start, thread, debug->pass,
+ depth);
+
+ if (ret) {
+ sync_array_report_error(
+ lock, debug, cell);
+ return(TRUE);
+ }
+ }
+ }
+
+ return(false);
+
+ case RW_LOCK_S:
+
+ lock = cell->latch.lock;
+
+ for (debug = UT_LIST_GET_FIRST(lock->debug_list);
+ debug != 0;
+ debug = UT_LIST_GET_NEXT(list, debug)) {
+
+ thread = debug->thread_id;
+
+ if (debug->lock_type == RW_LOCK_X
+ || debug->lock_type == RW_LOCK_X_WAIT) {
+
+				/* The s-lock request can block infinitely
+				only if someone (possibly the cell thread) is
+				holding a (wait) x-lock, and that holder is
+				blocked by the start thread */
+
+ ret = sync_array_deadlock_step(
+ arr, start, thread, debug->pass,
+ depth);
+
+ if (ret) {
+ sync_array_report_error(
+ lock, debug, cell);
+ return(TRUE);
+ }
+ }
+ }
+
+ return(false);
+
+ default:
+ ut_error;
+ }
+
+ return(true);
+}
+#endif /* UNIV_DEBUG */
+
+/**********************************************************************//**
+Prints warnings of long semaphore waits to stderr.
+@return TRUE if fatal semaphore wait threshold was exceeded */
+static
+bool
+sync_array_print_long_waits_low(
+/*============================*/
+ sync_array_t* arr, /*!< in: sync array instance */
+ os_thread_id_t* waiter, /*!< out: longest waiting thread */
+ const void** sema, /*!< out: longest-waited-for semaphore */
+ ibool* noticed)/*!< out: TRUE if long wait noticed */
+{
+ double fatal_timeout = static_cast<double>(
+ srv_fatal_semaphore_wait_threshold);
+ ibool fatal = FALSE;
+ double longest_diff = 0;
+ ulint i;
+
+ /* For huge tables, skip the check during CHECK TABLE etc... */
+ if (btr_validate_index_running) {
+ return(false);
+ }
+
+#if defined HAVE_valgrind && !__has_feature(memory_sanitizer)
+ /* Increase the timeouts if running under valgrind because it executes
+	extremely slowly. HAVE_valgrind does not necessarily mean that
+ we are running under valgrind but we have no better way to tell.
+ See Bug#58432 innodb.innodb_bug56143 fails under valgrind
+ for an example */
+# define SYNC_ARRAY_TIMEOUT 2400
+ fatal_timeout *= 10;
+#else
+# define SYNC_ARRAY_TIMEOUT 240
+#endif
+ const time_t now = time(NULL);
+
+ for (ulint i = 0; i < arr->n_cells; i++) {
+
+ sync_cell_t* cell;
+ void* latch;
+
+ cell = sync_array_get_nth_cell(arr, i);
+
+ latch = cell->latch.mutex;
+
+ if (latch == NULL || !cell->waiting) {
+
+ continue;
+ }
+
+ double diff = difftime(now, cell->reservation_time);
+
+ if (diff > SYNC_ARRAY_TIMEOUT) {
+ ib::warn() << "A long semaphore wait:";
+ sync_array_cell_print(stderr, cell);
+ *noticed = TRUE;
+ }
+
+ if (diff > fatal_timeout) {
+ fatal = TRUE;
+ }
+
+ if (diff > longest_diff) {
+ longest_diff = diff;
+ *sema = latch;
+ *waiter = cell->thread_id;
+ }
+ }
+
+ /* We found a long semaphore wait, print all threads that are
+ waiting for a semaphore. */
+ if (*noticed) {
+ for (i = 0; i < arr->n_cells; i++) {
+ void* wait_object;
+ sync_cell_t* cell;
+
+ cell = sync_array_get_nth_cell(arr, i);
+
+ wait_object = cell->latch.mutex;
+
+ if (wait_object == NULL || !cell->waiting) {
+
+ continue;
+ }
+
+ ib::info() << "A semaphore wait:";
+ sync_array_cell_print(stderr, cell);
+ }
+ }
+
+#undef SYNC_ARRAY_TIMEOUT
+
+ return(fatal);
+}
+
+/**********************************************************************//**
+Prints warnings of long semaphore waits to stderr.
+@return TRUE if fatal semaphore wait threshold was exceeded */
+ibool
+sync_array_print_long_waits(
+/*========================*/
+ os_thread_id_t* waiter, /*!< out: longest waiting thread */
+ const void** sema) /*!< out: longest-waited-for semaphore */
+{
+ ulint i;
+ ibool fatal = FALSE;
+ ibool noticed = FALSE;
+
+ for (i = 0; i < sync_array_size; ++i) {
+
+ sync_array_t* arr = sync_wait_array[i];
+
+ sync_array_enter(arr);
+
+ if (sync_array_print_long_waits_low(
+ arr, waiter, sema, &noticed)) {
+
+ fatal = TRUE;
+ }
+
+ sync_array_exit(arr);
+ }
+
+ if (noticed) {
+ /* If some crucial semaphore is reserved, then also the InnoDB
+ Monitor can hang, and we do not get diagnostics. Since in
+ many cases an InnoDB hang is caused by a pwrite() or a pread()
+ call hanging inside the operating system, let us print right
+ now the values of pending calls of these. */
+
+ fprintf(stderr,
+ "InnoDB: Pending reads " UINT64PF
+ ", writes " UINT64PF "\n",
+ MONITOR_VALUE(MONITOR_OS_PENDING_READS),
+ MONITOR_VALUE(MONITOR_OS_PENDING_WRITES));
+
+ lock_wait_timeout_task(nullptr);
+ }
+
+ return(fatal);
+}
+
+/**********************************************************************//**
+Prints info of the wait array. */
+static
+void
+sync_array_print_info_low(
+/*======================*/
+ FILE* file, /*!< in: file where to print */
+ sync_array_t* arr) /*!< in: wait array */
+{
+ ulint i;
+ ulint count = 0;
+
+ fprintf(file,
+ "OS WAIT ARRAY INFO: reservation count " ULINTPF "\n",
+ arr->res_count);
+
+ for (i = 0; count < arr->n_reserved; ++i) {
+ sync_cell_t* cell;
+
+ cell = sync_array_get_nth_cell(arr, i);
+
+ if (cell->latch.mutex != 0) {
+ count++;
+ sync_array_cell_print(file, cell);
+ }
+ }
+}
+
+/**********************************************************************//**
+Prints info of the wait array. */
+static
+void
+sync_array_print_info(
+/*==================*/
+ FILE* file, /*!< in: file where to print */
+ sync_array_t* arr) /*!< in: wait array */
+{
+ sync_array_enter(arr);
+
+ sync_array_print_info_low(file, arr);
+
+ sync_array_exit(arr);
+}
+
+/** Create the primary system wait arrays */
+void sync_array_init()
+{
+ ut_a(sync_wait_array == NULL);
+ ut_a(srv_sync_array_size > 0);
+ ut_a(srv_max_n_threads > 0);
+
+ sync_array_size = srv_sync_array_size;
+
+ sync_wait_array = UT_NEW_ARRAY_NOKEY(sync_array_t*, sync_array_size);
+
+ ulint n_slots = 1 + (srv_max_n_threads - 1) / sync_array_size;
+
+ for (ulint i = 0; i < sync_array_size; ++i) {
+
+ sync_wait_array[i] = UT_NEW_NOKEY(sync_array_t(n_slots));
+ }
+}
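The n_slots expression above is a ceiling division, so the cells of all arrays together always cover srv_max_n_threads. A worked example with hypothetical settings:

/* Hypothetical values: srv_max_n_threads = 10000, srv_sync_array_size = 32.
   n_slots = 1 + (10000 - 1) / 32 = 1 + 312 = 313 cells per array,
   and 313 * 32 = 10016 >= 10000 cells in total. */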
+
+/** Destroy the sync array wait sub-system. */
+void sync_array_close()
+{
+ for (ulint i = 0; i < sync_array_size; ++i) {
+ sync_array_free(sync_wait_array[i]);
+ }
+
+ UT_DELETE_ARRAY(sync_wait_array);
+ sync_wait_array = NULL;
+}
+
+/**********************************************************************//**
+Print info about the sync array(s). */
+void
+sync_array_print(
+/*=============*/
+ FILE* file) /*!< in/out: Print to this stream */
+{
+ for (ulint i = 0; i < sync_array_size; ++i) {
+ sync_array_print_info(file, sync_wait_array[i]);
+ }
+
+ fprintf(file,
+ "OS WAIT ARRAY INFO: signal count " ULINTPF "\n", sg_count);
+
+}
+
+/**********************************************************************//**
+Prints info of the wait array without using any mutexes/semaphores. */
+UNIV_INTERN
+void
+sync_array_print_innodb(void)
+/*=========================*/
+{
+ ulint i;
+ sync_array_t* arr = sync_array_get();
+
+ fputs("InnoDB: Semaphore wait debug output started for InnoDB:\n", stderr);
+
+ for (i = 0; i < arr->n_cells; i++) {
+ void* wait_object;
+ sync_cell_t* cell;
+
+ cell = sync_array_get_nth_cell(arr, i);
+
+ wait_object = cell->latch.mutex;
+
+ if (wait_object == NULL || !cell->waiting) {
+
+ continue;
+ }
+
+ fputs("InnoDB: Warning: semaphore wait:\n",
+ stderr);
+ sync_array_cell_print(stderr, cell);
+ }
+
+ fputs("InnoDB: Semaphore wait debug output ended:\n", stderr);
+
+}
+
+/**********************************************************************//**
+Get number of items on sync array. */
+UNIV_INTERN
+ulint
+sync_arr_get_n_items(void)
+/*======================*/
+{
+ sync_array_t* sync_arr = sync_array_get();
+ return (ulint) sync_arr->n_cells;
+}
+
+/******************************************************************//**
+Get the specified item from the sync array if it is reserved, and set
+the given pointer to that array item.
+@return true if the item is reserved, false otherwise */
+UNIV_INTERN
+ibool
+sync_arr_get_item(
+/*==============*/
+ ulint i, /*!< in: requested item */
+ sync_cell_t **cell) /*!< out: cell contents if item
+ reserved */
+{
+ sync_array_t* sync_arr;
+ sync_cell_t* wait_cell;
+ void* wait_object;
+ ibool found = FALSE;
+
+ sync_arr = sync_array_get();
+ wait_cell = sync_array_get_nth_cell(sync_arr, i);
+
+ if (wait_cell) {
+ wait_object = wait_cell->latch.mutex;
+
+ if(wait_object != NULL && wait_cell->waiting) {
+ found = TRUE;
+ *cell = wait_cell;
+ }
+ }
+
+ return found;
+}
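A minimal sketch of the intended call pattern for the two helpers above; the INFORMATION_SCHEMA fill function that follows uses the same loop:

/* Illustrative sketch only. */
for (ulint i = 0; i < sync_arr_get_n_items(); i++) {
	sync_cell_t*	cell = NULL;

	if (sync_arr_get_item(i, &cell)) {
		/* cell points to a reserved, waiting cell; fields such as
		request_type, file, line and reservation_time can be read
		to build a report row. */
	}
}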
+
+/*******************************************************************//**
+Function to populate INFORMATION_SCHEMA.INNODB_SYS_SEMAPHORE_WAITS table.
+Loop through each item in the sync array, extract the column
+information, and fill the INFORMATION_SCHEMA.INNODB_SYS_SEMAPHORE_WAITS table.
+@return 0 on success */
+UNIV_INTERN
+int
+sync_arr_fill_sys_semphore_waits_table(
+/*===================================*/
+ THD* thd, /*!< in: thread */
+ TABLE_LIST* tables, /*!< in/out: tables to fill */
+ Item* ) /*!< in: condition (not used) */
+{
+ Field** fields;
+ ulint n_items;
+
+ DBUG_ENTER("i_s_sys_semaphore_waits_fill_table");
+ RETURN_IF_INNODB_NOT_STARTED(tables->schema_table_name.str);
+
+ /* deny access to user without PROCESS_ACL privilege */
+ if (check_global_access(thd, PROCESS_ACL)) {
+ DBUG_RETURN(0);
+ }
+
+ fields = tables->table->field;
+ n_items = sync_arr_get_n_items();
+ ulint type;
+
+ for(ulint i=0; i < n_items;i++) {
+ sync_cell_t *cell=NULL;
+ if (sync_arr_get_item(i, &cell)) {
+ WaitMutex* mutex;
+ type = cell->request_type;
+ /* JAN: FIXME
+ OK(fields[SYS_SEMAPHORE_WAITS_THREAD_ID]->store(,
+ ulint(cell->thread), true));
+ */
+ OK(field_store_string(fields[SYS_SEMAPHORE_WAITS_FILE], innobase_basename(cell->file)));
+ OK(fields[SYS_SEMAPHORE_WAITS_LINE]->store(cell->line, true));
+ fields[SYS_SEMAPHORE_WAITS_LINE]->set_notnull();
+ OK(fields[SYS_SEMAPHORE_WAITS_WAIT_TIME]->store(
+ difftime(time(NULL),
+ cell->reservation_time)));
+
+ if (type == SYNC_MUTEX) {
+ mutex = static_cast<WaitMutex*>(cell->latch.mutex);
+
+ if (mutex) {
+ // JAN: FIXME
+ // OK(field_store_string(fields[SYS_SEMAPHORE_WAITS_OBJECT_NAME], mutex->cmutex_name));
+ OK(fields[SYS_SEMAPHORE_WAITS_WAIT_OBJECT]->store((longlong)mutex, true));
+ OK(field_store_string(fields[SYS_SEMAPHORE_WAITS_WAIT_TYPE], "MUTEX"));
+ //OK(fields[SYS_SEMAPHORE_WAITS_HOLDER_THREAD_ID]->store(mutex->thread_id, true));
+ //OK(field_store_string(fields[SYS_SEMAPHORE_WAITS_HOLDER_FILE], innobase_basename(mutex->file_name)));
+ //OK(fields[SYS_SEMAPHORE_WAITS_HOLDER_LINE]->store(mutex->line, true));
+ //fields[SYS_SEMAPHORE_WAITS_HOLDER_LINE]->set_notnull();
+ //OK(field_store_string(fields[SYS_SEMAPHORE_WAITS_CREATED_FILE], innobase_basename(mutex->cfile_name)));
+ //OK(fields[SYS_SEMAPHORE_WAITS_CREATED_LINE]->store(mutex->cline, true));
+ //fields[SYS_SEMAPHORE_WAITS_CREATED_LINE]->set_notnull();
+ //OK(fields[SYS_SEMAPHORE_WAITS_WAITERS_FLAG]->store(mutex->waiters, true));
+ //OK(fields[SYS_SEMAPHORE_WAITS_LOCK_WORD]->store(mutex->lock_word, true));
+ //OK(field_store_string(fields[SYS_SEMAPHORE_WAITS_LAST_WRITER_FILE], innobase_basename(mutex->file_name)));
+ //OK(fields[SYS_SEMAPHORE_WAITS_LAST_WRITER_LINE]->store(mutex->line, true));
+ //fields[SYS_SEMAPHORE_WAITS_LAST_WRITER_LINE]->set_notnull();
+ //OK(fields[SYS_SEMAPHORE_WAITS_OS_WAIT_COUNT]->store(mutex->count_os_wait, true));
+ }
+ } else if (type == RW_LOCK_X_WAIT
+ || type == RW_LOCK_X
+ || type == RW_LOCK_SX
+ || type == RW_LOCK_S) {
+ rw_lock_t* rwlock=NULL;
+
+ rwlock = static_cast<rw_lock_t *> (cell->latch.lock);
+
+ if (rwlock) {
+ ulint writer = rw_lock_get_writer(rwlock);
+
+ OK(fields[SYS_SEMAPHORE_WAITS_WAIT_OBJECT]->store((longlong)rwlock, true));
+ if (type == RW_LOCK_X) {
+ OK(field_store_string(fields[SYS_SEMAPHORE_WAITS_WAIT_TYPE], "RW_LOCK_X"));
+ } else if (type == RW_LOCK_X_WAIT) {
+ OK(field_store_string(fields[SYS_SEMAPHORE_WAITS_WAIT_TYPE], "RW_LOCK_X_WAIT"));
+ } else if (type == RW_LOCK_S) {
+ OK(field_store_string(fields[SYS_SEMAPHORE_WAITS_WAIT_TYPE], "RW_LOCK_S"));
+ } else if (type == RW_LOCK_SX) {
+ OK(field_store_string(fields[SYS_SEMAPHORE_WAITS_WAIT_TYPE], "RW_LOCK_SX"));
+ }
+
+ if (writer != RW_LOCK_NOT_LOCKED) {
+ // JAN: FIXME
+ // OK(field_store_string(fields[SYS_SEMAPHORE_WAITS_OBJECT_NAME], rwlock->lock_name));
+ OK(fields[SYS_SEMAPHORE_WAITS_WRITER_THREAD]->store(ulint(rwlock->writer_thread), true));
+
+ if (writer == RW_LOCK_X) {
+ OK(field_store_string(fields[SYS_SEMAPHORE_WAITS_RESERVATION_MODE], "RW_LOCK_X"));
+ } else if (writer == RW_LOCK_X_WAIT) {
+ OK(field_store_string(fields[SYS_SEMAPHORE_WAITS_RESERVATION_MODE], "RW_LOCK_X_WAIT"));
+ } else if (type == RW_LOCK_SX) {
+ OK(field_store_string(fields[SYS_SEMAPHORE_WAITS_RESERVATION_MODE], "RW_LOCK_SX"));
+ }
+
+ //OK(fields[SYS_SEMAPHORE_WAITS_HOLDER_THREAD_ID]->store(rwlock->thread_id, true));
+ //OK(field_store_string(fields[SYS_SEMAPHORE_WAITS_HOLDER_FILE], innobase_basename(rwlock->file_name)));
+ //OK(fields[SYS_SEMAPHORE_WAITS_HOLDER_LINE]->store(rwlock->line, true));
+ //fields[SYS_SEMAPHORE_WAITS_HOLDER_LINE]->set_notnull();
+ OK(fields[SYS_SEMAPHORE_WAITS_READERS]->store(rw_lock_get_reader_count(rwlock), true));
+ OK(fields[SYS_SEMAPHORE_WAITS_WAITERS_FLAG]->store(
+ rwlock->waiters,
+ true));
+ OK(fields[SYS_SEMAPHORE_WAITS_LOCK_WORD]->store(
+ rwlock->lock_word,
+ true));
+ OK(field_store_string(fields[SYS_SEMAPHORE_WAITS_LAST_WRITER_FILE], innobase_basename(rwlock->last_x_file_name)));
+ OK(fields[SYS_SEMAPHORE_WAITS_LAST_WRITER_LINE]->store(rwlock->last_x_line, true));
+ fields[SYS_SEMAPHORE_WAITS_LAST_WRITER_LINE]->set_notnull();
+ OK(fields[SYS_SEMAPHORE_WAITS_OS_WAIT_COUNT]->store(rwlock->count_os_wait, true));
+ }
+ }
+ }
+
+ OK(schema_table_store_record(thd, tables->table));
+ }
+ }
+
+ DBUG_RETURN(0);
+}
diff --git a/storage/innobase/sync/sync0debug.cc b/storage/innobase/sync/sync0debug.cc
new file mode 100644
index 00000000..7c3e4c05
--- /dev/null
+++ b/storage/innobase/sync/sync0debug.cc
@@ -0,0 +1,1423 @@
+/*****************************************************************************
+
+Copyright (c) 2014, 2016, Oracle and/or its affiliates. All Rights Reserved.
+Copyright (c) 2017, 2020, MariaDB Corporation.
+
+Portions of this file contain modifications contributed and copyrighted by
+Google, Inc. Those modifications are gratefully acknowledged and are described
+briefly in the InnoDB documentation. The contributions by Google are
+incorporated with their permission, and subject to the conditions contained in
+the file COPYING.Google.
+
+This program is free software; you can redistribute it and/or modify it under
+the terms of the GNU General Public License as published by the Free Software
+Foundation; version 2 of the License.
+
+This program is distributed in the hope that it will be useful, but WITHOUT
+ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
+FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details.
+
+You should have received a copy of the GNU General Public License along with
+this program; if not, write to the Free Software Foundation, Inc.,
+51 Franklin Street, Fifth Floor, Boston, MA 02110-1335 USA
+
+*****************************************************************************/
+
+/**************************************************//**
+@file sync/sync0debug.cc
+Debug checks for latches.
+
+Created 2012-08-21 Sunny Bains
+*******************************************************/
+
+#include "sync0sync.h"
+#include "sync0debug.h"
+#include "srv0start.h"
+#include "fil0fil.h"
+
+#include <vector>
+#include <string>
+#include <algorithm>
+#include <map>
+
+#ifdef UNIV_DEBUG
+
+my_bool srv_sync_debug;
+
+/** The global mutex which protects debug info lists of all rw-locks.
+To modify the debug info list of an rw-lock, this mutex has to be
+acquired in addition to the mutex protecting the lock. */
+static SysMutex rw_lock_debug_mutex;
+
+/** The latch held by a thread */
+struct Latched {
+
+ /** Constructor */
+ Latched() : m_latch(), m_level(SYNC_UNKNOWN) { }
+
+ /** Constructor
+ @param[in] latch Latch instance
+ @param[in] level Level of latch held */
+ Latched(const latch_t* latch,
+ latch_level_t level)
+ :
+ m_latch(latch),
+ m_level(level)
+ {
+ /* No op */
+ }
+
+ /** @return the latch level */
+ latch_level_t get_level() const
+ {
+ return(m_level);
+ }
+
+ /** Check if the rhs latch and level match
+ @param[in] rhs instance to compare with
+ @return true on match */
+ bool operator==(const Latched& rhs) const
+ {
+ return(m_latch == rhs.m_latch && m_level == rhs.m_level);
+ }
+
+ /** The latch instance */
+ const latch_t* m_latch;
+
+ /** The latch level. For buffer blocks we can pass a separate latch
+ level to check against, see buf_block_dbg_add_level() */
+ latch_level_t m_level;
+};
+
+/** Thread specific latches. This is ordered on level in descending order. */
+typedef std::vector<Latched, ut_allocator<Latched> > Latches;
+
+/** The deadlock detector. */
+struct LatchDebug {
+
+ /** Debug mutex for control structures, should not be tracked
+ by this module. */
+ typedef OSMutex Mutex;
+
+ /** Comparator for the ThreadMap. */
+ struct os_thread_id_less
+ : public std::binary_function<
+ os_thread_id_t,
+ os_thread_id_t,
+ bool>
+ {
+ /** @return true if lhs < rhs */
+ bool operator()(
+ const os_thread_id_t& lhs,
+ const os_thread_id_t& rhs) const
+ UNIV_NOTHROW
+ {
+ return(ulint(lhs) < ulint(rhs));
+ }
+ };
+
+ /** For tracking a thread's latches. */
+ typedef std::map<
+ os_thread_id_t,
+ Latches*,
+ os_thread_id_less,
+ ut_allocator<std::pair<const os_thread_id_t, Latches*> > >
+ ThreadMap;
+
+ /** Constructor */
+ LatchDebug()
+ UNIV_NOTHROW;
+
+ /** Destructor */
+ ~LatchDebug()
+ UNIV_NOTHROW
+ {
+ m_mutex.destroy();
+ }
+
+ /** Create a new instance if one doesn't exist else return
+ the existing one.
+ @param[in] add add an empty entry if one is not
+ found (default no)
+ @return pointer to a thread's acquired latches. */
+ Latches* thread_latches(bool add = false)
+ UNIV_NOTHROW;
+
+ /** Check that all the latches already owned by a thread have a lower
+ level than limit.
+ @param[in] latches the thread's existing (acquired) latches
+ @param[in] limit to check against
+ @return latched if there is one with a level <= limit . */
+ const Latched* less(
+ const Latches* latches,
+ latch_level_t limit) const
+ UNIV_NOTHROW;
+
+ /** Checks if the level value exists in the thread's acquired latches.
+ @param[in] latches the thread's existing (acquired) latches
+ @param[in] level to lookup
+ @return latch if found or 0 */
+ const latch_t* find(
+		const Latches*	latches,
+ latch_level_t level) const
+ UNIV_NOTHROW;
+
+ /**
+ Checks if the level value exists in the thread's acquired latches.
+ @param[in] level to lookup
+ @return latch if found or 0 */
+ const latch_t* find(latch_level_t level)
+ UNIV_NOTHROW;
+
+ /** Report error and abort.
+ @param[in] latches thread's existing latches
+ @param[in] latched The existing latch causing the
+ invariant to fail
+ @param[in] level The new level request that breaks
+ the order */
+ void crash(
+ const Latches* latches,
+ const Latched* latched,
+ latch_level_t level) const
+ UNIV_NOTHROW;
+
+ /** Do a basic ordering check.
+ @param[in] latches thread's existing latches
+ @param[in] requested_level Level requested by latch
+ @param[in] level declared ulint so that we can
+ do level - 1. The level of the
+ latch that the thread is trying
+ to acquire
+ @return true if passes, else crash with error message. */
+ inline bool basic_check(
+ const Latches* latches,
+ latch_level_t requested_level,
+ lint level) const
+ UNIV_NOTHROW;
+
+ /** Adds a latch and its level in the thread level array. Allocates
+ the memory for the array if called for the first time for this
+ OS thread. Makes the checks against other latch levels stored
+ in the array for this thread.
+
+	@param[in]	latch	latch that the thread wants to acquire.
+ @param[in] level latch level to check against */
+ void lock_validate(
+ const latch_t* latch,
+ latch_level_t level)
+ UNIV_NOTHROW
+ {
+ /* Ignore diagnostic latches, starting with '.' */
+
+ if (*latch->get_name() != '.'
+ && latch->get_level() != SYNC_LEVEL_VARYING) {
+
+ ut_ad(level != SYNC_LEVEL_VARYING);
+
+ Latches* latches = check_order(latch, level);
+
+ ut_a(latches->empty()
+ || level == SYNC_LEVEL_VARYING
+ || level == SYNC_NO_ORDER_CHECK
+ || latches->back().get_level()
+ == SYNC_NO_ORDER_CHECK
+ || latches->back().m_latch->get_level()
+ == SYNC_LEVEL_VARYING
+ || latches->back().get_level() >= level);
+ }
+ }
+
+ /** Adds a latch and its level in the thread level array. Allocates
+ the memory for the array if called for the first time for this
+ OS thread. Makes the checks against other latch levels stored
+ in the array for this thread.
+
+	@param[in]	latch	latch that the thread wants to acquire.
+ @param[in] level latch level to check against */
+ void lock_granted(
+ const latch_t* latch,
+ latch_level_t level)
+ UNIV_NOTHROW
+ {
+ /* Ignore diagnostic latches, starting with '.' */
+
+ if (*latch->get_name() != '.'
+ && latch->get_level() != SYNC_LEVEL_VARYING) {
+
+ Latches* latches = thread_latches(true);
+
+ latches->push_back(Latched(latch, level));
+ }
+ }
+
+ /** For recursive X rw-locks.
+ @param[in] latch The RW-Lock to relock */
+ void relock(const latch_t* latch)
+ UNIV_NOTHROW
+ {
+ ut_a(latch->m_rw_lock);
+
+ latch_level_t level = latch->get_level();
+
+ /* Ignore diagnostic latches, starting with '.' */
+
+ if (*latch->get_name() != '.'
+ && latch->get_level() != SYNC_LEVEL_VARYING) {
+
+ Latches* latches = thread_latches(true);
+
+ Latches::iterator it = std::find(
+ latches->begin(), latches->end(),
+ Latched(latch, level));
+
+ ut_a(latches->empty()
+ || level == SYNC_LEVEL_VARYING
+ || level == SYNC_NO_ORDER_CHECK
+ || latches->back().m_latch->get_level()
+ == SYNC_LEVEL_VARYING
+ || latches->back().m_latch->get_level()
+ == SYNC_NO_ORDER_CHECK
+ || latches->back().get_level() >= level
+ || it != latches->end());
+
+ if (it == latches->end()) {
+ latches->push_back(Latched(latch, level));
+ } else {
+ latches->insert(it, Latched(latch, level));
+ }
+ }
+ }
+
+ /** Iterate over a thread's latches.
+ @param[in] functor The callback
+ @return true if the functor returns true. */
+ bool for_each(const sync_check_functor_t& functor)
+ UNIV_NOTHROW
+ {
+ if (const Latches* latches = thread_latches()) {
+ Latches::const_iterator end = latches->end();
+ for (Latches::const_iterator it = latches->begin();
+ it != end; ++it) {
+
+ if (functor(it->m_level)) {
+ return(true);
+ }
+ }
+ }
+
+ return(false);
+ }
+
+ /** Removes a latch from the thread level array if it is found there.
+ @param[in] latch The latch that was released
+	It is not an error if the latch is not found, as we presently are
+	not able to determine the level for every latch reservation the
+	program does. */
+ void unlock(const latch_t* latch) UNIV_NOTHROW;
+
+ /** Get the level name
+ @param[in] level The level ID to lookup
+ @return level name */
+ const std::string& get_level_name(latch_level_t level) const
+ UNIV_NOTHROW
+ {
+ Levels::const_iterator it = m_levels.find(level);
+
+ ut_ad(it != m_levels.end());
+
+ return(it->second);
+ }
+
+ /** Initialise the debug data structures */
+ static void init()
+ UNIV_NOTHROW;
+
+ /** Shutdown the latch debug checking */
+ static void shutdown()
+ UNIV_NOTHROW;
+
+ /** @return the singleton instance */
+ static LatchDebug* instance()
+ UNIV_NOTHROW
+ {
+ return(s_instance);
+ }
+
+ /** Create the singleton instance */
+ static void create_instance()
+ UNIV_NOTHROW
+ {
+ ut_ad(s_instance == NULL);
+
+ s_instance = UT_NEW_NOKEY(LatchDebug());
+ }
+
+private:
+ /** Disable copying */
+ LatchDebug(const LatchDebug&);
+ LatchDebug& operator=(const LatchDebug&);
+
+ /** Adds a latch and its level in the thread level array. Allocates
+	the memory for the array if called for the first time for this OS thread.
+ Makes the checks against other latch levels stored in the array
+ for this thread.
+
+ @param[in] latch pointer to a mutex or an rw-lock
+ @param[in] level level in the latching order
+ @return the thread's latches */
+ Latches* check_order(
+ const latch_t* latch,
+ latch_level_t level)
+ UNIV_NOTHROW;
+
+ /** Print the latches acquired by a thread
+ @param[in] latches Latches acquired by a thread */
+ void print_latches(const Latches* latches) const
+ UNIV_NOTHROW;
+
+ /** Special handling for the RTR mutexes. We need to add proper
+ levels for them if possible.
+ @param[in] latch Latch to check
+	@return true if it is an _RTR_ mutex */
+ bool is_rtr_mutex(const latch_t* latch) const
+ UNIV_NOTHROW
+ {
+ return(latch->get_id() == LATCH_ID_RTR_ACTIVE_MUTEX
+ || latch->get_id() == LATCH_ID_RTR_PATH_MUTEX
+ || latch->get_id() == LATCH_ID_RTR_MATCH_MUTEX);
+ }
+
+private:
+ /** Comparator for the Levels . */
+ struct latch_level_less
+ : public std::binary_function<
+ latch_level_t,
+ latch_level_t,
+ bool>
+ {
+ /** @return true if lhs < rhs */
+ bool operator()(
+ const latch_level_t& lhs,
+ const latch_level_t& rhs) const
+ UNIV_NOTHROW
+ {
+ return(lhs < rhs);
+ }
+ };
+
+ typedef std::map<
+ latch_level_t,
+ std::string,
+ latch_level_less,
+ ut_allocator<std::pair<const latch_level_t, std::string> > >
+ Levels;
+
+ /** Mutex protecting the deadlock detector data structures. */
+ Mutex m_mutex;
+
+ /** Thread specific data. Protected by m_mutex. */
+ ThreadMap m_threads;
+
+	/** Mapping from latch level to its string representation. */
+ Levels m_levels;
+
+ /** The singleton instance. Must be created in single threaded mode. */
+ static LatchDebug* s_instance;
+
+public:
+ /** For checking whether this module has been initialised or not. */
+ static bool s_initialized;
+};
+
+/** The latch order checking infra-structure */
+LatchDebug* LatchDebug::s_instance = NULL;
+bool LatchDebug::s_initialized = false;
+
+#define LEVEL_MAP_INSERT(T) \
+do { \
+ std::pair<Levels::iterator, bool> result = \
+ m_levels.insert(Levels::value_type(T, #T)); \
+ ut_ad(result.second); \
+} while(0)
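For clarity, one invocation such as LEVEL_MAP_INSERT(SYNC_MUTEX); expands (up to whitespace) to the following, with #T producing the human-readable name that is stored in the map:

do {
	std::pair<Levels::iterator, bool> result =
		m_levels.insert(Levels::value_type(SYNC_MUTEX, "SYNC_MUTEX"));
	ut_ad(result.second);
} while(0);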
+
+/** Setup the mapping from level ID to level name mapping */
+LatchDebug::LatchDebug()
+{
+ m_mutex.init();
+
+ LEVEL_MAP_INSERT(SYNC_UNKNOWN);
+ LEVEL_MAP_INSERT(SYNC_MUTEX);
+ LEVEL_MAP_INSERT(RW_LOCK_SX);
+ LEVEL_MAP_INSERT(RW_LOCK_X_WAIT);
+ LEVEL_MAP_INSERT(RW_LOCK_S);
+ LEVEL_MAP_INSERT(RW_LOCK_X);
+ LEVEL_MAP_INSERT(RW_LOCK_NOT_LOCKED);
+ LEVEL_MAP_INSERT(SYNC_ANY_LATCH);
+ LEVEL_MAP_INSERT(SYNC_POOL);
+ LEVEL_MAP_INSERT(SYNC_POOL_MANAGER);
+ LEVEL_MAP_INSERT(SYNC_SEARCH_SYS);
+ LEVEL_MAP_INSERT(SYNC_WORK_QUEUE);
+ LEVEL_MAP_INSERT(SYNC_FTS_TOKENIZE);
+ LEVEL_MAP_INSERT(SYNC_FTS_OPTIMIZE);
+ LEVEL_MAP_INSERT(SYNC_FTS_CACHE_INIT);
+ LEVEL_MAP_INSERT(SYNC_RECV);
+ LEVEL_MAP_INSERT(SYNC_PURGE_QUEUE);
+ LEVEL_MAP_INSERT(SYNC_TRX_SYS_HEADER);
+ LEVEL_MAP_INSERT(SYNC_TRX);
+ LEVEL_MAP_INSERT(SYNC_RW_TRX_HASH_ELEMENT);
+ LEVEL_MAP_INSERT(SYNC_READ_VIEW);
+ LEVEL_MAP_INSERT(SYNC_TRX_SYS);
+ LEVEL_MAP_INSERT(SYNC_LOCK_SYS);
+ LEVEL_MAP_INSERT(SYNC_LOCK_WAIT_SYS);
+ LEVEL_MAP_INSERT(SYNC_INDEX_ONLINE_LOG);
+ LEVEL_MAP_INSERT(SYNC_IBUF_BITMAP);
+ LEVEL_MAP_INSERT(SYNC_IBUF_BITMAP_MUTEX);
+ LEVEL_MAP_INSERT(SYNC_IBUF_TREE_NODE);
+ LEVEL_MAP_INSERT(SYNC_IBUF_TREE_NODE_NEW);
+ LEVEL_MAP_INSERT(SYNC_IBUF_INDEX_TREE);
+ LEVEL_MAP_INSERT(SYNC_IBUF_MUTEX);
+ LEVEL_MAP_INSERT(SYNC_FSP_PAGE);
+ LEVEL_MAP_INSERT(SYNC_FSP);
+ LEVEL_MAP_INSERT(SYNC_EXTERN_STORAGE);
+ LEVEL_MAP_INSERT(SYNC_TRX_UNDO_PAGE);
+ LEVEL_MAP_INSERT(SYNC_RSEG_HEADER);
+ LEVEL_MAP_INSERT(SYNC_RSEG_HEADER_NEW);
+ LEVEL_MAP_INSERT(SYNC_NOREDO_RSEG);
+ LEVEL_MAP_INSERT(SYNC_REDO_RSEG);
+ LEVEL_MAP_INSERT(SYNC_PURGE_LATCH);
+ LEVEL_MAP_INSERT(SYNC_TREE_NODE);
+ LEVEL_MAP_INSERT(SYNC_TREE_NODE_FROM_HASH);
+ LEVEL_MAP_INSERT(SYNC_TREE_NODE_NEW);
+ LEVEL_MAP_INSERT(SYNC_INDEX_TREE);
+ LEVEL_MAP_INSERT(SYNC_IBUF_PESS_INSERT_MUTEX);
+ LEVEL_MAP_INSERT(SYNC_IBUF_HEADER);
+ LEVEL_MAP_INSERT(SYNC_DICT_HEADER);
+ LEVEL_MAP_INSERT(SYNC_STATS_AUTO_RECALC);
+ LEVEL_MAP_INSERT(SYNC_DICT);
+ LEVEL_MAP_INSERT(SYNC_FTS_CACHE);
+ LEVEL_MAP_INSERT(SYNC_DICT_OPERATION);
+ LEVEL_MAP_INSERT(SYNC_TRX_I_S_RWLOCK);
+ LEVEL_MAP_INSERT(SYNC_LEVEL_VARYING);
+ LEVEL_MAP_INSERT(SYNC_NO_ORDER_CHECK);
+
+ /* Enum count starts from 0 */
+ ut_ad(m_levels.size() == SYNC_LEVEL_MAX + 1);
+}
+
+/** Print the latches acquired by a thread
+@param[in] latches Latches acquired by a thread */
+void
+LatchDebug::print_latches(const Latches* latches) const
+ UNIV_NOTHROW
+{
+ ib::error() << "Latches already owned by this thread: ";
+
+ Latches::const_iterator end = latches->end();
+
+ for (Latches::const_iterator it = latches->begin();
+ it != end;
+ ++it) {
+
+ ib::error()
+ << sync_latch_get_name(it->m_latch->get_id())
+ << " -> "
+ << it->m_level << " "
+ << "(" << get_level_name(it->m_level) << ")";
+ }
+}
+
+/** Report error and abort
+@param[in] latches thread's existing latches
+@param[in] latched The existing latch causing the invariant to fail
+@param[in] level The new level request that breaks the order */
+void
+LatchDebug::crash(
+ const Latches* latches,
+ const Latched* latched,
+ latch_level_t level) const
+ UNIV_NOTHROW
+{
+ const latch_t* latch = latched->m_latch;
+ const std::string& in_level_name = get_level_name(level);
+
+ const std::string& latch_level_name =
+ get_level_name(latched->m_level);
+
+ ib::error()
+ << "Thread " << os_thread_get_curr_id()
+ << " already owns a latch "
+ << sync_latch_get_name(latch->m_id) << " at level"
+ << " " << latched->m_level << " (" << latch_level_name
+ << " ), which is at a lower/same level than the"
+ << " requested latch: "
+ << level << " (" << in_level_name << "). "
+ << latch->to_string();
+
+ print_latches(latches);
+
+ ut_error;
+}
+
+/** Check that all the latches already owned by a thread have a lower
+level than limit.
+@param[in] latches the thread's existing (acquired) latches
+@param[in] limit to check against
+@return latched info if there is one with a level <= limit . */
+const Latched*
+LatchDebug::less(
+ const Latches* latches,
+ latch_level_t limit) const
+ UNIV_NOTHROW
+{
+ Latches::const_iterator end = latches->end();
+
+ for (Latches::const_iterator it = latches->begin(); it != end; ++it) {
+
+ if (it->m_level <= limit) {
+ return(&(*it));
+ }
+ }
+
+ return(NULL);
+}
+
+/** Do a basic ordering check.
+@param[in] latches thread's existing latches
+@param[in] requested_level Level requested by latch
+@param[in] in_level declared ulint so that we can do level - 1.
+ The level of the latch that the thread is
+ trying to acquire
+@return true if passes, else crash with error message. */
+inline bool
+LatchDebug::basic_check(
+ const Latches* latches,
+ latch_level_t requested_level,
+ lint in_level) const
+ UNIV_NOTHROW
+{
+ latch_level_t level = latch_level_t(in_level);
+
+ ut_ad(level < SYNC_LEVEL_MAX);
+
+ const Latched* latched = less(latches, level);
+
+ if (latched != NULL) {
+ crash(latches, latched, requested_level);
+ return(false);
+ }
+
+ return(true);
+}
+
+/** Create a new instance if one doesn't exist else return the existing one.
+@param[in] add add an empty entry if one is not found
+ (default no)
+@return pointer to a thread's acquired latches. */
+Latches*
+LatchDebug::thread_latches(bool add)
+ UNIV_NOTHROW
+{
+ m_mutex.enter();
+
+ os_thread_id_t thread_id = os_thread_get_curr_id();
+ ThreadMap::iterator lb = m_threads.lower_bound(thread_id);
+
+ if (lb != m_threads.end()
+ && !(m_threads.key_comp()(thread_id, lb->first))) {
+
+ Latches* latches = lb->second;
+
+ m_mutex.exit();
+
+ return(latches);
+
+ } else if (!add) {
+
+ m_mutex.exit();
+
+ return(NULL);
+
+ } else {
+ typedef ThreadMap::value_type value_type;
+
+ Latches* latches = UT_NEW_NOKEY(Latches());
+
+ ut_a(latches != NULL);
+
+ latches->reserve(32);
+
+ m_threads.insert(lb, value_type(thread_id, latches));
+
+ m_mutex.exit();
+
+ return(latches);
+ }
+}
+
+/** Checks if the level value exists in the thread's acquired latches.
+@param[in]	latches	the thread's existing (acquired) latches
+@param[in] level to lookup
+@return latch if found or 0 */
+const latch_t*
+LatchDebug::find(
+ const Latches* latches,
+ latch_level_t level) const UNIV_NOTHROW
+{
+ Latches::const_iterator end = latches->end();
+
+ for (Latches::const_iterator it = latches->begin(); it != end; ++it) {
+
+ if (it->m_level == level) {
+
+ return(it->m_latch);
+ }
+ }
+
+ return(0);
+}
+
+/** Checks if the level value exists in the thread's acquired latches.
+@param[in] level The level to lookup
+@return latch if found or NULL */
+const latch_t*
+LatchDebug::find(latch_level_t level)
+ UNIV_NOTHROW
+{
+ return(find(thread_latches(), level));
+}
+
+/**
+Adds a latch and its level in the thread level array. Allocates the memory
+for the array if called for the first time for this OS thread. Makes the checks
+against other latch levels stored in the array for this thread.
+@param[in] latch pointer to a mutex or an rw-lock
+@param[in] level level in the latching order
+@return the thread's latches */
+Latches*
+LatchDebug::check_order(
+ const latch_t* latch,
+ latch_level_t level)
+ UNIV_NOTHROW
+{
+ ut_ad(latch->get_level() != SYNC_LEVEL_VARYING);
+
+ Latches* latches = thread_latches(true);
+
+ /* NOTE that there is a problem with _NODE and _LEAF levels: if the
+ B-tree height changes, then a leaf can change to an internal node
+ or the other way around. We do not know at present if this can cause
+ unnecessary assertion failures below. */
+
+ switch (level) {
+ case SYNC_NO_ORDER_CHECK:
+ case SYNC_EXTERN_STORAGE:
+ case SYNC_TREE_NODE_FROM_HASH:
+ /* Do no order checking */
+ break;
+
+ case SYNC_TRX_SYS_HEADER:
+
+ if (srv_is_being_started) {
+ /* This is violated during trx_sys_create_rsegs()
+ when creating additional rollback segments when
+ upgrading in srv_start(). */
+ break;
+ }
+
+ /* Fall through */
+
+ case SYNC_RECV:
+ case SYNC_WORK_QUEUE:
+ case SYNC_FTS_TOKENIZE:
+ case SYNC_FTS_OPTIMIZE:
+ case SYNC_FTS_CACHE:
+ case SYNC_FTS_CACHE_INIT:
+ case SYNC_SEARCH_SYS:
+ case SYNC_LOCK_SYS:
+ case SYNC_LOCK_WAIT_SYS:
+ case SYNC_RW_TRX_HASH_ELEMENT:
+ case SYNC_READ_VIEW:
+ case SYNC_TRX_SYS:
+ case SYNC_IBUF_BITMAP_MUTEX:
+ case SYNC_REDO_RSEG:
+ case SYNC_NOREDO_RSEG:
+ case SYNC_PURGE_LATCH:
+ case SYNC_PURGE_QUEUE:
+ case SYNC_DICT_OPERATION:
+ case SYNC_DICT_HEADER:
+ case SYNC_TRX_I_S_RWLOCK:
+ case SYNC_IBUF_MUTEX:
+ case SYNC_INDEX_ONLINE_LOG:
+ case SYNC_STATS_AUTO_RECALC:
+ case SYNC_POOL:
+ case SYNC_POOL_MANAGER:
+ basic_check(latches, level, level);
+ break;
+
+ case SYNC_ANY_LATCH:
+
+ /* Temporary workaround for LATCH_ID_RTR_*_MUTEX */
+ if (is_rtr_mutex(latch)) {
+
+ const Latched* latched = less(latches, level);
+
+ if (latched == NULL
+ || (latched != NULL
+ && is_rtr_mutex(latched->m_latch))) {
+
+ /* No violation */
+ break;
+
+ }
+
+ crash(latches, latched, level);
+
+ } else {
+ basic_check(latches, level, level);
+ }
+
+ break;
+
+ case SYNC_TRX:
+
+ /* Either the thread must own the lock_sys.mutex, or
+ it is allowed to own only ONE trx_t::mutex. */
+
+ if (less(latches, level) != NULL) {
+ basic_check(latches, level, level - 1);
+ ut_a(find(latches, SYNC_LOCK_SYS) != 0);
+ }
+ break;
+
+ case SYNC_IBUF_BITMAP:
+
+ /* Either the thread must own the master mutex to all
+ the bitmap pages, or it is allowed to latch only ONE
+ bitmap page. */
+
+ if (find(latches, SYNC_IBUF_BITMAP_MUTEX) != 0) {
+
+ basic_check(latches, level, SYNC_IBUF_BITMAP - 1);
+
+ } else if (!srv_is_being_started) {
+
+ /* This is violated during trx_sys_create_rsegs()
+ when creating additional rollback segments during
+ upgrade. */
+
+ basic_check(latches, level, SYNC_IBUF_BITMAP);
+ }
+ break;
+
+ case SYNC_FSP_PAGE:
+ ut_a(find(latches, SYNC_FSP) != 0);
+ break;
+
+ case SYNC_FSP:
+
+ ut_a(find(latches, SYNC_FSP) != 0
+ || basic_check(latches, level, SYNC_FSP));
+ break;
+
+ case SYNC_TRX_UNDO_PAGE:
+
+ /* Purge is allowed to read in as many UNDO pages as it likes.
+ The purge thread can read the UNDO pages without any covering
+ mutex. */
+
+ ut_a(find(latches, SYNC_REDO_RSEG) != 0
+ || find(latches, SYNC_NOREDO_RSEG) != 0
+ || basic_check(latches, level, level - 1));
+ break;
+
+ case SYNC_RSEG_HEADER:
+
+ ut_a(find(latches, SYNC_REDO_RSEG) != 0
+ || find(latches, SYNC_NOREDO_RSEG) != 0);
+ break;
+
+ case SYNC_RSEG_HEADER_NEW:
+
+ ut_a(find(latches, SYNC_FSP_PAGE) != 0);
+ break;
+
+ case SYNC_TREE_NODE:
+
+ ut_a(find(latches, SYNC_FSP) == &fil_system.temp_space->latch
+ || find(latches, SYNC_INDEX_TREE)
+ || find(latches, SYNC_DICT_OPERATION)
+ || basic_check(latches, level, SYNC_TREE_NODE - 1));
+ break;
+
+ case SYNC_TREE_NODE_NEW:
+
+ ut_a(find(latches, SYNC_FSP_PAGE) != 0);
+ break;
+
+ case SYNC_INDEX_TREE:
+
+ basic_check(latches, level, SYNC_TREE_NODE - 1);
+ break;
+
+ case SYNC_IBUF_TREE_NODE:
+
+ ut_a(find(latches, SYNC_IBUF_INDEX_TREE) != 0
+ || basic_check(latches, level, SYNC_IBUF_TREE_NODE - 1));
+ break;
+
+ case SYNC_IBUF_TREE_NODE_NEW:
+
+ /* ibuf_add_free_page() allocates new pages for the change
+ buffer while only holding the tablespace x-latch. These
+ pre-allocated new pages may only be used while holding
+ ibuf_mutex, in btr_page_alloc_for_ibuf(). */
+
+ ut_a(find(latches, SYNC_IBUF_MUTEX) != 0
+ || find(latches, SYNC_FSP) != 0);
+ break;
+
+ case SYNC_IBUF_INDEX_TREE:
+
+ if (find(latches, SYNC_FSP) != 0) {
+ basic_check(latches, level, level - 1);
+ } else {
+ basic_check(latches, level, SYNC_IBUF_TREE_NODE - 1);
+ }
+ break;
+
+ case SYNC_IBUF_PESS_INSERT_MUTEX:
+
+ basic_check(latches, level, SYNC_FSP - 1);
+ ut_a(find(latches, SYNC_IBUF_MUTEX) == 0);
+ break;
+
+ case SYNC_IBUF_HEADER:
+
+ basic_check(latches, level, SYNC_FSP - 1);
+ ut_a(find(latches, SYNC_IBUF_MUTEX) == NULL);
+ ut_a(find(latches, SYNC_IBUF_PESS_INSERT_MUTEX) == NULL);
+ break;
+
+ case SYNC_DICT:
+ basic_check(latches, level, SYNC_DICT);
+ break;
+
+ case SYNC_MUTEX:
+ case SYNC_UNKNOWN:
+ case SYNC_LEVEL_VARYING:
+ case RW_LOCK_X:
+ case RW_LOCK_X_WAIT:
+ case RW_LOCK_S:
+ case RW_LOCK_SX:
+ case RW_LOCK_NOT_LOCKED:
+ /* These levels should never be set for a latch. */
+ ut_error;
+ break;
+ }
+
+ return(latches);
+}
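+
+/* Illustrative note (not part of the original sources): for the SYNC_TRX
+case above, a thread that holds no latch at or below SYNC_TRX may take its
+first trx_t::mutex without further checks. To hold a second one it must
+also hold lock_sys.mutex: basic_check(latches, level, level - 1) ensures
+that the only held latches at or below SYNC_TRX are other SYNC_TRX latches,
+and the ut_a() then requires SYNC_LOCK_SYS to be held as well. */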
+
+/** Removes a latch from the thread level array if it is found there.
+It is not an error if the latch is not found, as we presently are not able
+to determine the level for every latch reservation the program does.
+@param[in]	latch		the latch that was released/unlocked */
+void
+LatchDebug::unlock(const latch_t* latch)
+ UNIV_NOTHROW
+{
+ if (latch->get_level() == SYNC_LEVEL_VARYING) {
+ // We don't have varying level mutexes
+ ut_ad(latch->m_rw_lock);
+ }
+
+ Latches* latches;
+
+ if (*latch->get_name() == '.') {
+
+ /* Ignore diagnostic latches, starting with '.' */
+
+ } else if ((latches = thread_latches()) != NULL) {
+
+ Latches::reverse_iterator rend = latches->rend();
+
+ for (Latches::reverse_iterator it = latches->rbegin();
+ it != rend;
+ ++it) {
+
+ if (it->m_latch != latch) {
+
+ continue;
+ }
+
+ Latches::iterator i = it.base();
+
+ latches->erase(--i);
+
+ /* If this thread doesn't own any more
+ latches remove from the map.
+
+ FIXME: Perhaps use the master thread
+ to do purge. Or, do it from close connection.
+ This could be expensive. */
+
+ if (latches->empty()) {
+
+ m_mutex.enter();
+
+ os_thread_id_t thread_id;
+
+ thread_id = os_thread_get_curr_id();
+
+ m_threads.erase(thread_id);
+
+ m_mutex.exit();
+
+ UT_DELETE(latches);
+ }
+
+ return;
+ }
+
+ if (latch->get_level() != SYNC_LEVEL_VARYING) {
+ ib::error()
+ << "Couldn't find latch "
+ << sync_latch_get_name(latch->get_id());
+
+ print_latches(latches);
+
+ /** Must find the latch. */
+ ut_error;
+ }
+ }
+}
+
+/** Get the latch id from a latch name.
+@param[in] name Latch name
+@return latch id if found else LATCH_ID_NONE. */
+latch_id_t
+sync_latch_get_id(const char* name)
+{
+ LatchMetaData::const_iterator end = latch_meta.end();
+
+ /* Linear scan should be OK, this should be extremely rare. */
+
+ for (LatchMetaData::const_iterator it = latch_meta.begin();
+ it != end;
+ ++it) {
+
+ if (*it == NULL || (*it)->get_id() == LATCH_ID_NONE) {
+
+ continue;
+
+ } else if (strcmp((*it)->get_name(), name) == 0) {
+
+ return((*it)->get_id());
+ }
+ }
+
+ return(LATCH_ID_NONE);
+}
+
+/** Get the latch name from a sync level
+@param[in] level Latch level to lookup
+@return	latch name, or NULL if not found. */
+const char*
+sync_latch_get_name(latch_level_t level)
+{
+ LatchMetaData::const_iterator end = latch_meta.end();
+
+ /* Linear scan should be OK, this should be extremely rare. */
+
+ for (LatchMetaData::const_iterator it = latch_meta.begin();
+ it != end;
+ ++it) {
+
+ if (*it == NULL || (*it)->get_id() == LATCH_ID_NONE) {
+
+ continue;
+
+ } else if ((*it)->get_level() == level) {
+
+ return((*it)->get_name());
+ }
+ }
+
+ return(0);
+}
+
+/** Check if it is OK to acquire the latch.
+@param[in] latch latch type */
+void
+sync_check_lock_validate(const latch_t* latch)
+{
+ if (LatchDebug::instance() != NULL) {
+ LatchDebug::instance()->lock_validate(
+ latch, latch->get_level());
+ }
+}
+
+/** Note that the lock has been granted
+@param[in] latch latch type */
+void
+sync_check_lock_granted(const latch_t* latch)
+{
+ if (LatchDebug::instance() != NULL) {
+ LatchDebug::instance()->lock_granted(latch, latch->get_level());
+ }
+}
+
+/** Check if it is OK to acquire the latch.
+@param[in] latch latch type
+@param[in] level Latch level */
+void
+sync_check_lock(
+ const latch_t* latch,
+ latch_level_t level)
+{
+ if (LatchDebug::instance() != NULL) {
+
+ ut_ad(latch->get_level() == SYNC_LEVEL_VARYING);
+ ut_ad(latch->get_id() == LATCH_ID_BUF_BLOCK_LOCK);
+
+ LatchDebug::instance()->lock_validate(latch, level);
+ LatchDebug::instance()->lock_granted(latch, level);
+ }
+}
+
+/** Check if it is OK to re-acquire the lock.
+@param[in] latch RW-LOCK to relock (recursive X locks) */
+void
+sync_check_relock(const latch_t* latch)
+{
+ if (LatchDebug::instance() != NULL) {
+ LatchDebug::instance()->relock(latch);
+ }
+}
+
+/** Removes a latch from the thread level array if it is found there.
+@param[in] latch The latch to unlock */
+void
+sync_check_unlock(const latch_t* latch)
+{
+ if (LatchDebug::instance() != NULL) {
+ LatchDebug::instance()->unlock(latch);
+ }
+}
+
+/** Checks if the level array for the current thread contains a
+mutex or rw-latch at the specified level.
+@param[in] level to find
+@return a matching latch, or NULL if not found */
+const latch_t*
+sync_check_find(latch_level_t level)
+{
+ if (LatchDebug::instance() != NULL) {
+ return(LatchDebug::instance()->find(level));
+ }
+
+ return(NULL);
+}
+
+/** Iterate over the thread's latches.
+@param[in,out] functor called for each element.
+@return true if the functor returns true for any element */
+bool
+sync_check_iterate(const sync_check_functor_t& functor)
+{
+ if (LatchDebug* debug = LatchDebug::instance()) {
+ return(debug->for_each(functor));
+ }
+
+ return(false);
+}
+
+/** Enable sync order checking.
+
+Note: We don't enforce any synchronisation checks. The caller must ensure
+that no races can occur */
+static void sync_check_enable()
+{
+ if (!srv_sync_debug) {
+
+ return;
+ }
+
+ /* We should always call this before we create threads. */
+
+ LatchDebug::create_instance();
+}
+
+/** Initialise the debug data structures */
+void
+LatchDebug::init()
+ UNIV_NOTHROW
+{
+ mutex_create(LATCH_ID_RW_LOCK_DEBUG, &rw_lock_debug_mutex);
+}
+
+/** Shutdown the latch debug checking
+
+Note: We don't enforce any synchronisation checks. The caller must ensure
+that no races can occur */
+void
+LatchDebug::shutdown()
+ UNIV_NOTHROW
+{
+ mutex_free(&rw_lock_debug_mutex);
+
+ ut_a(s_initialized);
+
+ s_initialized = false;
+
+ UT_DELETE(s_instance);
+
+ LatchDebug::s_instance = NULL;
+}
+
+/** Acquires the debug mutex. We cannot use the mutex defined in sync0sync,
+because the debug mutex is also acquired in sync0arr while holding the OS
+mutex protecting the sync array, and the ordinary mutex_enter might
+recursively call routines in sync0arr, leading to a deadlock on the OS
+mutex. */
+void
+rw_lock_debug_mutex_enter()
+{
+ mutex_enter(&rw_lock_debug_mutex);
+}
+
+/** Releases the debug mutex. */
+void
+rw_lock_debug_mutex_exit()
+{
+ mutex_exit(&rw_lock_debug_mutex);
+}
+#endif /* UNIV_DEBUG */
+
+/* Meta data for all the InnoDB latches. If a latch is not recorded
+here then it will not be considered for deadlock checks. */
+LatchMetaData latch_meta;
+
+/** Load the latch meta data. */
+static
+void
+sync_latch_meta_init()
+ UNIV_NOTHROW
+{
+ latch_meta.resize(LATCH_ID_MAX + 1);
+
+	/* The latches should be ordered on latch_id_t so that we can
+	index directly into the vector to update and fetch the meta-data. */
+
+ LATCH_ADD_MUTEX(DICT_FOREIGN_ERR, SYNC_NO_ORDER_CHECK,
+ dict_foreign_err_mutex_key);
+
+ LATCH_ADD_MUTEX(DICT_SYS, SYNC_DICT, dict_sys_mutex_key);
+
+ LATCH_ADD_MUTEX(FIL_SYSTEM, SYNC_ANY_LATCH, fil_system_mutex_key);
+
+ LATCH_ADD_MUTEX(FTS_DELETE, SYNC_FTS_OPTIMIZE, fts_delete_mutex_key);
+
+ LATCH_ADD_MUTEX(FTS_DOC_ID, SYNC_FTS_OPTIMIZE, fts_doc_id_mutex_key);
+
+ LATCH_ADD_MUTEX(FTS_PLL_TOKENIZE, SYNC_FTS_TOKENIZE,
+ fts_pll_tokenize_mutex_key);
+
+ LATCH_ADD_MUTEX(IBUF_BITMAP, SYNC_IBUF_BITMAP_MUTEX,
+ ibuf_bitmap_mutex_key);
+
+ LATCH_ADD_MUTEX(IBUF, SYNC_IBUF_MUTEX, ibuf_mutex_key);
+
+ LATCH_ADD_MUTEX(IBUF_PESSIMISTIC_INSERT, SYNC_IBUF_PESS_INSERT_MUTEX,
+ ibuf_pessimistic_insert_mutex_key);
+
+ LATCH_ADD_MUTEX(PURGE_SYS_PQ, SYNC_PURGE_QUEUE,
+ purge_sys_pq_mutex_key);
+
+ LATCH_ADD_MUTEX(RECALC_POOL, SYNC_STATS_AUTO_RECALC,
+ recalc_pool_mutex_key);
+
+ LATCH_ADD_MUTEX(RECV_SYS, SYNC_RECV, recv_sys_mutex_key);
+
+ LATCH_ADD_MUTEX(REDO_RSEG, SYNC_REDO_RSEG, redo_rseg_mutex_key);
+
+ LATCH_ADD_MUTEX(NOREDO_RSEG, SYNC_NOREDO_RSEG, noredo_rseg_mutex_key);
+
+#ifdef UNIV_DEBUG
+ /* Mutex names starting with '.' are not tracked. They are assumed
+ to be diagnostic mutexes used in debugging. */
+ latch_meta[LATCH_ID_RW_LOCK_DEBUG] =
+ LATCH_ADD_MUTEX(RW_LOCK_DEBUG,
+ SYNC_NO_ORDER_CHECK,
+ rw_lock_debug_mutex_key);
+#endif /* UNIV_DEBUG */
+
+ LATCH_ADD_MUTEX(RTR_ACTIVE_MUTEX, SYNC_ANY_LATCH,
+ rtr_active_mutex_key);
+
+ LATCH_ADD_MUTEX(RTR_MATCH_MUTEX, SYNC_ANY_LATCH, rtr_match_mutex_key);
+
+ LATCH_ADD_MUTEX(RTR_PATH_MUTEX, SYNC_ANY_LATCH, rtr_path_mutex_key);
+
+ LATCH_ADD_MUTEX(RW_LOCK_LIST, SYNC_NO_ORDER_CHECK,
+ rw_lock_list_mutex_key);
+
+ LATCH_ADD_MUTEX(SRV_INNODB_MONITOR, SYNC_NO_ORDER_CHECK,
+ srv_innodb_monitor_mutex_key);
+
+ LATCH_ADD_MUTEX(SRV_MISC_TMPFILE, SYNC_ANY_LATCH,
+ srv_misc_tmpfile_mutex_key);
+
+ LATCH_ADD_MUTEX(SRV_MONITOR_FILE, SYNC_NO_ORDER_CHECK,
+ srv_monitor_file_mutex_key);
+
+ LATCH_ADD_MUTEX(TRX_POOL, SYNC_POOL, trx_pool_mutex_key);
+
+ LATCH_ADD_MUTEX(TRX_POOL_MANAGER, SYNC_POOL_MANAGER,
+ trx_pool_manager_mutex_key);
+
+ LATCH_ADD_MUTEX(TRX, SYNC_TRX, trx_mutex_key);
+
+ LATCH_ADD_MUTEX(LOCK_SYS, SYNC_LOCK_SYS, lock_mutex_key);
+
+ LATCH_ADD_MUTEX(LOCK_SYS_WAIT, SYNC_LOCK_WAIT_SYS,
+ lock_wait_mutex_key);
+
+ LATCH_ADD_MUTEX(TRX_SYS, SYNC_TRX_SYS, trx_sys_mutex_key);
+
+ LATCH_ADD_MUTEX(SRV_SYS_TASKS, SYNC_ANY_LATCH, srv_threads_mutex_key);
+
+ LATCH_ADD_MUTEX(PAGE_ZIP_STAT_PER_INDEX, SYNC_ANY_LATCH,
+ page_zip_stat_per_index_mutex_key);
+
+ LATCH_ADD_MUTEX(SYNC_ARRAY_MUTEX, SYNC_NO_ORDER_CHECK,
+ sync_array_mutex_key);
+
+ LATCH_ADD_MUTEX(ROW_DROP_LIST, SYNC_NO_ORDER_CHECK,
+ row_drop_list_mutex_key);
+
+ LATCH_ADD_MUTEX(INDEX_ONLINE_LOG, SYNC_INDEX_ONLINE_LOG,
+ index_online_log_key);
+
+ LATCH_ADD_MUTEX(WORK_QUEUE, SYNC_WORK_QUEUE, PFS_NOT_INSTRUMENTED);
+
+ // Add the RW locks
+ LATCH_ADD_RWLOCK(BTR_SEARCH, SYNC_SEARCH_SYS, btr_search_latch_key);
+
+ LATCH_ADD_RWLOCK(BUF_BLOCK_LOCK, SYNC_LEVEL_VARYING,
+ PFS_NOT_INSTRUMENTED);
+
+#ifdef UNIV_DEBUG
+ LATCH_ADD_RWLOCK(BUF_BLOCK_DEBUG, SYNC_LEVEL_VARYING,
+ PFS_NOT_INSTRUMENTED);
+#endif /* UNIV_DEBUG */
+
+ LATCH_ADD_RWLOCK(DICT_OPERATION, SYNC_DICT_OPERATION,
+ dict_operation_lock_key);
+
+ LATCH_ADD_RWLOCK(FIL_SPACE, SYNC_FSP, fil_space_latch_key);
+
+ LATCH_ADD_RWLOCK(FTS_CACHE, SYNC_FTS_CACHE, fts_cache_rw_lock_key);
+
+ LATCH_ADD_RWLOCK(FTS_CACHE_INIT, SYNC_FTS_CACHE_INIT,
+ fts_cache_init_rw_lock_key);
+
+ LATCH_ADD_RWLOCK(TRX_I_S_CACHE, SYNC_TRX_I_S_RWLOCK,
+ trx_i_s_cache_lock_key);
+
+ LATCH_ADD_RWLOCK(TRX_PURGE, SYNC_PURGE_LATCH, trx_purge_latch_key);
+
+ LATCH_ADD_RWLOCK(IBUF_INDEX_TREE, SYNC_IBUF_INDEX_TREE,
+ index_tree_rw_lock_key);
+
+ LATCH_ADD_RWLOCK(INDEX_TREE, SYNC_INDEX_TREE, index_tree_rw_lock_key);
+
+ /* JAN: TODO: Add PFS instrumentation */
+ LATCH_ADD_MUTEX(DEFRAGMENT_MUTEX, SYNC_NO_ORDER_CHECK,
+ PFS_NOT_INSTRUMENTED);
+ LATCH_ADD_MUTEX(BTR_DEFRAGMENT_MUTEX, SYNC_NO_ORDER_CHECK,
+ PFS_NOT_INSTRUMENTED);
+ LATCH_ADD_MUTEX(FIL_CRYPT_STAT_MUTEX, SYNC_NO_ORDER_CHECK,
+ PFS_NOT_INSTRUMENTED);
+ LATCH_ADD_MUTEX(FIL_CRYPT_DATA_MUTEX, SYNC_NO_ORDER_CHECK,
+ PFS_NOT_INSTRUMENTED);
+ LATCH_ADD_MUTEX(FIL_CRYPT_THREADS_MUTEX, SYNC_NO_ORDER_CHECK,
+ PFS_NOT_INSTRUMENTED);
+ LATCH_ADD_MUTEX(RW_TRX_HASH_ELEMENT, SYNC_RW_TRX_HASH_ELEMENT,
+ rw_trx_hash_element_mutex_key);
+ LATCH_ADD_MUTEX(READ_VIEW, SYNC_READ_VIEW, read_view_mutex_key);
+
+ latch_id_t id = LATCH_ID_NONE;
+
+	/* The array should be ordered on latch ID. We need to
+ index directly into it from the mutex policy to update
+ the counters and access the meta-data. */
+
+ for (LatchMetaData::iterator it = latch_meta.begin();
+ it != latch_meta.end();
+ ++it) {
+
+ const latch_meta_t* meta = *it;
+
+
+ /* Skip blank entries */
+ if (meta == NULL || meta->get_id() == LATCH_ID_NONE) {
+ continue;
+ }
+
+ ut_a(id < meta->get_id());
+
+ id = meta->get_id();
+ }
+}
+
+/** Destroy the latch meta data */
+static
+void
+sync_latch_meta_destroy()
+{
+ for (LatchMetaData::iterator it = latch_meta.begin();
+ it != latch_meta.end();
+ ++it) {
+
+ UT_DELETE(*it);
+ }
+
+ latch_meta.clear();
+}
+
+/** Initializes the synchronization data structures. */
+void
+sync_check_init()
+{
+ ut_ad(!LatchDebug::s_initialized);
+ ut_d(LatchDebug::s_initialized = true);
+
+ sync_latch_meta_init();
+
+ /* create the mutex to protect rw_lock list. */
+
+ mutex_create(LATCH_ID_RW_LOCK_LIST, &rw_lock_list_mutex);
+
+ ut_d(LatchDebug::init());
+
+ sync_array_init();
+
+ ut_d(sync_check_enable());
+}
+
+/** Free the InnoDB synchronization data structures. */
+void
+sync_check_close()
+{
+ ut_d(LatchDebug::shutdown());
+
+ mutex_free(&rw_lock_list_mutex);
+
+ sync_array_close();
+
+ sync_latch_meta_destroy();
+}
+
diff --git a/storage/innobase/sync/sync0rw.cc b/storage/innobase/sync/sync0rw.cc
new file mode 100644
index 00000000..2624ffb9
--- /dev/null
+++ b/storage/innobase/sync/sync0rw.cc
@@ -0,0 +1,1216 @@
+/*****************************************************************************
+
+Copyright (c) 1995, 2016, Oracle and/or its affiliates. All Rights Reserved.
+Copyright (c) 2008, Google Inc.
+Copyright (c) 2017, 2020, MariaDB Corporation.
+
+Portions of this file contain modifications contributed and copyrighted by
+Google, Inc. Those modifications are gratefully acknowledged and are described
+briefly in the InnoDB documentation. The contributions by Google are
+incorporated with their permission, and subject to the conditions contained in
+the file COPYING.Google.
+
+This program is free software; you can redistribute it and/or modify it under
+the terms of the GNU General Public License as published by the Free Software
+Foundation; version 2 of the License.
+
+This program is distributed in the hope that it will be useful, but WITHOUT
+ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
+FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details.
+
+You should have received a copy of the GNU General Public License along with
+this program; if not, write to the Free Software Foundation, Inc.,
+51 Franklin Street, Fifth Floor, Boston, MA 02110-1335 USA
+
+*****************************************************************************/
+
+/**************************************************//**
+@file sync/sync0rw.cc
+The read-write lock (for thread synchronization)
+
+Created 9/11/1995 Heikki Tuuri
+*******************************************************/
+
+#include "sync0rw.h"
+#include "my_cpu.h"
+#include <my_sys.h>
+
+/*
+ IMPLEMENTATION OF THE RW_LOCK
+ =============================
+The status of a rw_lock is held in lock_word. The initial value of lock_word is
+X_LOCK_DECR. lock_word is decremented by 1 for each s-lock and by X_LOCK_DECR
+or 1 for each x-lock. This describes the lock state for each value of lock_word:
+
+lock_word == X_LOCK_DECR: Unlocked.
+X_LOCK_HALF_DECR < lock_word < X_LOCK_DECR:
+ S locked, no waiting writers.
+ (X_LOCK_DECR - lock_word) is the number
+ of S locks.
+lock_word == X_LOCK_HALF_DECR: SX locked, no waiting writers.
+0 < lock_word < X_LOCK_HALF_DECR:
+ SX locked AND S locked, no waiting writers.
+ (X_LOCK_HALF_DECR - lock_word) is the number
+ of S locks.
+lock_word == 0: X locked, no waiting writers.
+-X_LOCK_HALF_DECR < lock_word < 0:
+ S locked, with a waiting writer.
+ (-lock_word) is the number of S locks.
+lock_word == -X_LOCK_HALF_DECR: X locked and SX locked, no waiting writers.
+-X_LOCK_DECR < lock_word < -X_LOCK_HALF_DECR:
+ S locked, with a waiting writer
+ which has SX lock.
+ -(lock_word + X_LOCK_HALF_DECR) is the number
+ of S locks.
+lock_word == -X_LOCK_DECR: X locked with recursive X lock (2 X locks).
+-(X_LOCK_DECR + X_LOCK_HALF_DECR) < lock_word < -X_LOCK_DECR:
+ X locked. The number of the X locks is:
+ 2 - (lock_word + X_LOCK_DECR)
+lock_word == -(X_LOCK_DECR + X_LOCK_HALF_DECR):
+ X locked with recursive X lock (2 X locks)
+ and SX locked.
+lock_word < -(X_LOCK_DECR + X_LOCK_HALF_DECR):
+ X locked and SX locked.
+ The number of the X locks is:
+ 2 - (lock_word + X_LOCK_DECR + X_LOCK_HALF_DECR)
+
+ LOCK COMPATIBILITY MATRIX
+
+ | S|SX| X|
+ --+--+--+--+
+ S| +| +| -|
+ --+--+--+--+
+ SX| +| -| -|
+ --+--+--+--+
+ X| -| -| -|
+ --+--+--+--+
+
+The lock_word is always read and updated atomically and consistently, so that
+it always represents the state of the lock, and the state of the lock changes
+with a single atomic operation. This lock_word holds all of the information
+that a thread needs in order to determine if it is eligible to gain the lock
+or if it must spin or sleep. The one exception to this is that writer_thread
+must be verified before recursive write locks: to solve this scenario, we make
+writer_thread readable by all threads, but only writeable by the x-lock or
+sx-lock holder.
+
+The other members of the lock obey the following rules to remain consistent:
+
+writer_thread: Is used only in recursive x-locking or sx-locking.
+ This field is 0 at lock creation time and is updated
+ when x-lock is acquired or when move_ownership is called.
+ A thread is only allowed to set the value of this field to
+		its thread_id, i.e. a thread cannot set writer_thread to
+ some other thread's id.
+waiters: May be set to 1 anytime, but to avoid unnecessary wake-up
+ signals, it should only be set to 1 when there are threads
+ waiting on event. Must be 1 when a writer starts waiting to
+ ensure the current x-locking thread sends a wake-up signal
+		during unlock. May only be reset to 0 immediately before
+		a wake-up signal is sent to event. On most platforms, a
+ memory barrier is required after waiters is set, and before
+ verifying lock_word is still held, to ensure some unlocker
+		really does see the flag's new value.
+event: Threads wait on event for read or writer lock when another
+ thread has an x-lock or an x-lock reservation (wait_ex). A
+ thread may only wait on event after performing the following
+ actions in order:
+ (1) Record the counter value of event (with os_event_reset).
+ (2) Set waiters to 1.
+ (3) Verify lock_word <= 0.
+ (1) must come before (2) to ensure signal is not missed.
+ (2) must come before (3) to ensure a signal is sent.
+ These restrictions force the above ordering.
+ Immediately before sending the wake-up signal, we should:
+ (1) Verify lock_word == X_LOCK_DECR (unlocked)
+ (2) Reset waiters to 0.
+wait_ex_event: A thread may only wait on the wait_ex_event after it has
+ performed the following actions in order:
+ (1) Decrement lock_word by X_LOCK_DECR.
+ (2) Record counter value of wait_ex_event (os_event_reset,
+ called from sync_array_reserve_cell).
+ (3) Verify that lock_word < 0.
+			(1) must come first to ensure no other threads become the reader
+			or next writer, and to notify the unlocker that a signal must be sent.
+ (2) must come before (3) to ensure the signal is not missed.
+ These restrictions force the above ordering.
+ Immediately before sending the wake-up signal, we should:
+ Verify lock_word == 0 (waiting thread holds x_lock)
+*/
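+
+/* Worked example (illustrative, not part of the original sources), assuming
+X_LOCK_DECR is 0x20000000 and X_LOCK_HALF_DECR is X_LOCK_DECR / 2:
+
+	lock_word == 0x20000000		unlocked
+	lock_word == 0x1ffffffd		3 S-locks (X_LOCK_DECR - lock_word)
+	lock_word == 0x10000000		SX locked, no S-locks
+	lock_word == 0x0ffffffe		SX locked plus 2 S-locks
+	lock_word == 0			X locked once
+	lock_word == -0x20000000	X locked recursively (2 X-locks)
+*/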
+
+rw_lock_stats_t rw_lock_stats;
+
+/* The global list of rw-locks */
+ilist<rw_lock_t> rw_lock_list;
+ib_mutex_t rw_lock_list_mutex;
+
+#ifdef UNIV_DEBUG
+/******************************************************************//**
+Creates a debug info struct. */
+static
+rw_lock_debug_t*
+rw_lock_debug_create(void);
+/*======================*/
+/******************************************************************//**
+Frees a debug info struct. */
+static
+void
+rw_lock_debug_free(
+/*===============*/
+ rw_lock_debug_t* info);
+
+/******************************************************************//**
+Creates a debug info struct.
+@return own: debug info struct */
+static
+rw_lock_debug_t*
+rw_lock_debug_create(void)
+/*======================*/
+{
+ return((rw_lock_debug_t*) ut_malloc_nokey(sizeof(rw_lock_debug_t)));
+}
+
+/******************************************************************//**
+Frees a debug info struct. */
+static
+void
+rw_lock_debug_free(
+/*===============*/
+ rw_lock_debug_t* info)
+{
+ ut_free(info);
+}
+#endif /* UNIV_DEBUG */
+
+/******************************************************************//**
+Creates, or rather, initializes an rw-lock object in a specified memory
+location (which must be appropriately aligned). The rw-lock is initialized
+to the non-locked state. Explicit freeing of the rw-lock with rw_lock_free
+is necessary only if the memory block containing it is freed. */
+void
+rw_lock_create_func(
+/*================*/
+ rw_lock_t* lock, /*!< in: pointer to memory */
+#ifdef UNIV_DEBUG
+ latch_level_t level, /*!< in: level */
+#endif /* UNIV_DEBUG */
+ const char* cfile_name, /*!< in: file name where created */
+ unsigned cline) /*!< in: file line where created */
+{
+#if defined(UNIV_DEBUG) && !defined(UNIV_PFS_RWLOCK)
+ /* It should have been created in pfs_rw_lock_create_func() */
+ new(lock) rw_lock_t();
+#endif /* UNIV_DEBUG && !UNIV_PFS_RWLOCK */
+
+ lock->lock_word = X_LOCK_DECR;
+ lock->waiters = 0;
+
+ lock->sx_recursive = 0;
+ lock->writer_thread= 0;
+
+#ifdef UNIV_DEBUG
+ lock->m_rw_lock = true;
+
+ UT_LIST_INIT(lock->debug_list, &rw_lock_debug_t::list);
+
+ lock->m_id = sync_latch_get_id(sync_latch_get_name(level));
+ ut_a(lock->m_id != LATCH_ID_NONE);
+
+ lock->level = level;
+#endif /* UNIV_DEBUG */
+
+ lock->cfile_name = cfile_name;
+
+ /* This should hold in practice. If it doesn't then we need to
+ split the source file anyway. Or create the locks on lines
+ less than 8192. cline is unsigned:13. */
+ ut_ad(cline <= ((1U << 13) - 1));
+ lock->cline = cline & ((1U << 13) - 1);
+ lock->count_os_wait = 0;
+ lock->last_x_file_name = "not yet reserved";
+ lock->last_x_line = 0;
+ lock->event = os_event_create(0);
+ lock->wait_ex_event = os_event_create(0);
+
+ lock->is_block_lock = 0;
+
+ ut_d(lock->created = true);
+
+ mutex_enter(&rw_lock_list_mutex);
+ rw_lock_list.push_front(*lock);
+ mutex_exit(&rw_lock_list_mutex);
+}
+
+/******************************************************************//**
+Calling this function is obligatory only if the memory buffer containing
+the rw-lock is freed. Removes an rw-lock object from the global list. The
+rw-lock is checked to be in the non-locked state. */
+void
+rw_lock_free_func(
+/*==============*/
+ rw_lock_t* lock) /*!< in/out: rw-lock */
+{
+ ut_ad(rw_lock_validate(lock));
+ ut_a(lock->lock_word == X_LOCK_DECR);
+
+ ut_d(lock->created = false);
+
+ mutex_enter(&rw_lock_list_mutex);
+
+ os_event_destroy(lock->event);
+
+ os_event_destroy(lock->wait_ex_event);
+
+ rw_lock_list.remove(*lock);
+
+ mutex_exit(&rw_lock_list_mutex);
+}
+
+/******************************************************************//**
+Lock an rw-lock in shared mode for the current thread. If the rw-lock is
+locked in exclusive mode, or there is an exclusive lock request waiting,
+the function spins a preset time (controlled by srv_n_spin_wait_rounds), waiting
+for the lock, before suspending the thread. */
+void
+rw_lock_s_lock_spin(
+/*================*/
+ rw_lock_t* lock, /*!< in: pointer to rw-lock */
+ ulint pass, /*!< in: pass value; != 0, if the lock
+ will be passed to another thread to unlock */
+ const char* file_name, /*!< in: file name where lock requested */
+ unsigned line) /*!< in: line where requested */
+{
+ ulint i = 0; /* spin round count */
+ sync_array_t* sync_arr;
+ lint spin_count = 0;
+ int64_t count_os_wait = 0;
+
+ /* We reuse the thread id to index into the counter, cache
+ it here for efficiency. */
+
+ ut_ad(rw_lock_validate(lock));
+
+ rw_lock_stats.rw_s_spin_wait_count.inc();
+
+lock_loop:
+
+	/* Spin waiting for the lock_word to become free */
+ HMT_low();
+ ulint j = i;
+ while (i < srv_n_spin_wait_rounds &&
+ lock->lock_word <= 0) {
+ ut_delay(srv_spin_wait_delay);
+ i++;
+ }
+
+ HMT_medium();
+ if (i >= srv_n_spin_wait_rounds) {
+ os_thread_yield();
+ }
+
+ spin_count += lint(i - j);
+
+ /* We try once again to obtain the lock */
+ if (rw_lock_s_lock_low(lock, pass, file_name, line)) {
+
+ if (count_os_wait > 0) {
+ lock->count_os_wait +=
+ static_cast<uint32_t>(count_os_wait);
+ rw_lock_stats.rw_s_os_wait_count.add(count_os_wait);
+ }
+
+ rw_lock_stats.rw_s_spin_round_count.add(spin_count);
+
+ return; /* Success */
+ } else {
+
+ if (i < srv_n_spin_wait_rounds) {
+ goto lock_loop;
+ }
+
+
+ ++count_os_wait;
+
+ sync_cell_t* cell;
+
+ sync_arr = sync_array_get_and_reserve_cell(
+ lock, RW_LOCK_S, file_name, line, &cell);
+
+ /* Set waiters before checking lock_word to ensure wake-up
+ signal is sent. This may lead to some unnecessary signals. */
+ lock->waiters.exchange(1, std::memory_order_acquire);
+
+ if (rw_lock_s_lock_low(lock, pass, file_name, line)) {
+
+ sync_array_free_cell(sync_arr, cell);
+
+ if (count_os_wait > 0) {
+
+ lock->count_os_wait +=
+ static_cast<uint32_t>(count_os_wait);
+
+ rw_lock_stats.rw_s_os_wait_count.add(
+ count_os_wait);
+ }
+
+ rw_lock_stats.rw_s_spin_round_count.add(spin_count);
+
+ return; /* Success */
+ }
+
+		/* See the comments in trx_commit_low() near
+		before_trx_state_committed_in_memory explaining
+		why the following sync check is invoked with care. */
+#ifndef DBUG_OFF
+#ifdef UNIV_DEBUG
+ if (lock->get_level() != SYNC_DICT_OPERATION) {
+ DEBUG_SYNC_C("rw_s_lock_waiting");
+ }
+#endif
+#endif
+ sync_array_wait_event(sync_arr, cell);
+
+ i = 0;
+
+ goto lock_loop;
+ }
+}
+
+/******************************************************************//**
+This function is used in the insert buffer to move the ownership of an
+x-latch on a buffer frame to the current thread. The x-latch was set by
+the buffer read operation and it protected the buffer frame while the
+read was done. The ownership is moved because we want that the current
+thread is able to acquire a second x-latch which is stored in an mtr.
+This, in turn, is needed to pass the debug checks of index page
+operations. */
+void
+rw_lock_x_lock_move_ownership(
+/*==========================*/
+ rw_lock_t* lock) /*!< in: lock which was x-locked in the
+ buffer read */
+{
+ ut_ad(rw_lock_is_locked(lock, RW_LOCK_X));
+
+ lock->writer_thread = os_thread_get_curr_id();
+}
+
+/******************************************************************//**
+Function for the next writer to call. Waits for readers to exit.
+The caller must have already decremented lock_word by X_LOCK_DECR. */
+UNIV_INLINE
+void
+rw_lock_x_lock_wait_func(
+/*=====================*/
+ rw_lock_t* lock, /*!< in: pointer to rw-lock */
+#ifdef UNIV_DEBUG
+ ulint pass, /*!< in: pass value; != 0, if the lock will
+ be passed to another thread to unlock */
+#endif
+ lint threshold,/*!< in: threshold to wait for */
+ const char* file_name,/*!< in: file name where lock requested */
+ unsigned line) /*!< in: line where requested */
+{
+ ulint i = 0;
+ lint n_spins = 0;
+ sync_array_t* sync_arr;
+ int64_t count_os_wait = 0;
+
+ ut_ad(lock->lock_word <= threshold);
+
+ HMT_low();
+ while (lock->lock_word < threshold) {
+ ut_delay(srv_spin_wait_delay);
+
+ if (i < srv_n_spin_wait_rounds) {
+ i++;
+ continue;
+ }
+
+ /* If there is still a reader, then go to sleep.*/
+ n_spins += i;
+
+ sync_cell_t* cell;
+
+ sync_arr = sync_array_get_and_reserve_cell(
+ lock, RW_LOCK_X_WAIT, file_name, line, &cell);
+
+ i = 0;
+
+ /* Check lock_word to ensure wake-up isn't missed.*/
+ if (lock->lock_word < threshold) {
+ ++count_os_wait;
+
+ /* Add debug info as it is needed to detect possible
+ deadlock. We must add info for WAIT_EX thread for
+ deadlock detection to work properly. */
+ ut_d(rw_lock_add_debug_info(
+ lock, pass, RW_LOCK_X_WAIT,
+ file_name, line));
+
+ sync_array_wait_event(sync_arr, cell);
+
+ ut_d(rw_lock_remove_debug_info(
+ lock, pass, RW_LOCK_X_WAIT));
+
+ /* It is possible to wake when lock_word < 0.
+ We must pass the while-loop check to proceed.*/
+
+ } else {
+ sync_array_free_cell(sync_arr, cell);
+ break;
+ }
+ }
+ HMT_medium();
+ rw_lock_stats.rw_x_spin_round_count.add(n_spins);
+
+ if (count_os_wait > 0) {
+ lock->count_os_wait += static_cast<uint32_t>(count_os_wait);
+ rw_lock_stats.rw_x_os_wait_count.add(count_os_wait);
+ }
+}
+
+#ifdef UNIV_DEBUG
+# define rw_lock_x_lock_wait(L, P, T, F, O) \
+ rw_lock_x_lock_wait_func(L, P, T, F, O)
+#else
+# define rw_lock_x_lock_wait(L, P, T, F, O) \
+ rw_lock_x_lock_wait_func(L, T, F, O)
+#endif /* UNIV_DEBUG */
+
+/******************************************************************//**
+Low-level function for acquiring an exclusive lock.
+@return FALSE if did not succeed, TRUE if success. */
+UNIV_INLINE
+ibool
+rw_lock_x_lock_low(
+/*===============*/
+ rw_lock_t* lock, /*!< in: pointer to rw-lock */
+ ulint pass, /*!< in: pass value; != 0, if the lock will
+ be passed to another thread to unlock */
+ const char* file_name,/*!< in: file name where lock requested */
+ unsigned line) /*!< in: line where requested */
+{
+ if (rw_lock_lock_word_decr(lock, X_LOCK_DECR, X_LOCK_HALF_DECR)) {
+
+ /* As we are going to write our own thread id in that field it
+ must be that the current writer_thread value is not active. */
+ ut_a(!lock->writer_thread);
+
+ /* Decrement occurred: we are writer or next-writer. */
+ if (!pass)
+ {
+ lock->writer_thread = os_thread_get_curr_id();
+ }
+
+ rw_lock_x_lock_wait(lock, pass, 0, file_name, line);
+
+ } else {
+ os_thread_id_t thread_id = os_thread_get_curr_id();
+
+ /* Decrement failed: An X or SX lock is held by either
+ this thread or another. Try to relock. */
+ if (!pass && os_thread_eq(lock->writer_thread, thread_id)) {
+			/* Other s-locks can be allowed. If x is requested
+			recursively while holding an sx lock, this x lock
+			must comply with the latching order. */
+
+ /* The existing X or SX lock is from this thread */
+ if (rw_lock_lock_word_decr(lock, X_LOCK_DECR, 0)) {
+ /* There is at least one SX-lock from this
+ thread, but no X-lock. */
+
+				/* Wait for any other S-locks to be
+ released. */
+ rw_lock_x_lock_wait(
+ lock, pass, -X_LOCK_HALF_DECR,
+ file_name, line);
+
+ } else {
+ int32_t lock_word = lock->lock_word;
+ /* At least one X lock by this thread already
+ exists. Add another. */
+ if (lock_word == 0
+ || lock_word == -X_LOCK_HALF_DECR) {
+ lock->lock_word.fetch_sub(X_LOCK_DECR);
+ } else {
+ ut_ad(lock_word <= -X_LOCK_DECR);
+ lock->lock_word.fetch_sub(1);
+ }
+ }
+
+ } else {
+ /* Another thread locked before us */
+ return(FALSE);
+ }
+ }
+
+ ut_d(rw_lock_add_debug_info(lock, pass, RW_LOCK_X, file_name, line));
+
+ lock->last_x_file_name = file_name;
+ lock->last_x_line = line & ((1U << 14) - 1);
+
+ return(TRUE);
+}
+
+/******************************************************************//**
+Low-level function for acquiring an sx lock.
+@return FALSE if did not succeed, TRUE if success. */
+ibool
+rw_lock_sx_lock_low(
+/*================*/
+ rw_lock_t* lock, /*!< in: pointer to rw-lock */
+ ulint pass, /*!< in: pass value; != 0, if the lock will
+ be passed to another thread to unlock */
+ const char* file_name,/*!< in: file name where lock requested */
+ unsigned line) /*!< in: line where requested */
+{
+ if (rw_lock_lock_word_decr(lock, X_LOCK_HALF_DECR, X_LOCK_HALF_DECR)) {
+
+ /* As we are going to write our own thread id in that field it
+ must be that the current writer_thread value is not active. */
+ ut_a(!lock->writer_thread);
+
+ /* Decrement occurred: we are the SX lock owner. */
+ if (!pass)
+ {
+ lock->writer_thread = os_thread_get_curr_id();
+ }
+
+ lock->sx_recursive = 1;
+ } else {
+ os_thread_id_t thread_id = os_thread_get_curr_id();
+
+		/* Decrement failed: an X or SX lock is already held by this
+		thread or another thread. If it is this thread, relock,
+ else fail. */
+ if (!pass && os_thread_eq(lock->writer_thread, thread_id)) {
+ /* This thread owns an X or SX lock */
+ if (lock->sx_recursive++ == 0) {
+				/* This thread is making its first SX-lock request
+ and it must be holding at least one X-lock here
+ because:
+
+ * There can't be a WAIT_EX thread because we are
+				the thread which has its thread_id written in
+ the writer_thread field and we are not waiting.
+
+ * Any other X-lock thread cannot exist because
+ it must update recursive flag only after
+ updating the thread_id. Had there been
+ a concurrent X-locking thread which succeeded
+ in decrementing the lock_word it must have
+				written its thread_id before setting the
+				recursive flag. As we cleared the if()
+				condition above, we must be the only
+ thread working on this lock and it is safe to
+ read and write to the lock_word. */
+
+#ifdef UNIV_DEBUG
+ auto lock_word =
+#endif
+ lock->lock_word.fetch_sub(X_LOCK_HALF_DECR,
+ std::memory_order_relaxed);
+
+ ut_ad((lock_word == 0)
+ || ((lock_word <= -X_LOCK_DECR)
+ && (lock_word
+ > -(X_LOCK_DECR
+ + X_LOCK_HALF_DECR))));
+ }
+ } else {
+ /* Another thread locked before us */
+ return(FALSE);
+ }
+ }
+
+ ut_d(rw_lock_add_debug_info(lock, pass, RW_LOCK_SX, file_name, line));
+
+ lock->last_x_file_name = file_name;
+ lock->last_x_line = line & ((1U << 14) - 1);
+
+ return(TRUE);
+}
+
+/******************************************************************//**
+NOTE! Use the corresponding macro, not directly this function! Lock an
+rw-lock in exclusive mode for the current thread. If the rw-lock is locked
+in shared or exclusive mode, or there is an exclusive lock request waiting,
+the function spins a preset time (controlled by srv_n_spin_wait_rounds), waiting
+for the lock before suspending the thread. If the same thread has an x-lock
+on the rw-lock, locking succeeds, with the following exception: if pass != 0,
+only a single x-lock may be taken on the lock. NOTE: If the same thread has
+an s-lock, locking does not succeed! */
+void
+rw_lock_x_lock_func(
+/*================*/
+ rw_lock_t* lock, /*!< in: pointer to rw-lock */
+ ulint pass, /*!< in: pass value; != 0, if the lock will
+ be passed to another thread to unlock */
+ const char* file_name,/*!< in: file name where lock requested */
+ unsigned line) /*!< in: line where requested */
+{
+ ulint i = 0;
+ sync_array_t* sync_arr;
+ lint spin_count = 0;
+ int64_t count_os_wait = 0;
+
+ ut_ad(rw_lock_validate(lock));
+ ut_ad(!rw_lock_own(lock, RW_LOCK_S));
+
+ if (rw_lock_x_lock_low(lock, pass, file_name, line)) {
+ /* Locking succeeded */
+ return;
+ }
+ rw_lock_stats.rw_x_spin_wait_count.inc();
+
+lock_loop:
+
+ if (rw_lock_x_lock_low(lock, pass, file_name, line)) {
+
+ if (count_os_wait > 0) {
+ lock->count_os_wait +=
+ static_cast<uint32_t>(count_os_wait);
+ rw_lock_stats.rw_x_os_wait_count.add(count_os_wait);
+ }
+
+ rw_lock_stats.rw_x_spin_round_count.add(spin_count);
+
+ /* Locking succeeded */
+ return;
+
+ } else {
+
+ /* Spin waiting for the lock_word to become free */
+ HMT_low();
+ ulint j = i;
+ while (i < srv_n_spin_wait_rounds
+ && lock->lock_word <= X_LOCK_HALF_DECR) {
+ ut_delay(srv_spin_wait_delay);
+ i++;
+ }
+
+ HMT_medium();
+ spin_count += lint(i - j);
+
+ if (i >= srv_n_spin_wait_rounds) {
+
+ os_thread_yield();
+
+ } else {
+
+ goto lock_loop;
+ }
+ }
+
+ sync_cell_t* cell;
+
+ sync_arr = sync_array_get_and_reserve_cell(
+ lock, RW_LOCK_X, file_name, line, &cell);
+
+ /* Waiters must be set before checking lock_word, to ensure signal
+ is sent. This could lead to a few unnecessary wake-up signals. */
+ lock->waiters.exchange(1, std::memory_order_acquire);
+
+ if (rw_lock_x_lock_low(lock, pass, file_name, line)) {
+ sync_array_free_cell(sync_arr, cell);
+
+ if (count_os_wait > 0) {
+ lock->count_os_wait +=
+ static_cast<uint32_t>(count_os_wait);
+ rw_lock_stats.rw_x_os_wait_count.add(count_os_wait);
+ }
+
+ rw_lock_stats.rw_x_spin_round_count.add(spin_count);
+
+ /* Locking succeeded */
+ return;
+ }
+
+ ++count_os_wait;
+
+ sync_array_wait_event(sync_arr, cell);
+
+ i = 0;
+
+ goto lock_loop;
+}
+
+/******************************************************************//**
+NOTE! Use the corresponding macro, not directly this function! Lock an
+rw-lock in SX mode for the current thread. If the rw-lock is locked
+in exclusive mode, or there is an exclusive lock request waiting,
+the function spins a preset time (controlled by srv_n_spin_wait_rounds), waiting
+for the lock, before suspending the thread. If the same thread has an x-lock
+on the rw-lock, locking succeeds, with the following exception: if pass != 0,
+only a single sx-lock may be taken on the lock. NOTE: If the same thread has
+an s-lock, locking does not succeed! */
+void
+rw_lock_sx_lock_func(
+/*=================*/
+ rw_lock_t* lock, /*!< in: pointer to rw-lock */
+ ulint pass, /*!< in: pass value; != 0, if the lock will
+ be passed to another thread to unlock */
+ const char* file_name,/*!< in: file name where lock requested */
+ unsigned line) /*!< in: line where requested */
+
+{
+ ulint i = 0;
+ sync_array_t* sync_arr;
+ lint spin_count = 0;
+ int64_t count_os_wait = 0;
+
+ ut_ad(rw_lock_validate(lock));
+ ut_ad(!rw_lock_own(lock, RW_LOCK_S));
+
+ if (rw_lock_sx_lock_low(lock, pass, file_name, line)) {
+ /* Locking succeeded */
+ return;
+ }
+
+ rw_lock_stats.rw_sx_spin_wait_count.inc();
+
+lock_loop:
+
+ if (rw_lock_sx_lock_low(lock, pass, file_name, line)) {
+
+ if (count_os_wait > 0) {
+ lock->count_os_wait +=
+ static_cast<uint32_t>(count_os_wait);
+ rw_lock_stats.rw_sx_os_wait_count.add(count_os_wait);
+ }
+
+ rw_lock_stats.rw_sx_spin_round_count.add(spin_count);
+
+ /* Locking succeeded */
+ return;
+
+ } else {
+
+ /* Spin waiting for the lock_word to become free */
+ ulint j = i;
+ while (i < srv_n_spin_wait_rounds
+ && lock->lock_word <= X_LOCK_HALF_DECR) {
+ ut_delay(srv_spin_wait_delay);
+ i++;
+ }
+
+ spin_count += lint(i - j);
+
+ if (i >= srv_n_spin_wait_rounds) {
+
+ os_thread_yield();
+
+ } else {
+
+ goto lock_loop;
+ }
+ }
+
+ sync_cell_t* cell;
+
+ sync_arr = sync_array_get_and_reserve_cell(
+ lock, RW_LOCK_SX, file_name, line, &cell);
+
+ /* Waiters must be set before checking lock_word, to ensure signal
+ is sent. This could lead to a few unnecessary wake-up signals. */
+ lock->waiters.exchange(1, std::memory_order_acquire);
+
+ if (rw_lock_sx_lock_low(lock, pass, file_name, line)) {
+
+ sync_array_free_cell(sync_arr, cell);
+
+ if (count_os_wait > 0) {
+ lock->count_os_wait +=
+ static_cast<uint32_t>(count_os_wait);
+ rw_lock_stats.rw_sx_os_wait_count.add(count_os_wait);
+ }
+
+ rw_lock_stats.rw_sx_spin_round_count.add(spin_count);
+
+ /* Locking succeeded */
+ return;
+ }
+
+ ++count_os_wait;
+
+ sync_array_wait_event(sync_arr, cell);
+
+ i = 0;
+
+ goto lock_loop;
+}
+
+#ifdef UNIV_DEBUG
+
+/******************************************************************//**
+Checks that the rw-lock has been initialized and that there are no
+simultaneous shared and exclusive locks.
+@return true */
+bool
+rw_lock_validate(
+/*=============*/
+ const rw_lock_t* lock) /*!< in: rw-lock */
+{
+ ut_ad(lock);
+
+ ut_ad(lock->created);
+
+ int32_t lock_word = lock->lock_word;
+
+ ut_ad(lock->waiters < 2);
+ ut_ad(lock_word > -(2 * X_LOCK_DECR));
+ ut_ad(lock_word <= X_LOCK_DECR);
+
+ return(true);
+}
+
+/******************************************************************//**
+Checks if somebody has locked the rw-lock in the specified mode.
+@return true if locked */
+bool
+rw_lock_is_locked(
+/*==============*/
+ rw_lock_t* lock, /*!< in: rw-lock */
+ ulint lock_type) /*!< in: lock type: RW_LOCK_S,
+ RW_LOCK_X or RW_LOCK_SX */
+{
+ ut_ad(rw_lock_validate(lock));
+
+ switch (lock_type) {
+ case RW_LOCK_S:
+ return(rw_lock_get_reader_count(lock) > 0);
+
+ case RW_LOCK_X:
+ return(rw_lock_get_writer(lock) == RW_LOCK_X);
+
+ case RW_LOCK_SX:
+ return(rw_lock_get_sx_lock_count(lock) > 0);
+
+ default:
+ ut_error;
+ }
+ return(false); /* avoid compiler warnings */
+}
+
+/******************************************************************//**
+Inserts the debug information for an rw-lock. */
+void
+rw_lock_add_debug_info(
+/*===================*/
+ rw_lock_t* lock, /*!< in: rw-lock */
+ ulint pass, /*!< in: pass value */
+ ulint lock_type, /*!< in: lock type */
+ const char* file_name, /*!< in: file where requested */
+ unsigned line) /*!< in: line where requested */
+{
+ ut_ad(file_name != NULL);
+
+ rw_lock_debug_t* info = rw_lock_debug_create();
+
+ rw_lock_debug_mutex_enter();
+
+ info->pass = pass;
+ info->line = line;
+ info->lock_type = lock_type;
+ info->file_name = file_name;
+ info->thread_id = os_thread_get_curr_id();
+
+ UT_LIST_ADD_FIRST(lock->debug_list, info);
+
+ rw_lock_debug_mutex_exit();
+
+ if (pass == 0 && lock_type != RW_LOCK_X_WAIT) {
+ int32_t lock_word = lock->lock_word;
+
+ /* Recursive x while holding SX
+ (lock_type == RW_LOCK_X && lock_word == -X_LOCK_HALF_DECR)
+ is treated as not-relock (new lock). */
+
+ if ((lock_type == RW_LOCK_X
+ && lock_word < -X_LOCK_HALF_DECR)
+ || (lock_type == RW_LOCK_SX
+ && (lock_word < 0 || lock->sx_recursive == 1))) {
+
+ sync_check_lock_validate(lock);
+ sync_check_lock_granted(lock);
+ } else {
+ sync_check_relock(lock);
+ }
+ }
+}
+
+/******************************************************************//**
+Removes a debug information struct for an rw-lock. */
+void
+rw_lock_remove_debug_info(
+/*======================*/
+ rw_lock_t* lock, /*!< in: rw-lock */
+ ulint pass, /*!< in: pass value */
+ ulint lock_type) /*!< in: lock type */
+{
+ rw_lock_debug_t* info;
+
+ ut_ad(lock);
+
+ if (pass == 0 && lock_type != RW_LOCK_X_WAIT) {
+ sync_check_unlock(lock);
+ }
+
+ rw_lock_debug_mutex_enter();
+
+ for (info = UT_LIST_GET_FIRST(lock->debug_list);
+ info != 0;
+ info = UT_LIST_GET_NEXT(list, info)) {
+
+ if (pass == info->pass
+ && (pass != 0
+ || os_thread_eq(info->thread_id,
+ os_thread_get_curr_id()))
+ && info->lock_type == lock_type) {
+
+ /* Found! */
+ UT_LIST_REMOVE(lock->debug_list, info);
+
+ rw_lock_debug_mutex_exit();
+
+ rw_lock_debug_free(info);
+
+ return;
+ }
+ }
+
+ ut_error;
+}
+
+/******************************************************************//**
+Checks if the thread has locked the rw-lock in the specified mode, with
+the pass value == 0.
+@return TRUE if locked */
+bool
+rw_lock_own(
+/*========*/
+ const rw_lock_t*lock, /*!< in: rw-lock */
+ ulint lock_type) /*!< in: lock type: RW_LOCK_S,
+ RW_LOCK_X */
+{
+ ut_ad(lock);
+ ut_ad(rw_lock_validate(lock));
+
+ const os_thread_id_t thread_id = os_thread_get_curr_id();
+
+ if (!os_thread_eq(lock->writer_thread, thread_id)) {
+ } else if (lock_type == RW_LOCK_X && rw_lock_get_x_lock_count(lock)) {
+ return TRUE;
+ } else if (lock_type == RW_LOCK_SX && rw_lock_get_sx_lock_count(lock)) {
+ return TRUE;
+ }
+
+ rw_lock_debug_mutex_enter();
+
+ for (const rw_lock_debug_t* info = UT_LIST_GET_FIRST(lock->debug_list);
+ info != NULL;
+ info = UT_LIST_GET_NEXT(list, info)) {
+
+ if (os_thread_eq(info->thread_id, thread_id)
+ && info->pass == 0
+ && info->lock_type == lock_type) {
+
+ rw_lock_debug_mutex_exit();
+ /* Found! */
+
+ return(true);
+ }
+ }
+ rw_lock_debug_mutex_exit();
+
+ return(false);
+}
+
+/** Checks if the thread has locked the rw-lock in the specified mode, with
+the pass value == 0.
+@param[in] lock rw-lock
+@param[in] flags specify lock types with OR of the
+ rw_lock_flag_t values
+@return true if locked */
+bool rw_lock_own_flagged(const rw_lock_t* lock, rw_lock_flags_t flags)
+{
+ ut_ad(rw_lock_validate(lock));
+
+ const os_thread_id_t thread_id = os_thread_get_curr_id();
+
+ if (!os_thread_eq(lock->writer_thread, thread_id)) {
+ } else if ((flags & RW_LOCK_FLAG_X)
+ && rw_lock_get_x_lock_count(lock)) {
+ return true;
+ } else if ((flags & RW_LOCK_FLAG_SX)
+ && rw_lock_get_sx_lock_count(lock)) {
+ return true;
+ }
+
+ rw_lock_debug_mutex_enter();
+
+ for (rw_lock_debug_t* info = UT_LIST_GET_FIRST(lock->debug_list);
+ info != NULL;
+ info = UT_LIST_GET_NEXT(list, info)) {
+ if (!os_thread_eq(info->thread_id, thread_id)
+ || info->pass) {
+ continue;
+ }
+
+ switch (info->lock_type) {
+ case RW_LOCK_S:
+ if (!(flags & RW_LOCK_FLAG_S)) {
+ continue;
+ }
+ break;
+
+ case RW_LOCK_X:
+ if (!(flags & RW_LOCK_FLAG_X)) {
+ continue;
+ }
+ break;
+
+ case RW_LOCK_SX:
+ if (!(flags & RW_LOCK_FLAG_SX)) {
+ continue;
+ }
+ break;
+ }
+
+ rw_lock_debug_mutex_exit();
+ return true;
+ }
+
+ rw_lock_debug_mutex_exit();
+ return false;
+}
+
+/***************************************************************//**
+Prints debug info of currently locked rw-locks. */
+void
+rw_lock_list_print_info(
+/*====================*/
+ FILE* file) /*!< in: file where to print */
+{
+ ulint count = 0;
+
+ mutex_enter(&rw_lock_list_mutex);
+
+ fputs("-------------\n"
+ "RW-LATCH INFO\n"
+ "-------------\n", file);
+
+ for (const rw_lock_t& lock : rw_lock_list) {
+
+ count++;
+
+ if (lock.lock_word != X_LOCK_DECR) {
+
+ fprintf(file, "RW-LOCK: %p ", (void*) &lock);
+
+ if (int32_t waiters= lock.waiters) {
+ fprintf(file, " (%d waiters)\n", waiters);
+ } else {
+ putc('\n', file);
+ }
+
+ rw_lock_debug_t* info;
+
+ rw_lock_debug_mutex_enter();
+
+ for (info = UT_LIST_GET_FIRST(lock.debug_list);
+ info != NULL;
+ info = UT_LIST_GET_NEXT(list, info)) {
+
+ rw_lock_debug_print(file, info);
+ }
+
+ rw_lock_debug_mutex_exit();
+ }
+ }
+
+ fprintf(file, "Total number of rw-locks " ULINTPF "\n", count);
+ mutex_exit(&rw_lock_list_mutex);
+}
+
+/*********************************************************************//**
+Prints info of a debug struct. */
+void
+rw_lock_debug_print(
+/*================*/
+ FILE* f, /*!< in: output stream */
+ const rw_lock_debug_t* info) /*!< in: debug struct */
+{
+ ulint rwt = info->lock_type;
+
+ fprintf(f, "Locked: thread " ULINTPF " file %s line %u ",
+ ulint(info->thread_id),
+ sync_basename(info->file_name),
+ info->line);
+
+ switch (rwt) {
+ case RW_LOCK_S:
+ fputs("S-LOCK", f);
+ break;
+ case RW_LOCK_X:
+ fputs("X-LOCK", f);
+ break;
+ case RW_LOCK_SX:
+ fputs("SX-LOCK", f);
+ break;
+ case RW_LOCK_X_WAIT:
+ fputs("WAIT X-LOCK", f);
+ break;
+ default:
+ ut_error;
+ }
+
+ if (info->pass != 0) {
+ fprintf(f, " pass value %lu", (ulong) info->pass);
+ }
+
+ fprintf(f, "\n");
+}
+
+/** Print the rw-lock information.
+@return the string representation */
+std::string
+rw_lock_t::to_string() const
+{
+	/* Note: For X locks it can be locked from multiple places because
+ the same thread can call X lock recursively. */
+
+ std::ostringstream msg;
+ bool written = false;
+
+ ut_ad(rw_lock_validate(this));
+
+ msg << "RW-LATCH: "
+ << "thread id " << os_thread_get_curr_id()
+ << " addr: " << this
+ << " Locked from: ";
+
+ rw_lock_debug_mutex_enter();
+
+ for (rw_lock_debug_t* info = UT_LIST_GET_FIRST(debug_list);
+ info != NULL;
+ info = UT_LIST_GET_NEXT(list, info)) {
+ if (!os_thread_eq(info->thread_id, os_thread_get_curr_id())) {
+ continue;
+ }
+
+ if (written) {
+ msg << ", ";
+ }
+
+ written = true;
+
+ msg << info->file_name << ":" << info->line;
+ }
+
+ rw_lock_debug_mutex_exit();
+
+ return(msg.str());
+}
+#endif /* UNIV_DEBUG */
diff --git a/storage/innobase/sync/sync0sync.cc b/storage/innobase/sync/sync0sync.cc
new file mode 100644
index 00000000..0a6f8bfb
--- /dev/null
+++ b/storage/innobase/sync/sync0sync.cc
@@ -0,0 +1,246 @@
+/*****************************************************************************
+
+Copyright (c) 1995, 2016, Oracle and/or its affiliates. All Rights Reserved.
+Copyright (c) 2008, Google Inc.
+Copyright (c) 2020, MariaDB Corporation.
+
+Portions of this file contain modifications contributed and copyrighted by
+Google, Inc. Those modifications are gratefully acknowledged and are described
+briefly in the InnoDB documentation. The contributions by Google are
+incorporated with their permission, and subject to the conditions contained in
+the file COPYING.Google.
+
+This program is free software; you can redistribute it and/or modify it under
+the terms of the GNU General Public License as published by the Free Software
+Foundation; version 2 of the License.
+
+This program is distributed in the hope that it will be useful, but WITHOUT
+ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
+FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details.
+
+You should have received a copy of the GNU General Public License along with
+this program; if not, write to the Free Software Foundation, Inc.,
+51 Franklin Street, Fifth Floor, Boston, MA 02110-1335 USA
+
+*****************************************************************************/
+
+/**************************************************//**
+@file sync/sync0sync.cc
+Mutex, the basic synchronization primitive
+
+Created 9/5/1995 Heikki Tuuri
+*******************************************************/
+
+#include "sync0rw.h"
+#include "sync0sync.h"
+
+#ifdef UNIV_PFS_MUTEX
+mysql_pfs_key_t buf_pool_mutex_key;
+mysql_pfs_key_t dict_foreign_err_mutex_key;
+mysql_pfs_key_t dict_sys_mutex_key;
+mysql_pfs_key_t fil_system_mutex_key;
+mysql_pfs_key_t flush_list_mutex_key;
+mysql_pfs_key_t fts_delete_mutex_key;
+mysql_pfs_key_t fts_doc_id_mutex_key;
+mysql_pfs_key_t fts_pll_tokenize_mutex_key;
+mysql_pfs_key_t ibuf_bitmap_mutex_key;
+mysql_pfs_key_t ibuf_mutex_key;
+mysql_pfs_key_t ibuf_pessimistic_insert_mutex_key;
+mysql_pfs_key_t log_sys_mutex_key;
+mysql_pfs_key_t log_cmdq_mutex_key;
+mysql_pfs_key_t log_flush_order_mutex_key;
+mysql_pfs_key_t recalc_pool_mutex_key;
+mysql_pfs_key_t purge_sys_pq_mutex_key;
+mysql_pfs_key_t recv_sys_mutex_key;
+mysql_pfs_key_t redo_rseg_mutex_key;
+mysql_pfs_key_t noredo_rseg_mutex_key;
+mysql_pfs_key_t page_zip_stat_per_index_mutex_key;
+# ifdef UNIV_DEBUG
+mysql_pfs_key_t rw_lock_debug_mutex_key;
+# endif /* UNIV_DEBUG */
+mysql_pfs_key_t rtr_active_mutex_key;
+mysql_pfs_key_t rtr_match_mutex_key;
+mysql_pfs_key_t rtr_path_mutex_key;
+mysql_pfs_key_t rw_lock_list_mutex_key;
+mysql_pfs_key_t srv_innodb_monitor_mutex_key;
+mysql_pfs_key_t srv_misc_tmpfile_mutex_key;
+mysql_pfs_key_t srv_monitor_file_mutex_key;
+mysql_pfs_key_t buf_dblwr_mutex_key;
+mysql_pfs_key_t trx_mutex_key;
+mysql_pfs_key_t trx_pool_mutex_key;
+mysql_pfs_key_t trx_pool_manager_mutex_key;
+mysql_pfs_key_t lock_mutex_key;
+mysql_pfs_key_t lock_wait_mutex_key;
+mysql_pfs_key_t trx_sys_mutex_key;
+mysql_pfs_key_t srv_threads_mutex_key;
+mysql_pfs_key_t sync_array_mutex_key;
+mysql_pfs_key_t thread_mutex_key;
+mysql_pfs_key_t row_drop_list_mutex_key;
+mysql_pfs_key_t rw_trx_hash_element_mutex_key;
+mysql_pfs_key_t read_view_mutex_key;
+#endif /* UNIV_PFS_MUTEX */
+#ifdef UNIV_PFS_RWLOCK
+mysql_pfs_key_t btr_search_latch_key;
+mysql_pfs_key_t dict_operation_lock_key;
+mysql_pfs_key_t index_tree_rw_lock_key;
+mysql_pfs_key_t index_online_log_key;
+mysql_pfs_key_t fil_space_latch_key;
+mysql_pfs_key_t fts_cache_rw_lock_key;
+mysql_pfs_key_t fts_cache_init_rw_lock_key;
+mysql_pfs_key_t trx_i_s_cache_lock_key;
+mysql_pfs_key_t trx_purge_latch_key;
+#endif /* UNIV_PFS_RWLOCK */
+
+/** For monitoring active mutexes */
+MutexMonitor mutex_monitor;
+
+/**
+Prints wait info of the sync system.
+@param file - where to print */
+static
+void
+sync_print_wait_info(FILE* file)
+{
+ fprintf(file,
+ "RW-shared spins " UINT64PF ", rounds " UINT64PF ","
+ " OS waits " UINT64PF "\n"
+ "RW-excl spins " UINT64PF ", rounds " UINT64PF ","
+ " OS waits " UINT64PF "\n"
+ "RW-sx spins " UINT64PF ", rounds " UINT64PF ","
+ " OS waits " UINT64PF "\n",
+ (ib_uint64_t) rw_lock_stats.rw_s_spin_wait_count,
+ (ib_uint64_t) rw_lock_stats.rw_s_spin_round_count,
+ (ib_uint64_t) rw_lock_stats.rw_s_os_wait_count,
+ (ib_uint64_t) rw_lock_stats.rw_x_spin_wait_count,
+ (ib_uint64_t) rw_lock_stats.rw_x_spin_round_count,
+ (ib_uint64_t) rw_lock_stats.rw_x_os_wait_count,
+ (ib_uint64_t) rw_lock_stats.rw_sx_spin_wait_count,
+ (ib_uint64_t) rw_lock_stats.rw_sx_spin_round_count,
+ (ib_uint64_t) rw_lock_stats.rw_sx_os_wait_count);
+
+ fprintf(file,
+ "Spin rounds per wait: %.2f RW-shared,"
+ " %.2f RW-excl, %.2f RW-sx\n",
+ rw_lock_stats.rw_s_spin_wait_count
+ ? static_cast<double>(rw_lock_stats.rw_s_spin_round_count) /
+ static_cast<double>(rw_lock_stats.rw_s_spin_wait_count)
+ : static_cast<double>(rw_lock_stats.rw_s_spin_round_count),
+ rw_lock_stats.rw_x_spin_wait_count
+ ? static_cast<double>(rw_lock_stats.rw_x_spin_round_count) /
+ static_cast<double>(rw_lock_stats.rw_x_spin_wait_count)
+ : static_cast<double>(rw_lock_stats.rw_x_spin_round_count),
+ rw_lock_stats.rw_sx_spin_wait_count
+ ? static_cast<double>(rw_lock_stats.rw_sx_spin_round_count) /
+ static_cast<double>(rw_lock_stats.rw_sx_spin_wait_count)
+ : static_cast<double>(rw_lock_stats.rw_sx_spin_round_count));
+}
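+
+/* Illustrative example (not part of the original sources): if
+rw_s_spin_wait_count is 1000 and rw_s_spin_round_count is 25000, the
+"Spin rounds per wait" figure printed above is 25000 / 1000 = 25.00 for
+RW-shared; when a wait count is zero, the raw round count is printed
+instead, avoiding a division by zero. */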
+
+/**
+Prints info of the sync system.
+@param file - where to print */
+void
+sync_print(FILE* file)
+{
+#ifdef UNIV_DEBUG
+ rw_lock_list_print_info(file);
+#endif /* UNIV_DEBUG */
+
+ sync_array_print(file);
+
+ sync_print_wait_info(file);
+}
+
+/** Get the filename "basename" e.g., p = "/a/b/c/d/e.cc" -> p = "e.cc"
+@param[in] filename Name from where to extract the basename
+@return the basename */
+const char*
+sync_basename(const char* filename)
+{
+ const char* ptr = filename + strlen(filename) - 1;
+
+ while (ptr > filename && *ptr != '/' && *ptr != '\\') {
+ --ptr;
+ }
+
+ ++ptr;
+
+ return(ptr);
+}
+
+/** String representation of the filename and line number where the
+latch was created
+@param[in] id Latch ID
+@param[in]	created		Filename and line number where it was created
+@return the string representation */
+std::string
+sync_mutex_to_string(
+ latch_id_t id,
+ const std::string& created)
+{
+ std::ostringstream msg;
+
+ msg << "Mutex " << sync_latch_get_name(id) << " "
+ << "created " << created;
+
+ return(msg.str());
+}
+
+/** Enable the mutex monitoring */
+void
+MutexMonitor::enable()
+{
+ /** Note: We don't add any latch meta-data after startup. Therefore
+ there is no need to use a mutex here. */
+
+ LatchMetaData::iterator end = latch_meta.end();
+
+ for (LatchMetaData::iterator it = latch_meta.begin(); it != end; ++it) {
+
+ if (*it != NULL) {
+ (*it)->get_counter()->enable();
+ }
+ }
+}
+
+/** Disable the mutex monitoring */
+void
+MutexMonitor::disable()
+{
+ /** Note: We don't add any latch meta-data after startup. Therefore
+ there is no need to use a mutex here. */
+
+ LatchMetaData::iterator end = latch_meta.end();
+
+ for (LatchMetaData::iterator it = latch_meta.begin(); it != end; ++it) {
+
+ if (*it != NULL) {
+ (*it)->get_counter()->disable();
+ }
+ }
+}
+
+/** Reset the mutex monitoring counters */
+void
+MutexMonitor::reset()
+{
+ /** Note: We don't add any latch meta-data after startup. Therefore
+ there is no need to use a mutex here. */
+
+ LatchMetaData::iterator end = latch_meta.end();
+
+ for (LatchMetaData::iterator it = latch_meta.begin(); it != end; ++it) {
+
+ if (*it != NULL) {
+ (*it)->get_counter()->reset();
+ }
+ }
+
+ mutex_enter(&rw_lock_list_mutex);
+
+ for (rw_lock_t& rw_lock : rw_lock_list) {
+ rw_lock.count_os_wait = 0;
+ }
+
+ mutex_exit(&rw_lock_list_mutex);
+}