Diffstat (limited to 'storage/innobase/sync')
-rw-r--r--  storage/innobase/sync/sync0arr.cc    1296
-rw-r--r--  storage/innobase/sync/sync0debug.cc  1423
-rw-r--r--  storage/innobase/sync/sync0rw.cc     1216
-rw-r--r--  storage/innobase/sync/sync0sync.cc    246
4 files changed, 4181 insertions, 0 deletions
diff --git a/storage/innobase/sync/sync0arr.cc b/storage/innobase/sync/sync0arr.cc new file mode 100644 index 00000000..5f39325d --- /dev/null +++ b/storage/innobase/sync/sync0arr.cc @@ -0,0 +1,1296 @@ +/***************************************************************************** + +Copyright (c) 1995, 2016, Oracle and/or its affiliates. All Rights Reserved. +Copyright (c) 2008, Google Inc. +Copyright (c) 2013, 2020, MariaDB Corporation. + +Portions of this file contain modifications contributed and copyrighted by +Google, Inc. Those modifications are gratefully acknowledged and are described +briefly in the InnoDB documentation. The contributions by Google are +incorporated with their permission, and subject to the conditions contained in +the file COPYING.Google. + +This program is free software; you can redistribute it and/or modify it under +the terms of the GNU General Public License as published by the Free Software +Foundation; version 2 of the License. + +This program is distributed in the hope that it will be useful, but WITHOUT +ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS +FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details. + +You should have received a copy of the GNU General Public License along with +this program; if not, write to the Free Software Foundation, Inc., +51 Franklin Street, Fifth Floor, Boston, MA 02110-1335 USA + +*****************************************************************************/ + +/**************************************************//** +@file sync/sync0arr.cc +The wait array used in synchronization primitives + +Created 9/5/1995 Heikki Tuuri +*******************************************************/ + +#include "sync0arr.h" +#include <mysqld_error.h> +#include <mysql/plugin.h> +#include <hash.h> +#include <myisampack.h> +#include <sql_acl.h> +#include <mysys_err.h> +#include <my_sys.h> +#include "srv0srv.h" +#include "srv0start.h" +#include "i_s.h" +#include <sql_plugin.h> +#include <innodb_priv.h> + +#include "lock0lock.h" +#include "sync0rw.h" + +/* + WAIT ARRAY + ========== + +The wait array consists of cells each of which has an an event object created +for it. The threads waiting for a mutex, for example, can reserve a cell +in the array and suspend themselves to wait for the event to become signaled. +When using the wait array, remember to make sure that some thread holding +the synchronization object will eventually know that there is a waiter in +the array and signal the object, to prevent infinite wait. Why we chose +to implement a wait array? First, to make mutexes fast, we had to code +our own implementation of them, which only in usually uncommon cases +resorts to using slow operating system primitives. Then we had the choice of +assigning a unique OS event for each mutex, which would be simpler, or +using a global wait array. In some operating systems, the global wait +array solution is more efficient and flexible, because we can do with +a very small number of OS events, say 200. In NT 3.51, allocating events +seems to be a quadratic algorithm, because 10 000 events are created fast, +but 100 000 events takes a couple of minutes to create. + +As of 5.0.30 the above mentioned design is changed. Since now OS can handle +millions of wait events efficiently, we no longer have this concept of each +cell of wait array having one event. Instead, now the event that a thread +wants to wait on is embedded in the wait object (mutex or rw_lock). 
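What makes the embedded-event design safe against lost wakeups is a signal counter: the waiter records the counter value when it resets the event, and the subsequent wait becomes a no-op if the event was signalled in between. Below is a minimal self-contained model of that reset/wait protocol (an illustration only, not InnoDB's actual os_event implementation):

#include <condition_variable>
#include <cstdint>
#include <mutex>

class toy_event {
public:
    /* Reset to the nonsignalled state and return the current signal
    count. The caller hands this count back to wait(), so a signal
    that lands between reset() and wait() is never lost. */
    int64_t reset() {
        std::lock_guard<std::mutex> g(m_mutex);
        m_set = false;
        return m_count;
    }

    /* Signal the event: wake all waiters and bump the counter. */
    void set() {
        {
            std::lock_guard<std::mutex> g(m_mutex);
            m_set = true;
            ++m_count;
        }
        m_cond.notify_all();
    }

    /* Suspend until the event is set, unless it has been signalled
    since the reset whose count the caller is holding. */
    void wait(int64_t reset_count) {
        std::unique_lock<std::mutex> lk(m_mutex);
        m_cond.wait(lk, [&] { return m_set || m_count != reset_count; });
    }

private:
    std::mutex              m_mutex;
    std::condition_variable m_cond;
    bool                    m_set = false;
    int64_t                 m_count = 0;
};

sync_array_reserve_cell() below follows exactly this pattern: it stores the value returned by os_event_reset() in cell->signal_count and later hands it to os_event_wait_low().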
We still +keep the global wait array for the sake of diagnostics and also to avoid +infinite wait The error_monitor thread scans the global wait array to signal +any waiting threads who have missed the signal. */ + +typedef TTASEventMutex<GenericPolicy> WaitMutex; + +/** The latch types that use the sync array. */ +union sync_object_t { + + /** RW lock instance */ + rw_lock_t* lock; + + /** Mutex instance */ + WaitMutex* mutex; +}; + +/** A cell where an individual thread may wait suspended until a resource +is released. The suspending is implemented using an operating system +event semaphore. */ + +struct sync_cell_t { + sync_object_t latch; /*!< pointer to the object the + thread is waiting for; if NULL + the cell is free for use */ + ulint request_type; /*!< lock type requested on the + object */ + const char* file; /*!< in debug version file where + requested */ + ulint line; /*!< in debug version line where + requested, or ULINT_UNDEFINED */ + os_thread_id_t thread_id; /*!< thread id of this waiting + thread */ + bool waiting; /*!< TRUE if the thread has already + called sync_array_event_wait + on this cell */ + int64_t signal_count; /*!< We capture the signal_count + of the latch when we + reset the event. This value is + then passed on to os_event_wait + and we wait only if the event + has not been signalled in the + period between the reset and + wait call. */ + /** time(NULL) when the wait cell was reserved. + FIXME: sync_array_print_long_waits_low() may display bogus + warnings when the system time is adjusted to the past! */ + time_t reservation_time; +}; + +/* NOTE: It is allowed for a thread to wait for an event allocated for +the array without owning the protecting mutex (depending on the case: +OS or database mutex), but all changes (set or reset) to the state of +the event must be made while owning the mutex. */ + +/** Synchronization array */ +struct sync_array_t { + + /** Constructor + Creates a synchronization wait array. It is protected by a mutex + which is automatically reserved when the functions operating on it + are called. + @param[in] num_cells Number of cells to create */ + sync_array_t(ulint num_cells) + UNIV_NOTHROW; + + /** Destructor */ + ~sync_array_t() + UNIV_NOTHROW; + + ulint n_reserved; /*!< number of currently reserved + cells in the wait array */ + ulint n_cells; /*!< number of cells in the + wait array */ + sync_cell_t* array; /*!< pointer to wait array */ + SysMutex mutex; /*!< System mutex protecting the + data structure. As this data + structure is used in constructing + the database mutex, to prevent + infinite recursion in implementation, + we fall back to an OS mutex. */ + ulint res_count; /*!< count of cell reservations + since creation of the array */ + ulint next_free_slot; /*!< the next free cell in the array */ + ulint first_free_slot;/*!< the last slot that was freed */ +}; + +/** User configured sync array size */ +ulong srv_sync_array_size = 1; + +/** Locally stored copy of srv_sync_array_size */ +ulint sync_array_size; + +/** The global array of wait cells for implementation of the database's own +mutexes and read-write locks */ +sync_array_t** sync_wait_array; + +/** count of how many times an object has been signalled */ +ulint sg_count; + +#define sync_array_exit(a) mutex_exit(&(a)->mutex) +#define sync_array_enter(a) mutex_enter(&(a)->mutex) + +#ifdef UNIV_DEBUG +/******************************************************************//** +This function is called only in the debug version. 
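In outline, the detector performs a depth-first walk of the wait-for graph: starting from a waiting cell, it finds the thread holding the contended latch, looks up that thread's own wait cell with sync_array_find_thread(), and recurses; arriving back at the starting cell proves a cycle. A schematic sketch of that walk, using deliberately simplified hypothetical types (the real code must also consider lock modes, pass values and rw-lock debug lists):

#include <unordered_map>

/* Hypothetical, simplified stand-ins for the real structures. */
struct Cell {
    int  holder_thread; /* thread currently blocking this waiter */
    bool waiting;
};

/* thread id -> the cell that thread is itself waiting in (if any) */
using WaitMap = std::unordered_map<int, Cell*>;

/* Returns true if following holder edges from 'cell' leads back to
'start', i.e. the waiters form a cycle. */
static bool detect_cycle(const WaitMap& waiters, const Cell* start,
                         const Cell* cell, unsigned depth = 0)
{
    if (depth > 100 || cell == nullptr || !cell->waiting) {
        return false; /* bounded recursion, like ut_ad(depth < 100) */
    }
    auto it = waiters.find(cell->holder_thread);
    if (it == waiters.end()) {
        return false; /* holder is not itself waiting: no cycle here */
    }
    if (it->second == start) {
        return true;  /* the holder is (transitively) waiting on us */
    }
    return detect_cycle(waiters, start, it->second, depth + 1);
}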
Detects a deadlock +of one or more threads because of waits of semaphores. +@return TRUE if deadlock detected */ +static +bool +sync_array_detect_deadlock( +/*=======================*/ + sync_array_t* arr, /*!< in: wait array; NOTE! the caller must + own the mutex to array */ + sync_cell_t* start, /*!< in: cell where recursive search started */ + sync_cell_t* cell, /*!< in: cell to search */ + ulint depth); /*!< in: recursion depth */ +#endif /* UNIV_DEBUG */ + +/** Constructor +Creates a synchronization wait array. It is protected by a mutex +which is automatically reserved when the functions operating on it +are called. +@param[in] num_cells Number of cells to create */ +sync_array_t::sync_array_t(ulint num_cells) + UNIV_NOTHROW + : + n_reserved(), + n_cells(num_cells), + array(UT_NEW_ARRAY_NOKEY(sync_cell_t, num_cells)), + mutex(), + res_count(), + next_free_slot(), + first_free_slot(ULINT_UNDEFINED) +{ + ut_a(num_cells > 0); + + memset(array, 0x0, sizeof(sync_cell_t) * n_cells); + + /* Then create the mutex to protect the wait array */ + mutex_create(LATCH_ID_SYNC_ARRAY_MUTEX, &mutex); +} + +/** Validate the integrity of the wait array. Check +that the number of reserved cells equals the count variable. +@param[in,out] arr sync wait array */ +static +void +sync_array_validate(sync_array_t* arr) +{ + ulint i; + ulint count = 0; + + sync_array_enter(arr); + + for (i = 0; i < arr->n_cells; i++) { + sync_cell_t* cell; + + cell = sync_array_get_nth_cell(arr, i); + + if (cell->latch.mutex != NULL) { + count++; + } + } + + ut_a(count == arr->n_reserved); + + sync_array_exit(arr); +} + +/** Destructor */ +sync_array_t::~sync_array_t() + UNIV_NOTHROW +{ + ut_a(n_reserved == 0); + + sync_array_validate(this); + + /* Release the mutex protecting the wait array */ + + mutex_free(&mutex); + + UT_DELETE_ARRAY(array); +} + +/*****************************************************************//** +Gets the nth cell in array. +@return cell */ +UNIV_INTERN +sync_cell_t* +sync_array_get_nth_cell( +/*====================*/ + sync_array_t* arr, /*!< in: sync array */ + ulint n) /*!< in: index */ +{ + ut_a(n < arr->n_cells); + + return(arr->array + n); +} + +/******************************************************************//** +Frees the resources in a wait array. */ +static +void +sync_array_free( +/*============*/ + sync_array_t* arr) /*!< in, own: sync wait array */ +{ + UT_DELETE(arr); +} + +/*******************************************************************//** +Returns the event that the thread owning the cell waits for. */ +static +os_event_t +sync_cell_get_event( +/*================*/ + sync_cell_t* cell) /*!< in: non-empty sync array cell */ +{ + switch(cell->request_type) { + case SYNC_MUTEX: + return(cell->latch.mutex->event()); + case RW_LOCK_X_WAIT: + return(cell->latch.lock->wait_ex_event); + default: + return(cell->latch.lock->event); + } +} + +/******************************************************************//** +Reserves a wait array cell for waiting for an object. +The event of the cell is reset to nonsignalled state. 
+@return sync cell to wait on */ +sync_cell_t* +sync_array_reserve_cell( +/*====================*/ + sync_array_t* arr, /*!< in: wait array */ + void* object, /*!< in: pointer to the object to wait for */ + ulint type, /*!< in: lock request type */ + const char* file, /*!< in: file where requested */ + unsigned line) /*!< in: line where requested */ +{ + sync_cell_t* cell; + + sync_array_enter(arr); + + if (arr->first_free_slot != ULINT_UNDEFINED) { + /* Try and find a slot in the free list */ + ut_ad(arr->first_free_slot < arr->next_free_slot); + cell = sync_array_get_nth_cell(arr, arr->first_free_slot); + arr->first_free_slot = cell->line; + } else if (arr->next_free_slot < arr->n_cells) { + /* Try and find a slot after the currently allocated slots */ + cell = sync_array_get_nth_cell(arr, arr->next_free_slot); + ++arr->next_free_slot; + } else { + sync_array_exit(arr); + + // We should return NULL and if there is more than + // one sync array, try another sync array instance. + return(NULL); + } + + ++arr->res_count; + + ut_ad(arr->n_reserved < arr->n_cells); + ut_ad(arr->next_free_slot <= arr->n_cells); + + ++arr->n_reserved; + + /* Reserve the cell. */ + ut_ad(cell->latch.mutex == NULL); + + cell->request_type = type; + + if (cell->request_type == SYNC_MUTEX) { + cell->latch.mutex = reinterpret_cast<WaitMutex*>(object); + } else { + cell->latch.lock = reinterpret_cast<rw_lock_t*>(object); + } + + cell->waiting = false; + + cell->file = file; + cell->line = line; + + sync_array_exit(arr); + + cell->thread_id = os_thread_get_curr_id(); + + cell->reservation_time = time(NULL); + + /* Make sure the event is reset and also store the value of + signal_count at which the event was reset. */ + os_event_t event = sync_cell_get_event(cell); + cell->signal_count = os_event_reset(event); + + return(cell); +} + +/******************************************************************//** +Frees the cell. NOTE! sync_array_wait_event frees the cell +automatically! */ +void +sync_array_free_cell( +/*=================*/ + sync_array_t* arr, /*!< in: wait array */ + sync_cell_t*& cell) /*!< in/out: the cell in the array */ +{ + sync_array_enter(arr); + + ut_a(cell->latch.mutex != NULL); + + cell->waiting = false; + cell->signal_count = 0; + cell->latch.mutex = NULL; + + /* Setup the list of free slots in the array */ + cell->line = arr->first_free_slot; + + arr->first_free_slot = cell - arr->array; + + ut_a(arr->n_reserved > 0); + arr->n_reserved--; + + if (arr->next_free_slot > arr->n_cells / 2 && arr->n_reserved == 0) { +#ifdef UNIV_DEBUG + for (ulint i = 0; i < arr->next_free_slot; ++i) { + cell = sync_array_get_nth_cell(arr, i); + + ut_ad(!cell->waiting); + ut_ad(cell->latch.mutex == 0); + ut_ad(cell->signal_count == 0); + } +#endif /* UNIV_DEBUG */ + arr->next_free_slot = 0; + arr->first_free_slot = ULINT_UNDEFINED; + } + sync_array_exit(arr); + + cell = 0; +} + +/******************************************************************//** +This function should be called when a thread starts to wait on +a wait array cell. In the debug version this function checks +if the wait for a semaphore will result in a deadlock, in which +case prints info and asserts. 
*/ +void +sync_array_wait_event( +/*==================*/ + sync_array_t* arr, /*!< in: wait array */ + sync_cell_t*& cell) /*!< in: index of the reserved cell */ +{ + sync_array_enter(arr); + + ut_ad(!cell->waiting); + ut_ad(cell->latch.mutex); + ut_ad(os_thread_get_curr_id() == cell->thread_id); + + cell->waiting = true; + +#ifdef UNIV_DEBUG + + /* We use simple enter to the mutex below, because if + we cannot acquire it at once, mutex_enter would call + recursively sync_array routines, leading to trouble. + rw_lock_debug_mutex freezes the debug lists. */ + + rw_lock_debug_mutex_enter(); + + if (sync_array_detect_deadlock(arr, cell, cell, 0)) { + + ib::fatal() << "########################################" + " Deadlock Detected!"; + } + + rw_lock_debug_mutex_exit(); +#endif /* UNIV_DEBUG */ + sync_array_exit(arr); + + tpool::tpool_wait_begin(); + os_event_wait_low(sync_cell_get_event(cell), cell->signal_count); + tpool::tpool_wait_end(); + + sync_array_free_cell(arr, cell); + + cell = 0; +} + +/******************************************************************//** +Reports info of a wait array cell. */ +static +void +sync_array_cell_print( +/*==================*/ + FILE* file, /*!< in: file where to print */ + sync_cell_t* cell) /*!< in: sync cell */ +{ + rw_lock_t* rwlock; + ulint type; + ulint writer; + + type = cell->request_type; + + fprintf(file, + "--Thread " ULINTPF " has waited at %s line " ULINTPF + " for %.2f seconds the semaphore:\n", + ulint(cell->thread_id), + innobase_basename(cell->file), cell->line, + difftime(time(NULL), cell->reservation_time)); + + switch (type) { + default: + ut_error; + case RW_LOCK_X: + case RW_LOCK_X_WAIT: + case RW_LOCK_SX: + case RW_LOCK_S: + fputs(type == RW_LOCK_X ? "X-lock on" + : type == RW_LOCK_X_WAIT ? "X-lock (wait_ex) on" + : type == RW_LOCK_SX ? "SX-lock on" + : "S-lock on", file); + + rwlock = cell->latch.lock; + + if (rwlock) { + fprintf(file, + " RW-latch at %p created in file %s line %u\n", + (void*) rwlock, innobase_basename(rwlock->cfile_name), + rwlock->cline); + + writer = rw_lock_get_writer(rwlock); + + if (writer != RW_LOCK_NOT_LOCKED) { + + fprintf(file, + "a writer (thread id " ULINTPF ") has" + " reserved it in mode %s", + ulint(rwlock->writer_thread), + writer == RW_LOCK_X ? " exclusive\n" + : writer == RW_LOCK_SX ? " SX\n" + : " wait exclusive\n"); + } + + fprintf(file, + "number of readers " ULINTPF + ", waiters flag %d, " + "lock_word: %x\n" + "Last time write locked in file %s line %u" +#if 0 /* JAN: TODO: FIX LATER */ + "\nHolder thread " ULINTPF + " file %s line " ULINTPF +#endif + "\n", + rw_lock_get_reader_count(rwlock), + uint32_t{rwlock->waiters}, + int32_t{rwlock->lock_word}, + innobase_basename(rwlock->last_x_file_name), + rwlock->last_x_line +#if 0 /* JAN: TODO: FIX LATER */ + , ulint(rwlock->thread_id), + innobase_basename(rwlock->file_name), + rwlock->line +#endif + ); + } + break; + case SYNC_MUTEX: + WaitMutex* mutex = cell->latch.mutex; + const WaitMutex::MutexPolicy& policy = mutex->policy(); +#ifdef UNIV_DEBUG + const char* name = policy.context.get_enter_filename(); + if (name == NULL) { + /* The mutex might have been released. 
*/ + name = "NULL"; + } +#endif /* UNIV_DEBUG */ + + if (mutex) { + fprintf(file, + "Mutex at %p, %s, lock var %x\n" +#ifdef UNIV_DEBUG + "Last time reserved in file %s line %u" +#endif /* UNIV_DEBUG */ + "\n", + (void*) mutex, + policy.to_string().c_str(), + mutex->state() +#ifdef UNIV_DEBUG + ,name, + policy.context.get_enter_line() +#endif /* UNIV_DEBUG */ + ); + } + break; + } + + if (!cell->waiting) { + fputs("wait has ended\n", file); + } +} + +#ifdef UNIV_DEBUG +/******************************************************************//** +Looks for a cell with the given thread id. +@return pointer to cell or NULL if not found */ +static +sync_cell_t* +sync_array_find_thread( +/*===================*/ + sync_array_t* arr, /*!< in: wait array */ + os_thread_id_t thread) /*!< in: thread id */ +{ + ulint i; + + for (i = 0; i < arr->n_cells; i++) { + sync_cell_t* cell; + + cell = sync_array_get_nth_cell(arr, i); + + if (cell->latch.mutex != NULL + && os_thread_eq(cell->thread_id, thread)) { + + return(cell); /* Found */ + } + } + + return(NULL); /* Not found */ +} + +/******************************************************************//** +Recursion step for deadlock detection. +@return TRUE if deadlock detected */ +static +ibool +sync_array_deadlock_step( +/*=====================*/ + sync_array_t* arr, /*!< in: wait array; NOTE! the caller must + own the mutex to array */ + sync_cell_t* start, /*!< in: cell where recursive search + started */ + os_thread_id_t thread, /*!< in: thread to look at */ + ulint pass, /*!< in: pass value */ + ulint depth) /*!< in: recursion depth */ +{ + sync_cell_t* new_cell; + + if (pass != 0) { + /* If pass != 0, then we do not know which threads are + responsible of releasing the lock, and no deadlock can + be detected. */ + + return(FALSE); + } + + new_cell = sync_array_find_thread(arr, thread); + + if (new_cell == start) { + /* Deadlock */ + fputs("########################################\n" + "DEADLOCK of threads detected!\n", stderr); + + return(TRUE); + + } else if (new_cell) { + return(sync_array_detect_deadlock( + arr, start, new_cell, depth + 1)); + } + return(FALSE); +} + +/** +Report an error to stderr. +@param lock rw-lock instance +@param debug rw-lock debug information +@param cell thread context */ +static +void +sync_array_report_error( + rw_lock_t* lock, + rw_lock_debug_t* debug, + sync_cell_t* cell) +{ + fprintf(stderr, "rw-lock %p ", (void*) lock); + sync_array_cell_print(stderr, cell); + rw_lock_debug_print(stderr, debug); +} + +/******************************************************************//** +This function is called only in the debug version. Detects a deadlock +of one or more threads because of waits of semaphores. +@return TRUE if deadlock detected */ +static +bool +sync_array_detect_deadlock( +/*=======================*/ + sync_array_t* arr, /*!< in: wait array; NOTE! 
the caller must + own the mutex to array */ + sync_cell_t* start, /*!< in: cell where recursive search started */ + sync_cell_t* cell, /*!< in: cell to search */ + ulint depth) /*!< in: recursion depth */ +{ + rw_lock_t* lock; + os_thread_id_t thread; + ibool ret; + rw_lock_debug_t*debug; + + ut_a(arr); + ut_a(start); + ut_a(cell); + ut_ad(cell->latch.mutex != 0); + ut_ad(os_thread_get_curr_id() == start->thread_id); + ut_ad(depth < 100); + + depth++; + + if (!cell->waiting) { + /* No deadlock here */ + return(false); + } + + switch (cell->request_type) { + case SYNC_MUTEX: { + + WaitMutex* mutex = cell->latch.mutex; + const WaitMutex::MutexPolicy& policy = mutex->policy(); + + if (mutex->state() != MUTEX_STATE_UNLOCKED) { + thread = policy.context.get_thread_id(); + + /* Note that mutex->thread_id above may be + also OS_THREAD_ID_UNDEFINED, because the + thread which held the mutex maybe has not + yet updated the value, or it has already + released the mutex: in this case no deadlock + can occur, as the wait array cannot contain + a thread with ID_UNDEFINED value. */ + ret = sync_array_deadlock_step( + arr, start, thread, 0, depth); + + if (ret) { + const char* name; + + name = policy.context.get_enter_filename(); + + if (name == NULL) { + /* The mutex might have been + released. */ + name = "NULL"; + } + + ib::info() + << "Mutex " << mutex << " owned by" + " thread " << thread + << " file " << name << " line " + << policy.context.get_enter_line(); + + sync_array_cell_print(stderr, cell); + + return(true); + } + } + + /* No deadlock */ + return(false); + } + + case RW_LOCK_X: + case RW_LOCK_X_WAIT: + + lock = cell->latch.lock; + + for (debug = UT_LIST_GET_FIRST(lock->debug_list); + debug != NULL; + debug = UT_LIST_GET_NEXT(list, debug)) { + + thread = debug->thread_id; + + switch (debug->lock_type) { + case RW_LOCK_X: + case RW_LOCK_SX: + case RW_LOCK_X_WAIT: + if (os_thread_eq(thread, cell->thread_id)) { + break; + } + /* fall through */ + case RW_LOCK_S: + + /* The (wait) x-lock request can block + infinitely only if someone (can be also cell + thread) is holding s-lock, or someone + (cannot be cell thread) (wait) x-lock or + sx-lock, and he is blocked by start thread */ + + ret = sync_array_deadlock_step( + arr, start, thread, debug->pass, + depth); + + if (ret) { + sync_array_report_error( + lock, debug, cell); + rw_lock_debug_print(stderr, debug); + return(TRUE); + } + } + } + + return(false); + + case RW_LOCK_SX: + + lock = cell->latch.lock; + + for (debug = UT_LIST_GET_FIRST(lock->debug_list); + debug != 0; + debug = UT_LIST_GET_NEXT(list, debug)) { + + thread = debug->thread_id; + + switch (debug->lock_type) { + case RW_LOCK_X: + case RW_LOCK_SX: + case RW_LOCK_X_WAIT: + + if (os_thread_eq(thread, cell->thread_id)) { + break; + } + + /* The sx-lock request can block infinitely + only if someone (can be also cell thread) is + holding (wait) x-lock or sx-lock, and he is + blocked by start thread */ + + ret = sync_array_deadlock_step( + arr, start, thread, debug->pass, + depth); + + if (ret) { + sync_array_report_error( + lock, debug, cell); + return(TRUE); + } + } + } + + return(false); + + case RW_LOCK_S: + + lock = cell->latch.lock; + + for (debug = UT_LIST_GET_FIRST(lock->debug_list); + debug != 0; + debug = UT_LIST_GET_NEXT(list, debug)) { + + thread = debug->thread_id; + + if (debug->lock_type == RW_LOCK_X + || debug->lock_type == RW_LOCK_X_WAIT) { + + /* The s-lock request can block infinitely + only if someone (can also be cell thread) is + holding (wait) x-lock, and he is 
blocked by + start thread */ + + ret = sync_array_deadlock_step( + arr, start, thread, debug->pass, + depth); + + if (ret) { + sync_array_report_error( + lock, debug, cell); + return(TRUE); + } + } + } + + return(false); + + default: + ut_error; + } + + return(true); +} +#endif /* UNIV_DEBUG */ + +/**********************************************************************//** +Prints warnings of long semaphore waits to stderr. +@return TRUE if fatal semaphore wait threshold was exceeded */ +static +bool +sync_array_print_long_waits_low( +/*============================*/ + sync_array_t* arr, /*!< in: sync array instance */ + os_thread_id_t* waiter, /*!< out: longest waiting thread */ + const void** sema, /*!< out: longest-waited-for semaphore */ + ibool* noticed)/*!< out: TRUE if long wait noticed */ +{ + double fatal_timeout = static_cast<double>( + srv_fatal_semaphore_wait_threshold); + ibool fatal = FALSE; + double longest_diff = 0; + ulint i; + + /* For huge tables, skip the check during CHECK TABLE etc... */ + if (btr_validate_index_running) { + return(false); + } + +#if defined HAVE_valgrind && !__has_feature(memory_sanitizer) + /* Increase the timeouts if running under valgrind because it executes + extremely slowly. HAVE_valgrind does not necessary mean that + we are running under valgrind but we have no better way to tell. + See Bug#58432 innodb.innodb_bug56143 fails under valgrind + for an example */ +# define SYNC_ARRAY_TIMEOUT 2400 + fatal_timeout *= 10; +#else +# define SYNC_ARRAY_TIMEOUT 240 +#endif + const time_t now = time(NULL); + + for (ulint i = 0; i < arr->n_cells; i++) { + + sync_cell_t* cell; + void* latch; + + cell = sync_array_get_nth_cell(arr, i); + + latch = cell->latch.mutex; + + if (latch == NULL || !cell->waiting) { + + continue; + } + + double diff = difftime(now, cell->reservation_time); + + if (diff > SYNC_ARRAY_TIMEOUT) { + ib::warn() << "A long semaphore wait:"; + sync_array_cell_print(stderr, cell); + *noticed = TRUE; + } + + if (diff > fatal_timeout) { + fatal = TRUE; + } + + if (diff > longest_diff) { + longest_diff = diff; + *sema = latch; + *waiter = cell->thread_id; + } + } + + /* We found a long semaphore wait, print all threads that are + waiting for a semaphore. */ + if (*noticed) { + for (i = 0; i < arr->n_cells; i++) { + void* wait_object; + sync_cell_t* cell; + + cell = sync_array_get_nth_cell(arr, i); + + wait_object = cell->latch.mutex; + + if (wait_object == NULL || !cell->waiting) { + + continue; + } + + ib::info() << "A semaphore wait:"; + sync_array_cell_print(stderr, cell); + } + } + +#undef SYNC_ARRAY_TIMEOUT + + return(fatal); +} + +/**********************************************************************//** +Prints warnings of long semaphore waits to stderr. +@return TRUE if fatal semaphore wait threshold was exceeded */ +ibool +sync_array_print_long_waits( +/*========================*/ + os_thread_id_t* waiter, /*!< out: longest waiting thread */ + const void** sema) /*!< out: longest-waited-for semaphore */ +{ + ulint i; + ibool fatal = FALSE; + ibool noticed = FALSE; + + for (i = 0; i < sync_array_size; ++i) { + + sync_array_t* arr = sync_wait_array[i]; + + sync_array_enter(arr); + + if (sync_array_print_long_waits_low( + arr, waiter, sema, ¬iced)) { + + fatal = TRUE; + } + + sync_array_exit(arr); + } + + if (noticed) { + /* If some crucial semaphore is reserved, then also the InnoDB + Monitor can hang, and we do not get diagnostics. 
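The long-wait scan in sync_array_print_long_waits_low() above boils down to a timestamp sweep over the reserved cells. A condensed, hypothetical rendering of that logic (the real function additionally tracks the longest waiter and skips the check while btr_validate_index_running is set):

#include <cstdio>
#include <ctime>
#include <vector>

/* Hypothetical reservation record; stands in for sync_cell_t here. */
struct Reservation {
    time_t reserved_at;
    bool   waiting;
};

/* Warn about waits past a soft timeout and report whether any wait
crossed the fatal threshold (the caller then aborts the server). */
static bool scan_long_waits(const std::vector<Reservation>& cells,
                            double warn_after, double fatal_after)
{
    bool         fatal = false;
    const time_t now   = time(nullptr);

    for (const Reservation& r : cells) {
        if (!r.waiting) {
            continue;
        }
        const double waited = difftime(now, r.reserved_at);
        if (waited > warn_after) {
            std::fprintf(stderr, "long semaphore wait: %.0f s\n", waited);
        }
        if (waited > fatal_after) {
            fatal = true;
        }
    }
    return fatal;
}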
Since in + many cases an InnoDB hang is caused by a pwrite() or a pread() + call hanging inside the operating system, let us print right + now the values of pending calls of these. */ + + fprintf(stderr, + "InnoDB: Pending reads " UINT64PF + ", writes " UINT64PF "\n", + MONITOR_VALUE(MONITOR_OS_PENDING_READS), + MONITOR_VALUE(MONITOR_OS_PENDING_WRITES)); + + lock_wait_timeout_task(nullptr); + } + + return(fatal); +} + +/**********************************************************************//** +Prints info of the wait array. */ +static +void +sync_array_print_info_low( +/*======================*/ + FILE* file, /*!< in: file where to print */ + sync_array_t* arr) /*!< in: wait array */ +{ + ulint i; + ulint count = 0; + + fprintf(file, + "OS WAIT ARRAY INFO: reservation count " ULINTPF "\n", + arr->res_count); + + for (i = 0; count < arr->n_reserved; ++i) { + sync_cell_t* cell; + + cell = sync_array_get_nth_cell(arr, i); + + if (cell->latch.mutex != 0) { + count++; + sync_array_cell_print(file, cell); + } + } +} + +/**********************************************************************//** +Prints info of the wait array. */ +static +void +sync_array_print_info( +/*==================*/ + FILE* file, /*!< in: file where to print */ + sync_array_t* arr) /*!< in: wait array */ +{ + sync_array_enter(arr); + + sync_array_print_info_low(file, arr); + + sync_array_exit(arr); +} + +/** Create the primary system wait arrays */ +void sync_array_init() +{ + ut_a(sync_wait_array == NULL); + ut_a(srv_sync_array_size > 0); + ut_a(srv_max_n_threads > 0); + + sync_array_size = srv_sync_array_size; + + sync_wait_array = UT_NEW_ARRAY_NOKEY(sync_array_t*, sync_array_size); + + ulint n_slots = 1 + (srv_max_n_threads - 1) / sync_array_size; + + for (ulint i = 0; i < sync_array_size; ++i) { + + sync_wait_array[i] = UT_NEW_NOKEY(sync_array_t(n_slots)); + } +} + +/** Destroy the sync array wait sub-system. */ +void sync_array_close() +{ + for (ulint i = 0; i < sync_array_size; ++i) { + sync_array_free(sync_wait_array[i]); + } + + UT_DELETE_ARRAY(sync_wait_array); + sync_wait_array = NULL; +} + +/**********************************************************************//** +Print info about the sync array(s). */ +void +sync_array_print( +/*=============*/ + FILE* file) /*!< in/out: Print to this stream */ +{ + for (ulint i = 0; i < sync_array_size; ++i) { + sync_array_print_info(file, sync_wait_array[i]); + } + + fprintf(file, + "OS WAIT ARRAY INFO: signal count " ULINTPF "\n", sg_count); + +} + +/**********************************************************************//** +Prints info of the wait array without using any mutexes/semaphores. */ +UNIV_INTERN +void +sync_array_print_innodb(void) +/*=========================*/ +{ + ulint i; + sync_array_t* arr = sync_array_get(); + + fputs("InnoDB: Semaphore wait debug output started for InnoDB:\n", stderr); + + for (i = 0; i < arr->n_cells; i++) { + void* wait_object; + sync_cell_t* cell; + + cell = sync_array_get_nth_cell(arr, i); + + wait_object = cell->latch.mutex; + + if (wait_object == NULL || !cell->waiting) { + + continue; + } + + fputs("InnoDB: Warning: semaphore wait:\n", + stderr); + sync_array_cell_print(stderr, cell); + } + + fputs("InnoDB: Semaphore wait debug output ended:\n", stderr); + +} + +/**********************************************************************//** +Get number of items on sync array. 
*/ +UNIV_INTERN +ulint +sync_arr_get_n_items(void) +/*======================*/ +{ + sync_array_t* sync_arr = sync_array_get(); + return (ulint) sync_arr->n_cells; +} + +/******************************************************************//** +Get specified item from sync array if it is reserved. Set given +pointer to array item if it is reserved. +@return true if item is reserved, false othervise */ +UNIV_INTERN +ibool +sync_arr_get_item( +/*==============*/ + ulint i, /*!< in: requested item */ + sync_cell_t **cell) /*!< out: cell contents if item + reserved */ +{ + sync_array_t* sync_arr; + sync_cell_t* wait_cell; + void* wait_object; + ibool found = FALSE; + + sync_arr = sync_array_get(); + wait_cell = sync_array_get_nth_cell(sync_arr, i); + + if (wait_cell) { + wait_object = wait_cell->latch.mutex; + + if(wait_object != NULL && wait_cell->waiting) { + found = TRUE; + *cell = wait_cell; + } + } + + return found; +} + +/*******************************************************************//** +Function to populate INFORMATION_SCHEMA.INNODB_SYS_SEMAPHORE_WAITS table. +Loop through each item on sync array, and extract the column +information and fill the INFORMATION_SCHEMA.INNODB_SYS_SEMAPHORE_WAITS table. +@return 0 on success */ +UNIV_INTERN +int +sync_arr_fill_sys_semphore_waits_table( +/*===================================*/ + THD* thd, /*!< in: thread */ + TABLE_LIST* tables, /*!< in/out: tables to fill */ + Item* ) /*!< in: condition (not used) */ +{ + Field** fields; + ulint n_items; + + DBUG_ENTER("i_s_sys_semaphore_waits_fill_table"); + RETURN_IF_INNODB_NOT_STARTED(tables->schema_table_name.str); + + /* deny access to user without PROCESS_ACL privilege */ + if (check_global_access(thd, PROCESS_ACL)) { + DBUG_RETURN(0); + } + + fields = tables->table->field; + n_items = sync_arr_get_n_items(); + ulint type; + + for(ulint i=0; i < n_items;i++) { + sync_cell_t *cell=NULL; + if (sync_arr_get_item(i, &cell)) { + WaitMutex* mutex; + type = cell->request_type; + /* JAN: FIXME + OK(fields[SYS_SEMAPHORE_WAITS_THREAD_ID]->store(, + ulint(cell->thread), true)); + */ + OK(field_store_string(fields[SYS_SEMAPHORE_WAITS_FILE], innobase_basename(cell->file))); + OK(fields[SYS_SEMAPHORE_WAITS_LINE]->store(cell->line, true)); + fields[SYS_SEMAPHORE_WAITS_LINE]->set_notnull(); + OK(fields[SYS_SEMAPHORE_WAITS_WAIT_TIME]->store( + difftime(time(NULL), + cell->reservation_time))); + + if (type == SYNC_MUTEX) { + mutex = static_cast<WaitMutex*>(cell->latch.mutex); + + if (mutex) { + // JAN: FIXME + // OK(field_store_string(fields[SYS_SEMAPHORE_WAITS_OBJECT_NAME], mutex->cmutex_name)); + OK(fields[SYS_SEMAPHORE_WAITS_WAIT_OBJECT]->store((longlong)mutex, true)); + OK(field_store_string(fields[SYS_SEMAPHORE_WAITS_WAIT_TYPE], "MUTEX")); + //OK(fields[SYS_SEMAPHORE_WAITS_HOLDER_THREAD_ID]->store(mutex->thread_id, true)); + //OK(field_store_string(fields[SYS_SEMAPHORE_WAITS_HOLDER_FILE], innobase_basename(mutex->file_name))); + //OK(fields[SYS_SEMAPHORE_WAITS_HOLDER_LINE]->store(mutex->line, true)); + //fields[SYS_SEMAPHORE_WAITS_HOLDER_LINE]->set_notnull(); + //OK(field_store_string(fields[SYS_SEMAPHORE_WAITS_CREATED_FILE], innobase_basename(mutex->cfile_name))); + //OK(fields[SYS_SEMAPHORE_WAITS_CREATED_LINE]->store(mutex->cline, true)); + //fields[SYS_SEMAPHORE_WAITS_CREATED_LINE]->set_notnull(); + //OK(fields[SYS_SEMAPHORE_WAITS_WAITERS_FLAG]->store(mutex->waiters, true)); + //OK(fields[SYS_SEMAPHORE_WAITS_LOCK_WORD]->store(mutex->lock_word, true)); + 
//OK(field_store_string(fields[SYS_SEMAPHORE_WAITS_LAST_WRITER_FILE], innobase_basename(mutex->file_name))); + //OK(fields[SYS_SEMAPHORE_WAITS_LAST_WRITER_LINE]->store(mutex->line, true)); + //fields[SYS_SEMAPHORE_WAITS_LAST_WRITER_LINE]->set_notnull(); + //OK(fields[SYS_SEMAPHORE_WAITS_OS_WAIT_COUNT]->store(mutex->count_os_wait, true)); + } + } else if (type == RW_LOCK_X_WAIT + || type == RW_LOCK_X + || type == RW_LOCK_SX + || type == RW_LOCK_S) { + rw_lock_t* rwlock=NULL; + + rwlock = static_cast<rw_lock_t *> (cell->latch.lock); + + if (rwlock) { + ulint writer = rw_lock_get_writer(rwlock); + + OK(fields[SYS_SEMAPHORE_WAITS_WAIT_OBJECT]->store((longlong)rwlock, true)); + if (type == RW_LOCK_X) { + OK(field_store_string(fields[SYS_SEMAPHORE_WAITS_WAIT_TYPE], "RW_LOCK_X")); + } else if (type == RW_LOCK_X_WAIT) { + OK(field_store_string(fields[SYS_SEMAPHORE_WAITS_WAIT_TYPE], "RW_LOCK_X_WAIT")); + } else if (type == RW_LOCK_S) { + OK(field_store_string(fields[SYS_SEMAPHORE_WAITS_WAIT_TYPE], "RW_LOCK_S")); + } else if (type == RW_LOCK_SX) { + OK(field_store_string(fields[SYS_SEMAPHORE_WAITS_WAIT_TYPE], "RW_LOCK_SX")); + } + + if (writer != RW_LOCK_NOT_LOCKED) { + // JAN: FIXME + // OK(field_store_string(fields[SYS_SEMAPHORE_WAITS_OBJECT_NAME], rwlock->lock_name)); + OK(fields[SYS_SEMAPHORE_WAITS_WRITER_THREAD]->store(ulint(rwlock->writer_thread), true)); + + if (writer == RW_LOCK_X) { + OK(field_store_string(fields[SYS_SEMAPHORE_WAITS_RESERVATION_MODE], "RW_LOCK_X")); + } else if (writer == RW_LOCK_X_WAIT) { + OK(field_store_string(fields[SYS_SEMAPHORE_WAITS_RESERVATION_MODE], "RW_LOCK_X_WAIT")); + } else if (type == RW_LOCK_SX) { + OK(field_store_string(fields[SYS_SEMAPHORE_WAITS_RESERVATION_MODE], "RW_LOCK_SX")); + } + + //OK(fields[SYS_SEMAPHORE_WAITS_HOLDER_THREAD_ID]->store(rwlock->thread_id, true)); + //OK(field_store_string(fields[SYS_SEMAPHORE_WAITS_HOLDER_FILE], innobase_basename(rwlock->file_name))); + //OK(fields[SYS_SEMAPHORE_WAITS_HOLDER_LINE]->store(rwlock->line, true)); + //fields[SYS_SEMAPHORE_WAITS_HOLDER_LINE]->set_notnull(); + OK(fields[SYS_SEMAPHORE_WAITS_READERS]->store(rw_lock_get_reader_count(rwlock), true)); + OK(fields[SYS_SEMAPHORE_WAITS_WAITERS_FLAG]->store( + rwlock->waiters, + true)); + OK(fields[SYS_SEMAPHORE_WAITS_LOCK_WORD]->store( + rwlock->lock_word, + true)); + OK(field_store_string(fields[SYS_SEMAPHORE_WAITS_LAST_WRITER_FILE], innobase_basename(rwlock->last_x_file_name))); + OK(fields[SYS_SEMAPHORE_WAITS_LAST_WRITER_LINE]->store(rwlock->last_x_line, true)); + fields[SYS_SEMAPHORE_WAITS_LAST_WRITER_LINE]->set_notnull(); + OK(fields[SYS_SEMAPHORE_WAITS_OS_WAIT_COUNT]->store(rwlock->count_os_wait, true)); + } + } + } + + OK(schema_table_store_record(thd, tables->table)); + } + } + + DBUG_RETURN(0); +} diff --git a/storage/innobase/sync/sync0debug.cc b/storage/innobase/sync/sync0debug.cc new file mode 100644 index 00000000..7c3e4c05 --- /dev/null +++ b/storage/innobase/sync/sync0debug.cc @@ -0,0 +1,1423 @@ +/***************************************************************************** + +Copyright (c) 2014, 2016, Oracle and/or its affiliates. All Rights Reserved. +Copyright (c) 2017, 2020, MariaDB Corporation. + +Portions of this file contain modifications contributed and copyrighted by +Google, Inc. Those modifications are gratefully acknowledged and are described +briefly in the InnoDB documentation. The contributions by Google are +incorporated with their permission, and subject to the conditions contained in +the file COPYING.Google. 
+ +This program is free software; you can redistribute it and/or modify it under +the terms of the GNU General Public License as published by the Free Software +Foundation; version 2 of the License. + +This program is distributed in the hope that it will be useful, but WITHOUT +ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS +FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details. + +You should have received a copy of the GNU General Public License along with +this program; if not, write to the Free Software Foundation, Inc., +51 Franklin Street, Fifth Floor, Boston, MA 02110-1335 USA + +*****************************************************************************/ + +/**************************************************//** +@file sync/sync0debug.cc +Debug checks for latches. + +Created 2012-08-21 Sunny Bains +*******************************************************/ + +#include "sync0sync.h" +#include "sync0debug.h" +#include "srv0start.h" +#include "fil0fil.h" + +#include <vector> +#include <string> +#include <algorithm> +#include <map> + +#ifdef UNIV_DEBUG + +my_bool srv_sync_debug; + +/** The global mutex which protects debug info lists of all rw-locks. +To modify the debug info list of an rw-lock, this mutex has to be +acquired in addition to the mutex protecting the lock. */ +static SysMutex rw_lock_debug_mutex; + +/** The latch held by a thread */ +struct Latched { + + /** Constructor */ + Latched() : m_latch(), m_level(SYNC_UNKNOWN) { } + + /** Constructor + @param[in] latch Latch instance + @param[in] level Level of latch held */ + Latched(const latch_t* latch, + latch_level_t level) + : + m_latch(latch), + m_level(level) + { + /* No op */ + } + + /** @return the latch level */ + latch_level_t get_level() const + { + return(m_level); + } + + /** Check if the rhs latch and level match + @param[in] rhs instance to compare with + @return true on match */ + bool operator==(const Latched& rhs) const + { + return(m_latch == rhs.m_latch && m_level == rhs.m_level); + } + + /** The latch instance */ + const latch_t* m_latch; + + /** The latch level. For buffer blocks we can pass a separate latch + level to check against, see buf_block_dbg_add_level() */ + latch_level_t m_level; +}; + +/** Thread specific latches. This is ordered on level in descending order. */ +typedef std::vector<Latched, ut_allocator<Latched> > Latches; + +/** The deadlock detector. */ +struct LatchDebug { + + /** Debug mutex for control structures, should not be tracked + by this module. */ + typedef OSMutex Mutex; + + /** Comparator for the ThreadMap. */ + struct os_thread_id_less + : public std::binary_function< + os_thread_id_t, + os_thread_id_t, + bool> + { + /** @return true if lhs < rhs */ + bool operator()( + const os_thread_id_t& lhs, + const os_thread_id_t& rhs) const + UNIV_NOTHROW + { + return(ulint(lhs) < ulint(rhs)); + } + }; + + /** For tracking a thread's latches. */ + typedef std::map< + os_thread_id_t, + Latches*, + os_thread_id_less, + ut_allocator<std::pair<const os_thread_id_t, Latches*> > > + ThreadMap; + + /** Constructor */ + LatchDebug() + UNIV_NOTHROW; + + /** Destructor */ + ~LatchDebug() + UNIV_NOTHROW + { + m_mutex.destroy(); + } + + /** Create a new instance if one doesn't exist else return + the existing one. + @param[in] add add an empty entry if one is not + found (default no) + @return pointer to a thread's acquired latches. 
*/ + Latches* thread_latches(bool add = false) + UNIV_NOTHROW; + + /** Check that all the latches already owned by a thread have a lower + level than limit. + @param[in] latches the thread's existing (acquired) latches + @param[in] limit to check against + @return latched if there is one with a level <= limit . */ + const Latched* less( + const Latches* latches, + latch_level_t limit) const + UNIV_NOTHROW; + + /** Checks if the level value exists in the thread's acquired latches. + @param[in] latches the thread's existing (acquired) latches + @param[in] level to lookup + @return latch if found or 0 */ + const latch_t* find( + const Latches* Latches, + latch_level_t level) const + UNIV_NOTHROW; + + /** + Checks if the level value exists in the thread's acquired latches. + @param[in] level to lookup + @return latch if found or 0 */ + const latch_t* find(latch_level_t level) + UNIV_NOTHROW; + + /** Report error and abort. + @param[in] latches thread's existing latches + @param[in] latched The existing latch causing the + invariant to fail + @param[in] level The new level request that breaks + the order */ + void crash( + const Latches* latches, + const Latched* latched, + latch_level_t level) const + UNIV_NOTHROW; + + /** Do a basic ordering check. + @param[in] latches thread's existing latches + @param[in] requested_level Level requested by latch + @param[in] level declared ulint so that we can + do level - 1. The level of the + latch that the thread is trying + to acquire + @return true if passes, else crash with error message. */ + inline bool basic_check( + const Latches* latches, + latch_level_t requested_level, + lint level) const + UNIV_NOTHROW; + + /** Adds a latch and its level in the thread level array. Allocates + the memory for the array if called for the first time for this + OS thread. Makes the checks against other latch levels stored + in the array for this thread. + + @param[in] latch latch that the thread wants to acqire. + @param[in] level latch level to check against */ + void lock_validate( + const latch_t* latch, + latch_level_t level) + UNIV_NOTHROW + { + /* Ignore diagnostic latches, starting with '.' */ + + if (*latch->get_name() != '.' + && latch->get_level() != SYNC_LEVEL_VARYING) { + + ut_ad(level != SYNC_LEVEL_VARYING); + + Latches* latches = check_order(latch, level); + + ut_a(latches->empty() + || level == SYNC_LEVEL_VARYING + || level == SYNC_NO_ORDER_CHECK + || latches->back().get_level() + == SYNC_NO_ORDER_CHECK + || latches->back().m_latch->get_level() + == SYNC_LEVEL_VARYING + || latches->back().get_level() >= level); + } + } + + /** Adds a latch and its level in the thread level array. Allocates + the memory for the array if called for the first time for this + OS thread. Makes the checks against other latch levels stored + in the array for this thread. + + @param[in] latch latch that the thread wants to acqire. + @param[in] level latch level to check against */ + void lock_granted( + const latch_t* latch, + latch_level_t level) + UNIV_NOTHROW + { + /* Ignore diagnostic latches, starting with '.' */ + + if (*latch->get_name() != '.' + && latch->get_level() != SYNC_LEVEL_VARYING) { + + Latches* latches = thread_latches(true); + + latches->push_back(Latched(latch, level)); + } + } + + /** For recursive X rw-locks. + @param[in] latch The RW-Lock to relock */ + void relock(const latch_t* latch) + UNIV_NOTHROW + { + ut_a(latch->m_rw_lock); + + latch_level_t level = latch->get_level(); + + /* Ignore diagnostic latches, starting with '.' 
*/ + + if (*latch->get_name() != '.' + && latch->get_level() != SYNC_LEVEL_VARYING) { + + Latches* latches = thread_latches(true); + + Latches::iterator it = std::find( + latches->begin(), latches->end(), + Latched(latch, level)); + + ut_a(latches->empty() + || level == SYNC_LEVEL_VARYING + || level == SYNC_NO_ORDER_CHECK + || latches->back().m_latch->get_level() + == SYNC_LEVEL_VARYING + || latches->back().m_latch->get_level() + == SYNC_NO_ORDER_CHECK + || latches->back().get_level() >= level + || it != latches->end()); + + if (it == latches->end()) { + latches->push_back(Latched(latch, level)); + } else { + latches->insert(it, Latched(latch, level)); + } + } + } + + /** Iterate over a thread's latches. + @param[in] functor The callback + @return true if the functor returns true. */ + bool for_each(const sync_check_functor_t& functor) + UNIV_NOTHROW + { + if (const Latches* latches = thread_latches()) { + Latches::const_iterator end = latches->end(); + for (Latches::const_iterator it = latches->begin(); + it != end; ++it) { + + if (functor(it->m_level)) { + return(true); + } + } + } + + return(false); + } + + /** Removes a latch from the thread level array if it is found there. + @param[in] latch The latch that was released + @return true if found in the array; it is not an error if the latch is + not found, as we presently are not able to determine the level for + every latch reservation the program does */ + void unlock(const latch_t* latch) UNIV_NOTHROW; + + /** Get the level name + @param[in] level The level ID to lookup + @return level name */ + const std::string& get_level_name(latch_level_t level) const + UNIV_NOTHROW + { + Levels::const_iterator it = m_levels.find(level); + + ut_ad(it != m_levels.end()); + + return(it->second); + } + + /** Initialise the debug data structures */ + static void init() + UNIV_NOTHROW; + + /** Shutdown the latch debug checking */ + static void shutdown() + UNIV_NOTHROW; + + /** @return the singleton instance */ + static LatchDebug* instance() + UNIV_NOTHROW + { + return(s_instance); + } + + /** Create the singleton instance */ + static void create_instance() + UNIV_NOTHROW + { + ut_ad(s_instance == NULL); + + s_instance = UT_NEW_NOKEY(LatchDebug()); + } + +private: + /** Disable copying */ + LatchDebug(const LatchDebug&); + LatchDebug& operator=(const LatchDebug&); + + /** Adds a latch and its level in the thread level array. Allocates + the memory for the array if called first time for this OS thread. + Makes the checks against other latch levels stored in the array + for this thread. + + @param[in] latch pointer to a mutex or an rw-lock + @param[in] level level in the latching order + @return the thread's latches */ + Latches* check_order( + const latch_t* latch, + latch_level_t level) + UNIV_NOTHROW; + + /** Print the latches acquired by a thread + @param[in] latches Latches acquired by a thread */ + void print_latches(const Latches* latches) const + UNIV_NOTHROW; + + /** Special handling for the RTR mutexes. We need to add proper + levels for them if possible. + @param[in] latch Latch to check + @return true if it is a an _RTR_ mutex */ + bool is_rtr_mutex(const latch_t* latch) const + UNIV_NOTHROW + { + return(latch->get_id() == LATCH_ID_RTR_ACTIVE_MUTEX + || latch->get_id() == LATCH_ID_RTR_PATH_MUTEX + || latch->get_id() == LATCH_ID_RTR_MATCH_MUTEX); + } + +private: + /** Comparator for the Levels . 
*/ + struct latch_level_less + : public std::binary_function< + latch_level_t, + latch_level_t, + bool> + { + /** @return true if lhs < rhs */ + bool operator()( + const latch_level_t& lhs, + const latch_level_t& rhs) const + UNIV_NOTHROW + { + return(lhs < rhs); + } + }; + + typedef std::map< + latch_level_t, + std::string, + latch_level_less, + ut_allocator<std::pair<const latch_level_t, std::string> > > + Levels; + + /** Mutex protecting the deadlock detector data structures. */ + Mutex m_mutex; + + /** Thread specific data. Protected by m_mutex. */ + ThreadMap m_threads; + + /** Mapping from latche level to its string representation. */ + Levels m_levels; + + /** The singleton instance. Must be created in single threaded mode. */ + static LatchDebug* s_instance; + +public: + /** For checking whether this module has been initialised or not. */ + static bool s_initialized; +}; + +/** The latch order checking infra-structure */ +LatchDebug* LatchDebug::s_instance = NULL; +bool LatchDebug::s_initialized = false; + +#define LEVEL_MAP_INSERT(T) \ +do { \ + std::pair<Levels::iterator, bool> result = \ + m_levels.insert(Levels::value_type(T, #T)); \ + ut_ad(result.second); \ +} while(0) + +/** Setup the mapping from level ID to level name mapping */ +LatchDebug::LatchDebug() +{ + m_mutex.init(); + + LEVEL_MAP_INSERT(SYNC_UNKNOWN); + LEVEL_MAP_INSERT(SYNC_MUTEX); + LEVEL_MAP_INSERT(RW_LOCK_SX); + LEVEL_MAP_INSERT(RW_LOCK_X_WAIT); + LEVEL_MAP_INSERT(RW_LOCK_S); + LEVEL_MAP_INSERT(RW_LOCK_X); + LEVEL_MAP_INSERT(RW_LOCK_NOT_LOCKED); + LEVEL_MAP_INSERT(SYNC_ANY_LATCH); + LEVEL_MAP_INSERT(SYNC_POOL); + LEVEL_MAP_INSERT(SYNC_POOL_MANAGER); + LEVEL_MAP_INSERT(SYNC_SEARCH_SYS); + LEVEL_MAP_INSERT(SYNC_WORK_QUEUE); + LEVEL_MAP_INSERT(SYNC_FTS_TOKENIZE); + LEVEL_MAP_INSERT(SYNC_FTS_OPTIMIZE); + LEVEL_MAP_INSERT(SYNC_FTS_CACHE_INIT); + LEVEL_MAP_INSERT(SYNC_RECV); + LEVEL_MAP_INSERT(SYNC_PURGE_QUEUE); + LEVEL_MAP_INSERT(SYNC_TRX_SYS_HEADER); + LEVEL_MAP_INSERT(SYNC_TRX); + LEVEL_MAP_INSERT(SYNC_RW_TRX_HASH_ELEMENT); + LEVEL_MAP_INSERT(SYNC_READ_VIEW); + LEVEL_MAP_INSERT(SYNC_TRX_SYS); + LEVEL_MAP_INSERT(SYNC_LOCK_SYS); + LEVEL_MAP_INSERT(SYNC_LOCK_WAIT_SYS); + LEVEL_MAP_INSERT(SYNC_INDEX_ONLINE_LOG); + LEVEL_MAP_INSERT(SYNC_IBUF_BITMAP); + LEVEL_MAP_INSERT(SYNC_IBUF_BITMAP_MUTEX); + LEVEL_MAP_INSERT(SYNC_IBUF_TREE_NODE); + LEVEL_MAP_INSERT(SYNC_IBUF_TREE_NODE_NEW); + LEVEL_MAP_INSERT(SYNC_IBUF_INDEX_TREE); + LEVEL_MAP_INSERT(SYNC_IBUF_MUTEX); + LEVEL_MAP_INSERT(SYNC_FSP_PAGE); + LEVEL_MAP_INSERT(SYNC_FSP); + LEVEL_MAP_INSERT(SYNC_EXTERN_STORAGE); + LEVEL_MAP_INSERT(SYNC_TRX_UNDO_PAGE); + LEVEL_MAP_INSERT(SYNC_RSEG_HEADER); + LEVEL_MAP_INSERT(SYNC_RSEG_HEADER_NEW); + LEVEL_MAP_INSERT(SYNC_NOREDO_RSEG); + LEVEL_MAP_INSERT(SYNC_REDO_RSEG); + LEVEL_MAP_INSERT(SYNC_PURGE_LATCH); + LEVEL_MAP_INSERT(SYNC_TREE_NODE); + LEVEL_MAP_INSERT(SYNC_TREE_NODE_FROM_HASH); + LEVEL_MAP_INSERT(SYNC_TREE_NODE_NEW); + LEVEL_MAP_INSERT(SYNC_INDEX_TREE); + LEVEL_MAP_INSERT(SYNC_IBUF_PESS_INSERT_MUTEX); + LEVEL_MAP_INSERT(SYNC_IBUF_HEADER); + LEVEL_MAP_INSERT(SYNC_DICT_HEADER); + LEVEL_MAP_INSERT(SYNC_STATS_AUTO_RECALC); + LEVEL_MAP_INSERT(SYNC_DICT); + LEVEL_MAP_INSERT(SYNC_FTS_CACHE); + LEVEL_MAP_INSERT(SYNC_DICT_OPERATION); + LEVEL_MAP_INSERT(SYNC_TRX_I_S_RWLOCK); + LEVEL_MAP_INSERT(SYNC_LEVEL_VARYING); + LEVEL_MAP_INSERT(SYNC_NO_ORDER_CHECK); + + /* Enum count starts from 0 */ + ut_ad(m_levels.size() == SYNC_LEVEL_MAX + 1); +} + +/** Print the latches acquired by a thread +@param[in] latches Latches acquired by a thread */ 
+void +LatchDebug::print_latches(const Latches* latches) const + UNIV_NOTHROW +{ + ib::error() << "Latches already owned by this thread: "; + + Latches::const_iterator end = latches->end(); + + for (Latches::const_iterator it = latches->begin(); + it != end; + ++it) { + + ib::error() + << sync_latch_get_name(it->m_latch->get_id()) + << " -> " + << it->m_level << " " + << "(" << get_level_name(it->m_level) << ")"; + } +} + +/** Report error and abort +@param[in] latches thread's existing latches +@param[in] latched The existing latch causing the invariant to fail +@param[in] level The new level request that breaks the order */ +void +LatchDebug::crash( + const Latches* latches, + const Latched* latched, + latch_level_t level) const + UNIV_NOTHROW +{ + const latch_t* latch = latched->m_latch; + const std::string& in_level_name = get_level_name(level); + + const std::string& latch_level_name = + get_level_name(latched->m_level); + + ib::error() + << "Thread " << os_thread_get_curr_id() + << " already owns a latch " + << sync_latch_get_name(latch->m_id) << " at level" + << " " << latched->m_level << " (" << latch_level_name + << " ), which is at a lower/same level than the" + << " requested latch: " + << level << " (" << in_level_name << "). " + << latch->to_string(); + + print_latches(latches); + + ut_error; +} + +/** Check that all the latches already owned by a thread have a lower +level than limit. +@param[in] latches the thread's existing (acquired) latches +@param[in] limit to check against +@return latched info if there is one with a level <= limit . */ +const Latched* +LatchDebug::less( + const Latches* latches, + latch_level_t limit) const + UNIV_NOTHROW +{ + Latches::const_iterator end = latches->end(); + + for (Latches::const_iterator it = latches->begin(); it != end; ++it) { + + if (it->m_level <= limit) { + return(&(*it)); + } + } + + return(NULL); +} + +/** Do a basic ordering check. +@param[in] latches thread's existing latches +@param[in] requested_level Level requested by latch +@param[in] in_level declared ulint so that we can do level - 1. + The level of the latch that the thread is + trying to acquire +@return true if passes, else crash with error message. */ +inline bool +LatchDebug::basic_check( + const Latches* latches, + latch_level_t requested_level, + lint in_level) const + UNIV_NOTHROW +{ + latch_level_t level = latch_level_t(in_level); + + ut_ad(level < SYNC_LEVEL_MAX); + + const Latched* latched = less(latches, level); + + if (latched != NULL) { + crash(latches, latched, requested_level); + return(false); + } + + return(true); +} + +/** Create a new instance if one doesn't exist else return the existing one. +@param[in] add add an empty entry if one is not found + (default no) +@return pointer to a thread's acquired latches. 
*/ +Latches* +LatchDebug::thread_latches(bool add) + UNIV_NOTHROW +{ + m_mutex.enter(); + + os_thread_id_t thread_id = os_thread_get_curr_id(); + ThreadMap::iterator lb = m_threads.lower_bound(thread_id); + + if (lb != m_threads.end() + && !(m_threads.key_comp()(thread_id, lb->first))) { + + Latches* latches = lb->second; + + m_mutex.exit(); + + return(latches); + + } else if (!add) { + + m_mutex.exit(); + + return(NULL); + + } else { + typedef ThreadMap::value_type value_type; + + Latches* latches = UT_NEW_NOKEY(Latches()); + + ut_a(latches != NULL); + + latches->reserve(32); + + m_threads.insert(lb, value_type(thread_id, latches)); + + m_mutex.exit(); + + return(latches); + } +} + +/** Checks if the level value exists in the thread's acquired latches. +@param[in] levels the thread's existing (acquired) latches +@param[in] level to lookup +@return latch if found or 0 */ +const latch_t* +LatchDebug::find( + const Latches* latches, + latch_level_t level) const UNIV_NOTHROW +{ + Latches::const_iterator end = latches->end(); + + for (Latches::const_iterator it = latches->begin(); it != end; ++it) { + + if (it->m_level == level) { + + return(it->m_latch); + } + } + + return(0); +} + +/** Checks if the level value exists in the thread's acquired latches. +@param[in] level The level to lookup +@return latch if found or NULL */ +const latch_t* +LatchDebug::find(latch_level_t level) + UNIV_NOTHROW +{ + return(find(thread_latches(), level)); +} + +/** +Adds a latch and its level in the thread level array. Allocates the memory +for the array if called first time for this OS thread. Makes the checks +against other latch levels stored in the array for this thread. +@param[in] latch pointer to a mutex or an rw-lock +@param[in] level level in the latching order +@return the thread's latches */ +Latches* +LatchDebug::check_order( + const latch_t* latch, + latch_level_t level) + UNIV_NOTHROW +{ + ut_ad(latch->get_level() != SYNC_LEVEL_VARYING); + + Latches* latches = thread_latches(true); + + /* NOTE that there is a problem with _NODE and _LEAF levels: if the + B-tree height changes, then a leaf can change to an internal node + or the other way around. We do not know at present if this can cause + unnecessary assertion failures below. */ + + switch (level) { + case SYNC_NO_ORDER_CHECK: + case SYNC_EXTERN_STORAGE: + case SYNC_TREE_NODE_FROM_HASH: + /* Do no order checking */ + break; + + case SYNC_TRX_SYS_HEADER: + + if (srv_is_being_started) { + /* This is violated during trx_sys_create_rsegs() + when creating additional rollback segments when + upgrading in srv_start(). 
*/ + break; + } + + /* Fall through */ + + case SYNC_RECV: + case SYNC_WORK_QUEUE: + case SYNC_FTS_TOKENIZE: + case SYNC_FTS_OPTIMIZE: + case SYNC_FTS_CACHE: + case SYNC_FTS_CACHE_INIT: + case SYNC_SEARCH_SYS: + case SYNC_LOCK_SYS: + case SYNC_LOCK_WAIT_SYS: + case SYNC_RW_TRX_HASH_ELEMENT: + case SYNC_READ_VIEW: + case SYNC_TRX_SYS: + case SYNC_IBUF_BITMAP_MUTEX: + case SYNC_REDO_RSEG: + case SYNC_NOREDO_RSEG: + case SYNC_PURGE_LATCH: + case SYNC_PURGE_QUEUE: + case SYNC_DICT_OPERATION: + case SYNC_DICT_HEADER: + case SYNC_TRX_I_S_RWLOCK: + case SYNC_IBUF_MUTEX: + case SYNC_INDEX_ONLINE_LOG: + case SYNC_STATS_AUTO_RECALC: + case SYNC_POOL: + case SYNC_POOL_MANAGER: + basic_check(latches, level, level); + break; + + case SYNC_ANY_LATCH: + + /* Temporary workaround for LATCH_ID_RTR_*_MUTEX */ + if (is_rtr_mutex(latch)) { + + const Latched* latched = less(latches, level); + + if (latched == NULL + || (latched != NULL + && is_rtr_mutex(latched->m_latch))) { + + /* No violation */ + break; + + } + + crash(latches, latched, level); + + } else { + basic_check(latches, level, level); + } + + break; + + case SYNC_TRX: + + /* Either the thread must own the lock_sys.mutex, or + it is allowed to own only ONE trx_t::mutex. */ + + if (less(latches, level) != NULL) { + basic_check(latches, level, level - 1); + ut_a(find(latches, SYNC_LOCK_SYS) != 0); + } + break; + + case SYNC_IBUF_BITMAP: + + /* Either the thread must own the master mutex to all + the bitmap pages, or it is allowed to latch only ONE + bitmap page. */ + + if (find(latches, SYNC_IBUF_BITMAP_MUTEX) != 0) { + + basic_check(latches, level, SYNC_IBUF_BITMAP - 1); + + } else if (!srv_is_being_started) { + + /* This is violated during trx_sys_create_rsegs() + when creating additional rollback segments during + upgrade. */ + + basic_check(latches, level, SYNC_IBUF_BITMAP); + } + break; + + case SYNC_FSP_PAGE: + ut_a(find(latches, SYNC_FSP) != 0); + break; + + case SYNC_FSP: + + ut_a(find(latches, SYNC_FSP) != 0 + || basic_check(latches, level, SYNC_FSP)); + break; + + case SYNC_TRX_UNDO_PAGE: + + /* Purge is allowed to read in as many UNDO pages as it likes. + The purge thread can read the UNDO pages without any covering + mutex. */ + + ut_a(find(latches, SYNC_REDO_RSEG) != 0 + || find(latches, SYNC_NOREDO_RSEG) != 0 + || basic_check(latches, level, level - 1)); + break; + + case SYNC_RSEG_HEADER: + + ut_a(find(latches, SYNC_REDO_RSEG) != 0 + || find(latches, SYNC_NOREDO_RSEG) != 0); + break; + + case SYNC_RSEG_HEADER_NEW: + + ut_a(find(latches, SYNC_FSP_PAGE) != 0); + break; + + case SYNC_TREE_NODE: + + ut_a(find(latches, SYNC_FSP) == &fil_system.temp_space->latch + || find(latches, SYNC_INDEX_TREE) + || find(latches, SYNC_DICT_OPERATION) + || basic_check(latches, level, SYNC_TREE_NODE - 1)); + break; + + case SYNC_TREE_NODE_NEW: + + ut_a(find(latches, SYNC_FSP_PAGE) != 0); + break; + + case SYNC_INDEX_TREE: + + basic_check(latches, level, SYNC_TREE_NODE - 1); + break; + + case SYNC_IBUF_TREE_NODE: + + ut_a(find(latches, SYNC_IBUF_INDEX_TREE) != 0 + || basic_check(latches, level, SYNC_IBUF_TREE_NODE - 1)); + break; + + case SYNC_IBUF_TREE_NODE_NEW: + + /* ibuf_add_free_page() allocates new pages for the change + buffer while only holding the tablespace x-latch. These + pre-allocated new pages may only be used while holding + ibuf_mutex, in btr_page_alloc_for_ibuf(). 
*/
+
+		ut_a(find(latches, SYNC_IBUF_MUTEX) != 0
+		     || find(latches, SYNC_FSP) != 0);
+		break;
+
+	case SYNC_IBUF_INDEX_TREE:
+
+		if (find(latches, SYNC_FSP) != 0) {
+			basic_check(latches, level, level - 1);
+		} else {
+			basic_check(latches, level, SYNC_IBUF_TREE_NODE - 1);
+		}
+		break;
+
+	case SYNC_IBUF_PESS_INSERT_MUTEX:
+
+		basic_check(latches, level, SYNC_FSP - 1);
+		ut_a(find(latches, SYNC_IBUF_MUTEX) == 0);
+		break;
+
+	case SYNC_IBUF_HEADER:
+
+		basic_check(latches, level, SYNC_FSP - 1);
+		ut_a(find(latches, SYNC_IBUF_MUTEX) == NULL);
+		ut_a(find(latches, SYNC_IBUF_PESS_INSERT_MUTEX) == NULL);
+		break;
+
+	case SYNC_DICT:
+		basic_check(latches, level, SYNC_DICT);
+		break;
+
+	case SYNC_MUTEX:
+	case SYNC_UNKNOWN:
+	case SYNC_LEVEL_VARYING:
+	case RW_LOCK_X:
+	case RW_LOCK_X_WAIT:
+	case RW_LOCK_S:
+	case RW_LOCK_SX:
+	case RW_LOCK_NOT_LOCKED:
+		/* These levels should never be set for a latch. */
+		ut_error;
+		break;
+	}
+
+	return(latches);
+}
+
+/** Removes a latch from the thread level array if it is found there.
+It is not an error if the latch is not found, as we presently are not
+able to determine the level for every latch reservation the program does.
+@param[in]	latch	the latch that was released/unlocked */
+void
+LatchDebug::unlock(const latch_t* latch)
+	UNIV_NOTHROW
+{
+	if (latch->get_level() == SYNC_LEVEL_VARYING) {
+		// We don't have varying level mutexes
+		ut_ad(latch->m_rw_lock);
+	}
+
+	Latches*	latches;
+
+	if (*latch->get_name() == '.') {
+
+		/* Ignore diagnostic latches, starting with '.' */
+
+	} else if ((latches = thread_latches()) != NULL) {
+
+		Latches::reverse_iterator	rend = latches->rend();
+
+		for (Latches::reverse_iterator it = latches->rbegin();
+		     it != rend;
+		     ++it) {
+
+			if (it->m_latch != latch) {
+
+				continue;
+			}
+
+			Latches::iterator	i = it.base();
+
+			latches->erase(--i);
+
+			/* If this thread doesn't own any more
+			latches, remove it from the map.
+
+			FIXME: Perhaps use the master thread
+			to do purge. Or, do it from close connection.
+			This could be expensive. */
+
+			if (latches->empty()) {
+
+				m_mutex.enter();
+
+				os_thread_id_t	thread_id;
+
+				thread_id = os_thread_get_curr_id();
+
+				m_threads.erase(thread_id);
+
+				m_mutex.exit();
+
+				UT_DELETE(latches);
+			}
+
+			return;
+		}
+
+		if (latch->get_level() != SYNC_LEVEL_VARYING) {
+			ib::error()
+				<< "Couldn't find latch "
+				<< sync_latch_get_name(latch->get_id());
+
+			print_latches(latches);
+
+			/* Must find the latch. */
+			ut_error;
+		}
+	}
+}
+
+/** Get the latch id from a latch name.
+@param[in]	name	Latch name
+@return latch id if found else LATCH_ID_NONE. */
+latch_id_t
+sync_latch_get_id(const char* name)
+{
+	LatchMetaData::const_iterator	end = latch_meta.end();
+
+	/* Linear scan should be OK, this should be extremely rare. */
+
+	for (LatchMetaData::const_iterator it = latch_meta.begin();
+	     it != end;
+	     ++it) {
+
+		if (*it == NULL || (*it)->get_id() == LATCH_ID_NONE) {
+
+			continue;
+
+		} else if (strcmp((*it)->get_name(), name) == 0) {
+
+			return((*it)->get_id());
+		}
+	}
+
+	return(LATCH_ID_NONE);
+}
+
+/** Get the latch name from a sync level
+@param[in]	level	Latch level to look up
+@return NULL if not found. */
+const char*
+sync_latch_get_name(latch_level_t level)
+{
+	LatchMetaData::const_iterator	end = latch_meta.end();
+
+	/* Linear scan should be OK, this should be extremely rare.
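+
+	For illustration only (editor's sketch, not in the original
+	source): the two lookups are symmetric over the same meta-data,
+	so, given the registrations in sync_latch_meta_init() below,
+
+		latch_id_t  id   = sync_latch_get_id("DICT_SYS");
+		const char* name = sync_latch_get_name(SYNC_DICT);
+
+	would yield LATCH_ID_DICT_SYS and "DICT_SYS" respectively.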
*/ + + for (LatchMetaData::const_iterator it = latch_meta.begin(); + it != end; + ++it) { + + if (*it == NULL || (*it)->get_id() == LATCH_ID_NONE) { + + continue; + + } else if ((*it)->get_level() == level) { + + return((*it)->get_name()); + } + } + + return(0); +} + +/** Check if it is OK to acquire the latch. +@param[in] latch latch type */ +void +sync_check_lock_validate(const latch_t* latch) +{ + if (LatchDebug::instance() != NULL) { + LatchDebug::instance()->lock_validate( + latch, latch->get_level()); + } +} + +/** Note that the lock has been granted +@param[in] latch latch type */ +void +sync_check_lock_granted(const latch_t* latch) +{ + if (LatchDebug::instance() != NULL) { + LatchDebug::instance()->lock_granted(latch, latch->get_level()); + } +} + +/** Check if it is OK to acquire the latch. +@param[in] latch latch type +@param[in] level Latch level */ +void +sync_check_lock( + const latch_t* latch, + latch_level_t level) +{ + if (LatchDebug::instance() != NULL) { + + ut_ad(latch->get_level() == SYNC_LEVEL_VARYING); + ut_ad(latch->get_id() == LATCH_ID_BUF_BLOCK_LOCK); + + LatchDebug::instance()->lock_validate(latch, level); + LatchDebug::instance()->lock_granted(latch, level); + } +} + +/** Check if it is OK to re-acquire the lock. +@param[in] latch RW-LOCK to relock (recursive X locks) */ +void +sync_check_relock(const latch_t* latch) +{ + if (LatchDebug::instance() != NULL) { + LatchDebug::instance()->relock(latch); + } +} + +/** Removes a latch from the thread level array if it is found there. +@param[in] latch The latch to unlock */ +void +sync_check_unlock(const latch_t* latch) +{ + if (LatchDebug::instance() != NULL) { + LatchDebug::instance()->unlock(latch); + } +} + +/** Checks if the level array for the current thread contains a +mutex or rw-latch at the specified level. +@param[in] level to find +@return a matching latch, or NULL if not found */ +const latch_t* +sync_check_find(latch_level_t level) +{ + if (LatchDebug::instance() != NULL) { + return(LatchDebug::instance()->find(level)); + } + + return(NULL); +} + +/** Iterate over the thread's latches. +@param[in,out] functor called for each element. +@return true if the functor returns true for any element */ +bool +sync_check_iterate(const sync_check_functor_t& functor) +{ + if (LatchDebug* debug = LatchDebug::instance()) { + return(debug->for_each(functor)); + } + + return(false); +} + +/** Enable sync order checking. + +Note: We don't enforce any synchronisation checks. The caller must ensure +that no races can occur */ +static void sync_check_enable() +{ + if (!srv_sync_debug) { + + return; + } + + /* We should always call this before we create threads. */ + + LatchDebug::create_instance(); +} + +/** Initialise the debug data structures */ +void +LatchDebug::init() + UNIV_NOTHROW +{ + mutex_create(LATCH_ID_RW_LOCK_DEBUG, &rw_lock_debug_mutex); +} + +/** Shutdown the latch debug checking + +Note: We don't enforce any synchronisation checks. The caller must ensure +that no races can occur */ +void +LatchDebug::shutdown() + UNIV_NOTHROW +{ + mutex_free(&rw_lock_debug_mutex); + + ut_a(s_initialized); + + s_initialized = false; + + UT_DELETE(s_instance); + + LatchDebug::s_instance = NULL; +} + +/** Acquires the debug mutex. We cannot use the mutex defined in sync0sync, +because the debug mutex is also acquired in sync0arr while holding the OS +mutex protecting the sync array, and the ordinary mutex_enter might +recursively call routines in sync0arr, leading to a deadlock on the OS +mutex. 
*/
+void
+rw_lock_debug_mutex_enter()
+{
+	mutex_enter(&rw_lock_debug_mutex);
+}
+
+/** Releases the debug mutex. */
+void
+rw_lock_debug_mutex_exit()
+{
+	mutex_exit(&rw_lock_debug_mutex);
+}
+#endif /* UNIV_DEBUG */
+
+/* Meta data for all the InnoDB latches. If a latch is not recorded here
+then it will not be considered for deadlock checks. */
+LatchMetaData	latch_meta;
+
+/** Load the latch meta data. */
+static
+void
+sync_latch_meta_init()
+	UNIV_NOTHROW
+{
+	latch_meta.resize(LATCH_ID_MAX + 1);
+
+	/* The latches should be ordered on latch_id_t, so that we can
+	index directly into the vector to update and fetch the meta-data. */
+
+	LATCH_ADD_MUTEX(DICT_FOREIGN_ERR, SYNC_NO_ORDER_CHECK,
+			dict_foreign_err_mutex_key);
+
+	LATCH_ADD_MUTEX(DICT_SYS, SYNC_DICT, dict_sys_mutex_key);
+
+	LATCH_ADD_MUTEX(FIL_SYSTEM, SYNC_ANY_LATCH, fil_system_mutex_key);
+
+	LATCH_ADD_MUTEX(FTS_DELETE, SYNC_FTS_OPTIMIZE, fts_delete_mutex_key);
+
+	LATCH_ADD_MUTEX(FTS_DOC_ID, SYNC_FTS_OPTIMIZE, fts_doc_id_mutex_key);
+
+	LATCH_ADD_MUTEX(FTS_PLL_TOKENIZE, SYNC_FTS_TOKENIZE,
+			fts_pll_tokenize_mutex_key);
+
+	LATCH_ADD_MUTEX(IBUF_BITMAP, SYNC_IBUF_BITMAP_MUTEX,
+			ibuf_bitmap_mutex_key);
+
+	LATCH_ADD_MUTEX(IBUF, SYNC_IBUF_MUTEX, ibuf_mutex_key);
+
+	LATCH_ADD_MUTEX(IBUF_PESSIMISTIC_INSERT, SYNC_IBUF_PESS_INSERT_MUTEX,
+			ibuf_pessimistic_insert_mutex_key);
+
+	LATCH_ADD_MUTEX(PURGE_SYS_PQ, SYNC_PURGE_QUEUE,
+			purge_sys_pq_mutex_key);
+
+	LATCH_ADD_MUTEX(RECALC_POOL, SYNC_STATS_AUTO_RECALC,
+			recalc_pool_mutex_key);
+
+	LATCH_ADD_MUTEX(RECV_SYS, SYNC_RECV, recv_sys_mutex_key);
+
+	LATCH_ADD_MUTEX(REDO_RSEG, SYNC_REDO_RSEG, redo_rseg_mutex_key);
+
+	LATCH_ADD_MUTEX(NOREDO_RSEG, SYNC_NOREDO_RSEG, noredo_rseg_mutex_key);
+
+#ifdef UNIV_DEBUG
+	/* Mutex names starting with '.' are not tracked. They are assumed
+	to be diagnostic mutexes used in debugging.
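+
+	For illustration only (editor's note; the real macro is defined
+	elsewhere in the source and may differ): LATCH_ADD_MUTEX() is
+	assumed to create a latch_meta_t entry and file it in latch_meta
+	under its latch ID, roughly
+
+		latch_meta[LATCH_ID_FOO] = UT_NEW_NOKEY(
+			latch_meta_t(LATCH_ID_FOO, "FOO",
+				     SYNC_NO_ORDER_CHECK, foo_mutex_key));
+
+	which is why its result can also be assigned directly, as is done
+	for LATCH_ID_RW_LOCK_DEBUG below.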
*/ + latch_meta[LATCH_ID_RW_LOCK_DEBUG] = + LATCH_ADD_MUTEX(RW_LOCK_DEBUG, + SYNC_NO_ORDER_CHECK, + rw_lock_debug_mutex_key); +#endif /* UNIV_DEBUG */ + + LATCH_ADD_MUTEX(RTR_ACTIVE_MUTEX, SYNC_ANY_LATCH, + rtr_active_mutex_key); + + LATCH_ADD_MUTEX(RTR_MATCH_MUTEX, SYNC_ANY_LATCH, rtr_match_mutex_key); + + LATCH_ADD_MUTEX(RTR_PATH_MUTEX, SYNC_ANY_LATCH, rtr_path_mutex_key); + + LATCH_ADD_MUTEX(RW_LOCK_LIST, SYNC_NO_ORDER_CHECK, + rw_lock_list_mutex_key); + + LATCH_ADD_MUTEX(SRV_INNODB_MONITOR, SYNC_NO_ORDER_CHECK, + srv_innodb_monitor_mutex_key); + + LATCH_ADD_MUTEX(SRV_MISC_TMPFILE, SYNC_ANY_LATCH, + srv_misc_tmpfile_mutex_key); + + LATCH_ADD_MUTEX(SRV_MONITOR_FILE, SYNC_NO_ORDER_CHECK, + srv_monitor_file_mutex_key); + + LATCH_ADD_MUTEX(TRX_POOL, SYNC_POOL, trx_pool_mutex_key); + + LATCH_ADD_MUTEX(TRX_POOL_MANAGER, SYNC_POOL_MANAGER, + trx_pool_manager_mutex_key); + + LATCH_ADD_MUTEX(TRX, SYNC_TRX, trx_mutex_key); + + LATCH_ADD_MUTEX(LOCK_SYS, SYNC_LOCK_SYS, lock_mutex_key); + + LATCH_ADD_MUTEX(LOCK_SYS_WAIT, SYNC_LOCK_WAIT_SYS, + lock_wait_mutex_key); + + LATCH_ADD_MUTEX(TRX_SYS, SYNC_TRX_SYS, trx_sys_mutex_key); + + LATCH_ADD_MUTEX(SRV_SYS_TASKS, SYNC_ANY_LATCH, srv_threads_mutex_key); + + LATCH_ADD_MUTEX(PAGE_ZIP_STAT_PER_INDEX, SYNC_ANY_LATCH, + page_zip_stat_per_index_mutex_key); + + LATCH_ADD_MUTEX(SYNC_ARRAY_MUTEX, SYNC_NO_ORDER_CHECK, + sync_array_mutex_key); + + LATCH_ADD_MUTEX(ROW_DROP_LIST, SYNC_NO_ORDER_CHECK, + row_drop_list_mutex_key); + + LATCH_ADD_MUTEX(INDEX_ONLINE_LOG, SYNC_INDEX_ONLINE_LOG, + index_online_log_key); + + LATCH_ADD_MUTEX(WORK_QUEUE, SYNC_WORK_QUEUE, PFS_NOT_INSTRUMENTED); + + // Add the RW locks + LATCH_ADD_RWLOCK(BTR_SEARCH, SYNC_SEARCH_SYS, btr_search_latch_key); + + LATCH_ADD_RWLOCK(BUF_BLOCK_LOCK, SYNC_LEVEL_VARYING, + PFS_NOT_INSTRUMENTED); + +#ifdef UNIV_DEBUG + LATCH_ADD_RWLOCK(BUF_BLOCK_DEBUG, SYNC_LEVEL_VARYING, + PFS_NOT_INSTRUMENTED); +#endif /* UNIV_DEBUG */ + + LATCH_ADD_RWLOCK(DICT_OPERATION, SYNC_DICT_OPERATION, + dict_operation_lock_key); + + LATCH_ADD_RWLOCK(FIL_SPACE, SYNC_FSP, fil_space_latch_key); + + LATCH_ADD_RWLOCK(FTS_CACHE, SYNC_FTS_CACHE, fts_cache_rw_lock_key); + + LATCH_ADD_RWLOCK(FTS_CACHE_INIT, SYNC_FTS_CACHE_INIT, + fts_cache_init_rw_lock_key); + + LATCH_ADD_RWLOCK(TRX_I_S_CACHE, SYNC_TRX_I_S_RWLOCK, + trx_i_s_cache_lock_key); + + LATCH_ADD_RWLOCK(TRX_PURGE, SYNC_PURGE_LATCH, trx_purge_latch_key); + + LATCH_ADD_RWLOCK(IBUF_INDEX_TREE, SYNC_IBUF_INDEX_TREE, + index_tree_rw_lock_key); + + LATCH_ADD_RWLOCK(INDEX_TREE, SYNC_INDEX_TREE, index_tree_rw_lock_key); + + /* JAN: TODO: Add PFS instrumentation */ + LATCH_ADD_MUTEX(DEFRAGMENT_MUTEX, SYNC_NO_ORDER_CHECK, + PFS_NOT_INSTRUMENTED); + LATCH_ADD_MUTEX(BTR_DEFRAGMENT_MUTEX, SYNC_NO_ORDER_CHECK, + PFS_NOT_INSTRUMENTED); + LATCH_ADD_MUTEX(FIL_CRYPT_STAT_MUTEX, SYNC_NO_ORDER_CHECK, + PFS_NOT_INSTRUMENTED); + LATCH_ADD_MUTEX(FIL_CRYPT_DATA_MUTEX, SYNC_NO_ORDER_CHECK, + PFS_NOT_INSTRUMENTED); + LATCH_ADD_MUTEX(FIL_CRYPT_THREADS_MUTEX, SYNC_NO_ORDER_CHECK, + PFS_NOT_INSTRUMENTED); + LATCH_ADD_MUTEX(RW_TRX_HASH_ELEMENT, SYNC_RW_TRX_HASH_ELEMENT, + rw_trx_hash_element_mutex_key); + LATCH_ADD_MUTEX(READ_VIEW, SYNC_READ_VIEW, read_view_mutex_key); + + latch_id_t id = LATCH_ID_NONE; + + /* The array should be ordered on latch ID.We need to + index directly into it from the mutex policy to update + the counters and access the meta-data. 
*/ + + for (LatchMetaData::iterator it = latch_meta.begin(); + it != latch_meta.end(); + ++it) { + + const latch_meta_t* meta = *it; + + + /* Skip blank entries */ + if (meta == NULL || meta->get_id() == LATCH_ID_NONE) { + continue; + } + + ut_a(id < meta->get_id()); + + id = meta->get_id(); + } +} + +/** Destroy the latch meta data */ +static +void +sync_latch_meta_destroy() +{ + for (LatchMetaData::iterator it = latch_meta.begin(); + it != latch_meta.end(); + ++it) { + + UT_DELETE(*it); + } + + latch_meta.clear(); +} + +/** Initializes the synchronization data structures. */ +void +sync_check_init() +{ + ut_ad(!LatchDebug::s_initialized); + ut_d(LatchDebug::s_initialized = true); + + sync_latch_meta_init(); + + /* create the mutex to protect rw_lock list. */ + + mutex_create(LATCH_ID_RW_LOCK_LIST, &rw_lock_list_mutex); + + ut_d(LatchDebug::init()); + + sync_array_init(); + + ut_d(sync_check_enable()); +} + +/** Free the InnoDB synchronization data structures. */ +void +sync_check_close() +{ + ut_d(LatchDebug::shutdown()); + + mutex_free(&rw_lock_list_mutex); + + sync_array_close(); + + sync_latch_meta_destroy(); +} + diff --git a/storage/innobase/sync/sync0rw.cc b/storage/innobase/sync/sync0rw.cc new file mode 100644 index 00000000..2624ffb9 --- /dev/null +++ b/storage/innobase/sync/sync0rw.cc @@ -0,0 +1,1216 @@ +/***************************************************************************** + +Copyright (c) 1995, 2016, Oracle and/or its affiliates. All Rights Reserved. +Copyright (c) 2008, Google Inc. +Copyright (c) 2017, 2020, MariaDB Corporation. + +Portions of this file contain modifications contributed and copyrighted by +Google, Inc. Those modifications are gratefully acknowledged and are described +briefly in the InnoDB documentation. The contributions by Google are +incorporated with their permission, and subject to the conditions contained in +the file COPYING.Google. + +This program is free software; you can redistribute it and/or modify it under +the terms of the GNU General Public License as published by the Free Software +Foundation; version 2 of the License. + +This program is distributed in the hope that it will be useful, but WITHOUT +ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS +FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details. + +You should have received a copy of the GNU General Public License along with +this program; if not, write to the Free Software Foundation, Inc., +51 Franklin Street, Fifth Floor, Boston, MA 02110-1335 USA + +*****************************************************************************/ + +/**************************************************//** +@file sync/sync0rw.cc +The read-write lock (for thread synchronization) + +Created 9/11/1995 Heikki Tuuri +*******************************************************/ + +#include "sync0rw.h" +#include "my_cpu.h" +#include <my_sys.h> + +/* + IMPLEMENTATION OF THE RW_LOCK + ============================= +The status of a rw_lock is held in lock_word. The initial value of lock_word is +X_LOCK_DECR. lock_word is decremented by 1 for each s-lock and by X_LOCK_DECR +or 1 for each x-lock. This describes the lock state for each value of lock_word: + +lock_word == X_LOCK_DECR: Unlocked. +X_LOCK_HALF_DECR < lock_word < X_LOCK_DECR: + S locked, no waiting writers. + (X_LOCK_DECR - lock_word) is the number + of S locks. +lock_word == X_LOCK_HALF_DECR: SX locked, no waiting writers. 
+0 < lock_word < X_LOCK_HALF_DECR:
+		   SX locked AND S locked, no waiting writers.
+		   (X_LOCK_HALF_DECR - lock_word) is the number
+		   of S locks.
+lock_word == 0: X locked, no waiting writers.
+-X_LOCK_HALF_DECR < lock_word < 0:
+		   S locked, with a waiting writer.
+		   (-lock_word) is the number of S locks.
+lock_word == -X_LOCK_HALF_DECR: X locked and SX locked, no waiting writers.
+-X_LOCK_DECR < lock_word < -X_LOCK_HALF_DECR:
+		   S locked, with a waiting writer
+		   which has SX lock.
+		   -(lock_word + X_LOCK_HALF_DECR) is the number
+		   of S locks.
+lock_word == -X_LOCK_DECR: X locked with recursive X lock (2 X locks).
+-(X_LOCK_DECR + X_LOCK_HALF_DECR) < lock_word < -X_LOCK_DECR:
+		   X locked. The number of the X locks is:
+		   2 - (lock_word + X_LOCK_DECR)
+lock_word == -(X_LOCK_DECR + X_LOCK_HALF_DECR):
+		   X locked with recursive X lock (2 X locks)
+		   and SX locked.
+lock_word < -(X_LOCK_DECR + X_LOCK_HALF_DECR):
+		   X locked and SX locked.
+		   The number of the X locks is:
+		   2 - (lock_word + X_LOCK_DECR + X_LOCK_HALF_DECR)
+
+ LOCK COMPATIBILITY MATRIX
+
+    | S|SX| X|
+  --+--+--+--+
+   S| +| +| -|
+  --+--+--+--+
+  SX| +| -| -|
+  --+--+--+--+
+   X| -| -| -|
+  --+--+--+--+
+
+The lock_word is always read and updated atomically and consistently, so that
+it always represents the state of the lock, and the state of the lock changes
+with a single atomic operation. This lock_word holds all of the information
+that a thread needs in order to determine if it is eligible to gain the lock
+or if it must spin or sleep. The one exception to this is that writer_thread
+must be verified before recursive write locks: to solve this scenario, we make
+writer_thread readable by all threads, but only writeable by the x-lock or
+sx-lock holder.
+
+The other members of the lock obey the following rules to remain consistent:
+
+writer_thread:	Is used only in recursive x-locking or sx-locking.
+		This field is 0 at lock creation time and is updated
+		when an x-lock is acquired or when move_ownership is called.
+		A thread is only allowed to set the value of this field to
+		its own thread_id, i.e. a thread cannot set writer_thread
+		to some other thread's id.
+waiters:	May be set to 1 anytime, but to avoid unnecessary wake-up
+		signals, it should only be set to 1 when there are threads
+		waiting on event. Must be 1 when a writer starts waiting to
+		ensure the current x-locking thread sends a wake-up signal
+		during unlock. May only be reset to 0 immediately before a
+		wake-up signal is sent to event. On most platforms, a
+		memory barrier is required after waiters is set, and before
+		verifying lock_word is still held, to ensure some unlocker
+		really does see the flag's new value.
+event:		Threads wait on event for read or writer lock when another
+		thread has an x-lock or an x-lock reservation (wait_ex). A
+		thread may only wait on event after performing the following
+		actions in order:
+		(1) Record the counter value of event (with os_event_reset).
+		(2) Set waiters to 1.
+		(3) Verify lock_word <= 0.
+		(1) must come before (2) to ensure signal is not missed.
+		(2) must come before (3) to ensure a signal is sent.
+		These restrictions force the above ordering.
+		Immediately before sending the wake-up signal, we should:
+		(1) Verify lock_word == X_LOCK_DECR (unlocked)
+		(2) Reset waiters to 0.
+wait_ex_event:	A thread may only wait on the wait_ex_event after it has
+		performed the following actions in order:
+		(1) Decrement lock_word by X_LOCK_DECR.
+ (2) Record counter value of wait_ex_event (os_event_reset, + called from sync_array_reserve_cell). + (3) Verify that lock_word < 0. + (1) must come first to ensures no other threads become reader + or next writer, and notifies unlocker that signal must be sent. + (2) must come before (3) to ensure the signal is not missed. + These restrictions force the above ordering. + Immediately before sending the wake-up signal, we should: + Verify lock_word == 0 (waiting thread holds x_lock) +*/ + +rw_lock_stats_t rw_lock_stats; + +/* The global list of rw-locks */ +ilist<rw_lock_t> rw_lock_list; +ib_mutex_t rw_lock_list_mutex; + +#ifdef UNIV_DEBUG +/******************************************************************//** +Creates a debug info struct. */ +static +rw_lock_debug_t* +rw_lock_debug_create(void); +/*======================*/ +/******************************************************************//** +Frees a debug info struct. */ +static +void +rw_lock_debug_free( +/*===============*/ + rw_lock_debug_t* info); + +/******************************************************************//** +Creates a debug info struct. +@return own: debug info struct */ +static +rw_lock_debug_t* +rw_lock_debug_create(void) +/*======================*/ +{ + return((rw_lock_debug_t*) ut_malloc_nokey(sizeof(rw_lock_debug_t))); +} + +/******************************************************************//** +Frees a debug info struct. */ +static +void +rw_lock_debug_free( +/*===============*/ + rw_lock_debug_t* info) +{ + ut_free(info); +} +#endif /* UNIV_DEBUG */ + +/******************************************************************//** +Creates, or rather, initializes an rw-lock object in a specified memory +location (which must be appropriately aligned). The rw-lock is initialized +to the non-locked state. Explicit freeing of the rw-lock with rw_lock_free +is necessary only if the memory block containing it is freed. */ +void +rw_lock_create_func( +/*================*/ + rw_lock_t* lock, /*!< in: pointer to memory */ +#ifdef UNIV_DEBUG + latch_level_t level, /*!< in: level */ +#endif /* UNIV_DEBUG */ + const char* cfile_name, /*!< in: file name where created */ + unsigned cline) /*!< in: file line where created */ +{ +#if defined(UNIV_DEBUG) && !defined(UNIV_PFS_RWLOCK) + /* It should have been created in pfs_rw_lock_create_func() */ + new(lock) rw_lock_t(); +#endif /* UNIV_DEBUG */ + + lock->lock_word = X_LOCK_DECR; + lock->waiters = 0; + + lock->sx_recursive = 0; + lock->writer_thread= 0; + +#ifdef UNIV_DEBUG + lock->m_rw_lock = true; + + UT_LIST_INIT(lock->debug_list, &rw_lock_debug_t::list); + + lock->m_id = sync_latch_get_id(sync_latch_get_name(level)); + ut_a(lock->m_id != LATCH_ID_NONE); + + lock->level = level; +#endif /* UNIV_DEBUG */ + + lock->cfile_name = cfile_name; + + /* This should hold in practice. If it doesn't then we need to + split the source file anyway. Or create the locks on lines + less than 8192. cline is unsigned:13. 
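+
+	For illustration (editor's note): "unsigned:13" means the cline
+	bit-field can hold at most (1U << 13) - 1 = 8191, which is exactly
+	the bound asserted and masked below. A hypothetical compile-time
+	check of the same arithmetic:
+
+		static_assert(((1U << 13) - 1) == 8191,
+			      "cline must fit in 13 bits");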
*/ + ut_ad(cline <= ((1U << 13) - 1)); + lock->cline = cline & ((1U << 13) - 1); + lock->count_os_wait = 0; + lock->last_x_file_name = "not yet reserved"; + lock->last_x_line = 0; + lock->event = os_event_create(0); + lock->wait_ex_event = os_event_create(0); + + lock->is_block_lock = 0; + + ut_d(lock->created = true); + + mutex_enter(&rw_lock_list_mutex); + rw_lock_list.push_front(*lock); + mutex_exit(&rw_lock_list_mutex); +} + +/******************************************************************//** +Calling this function is obligatory only if the memory buffer containing +the rw-lock is freed. Removes an rw-lock object from the global list. The +rw-lock is checked to be in the non-locked state. */ +void +rw_lock_free_func( +/*==============*/ + rw_lock_t* lock) /*!< in/out: rw-lock */ +{ + ut_ad(rw_lock_validate(lock)); + ut_a(lock->lock_word == X_LOCK_DECR); + + ut_d(lock->created = false); + + mutex_enter(&rw_lock_list_mutex); + + os_event_destroy(lock->event); + + os_event_destroy(lock->wait_ex_event); + + rw_lock_list.remove(*lock); + + mutex_exit(&rw_lock_list_mutex); +} + +/******************************************************************//** +Lock an rw-lock in shared mode for the current thread. If the rw-lock is +locked in exclusive mode, or there is an exclusive lock request waiting, +the function spins a preset time (controlled by srv_n_spin_wait_rounds), waiting +for the lock, before suspending the thread. */ +void +rw_lock_s_lock_spin( +/*================*/ + rw_lock_t* lock, /*!< in: pointer to rw-lock */ + ulint pass, /*!< in: pass value; != 0, if the lock + will be passed to another thread to unlock */ + const char* file_name, /*!< in: file name where lock requested */ + unsigned line) /*!< in: line where requested */ +{ + ulint i = 0; /* spin round count */ + sync_array_t* sync_arr; + lint spin_count = 0; + int64_t count_os_wait = 0; + + /* We reuse the thread id to index into the counter, cache + it here for efficiency. */ + + ut_ad(rw_lock_validate(lock)); + + rw_lock_stats.rw_s_spin_wait_count.inc(); + +lock_loop: + + /* Spin waiting for the writer field to become free */ + HMT_low(); + ulint j = i; + while (i < srv_n_spin_wait_rounds && + lock->lock_word <= 0) { + ut_delay(srv_spin_wait_delay); + i++; + } + + HMT_medium(); + if (i >= srv_n_spin_wait_rounds) { + os_thread_yield(); + } + + spin_count += lint(i - j); + + /* We try once again to obtain the lock */ + if (rw_lock_s_lock_low(lock, pass, file_name, line)) { + + if (count_os_wait > 0) { + lock->count_os_wait += + static_cast<uint32_t>(count_os_wait); + rw_lock_stats.rw_s_os_wait_count.add(count_os_wait); + } + + rw_lock_stats.rw_s_spin_round_count.add(spin_count); + + return; /* Success */ + } else { + + if (i < srv_n_spin_wait_rounds) { + goto lock_loop; + } + + + ++count_os_wait; + + sync_cell_t* cell; + + sync_arr = sync_array_get_and_reserve_cell( + lock, RW_LOCK_S, file_name, line, &cell); + + /* Set waiters before checking lock_word to ensure wake-up + signal is sent. This may lead to some unnecessary signals. 
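+
+	For illustration only (editor's sketch with simplified, hypothetical
+	names), the general "arm, then re-check, then sleep" protocol used
+	here and in the x-lock and sx-lock paths below is:
+
+		os_event_reset(event);		// (1) record the event count
+		waiters = 1;			// (2) arm the wake-up flag
+		if (!try_lock())		// (3) re-check the lock word
+			os_event_wait(event);	//     only now safe to sleep
+
+	If the unlocker releases the lock before step (3), the waiter simply
+	acquires it and never sleeps; if it releases after, it observes
+	waiters == 1 and signals the event, so the wake-up cannot be lost.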
*/ + lock->waiters.exchange(1, std::memory_order_acquire); + + if (rw_lock_s_lock_low(lock, pass, file_name, line)) { + + sync_array_free_cell(sync_arr, cell); + + if (count_os_wait > 0) { + + lock->count_os_wait += + static_cast<uint32_t>(count_os_wait); + + rw_lock_stats.rw_s_os_wait_count.add( + count_os_wait); + } + + rw_lock_stats.rw_s_spin_round_count.add(spin_count); + + return; /* Success */ + } + + /* see comments in trx_commit_low() to + before_trx_state_committed_in_memory explaining + this care to invoke the following sync check.*/ +#ifndef DBUG_OFF +#ifdef UNIV_DEBUG + if (lock->get_level() != SYNC_DICT_OPERATION) { + DEBUG_SYNC_C("rw_s_lock_waiting"); + } +#endif +#endif + sync_array_wait_event(sync_arr, cell); + + i = 0; + + goto lock_loop; + } +} + +/******************************************************************//** +This function is used in the insert buffer to move the ownership of an +x-latch on a buffer frame to the current thread. The x-latch was set by +the buffer read operation and it protected the buffer frame while the +read was done. The ownership is moved because we want that the current +thread is able to acquire a second x-latch which is stored in an mtr. +This, in turn, is needed to pass the debug checks of index page +operations. */ +void +rw_lock_x_lock_move_ownership( +/*==========================*/ + rw_lock_t* lock) /*!< in: lock which was x-locked in the + buffer read */ +{ + ut_ad(rw_lock_is_locked(lock, RW_LOCK_X)); + + lock->writer_thread = os_thread_get_curr_id(); +} + +/******************************************************************//** +Function for the next writer to call. Waits for readers to exit. +The caller must have already decremented lock_word by X_LOCK_DECR. */ +UNIV_INLINE +void +rw_lock_x_lock_wait_func( +/*=====================*/ + rw_lock_t* lock, /*!< in: pointer to rw-lock */ +#ifdef UNIV_DEBUG + ulint pass, /*!< in: pass value; != 0, if the lock will + be passed to another thread to unlock */ +#endif + lint threshold,/*!< in: threshold to wait for */ + const char* file_name,/*!< in: file name where lock requested */ + unsigned line) /*!< in: line where requested */ +{ + ulint i = 0; + lint n_spins = 0; + sync_array_t* sync_arr; + int64_t count_os_wait = 0; + + ut_ad(lock->lock_word <= threshold); + + HMT_low(); + while (lock->lock_word < threshold) { + ut_delay(srv_spin_wait_delay); + + if (i < srv_n_spin_wait_rounds) { + i++; + continue; + } + + /* If there is still a reader, then go to sleep.*/ + n_spins += i; + + sync_cell_t* cell; + + sync_arr = sync_array_get_and_reserve_cell( + lock, RW_LOCK_X_WAIT, file_name, line, &cell); + + i = 0; + + /* Check lock_word to ensure wake-up isn't missed.*/ + if (lock->lock_word < threshold) { + ++count_os_wait; + + /* Add debug info as it is needed to detect possible + deadlock. We must add info for WAIT_EX thread for + deadlock detection to work properly. */ + ut_d(rw_lock_add_debug_info( + lock, pass, RW_LOCK_X_WAIT, + file_name, line)); + + sync_array_wait_event(sync_arr, cell); + + ut_d(rw_lock_remove_debug_info( + lock, pass, RW_LOCK_X_WAIT)); + + /* It is possible to wake when lock_word < 0. 
+	We must pass the while-loop check to proceed.*/
+
+		} else {
+			sync_array_free_cell(sync_arr, cell);
+			break;
+		}
+	}
+	HMT_medium();
+	rw_lock_stats.rw_x_spin_round_count.add(n_spins);
+
+	if (count_os_wait > 0) {
+		lock->count_os_wait += static_cast<uint32_t>(count_os_wait);
+		rw_lock_stats.rw_x_os_wait_count.add(count_os_wait);
+	}
+}
+
+#ifdef UNIV_DEBUG
+# define rw_lock_x_lock_wait(L, P, T, F, O)		\
+	rw_lock_x_lock_wait_func(L, P, T, F, O)
+#else
+# define rw_lock_x_lock_wait(L, P, T, F, O)		\
+	rw_lock_x_lock_wait_func(L, T, F, O)
+#endif /* UNIV_DEBUG */
+
+/******************************************************************//**
+Low-level function for acquiring an exclusive lock.
+@return FALSE if did not succeed, TRUE if success. */
+UNIV_INLINE
+ibool
+rw_lock_x_lock_low(
+/*===============*/
+	rw_lock_t*	lock,	/*!< in: pointer to rw-lock */
+	ulint		pass,	/*!< in: pass value; != 0, if the lock will
+				be passed to another thread to unlock */
+	const char*	file_name,/*!< in: file name where lock requested */
+	unsigned	line)	/*!< in: line where requested */
+{
+	if (rw_lock_lock_word_decr(lock, X_LOCK_DECR, X_LOCK_HALF_DECR)) {
+
+		/* As we are going to write our own thread id in that field it
+		must be that the current writer_thread value is not active. */
+		ut_a(!lock->writer_thread);
+
+		/* Decrement occurred: we are writer or next-writer. */
+		if (!pass)
+		{
+			lock->writer_thread = os_thread_get_curr_id();
+		}
+
+		rw_lock_x_lock_wait(lock, pass, 0, file_name, line);
+
+	} else {
+		os_thread_id_t	thread_id = os_thread_get_curr_id();
+
+		/* Decrement failed: An X or SX lock is held by either
+		this thread or another. Try to relock. */
+		if (!pass && os_thread_eq(lock->writer_thread, thread_id)) {
+			/* Other s-locks can be allowed. If an x-lock is
+			requested recursively while an sx-lock is held,
+			the x-lock must still respect the latching order. */
+
+			/* The existing X or SX lock is from this thread */
+			if (rw_lock_lock_word_decr(lock, X_LOCK_DECR, 0)) {
+				/* There is at least one SX-lock from this
+				thread, but no X-lock. */
+
+				/* Wait for the other S-locks to be
+				released. */
+				rw_lock_x_lock_wait(
+					lock, pass, -X_LOCK_HALF_DECR,
+					file_name, line);
+
+			} else {
+				int32_t lock_word = lock->lock_word;
+				/* At least one X lock by this thread already
+				exists. Add another. */
+				if (lock_word == 0
+				    || lock_word == -X_LOCK_HALF_DECR) {
+					lock->lock_word.fetch_sub(X_LOCK_DECR);
+				} else {
+					ut_ad(lock_word <= -X_LOCK_DECR);
+					lock->lock_word.fetch_sub(1);
+				}
+			}
+
+		} else {
+			/* Another thread locked before us */
+			return(FALSE);
+		}
+	}
+
+	ut_d(rw_lock_add_debug_info(lock, pass, RW_LOCK_X, file_name, line));
+
+	lock->last_x_file_name = file_name;
+	lock->last_x_line = line & ((1U << 14) - 1);
+
+	return(TRUE);
+}
+
+/******************************************************************//**
+Low-level function for acquiring an sx lock.
+@return FALSE if did not succeed, TRUE if success. */
+ibool
+rw_lock_sx_lock_low(
+/*================*/
+	rw_lock_t*	lock,	/*!< in: pointer to rw-lock */
+	ulint		pass,	/*!< in: pass value; != 0, if the lock will
+				be passed to another thread to unlock */
+	const char*	file_name,/*!< in: file name where lock requested */
+	unsigned	line)	/*!< in: line where requested */
+{
+	if (rw_lock_lock_word_decr(lock, X_LOCK_HALF_DECR, X_LOCK_HALF_DECR)) {
+
+		/* As we are going to write our own thread id in that field it
+		must be that the current writer_thread value is not active.
*/
+		ut_a(!lock->writer_thread);
+
+		/* Decrement occurred: we are the SX lock owner. */
+		if (!pass)
+		{
+			lock->writer_thread = os_thread_get_curr_id();
+		}
+
+		lock->sx_recursive = 1;
+	} else {
+		os_thread_id_t	thread_id = os_thread_get_curr_id();
+
+		/* Decrement failed: It already has an X or SX lock by this
+		thread or another thread. If it is this thread, relock,
+		else fail. */
+		if (!pass && os_thread_eq(lock->writer_thread, thread_id)) {
+			/* This thread owns an X or SX lock */
+			if (lock->sx_recursive++ == 0) {
+				/* This thread is making its first SX-lock
+				request and it must be holding at least one
+				X-lock here because:
+
+				* There can't be a WAIT_EX thread because we
+				are the thread which has its thread_id
+				written in the writer_thread field and we
+				are not waiting.
+
+				* Any other X-lock thread cannot exist
+				because it must update the recursive flag
+				only after updating the thread_id. Had there
+				been a concurrent X-locking thread which
+				succeeded in decrementing the lock_word, it
+				must have written its thread_id before
+				setting the recursive flag. Since we passed
+				the if() condition above, we must be the
+				only thread working on this lock, and it is
+				safe to read and write the lock_word. */
+
+#ifdef UNIV_DEBUG
+				auto lock_word =
+#endif
+				lock->lock_word.fetch_sub(X_LOCK_HALF_DECR,
+					std::memory_order_relaxed);
+
+				ut_ad((lock_word == 0)
+				      || ((lock_word <= -X_LOCK_DECR)
+					  && (lock_word
+					      > -(X_LOCK_DECR
+						  + X_LOCK_HALF_DECR))));
+			}
+		} else {
+			/* Another thread locked before us */
+			return(FALSE);
+		}
+	}
+
+	ut_d(rw_lock_add_debug_info(lock, pass, RW_LOCK_SX, file_name, line));
+
+	lock->last_x_file_name = file_name;
+	lock->last_x_line = line & ((1U << 14) - 1);
+
+	return(TRUE);
+}
+
+/******************************************************************//**
+NOTE! Use the corresponding macro, not directly this function! Lock an
+rw-lock in exclusive mode for the current thread. If the rw-lock is locked
+in shared or exclusive mode, or there is an exclusive lock request waiting,
+the function spins a preset time (controlled by srv_n_spin_wait_rounds),
+waiting for the lock before suspending the thread. If the same thread has
+an x-lock on the rw-lock, locking succeeds, with the following exception:
+if pass != 0, only a single x-lock may be taken on the lock. NOTE: If the
+same thread has an s-lock, locking does not succeed!
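+
+For illustration (editor's sketch, not from the original source): callers
+normally go through the rw_lock_x_lock() macro rather than calling this
+function directly, e.g.
+
+	rw_lock_x_lock(&block->lock);	// may be taken recursively
+	...
+	rw_lock_x_unlock(&block->lock);
+
+with the macro supplying pass = 0 and __FILE__/__LINE__ for the debug
+bookkeeping; block->lock is a hypothetical rw_lock_t member here.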
*/
+void
+rw_lock_x_lock_func(
+/*================*/
+	rw_lock_t*	lock,	/*!< in: pointer to rw-lock */
+	ulint		pass,	/*!< in: pass value; != 0, if the lock will
+				be passed to another thread to unlock */
+	const char*	file_name,/*!< in: file name where lock requested */
+	unsigned	line)	/*!< in: line where requested */
+{
+	ulint		i = 0;
+	sync_array_t*	sync_arr;
+	lint		spin_count = 0;
+	int64_t		count_os_wait = 0;
+
+	ut_ad(rw_lock_validate(lock));
+	ut_ad(!rw_lock_own(lock, RW_LOCK_S));
+
+	if (rw_lock_x_lock_low(lock, pass, file_name, line)) {
+		/* Locking succeeded */
+		return;
+	}
+	rw_lock_stats.rw_x_spin_wait_count.inc();
+
+lock_loop:
+
+	if (rw_lock_x_lock_low(lock, pass, file_name, line)) {
+
+		if (count_os_wait > 0) {
+			lock->count_os_wait +=
+				static_cast<uint32_t>(count_os_wait);
+			rw_lock_stats.rw_x_os_wait_count.add(count_os_wait);
+		}
+
+		rw_lock_stats.rw_x_spin_round_count.add(spin_count);
+
+		/* Locking succeeded */
+		return;
+
+	} else {
+
+		/* Spin waiting for the lock_word to become free */
+		HMT_low();
+		ulint	j = i;
+		while (i < srv_n_spin_wait_rounds
+		       && lock->lock_word <= X_LOCK_HALF_DECR) {
+			ut_delay(srv_spin_wait_delay);
+			i++;
+		}
+
+		HMT_medium();
+		spin_count += lint(i - j);
+
+		if (i >= srv_n_spin_wait_rounds) {
+
+			os_thread_yield();
+
+		} else {
+
+			goto lock_loop;
+		}
+	}
+
+	sync_cell_t*	cell;
+
+	sync_arr = sync_array_get_and_reserve_cell(
+		lock, RW_LOCK_X, file_name, line, &cell);
+
+	/* Waiters must be set before checking lock_word, to ensure signal
+	is sent. This could lead to a few unnecessary wake-up signals. */
+	lock->waiters.exchange(1, std::memory_order_acquire);
+
+	if (rw_lock_x_lock_low(lock, pass, file_name, line)) {
+		sync_array_free_cell(sync_arr, cell);
+
+		if (count_os_wait > 0) {
+			lock->count_os_wait +=
+				static_cast<uint32_t>(count_os_wait);
+			rw_lock_stats.rw_x_os_wait_count.add(count_os_wait);
+		}
+
+		rw_lock_stats.rw_x_spin_round_count.add(spin_count);
+
+		/* Locking succeeded */
+		return;
+	}
+
+	++count_os_wait;
+
+	sync_array_wait_event(sync_arr, cell);
+
+	i = 0;
+
+	goto lock_loop;
+}
+
+/******************************************************************//**
+NOTE! Use the corresponding macro, not directly this function! Lock an
+rw-lock in SX mode for the current thread. If the rw-lock is locked
+in exclusive mode, or there is an exclusive lock request waiting,
+the function spins a preset time (controlled by srv_n_spin_wait_rounds),
+waiting for the lock, before suspending the thread. If the same thread has
+an x-lock on the rw-lock, locking succeeds, with the following exception:
+if pass != 0, only a single sx-lock may be taken on the lock. NOTE: If the
+same thread has an s-lock, locking does not succeed!
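+
+For illustration (editor's sketch): per the compatibility matrix at the
+top of this file, SX admits concurrent S readers but excludes other SX
+and X holders, and the holder may upgrade, e.g.
+
+	rw_lock_sx_lock(&index_lock);	// writers blocked, readers admitted
+	...
+	rw_lock_x_lock(&index_lock);	// same thread: SX -> X upgrade
+	...
+	rw_lock_x_unlock(&index_lock);
+	rw_lock_sx_unlock(&index_lock);
+
+where index_lock is a hypothetical rw_lock_t.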
*/ +void +rw_lock_sx_lock_func( +/*=================*/ + rw_lock_t* lock, /*!< in: pointer to rw-lock */ + ulint pass, /*!< in: pass value; != 0, if the lock will + be passed to another thread to unlock */ + const char* file_name,/*!< in: file name where lock requested */ + unsigned line) /*!< in: line where requested */ + +{ + ulint i = 0; + sync_array_t* sync_arr; + lint spin_count = 0; + int64_t count_os_wait = 0; + + ut_ad(rw_lock_validate(lock)); + ut_ad(!rw_lock_own(lock, RW_LOCK_S)); + + if (rw_lock_sx_lock_low(lock, pass, file_name, line)) { + /* Locking succeeded */ + return; + } + + rw_lock_stats.rw_sx_spin_wait_count.inc(); + +lock_loop: + + if (rw_lock_sx_lock_low(lock, pass, file_name, line)) { + + if (count_os_wait > 0) { + lock->count_os_wait += + static_cast<uint32_t>(count_os_wait); + rw_lock_stats.rw_sx_os_wait_count.add(count_os_wait); + } + + rw_lock_stats.rw_sx_spin_round_count.add(spin_count); + + /* Locking succeeded */ + return; + + } else { + + /* Spin waiting for the lock_word to become free */ + ulint j = i; + while (i < srv_n_spin_wait_rounds + && lock->lock_word <= X_LOCK_HALF_DECR) { + ut_delay(srv_spin_wait_delay); + i++; + } + + spin_count += lint(i - j); + + if (i >= srv_n_spin_wait_rounds) { + + os_thread_yield(); + + } else { + + goto lock_loop; + } + } + + sync_cell_t* cell; + + sync_arr = sync_array_get_and_reserve_cell( + lock, RW_LOCK_SX, file_name, line, &cell); + + /* Waiters must be set before checking lock_word, to ensure signal + is sent. This could lead to a few unnecessary wake-up signals. */ + lock->waiters.exchange(1, std::memory_order_acquire); + + if (rw_lock_sx_lock_low(lock, pass, file_name, line)) { + + sync_array_free_cell(sync_arr, cell); + + if (count_os_wait > 0) { + lock->count_os_wait += + static_cast<uint32_t>(count_os_wait); + rw_lock_stats.rw_sx_os_wait_count.add(count_os_wait); + } + + rw_lock_stats.rw_sx_spin_round_count.add(spin_count); + + /* Locking succeeded */ + return; + } + + ++count_os_wait; + + sync_array_wait_event(sync_arr, cell); + + i = 0; + + goto lock_loop; +} + +#ifdef UNIV_DEBUG + +/******************************************************************//** +Checks that the rw-lock has been initialized and that there are no +simultaneous shared and exclusive locks. +@return true */ +bool +rw_lock_validate( +/*=============*/ + const rw_lock_t* lock) /*!< in: rw-lock */ +{ + ut_ad(lock); + + ut_ad(lock->created); + + int32_t lock_word = lock->lock_word; + + ut_ad(lock->waiters < 2); + ut_ad(lock_word > -(2 * X_LOCK_DECR)); + ut_ad(lock_word <= X_LOCK_DECR); + + return(true); +} + +/******************************************************************//** +Checks if somebody has locked the rw-lock in the specified mode. +@return true if locked */ +bool +rw_lock_is_locked( +/*==============*/ + rw_lock_t* lock, /*!< in: rw-lock */ + ulint lock_type) /*!< in: lock type: RW_LOCK_S, + RW_LOCK_X or RW_LOCK_SX */ +{ + ut_ad(rw_lock_validate(lock)); + + switch (lock_type) { + case RW_LOCK_S: + return(rw_lock_get_reader_count(lock) > 0); + + case RW_LOCK_X: + return(rw_lock_get_writer(lock) == RW_LOCK_X); + + case RW_LOCK_SX: + return(rw_lock_get_sx_lock_count(lock) > 0); + + default: + ut_error; + } + return(false); /* avoid compiler warnings */ +} + +/******************************************************************//** +Inserts the debug information for an rw-lock. 
*/ +void +rw_lock_add_debug_info( +/*===================*/ + rw_lock_t* lock, /*!< in: rw-lock */ + ulint pass, /*!< in: pass value */ + ulint lock_type, /*!< in: lock type */ + const char* file_name, /*!< in: file where requested */ + unsigned line) /*!< in: line where requested */ +{ + ut_ad(file_name != NULL); + + rw_lock_debug_t* info = rw_lock_debug_create(); + + rw_lock_debug_mutex_enter(); + + info->pass = pass; + info->line = line; + info->lock_type = lock_type; + info->file_name = file_name; + info->thread_id = os_thread_get_curr_id(); + + UT_LIST_ADD_FIRST(lock->debug_list, info); + + rw_lock_debug_mutex_exit(); + + if (pass == 0 && lock_type != RW_LOCK_X_WAIT) { + int32_t lock_word = lock->lock_word; + + /* Recursive x while holding SX + (lock_type == RW_LOCK_X && lock_word == -X_LOCK_HALF_DECR) + is treated as not-relock (new lock). */ + + if ((lock_type == RW_LOCK_X + && lock_word < -X_LOCK_HALF_DECR) + || (lock_type == RW_LOCK_SX + && (lock_word < 0 || lock->sx_recursive == 1))) { + + sync_check_lock_validate(lock); + sync_check_lock_granted(lock); + } else { + sync_check_relock(lock); + } + } +} + +/******************************************************************//** +Removes a debug information struct for an rw-lock. */ +void +rw_lock_remove_debug_info( +/*======================*/ + rw_lock_t* lock, /*!< in: rw-lock */ + ulint pass, /*!< in: pass value */ + ulint lock_type) /*!< in: lock type */ +{ + rw_lock_debug_t* info; + + ut_ad(lock); + + if (pass == 0 && lock_type != RW_LOCK_X_WAIT) { + sync_check_unlock(lock); + } + + rw_lock_debug_mutex_enter(); + + for (info = UT_LIST_GET_FIRST(lock->debug_list); + info != 0; + info = UT_LIST_GET_NEXT(list, info)) { + + if (pass == info->pass + && (pass != 0 + || os_thread_eq(info->thread_id, + os_thread_get_curr_id())) + && info->lock_type == lock_type) { + + /* Found! */ + UT_LIST_REMOVE(lock->debug_list, info); + + rw_lock_debug_mutex_exit(); + + rw_lock_debug_free(info); + + return; + } + } + + ut_error; +} + +/******************************************************************//** +Checks if the thread has locked the rw-lock in the specified mode, with +the pass value == 0. +@return TRUE if locked */ +bool +rw_lock_own( +/*========*/ + const rw_lock_t*lock, /*!< in: rw-lock */ + ulint lock_type) /*!< in: lock type: RW_LOCK_S, + RW_LOCK_X */ +{ + ut_ad(lock); + ut_ad(rw_lock_validate(lock)); + + const os_thread_id_t thread_id = os_thread_get_curr_id(); + + if (!os_thread_eq(lock->writer_thread, thread_id)) { + } else if (lock_type == RW_LOCK_X && rw_lock_get_x_lock_count(lock)) { + return TRUE; + } else if (lock_type == RW_LOCK_SX && rw_lock_get_sx_lock_count(lock)) { + return TRUE; + } + + rw_lock_debug_mutex_enter(); + + for (const rw_lock_debug_t* info = UT_LIST_GET_FIRST(lock->debug_list); + info != NULL; + info = UT_LIST_GET_NEXT(list, info)) { + + if (os_thread_eq(info->thread_id, thread_id) + && info->pass == 0 + && info->lock_type == lock_type) { + + rw_lock_debug_mutex_exit(); + /* Found! */ + + return(true); + } + } + rw_lock_debug_mutex_exit(); + + return(false); +} + +/** Checks if the thread has locked the rw-lock in the specified mode, with +the pass value == 0. 
+@param[in] lock rw-lock +@param[in] flags specify lock types with OR of the + rw_lock_flag_t values +@return true if locked */ +bool rw_lock_own_flagged(const rw_lock_t* lock, rw_lock_flags_t flags) +{ + ut_ad(rw_lock_validate(lock)); + + const os_thread_id_t thread_id = os_thread_get_curr_id(); + + if (!os_thread_eq(lock->writer_thread, thread_id)) { + } else if ((flags & RW_LOCK_FLAG_X) + && rw_lock_get_x_lock_count(lock)) { + return true; + } else if ((flags & RW_LOCK_FLAG_SX) + && rw_lock_get_sx_lock_count(lock)) { + return true; + } + + rw_lock_debug_mutex_enter(); + + for (rw_lock_debug_t* info = UT_LIST_GET_FIRST(lock->debug_list); + info != NULL; + info = UT_LIST_GET_NEXT(list, info)) { + if (!os_thread_eq(info->thread_id, thread_id) + || info->pass) { + continue; + } + + switch (info->lock_type) { + case RW_LOCK_S: + if (!(flags & RW_LOCK_FLAG_S)) { + continue; + } + break; + + case RW_LOCK_X: + if (!(flags & RW_LOCK_FLAG_X)) { + continue; + } + break; + + case RW_LOCK_SX: + if (!(flags & RW_LOCK_FLAG_SX)) { + continue; + } + break; + } + + rw_lock_debug_mutex_exit(); + return true; + } + + rw_lock_debug_mutex_exit(); + return false; +} + +/***************************************************************//** +Prints debug info of currently locked rw-locks. */ +void +rw_lock_list_print_info( +/*====================*/ + FILE* file) /*!< in: file where to print */ +{ + ulint count = 0; + + mutex_enter(&rw_lock_list_mutex); + + fputs("-------------\n" + "RW-LATCH INFO\n" + "-------------\n", file); + + for (const rw_lock_t& lock : rw_lock_list) { + + count++; + + if (lock.lock_word != X_LOCK_DECR) { + + fprintf(file, "RW-LOCK: %p ", (void*) &lock); + + if (int32_t waiters= lock.waiters) { + fprintf(file, " (%d waiters)\n", waiters); + } else { + putc('\n', file); + } + + rw_lock_debug_t* info; + + rw_lock_debug_mutex_enter(); + + for (info = UT_LIST_GET_FIRST(lock.debug_list); + info != NULL; + info = UT_LIST_GET_NEXT(list, info)) { + + rw_lock_debug_print(file, info); + } + + rw_lock_debug_mutex_exit(); + } + } + + fprintf(file, "Total number of rw-locks " ULINTPF "\n", count); + mutex_exit(&rw_lock_list_mutex); +} + +/*********************************************************************//** +Prints info of a debug struct. */ +void +rw_lock_debug_print( +/*================*/ + FILE* f, /*!< in: output stream */ + const rw_lock_debug_t* info) /*!< in: debug struct */ +{ + ulint rwt = info->lock_type; + + fprintf(f, "Locked: thread " ULINTPF " file %s line %u ", + ulint(info->thread_id), + sync_basename(info->file_name), + info->line); + + switch (rwt) { + case RW_LOCK_S: + fputs("S-LOCK", f); + break; + case RW_LOCK_X: + fputs("X-LOCK", f); + break; + case RW_LOCK_SX: + fputs("SX-LOCK", f); + break; + case RW_LOCK_X_WAIT: + fputs("WAIT X-LOCK", f); + break; + default: + ut_error; + } + + if (info->pass != 0) { + fprintf(f, " pass value %lu", (ulong) info->pass); + } + + fprintf(f, "\n"); +} + +/** Print the rw-lock information. +@return the string representation */ +std::string +rw_lock_t::to_string() const +{ + /* Note: For X locks it can be locked form multiple places because + the same thread can call X lock recursively. 
*/
+
+	std::ostringstream msg;
+	bool written = false;
+
+	ut_ad(rw_lock_validate(this));
+
+	msg << "RW-LATCH: "
+		<< "thread id " << os_thread_get_curr_id()
+		<< " addr: " << this
+		<< " Locked from: ";
+
+	rw_lock_debug_mutex_enter();
+
+	for (rw_lock_debug_t* info = UT_LIST_GET_FIRST(debug_list);
+	     info != NULL;
+	     info = UT_LIST_GET_NEXT(list, info)) {
+		if (!os_thread_eq(info->thread_id, os_thread_get_curr_id())) {
+			continue;
+		}
+
+		if (written) {
+			msg << ", ";
+		}
+
+		written = true;
+
+		msg << info->file_name << ":" << info->line;
+	}
+
+	rw_lock_debug_mutex_exit();
+
+	return(msg.str());
+}
+#endif /* UNIV_DEBUG */
diff --git a/storage/innobase/sync/sync0sync.cc b/storage/innobase/sync/sync0sync.cc
new file mode 100644
index 00000000..0a6f8bfb
--- /dev/null
+++ b/storage/innobase/sync/sync0sync.cc
@@ -0,0 +1,246 @@
+/*****************************************************************************
+
+Copyright (c) 1995, 2016, Oracle and/or its affiliates. All Rights Reserved.
+Copyright (c) 2008, Google Inc.
+Copyright (c) 2020, MariaDB Corporation.
+
+Portions of this file contain modifications contributed and copyrighted by
+Google, Inc. Those modifications are gratefully acknowledged and are described
+briefly in the InnoDB documentation. The contributions by Google are
+incorporated with their permission, and subject to the conditions contained in
+the file COPYING.Google.
+
+This program is free software; you can redistribute it and/or modify it under
+the terms of the GNU General Public License as published by the Free Software
+Foundation; version 2 of the License.
+
+This program is distributed in the hope that it will be useful, but WITHOUT
+ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
+FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details.
+ +You should have received a copy of the GNU General Public License along with +this program; if not, write to the Free Software Foundation, Inc., +51 Franklin Street, Fifth Floor, Boston, MA 02110-1335 USA + +*****************************************************************************/ + +/**************************************************//** +@file sync/sync0sync.cc +Mutex, the basic synchronization primitive + +Created 9/5/1995 Heikki Tuuri +*******************************************************/ + +#include "sync0rw.h" +#include "sync0sync.h" + +#ifdef UNIV_PFS_MUTEX +mysql_pfs_key_t buf_pool_mutex_key; +mysql_pfs_key_t dict_foreign_err_mutex_key; +mysql_pfs_key_t dict_sys_mutex_key; +mysql_pfs_key_t fil_system_mutex_key; +mysql_pfs_key_t flush_list_mutex_key; +mysql_pfs_key_t fts_delete_mutex_key; +mysql_pfs_key_t fts_doc_id_mutex_key; +mysql_pfs_key_t fts_pll_tokenize_mutex_key; +mysql_pfs_key_t ibuf_bitmap_mutex_key; +mysql_pfs_key_t ibuf_mutex_key; +mysql_pfs_key_t ibuf_pessimistic_insert_mutex_key; +mysql_pfs_key_t log_sys_mutex_key; +mysql_pfs_key_t log_cmdq_mutex_key; +mysql_pfs_key_t log_flush_order_mutex_key; +mysql_pfs_key_t recalc_pool_mutex_key; +mysql_pfs_key_t purge_sys_pq_mutex_key; +mysql_pfs_key_t recv_sys_mutex_key; +mysql_pfs_key_t redo_rseg_mutex_key; +mysql_pfs_key_t noredo_rseg_mutex_key; +mysql_pfs_key_t page_zip_stat_per_index_mutex_key; +# ifdef UNIV_DEBUG +mysql_pfs_key_t rw_lock_debug_mutex_key; +# endif /* UNIV_DEBUG */ +mysql_pfs_key_t rtr_active_mutex_key; +mysql_pfs_key_t rtr_match_mutex_key; +mysql_pfs_key_t rtr_path_mutex_key; +mysql_pfs_key_t rw_lock_list_mutex_key; +mysql_pfs_key_t srv_innodb_monitor_mutex_key; +mysql_pfs_key_t srv_misc_tmpfile_mutex_key; +mysql_pfs_key_t srv_monitor_file_mutex_key; +mysql_pfs_key_t buf_dblwr_mutex_key; +mysql_pfs_key_t trx_mutex_key; +mysql_pfs_key_t trx_pool_mutex_key; +mysql_pfs_key_t trx_pool_manager_mutex_key; +mysql_pfs_key_t lock_mutex_key; +mysql_pfs_key_t lock_wait_mutex_key; +mysql_pfs_key_t trx_sys_mutex_key; +mysql_pfs_key_t srv_threads_mutex_key; +mysql_pfs_key_t sync_array_mutex_key; +mysql_pfs_key_t thread_mutex_key; +mysql_pfs_key_t row_drop_list_mutex_key; +mysql_pfs_key_t rw_trx_hash_element_mutex_key; +mysql_pfs_key_t read_view_mutex_key; +#endif /* UNIV_PFS_MUTEX */ +#ifdef UNIV_PFS_RWLOCK +mysql_pfs_key_t btr_search_latch_key; +mysql_pfs_key_t dict_operation_lock_key; +mysql_pfs_key_t index_tree_rw_lock_key; +mysql_pfs_key_t index_online_log_key; +mysql_pfs_key_t fil_space_latch_key; +mysql_pfs_key_t fts_cache_rw_lock_key; +mysql_pfs_key_t fts_cache_init_rw_lock_key; +mysql_pfs_key_t trx_i_s_cache_lock_key; +mysql_pfs_key_t trx_purge_latch_key; +#endif /* UNIV_PFS_RWLOCK */ + +/** For monitoring active mutexes */ +MutexMonitor mutex_monitor; + +/** +Prints wait info of the sync system. 
+@param file - where to print */
+static
+void
+sync_print_wait_info(FILE* file)
+{
+	fprintf(file,
+		"RW-shared spins " UINT64PF ", rounds " UINT64PF ","
+		" OS waits " UINT64PF "\n"
+		"RW-excl spins " UINT64PF ", rounds " UINT64PF ","
+		" OS waits " UINT64PF "\n"
+		"RW-sx spins " UINT64PF ", rounds " UINT64PF ","
+		" OS waits " UINT64PF "\n",
+		(ib_uint64_t) rw_lock_stats.rw_s_spin_wait_count,
+		(ib_uint64_t) rw_lock_stats.rw_s_spin_round_count,
+		(ib_uint64_t) rw_lock_stats.rw_s_os_wait_count,
+		(ib_uint64_t) rw_lock_stats.rw_x_spin_wait_count,
+		(ib_uint64_t) rw_lock_stats.rw_x_spin_round_count,
+		(ib_uint64_t) rw_lock_stats.rw_x_os_wait_count,
+		(ib_uint64_t) rw_lock_stats.rw_sx_spin_wait_count,
+		(ib_uint64_t) rw_lock_stats.rw_sx_spin_round_count,
+		(ib_uint64_t) rw_lock_stats.rw_sx_os_wait_count);
+
+	fprintf(file,
+		"Spin rounds per wait: %.2f RW-shared,"
+		" %.2f RW-excl, %.2f RW-sx\n",
+		rw_lock_stats.rw_s_spin_wait_count
+		? static_cast<double>(rw_lock_stats.rw_s_spin_round_count) /
+		  static_cast<double>(rw_lock_stats.rw_s_spin_wait_count)
+		: static_cast<double>(rw_lock_stats.rw_s_spin_round_count),
+		rw_lock_stats.rw_x_spin_wait_count
+		? static_cast<double>(rw_lock_stats.rw_x_spin_round_count) /
+		  static_cast<double>(rw_lock_stats.rw_x_spin_wait_count)
+		: static_cast<double>(rw_lock_stats.rw_x_spin_round_count),
+		rw_lock_stats.rw_sx_spin_wait_count
+		? static_cast<double>(rw_lock_stats.rw_sx_spin_round_count) /
+		  static_cast<double>(rw_lock_stats.rw_sx_spin_wait_count)
+		: static_cast<double>(rw_lock_stats.rw_sx_spin_round_count));
+}
+
+/**
+Prints info of the sync system.
+@param file - where to print */
+void
+sync_print(FILE* file)
+{
+#ifdef UNIV_DEBUG
+	rw_lock_list_print_info(file);
+#endif /* UNIV_DEBUG */
+
+	sync_array_print(file);
+
+	sync_print_wait_info(file);
+}
+
+/** Extract the basename from a filename, e.g. "/a/b/c/d/e.cc" -> "e.cc"
+@param[in]	filename	Name from which to extract the basename
+@return the basename */
+const char*
+sync_basename(const char* filename)
+{
+	const char*	ptr = filename + strlen(filename) - 1;
+
+	while (ptr > filename && *ptr != '/' && *ptr != '\\') {
+		--ptr;
+	}
+
+	++ptr;
+
+	return(ptr);
+}
+
+/** String representation of the filename and line number where the
+latch was created
+@param[in]	id	Latch ID
+@param[in]	created	Filename and line number where it was created
+@return the string representation */
+std::string
+sync_mutex_to_string(
+	latch_id_t		id,
+	const std::string&	created)
+{
+	std::ostringstream msg;
+
+	msg << "Mutex " << sync_latch_get_name(id) << " "
+		<< "created " << created;
+
+	return(msg.str());
+}
+
+/** Enable the mutex monitoring */
+void
+MutexMonitor::enable()
+{
+	/* Note: We don't add any latch meta-data after startup. Therefore
+	there is no need to use a mutex here. */
+
+	LatchMetaData::iterator	end = latch_meta.end();
+
+	for (LatchMetaData::iterator it = latch_meta.begin(); it != end; ++it) {
+
+		if (*it != NULL) {
+			(*it)->get_counter()->enable();
+		}
+	}
+}
+
+/** Disable the mutex monitoring */
+void
+MutexMonitor::disable()
+{
+	/* Note: We don't add any latch meta-data after startup. Therefore
+	there is no need to use a mutex here. */
+
+	LatchMetaData::iterator	end = latch_meta.end();
+
+	for (LatchMetaData::iterator it = latch_meta.begin(); it != end; ++it) {
+
+		if (*it != NULL) {
+			(*it)->get_counter()->disable();
+		}
+	}
+}
+
+/** Reset the mutex monitoring counters */
+void
+MutexMonitor::reset()
+{
+	/* Note: We don't add any latch meta-data after startup.
Therefore
+	there is no need to use a mutex here. */
+
+	LatchMetaData::iterator	end = latch_meta.end();
+
+	for (LatchMetaData::iterator it = latch_meta.begin(); it != end; ++it) {
+
+		if (*it != NULL) {
+			(*it)->get_counter()->reset();
+		}
+	}
+
+	mutex_enter(&rw_lock_list_mutex);
+
+	for (rw_lock_t& rw_lock : rw_lock_list) {
+		rw_lock.count_os_wait = 0;
+	}
+
+	mutex_exit(&rw_lock_list_mutex);
+}
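+
+/* For illustration only (editor's note, hypothetical numbers): the
+"Spin rounds per wait" figures printed by sync_print_wait_info() above
+are plain averages, e.g.
+
+	rw_s_spin_wait_count  = 1000	(spin-wait episodes)
+	rw_s_spin_round_count = 30000	(individual spin rounds)
+	rounds per wait       = 30000 / 1000 = 30.00
+
+When a wait count is zero, the raw round count is printed instead, which
+avoids a division by zero. */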