author | Daniel Baumann <daniel.baumann@progress-linux.org> | 2024-05-04 18:00:34 +0000
---|---|---
committer | Daniel Baumann <daniel.baumann@progress-linux.org> | 2024-05-04 18:00:34 +0000
commit | 3f619478f796eddbba6e39502fe941b285dd97b1 (patch) |
tree | e2c7b5777f728320e5b5542b6213fd3591ba51e2 /storage/innobase/srv/srv0srv.cc |
parent | Initial commit. (diff) |
download | mariadb-3f619478f796eddbba6e39502fe941b285dd97b1.tar.xz, mariadb-3f619478f796eddbba6e39502fe941b285dd97b1.zip |
Adding upstream version 1:10.11.6.
Signed-off-by: Daniel Baumann <daniel.baumann@progress-linux.org>
Diffstat (limited to 'storage/innobase/srv/srv0srv.cc')
-rw-r--r-- | storage/innobase/srv/srv0srv.cc | 1659
1 file changed, 1659 insertions, 0 deletions
diff --git a/storage/innobase/srv/srv0srv.cc b/storage/innobase/srv/srv0srv.cc new file mode 100644 index 00000000..bf9755fb --- /dev/null +++ b/storage/innobase/srv/srv0srv.cc @@ -0,0 +1,1659 @@ +/***************************************************************************** + +Copyright (c) 1995, 2017, Oracle and/or its affiliates. All Rights Reserved. +Copyright (c) 2008, 2009 Google Inc. +Copyright (c) 2009, Percona Inc. +Copyright (c) 2013, 2022, MariaDB Corporation. + +Portions of this file contain modifications contributed and copyrighted by +Google, Inc. Those modifications are gratefully acknowledged and are described +briefly in the InnoDB documentation. The contributions by Google are +incorporated with their permission, and subject to the conditions contained in +the file COPYING.Google. + +Portions of this file contain modifications contributed and copyrighted +by Percona Inc.. Those modifications are +gratefully acknowledged and are described briefly in the InnoDB +documentation. The contributions by Percona Inc. are incorporated with +their permission, and subject to the conditions contained in the file +COPYING.Percona. + +This program is free software; you can redistribute it and/or modify it under +the terms of the GNU General Public License as published by the Free Software +Foundation; version 2 of the License. + +This program is distributed in the hope that it will be useful, but WITHOUT +ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS +FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details. + +You should have received a copy of the GNU General Public License along with +this program; if not, write to the Free Software Foundation, Inc., +51 Franklin Street, Fifth Floor, Boston, MA 02110-1335 USA + +*****************************************************************************/ + +/**************************************************//** +@file srv/srv0srv.cc +The database server main program + +Created 10/8/1995 Heikki Tuuri +*******************************************************/ + +#include "my_global.h" +#include "mysql/psi/mysql_stage.h" +#include "mysql/psi/psi.h" + +#include "btr0sea.h" +#include "buf0flu.h" +#include "buf0lru.h" +#include "dict0boot.h" +#include "dict0load.h" +#include "ibuf0ibuf.h" +#include "lock0lock.h" +#include "log0recv.h" +#include "mem0mem.h" +#include "pars0pars.h" +#include "que0que.h" +#include "row0mysql.h" +#include "row0log.h" +#include "srv0mon.h" +#include "srv0srv.h" +#include "srv0start.h" +#include "trx0i_s.h" +#include "trx0purge.h" +#include "btr0defragment.h" +#include "ut0mem.h" +#include "fil0fil.h" +#include "fil0crypt.h" +#include "fil0pagecompress.h" +#include "trx0types.h" +#include <list> +#include "log.h" + +#include "transactional_lock_guard.h" + +#include <my_service_manager.h> +/* The following is the maximum allowed duration of a lock wait. */ +ulong srv_fatal_semaphore_wait_threshold = DEFAULT_SRV_FATAL_SEMAPHORE_TIMEOUT; + +/* How much data manipulation language (DML) statements need to be delayed, +in microseconds, in order to reduce the lagging of the purge thread. 
*/ +ulint srv_dml_needed_delay; + +const char* srv_main_thread_op_info = ""; + +/** Prefix used by MySQL to indicate pre-5.1 table name encoding */ +const char srv_mysql50_table_name_prefix[10] = "#mysql50#"; + +/* Server parameters which are read from the initfile */ + +/* The following three are dir paths which are catenated before file +names, where the file name itself may also contain a path */ + +char* srv_data_home; + +/** Rollback files directory, can be absolute. */ +char* srv_undo_dir; + +/** The number of tablespaces to use for rollback segments. */ +uint srv_undo_tablespaces; + +/** The number of UNDO tablespaces that are open and ready to use. */ +uint32_t srv_undo_tablespaces_open; + +/** The number of UNDO tablespaces that are active (hosting some rollback +segment). It is quite possible that some of the tablespaces doesn't host +any of the rollback-segment based on configuration used. */ +uint32_t srv_undo_tablespaces_active; + +/** Rate at which UNDO records should be purged. */ +ulong srv_purge_rseg_truncate_frequency; + +/** Enable or Disable Truncate of UNDO tablespace. +Note: If enabled then UNDO tablespace will be selected for truncate. +While Server waits for undo-tablespace to truncate if user disables +it, truncate action is completed but no new tablespace is marked +for truncate (action is never aborted). */ +my_bool srv_undo_log_truncate; + +/** Maximum size of undo tablespace. */ +unsigned long long srv_max_undo_log_size; + +/** Set if InnoDB must operate in read-only mode. We don't do any +recovery and open all tables in RO mode instead of RW mode. We don't +sync the max trx id to disk either. */ +my_bool srv_read_only_mode; +/** store to its own file each table created by an user; data +dictionary tables are in the system tablespace 0 */ +my_bool srv_file_per_table; +/** Set if InnoDB operates in read-only mode or innodb-force-recovery +is greater than SRV_FORCE_NO_TRX_UNDO. */ +my_bool high_level_read_only; + +/** Sort buffer size in index creation */ +ulong srv_sort_buf_size; +/** Maximum modification log file size for online index creation */ +unsigned long long srv_online_max_size; + +/* If this flag is TRUE, then we will use the native aio of the +OS (provided we compiled Innobase with it in), otherwise we will +use simulated aio we build below with threads. +Currently we support native aio on windows and linux */ +my_bool srv_use_native_aio; +my_bool srv_numa_interleave; +/** copy of innodb_use_atomic_writes; @see innodb_init_params() */ +my_bool srv_use_atomic_writes; +/** innodb_compression_algorithm; used with page compression */ +ulong innodb_compression_algorithm; + +/*------------------------- LOG FILES ------------------------ */ +char* srv_log_group_home_dir; + +/** The InnoDB redo log file size, or 0 when changing the redo log format +at startup (while disallowing writes to the redo log). */ +ulonglong srv_log_file_size; +/** innodb_flush_log_at_trx_commit */ +ulong srv_flush_log_at_trx_commit; +/** innodb_flush_log_at_timeout */ +uint srv_flush_log_at_timeout; +/** innodb_page_size */ +ulong srv_page_size; +/** log2 of innodb_page_size; @see innodb_init_params() */ +uint32_t srv_page_size_shift; + +/** innodb_adaptive_flushing; try to flush dirty pages so as to avoid +IO bursts at the checkpoints. 
*/ +my_bool srv_adaptive_flushing; + +/** innodb_flush_sync; whether to ignore io_capacity at log checkpoints */ +my_bool srv_flush_sync; + +/** common thread pool*/ +tpool::thread_pool* srv_thread_pool; + +/** Maximum number of times allowed to conditionally acquire +mutex before switching to blocking wait on the mutex */ +#define MAX_MUTEX_NOWAIT 2 + +/** Check whether the number of failed nonblocking mutex +acquisition attempts exceeds maximum allowed value. If so, +srv_printf_innodb_monitor() will request mutex acquisition +with mysql_mutex_lock(), which will wait until it gets the mutex. */ +#define MUTEX_NOWAIT(mutex_skipped) ((mutex_skipped) < MAX_MUTEX_NOWAIT) + +/** copy of innodb_buffer_pool_size */ +ulint srv_buf_pool_size; +/** Requested buffer pool chunk size */ +size_t srv_buf_pool_chunk_unit; +/** innodb_lru_scan_depth; number of blocks scanned in LRU flush batch */ +ulong srv_LRU_scan_depth; +/** innodb_flush_neighbors; whether or not to flush neighbors of a block */ +ulong srv_flush_neighbors; +/** Previously requested size */ +ulint srv_buf_pool_old_size; +/** Current size as scaling factor for the other components */ +ulint srv_buf_pool_base_size; +/** Current size in bytes */ +ulint srv_buf_pool_curr_size; +/** Dump this % of each buffer pool during BP dump */ +ulong srv_buf_pool_dump_pct; +/** Abort load after this amount of pages */ +#ifdef UNIV_DEBUG +ulong srv_buf_pool_load_pages_abort = LONG_MAX; +#endif +/** Lock table size in bytes */ +ulint srv_lock_table_size = ULINT_MAX; + +/** the value of innodb_checksum_algorithm */ +ulong srv_checksum_algorithm; + +/** innodb_read_io_threads */ +uint srv_n_read_io_threads; +/** innodb_write_io_threads */ +uint srv_n_write_io_threads; + +/** innodb_random_read_ahead */ +my_bool srv_random_read_ahead; +/** innodb_read_ahead_threshold; the number of pages that must be present +in the buffer cache and accessed sequentially for InnoDB to trigger a +readahead request. */ +ulong srv_read_ahead_threshold; + +/** innodb_change_buffer_max_size; maximum on-disk size of change +buffer in terms of percentage of the buffer pool. */ +uint srv_change_buffer_max_size; + +ulong srv_file_flush_method; + + +/** copy of innodb_open_files; @see innodb_init_params() */ +ulint srv_max_n_open_files; + +/** innodb_io_capacity */ +ulong srv_io_capacity; +/** innodb_io_capacity_max */ +ulong srv_max_io_capacity; + +/* The InnoDB main thread tries to keep the ratio of modified pages +in the buffer pool to all database pages in the buffer pool smaller than +the following number. But it is not guaranteed that the value stays below +that during a time of heavy update/insert activity. */ + +/** innodb_max_dirty_pages_pct */ +double srv_max_buf_pool_modified_pct; +/** innodb_max_dirty_pages_pct_lwm */ +double srv_max_dirty_pages_pct_lwm; + +/** innodb_adaptive_flushing_lwm; the percentage of log capacity at +which adaptive flushing, if enabled, will kick in. */ +double srv_adaptive_flushing_lwm; + +/** innodb_flushing_avg_loops; number of iterations over which +adaptive flushing is averaged */ +ulong srv_flushing_avg_loops; + +/** innodb_purge_threads; the number of purge tasks to use */ +uint srv_n_purge_threads; + +/** innodb_purge_batch_size, in pages */ +ulong srv_purge_batch_size; + +/** innodb_stats_method decides how InnoDB treats +NULL value when collecting statistics. By default, it is set to +SRV_STATS_NULLS_EQUAL(0), ie. 
all NULL value are treated equal */ +ulong srv_innodb_stats_method; + +srv_stats_t srv_stats; + +/* structure to pass status variables to MySQL */ +export_var_t export_vars; + +/** Normally 0. When nonzero, skip some phases of crash recovery, +starting from SRV_FORCE_IGNORE_CORRUPT, so that data can be recovered +by SELECT or mysqldump. When this is nonzero, we do not allow any user +modifications to the data. */ +ulong srv_force_recovery; + +/** innodb_print_all_deadlocks; whether to print all user-level +transactions deadlocks to the error log */ +my_bool srv_print_all_deadlocks; + +/** innodb_cmp_per_index_enabled; enable +INFORMATION_SCHEMA.innodb_cmp_per_index */ +my_bool srv_cmp_per_index_enabled; + +/** innodb_fast_shutdown=1 skips purge and change buffer merge. +innodb_fast_shutdown=2 effectively crashes the server (no log checkpoint). +innodb_fast_shutdown=3 is a clean shutdown that skips the rollback +of active transaction (to be done on restart). */ +uint srv_fast_shutdown; + +/** copy of innodb_status_file; generate a innodb_status.<pid> file */ +ibool srv_innodb_status; + +/** innodb_stats_transient_sample_pages; +When estimating number of different key values in an index, sample +this many index pages, there are 2 ways to calculate statistics: +* persistent stats that are calculated by ANALYZE TABLE and saved + in the innodb database. +* quick transient stats, that are used if persistent stats for the given + table/index are not found in the innodb database */ +unsigned long long srv_stats_transient_sample_pages; +/** innodb_stats_persistent */ +my_bool srv_stats_persistent; +/** innodb_stats_include_delete_marked */ +my_bool srv_stats_include_delete_marked; +/** innodb_stats_persistent_sample_pages */ +unsigned long long srv_stats_persistent_sample_pages; +/** innodb_stats_auto_recalc */ +my_bool srv_stats_auto_recalc; + +/** innodb_stats_modified_counter; The number of rows modified before +we calculate new statistics (default 0 = current limits) */ +unsigned long long srv_stats_modified_counter; + +/** innodb_stats_traditional; enable traditional statistic calculation +based on number of configured pages */ +my_bool srv_stats_sample_traditional; + +my_bool srv_use_doublewrite_buf; + +/** innodb_sync_spin_loops */ +ulong srv_n_spin_wait_rounds; +/** innodb_spin_wait_delay */ +uint srv_spin_wait_delay; + +/** Number of initialized rollback segments for persistent undo log */ +ulong srv_available_undo_logs; + +/* Defragmentation */ +my_bool srv_defragment; +/** innodb_defragment_n_pages */ +uint srv_defragment_n_pages; +uint srv_defragment_stats_accuracy; +/** innodb_defragment_fill_factor_n_recs */ +uint srv_defragment_fill_factor_n_recs; +/** innodb_defragment_fill_factor */ +double srv_defragment_fill_factor; +/** innodb_defragment_frequency */ +uint srv_defragment_frequency; +/** derived from innodb_defragment_frequency; +@see innodb_defragment_frequency_update() */ +ulonglong srv_defragment_interval; + +/** Current mode of operation */ +enum srv_operation_mode srv_operation; + +/** whether this is the server's first start after mariabackup --prepare */ +bool srv_start_after_restore; + +/* Set the following to 0 if you want InnoDB to write messages on +stderr on startup/shutdown. Not enabled on the embedded server. 
*/ +ibool srv_print_verbose_log; +my_bool srv_print_innodb_monitor; +my_bool srv_print_innodb_lock_monitor; +/** innodb_force_primary_key; whether to disallow CREATE TABLE without +PRIMARY KEY */ +my_bool srv_force_primary_key; + +/** Key version to encrypt the temporary tablespace */ +my_bool innodb_encrypt_temporary_tables; + +my_bool srv_immediate_scrub_data_uncompressed; + +static time_t srv_last_monitor_time; + +static mysql_mutex_t srv_innodb_monitor_mutex; + +/** Mutex protecting page_zip_stat_per_index */ +mysql_mutex_t page_zip_stat_per_index_mutex; + +/** Mutex for locking srv_monitor_file */ +mysql_mutex_t srv_monitor_file_mutex; + +/** Temporary file for innodb monitor output */ +FILE* srv_monitor_file; +/** Mutex for locking srv_misc_tmpfile */ +mysql_mutex_t srv_misc_tmpfile_mutex; +/** Temporary file for miscellanous diagnostic output */ +FILE* srv_misc_tmpfile; + +/* The following counts are used by the srv_master_callback. */ + +/** Iterations of the loop bounded by 'srv_active' label. */ +ulint srv_main_active_loops; +/** Iterations of the loop bounded by the 'srv_idle' label. */ +ulint srv_main_idle_loops; +/** Iterations of the loop bounded by the 'srv_shutdown' label. */ +static ulint srv_main_shutdown_loops; +/** Log writes involving flush. */ +ulint srv_log_writes_and_flush; + +/* This is only ever touched by the master thread. It records the +time when the last flush of log file has happened. The master +thread ensures that we flush the log files at least once per +second. */ +static time_t srv_last_log_flush_time; + +/** Buffer pool dump status frequence in percentages */ +ulong srv_buf_dump_status_frequency; + +/* + IMPLEMENTATION OF THE SERVER MAIN PROGRAM + ========================================= + +There is the following analogue between this database +server and an operating system kernel: + +DB concept equivalent OS concept +---------- --------------------- +transaction -- process; + +query thread -- thread; + +lock -- semaphore; + +kernel -- kernel; + +query thread execution: +(a) without lock_sys.latch +reserved -- process executing in user mode; +(b) with lock_sys.latch reserved + -- process executing in kernel mode; + +The server has several background threads all running at the same +priority as user threads. + +The threads which we call user threads serve the queries of the MySQL +server. They run at normal priority. + +When there is no activity in the system, also the master thread +suspends itself to wait for an event making the server totally silent. + +There is still one complication in our server design. If a +background utility thread obtains a resource (e.g., mutex) needed by a user +thread, and there is also some other user activity in the system, +the user thread may have to wait indefinitely long for the +resource, as the OS does not schedule a background thread if +there is some other runnable user thread. This problem is called +priority inversion in real-time programming. + +One solution to the priority inversion problem would be to keep record +of which thread owns which resource and in the above case boost the +priority of the background thread so that it will be scheduled and it +can release the resource. This solution is called priority inheritance +in real-time programming. A drawback of this solution is that the overhead +of acquiring a mutex increases slightly, maybe 0.2 microseconds on a 100 +MHz Pentium, because the thread has to call pthread_self. This may +be compared to 0.5 microsecond overhead for a mutex lock-unlock pair. 
Note +that the thread cannot store the information in the resource , say mutex, +itself, because competing threads could wipe out the information if it is +stored before acquiring the mutex, and if it stored afterwards, the +information is outdated for the time of one machine instruction, at least. +(To be precise, the information could be stored to lock_word in mutex if +the machine supports atomic swap.) + +The above solution with priority inheritance may become actual in the +future, currently we do not implement any priority twiddling solution. +Our general aim is to reduce the contention of all mutexes by making +them more fine grained. + +The thread table contains information of the current status of each +thread existing in the system, and also the event semaphores used in +suspending the master thread and utility threads when they have nothing +to do. The thread table can be seen as an analogue to the process table +in a traditional Unix implementation. */ + +/** The server system struct */ +struct srv_sys_t{ + mysql_mutex_t tasks_mutex; /*!< variable protecting the + tasks queue */ + UT_LIST_BASE_NODE_T(que_thr_t) + tasks; /*!< task queue */ + + srv_stats_t::ulint_ctr_1_t + activity_count; /*!< For tracking server + activity */ +}; + +static srv_sys_t srv_sys; + +/* + Structure shared by timer and coordinator_callback. + No protection necessary since timer and task never run + in parallel (being in the same task group of size 1). +*/ +struct purge_coordinator_state +{ + /** Snapshot of the last history length before the purge call.*/ + size_t history_size; + Atomic_counter<int> m_running; +public: + inline void do_purge(); +}; + +static purge_coordinator_state purge_state; + +/** threadpool timer for srv_monitor_task() */ +std::unique_ptr<tpool::timer> srv_monitor_timer; + + +/** The buffer pool dump/load file name */ +char* srv_buf_dump_filename; + +/** Boolean config knobs that tell InnoDB to dump the buffer pool at shutdown +and/or load it during startup. */ +char srv_buffer_pool_dump_at_shutdown = TRUE; +char srv_buffer_pool_load_at_startup = TRUE; + +#ifdef HAVE_PSI_STAGE_INTERFACE +/** Performance schema stage event for monitoring ALTER TABLE progress +in ha_innobase::commit_inplace_alter_table(). */ +PSI_stage_info srv_stage_alter_table_end + = {0, "alter table (end)", PSI_FLAG_STAGE_PROGRESS}; + +/** Performance schema stage event for monitoring ALTER TABLE progress +row_merge_insert_index_tuples(). */ +PSI_stage_info srv_stage_alter_table_insert + = {0, "alter table (insert)", PSI_FLAG_STAGE_PROGRESS}; + +/** Performance schema stage event for monitoring ALTER TABLE progress +row_log_apply(). */ +PSI_stage_info srv_stage_alter_table_log_index + = {0, "alter table (log apply index)", PSI_FLAG_STAGE_PROGRESS}; + +/** Performance schema stage event for monitoring ALTER TABLE progress +row_log_table_apply(). */ +PSI_stage_info srv_stage_alter_table_log_table + = {0, "alter table (log apply table)", PSI_FLAG_STAGE_PROGRESS}; + +/** Performance schema stage event for monitoring ALTER TABLE progress +row_merge_sort(). */ +PSI_stage_info srv_stage_alter_table_merge_sort + = {0, "alter table (merge sort)", PSI_FLAG_STAGE_PROGRESS}; + +/** Performance schema stage event for monitoring ALTER TABLE progress +row_merge_read_clustered_index(). */ +PSI_stage_info srv_stage_alter_table_read_pk_internal_sort + = {0, "alter table (read PK and internal sort)", PSI_FLAG_STAGE_PROGRESS}; + +/** Performance schema stage event for monitoring buffer pool load progress. 
*/ +PSI_stage_info srv_stage_buffer_pool_load + = {0, "buffer pool load", PSI_FLAG_STAGE_PROGRESS}; +#endif /* HAVE_PSI_STAGE_INTERFACE */ + +/*********************************************************************//** +Prints counters for work done by srv_master_thread. */ +static +void +srv_print_master_thread_info( +/*=========================*/ + FILE *file) /* in: output stream */ +{ + fprintf(file, "srv_master_thread loops: " ULINTPF " srv_active, " + ULINTPF " srv_shutdown, " ULINTPF " srv_idle\n" + "srv_master_thread log flush and writes: " ULINTPF "\n", + srv_main_active_loops, + srv_main_shutdown_loops, + srv_main_idle_loops, + srv_log_writes_and_flush); +} + +static void thread_pool_thread_init() +{ + my_thread_init(); + pfs_register_thread(thread_pool_thread_key); +} +static void thread_pool_thread_end() +{ + pfs_delete_thread(); + my_thread_end(); +} + + +void srv_thread_pool_init() +{ + DBUG_ASSERT(!srv_thread_pool); + +#if defined (_WIN32) + srv_thread_pool= tpool::create_thread_pool_win(); +#else + srv_thread_pool= tpool::create_thread_pool_generic(); +#endif + srv_thread_pool->set_thread_callbacks(thread_pool_thread_init, + thread_pool_thread_end); +} + + +void srv_thread_pool_end() +{ + ut_ad(!srv_master_timer); + delete srv_thread_pool; + srv_thread_pool= nullptr; +} + +static bool need_srv_free; + +/** Initialize the server. */ +static void srv_init() +{ + mysql_mutex_init(srv_innodb_monitor_mutex_key, + &srv_innodb_monitor_mutex, nullptr); + mysql_mutex_init(srv_threads_mutex_key, &srv_sys.tasks_mutex, nullptr); + UT_LIST_INIT(srv_sys.tasks, &que_thr_t::queue); + + need_srv_free = true; + + mysql_mutex_init(page_zip_stat_per_index_mutex_key, + &page_zip_stat_per_index_mutex, nullptr); + + /* Initialize some INFORMATION SCHEMA internal structures */ + trx_i_s_cache_init(trx_i_s_cache); +} + +/*********************************************************************//** +Frees the data structures created in srv_init(). */ +void +srv_free(void) +/*==========*/ +{ + if (!need_srv_free) { + return; + } + + mysql_mutex_destroy(&srv_innodb_monitor_mutex); + mysql_mutex_destroy(&page_zip_stat_per_index_mutex); + mysql_mutex_destroy(&srv_sys.tasks_mutex); + + trx_i_s_cache_free(trx_i_s_cache); + srv_thread_pool_end(); +} + +/*********************************************************************//** +Boots the InnoDB server. */ +void srv_boot() +{ +#ifndef NO_ELISION + if (transactional_lock_enabled()) + sql_print_information("InnoDB: Using transactional memory"); +#endif + buf_dblwr.init(); + srv_thread_pool_init(); + trx_pool_init(); + srv_init(); +} + +/******************************************************************//** +Refreshes the values used to calculate per-second averages. */ +static void srv_refresh_innodb_monitor_stats(time_t current_time) +{ + mysql_mutex_lock(&srv_innodb_monitor_mutex); + + if (difftime(current_time, srv_last_monitor_time) < 60) { + /* We refresh InnoDB Monitor values so that averages are + printed from at most 60 last seconds */ + mysql_mutex_unlock(&srv_innodb_monitor_mutex); + return; + } + + srv_last_monitor_time = current_time; + + os_aio_refresh_stats(); + +#ifdef BTR_CUR_HASH_ADAPT + btr_cur_n_sea_old = btr_cur_n_sea; + btr_cur_n_non_sea_old = btr_cur_n_non_sea; +#endif /* BTR_CUR_HASH_ADAPT */ + + buf_refresh_io_stats(); + + mysql_mutex_unlock(&srv_innodb_monitor_mutex); +} + +/******************************************************************//** +Outputs to a file the output of the InnoDB Monitor. 
+@return FALSE if not all information printed +due to failure to obtain necessary mutex */ +ibool +srv_printf_innodb_monitor( +/*======================*/ + FILE* file, /*!< in: output stream */ + ibool nowait, /*!< in: whether to wait for lock_sys.latch */ + ulint* trx_start_pos, /*!< out: file position of the start of + the list of active transactions */ + ulint* trx_end) /*!< out: file position of the end of + the list of active transactions */ +{ + double time_elapsed; + time_t current_time; + ibool ret; + + mysql_mutex_lock(&srv_innodb_monitor_mutex); + + current_time = time(NULL); + + /* We add 0.001 seconds to time_elapsed to prevent division + by zero if two users happen to call SHOW ENGINE INNODB STATUS at the + same time */ + + time_elapsed = difftime(current_time, srv_last_monitor_time) + + 0.001; + + srv_last_monitor_time = time(NULL); + + fputs("\n=====================================\n", file); + + ut_print_timestamp(file); + fprintf(file, + " INNODB MONITOR OUTPUT\n" + "=====================================\n" + "Per second averages calculated from the last %lu seconds\n", + (ulong) time_elapsed); + + fputs("-----------------\n" + "BACKGROUND THREAD\n" + "-----------------\n", file); + srv_print_master_thread_info(file); + + /* This section is intentionally left blank, for tools like "innotop" */ + fputs("----------\n" + "SEMAPHORES\n" + "----------\n", file); + /* End of intentionally blank section */ + + /* Conceptually, srv_innodb_monitor_mutex has a very high latching + order level, while dict_foreign_err_mutex has a very low level. + Therefore we can reserve the latter mutex here without + a danger of a deadlock of threads. */ + + mysql_mutex_lock(&dict_foreign_err_mutex); + + if (!srv_read_only_mode && ftell(dict_foreign_err_file) != 0L) { + fputs("------------------------\n" + "LATEST FOREIGN KEY ERROR\n" + "------------------------\n", file); + ut_copy_file(file, dict_foreign_err_file); + } + + mysql_mutex_unlock(&dict_foreign_err_mutex); + + /* Only if lock_print_info_summary proceeds correctly, + before we call the lock_print_info_all_transactions + to print all the lock information. IMPORTANT NOTE: This + function acquires exclusive lock_sys.latch on success. */ + ret = lock_print_info_summary(file, nowait); + + if (ret) { + if (trx_start_pos) { + long t = ftell(file); + if (t < 0) { + *trx_start_pos = ULINT_UNDEFINED; + } else { + *trx_start_pos = (ulint) t; + } + } + + /* NOTE: The following function will release the lock_sys.latch + that lock_print_info_summary() acquired. 
*/ + + lock_print_info_all_transactions(file); + + if (trx_end) { + long t = ftell(file); + if (t < 0) { + *trx_end = ULINT_UNDEFINED; + } else { + *trx_end = (ulint) t; + } + } + } + + fputs("--------\n" + "FILE I/O\n" + "--------\n", file); + os_aio_print(file); + + ibuf_print(file); + +#ifdef BTR_CUR_HASH_ADAPT + if (btr_search_enabled) { + fputs("-------------------\n" + "ADAPTIVE HASH INDEX\n" + "-------------------\n", file); + for (ulint i = 0; i < btr_ahi_parts; ++i) { + const auto part= &btr_search_sys.parts[i]; + part->latch.rd_lock(SRW_LOCK_CALL); + ut_ad(part->heap->type == MEM_HEAP_FOR_BTR_SEARCH); + fprintf(file, "Hash table size " ULINTPF + ", node heap has " ULINTPF " buffer(s)\n", + part->table.n_cells, + part->heap->base.count + - !part->heap->free_block); + part->latch.rd_unlock(); + } + + const ulint with_ahi = btr_cur_n_sea; + const ulint without_ahi = btr_cur_n_non_sea; + fprintf(file, + "%.2f hash searches/s, %.2f non-hash searches/s\n", + static_cast<double>(with_ahi - btr_cur_n_sea_old) + / time_elapsed, + static_cast<double>(without_ahi - btr_cur_n_non_sea_old) + / time_elapsed); + btr_cur_n_sea_old = with_ahi; + btr_cur_n_non_sea_old = without_ahi; + } +#endif /* BTR_CUR_HASH_ADAPT */ + + fputs("---\n" + "LOG\n" + "---\n", file); + log_print(file); + + fputs("----------------------\n" + "BUFFER POOL AND MEMORY\n" + "----------------------\n", file); + fprintf(file, + "Total large memory allocated " ULINTPF "\n" + "Dictionary memory allocated " ULINTPF "\n", + ulint{os_total_large_mem_allocated}, + dict_sys.rough_size()); + + buf_print_io(file); + + fputs("--------------\n" + "ROW OPERATIONS\n" + "--------------\n", file); + fprintf(file, ULINTPF " read views open inside InnoDB\n", + trx_sys.view_count()); + + if (ulint n_reserved = fil_system.sys_space->n_reserved_extents) { + fprintf(file, + ULINTPF " tablespace extents now reserved for" + " B-tree split operations\n", + n_reserved); + } + + fprintf(file, "state: %s\n", srv_main_thread_op_info); + + fputs("----------------------------\n" + "END OF INNODB MONITOR OUTPUT\n" + "============================\n", file); + mysql_mutex_unlock(&srv_innodb_monitor_mutex); + fflush(file); + + return(ret); +} + +/******************************************************************//** +Function to pass InnoDB status variables to MySQL */ +void +srv_export_innodb_status(void) +/*==========================*/ +{ + fil_crypt_stat_t crypt_stat; + + if (!srv_read_only_mode) { + fil_crypt_total_stat(&crypt_stat); + } + +#ifdef BTR_CUR_HASH_ADAPT + export_vars.innodb_ahi_hit = btr_cur_n_sea; + export_vars.innodb_ahi_miss = btr_cur_n_non_sea; + + ulint mem_adaptive_hash = 0; + for (ulong i = 0; i < btr_ahi_parts; i++) { + const auto part= &btr_search_sys.parts[i]; + part->latch.rd_lock(SRW_LOCK_CALL); + if (part->heap) { + ut_ad(part->heap->type == MEM_HEAP_FOR_BTR_SEARCH); + + mem_adaptive_hash += mem_heap_get_size(part->heap) + + part->table.n_cells * sizeof(hash_cell_t); + } + part->latch.rd_unlock(); + } + export_vars.innodb_mem_adaptive_hash = mem_adaptive_hash; +#endif + + export_vars.innodb_mem_dictionary = dict_sys.rough_size(); + + mysql_mutex_lock(&srv_innodb_monitor_mutex); + + export_vars.innodb_data_pending_reads = + ulint(MONITOR_VALUE(MONITOR_OS_PENDING_READS)); + + export_vars.innodb_data_pending_writes = + ulint(MONITOR_VALUE(MONITOR_OS_PENDING_WRITES)); + + export_vars.innodb_data_read = srv_stats.data_read; + + export_vars.innodb_data_reads = os_n_file_reads; + + export_vars.innodb_data_writes = os_n_file_writes; + 
+ buf_dblwr.lock(); + ulint dblwr = buf_dblwr.written(); + export_vars.innodb_dblwr_pages_written = dblwr; + export_vars.innodb_dblwr_writes = buf_dblwr.batches(); + buf_dblwr.unlock(); + + export_vars.innodb_data_written = srv_stats.data_written + + (dblwr << srv_page_size_shift); + + export_vars.innodb_buffer_pool_bytes_data = + buf_pool.stat.LRU_bytes + + (UT_LIST_GET_LEN(buf_pool.unzip_LRU) + << srv_page_size_shift); + +#ifdef UNIV_DEBUG + export_vars.innodb_buffer_pool_pages_latched = + buf_get_latched_pages_number(); +#endif /* UNIV_DEBUG */ + export_vars.innodb_buffer_pool_pages_total = buf_pool.get_n_pages(); + + export_vars.innodb_buffer_pool_pages_misc = + buf_pool.get_n_pages() + - UT_LIST_GET_LEN(buf_pool.LRU) + - UT_LIST_GET_LEN(buf_pool.free); + + export_vars.innodb_max_trx_id = trx_sys.get_max_trx_id(); + export_vars.innodb_history_list_length = trx_sys.history_size_approx(); + + mysql_mutex_lock(&lock_sys.wait_mutex); + export_vars.innodb_row_lock_waits = lock_sys.get_wait_cumulative(); + + export_vars.innodb_row_lock_current_waits= lock_sys.get_wait_pending(); + + export_vars.innodb_row_lock_time = lock_sys.get_wait_time_cumulative(); + export_vars.innodb_row_lock_time_max = lock_sys.get_wait_time_max(); + + mysql_mutex_unlock(&lock_sys.wait_mutex); + + export_vars.innodb_row_lock_time_avg= export_vars.innodb_row_lock_waits + ? static_cast<ulint>(export_vars.innodb_row_lock_time + / export_vars.innodb_row_lock_waits) + : 0; + + export_vars.innodb_page_compression_saved = srv_stats.page_compression_saved; + export_vars.innodb_pages_page_compressed = srv_stats.pages_page_compressed; + export_vars.innodb_page_compressed_trim_op = srv_stats.page_compressed_trim_op; + export_vars.innodb_pages_page_decompressed = srv_stats.pages_page_decompressed; + export_vars.innodb_pages_page_compression_error = srv_stats.pages_page_compression_error; + export_vars.innodb_pages_decrypted = srv_stats.pages_decrypted; + export_vars.innodb_pages_encrypted = srv_stats.pages_encrypted; + export_vars.innodb_n_merge_blocks_encrypted = srv_stats.n_merge_blocks_encrypted; + export_vars.innodb_n_merge_blocks_decrypted = srv_stats.n_merge_blocks_decrypted; + export_vars.innodb_n_rowlog_blocks_encrypted = srv_stats.n_rowlog_blocks_encrypted; + export_vars.innodb_n_rowlog_blocks_decrypted = srv_stats.n_rowlog_blocks_decrypted; + + export_vars.innodb_n_temp_blocks_encrypted = + srv_stats.n_temp_blocks_encrypted; + + export_vars.innodb_n_temp_blocks_decrypted = + srv_stats.n_temp_blocks_decrypted; + + export_vars.innodb_defragment_compression_failures = + btr_defragment_compression_failures; + export_vars.innodb_defragment_failures = btr_defragment_failures; + export_vars.innodb_defragment_count = btr_defragment_count; + + export_vars.innodb_onlineddl_rowlog_rows = onlineddl_rowlog_rows; + export_vars.innodb_onlineddl_rowlog_pct_used = onlineddl_rowlog_pct_used; + export_vars.innodb_onlineddl_pct_progress = onlineddl_pct_progress; + + if (!srv_read_only_mode) { + export_vars.innodb_encryption_rotation_pages_read_from_cache = + crypt_stat.pages_read_from_cache; + export_vars.innodb_encryption_rotation_pages_read_from_disk = + crypt_stat.pages_read_from_disk; + export_vars.innodb_encryption_rotation_pages_modified = + crypt_stat.pages_modified; + export_vars.innodb_encryption_rotation_pages_flushed = + crypt_stat.pages_flushed; + export_vars.innodb_encryption_rotation_estimated_iops = + crypt_stat.estimated_iops; + export_vars.innodb_encryption_key_requests = + srv_stats.n_key_requests; + } + + 
mysql_mutex_unlock(&srv_innodb_monitor_mutex); + + log_sys.latch.rd_lock(SRW_LOCK_CALL); + export_vars.innodb_lsn_current = log_sys.get_lsn(); + export_vars.innodb_lsn_flushed = log_sys.get_flushed_lsn(); + export_vars.innodb_lsn_last_checkpoint = log_sys.last_checkpoint_lsn; + export_vars.innodb_checkpoint_max_age = static_cast<ulint>( + log_sys.max_checkpoint_age); + log_sys.latch.rd_unlock(); + export_vars.innodb_os_log_written = export_vars.innodb_lsn_current + - recv_sys.lsn; + + export_vars.innodb_checkpoint_age = static_cast<ulint>( + export_vars.innodb_lsn_current + - export_vars.innodb_lsn_last_checkpoint); +} + +struct srv_monitor_state_t +{ + time_t last_monitor_time; + ulint mutex_skipped; + bool last_srv_print_monitor; + srv_monitor_state_t() : mutex_skipped(0), last_srv_print_monitor(false) + { + srv_last_monitor_time = time(NULL); + last_monitor_time= srv_last_monitor_time; + } +}; + +static srv_monitor_state_t monitor_state; + +/** A task which prints the info output by various InnoDB monitors.*/ +static void srv_monitor() +{ + time_t current_time = time(NULL); + + if (difftime(current_time, monitor_state.last_monitor_time) >= 15) { + monitor_state.last_monitor_time = current_time; + + if (srv_print_innodb_monitor) { + /* Reset mutex_skipped counter everytime + srv_print_innodb_monitor changes. This is to + ensure we will not be blocked by lock_sys.latch + for short duration information printing */ + if (!monitor_state.last_srv_print_monitor) { + monitor_state.mutex_skipped = 0; + monitor_state.last_srv_print_monitor = true; + } + + if (!srv_printf_innodb_monitor(stderr, + MUTEX_NOWAIT(monitor_state.mutex_skipped), + NULL, NULL)) { + monitor_state.mutex_skipped++; + } else { + /* Reset the counter */ + monitor_state.mutex_skipped = 0; + } + } else { + monitor_state.last_monitor_time = 0; + } + + + /* We don't create the temp files or associated + mutexes in read-only-mode */ + + if (!srv_read_only_mode && srv_innodb_status) { + mysql_mutex_lock(&srv_monitor_file_mutex); + rewind(srv_monitor_file); + if (!srv_printf_innodb_monitor(srv_monitor_file, + MUTEX_NOWAIT(monitor_state.mutex_skipped), + NULL, NULL)) { + monitor_state.mutex_skipped++; + } else { + monitor_state.mutex_skipped = 0; + } + + os_file_set_eof(srv_monitor_file); + mysql_mutex_unlock(&srv_monitor_file_mutex); + } + } + + srv_refresh_innodb_monitor_stats(current_time); +} + +/** Periodic task which prints the info output by various InnoDB monitors.*/ +void srv_monitor_task(void*) +{ + /* number of successive fatal timeouts observed */ + static lsn_t old_lsn = recv_sys.lsn; + + ut_ad(!srv_read_only_mode); + + /* Try to track a strange bug reported by Harald Fuchs and others, + where the lsn seems to decrease at times */ + + lsn_t new_lsn = log_sys.get_lsn(); + ut_a(new_lsn >= old_lsn); + old_lsn = new_lsn; + + /* Update the statistics collected for deciding LRU + eviction policy. 
*/ + buf_LRU_stat_update(); + + ulonglong now = my_hrtime_coarse().val; + const ulong threshold = srv_fatal_semaphore_wait_threshold; + + if (ulonglong start = dict_sys.oldest_wait()) { + if (now >= start) { + now -= start; + ulong waited = static_cast<ulong>(now / 1000000); + if (waited >= threshold) { + ib::fatal() << dict_sys.fatal_msg; + } + + if (waited == threshold / 4 + || waited == threshold / 2 + || waited == threshold / 4 * 3) { + ib::warn() << "Long wait (" << waited + << " seconds) for dict_sys.latch"; + } + } + } + + srv_monitor(); +} + +/******************************************************************//** +Increment the server activity count. */ +void +srv_inc_activity_count(void) +/*========================*/ +{ + srv_sys.activity_count.inc(); +} + +#ifdef UNIV_DEBUG +/** @return whether purge or master task is active */ +bool srv_any_background_activity() +{ + if (purge_sys.enabled() || srv_master_timer.get()) + { + ut_ad(!srv_read_only_mode); + return true; + } + return false; +} +#endif /* UNIV_DEBUG */ + +static void purge_worker_callback(void*); +static void purge_coordinator_callback(void*); +static void purge_truncation_callback(void*) +{ + purge_sys.latch.rd_lock(SRW_LOCK_CALL); + const purge_sys_t::iterator head= purge_sys.head; + purge_sys.latch.rd_unlock(); + head.free_history(); +} + +static tpool::task_group purge_task_group; +tpool::waitable_task purge_worker_task(purge_worker_callback, nullptr, + &purge_task_group); +static tpool::task_group purge_coordinator_task_group(1); +static tpool::waitable_task purge_coordinator_task + (purge_coordinator_callback, nullptr, &purge_coordinator_task_group); +static tpool::task_group purge_truncation_task_group(1); +static tpool::waitable_task purge_truncation_task + (purge_truncation_callback, nullptr, &purge_truncation_task_group); + +/** Wake up the purge threads if there is work to do. */ +void purge_sys_t::wake_if_not_active() +{ + if (enabled() && !paused() && !purge_state.m_running && + (srv_undo_log_truncate || trx_sys.history_exists()) && + ++purge_state.m_running == 1) + srv_thread_pool->submit_task(&purge_coordinator_task); +} + +/** @return whether the purge tasks are active */ +bool purge_sys_t::running() +{ + return purge_coordinator_task.is_running(); +} + +void purge_sys_t::stop_FTS() +{ + latch.rd_lock(SRW_LOCK_CALL); + m_FTS_paused++; + latch.rd_unlock(); + while (m_active) + std::this_thread::sleep_for(std::chrono::seconds(1)); +} + +/** Stop purge during FLUSH TABLES FOR EXPORT */ +void purge_sys_t::stop() +{ + latch.wr_lock(SRW_LOCK_CALL); + + if (!enabled()) + { + /* Shutdown must have been initiated during FLUSH TABLES FOR EXPORT. */ + ut_ad(!srv_undo_sources); + latch.wr_unlock(); + return; + } + + ut_ad(srv_n_purge_threads > 0); + + const auto paused= m_paused++; + + latch.wr_unlock(); + + if (!paused) + { + ib::info() << "Stopping purge"; + MONITOR_ATOMIC_INC(MONITOR_PURGE_STOP_COUNT); + purge_coordinator_task.disable(); + } +} + +/** Resume purge in data dictionary tables */ +void purge_sys_t::resume_SYS(void *) +{ + ut_d(auto paused=) purge_sys.m_SYS_paused--; + ut_ad(paused); +} + +/** Resume purge at UNLOCK TABLES after FLUSH TABLES FOR EXPORT */ +void purge_sys_t::resume() +{ + if (!enabled()) + { + /* Shutdown must have been initiated during FLUSH TABLES FOR EXPORT. 
*/ + ut_ad(!srv_undo_sources); + return; + } + ut_ad(!srv_read_only_mode); + ut_ad(srv_force_recovery < SRV_FORCE_NO_BACKGROUND); + purge_coordinator_task.enable(); + latch.wr_lock(SRW_LOCK_CALL); + int32_t paused= m_paused--; + ut_a(paused); + + if (paused == 1) + { + ib::info() << "Resuming purge"; + purge_state.m_running= 1; + srv_thread_pool->submit_task(&purge_coordinator_task); + MONITOR_ATOMIC_INC(MONITOR_PURGE_RESUME_COUNT); + } + latch.wr_unlock(); +} + +/*******************************************************************//** +Get current server activity count. +@return activity count. */ +ulint +srv_get_activity_count(void) +/*========================*/ +{ + return(srv_sys.activity_count); +} + +/** Check if srv_inc_activity_count() has been called. +@param activity_count copy of srv_sys.activity_count +@return whether the activity_count had changed */ +static bool srv_check_activity(ulint *activity_count) +{ + ulint new_activity_count= srv_sys.activity_count; + if (new_activity_count != *activity_count) + { + *activity_count= new_activity_count; + return true; + } + + return false; +} + +/********************************************************************//** +The master thread is tasked to ensure that flush of log file happens +once every second in the background. This is to ensure that not more +than one second of trxs are lost in case of crash when +innodb_flush_logs_at_trx_commit != 1 */ +static void srv_sync_log_buffer_in_background() +{ + time_t current_time = time(NULL); + + srv_main_thread_op_info = "flushing log"; + if (difftime(current_time, srv_last_log_flush_time) + >= srv_flush_log_at_timeout) { + log_buffer_flush_to_disk(); + srv_last_log_flush_time = current_time; + srv_log_writes_and_flush++; + } +} + +/** Report progress during shutdown. +@param last time of last output +@param n_read number of page reads initiated for change buffer merge */ +static void srv_shutdown_print(time_t &last, ulint n_read) +{ + time_t now= time(nullptr); + if (now - last >= 15) + { + last= now; + + const ulint ibuf_size= ibuf.size; + sql_print_information("Completing change buffer merge;" + " %zu page reads initiated;" + " %zu change buffer pages remain", + n_read, ibuf_size); +#if defined HAVE_SYSTEMD && !defined EMBEDDED_LIBRARY + service_manager_extend_timeout(INNODB_EXTEND_TIMEOUT_INTERVAL, + "Completing change buffer merge;" + " %zu page reads initiated;" + " %zu change buffer pages remain", + n_read, ibuf_size); +#endif + } +} + +/** Perform periodic tasks whenever the server is active. +@param counter_time microsecond_interval_timer() */ +static void srv_master_do_active_tasks(ulonglong counter_time) +{ + ++srv_main_active_loops; + + MONITOR_INC(MONITOR_MASTER_ACTIVE_LOOPS); + + if (!(counter_time % (47 * 1000000ULL))) { + srv_main_thread_op_info = "enforcing dict cache limit"; + if (ulint n_evicted = dict_sys.evict_table_LRU(true)) { + MONITOR_INC_VALUE( + MONITOR_SRV_DICT_LRU_EVICT_COUNT_ACTIVE, + n_evicted); + } + MONITOR_INC_TIME_IN_MICRO_SECS( + MONITOR_SRV_DICT_LRU_MICROSECOND, counter_time); + } +} + +/** Perform periodic tasks whenever the server is idle. 
+@param counter_time microsecond_interval_timer() */ +static void srv_master_do_idle_tasks(ulonglong counter_time) +{ + ++srv_main_idle_loops; + + MONITOR_INC(MONITOR_MASTER_IDLE_LOOPS); + + srv_main_thread_op_info = "enforcing dict cache limit"; + if (ulint n_evicted = dict_sys.evict_table_LRU(false)) { + MONITOR_INC_VALUE( + MONITOR_SRV_DICT_LRU_EVICT_COUNT_IDLE, n_evicted); + } + MONITOR_INC_TIME_IN_MICRO_SECS( + MONITOR_SRV_DICT_LRU_MICROSECOND, counter_time); +} + +/** +Complete the shutdown tasks such as background DROP TABLE, +and optionally change buffer merge (on innodb_fast_shutdown=0). */ +void srv_shutdown(bool ibuf_merge) +{ + ulint n_read = 0; + time_t now = time(NULL); + + do { + ut_ad(!srv_read_only_mode); + ut_ad(srv_shutdown_state == SRV_SHUTDOWN_CLEANUP); + ++srv_main_shutdown_loops; + + if (ibuf_merge) { + srv_main_thread_op_info = "doing insert buffer merge"; + /* Disallow the use of change buffer to + avoid a race condition with + ibuf_read_merge_pages() */ + ibuf_max_size_update(0); + log_free_check(); + n_read = ibuf_contract(); + srv_shutdown_print(now, n_read); + } + } while (n_read); +} + +/** The periodic master task controlling the server. */ +void srv_master_callback(void*) +{ + static ulint old_activity_count; + + ut_a(srv_shutdown_state <= SRV_SHUTDOWN_INITIATED); + + MONITOR_INC(MONITOR_MASTER_THREAD_SLEEP); + purge_sys.wake_if_not_active(); + ulonglong counter_time= microsecond_interval_timer(); + srv_sync_log_buffer_in_background(); + MONITOR_INC_TIME_IN_MICRO_SECS(MONITOR_SRV_LOG_FLUSH_MICROSECOND, + counter_time); + + if (srv_check_activity(&old_activity_count)) + srv_master_do_active_tasks(counter_time); + else + srv_master_do_idle_tasks(counter_time); + + srv_main_thread_op_info= "sleeping"; +} + +/** @return whether purge should exit due to shutdown */ +static bool srv_purge_should_exit(size_t old_history_size) +{ + ut_ad(srv_shutdown_state <= SRV_SHUTDOWN_CLEANUP); + + if (srv_undo_sources) + return false; + + if (srv_fast_shutdown) + return true; + + /* Slow shutdown was requested. */ + size_t prepared, active= trx_sys.any_active_transactions(&prepared); + const size_t history_size= trx_sys.history_size(); + + if (!history_size); + else if (!active && history_size == old_history_size && prepared); + else + { + static time_t progress_time; + time_t now= time(NULL); + if (now - progress_time >= 15) + { + progress_time= now; +#if defined HAVE_SYSTEMD && !defined EMBEDDED_LIBRARY + service_manager_extend_timeout(INNODB_EXTEND_TIMEOUT_INTERVAL, + "InnoDB: to purge %zu transactions", + history_size); + sql_print_information("InnoDB: to purge %zu transactions", history_size); +#endif + } + return false; + } + + return !active; +} + +/*********************************************************************//** +Fetch and execute a task from the work queue. 
+@param [in,out] slot purge worker thread slot +@return true if a task was executed */ +static bool srv_task_execute() +{ + ut_ad(!srv_read_only_mode); + ut_ad(srv_force_recovery < SRV_FORCE_NO_BACKGROUND); + + mysql_mutex_lock(&srv_sys.tasks_mutex); + + if (que_thr_t* thr = UT_LIST_GET_FIRST(srv_sys.tasks)) { + ut_a(que_node_get_type(thr->child) == QUE_NODE_PURGE); + UT_LIST_REMOVE(srv_sys.tasks, thr); + mysql_mutex_unlock(&srv_sys.tasks_mutex); + que_run_threads(thr); + return true; + } + + ut_ad(UT_LIST_GET_LEN(srv_sys.tasks) == 0); + mysql_mutex_unlock(&srv_sys.tasks_mutex); + return false; +} + +static void purge_create_background_thds(int ); + +/** Flag which is set, whenever innodb_purge_threads changes. */ +static Atomic_relaxed<bool> srv_purge_thread_count_changed; + +static std::mutex purge_thread_count_mtx; +void srv_update_purge_thread_count(uint n) +{ + std::lock_guard<std::mutex> lk(purge_thread_count_mtx); + ut_ad(n > 0); + ut_ad(n <= innodb_purge_threads_MAX); + srv_n_purge_threads = n; + srv_purge_thread_count_changed = true; +} + +inline void purge_coordinator_state::do_purge() +{ + ut_ad(!srv_read_only_mode); + + if (!purge_sys.enabled() || purge_sys.paused()) + return; + + uint n_threads; + + { + std::lock_guard<std::mutex> lk(purge_thread_count_mtx); + n_threads= srv_n_purge_threads; + srv_purge_thread_count_changed= false; + goto first_loop; + } + + do + { + if (UNIV_UNLIKELY(srv_purge_thread_count_changed)) + { + /* Read the fresh value of srv_n_purge_threads, reset + the changed flag. Both are protected by purge_thread_count_mtx. */ + { + std::lock_guard<std::mutex> lk(purge_thread_count_mtx); + n_threads= srv_n_purge_threads; + srv_purge_thread_count_changed= false; + } + } + first_loop: + ut_ad(n_threads); + + history_size= trx_sys.history_size(); + + if (!history_size) + { + no_history: + srv_dml_needed_delay= 0; + purge_truncation_task.wait(); + trx_purge_truncate_history(); + break; + } + + ulint n_pages_handled= trx_purge(n_threads, history_size); + if (!trx_sys.history_exists()) + goto no_history; + if (purge_sys.truncate.current || srv_shutdown_state != SRV_SHUTDOWN_NONE) + { + purge_truncation_task.wait(); + trx_purge_truncate_history(); + } + else + srv_thread_pool->submit_task(&purge_truncation_task); + if (!n_pages_handled) + break; + } + while (purge_sys.enabled() && !purge_sys.paused() && + !srv_purge_should_exit(history_size)); + + m_running= 0; +} + +static std::list<THD*> purge_thds; +static std::mutex purge_thd_mutex; +extern void* thd_attach_thd(THD*); +extern void thd_detach_thd(void *); +static int n_purge_thds; + +/* Ensure that we have at least n background THDs for purge */ +static void purge_create_background_thds(int n) +{ + THD *thd= current_thd; + std::unique_lock<std::mutex> lk(purge_thd_mutex); + while (n_purge_thds < n) + { + purge_thds.push_back(innobase_create_background_thd("InnoDB purge worker")); + n_purge_thds++; + } + set_current_thd(thd); +} + +static THD *acquire_thd(void **ctx) +{ + std::unique_lock<std::mutex> lk(purge_thd_mutex); + ut_a(!purge_thds.empty()); + THD* thd = purge_thds.front(); + purge_thds.pop_front(); + lk.unlock(); + + /* Set current thd, and thd->mysys_var as well, + it might be used by something in the server.*/ + *ctx = thd_attach_thd(thd); + return thd; +} + +static void release_thd(THD *thd, void *ctx) +{ + thd_detach_thd(ctx); + std::unique_lock<std::mutex> lk(purge_thd_mutex); + purge_thds.push_back(thd); + lk.unlock(); + set_current_thd(0); +} + +static void purge_worker_callback(void*) +{ + 
ut_ad(!current_thd); + ut_ad(!srv_read_only_mode); + ut_ad(srv_force_recovery < SRV_FORCE_NO_BACKGROUND); + void *ctx; + THD *thd= acquire_thd(&ctx); + while (srv_task_execute()) + ut_ad(purge_sys.running()); + release_thd(thd,ctx); +} + +static void purge_coordinator_callback(void*) +{ + void *ctx; + THD *thd= acquire_thd(&ctx); + purge_state.do_purge(); + release_thd(thd, ctx); +} + +void srv_init_purge_tasks() +{ + purge_create_background_thds(innodb_purge_threads_MAX); + purge_sys.coordinator_startup(); +} + +static void srv_shutdown_purge_tasks() +{ + purge_coordinator_task.disable(); + purge_worker_task.wait(); + std::unique_lock<std::mutex> lk(purge_thd_mutex); + while (!purge_thds.empty()) + { + destroy_background_thd(purge_thds.front()); + purge_thds.pop_front(); + } + n_purge_thds= 0; + purge_truncation_task.wait(); +} + +/**********************************************************************//** +Enqueues a task to server task queue and releases a worker thread, if there +is a suspended one. */ +void +srv_que_task_enqueue_low( +/*=====================*/ + que_thr_t* thr) /*!< in: query thread */ +{ + ut_ad(!srv_read_only_mode); + mysql_mutex_lock(&srv_sys.tasks_mutex); + + UT_LIST_ADD_LAST(srv_sys.tasks, thr); + + mysql_mutex_unlock(&srv_sys.tasks_mutex); +} + +#ifdef UNIV_DEBUG +/** @return number of tasks in queue */ +ulint srv_get_task_queue_length() +{ + ulint n_tasks; + + ut_ad(!srv_read_only_mode); + + mysql_mutex_lock(&srv_sys.tasks_mutex); + + n_tasks = UT_LIST_GET_LEN(srv_sys.tasks); + + mysql_mutex_unlock(&srv_sys.tasks_mutex); + + return(n_tasks); +} +#endif + +/** Shut down the purge threads. */ +void srv_purge_shutdown() +{ + if (purge_sys.enabled()) + { + if (!srv_fast_shutdown && !opt_bootstrap) + { + srv_purge_batch_size= innodb_purge_batch_size_MAX; + srv_update_purge_thread_count(innodb_purge_threads_MAX); + } + size_t history_size= trx_sys.history_size(); + while (!srv_purge_should_exit(history_size)) + { + history_size= trx_sys.history_size(); + ut_a(!purge_sys.paused()); + srv_thread_pool->submit_task(&purge_coordinator_task); + purge_coordinator_task.wait(); + } + purge_sys.coordinator_shutdown(); + srv_shutdown_purge_tasks(); + } +} |
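The purge wake-up logic in the diff above (purge_sys_t::wake_if_not_active() together with purge_coordinator_state::m_running) boils down to: submit the coordinator task to the thread pool only when no coordinator run is already queued or running, using an atomic counter as the guard, and clear the counter when the coordinator loop finishes so a later wake-up can re-submit. The following is a minimal, self-contained sketch of that pattern, not taken from the MariaDB sources: the Coordinator class, the submit() helper and the more_work_exists() stub are invented for illustration, and a detached std::thread stands in for the InnoDB thread pool.

#include <atomic>
#include <functional>
#include <thread>

class Coordinator {
public:
  // Analogue of purge_sys_t::wake_if_not_active(): any producer thread may
  // call this, but only the caller that flips m_running from 0 to 1 actually
  // queues a coordinator run; all other callers return immediately.
  void wake_if_not_active() {
    int expected = 0;
    if (m_running.compare_exchange_strong(expected, 1))
      submit([this] { run(); });
  }

private:
  // Analogue of purge_coordinator_state::do_purge(): work in batches until
  // nothing is left, then clear the flag so a later wake-up can re-submit.
  void run() {
    do {
      // ... process one batch of work (trx_purge() in the real code) ...
    } while (more_work_exists());
    m_running.store(0);
  }

  bool more_work_exists() const { return false; }  // placeholder stub

  // Stand-in for srv_thread_pool->submit_task(); a detached thread is enough
  // for this sketch.
  static void submit(std::function<void()> task) {
    std::thread(std::move(task)).detach();
  }

  std::atomic<int> m_running{0};
};

As in the real code, the guard only prevents duplicate submissions; it does not by itself guarantee that work arriving while run() is finishing gets picked up, which is why srv_master_callback() periodically calls purge_sys.wake_if_not_active() again.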