summaryrefslogtreecommitdiffstats
path: root/storage/innobase/srv/srv0srv.cc
diff options
context:
space:
mode:
Diffstat (limited to 'storage/innobase/srv/srv0srv.cc')
-rw-r--r--storage/innobase/srv/srv0srv.cc2135
1 files changed, 2135 insertions, 0 deletions
diff --git a/storage/innobase/srv/srv0srv.cc b/storage/innobase/srv/srv0srv.cc
new file mode 100644
index 00000000..ad221dc2
--- /dev/null
+++ b/storage/innobase/srv/srv0srv.cc
@@ -0,0 +1,2135 @@
+/*****************************************************************************
+
+Copyright (c) 1995, 2017, Oracle and/or its affiliates. All Rights Reserved.
+Copyright (c) 2008, 2009 Google Inc.
+Copyright (c) 2009, Percona Inc.
+Copyright (c) 2013, 2021, MariaDB Corporation.
+
+Portions of this file contain modifications contributed and copyrighted by
+Google, Inc. Those modifications are gratefully acknowledged and are described
+briefly in the InnoDB documentation. The contributions by Google are
+incorporated with their permission, and subject to the conditions contained in
+the file COPYING.Google.
+
+Portions of this file contain modifications contributed and copyrighted
+by Percona Inc.. Those modifications are
+gratefully acknowledged and are described briefly in the InnoDB
+documentation. The contributions by Percona Inc. are incorporated with
+their permission, and subject to the conditions contained in the file
+COPYING.Percona.
+
+This program is free software; you can redistribute it and/or modify it under
+the terms of the GNU General Public License as published by the Free Software
+Foundation; version 2 of the License.
+
+This program is distributed in the hope that it will be useful, but WITHOUT
+ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
+FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details.
+
+You should have received a copy of the GNU General Public License along with
+this program; if not, write to the Free Software Foundation, Inc.,
+51 Franklin Street, Fifth Floor, Boston, MA 02110-1335 USA
+
+*****************************************************************************/
+
+/**************************************************//**
+@file srv/srv0srv.cc
+The database server main program
+
+Created 10/8/1995 Heikki Tuuri
+*******************************************************/
+
+#include "my_global.h"
+// JAN: TODO: MySQL 5.7 missing header
+//#include "my_thread.h"
+//
+#include "mysql/psi/mysql_stage.h"
+#include "mysql/psi/psi.h"
+
+#include "btr0sea.h"
+#include "buf0flu.h"
+#include "buf0lru.h"
+#include "dict0boot.h"
+#include "dict0load.h"
+#include "ibuf0ibuf.h"
+#include "lock0lock.h"
+#include "log0recv.h"
+#include "mem0mem.h"
+#include "pars0pars.h"
+#include "que0que.h"
+#include "row0mysql.h"
+#include "row0log.h"
+#include "srv0mon.h"
+#include "srv0srv.h"
+#include "srv0start.h"
+#include "sync0sync.h"
+#include "trx0i_s.h"
+#include "trx0purge.h"
+#include "ut0crc32.h"
+#include "btr0defragment.h"
+#include "ut0mem.h"
+#include "fil0fil.h"
+#include "fil0crypt.h"
+#include "fil0pagecompress.h"
+#include "trx0types.h"
+#include <list>
+
+#include <my_service_manager.h>
+/* The maximum allowed duration of a semaphore wait before the wait is
+treated as fatal. */
+UNIV_INTERN ulong srv_fatal_semaphore_wait_threshold = DEFAULT_SRV_FATAL_SEMAPHORE_TIMEOUT;
+
+/* How much data manipulation language (DML) statements need to be delayed,
+in microseconds, in order to reduce the lagging of the purge thread. */
+ulint srv_dml_needed_delay;
+
+const char* srv_main_thread_op_info = "";
+
+/** Prefix used by MySQL to indicate pre-5.1 table name encoding */
+const char srv_mysql50_table_name_prefix[10] = "#mysql50#";
+
+/* Server parameters which are read from the initfile */
+
+/* The following three are dir paths which are catenated before file
+names, where the file name itself may also contain a path */
+
+char* srv_data_home;
+
+/** Rollback files directory, can be absolute. */
+char* srv_undo_dir;
+
+/** The number of tablespaces to use for rollback segments. */
+ulong srv_undo_tablespaces;
+
+/** The number of UNDO tablespaces that are open and ready to use. */
+ulint srv_undo_tablespaces_open;
+
+/** The number of UNDO tablespaces that are active (hosting some rollback
+segment). It is quite possible that some of the tablespaces do not host
+any rollback segment, depending on the configuration used. */
+ulint srv_undo_tablespaces_active;
+
+/** Rate at which UNDO records should be purged. */
+ulong srv_purge_rseg_truncate_frequency;
+
+/** Enable or Disable Truncate of UNDO tablespace.
+Note: If enabled then UNDO tablespace will be selected for truncate.
+While Server waits for undo-tablespace to truncate if user disables
+it, truncate action is completed but no new tablespace is marked
+for truncate (action is never aborted). */
+my_bool srv_undo_log_truncate;
+
+/** Maximum size of undo tablespace. */
+unsigned long long srv_max_undo_log_size;
+
+/** Set if InnoDB must operate in read-only mode. We don't do any
+recovery and open all tables in RO mode instead of RW mode. We don't
+sync the max trx id to disk either. */
+my_bool srv_read_only_mode;
+/** store each table created by a user in its own file; data
+dictionary tables are in the system tablespace 0 */
+my_bool srv_file_per_table;
+/** Set if InnoDB operates in read-only mode or innodb-force-recovery
+is greater than SRV_FORCE_NO_TRX_UNDO. */
+my_bool high_level_read_only;
+
+/** Sort buffer size in index creation */
+ulong srv_sort_buf_size;
+/** Maximum modification log file size for online index creation */
+unsigned long long srv_online_max_size;
+
+/* If this flag is TRUE, then we will use the native aio of the
+OS (provided we compiled Innobase with it in), otherwise we will
+use simulated aio we build below with threads.
+Currently we support native aio on windows and linux */
+my_bool srv_use_native_aio;
+my_bool srv_numa_interleave;
+/** copy of innodb_use_atomic_writes; @see innodb_init_params() */
+my_bool srv_use_atomic_writes;
+/** innodb_compression_algorithm; used with page compression */
+ulong innodb_compression_algorithm;
+
+#ifdef UNIV_DEBUG
+/** Used by SET GLOBAL innodb_master_thread_disabled_debug = X. */
+my_bool srv_master_thread_disabled_debug;
+/** Event used to inform that master thread is disabled. */
+static os_event_t srv_master_thread_disabled_event;
+#endif /* UNIV_DEBUG */
+
+/*------------------------- LOG FILES ------------------------ */
+char* srv_log_group_home_dir;
+
+/** The InnoDB redo log file size, or 0 when changing the redo log format
+at startup (while disallowing writes to the redo log). */
+ulonglong srv_log_file_size;
+/** innodb_log_buffer_size, in bytes */
+ulong srv_log_buffer_size;
+/** innodb_flush_log_at_trx_commit */
+ulong srv_flush_log_at_trx_commit;
+/** innodb_flush_log_at_timeout */
+uint srv_flush_log_at_timeout;
+/** innodb_page_size */
+ulong srv_page_size;
+/** log2 of innodb_page_size; @see innodb_init_params() */
+ulong srv_page_size_shift;
+/** innodb_log_write_ahead_size */
+ulong srv_log_write_ahead_size;
+
+/** innodb_adaptive_flushing; try to flush dirty pages so as to avoid
+IO bursts at the checkpoints. */
+my_bool srv_adaptive_flushing;
+
+/** innodb_flush_sync; whether to ignore io_capacity at log checkpoints */
+my_bool srv_flush_sync;
+
+/** common thread pool*/
+tpool::thread_pool* srv_thread_pool;
+
+/** Maximum number of times allowed to conditionally acquire
+mutex before switching to blocking wait on the mutex */
+#define MAX_MUTEX_NOWAIT 2
+
+/** Check whether the number of failed nonblocking mutex
+acquisition attempts exceeds maximum allowed value. If so,
+srv_printf_innodb_monitor() will request mutex acquisition
+with mutex_enter(), which will wait until it gets the mutex. */
+#define MUTEX_NOWAIT(mutex_skipped) ((mutex_skipped) < MAX_MUTEX_NOWAIT)
+
+#ifdef WITH_INNODB_DISALLOW_WRITES
+UNIV_INTERN os_event_t srv_allow_writes_event;
+#endif /* WITH_INNODB_DISALLOW_WRITES */
+
+/** copy of innodb_buffer_pool_size */
+ulint srv_buf_pool_size;
+const ulint srv_buf_pool_min_size = 5 * 1024 * 1024;
+/** Default pool size in bytes */
+const ulint srv_buf_pool_def_size = 128 * 1024 * 1024;
+/** Requested buffer pool chunk size */
+ulong srv_buf_pool_chunk_unit;
+/** innodb_lru_scan_depth; number of blocks scanned in LRU flush batch */
+ulong srv_LRU_scan_depth;
+/** innodb_flush_neighbors; whether or not to flush neighbors of a block */
+ulong srv_flush_neighbors;
+/** Previously requested size */
+ulint srv_buf_pool_old_size;
+/** Current size as scaling factor for the other components */
+ulint srv_buf_pool_base_size;
+/** Current size in bytes */
+ulint srv_buf_pool_curr_size;
+/** Dump this % of each buffer pool during BP dump */
+ulong srv_buf_pool_dump_pct;
+/** Abort load after this amount of pages */
+#ifdef UNIV_DEBUG
+ulong srv_buf_pool_load_pages_abort = LONG_MAX;
+#endif
+/** Lock table size in bytes */
+ulint srv_lock_table_size = ULINT_MAX;
+
+/** innodb_read_io_threads */
+uint srv_n_read_io_threads;
+/** innodb_write_io_threads */
+uint srv_n_write_io_threads;
+
+/** innodb_random_read_ahead */
+my_bool srv_random_read_ahead;
+/** innodb_read_ahead_threshold; the number of pages that must be present
+in the buffer cache and accessed sequentially for InnoDB to trigger a
+readahead request. */
+ulong srv_read_ahead_threshold;
+
+/** innodb_change_buffer_max_size; maximum on-disk size of change
+buffer in terms of percentage of the buffer pool. */
+uint srv_change_buffer_max_size;
+
+ulong srv_file_flush_method;
+
+
+/** copy of innodb_open_files; @see innodb_init_params() */
+ulint srv_max_n_open_files;
+
+/** innodb_io_capacity */
+ulong srv_io_capacity;
+/** innodb_io_capacity_max */
+ulong srv_max_io_capacity;
+
+/* The InnoDB main thread tries to keep the ratio of modified pages
+in the buffer pool to all database pages in the buffer pool smaller than
+the following number. But it is not guaranteed that the value stays below
+that during a time of heavy update/insert activity. */
+
+/** innodb_max_dirty_pages_pct */
+double srv_max_buf_pool_modified_pct;
+/** innodb_max_dirty_pages_pct_lwm */
+double srv_max_dirty_pages_pct_lwm;
+
+/** innodb_adaptive_flushing_lwm; the percentage of log capacity at
+which adaptive flushing, if enabled, will kick in. */
+double srv_adaptive_flushing_lwm;
+
+/** innodb_flushing_avg_loops; number of iterations over which
+adaptive flushing is averaged */
+ulong srv_flushing_avg_loops;
+
+/** innodb_purge_threads; the number of purge tasks to use */
+uint srv_n_purge_threads;
+
+/** innodb_purge_batch_size, in pages */
+ulong srv_purge_batch_size;
+
+/** innodb_stats_method decides how InnoDB treats
+NULL value when collecting statistics. By default, it is set to
+SRV_STATS_NULLS_EQUAL(0), ie. all NULL value are treated equal */
+ulong srv_innodb_stats_method;
+
+srv_stats_t srv_stats;
+
+/* structure to pass status variables to MySQL */
+export_var_t export_vars;
+
+/** Normally 0. When nonzero, skip some phases of crash recovery,
+starting from SRV_FORCE_IGNORE_CORRUPT, so that data can be recovered
+by SELECT or mysqldump. When this is nonzero, we do not allow any user
+modifications to the data. */
+ulong srv_force_recovery;
+
+/** innodb_print_all_deadlocks; whether to print all user-level
+transactions deadlocks to the error log */
+my_bool srv_print_all_deadlocks;
+
+/** innodb_cmp_per_index_enabled; enable
+INFORMATION_SCHEMA.innodb_cmp_per_index */
+my_bool srv_cmp_per_index_enabled;
+
+/** innodb_fast_shutdown=1 skips purge and change buffer merge.
+innodb_fast_shutdown=2 effectively crashes the server (no log checkpoint).
+innodb_fast_shutdown=3 is a clean shutdown that skips the rollback
+of active transaction (to be done on restart). */
+uint srv_fast_shutdown;
+
+/** copy of innodb_status_file; generate an innodb_status.<pid> file */
+ibool srv_innodb_status;
+
+/** innodb_prefix_index_cluster_optimization; whether to optimize
+prefix index queries to skip cluster index lookup when possible */
+my_bool srv_prefix_index_cluster_optimization;
+
+/** innodb_stats_transient_sample_pages;
+When estimating number of different key values in an index, sample
+this many index pages, there are 2 ways to calculate statistics:
+* persistent stats that are calculated by ANALYZE TABLE and saved
+ in the innodb database.
+* quick transient stats, that are used if persistent stats for the given
+ table/index are not found in the innodb database */
+unsigned long long srv_stats_transient_sample_pages;
+/** innodb_stats_persistent */
+my_bool srv_stats_persistent;
+/** innodb_stats_include_delete_marked */
+my_bool srv_stats_include_delete_marked;
+/** innodb_stats_persistent_sample_pages */
+unsigned long long srv_stats_persistent_sample_pages;
+/** innodb_stats_auto_recalc */
+my_bool srv_stats_auto_recalc;
+
+/** innodb_stats_modified_counter; The number of rows modified before
+we calculate new statistics (default 0 = current limits) */
+unsigned long long srv_stats_modified_counter;
+
+/** innodb_stats_traditional; enable traditional statistic calculation
+based on number of configured pages */
+my_bool srv_stats_sample_traditional;
+
+my_bool srv_use_doublewrite_buf;
+
+/** innodb_sync_spin_loops */
+ulong srv_n_spin_wait_rounds;
+/** innodb_spin_wait_delay */
+uint srv_spin_wait_delay;
+
+static ulint srv_n_rows_inserted_old;
+static ulint srv_n_rows_updated_old;
+static ulint srv_n_rows_deleted_old;
+static ulint srv_n_rows_read_old;
+static ulint srv_n_system_rows_inserted_old;
+static ulint srv_n_system_rows_updated_old;
+static ulint srv_n_system_rows_deleted_old;
+static ulint srv_n_system_rows_read_old;
+
+ulint srv_truncated_status_writes;
+/** Number of initialized rollback segments for persistent undo log */
+ulong srv_available_undo_logs;
+
+/* Defragmentation */
+UNIV_INTERN my_bool srv_defragment;
+/** innodb_defragment_n_pages */
+UNIV_INTERN uint srv_defragment_n_pages;
+UNIV_INTERN uint srv_defragment_stats_accuracy;
+/** innodb_defragment_fill_factor_n_recs */
+UNIV_INTERN uint srv_defragment_fill_factor_n_recs;
+/** innodb_defragment_fill_factor */
+UNIV_INTERN double srv_defragment_fill_factor;
+/** innodb_defragment_frequency */
+UNIV_INTERN uint srv_defragment_frequency;
+/** derived from innodb_defragment_frequency;
+@see innodb_defragment_frequency_update() */
+UNIV_INTERN ulonglong srv_defragment_interval;
+
+/** Current mode of operation */
+UNIV_INTERN enum srv_operation_mode srv_operation;
+
+/* Set the following to 0 if you want InnoDB to write messages on
+stderr on startup/shutdown. Not enabled on the embedded server. */
+ibool srv_print_verbose_log;
+my_bool srv_print_innodb_monitor;
+my_bool srv_print_innodb_lock_monitor;
+/** innodb_force_primary_key; whether to disallow CREATE TABLE without
+PRIMARY KEY */
+my_bool srv_force_primary_key;
+
+/** innodb_encrypt_temporary_tables; whether to encrypt the temporary tablespace */
+my_bool innodb_encrypt_temporary_tables;
+
+my_bool srv_immediate_scrub_data_uncompressed;
+
+static time_t srv_last_monitor_time;
+
+static ib_mutex_t srv_innodb_monitor_mutex;
+
+/** Mutex protecting page_zip_stat_per_index */
+ib_mutex_t page_zip_stat_per_index_mutex;
+
+/* Mutex for locking srv_monitor_file. Not created if srv_read_only_mode */
+ib_mutex_t srv_monitor_file_mutex;
+
+/** Temporary file for innodb monitor output */
+FILE* srv_monitor_file;
+/** Mutex for locking srv_misc_tmpfile. Not created if srv_read_only_mode.
+This mutex has a very low rank; threads reserving it should not
+acquire any further latches or sleep before releasing this one. */
+ib_mutex_t srv_misc_tmpfile_mutex;
+/** Temporary file for miscellaneous diagnostic output */
+FILE* srv_misc_tmpfile;
+
+static ulint srv_main_thread_process_no;
+static ulint srv_main_thread_id;
+
+/* The following counts are used by the srv_master_callback. */
+
+/** Iterations of the loop bounded by 'srv_active' label. */
+ulint srv_main_active_loops;
+/** Iterations of the loop bounded by the 'srv_idle' label. */
+ulint srv_main_idle_loops;
+/** Iterations of the loop bounded by the 'srv_shutdown' label. */
+static ulint srv_main_shutdown_loops;
+/** Log writes involving flush. */
+ulint srv_log_writes_and_flush;
+
+/* This is only ever touched by the master thread. It records the
+time when the last flush of log file has happened. The master
+thread ensures that we flush the log files at least once per
+second. */
+static time_t srv_last_log_flush_time;
+
+/* Interval in seconds at which various tasks are performed by the
+master thread when server is active. In order to balance the workload,
+we should try to keep intervals such that they are not multiple of
+each other. For example, if we have intervals for various tasks
+defined as 5, 10, 15, 60 then all tasks will be performed when
+current_time % 60 == 0 and no tasks will be performed when
+current_time % 5 != 0. */
+
+# define SRV_MASTER_CHECKPOINT_INTERVAL (7)
+# define SRV_MASTER_DICT_LRU_INTERVAL (47)
+
+/** Buffer pool dump status frequency in percentages */
+UNIV_INTERN ulong srv_buf_dump_status_frequency;
+
+/*
+ IMPLEMENTATION OF THE SERVER MAIN PROGRAM
+ =========================================
+
+There is the following analogue between this database
+server and an operating system kernel:
+
+DB concept equivalent OS concept
+---------- ---------------------
+transaction -- process;
+
+query thread -- thread;
+
+lock -- semaphore;
+
+kernel -- kernel;
+
+query thread execution:
+(a) without lock mutex
+reserved -- process executing in user mode;
+(b) with lock mutex reserved
+ -- process executing in kernel mode;
+
+The server has several background threads all running at the same
+priority as user threads. It periodically checks if there is anything
+happening in the server which requires intervention of the master
+thread. Such situations may be, for example, when flushing of dirty
+blocks is needed in the buffer pool or old version of database rows
+have to be cleaned away (purged). The user can configure a separate
+dedicated purge thread(s) too, in which case the master thread does not
+do any purging.
+
+The threads which we call user threads serve the queries of the MySQL
+server. They run at normal priority.
+
+When there is no activity in the system, also the master thread
+suspends itself to wait for an event making the server totally silent.
+
+There is still one complication in our server design. If a
+background utility thread obtains a resource (e.g., mutex) needed by a user
+thread, and there is also some other user activity in the system,
+the user thread may have to wait indefinitely long for the
+resource, as the OS does not schedule a background thread if
+there is some other runnable user thread. This problem is called
+priority inversion in real-time programming.
+
+One solution to the priority inversion problem would be to keep record
+of which thread owns which resource and in the above case boost the
+priority of the background thread so that it will be scheduled and it
+can release the resource. This solution is called priority inheritance
+in real-time programming. A drawback of this solution is that the overhead
+of acquiring a mutex increases slightly, maybe 0.2 microseconds on a 100
+MHz Pentium, because the thread has to call os_thread_get_curr_id. This may
+be compared to 0.5 microsecond overhead for a mutex lock-unlock pair. Note
+that the thread cannot store the information in the resource, say a mutex,
+itself, because competing threads could wipe out the information if it is
+stored before acquiring the mutex, and if it is stored afterwards, the
+information is outdated for the time of one machine instruction, at least.
+(To be precise, the information could be stored to lock_word in mutex if
+the machine supports atomic swap.)
+
+The above solution with priority inheritance may become relevant in the
+future; currently we do not implement any priority twiddling solution.
+Our general aim is to reduce the contention of all mutexes by making
+them more fine grained.
+
+The thread table contains information of the current status of each
+thread existing in the system, and also the event semaphores used in
+suspending the master thread and utility threads when they have nothing
+to do. The thread table can be seen as an analogue to the process table
+in a traditional Unix implementation. */
+
+/** The server system struct: the query-thread task queue, the mutex that
+guards it, and a counter of server activity. srv_init() creates the mutex
+and initializes the list only when srv_read_only_mode is not set. */
+struct srv_sys_t{
+ ib_mutex_t tasks_mutex; /*!< mutex protecting the
+ tasks queue */
+ UT_LIST_BASE_NODE_T(que_thr_t)
+ tasks; /*!< task queue (list of
+ que_thr_t, linked through
+ que_thr_t::queue) */
+
+ srv_stats_t::ulint_ctr_1_t
+ activity_count; /*!< For tracking server
+ activity */
+};
+
+static srv_sys_t srv_sys; /*!< the single file-local instance */
+
+/*
+ Structure shared by the purge timer and coordinator_callback.
+ No protection is necessary for the plain members, since the timer and
+ the task never run in parallel (being in the same task group of size 1).
+*/
+struct purge_coordinator_state
+{
+ /** Snapshot of the last history length before the purge call.*/
+ uint32 m_history_length;
+ Atomic_counter<int> m_running; /*!< NOTE(review): the users of this
+ counter are outside this excerpt; presumably it counts
+ running coordinator invocations -- confirm */
+ purge_coordinator_state() : m_history_length(), m_running(0) {} /* both
+ members start at zero */
+};
+
+static purge_coordinator_state purge_state; /*!< the single file-local instance */
+
+/** threadpool timer for srv_monitor_task() */
+std::unique_ptr<tpool::timer> srv_monitor_timer;
+
+
+/** The buffer pool dump/load file name */
+char* srv_buf_dump_filename;
+
+/** Boolean config knobs that tell InnoDB to dump the buffer pool at shutdown
+and/or load it during startup. */
+char srv_buffer_pool_dump_at_shutdown = TRUE;
+char srv_buffer_pool_load_at_startup = TRUE;
+
+#ifdef HAVE_PSI_STAGE_INTERFACE
+/** Performance schema stage event for monitoring ALTER TABLE progress
+everything after flush log_make_checkpoint(). */
+PSI_stage_info srv_stage_alter_table_end
+ = {0, "alter table (end)", PSI_FLAG_STAGE_PROGRESS};
+
+/** Performance schema stage event for monitoring ALTER TABLE progress
+row_merge_insert_index_tuples(). */
+PSI_stage_info srv_stage_alter_table_insert
+ = {0, "alter table (insert)", PSI_FLAG_STAGE_PROGRESS};
+
+/** Performance schema stage event for monitoring ALTER TABLE progress
+row_log_apply(). */
+PSI_stage_info srv_stage_alter_table_log_index
+ = {0, "alter table (log apply index)", PSI_FLAG_STAGE_PROGRESS};
+
+/** Performance schema stage event for monitoring ALTER TABLE progress
+row_log_table_apply(). */
+PSI_stage_info srv_stage_alter_table_log_table
+ = {0, "alter table (log apply table)", PSI_FLAG_STAGE_PROGRESS};
+
+/** Performance schema stage event for monitoring ALTER TABLE progress
+row_merge_sort(). */
+PSI_stage_info srv_stage_alter_table_merge_sort
+ = {0, "alter table (merge sort)", PSI_FLAG_STAGE_PROGRESS};
+
+/** Performance schema stage event for monitoring ALTER TABLE progress
+row_merge_read_clustered_index(). */
+PSI_stage_info srv_stage_alter_table_read_pk_internal_sort
+ = {0, "alter table (read PK and internal sort)", PSI_FLAG_STAGE_PROGRESS};
+
+/** Performance schema stage event for monitoring buffer pool load progress. */
+PSI_stage_info srv_stage_buffer_pool_load
+ = {0, "buffer pool load", PSI_FLAG_STAGE_PROGRESS};
+#endif /* HAVE_PSI_STAGE_INTERFACE */
+
+/*********************************************************************//**
+Prints counters for work done by srv_master_thread.
+Writes two lines to the stream: the active, shutdown and idle loop
+counters (in that order), followed by the srv_log_writes_and_flush
+counter. No mutex is acquired in this function. */
+static
+void
+srv_print_master_thread_info(
+/*=========================*/
+ FILE *file) /*!< in: output stream */
+{
+ fprintf(file, "srv_master_thread loops: " ULINTPF " srv_active, "
+ ULINTPF " srv_shutdown, " ULINTPF " srv_idle\n"
+ "srv_master_thread log flush and writes: " ULINTPF "\n",
+ srv_main_active_loops, /* printed before the "srv_active" label */
+ srv_main_shutdown_loops, /* printed before the "srv_shutdown" label */
+ srv_main_idle_loops, /* printed before the "srv_idle" label */
+ srv_log_writes_and_flush);
+}
+/** Thread start hook for the common thread pool; srv_thread_pool_init()
+passes it as the first callback to set_thread_callbacks(). It runs
+my_thread_init() and then registers the thread under
+thread_pool_thread_key via pfs_register_thread(). */
+static void thread_pool_thread_init()
+{
+ my_thread_init();
+ pfs_register_thread(thread_pool_thread_key);
+}
+static void thread_pool_thread_end()
+{ /* Counterpart of thread_pool_thread_init(), passed as the second
+ callback to set_thread_callbacks(). The delete/end calls are made in
+ the reverse order of the register/init calls in the init hook. */
+ pfs_delete_thread();
+ my_thread_end();
+}
+
+/** Hook compiled only when DBUG_OFF is not defined; srv_thread_pool_init()
+installs it with tpool::set_after_task_callback(). It debug-asserts that
+sync_check_iterate(sync_check()) returns false.
+NOTE(review): presumably this means the worker thread holds no InnoDB
+latches at that point -- confirm against the definition of sync_check. */
+#ifndef DBUG_OFF
+static void dbug_after_task_callback()
+{
+ ut_ad(!sync_check_iterate(sync_check()));
+}
+#endif
+/** Create the common thread pool srv_thread_pool and install its hooks.
+A debug assertion requires that no pool exists yet. The pool comes from
+tpool::create_thread_pool_win() on _WIN32 and from
+tpool::create_thread_pool_generic() elsewhere. */
+void srv_thread_pool_init()
+{
+ DBUG_ASSERT(!srv_thread_pool);
+
+#if defined (_WIN32)
+ srv_thread_pool= tpool::create_thread_pool_win();
+#else
+ srv_thread_pool= tpool::create_thread_pool_generic();
+#endif
+ /* Per-thread init/end hooks for the threads of the pool. */
+ srv_thread_pool->set_thread_callbacks(thread_pool_thread_init,
+ thread_pool_thread_end);
+#ifndef DBUG_OFF
+ /* The after-task check exists only when DBUG_OFF is not defined. */
+ tpool::set_after_task_callback(dbug_after_task_callback);
+#endif
+}
+
+/** Destroy the common thread pool and reset srv_thread_pool to nullptr.
+Debug builds assert that srv_master_timer is no longer set. Called from
+srv_free(). */
+void srv_thread_pool_end()
+{
+ ut_ad(!srv_master_timer);
+ delete srv_thread_pool;
+ srv_thread_pool= nullptr;
+}
+/** Set by srv_init(); srv_free() returns immediately while this is false. */
+static bool need_srv_free;
+
+/** Initialize the server.
+Creates srv_innodb_monitor_mutex and page_zip_stat_per_index_mutex and,
+unless srv_read_only_mode is set, the task-queue mutex and list of
+srv_sys. Sets need_srv_free, creates the optional events that are
+compiled in, and finally initializes trx_i_s_cache. */
+static void srv_init()
+{
+ mutex_create(LATCH_ID_SRV_INNODB_MONITOR, &srv_innodb_monitor_mutex);
+
+ if (!srv_read_only_mode) {
+ mutex_create(LATCH_ID_SRV_SYS_TASKS, &srv_sys.tasks_mutex);
+
+ UT_LIST_INIT(srv_sys.tasks, &que_thr_t::queue);
+ }
+
+ need_srv_free = true;
+ ut_d(srv_master_thread_disabled_event = os_event_create(0)); /* the
+ event variable is declared only under UNIV_DEBUG */
+
+ /* page_zip_stat_per_index_mutex is acquired from:
+ 1. page_zip_compress() (after SYNC_FSP)
+ 2. page_zip_decompress()
+ 3. i_s_cmp_per_index_fill_low() (where SYNC_DICT is acquired)
+ 4. innodb_cmp_per_index_update(), no other latches
+ since we do not acquire any other latches while holding this mutex,
+ it can have very low level. We pick SYNC_ANY_LATCH for it. */
+ mutex_create(LATCH_ID_PAGE_ZIP_STAT_PER_INDEX,
+ &page_zip_stat_per_index_mutex);
+
+#ifdef WITH_INNODB_DISALLOW_WRITES
+ /* Writes have to be enabled on init or else we hang. Thus, we
+ always set the event here regardless of innobase_disallow_writes.
+ That flag will always be 0 at this point because it isn't settable
+ via my.cnf or command line arg. */
+ srv_allow_writes_event = os_event_create(0);
+ os_event_set(srv_allow_writes_event);
+#endif /* WITH_INNODB_DISALLOW_WRITES */
+
+ /* Initialize some INFORMATION SCHEMA internal structures */
+ trx_i_s_cache_init(trx_i_s_cache);
+
+}
+
+/*********************************************************************//**
+Frees the data structures created in srv_init().
+Does nothing unless srv_init() has run since the previous call: the
+need_srv_free flag is cleared before anything is released, so that a
+repeated call cannot free the same mutexes and events a second time.
+Also shuts down the common thread pool via srv_thread_pool_end(). */
+void
+srv_free(void)
+/*==========*/
+{
+	if (!need_srv_free) {
+		return;
+	}
+
+	/* Clear the guard first: without this, a second call would pass
+	the check above and release already-freed objects. srv_init()
+	sets the flag again on the next start-up. */
+	need_srv_free = false;
+
+	mutex_free(&srv_innodb_monitor_mutex);
+	mutex_free(&page_zip_stat_per_index_mutex);
+
+	/* The task-queue mutex is created only outside read-only mode. */
+	if (!srv_read_only_mode) {
+		mutex_free(&srv_sys.tasks_mutex);
+	}
+
+	ut_d(os_event_destroy(srv_master_thread_disabled_event));
+
+	/* NOTE(review): srv_allow_writes_event, created in srv_init()
+	under WITH_INNODB_DISALLOW_WRITES, is not destroyed here; confirm
+	that it is released elsewhere. */
+
+	trx_i_s_cache_free(trx_i_s_cache);
+	srv_thread_pool_end();
+}
+
+/*********************************************************************//**
+Boots the InnoDB server.
+Runs the start-up initializers in a fixed order: srv_thread_pool_init(),
+sync_check_init(), trx_pool_init(), row_mysql_init() and finally
+srv_init(). */
+void
+srv_boot(void)
+/*==========*/
+{
+ srv_thread_pool_init();
+ sync_check_init();
+ trx_pool_init();
+ row_mysql_init();
+ srv_init();
+}
+
+/******************************************************************//**
+Refreshes the baseline values used to calculate per-second averages.
+The baselines are reset only when at least 60 seconds have passed since
+srv_last_monitor_time, so that the printed averages cover at most the
+last 60 seconds. All work is done while holding srv_innodb_monitor_mutex.
+@param current_time	time to compare with and store in srv_last_monitor_time */
+static void srv_refresh_innodb_monitor_stats(time_t current_time)
+{
+	mutex_enter(&srv_innodb_monitor_mutex);
+
+	/* We refresh InnoDB Monitor values so that averages are
+	printed from at most 60 last seconds */
+	const bool	refresh_due =
+		!(difftime(current_time, srv_last_monitor_time) < 60);
+
+	if (refresh_due) {
+		srv_last_monitor_time = current_time;
+
+		os_aio_refresh_stats();
+
+#ifdef BTR_CUR_HASH_ADAPT
+		btr_cur_n_sea_old = btr_cur_n_sea;
+#endif /* BTR_CUR_HASH_ADAPT */
+		btr_cur_n_non_sea_old = btr_cur_n_non_sea;
+
+		log_refresh_stats();
+
+		buf_refresh_io_stats();
+
+		/* Row counters for user tables. */
+		srv_n_rows_inserted_old = srv_stats.n_rows_inserted;
+		srv_n_rows_updated_old = srv_stats.n_rows_updated;
+		srv_n_rows_deleted_old = srv_stats.n_rows_deleted;
+		srv_n_rows_read_old = srv_stats.n_rows_read;
+
+		/* Row counters for system tables. */
+		srv_n_system_rows_inserted_old =
+			srv_stats.n_system_rows_inserted;
+		srv_n_system_rows_updated_old =
+			srv_stats.n_system_rows_updated;
+		srv_n_system_rows_deleted_old =
+			srv_stats.n_system_rows_deleted;
+		srv_n_system_rows_read_old =
+			srv_stats.n_system_rows_read;
+	}
+
+	mutex_exit(&srv_innodb_monitor_mutex);
+}
+
+/******************************************************************//**
+Outputs to a file the output of the InnoDB Monitor.
+@return FALSE if not all information printed
+due to failure to obtain necessary mutex */
+ibool
+srv_printf_innodb_monitor(
+/*======================*/
+ FILE* file, /*!< in: output stream */
+ ibool nowait, /*!< in: whether to wait for the
+ lock_sys_t:: mutex */
+ ulint* trx_start_pos, /*!< out: file position of the start of
+ the list of active transactions */
+ ulint* trx_end) /*!< out: file position of the end of
+ the list of active transactions */
+{
+ double time_elapsed;
+ time_t current_time;
+ ibool ret;
+
+ mutex_enter(&srv_innodb_monitor_mutex);
+
+ current_time = time(NULL);
+
+ /* We add 0.001 seconds to time_elapsed to prevent division
+ by zero if two users happen to call SHOW ENGINE INNODB STATUS at the
+ same time */
+
+ time_elapsed = difftime(current_time, srv_last_monitor_time)
+ + 0.001;
+
+ srv_last_monitor_time = time(NULL);
+
+ fputs("\n=====================================\n", file);
+
+ ut_print_timestamp(file);
+ fprintf(file,
+ " INNODB MONITOR OUTPUT\n"
+ "=====================================\n"
+ "Per second averages calculated from the last %lu seconds\n",
+ (ulong) time_elapsed);
+
+ fputs("-----------------\n"
+ "BACKGROUND THREAD\n"
+ "-----------------\n", file);
+ srv_print_master_thread_info(file);
+
+ fputs("----------\n"
+ "SEMAPHORES\n"
+ "----------\n", file);
+
+ sync_print(file);
+
+ /* Conceptually, srv_innodb_monitor_mutex has a very high latching
+ order level in sync0sync.h, while dict_foreign_err_mutex has a very
+ low level 135. Therefore we can reserve the latter mutex here without
+ a danger of a deadlock of threads. */
+
+ mutex_enter(&dict_foreign_err_mutex);
+
+ if (!srv_read_only_mode && ftell(dict_foreign_err_file) != 0L) {
+ fputs("------------------------\n"
+ "LATEST FOREIGN KEY ERROR\n"
+ "------------------------\n", file);
+ ut_copy_file(file, dict_foreign_err_file);
+ }
+
+ mutex_exit(&dict_foreign_err_mutex);
+
+ /* Only if lock_print_info_summary proceeds correctly,
+ before we call the lock_print_info_all_transactions
+ to print all the lock information. IMPORTANT NOTE: This
+ function acquires the lock mutex on success. */
+ ret = lock_print_info_summary(file, nowait);
+
+ if (ret) {
+ if (trx_start_pos) {
+ long t = ftell(file);
+ if (t < 0) {
+ *trx_start_pos = ULINT_UNDEFINED;
+ } else {
+ *trx_start_pos = (ulint) t;
+ }
+ }
+
+ /* NOTE: If we get here then we have the lock mutex. This
+ function will release the lock mutex that we acquired when
+ we called the lock_print_info_summary() function earlier. */
+
+ lock_print_info_all_transactions(file);
+
+ if (trx_end) {
+ long t = ftell(file);
+ if (t < 0) {
+ *trx_end = ULINT_UNDEFINED;
+ } else {
+ *trx_end = (ulint) t;
+ }
+ }
+ }
+
+ fputs("--------\n"
+ "FILE I/O\n"
+ "--------\n", file);
+ os_aio_print(file);
+
+ fputs("-------------------------------------\n"
+ "INSERT BUFFER AND ADAPTIVE HASH INDEX\n"
+ "-------------------------------------\n", file);
+ ibuf_print(file);
+
+#ifdef BTR_CUR_HASH_ADAPT
+ for (ulint i = 0; i < btr_ahi_parts && btr_search_enabled; ++i) {
+ const auto part= &btr_search_sys.parts[i];
+ rw_lock_s_lock(&part->latch);
+ ut_ad(part->heap->type == MEM_HEAP_FOR_BTR_SEARCH);
+ fprintf(file, "Hash table size " ULINTPF
+ ", node heap has " ULINTPF " buffer(s)\n",
+ part->table.n_cells,
+ part->heap->base.count - !part->heap->free_block);
+ rw_lock_s_unlock(&part->latch);
+ }
+
+ fprintf(file,
+ "%.2f hash searches/s, %.2f non-hash searches/s\n",
+ static_cast<double>(btr_cur_n_sea - btr_cur_n_sea_old)
+ / time_elapsed,
+ static_cast<double>(btr_cur_n_non_sea - btr_cur_n_non_sea_old)
+ / time_elapsed);
+ btr_cur_n_sea_old = btr_cur_n_sea;
+#else /* BTR_CUR_HASH_ADAPT */
+ fprintf(file,
+ "%.2f non-hash searches/s\n",
+ static_cast<double>(btr_cur_n_non_sea - btr_cur_n_non_sea_old)
+ / time_elapsed);
+#endif /* BTR_CUR_HASH_ADAPT */
+ btr_cur_n_non_sea_old = btr_cur_n_non_sea;
+
+ fputs("---\n"
+ "LOG\n"
+ "---\n", file);
+ log_print(file);
+
+ fputs("----------------------\n"
+ "BUFFER POOL AND MEMORY\n"
+ "----------------------\n", file);
+ fprintf(file,
+ "Total large memory allocated " ULINTPF "\n"
+ "Dictionary memory allocated " ULINTPF "\n",
+ ulint{os_total_large_mem_allocated},
+ dict_sys.rough_size());
+
+ buf_print_io(file);
+
+ fputs("--------------\n"
+ "ROW OPERATIONS\n"
+ "--------------\n", file);
+ fprintf(file, ULINTPF " read views open inside InnoDB\n",
+ trx_sys.view_count());
+
+ if (ulint n_reserved = fil_system.sys_space->n_reserved_extents) {
+ fprintf(file,
+ ULINTPF " tablespace extents now reserved for"
+ " B-tree split operations\n",
+ n_reserved);
+ }
+
+ fprintf(file,
+ "Process ID=" ULINTPF
+ ", Main thread ID=" ULINTPF
+ ", state: %s\n",
+ srv_main_thread_process_no,
+ srv_main_thread_id,
+ srv_main_thread_op_info);
+ fprintf(file,
+ "Number of rows inserted " ULINTPF
+ ", updated " ULINTPF
+ ", deleted " ULINTPF
+ ", read " ULINTPF "\n",
+ (ulint) srv_stats.n_rows_inserted,
+ (ulint) srv_stats.n_rows_updated,
+ (ulint) srv_stats.n_rows_deleted,
+ (ulint) srv_stats.n_rows_read);
+ fprintf(file,
+ "%.2f inserts/s, %.2f updates/s,"
+ " %.2f deletes/s, %.2f reads/s\n",
+ static_cast<double>(srv_stats.n_rows_inserted
+ - srv_n_rows_inserted_old)
+ / time_elapsed,
+ static_cast<double>(srv_stats.n_rows_updated
+ - srv_n_rows_updated_old)
+ / time_elapsed,
+ static_cast<double>(srv_stats.n_rows_deleted
+ - srv_n_rows_deleted_old)
+ / time_elapsed,
+ static_cast<double>(srv_stats.n_rows_read
+ - srv_n_rows_read_old)
+ / time_elapsed);
+ fprintf(file,
+ "Number of system rows inserted " ULINTPF
+ ", updated " ULINTPF ", deleted " ULINTPF
+ ", read " ULINTPF "\n",
+ (ulint) srv_stats.n_system_rows_inserted,
+ (ulint) srv_stats.n_system_rows_updated,
+ (ulint) srv_stats.n_system_rows_deleted,
+ (ulint) srv_stats.n_system_rows_read);
+ fprintf(file,
+ "%.2f inserts/s, %.2f updates/s,"
+ " %.2f deletes/s, %.2f reads/s\n",
+ static_cast<double>(srv_stats.n_system_rows_inserted
+ - srv_n_system_rows_inserted_old)
+ / time_elapsed,
+ static_cast<double>(srv_stats.n_system_rows_updated
+ - srv_n_system_rows_updated_old)
+ / time_elapsed,
+ static_cast<double>(srv_stats.n_system_rows_deleted
+ - srv_n_system_rows_deleted_old)
+ / time_elapsed,
+ static_cast<double>(srv_stats.n_system_rows_read
+ - srv_n_system_rows_read_old)
+ / time_elapsed);
+ srv_n_rows_inserted_old = srv_stats.n_rows_inserted;
+ srv_n_rows_updated_old = srv_stats.n_rows_updated;
+ srv_n_rows_deleted_old = srv_stats.n_rows_deleted;
+ srv_n_rows_read_old = srv_stats.n_rows_read;
+ srv_n_system_rows_inserted_old = srv_stats.n_system_rows_inserted;
+ srv_n_system_rows_updated_old = srv_stats.n_system_rows_updated;
+ srv_n_system_rows_deleted_old = srv_stats.n_system_rows_deleted;
+ srv_n_system_rows_read_old = srv_stats.n_system_rows_read;
+
+ fputs("----------------------------\n"
+ "END OF INNODB MONITOR OUTPUT\n"
+ "============================\n", file);
+ mutex_exit(&srv_innodb_monitor_mutex);
+ fflush(file);
+
+ return(ret);
+}
+
+/******************************************************************//**
+Function to pass InnoDB status variables to MySQL.
+Copies a snapshot of various InnoDB counters into the global
+export_vars structure. The adaptive hash index and dictionary memory
+figures are collected first; most of the remaining fields are assigned
+while holding srv_innodb_monitor_mutex; the LSN and checkpoint fields
+are read while holding log_sys.mutex, and innodb_checkpoint_age is
+derived from them after that mutex has been released. */
+void
+srv_export_innodb_status(void)
+/*==========================*/
+{
+ fil_crypt_stat_t crypt_stat;
+ /* crypt_stat is only filled in when the server is not in read-only
+ mode; it is only read under the same condition further below. */
+ if (!srv_read_only_mode) {
+ fil_crypt_total_stat(&crypt_stat);
+ }
+ /* Add up, for every adaptive hash index partition that has a heap,
+ the heap size and the size of the hash cell array. Each partition is
+ read while holding its latch in shared mode. */
+#ifdef BTR_CUR_HASH_ADAPT
+ ulint mem_adaptive_hash = 0;
+ for (ulong i = 0; i < btr_ahi_parts; i++) {
+ const auto part= &btr_search_sys.parts[i];
+ rw_lock_s_lock(&part->latch);
+ if (part->heap) {
+ ut_ad(part->heap->type == MEM_HEAP_FOR_BTR_SEARCH);
+
+ mem_adaptive_hash += mem_heap_get_size(part->heap)
+ + part->table.n_cells * sizeof(hash_cell_t);
+ }
+ rw_lock_s_unlock(&part->latch);
+ }
+ export_vars.innodb_mem_adaptive_hash = mem_adaptive_hash;
+#endif
+
+ export_vars.innodb_mem_dictionary = dict_sys.rough_size();
+
+ mutex_enter(&srv_innodb_monitor_mutex);
+
+ export_vars.innodb_data_pending_reads =
+ ulint(MONITOR_VALUE(MONITOR_OS_PENDING_READS));
+
+ export_vars.innodb_data_pending_writes =
+ ulint(MONITOR_VALUE(MONITOR_OS_PENDING_WRITES));
+
+ export_vars.innodb_data_pending_fsyncs =
+ log_sys.get_pending_flushes()
+ + fil_n_pending_tablespace_flushes;
+
+ export_vars.innodb_data_fsyncs = os_n_fsyncs;
+
+ export_vars.innodb_data_read = srv_stats.data_read;
+
+ export_vars.innodb_data_reads = os_n_file_reads;
+
+ export_vars.innodb_data_writes = os_n_file_writes;
+ /* dblwr receives buf_dblwr.submitted() and is added to
+ innodb_data_written below; it stays 0 when the doublewrite buffer
+ is not initialised. */
+ ulint dblwr = 0;
+
+ if (buf_dblwr.is_initialised()) {
+ buf_dblwr.lock();
+ dblwr = buf_dblwr.submitted();
+ export_vars.innodb_dblwr_pages_written = buf_dblwr.written();
+ export_vars.innodb_dblwr_writes = buf_dblwr.batches();
+ buf_dblwr.unlock();
+ }
+
+ export_vars.innodb_data_written = srv_stats.data_written + dblwr;
+
+ export_vars.innodb_buffer_pool_read_requests
+ = buf_pool.stat.n_page_gets;
+
+ export_vars.innodb_buffer_pool_write_requests =
+ srv_stats.buf_pool_write_requests;
+
+ export_vars.innodb_buffer_pool_reads = srv_stats.buf_pool_reads;
+
+ export_vars.innodb_buffer_pool_read_ahead_rnd =
+ buf_pool.stat.n_ra_pages_read_rnd;
+
+ export_vars.innodb_buffer_pool_read_ahead =
+ buf_pool.stat.n_ra_pages_read;
+
+ export_vars.innodb_buffer_pool_read_ahead_evicted =
+ buf_pool.stat.n_ra_pages_evicted;
+
+ export_vars.innodb_buffer_pool_pages_data =
+ UT_LIST_GET_LEN(buf_pool.LRU);
+
+ export_vars.innodb_buffer_pool_bytes_data =
+ buf_pool.stat.LRU_bytes
+ + (UT_LIST_GET_LEN(buf_pool.unzip_LRU)
+ << srv_page_size_shift);
+
+ export_vars.innodb_buffer_pool_pages_dirty =
+ UT_LIST_GET_LEN(buf_pool.flush_list);
+
+ export_vars.innodb_buffer_pool_pages_made_young
+ = buf_pool.stat.n_pages_made_young;
+ export_vars.innodb_buffer_pool_pages_made_not_young
+ = buf_pool.stat.n_pages_not_made_young;
+
+ export_vars.innodb_buffer_pool_pages_old = buf_pool.LRU_old_len;
+
+ export_vars.innodb_buffer_pool_bytes_dirty =
+ buf_pool.stat.flush_list_bytes;
+
+ export_vars.innodb_buffer_pool_pages_free =
+ UT_LIST_GET_LEN(buf_pool.free);
+
+#ifdef UNIV_DEBUG
+ export_vars.innodb_buffer_pool_pages_latched =
+ buf_get_latched_pages_number();
+#endif /* UNIV_DEBUG */
+ export_vars.innodb_buffer_pool_pages_total = buf_pool.get_n_pages();
+ /* "misc" pages are those that are neither in the LRU list nor in
+ the free list. */
+ export_vars.innodb_buffer_pool_pages_misc =
+ buf_pool.get_n_pages()
+ - UT_LIST_GET_LEN(buf_pool.LRU)
+ - UT_LIST_GET_LEN(buf_pool.free);
+
+ export_vars.innodb_max_trx_id = trx_sys.get_max_trx_id();
+ export_vars.innodb_history_list_length = trx_sys.rseg_history_len;
+
+ export_vars.innodb_log_waits = srv_stats.log_waits;
+
+ export_vars.innodb_os_log_written = srv_stats.os_log_written;
+
+ export_vars.innodb_os_log_fsyncs = log_sys.get_flushes();
+
+ export_vars.innodb_os_log_pending_fsyncs
+ = log_sys.get_pending_flushes();
+
+ export_vars.innodb_os_log_pending_writes =
+ srv_stats.os_log_pending_writes;
+
+ export_vars.innodb_log_write_requests = srv_stats.log_write_requests;
+
+ export_vars.innodb_log_writes = srv_stats.log_writes;
+
+ export_vars.innodb_row_lock_waits = srv_stats.n_lock_wait_count;
+
+ export_vars.innodb_row_lock_current_waits =
+ srv_stats.n_lock_wait_current_count;
+
+ export_vars.innodb_row_lock_time = srv_stats.n_lock_wait_time / 1000;
+ /* Only compute the average when at least one lock wait has been
+ counted, so that the division below cannot divide by zero. */
+ if (srv_stats.n_lock_wait_count > 0) {
+
+ export_vars.innodb_row_lock_time_avg = (ulint)
+ (srv_stats.n_lock_wait_time
+ / 1000 / srv_stats.n_lock_wait_count);
+
+ } else {
+ export_vars.innodb_row_lock_time_avg = 0;
+ }
+
+ export_vars.innodb_row_lock_time_max =
+ lock_sys.n_lock_max_wait_time / 1000;
+
+ export_vars.innodb_rows_read = srv_stats.n_rows_read;
+
+ export_vars.innodb_rows_inserted = srv_stats.n_rows_inserted;
+
+ export_vars.innodb_rows_updated = srv_stats.n_rows_updated;
+
+ export_vars.innodb_rows_deleted = srv_stats.n_rows_deleted;
+
+ export_vars.innodb_system_rows_read = srv_stats.n_system_rows_read;
+
+ export_vars.innodb_system_rows_inserted =
+ srv_stats.n_system_rows_inserted;
+
+ export_vars.innodb_system_rows_updated =
+ srv_stats.n_system_rows_updated;
+
+ export_vars.innodb_system_rows_deleted =
+ srv_stats.n_system_rows_deleted;
+
+ export_vars.innodb_truncated_status_writes =
+ srv_truncated_status_writes;
+
+ export_vars.innodb_page_compression_saved = srv_stats.page_compression_saved;
+ export_vars.innodb_index_pages_written = srv_stats.index_pages_written;
+ export_vars.innodb_non_index_pages_written = srv_stats.non_index_pages_written;
+ export_vars.innodb_pages_page_compressed = srv_stats.pages_page_compressed;
+ export_vars.innodb_page_compressed_trim_op = srv_stats.page_compressed_trim_op;
+ export_vars.innodb_pages_page_decompressed = srv_stats.pages_page_decompressed;
+ export_vars.innodb_pages_page_compression_error = srv_stats.pages_page_compression_error;
+ export_vars.innodb_pages_decrypted = srv_stats.pages_decrypted;
+ export_vars.innodb_pages_encrypted = srv_stats.pages_encrypted;
+ export_vars.innodb_n_merge_blocks_encrypted = srv_stats.n_merge_blocks_encrypted;
+ export_vars.innodb_n_merge_blocks_decrypted = srv_stats.n_merge_blocks_decrypted;
+ export_vars.innodb_n_rowlog_blocks_encrypted = srv_stats.n_rowlog_blocks_encrypted;
+ export_vars.innodb_n_rowlog_blocks_decrypted = srv_stats.n_rowlog_blocks_decrypted;
+
+ export_vars.innodb_n_temp_blocks_encrypted =
+ srv_stats.n_temp_blocks_encrypted;
+
+ export_vars.innodb_n_temp_blocks_decrypted =
+ srv_stats.n_temp_blocks_decrypted;
+
+ export_vars.innodb_defragment_compression_failures =
+ btr_defragment_compression_failures;
+ export_vars.innodb_defragment_failures = btr_defragment_failures;
+ export_vars.innodb_defragment_count = btr_defragment_count;
+
+ export_vars.innodb_onlineddl_rowlog_rows = onlineddl_rowlog_rows;
+ export_vars.innodb_onlineddl_rowlog_pct_used = onlineddl_rowlog_pct_used;
+ export_vars.innodb_onlineddl_pct_progress = onlineddl_pct_progress;
+
+ export_vars.innodb_sec_rec_cluster_reads =
+ srv_stats.n_sec_rec_cluster_reads;
+ export_vars.innodb_sec_rec_cluster_reads_avoided =
+ srv_stats.n_sec_rec_cluster_reads_avoided;
+ /* The encryption fields are left untouched in read-only mode,
+ because crypt_stat was not filled in above in that case. */
+ if (!srv_read_only_mode) {
+ export_vars.innodb_encryption_rotation_pages_read_from_cache =
+ crypt_stat.pages_read_from_cache;
+ export_vars.innodb_encryption_rotation_pages_read_from_disk =
+ crypt_stat.pages_read_from_disk;
+ export_vars.innodb_encryption_rotation_pages_modified =
+ crypt_stat.pages_modified;
+ export_vars.innodb_encryption_rotation_pages_flushed =
+ crypt_stat.pages_flushed;
+ export_vars.innodb_encryption_rotation_estimated_iops =
+ crypt_stat.estimated_iops;
+ export_vars.innodb_encryption_key_requests =
+ srv_stats.n_key_requests;
+ export_vars.innodb_key_rotation_list_length =
+ srv_stats.key_rotation_list_length;
+ }
+
+ mutex_exit(&srv_innodb_monitor_mutex);
+ /* The LSN and checkpoint values are read together while holding
+ log_sys.mutex. */
+ mysql_mutex_lock(&log_sys.mutex);
+ export_vars.innodb_lsn_current = log_sys.get_lsn();
+ export_vars.innodb_lsn_flushed = log_sys.get_flushed_lsn();
+ export_vars.innodb_lsn_last_checkpoint = log_sys.last_checkpoint_lsn;
+ export_vars.innodb_checkpoint_max_age = static_cast<ulint>(
+ log_sys.max_checkpoint_age);
+ mysql_mutex_unlock(&log_sys.mutex);
+ /* Derived from the two values captured above; no mutex is held
+ for this computation. */
+ export_vars.innodb_checkpoint_age = static_cast<ulint>(
+ export_vars.innodb_lsn_current
+ - export_vars.innodb_lsn_last_checkpoint);
+}
+
+/** State that srv_monitor() keeps between its invocations. */
+struct srv_monitor_state_t
+{
+  /** timestamp compared against the current time by srv_monitor() */
+  time_t last_monitor_time;
+  /** counter passed to MUTEX_NOWAIT(); incremented when a monitor
+  printout returns false, reset to 0 when it succeeds */
+  ulint mutex_skipped;
+  /** whether srv_print_innodb_monitor has been observed as set */
+  bool last_srv_print_monitor;
+
+  /** Start with zeroed counters, and stamp both the global
+  srv_last_monitor_time and the local timestamp with the current time. */
+  srv_monitor_state_t()
+    : mutex_skipped(0), last_srv_print_monitor(false)
+  {
+    last_monitor_time= srv_last_monitor_time= time(NULL);
+  }
+};
+
+/** The single instance used by srv_monitor() */
+static srv_monitor_state_t monitor_state;
+
+/** A task which prints the info output by various InnoDB monitors.
+At most once in 15 seconds it prints the InnoDB monitor output to
+stderr (if srv_print_innodb_monitor is set) and rewrites
+srv_monitor_file (if srv_innodb_status is set and the server is not in
+read-only mode). srv_refresh_innodb_monitor_stats() is invoked on
+every call, whether or not anything was printed. */
+static void srv_monitor()
+{
+  time_t current_time = time(NULL);
+
+  if (difftime(current_time, monitor_state.last_monitor_time) >= 15) {
+    monitor_state.last_monitor_time = current_time;
+
+    if (srv_print_innodb_monitor) {
+      /* Reset mutex_skipped counter everytime
+      srv_print_innodb_monitor changes. This is to
+      ensure we will not be blocked by lock_sys.mutex
+      for short duration information printing */
+      if (!monitor_state.last_srv_print_monitor) {
+        monitor_state.mutex_skipped = 0;
+        monitor_state.last_srv_print_monitor = true;
+      }
+
+      if (!srv_printf_innodb_monitor(
+            stderr,
+            MUTEX_NOWAIT(monitor_state.mutex_skipped),
+            NULL, NULL)) {
+        monitor_state.mutex_skipped++;
+      } else {
+        /* Reset the counter */
+        monitor_state.mutex_skipped = 0;
+      }
+    } else {
+      /* The monitor is switched off. Remember that, so that
+      mutex_skipped is reset again the next time the monitor
+      is switched on. The timestamp must not be touched here:
+      zeroing it would make the 15-second check above succeed
+      on every invocation. */
+      monitor_state.last_srv_print_monitor = false;
+    }
+
+    /* We don't create the temp files or associated
+    mutexes in read-only-mode */
+
+    if (!srv_read_only_mode && srv_innodb_status) {
+      mutex_enter(&srv_monitor_file_mutex);
+      /* Overwrite the file from the start, and cut off
+      whatever remains of a previous, longer printout. */
+      rewind(srv_monitor_file);
+      if (!srv_printf_innodb_monitor(
+            srv_monitor_file,
+            MUTEX_NOWAIT(monitor_state.mutex_skipped),
+            NULL, NULL)) {
+        monitor_state.mutex_skipped++;
+      } else {
+        monitor_state.mutex_skipped = 0;
+      }
+
+      os_file_set_eof(srv_monitor_file);
+      mutex_exit(&srv_monitor_file_mutex);
+    }
+  }
+
+  srv_refresh_innodb_monitor_stats(current_time);
+}
+
+/*********************************************************************//**
+A task which prints warnings about semaphore waits which have lasted
+too long. These can be used to track bugs which cause hangs.
+
+On every invocation it also asserts that the log sequence number has
+not decreased, updates the LRU eviction statistics and finally runs
+srv_monitor(). If sync_array_print_long_waits() keeps reporting the
+same waiting thread and the same semaphore on successive invocations,
+the server is intentionally crashed with ib::fatal(). The unnamed
+parameter is not used.
+*/
+void srv_monitor_task(void*)
+{
+  /* number of successive fatal timeouts observed */
+  static ulint fatal_cnt;
+  static lsn_t old_lsn = recv_sys.recovered_lsn;
+  /* longest waiting thread for a semaphore */
+  os_thread_id_t waiter;
+  static os_thread_id_t old_waiter = os_thread_get_curr_id();
+  /* the semaphore that is being waited for */
+  const void* sema = NULL;
+  static const void* old_sema = NULL;
+
+  ut_ad(!srv_read_only_mode);
+
+  /* Try to track a strange bug reported by Harald Fuchs and others,
+  where the lsn seems to decrease at times */
+
+  lsn_t new_lsn = log_sys.get_lsn();
+  ut_a(new_lsn >= old_lsn);
+  old_lsn = new_lsn;
+
+  /* Update the statistics collected for deciding LRU
+  eviction policy. */
+  buf_LRU_stat_update();
+
+  if (sync_array_print_long_waits(&waiter, &sema)
+      && sema == old_sema && os_thread_eq(waiter, old_waiter)) {
+#if defined(WITH_WSREP) && defined(WITH_INNODB_DISALLOW_WRITES)
+    if (!os_event_is_set(srv_allow_writes_event)) {
+      /* The message is terminated with a newline so that
+      it does not run into the next line written to stderr. */
+      fprintf(stderr,
+              "WSREP: avoiding InnoDB self crash due to "
+              "long semaphore wait of > %lu seconds\n"
+              "Server is processing SST donor operation, "
+              "fatal_cnt now: " ULINTPF "\n",
+              srv_fatal_semaphore_wait_threshold, fatal_cnt);
+      return;
+    }
+#endif /* WITH_WSREP && WITH_INNODB_DISALLOW_WRITES */
+    /* Only crash when the counter was already nonzero, that is,
+    when this is not the first consecutive match. */
+    if (fatal_cnt++) {
+      ib::fatal() << "Semaphore wait has lasted > "
+                  << srv_fatal_semaphore_wait_threshold
+                  << " seconds. We intentionally crash the"
+                  " server because it appears to be hung.";
+    }
+  } else {
+    /* Something changed: start counting afresh and remember
+    what to compare against on the next invocation. */
+    fatal_cnt = 0;
+    old_waiter = waiter;
+    old_sema = sema;
+  }
+
+  srv_monitor();
+}
+
+/** Increment the server activity count (srv_sys.activity_count). */
+void srv_inc_activity_count(void)
+{
+  srv_sys.activity_count.inc();
+}
+
+#ifdef UNIV_DEBUG
+/** Debug-only check for background activity.
+@return whether purge is enabled or the master timer exists */
+bool srv_any_background_activity()
+{
+  const bool active= purge_sys.enabled() || srv_master_timer.get();
+  /* Neither may be the case in read-only mode. */
+  ut_ad(!active || !srv_read_only_mode);
+  return active;
+}
+#endif /* UNIV_DEBUG */
+
+static void purge_worker_callback(void*); /* forward declarations of the callbacks used by the tasks and timer below */
+static void purge_coordinator_callback(void*);
+static void purge_coordinator_timer_callback(void*);
+ /* Task objects wrapping the purge callbacks. purge_worker_task has
+ external linkage; the coordinator task and both task groups are
+ private to this file. */
+static tpool::task_group purge_task_group;
+tpool::waitable_task purge_worker_task(purge_worker_callback, nullptr,
+ &purge_task_group);
+static tpool::task_group purge_coordinator_task_group(1); /* NOTE(review): the argument 1 presumably limits the group to one concurrently running task - confirm against tpool::task_group */
+static tpool::waitable_task purge_coordinator_task
+ (purge_coordinator_callback, nullptr, &purge_coordinator_task_group);
+ /* Timer that purge_coordinator_callback_low() arms in order to delay
+ the next purge round. */
+static tpool::timer *purge_coordinator_timer;
+
+/** Wake up the purge threads if there is work to do.
+Nothing happens unless purge is enabled, not paused, and the history
+list is not empty. Otherwise purge_state.m_running is incremented, and
+the coordinator task is submitted if the counter thereby became 1. */
+void srv_wake_purge_thread_if_not_active()
+{
+  ut_ad(!srv_read_only_mode);
+
+  if (!purge_sys.enabled() || purge_sys.paused()
+      || !trx_sys.rseg_history_len) {
+    return;
+  }
+
+  if (++purge_state.m_running == 1) {
+    srv_thread_pool->submit_task(&purge_coordinator_task);
+  }
+}
+
+/** Check the state of the purge coordinator task.
+@return whether the purge tasks are active */
+bool purge_sys_t::running() const
+{
+  const bool coordinator_active= purge_coordinator_task.is_running();
+  return coordinator_active;
+}
+
+/** Stop purge during FLUSH TABLES FOR EXPORT.
+Every call that finds purge enabled increments the pause counter while
+holding the latch exclusively. Only the call that raises the counter
+from zero logs a message and disables the coordinator task. */
+void purge_sys_t::stop()
+{
+  bool first_pause= false;
+
+  rw_lock_x_lock(&latch);
+  if (enabled())
+  {
+    ut_ad(srv_n_purge_threads > 0);
+    first_pause= !(m_paused++);
+  }
+  else
+  {
+    /* Shutdown must have been initiated during FLUSH TABLES FOR EXPORT. */
+    ut_ad(!srv_undo_sources);
+  }
+  rw_lock_x_unlock(&latch);
+
+  if (first_pause)
+  {
+    ib::info() << "Stopping purge";
+    MONITOR_ATOMIC_INC(MONITOR_PURGE_STOP_COUNT);
+    purge_coordinator_task.disable();
+  }
+}
+
+/** Resume purge at UNLOCK TABLES after FLUSH TABLES FOR EXPORT.
+Re-enables the coordinator task and decrements the pause counter while
+holding the latch exclusively. The call that brings the counter back to
+zero logs a message, clears purge_state.m_running and wakes up purge. */
+void purge_sys_t::resume()
+{
+  if (enabled())
+  {
+    ut_ad(!srv_read_only_mode);
+    ut_ad(srv_force_recovery < SRV_FORCE_NO_BACKGROUND);
+    ut_ad(!sync_check_iterate(sync_check()));
+    purge_coordinator_task.enable();
+
+    rw_lock_x_lock(&latch);
+    const int32_t pauses_before= m_paused--;
+    /* resume() must be paired with an earlier stop() */
+    ut_a(pauses_before);
+
+    if (pauses_before == 1)
+    {
+      ib::info() << "Resuming purge";
+      purge_state.m_running = 0;
+      srv_wake_purge_thread_if_not_active();
+      MONITOR_ATOMIC_INC(MONITOR_PURGE_RESUME_COUNT);
+    }
+    rw_lock_x_unlock(&latch);
+  }
+  else
+  {
+    /* Shutdown must have been initiated during FLUSH TABLES FOR EXPORT. */
+    ut_ad(!srv_undo_sources);
+  }
+}
+
+/** Get the current server activity count.
+@return the value of srv_sys.activity_count */
+ulint srv_get_activity_count(void)
+{
+  const ulint current_count= srv_sys.activity_count;
+  return current_count;
+}
+
+/** Check if srv_inc_activity_count() has been called.
+@param activity_count   in: the caller's copy of srv_sys.activity_count;
+                        out: refreshed with the current value if it differed
+@return whether the activity_count had changed */
+static bool srv_check_activity(ulint *activity_count)
+{
+  const ulint current= srv_sys.activity_count;
+  const bool changed= current != *activity_count;
+
+  if (changed)
+    *activity_count= current;
+
+  return changed;
+}
+
+/** Flush the log buffer to disk in the background, if it is time.
+The flush only takes place when at least srv_flush_log_at_timeout
+seconds have elapsed since srv_last_log_flush_time. This bounds the
+amount of recent transactions that can be lost in a crash when the log
+is not flushed at every transaction commit. */
+static void srv_sync_log_buffer_in_background(void)
+{
+  const time_t now = time(NULL);
+
+  srv_main_thread_op_info = "flushing log";
+
+  if (difftime(now, srv_last_log_flush_time)
+      < srv_flush_log_at_timeout) {
+    /* not due yet */
+    return;
+  }
+
+  log_buffer_flush_to_disk();
+  srv_last_log_flush_time = now;
+  srv_log_writes_and_flush++;
+}
+
+/** Make room in the table cache by evicting unused tables.
+dict_make_room_in_cache() is invoked between dict_sys_lock() and
+dict_sys_unlock().
+@param pct_check   max percent to check
+@return number of tables evicted */
+static ulint srv_master_evict_from_table_cache(ulint pct_check)
+{
+  dict_sys_lock();
+  const ulint evicted = dict_make_room_in_cache(
+    innobase_get_table_cache_size(), pct_check);
+  dict_sys_unlock();
+
+  return evicted;
+}
+
+/** Print a progress message during server shutdown about the work that
+the master thread is still waiting for. Nothing is printed unless more
+than 60 seconds have passed since *last_print_time.
+@param last_print_time    in/out: last time a message was printed;
+                          set to the current time when the interval
+                          has elapsed
+@param n_tables_to_drop   number of tables to be dropped
+@param n_bytes_merged     number of change buffer bytes just merged */
+static void srv_shutdown_print_master_pending(
+  time_t* last_print_time,
+  ulint n_tables_to_drop,
+  ulint n_bytes_merged)
+{
+  const time_t now = time(NULL);
+
+  if (difftime(now, *last_print_time) <= 60) {
+    return;
+  }
+
+  *last_print_time = now;
+
+  if (n_tables_to_drop) {
+    ib::info() << "Waiting for " << n_tables_to_drop
+               << " table(s) to be dropped";
+  }
+
+  /* The change buffer merge is only waited for in a slow shutdown,
+  so it is only reported in that case. */
+  if (n_bytes_merged && !srv_fast_shutdown) {
+    ib::info() << "Waiting for change buffer merge to"
+               " complete number of bytes of change buffer"
+               " just merged: " << n_bytes_merged;
+  }
+}
+
+#ifdef UNIV_DEBUG
+/** Wait in a loop for as long as the master thread is disabled (debug).
+While waiting, srv_master_thread_disabled_event is signalled on every
+iteration. The loop also ends as soon as shutdown has been initiated. */
+static void srv_master_do_disabled_loop(void)
+{
+  /* srv_main_thread_op_info is only touched if the master thread is
+  actually disabled. */
+  if (srv_master_thread_disabled_debug) {
+    srv_main_thread_op_info = "disabled";
+
+    while (srv_master_thread_disabled_debug) {
+      os_event_set(srv_master_thread_disabled_event);
+
+      if (srv_shutdown_state != SRV_SHUTDOWN_NONE) {
+        break;
+      }
+
+      /* sleep before checking the flag again */
+      os_thread_sleep(100000);
+    }
+
+    srv_main_thread_op_info = "";
+  }
+}
+
+/** Update function of the debug variable that disables the master
+thread:
+  SET GLOBAL innodb_master_thread_disabled_debug = 1 (0).
+When the thread is being disabled, this waits until
+srv_master_thread_disabled_event has been signalled.
+@param[in] save   immediate result from check function */
+void
+srv_master_thread_disabled_debug_update(THD*, st_mysql_sys_var*, void*,
+                                        const void* save)
+{
+  /* This method is protected by mutex, as every SET GLOBAL .. */
+  ut_ad(srv_master_thread_disabled_event != NULL);
+
+  const bool want_disabled = *static_cast<const my_bool*>(save);
+
+  /* Reset the event before publishing the new flag value, and wait
+  relative to the signal count that was obtained by the reset. */
+  const int64_t reset_count = os_event_reset(
+    srv_master_thread_disabled_event);
+
+  srv_master_thread_disabled_debug = want_disabled;
+
+  if (!want_disabled) {
+    return;
+  }
+
+  os_event_wait_low(srv_master_thread_disabled_event, reset_count);
+}
+#endif /* UNIV_DEBUG */
+
+/** Perform the tasks of the master thread when the server is active.
+Every invocation runs the background DROP TABLE pass, checks the free
+space in the redo log and performs the timed background log flush.
+The dictionary cache limit is only enforced when the wall clock second
+sampled on entry is a multiple of SRV_MASTER_DICT_LRU_INTERVAL.
+The function returns early once srv_shutdown_state has advanced past
+SRV_SHUTDOWN_INITIATED. */
+static void srv_master_do_active_tasks(void)
+{
+  const time_t entry_time = time(NULL);
+  ulonglong timer = microsecond_interval_timer();
+
+  /* Tasks that are performed at every invocation. */
+
+  ++srv_main_active_loops;
+  MONITOR_INC(MONITOR_MASTER_ACTIVE_LOOPS);
+
+  /* Drop the tables whose removal had to be deferred because they
+  were still in use by queries. */
+  srv_main_thread_op_info = "doing background drop tables";
+  row_drop_tables_for_mysql_in_background();
+  MONITOR_INC_TIME_IN_MICRO_SECS(
+    MONITOR_SRV_BACKGROUND_DROP_TABLE_MICROSECOND, timer);
+
+  ut_d(srv_master_do_disabled_loop());
+
+  if (srv_shutdown_state > SRV_SHUTDOWN_INITIATED) {
+    return;
+  }
+
+  /* Ensure that the redo log files have enough reusable space. */
+  srv_main_thread_op_info = "checking free log space";
+  log_free_check();
+
+  /* Flush the log if it is due. */
+  srv_main_thread_op_info = "flushing log";
+  srv_sync_log_buffer_in_background();
+  MONITOR_INC_TIME_IN_MICRO_SECS(
+    MONITOR_SRV_LOG_FLUSH_MICROSECOND, timer);
+
+  /* Tasks that are performed at defined intervals. */
+
+  if (srv_shutdown_state > SRV_SHUTDOWN_INITIATED) {
+    return;
+  }
+
+  if (entry_time % SRV_MASTER_DICT_LRU_INTERVAL != 0) {
+    return;
+  }
+
+  srv_main_thread_op_info = "enforcing dict cache limit";
+  const ulint evicted = srv_master_evict_from_table_cache(50);
+  if (evicted != 0) {
+    MONITOR_INC_VALUE(
+      MONITOR_SRV_DICT_LRU_EVICT_COUNT_ACTIVE, evicted);
+  }
+  MONITOR_INC_TIME_IN_MICRO_SECS(
+    MONITOR_SRV_DICT_LRU_MICROSECOND, timer);
+}
+
+/** Perform the tasks of the master thread when the server is idle.
+Runs the background DROP TABLE pass, checks the free space in the redo
+log, enforces the dictionary cache limit and performs the timed
+background log flush. The shutdown state is checked along the way, and
+the function may return before completing all tasks if the server has
+advanced past SRV_SHUTDOWN_INITIATED. No check is made for the server
+becoming active in the meantime. */
+static void srv_master_do_idle_tasks(void)
+{
+  ++srv_main_idle_loops;
+  MONITOR_INC(MONITOR_MASTER_IDLE_LOOPS);
+
+  /* Drop the tables whose removal had to be deferred because they
+  were still in use by queries. */
+  ulonglong timer = microsecond_interval_timer();
+  srv_main_thread_op_info = "doing background drop tables";
+  row_drop_tables_for_mysql_in_background();
+  MONITOR_INC_TIME_IN_MICRO_SECS(
+    MONITOR_SRV_BACKGROUND_DROP_TABLE_MICROSECOND, timer);
+
+  ut_d(srv_master_do_disabled_loop());
+
+  if (srv_shutdown_state > SRV_SHUTDOWN_INITIATED) {
+    return;
+  }
+
+  /* Ensure that the redo log files have enough reusable space. */
+  srv_main_thread_op_info = "checking free log space";
+  log_free_check();
+
+  if (srv_shutdown_state > SRV_SHUTDOWN_INITIATED) {
+    return;
+  }
+
+  srv_main_thread_op_info = "enforcing dict cache limit";
+  const ulint evicted = srv_master_evict_from_table_cache(100);
+  if (evicted != 0) {
+    MONITOR_INC_VALUE(
+      MONITOR_SRV_DICT_LRU_EVICT_COUNT_IDLE, evicted);
+  }
+  MONITOR_INC_TIME_IN_MICRO_SECS(
+    MONITOR_SRV_DICT_LRU_MICROSECOND, timer);
+
+  /* Flush the log if it is due. */
+  srv_sync_log_buffer_in_background();
+  MONITOR_INC_TIME_IN_MICRO_SECS(
+    MONITOR_SRV_LOG_FLUSH_MICROSECOND, timer);
+}
+
+/** Complete the shutdown tasks: background DROP TABLE and, if
+requested, the change buffer merge (on innodb_fast_shutdown=0).
+The work is repeated until no table remains to be dropped and the last
+merge reported zero bytes.
+@param ibuf_merge   whether to also merge the change buffer */
+void srv_shutdown(bool ibuf_merge)
+{
+  ulint merged_bytes = 0;
+  ulint tables_pending;
+  time_t last_report = time(NULL);
+
+  for (;;) {
+    ut_ad(!srv_read_only_mode);
+    ut_ad(srv_shutdown_state == SRV_SHUTDOWN_CLEANUP);
+    ++srv_main_shutdown_loops;
+
+    /* FIXME: Remove the background DROP TABLE queue; it is not
+    crash-safe and breaks ACID. */
+    srv_main_thread_op_info = "doing background drop tables";
+    tables_pending = row_drop_tables_for_mysql_in_background();
+
+    if (ibuf_merge) {
+      srv_main_thread_op_info = "checking free log space";
+      log_free_check();
+      srv_main_thread_op_info = "doing insert buffer merge";
+      merged_bytes = ibuf_merge_all();
+
+      /* Flush the log if it is due. */
+      srv_sync_log_buffer_in_background();
+    }
+
+    /* Report the progress; the callee prints at most one message
+    per 60 seconds. */
+    if (srv_print_verbose_log) {
+      srv_shutdown_print_master_pending(
+        &last_report, tables_pending, merged_bytes);
+    }
+
+    if (!merged_bytes && !tables_pending) {
+      break;
+    }
+  }
+}
+
+/** The periodic master task controlling the server.
+Runs the "active" set of tasks if the server activity count has changed
+since the previous invocation, and the "idle" set otherwise. The
+unnamed parameter is not used. */
+void srv_master_callback(void*)
+{
+  /* activity count as seen by the previous invocation */
+  static ulint old_activity_count;
+
+  ut_a(srv_shutdown_state <= SRV_SHUTDOWN_INITIATED);
+
+  srv_main_thread_op_info = "";
+  MONITOR_INC(MONITOR_MASTER_THREAD_SLEEP);
+
+  const bool server_active = srv_check_activity(&old_activity_count);
+
+  if (!server_active) {
+    srv_master_do_idle_tasks();
+  } else {
+    srv_master_do_active_tasks();
+  }
+
+  srv_main_thread_op_info = "sleeping";
+}
+
+/** Decide whether purge should stop because of shutdown.
+Purge never exits while srv_undo_sources is set. With fast shutdown it
+exits right away. In a slow shutdown it keeps running for as long as
+the history list is not empty, and after that until there are no
+active transactions.
+@return whether purge should exit due to shutdown */
+static bool srv_purge_should_exit()
+{
+  ut_ad(srv_shutdown_state <= SRV_SHUTDOWN_CLEANUP);
+
+  if (srv_undo_sources)
+    return false;
+
+  if (srv_fast_shutdown)
+    return true;
+
+  /* Slow shutdown was requested. */
+  const uint32_t history_size= trx_sys.rseg_history_len;
+
+  if (!history_size)
+    return !trx_sys.any_active_transactions();
+
+  /* There is still history to purge. Report the progress at most once
+  in 15 seconds. */
+  static time_t progress_time;
+  const time_t now= time(NULL);
+
+  if (now - progress_time >= 15)
+  {
+    progress_time= now;
+#if defined HAVE_SYSTEMD && !defined EMBEDDED_LIBRARY
+    service_manager_extend_timeout(INNODB_EXTEND_TIMEOUT_INTERVAL,
+                                   "InnoDB: to purge %u transactions",
+                                   history_size);
+    ib::info() << "to purge " << history_size << " transactions";
+#endif
+  }
+
+  return false;
+}
+
+/** Fetch and execute a task from the work queue.
+The first query thread in srv_sys.tasks, if any, is removed from the
+list while holding srv_sys.tasks_mutex and is then run after the mutex
+has been released.
+@return true if a task was executed */
+static bool srv_task_execute()
+{
+  ut_ad(!srv_read_only_mode);
+  ut_ad(srv_force_recovery < SRV_FORCE_NO_BACKGROUND);
+
+  mutex_enter(&srv_sys.tasks_mutex);
+
+  que_thr_t* task = UT_LIST_GET_FIRST(srv_sys.tasks);
+
+  if (!task) {
+    /* the queue is empty */
+    ut_ad(UT_LIST_GET_LEN(srv_sys.tasks) == 0);
+    mutex_exit(&srv_sys.tasks_mutex);
+    return false;
+  }
+
+  /* only purge tasks are expected in this queue */
+  ut_a(que_node_get_type(task->child) == QUE_NODE_PURGE);
+  UT_LIST_REMOVE(srv_sys.tasks, task);
+  mutex_exit(&srv_sys.tasks_mutex);
+
+  que_run_threads(task);
+  return true;
+}
+
+/** Mutex under which srv_update_purge_thread_count() updates
+srv_n_purge_threads and srv_purge_thread_count_changed */
+std::mutex purge_thread_count_mtx;
+
+/** Set the number of purge threads and raise the "changed" flag.
+@param n   the new value of srv_n_purge_threads */
+void srv_update_purge_thread_count(uint n)
+{
+  std::lock_guard<std::mutex> guard(purge_thread_count_mtx);
+
+  srv_n_purge_threads = n;
+  srv_purge_thread_count_changed = 1;
+}
+
+/** Flag that is set to 1 by srv_update_purge_thread_count() */
+Atomic_counter<int> srv_purge_thread_count_changed;
+
+/** Do the actual purge operation.
+Runs trx_purge() batches in a loop. The loop ends when the history list
+is found empty, when a batch purged no pages, when purge is paused, or
+when srv_purge_should_exit() holds. The number of threads passed to
+trx_purge() is adjusted between batches; it is kept, together with the
+batch counter and the history length snapshot, in function-local
+static variables that persist across calls.
+@param[in,out] n_total_purged total number of purged pages
+@return length of history list before the last purge batch. */
+static uint32_t srv_do_purge(ulint* n_total_purged)
+{
+ ulint n_pages_purged;
+
+ static ulint count = 0;
+ static ulint n_use_threads = 0;
+ static uint32_t rseg_history_len = 0;
+ ulint old_activity_count = srv_get_activity_count();
+ static ulint n_threads = srv_n_purge_threads; /* initialized on the first call only; refreshed below when the thread count is flagged as changed */
+
+ ut_a(n_threads > 0);
+ ut_ad(!srv_read_only_mode);
+
+ /* Purge until there are no more records to purge and there is
+ no change in configuration or server state. If the user has
+ configured more than one purge thread then we treat that as a
+ pool of threads and only use the extra threads if purge can't
+ keep up with updates. */
+
+ if (n_use_threads == 0) {
+ n_use_threads = n_threads;
+ }
+
+ do {
+ if (UNIV_UNLIKELY(srv_purge_thread_count_changed)) {
+ /* Read the fresh value of srv_n_purge_threads, reset
+ the changed flag. Both variables are protected by
+ purge_thread_count_mtx.
+
+ This code does not run concurrently, it is executed
+ by a single purge_coordinator thread, and no races
+ involving srv_purge_thread_count_changed are possible.
+ */
+
+ std::lock_guard<std::mutex> lk(purge_thread_count_mtx);
+ n_threads = n_use_threads = srv_n_purge_threads;
+ srv_purge_thread_count_changed = 0;
+ } else if (trx_sys.rseg_history_len > rseg_history_len
+ || (srv_max_purge_lag > 0
+ && rseg_history_len > srv_max_purge_lag)) {
+
+ /* History length is now longer than what it was
+ when we took the last snapshot, or the snapshot exceeds
+ srv_max_purge_lag. Use one more thread, up to the
+ configured number. */
+
+ if (n_use_threads < n_threads) {
+ ++n_use_threads;
+ }
+
+ } else if (srv_check_activity(&old_activity_count)
+ && n_use_threads > 1) {
+
+ /* History length is not growing and the purge lag
+ limit is not exceeded. If the server activity count
+ has changed since it was last sampled, use one thread
+ fewer, but never less than one. */
+
+ --n_use_threads;
+ }
+
+ /* Sanity check: the number of threads to use must be
+ between 1 and the configured number of purge threads. */
+
+ ut_a(n_use_threads > 0);
+ ut_a(n_use_threads <= n_threads);
+
+ /* Take a snapshot of the history list before purge.
+ Stop if there is nothing to purge. */
+ if (!(rseg_history_len = trx_sys.rseg_history_len)) {
+ break;
+ }
+ /* Run one batch. The second argument is true on every
+ srv_purge_rseg_truncate_frequency-th batch, or whenever
+ purge_sys.truncate.current is set. */
+ n_pages_purged = trx_purge(
+ n_use_threads,
+ !(++count % srv_purge_rseg_truncate_frequency)
+ || purge_sys.truncate.current);
+
+ *n_total_purged += n_pages_purged;
+ } while (n_pages_purged > 0 && !purge_sys.paused()
+ && !srv_purge_should_exit());
+
+ return(rseg_history_len);
+}
+
+
+/** Pool of THD objects that are handed out by acquire_thd() and
+returned by release_thd() */
+static std::list<THD*> purge_thds;
+/** Mutex protecting purge_thds */
+static std::mutex purge_thd_mutex;
+extern void* thd_attach_thd(THD*);
+extern void thd_detach_thd(void *);
+
+/** Take a THD from the pool, creating a new background THD if the pool
+is empty, and attach it to the calling thread.
+@param[out] ctx   value returned by thd_attach_thd(), to be passed to
+                  release_thd()
+@return the acquired THD */
+THD* acquire_thd(void **ctx)
+{
+  THD *acquired;
+
+  {
+    std::lock_guard<std::mutex> guard(purge_thd_mutex);
+
+    if (purge_thds.empty()) {
+      /* Restore current_thd to its previous value after
+      the background THD has been created. */
+      THD *saved = current_thd;
+      purge_thds.push_back(innobase_create_background_thd("InnoDB purge worker"));
+      set_current_thd(saved);
+    }
+
+    acquired = purge_thds.front();
+    purge_thds.pop_front();
+  }
+
+  /* Set current thd, and thd->mysys_var as well,
+  it might be used by something in the server.*/
+  *ctx = thd_attach_thd(acquired);
+  return acquired;
+}
+
+/** Detach a THD obtained from acquire_thd() and return it to the pool.
+@param thd   the THD to return
+@param ctx   the context that acquire_thd() stored for this THD */
+void release_thd(THD *thd, void *ctx)
+{
+  thd_detach_thd(ctx);
+
+  {
+    std::lock_guard<std::mutex> guard(purge_thd_mutex);
+    purge_thds.push_back(thd);
+  }
+
+  set_current_thd(0);
+}
+
+
+/*
+  Called by timer when purge coordinator decides
+  to delay processing of purge records.
+
+  Wakes up purge, unless purge is disabled, paused or already running,
+  the history list is empty, or the history is short and has not grown
+  since the wait started.
+*/
+static void purge_coordinator_timer_callback(void *)
+{
+  const bool idle_with_work= purge_sys.enabled() && !purge_sys.paused() &&
+    !purge_state.m_running && trx_sys.rseg_history_len;
+
+  if (!idle_with_work)
+    return;
+
+  /* No new records were added since wait started.
+  Simply wait for new records. The magic number 5000 is an
+  approximation for the case where we have cached UNDO
+  log records which prevent truncate of the UNDO segments.*/
+  const bool short_and_unchanged= purge_state.m_history_length < 5000 &&
+    purge_state.m_history_length == trx_sys.rseg_history_len;
+
+  if (!short_and_unchanged)
+    srv_wake_purge_thread_if_not_active();
+}
+
+/** Callback that executes queued tasks on behalf of purge: acquires a pooled
+THD, calls srv_task_execute() until it returns a false value, then releases
+the THD again. Debug builds assert that no THD is attached on entry and that
+purge is running after each executed task. */
+static void purge_worker_callback(void*)
+{
+  ut_ad(!current_thd);
+  ut_ad(!srv_read_only_mode);
+  ut_ad(srv_force_recovery < SRV_FORCE_NO_BACKGROUND);
+
+  void *attach_ctx;
+  THD *const worker_thd= acquire_thd(&attach_ctx);
+
+  for (;;)
+  {
+    if (!srv_task_execute())
+      break;
+    ut_ad(purge_sys.running());
+  }
+
+  release_thd(worker_thd, attach_ctx);
+}
+
+/** Run purge batches through srv_do_purge() until the history list is empty,
+the next round has been deferred to purge_coordinator_timer, or the loop
+condition at the bottom becomes false. The history length returned by
+srv_do_purge() is stored in purge_state.m_history_length, which
+purge_coordinator_timer_callback() later compares with the current length. */
+static void purge_coordinator_callback_low()
+{
+ /* This initial value is never read: the variable is reset to 0 at the
+ top of every loop iteration before it is used. */
+ ulint n_total_purged= ULINT_UNDEFINED;
+ purge_state.m_history_length= 0;
+
+ /* Nothing to do while purge is disabled or paused. */
+ if (!purge_sys.enabled() || purge_sys.paused())
+ return;
+ do
+ {
+ n_total_purged = 0;
+ /* Snapshot of the wake-up counter taken before the batch, so that a
+ change during the batch can be detected afterwards. */
+ int sigcount= purge_state.m_running;
+
+ purge_state.m_history_length= srv_do_purge(&n_total_purged);
+
+ /* Check if purge was woken by srv_wake_purge_thread_if_not_active() */
+
+ bool woken_during_purge= purge_state.m_running > sigcount;
+
+ /* If the last purge batch processed less than 1 page and there is
+ still work to do, delay the next batch by 10ms, unless
+ someone added work and woke us up in the meantime. */
+ if (n_total_purged == 0)
+ {
+ /* The history list is empty: the round is finished. */
+ if (trx_sys.rseg_history_len == 0)
+ return;
+ if (!woken_during_purge)
+ {
+ /* Delay the next purge round: arm the timer and leave; the timer
+ callback decides whether to wake purge again. */
+ purge_coordinator_timer->set_time(10, 0);
+ return;
+ }
+ }
+ }
+ /* NOTE(review): because of the ||, the loop keeps running whenever
+ srv_purge_should_exit() is false, even if purge has been disabled or
+ paused in the meantime - confirm that this is intended. */
+ while ((purge_sys.enabled() && !purge_sys.paused()) ||
+ !srv_purge_should_exit());
+}
+
+/** Callback of the purge coordinator: runs purge_coordinator_callback_low()
+with a pooled THD attached, then resets purge_state.m_running to 0. */
+static void purge_coordinator_callback(void*)
+{
+  void *attach_ctx;
+  THD *const coordinator_thd= acquire_thd(&attach_ctx);
+
+  purge_coordinator_callback_low();
+
+  release_thd(coordinator_thd, attach_ctx);
+
+  /* The counter is cleared only after the THD has been handed back. */
+  purge_state.m_running= 0;
+}
+
+/** Create the timer that invokes purge_coordinator_timer_callback() and
+store it in purge_coordinator_timer. No callback argument is passed. */
+void srv_init_purge_tasks()
+{
+  auto delay_timer=
+    srv_thread_pool->create_timer(purge_coordinator_timer_callback, nullptr);
+  purge_coordinator_timer= delay_timer;
+}
+
+/** Tear down the purge tasks: wait for the coordinator task, destroy the
+delay timer, wait for the worker task, and finally destroy every THD that
+is left in the pool of idle purge THDs. */
+static void srv_shutdown_purge_tasks()
+{
+  purge_coordinator_task.wait();
+
+  /* The timer is destroyed only after the coordinator task has finished. */
+  delete purge_coordinator_timer;
+  purge_coordinator_timer= nullptr;
+
+  purge_worker_task.wait();
+
+  /* Drain the pool front to back, destroying each THD before removing it. */
+  for (; !purge_thds.empty(); purge_thds.pop_front())
+    innobase_destroy_background_thd(purge_thds.front());
+}
+
+/**********************************************************************//**
+Appends a query thread to the end of the server task queue
+(srv_sys.tasks) while holding srv_sys.tasks_mutex. This function itself
+does not wake up any thread. */
+void
+srv_que_task_enqueue_low(
+/*=====================*/
+ que_thr_t* thr) /*!< in: query thread */
+{
+ /* Debug assertion: tasks are never enqueued in read-only mode. */
+ ut_ad(!srv_read_only_mode);
+ mutex_enter(&srv_sys.tasks_mutex);
+
+ UT_LIST_ADD_LAST(srv_sys.tasks, thr);
+
+ mutex_exit(&srv_sys.tasks_mutex);
+}
+
+#ifdef UNIV_DEBUG
+/** Count the entries of srv_sys.tasks while holding srv_sys.tasks_mutex.
+Only compiled when UNIV_DEBUG is defined.
+@return number of tasks in queue */
+ulint srv_get_task_queue_length()
+{
+  ut_ad(!srv_read_only_mode);
+
+  mutex_enter(&srv_sys.tasks_mutex);
+  const ulint queue_len= UT_LIST_GET_LEN(srv_sys.tasks);
+  mutex_exit(&srv_sys.tasks_mutex);
+
+  return queue_len;
+}
+#endif /* UNIV_DEBUG */
+
+/** Shut down the purge threads. Does nothing when purge is not enabled.
+Otherwise the purge thread count is raised to innodb_purge_threads_MAX and
+purge is woken repeatedly until srv_purge_should_exit() holds; after that
+the coordinator and the purge tasks are shut down. */
+void srv_purge_shutdown()
+{
+  if (!purge_sys.enabled())
+    return;
+
+  srv_update_purge_thread_count(innodb_purge_threads_MAX);
+
+  for (;;)
+  {
+    if (srv_purge_should_exit())
+      break;
+    /* Purge must not be paused while we are waiting for it to finish. */
+    ut_a(!purge_sys.paused());
+    srv_wake_purge_thread_if_not_active();
+    /* Back off briefly before checking again. */
+    os_thread_sleep(1000);
+  }
+
+  purge_sys.coordinator_shutdown();
+  srv_shutdown_purge_tasks();
+}