summary refs log tree commit diff stats
path: root/storage/innobase/srv
diff options
context:
space:
mode:
author: Daniel Baumann <daniel.baumann@progress-linux.org> 2024-04-13 12:24:36 +0000
committer: Daniel Baumann <daniel.baumann@progress-linux.org> 2024-04-13 12:24:36 +0000
commit: 06eaf7232e9a920468c0f8d74dcf2fe8b555501c (patch)
tree: e2c7b5777f728320e5b5542b6213fd3591ba51e2 /storage/innobase/srv
parent: Initial commit. (diff)
download: mariadb-06eaf7232e9a920468c0f8d74dcf2fe8b555501c.tar.xz
mariadb-06eaf7232e9a920468c0f8d74dcf2fe8b555501c.zip
Adding upstream version 1:10.11.6. upstream/1%10.11.6
Signed-off-by: Daniel Baumann <daniel.baumann@progress-linux.org>
Diffstat (limited to 'storage/innobase/srv')
-rw-r--r-- storage/innobase/srv/srv0mon.cc 1799
-rw-r--r-- storage/innobase/srv/srv0srv.cc 1659
-rw-r--r-- storage/innobase/srv/srv0start.cc 2101
3 files changed, 5559 insertions, 0 deletions
diff --git a/storage/innobase/srv/srv0mon.cc b/storage/innobase/srv/srv0mon.cc
new file mode 100644
index 00000000..75798241
--- /dev/null
+++ b/storage/innobase/srv/srv0mon.cc
@@ -0,0 +1,1799 @@
+/*****************************************************************************
+
+Copyright (c) 2010, 2016, Oracle and/or its affiliates. All Rights Reserved.
+Copyright (c) 2012, Facebook Inc.
+Copyright (c) 2013, 2022, MariaDB Corporation.
+
+This program is free software; you can redistribute it and/or modify it under
+the terms of the GNU General Public License as published by the Free Software
+Foundation; version 2 of the License.
+
+This program is distributed in the hope that it will be useful, but WITHOUT
+ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
+FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details.
+
+You should have received a copy of the GNU General Public License along with
+this program; if not, write to the Free Software Foundation, Inc.,
+51 Franklin Street, Fifth Floor, Boston, MA 02110-1335 USA
+
+*****************************************************************************/
+
+/**************************************************//**
+@file srv/srv0mon.cc
+Database monitor counter interfaces
+
+Created 12/9/2009 Jimmy Yang
+*******************************************************/
+
+#include "buf0flu.h"
+#include "dict0mem.h"
+#include "ibuf0ibuf.h"
+#include "lock0lock.h"
+#include "mach0data.h"
+#include "os0file.h"
+#include "srv0mon.h"
+#include "srv0srv.h"
+#include "trx0rseg.h"
+#include "trx0sys.h"
+
+/* Macro to standardize the counter names for counters in the
+"monitor_buf_page" module as they have very structured defines.
+
+MONITOR_BUF_PAGE expands to one brace-enclosed initializer whose fields are:
+- the counter name "buffer_page_<op>_<name>",
+- the module name "buffer_page_io",
+- the description "Number of <description> Pages <op>",
+- MONITOR_GROUP_MODULE and MONITOR_DEFAULT_START, and
+- the identifier MONITOR_<code>_<op_code>, built by token pasting.
+
+name, description and op must be string literals, because they are joined
+by adjacent string literal concatenation; code and op_code must be bare
+tokens, because they are pasted together with ##.
+
+MONITOR_BUF_PAGE_READ and MONITOR_BUF_PAGE_WRITTEN fix op/op_code to
+"read"/PAGE_READ and "written"/PAGE_WRITTEN respectively. For example,
+MONITOR_BUF_PAGE_READ("index_leaf", "Index Leaf", INDEX_LEAF) yields
+{"buffer_page_read_index_leaf", "buffer_page_io",
+"Number of Index Leaf Pages read", MONITOR_GROUP_MODULE,
+MONITOR_DEFAULT_START, MONITOR_INDEX_LEAF_PAGE_READ}. */
+#define MONITOR_BUF_PAGE(name, description, code, op, op_code) \
+ {"buffer_page_" op "_" name, "buffer_page_io", \
+ "Number of " description " Pages " op, \
+ MONITOR_GROUP_MODULE, MONITOR_DEFAULT_START, \
+ MONITOR_##code##_##op_code}
+
+#define MONITOR_BUF_PAGE_READ(name, description, code) \
+ MONITOR_BUF_PAGE(name, description, code, "read", PAGE_READ)
+
+#define MONITOR_BUF_PAGE_WRITTEN(name, description, code) \
+ MONITOR_BUF_PAGE(name, description, code, "written", PAGE_WRITTEN)
+
+/** This array defines basic static information of monitor counters,
+including each monitor's name, module it belongs to, a short
+description and its property/type and corresponding monitor_id.
+Please note: If you add a monitor here, please add its corresponding
+monitor_id to "enum monitor_id_value" structure in srv0mon.h file. */
+
+static monitor_info_t innodb_counter_info[] =
+{
+ /* A dummy item to mark the module start, this is
+ to accommodate the default value (0) set for the
+ global variables with the control system. */
+ {"module_start", "module_start", "module_start",
+ MONITOR_MODULE,
+ MONITOR_DEFAULT_START, MONITOR_DEFAULT_START},
+
+ /* ========== Counters for Server Metadata ========== */
+ {"module_metadata", "metadata", "Server Metadata",
+ MONITOR_MODULE,
+ MONITOR_DEFAULT_START, MONITOR_MODULE_METADATA},
+
+ {"metadata_table_handles_opened", "metadata",
+ "Number of table handles opened",
+ MONITOR_NONE,
+ MONITOR_DEFAULT_START, MONITOR_TABLE_OPEN},
+
+ /* ========== Counters for Lock Module ========== */
+ {"module_lock", "lock", "Lock Module",
+ MONITOR_MODULE,
+ MONITOR_DEFAULT_START, MONITOR_MODULE_LOCK},
+
+ {"lock_deadlocks", "lock", "Number of deadlocks",
+ static_cast<monitor_type_t>(
+ MONITOR_EXISTING | MONITOR_DEFAULT_ON | MONITOR_DISPLAY_CURRENT),
+ MONITOR_DEFAULT_START, MONITOR_DEADLOCK},
+
+ {"lock_timeouts", "lock", "Number of lock timeouts",
+ static_cast<monitor_type_t>(
+ MONITOR_EXISTING | MONITOR_DEFAULT_ON | MONITOR_DISPLAY_CURRENT),
+ MONITOR_DEFAULT_START, MONITOR_TIMEOUT},
+
+ {"lock_rec_lock_waits", "lock",
+ "Number of times enqueued into record lock wait queue",
+ MONITOR_NONE,
+ MONITOR_DEFAULT_START, MONITOR_LOCKREC_WAIT},
+
+ {"lock_table_lock_waits", "lock",
+ "Number of times enqueued into table lock wait queue",
+ MONITOR_NONE,
+ MONITOR_DEFAULT_START, MONITOR_TABLELOCK_WAIT},
+
+ {"lock_rec_lock_requests", "lock",
+ "Number of record locks requested",
+ MONITOR_NONE,
+ MONITOR_DEFAULT_START, MONITOR_NUM_RECLOCK_REQ},
+
+ {"lock_rec_lock_created", "lock", "Number of record locks created",
+ MONITOR_NONE,
+ MONITOR_DEFAULT_START, MONITOR_RECLOCK_CREATED},
+
+ {"lock_rec_lock_removed", "lock",
+ "Number of record locks removed from the lock queue",
+ MONITOR_NONE,
+ MONITOR_DEFAULT_START, MONITOR_RECLOCK_REMOVED},
+
+ {"lock_rec_locks", "lock",
+ "Current number of record locks on tables",
+ MONITOR_NONE,
+ MONITOR_DEFAULT_START, MONITOR_NUM_RECLOCK},
+
+ {"lock_table_lock_created", "lock", "Number of table locks created",
+ MONITOR_NONE,
+ MONITOR_DEFAULT_START, MONITOR_TABLELOCK_CREATED},
+
+ {"lock_table_lock_removed", "lock",
+ "Number of table locks removed from the lock queue",
+ MONITOR_NONE,
+ MONITOR_DEFAULT_START, MONITOR_TABLELOCK_REMOVED},
+
+ {"lock_table_locks", "lock",
+ "Current number of table locks on tables",
+ MONITOR_NONE,
+ MONITOR_DEFAULT_START, MONITOR_NUM_TABLELOCK},
+
+ {"lock_row_lock_current_waits", "lock",
+ "Number of row locks currently being waited for"
+ " (innodb_row_lock_current_waits)",
+ static_cast<monitor_type_t>(
+ MONITOR_EXISTING | MONITOR_DISPLAY_CURRENT | MONITOR_DEFAULT_ON),
+ MONITOR_DEFAULT_START, MONITOR_OVLD_ROW_LOCK_CURRENT_WAIT},
+
+ {"lock_row_lock_time", "lock",
+ "Time spent in acquiring row locks, in milliseconds"
+ " (innodb_row_lock_time)",
+ static_cast<monitor_type_t>(
+ MONITOR_EXISTING | MONITOR_DEFAULT_ON),
+ MONITOR_DEFAULT_START, MONITOR_OVLD_LOCK_WAIT_TIME},
+
+ {"lock_row_lock_time_max", "lock",
+ "The maximum time to acquire a row lock, in milliseconds"
+ " (innodb_row_lock_time_max)",
+ static_cast<monitor_type_t>(
+ MONITOR_EXISTING | MONITOR_DISPLAY_CURRENT | MONITOR_DEFAULT_ON),
+ MONITOR_DEFAULT_START, MONITOR_OVLD_LOCK_MAX_WAIT_TIME},
+
+ {"lock_row_lock_waits", "lock",
+ "Number of times a row lock had to be waited for"
+ " (innodb_row_lock_waits)",
+ static_cast<monitor_type_t>(
+ MONITOR_EXISTING | MONITOR_DEFAULT_ON),
+ MONITOR_DEFAULT_START, MONITOR_OVLD_ROW_LOCK_WAIT},
+
+ {"lock_row_lock_time_avg", "lock",
+ "The average time to acquire a row lock, in milliseconds"
+ " (innodb_row_lock_time_avg)",
+ static_cast<monitor_type_t>(
+ MONITOR_EXISTING | MONITOR_DISPLAY_CURRENT | MONITOR_DEFAULT_ON),
+ MONITOR_DEFAULT_START, MONITOR_OVLD_LOCK_AVG_WAIT_TIME},
+
+ /* ========== Counters for Buffer Manager and I/O ========== */
+ {"module_buffer", "buffer", "Buffer Manager Module",
+ MONITOR_MODULE,
+ MONITOR_DEFAULT_START, MONITOR_MODULE_BUFFER},
+
+ {"buffer_pool_size", "server",
+ "Server buffer pool size (all buffer pools) in bytes",
+ static_cast<monitor_type_t>(
+ MONITOR_EXISTING | MONITOR_DEFAULT_ON | MONITOR_DISPLAY_CURRENT),
+ MONITOR_DEFAULT_START, MONITOR_OVLD_BUFFER_POOL_SIZE},
+
+ {"buffer_pool_reads", "buffer",
+ "Number of reads directly from disk (innodb_buffer_pool_reads)",
+ static_cast<monitor_type_t>(
+ MONITOR_EXISTING | MONITOR_DEFAULT_ON),
+ MONITOR_DEFAULT_START, MONITOR_OVLD_BUF_POOL_READS},
+
+ {"buffer_pool_read_requests", "buffer",
+ "Number of logical read requests (innodb_buffer_pool_read_requests)",
+ static_cast<monitor_type_t>(
+ MONITOR_EXISTING | MONITOR_DEFAULT_ON),
+ MONITOR_DEFAULT_START, MONITOR_OVLD_BUF_POOL_READ_REQUESTS},
+
+ {"buffer_pool_write_requests", "buffer",
+ "Number of write requests (innodb_buffer_pool_write_requests)",
+ static_cast<monitor_type_t>(
+ MONITOR_EXISTING | MONITOR_DEFAULT_ON),
+ MONITOR_DEFAULT_START, MONITOR_OVLD_BUF_POOL_WRITE_REQUEST},
+
+ {"buffer_pool_wait_free", "buffer",
+ "Number of times waited for free buffer"
+ " (innodb_buffer_pool_wait_free)",
+ static_cast<monitor_type_t>(
+ MONITOR_EXISTING | MONITOR_DEFAULT_ON),
+ MONITOR_DEFAULT_START, MONITOR_OVLD_BUF_POOL_WAIT_FREE},
+
+ {"buffer_pool_read_ahead", "buffer",
+ "Number of pages read as read ahead (innodb_buffer_pool_read_ahead)",
+ static_cast<monitor_type_t>(
+ MONITOR_EXISTING | MONITOR_DEFAULT_ON),
+ MONITOR_DEFAULT_START, MONITOR_OVLD_BUF_POOL_READ_AHEAD},
+
+ {"buffer_pool_read_ahead_evicted", "buffer",
+ "Read-ahead pages evicted without being accessed"
+ " (innodb_buffer_pool_read_ahead_evicted)",
+ static_cast<monitor_type_t>(
+ MONITOR_EXISTING | MONITOR_DEFAULT_ON),
+ MONITOR_DEFAULT_START, MONITOR_OVLD_BUF_POOL_READ_AHEAD_EVICTED},
+
+ {"buffer_pool_pages_total", "buffer",
+ "Total buffer pool size in pages (innodb_buffer_pool_pages_total)",
+ static_cast<monitor_type_t>(
+ MONITOR_EXISTING | MONITOR_DISPLAY_CURRENT | MONITOR_DEFAULT_ON),
+ MONITOR_DEFAULT_START, MONITOR_OVLD_BUF_POOL_PAGE_TOTAL},
+
+ {"buffer_pool_pages_misc", "buffer",
+ "Buffer pages for misc use such as row locks or the adaptive"
+ " hash index (innodb_buffer_pool_pages_misc)",
+ static_cast<monitor_type_t>(
+ MONITOR_EXISTING | MONITOR_DISPLAY_CURRENT | MONITOR_DEFAULT_ON),
+ MONITOR_DEFAULT_START, MONITOR_OVLD_BUF_POOL_PAGE_MISC},
+
+ {"buffer_pool_pages_data", "buffer",
+ "Buffer pages containing data (innodb_buffer_pool_pages_data)",
+ static_cast<monitor_type_t>(
+ MONITOR_EXISTING | MONITOR_DISPLAY_CURRENT | MONITOR_DEFAULT_ON),
+ MONITOR_DEFAULT_START, MONITOR_OVLD_BUF_POOL_PAGES_DATA},
+
+ {"buffer_pool_bytes_data", "buffer",
+ "Buffer bytes containing data (innodb_buffer_pool_bytes_data)",
+ static_cast<monitor_type_t>(
+ MONITOR_EXISTING | MONITOR_DISPLAY_CURRENT | MONITOR_DEFAULT_ON),
+ MONITOR_DEFAULT_START, MONITOR_OVLD_BUF_POOL_BYTES_DATA},
+
+ {"buffer_pool_pages_dirty", "buffer",
+ "Buffer pages currently dirty (innodb_buffer_pool_pages_dirty)",
+ static_cast<monitor_type_t>(
+ MONITOR_EXISTING | MONITOR_DISPLAY_CURRENT | MONITOR_DEFAULT_ON),
+ MONITOR_DEFAULT_START, MONITOR_OVLD_BUF_POOL_PAGES_DIRTY},
+
+ {"buffer_pool_bytes_dirty", "buffer",
+ "Buffer bytes currently dirty (innodb_buffer_pool_bytes_dirty)",
+ static_cast<monitor_type_t>(
+ MONITOR_EXISTING | MONITOR_DISPLAY_CURRENT | MONITOR_DEFAULT_ON),
+ MONITOR_DEFAULT_START, MONITOR_OVLD_BUF_POOL_BYTES_DIRTY},
+
+ {"buffer_pool_pages_free", "buffer",
+ "Buffer pages currently free (innodb_buffer_pool_pages_free)",
+ static_cast<monitor_type_t>(
+ MONITOR_EXISTING | MONITOR_DISPLAY_CURRENT | MONITOR_DEFAULT_ON),
+ MONITOR_DEFAULT_START, MONITOR_OVLD_BUF_POOL_PAGES_FREE},
+
+ {"buffer_pages_created", "buffer",
+ "Number of pages created (innodb_pages_created)",
+ static_cast<monitor_type_t>(
+ MONITOR_EXISTING | MONITOR_DEFAULT_ON),
+ MONITOR_DEFAULT_START, MONITOR_OVLD_PAGE_CREATED},
+
+ {"buffer_pages_written", "buffer",
+ "Number of pages written (innodb_pages_written)",
+ static_cast<monitor_type_t>(
+ MONITOR_EXISTING | MONITOR_DEFAULT_ON),
+ MONITOR_DEFAULT_START, MONITOR_OVLD_PAGES_WRITTEN},
+
+ {"buffer_pages_read", "buffer",
+ "Number of pages read (innodb_pages_read)",
+ static_cast<monitor_type_t>(
+ MONITOR_EXISTING | MONITOR_DEFAULT_ON),
+ MONITOR_DEFAULT_START, MONITOR_OVLD_PAGES_READ},
+
+ {"buffer_data_reads", "buffer",
+ "Amount of data read in bytes (innodb_data_reads)",
+ static_cast<monitor_type_t>(
+ MONITOR_EXISTING | MONITOR_DEFAULT_ON),
+ MONITOR_DEFAULT_START, MONITOR_OVLD_BYTE_READ},
+
+ {"buffer_data_written", "buffer",
+ "Amount of data written in bytes (innodb_data_written)",
+ static_cast<monitor_type_t>(
+ MONITOR_EXISTING | MONITOR_DEFAULT_ON),
+ MONITOR_DEFAULT_START, MONITOR_OVLD_BYTE_WRITTEN},
+
+ /* Cumulative counter for scanning in flush batches */
+ {"buffer_flush_batch_scanned", "buffer",
+ "Total pages scanned as part of flush batch",
+ MONITOR_SET_OWNER,
+ MONITOR_FLUSH_BATCH_SCANNED_NUM_CALL,
+ MONITOR_FLUSH_BATCH_SCANNED},
+
+ {"buffer_flush_batch_num_scan", "buffer",
+ "Number of times buffer flush list flush is called",
+ MONITOR_SET_MEMBER, MONITOR_FLUSH_BATCH_SCANNED,
+ MONITOR_FLUSH_BATCH_SCANNED_NUM_CALL},
+
+ {"buffer_flush_batch_scanned_per_call", "buffer",
+ "Pages scanned per flush batch scan",
+ MONITOR_SET_MEMBER, MONITOR_FLUSH_BATCH_SCANNED,
+ MONITOR_FLUSH_BATCH_SCANNED_PER_CALL},
+
+ /* Cumulative counter for pages flushed in flush batches */
+ {"buffer_flush_batch_total_pages", "buffer",
+ "Total pages flushed as part of flush batch",
+ MONITOR_SET_OWNER, MONITOR_FLUSH_BATCH_COUNT,
+ MONITOR_FLUSH_BATCH_TOTAL_PAGE},
+
+ {"buffer_flush_batches", "buffer",
+ "Number of flush batches",
+ MONITOR_SET_MEMBER, MONITOR_FLUSH_BATCH_TOTAL_PAGE,
+ MONITOR_FLUSH_BATCH_COUNT},
+
+ {"buffer_flush_batch_pages", "buffer",
+ "Pages queued as a flush batch",
+ MONITOR_SET_MEMBER, MONITOR_FLUSH_BATCH_TOTAL_PAGE,
+ MONITOR_FLUSH_BATCH_PAGES},
+
+ /* Cumulative counter for flush batches because of neighbor */
+ {"buffer_flush_neighbor_total_pages", "buffer",
+ "Total neighbors flushed as part of neighbor flush",
+ MONITOR_SET_OWNER, MONITOR_FLUSH_NEIGHBOR_COUNT,
+ MONITOR_FLUSH_NEIGHBOR_TOTAL_PAGE},
+
+ {"buffer_flush_neighbor", "buffer",
+ "Number of times neighbors flushing is invoked",
+ MONITOR_SET_MEMBER, MONITOR_FLUSH_NEIGHBOR_TOTAL_PAGE,
+ MONITOR_FLUSH_NEIGHBOR_COUNT},
+
+ {"buffer_flush_neighbor_pages", "buffer",
+ "Pages queued as a neighbor batch",
+ MONITOR_SET_MEMBER, MONITOR_FLUSH_NEIGHBOR_TOTAL_PAGE,
+ MONITOR_FLUSH_NEIGHBOR_PAGES},
+
+ {"buffer_flush_n_to_flush_requested", "buffer",
+ "Number of pages requested for flushing.",
+ MONITOR_NONE,
+ MONITOR_DEFAULT_START, MONITOR_FLUSH_N_TO_FLUSH_REQUESTED},
+
+ {"buffer_flush_n_to_flush_by_age", "buffer",
+ "Number of pages target by LSN Age for flushing.",
+ MONITOR_NONE,
+ MONITOR_DEFAULT_START, MONITOR_FLUSH_N_TO_FLUSH_BY_AGE},
+
+ {"buffer_flush_adaptive_avg_time", "buffer",
+ "Avg time (ms) spent for adaptive flushing recently.",
+ MONITOR_NONE,
+ MONITOR_DEFAULT_START, MONITOR_FLUSH_ADAPTIVE_AVG_TIME},
+
+ {"buffer_flush_adaptive_avg_pass", "buffer",
+ "Number of adaptive flushes passed during the recent Avg period.",
+ MONITOR_NONE,
+ MONITOR_DEFAULT_START, MONITOR_FLUSH_ADAPTIVE_AVG_PASS},
+
+ {"buffer_LRU_get_free_loops", "buffer",
+ "Total loops in LRU get free.",
+ MONITOR_NONE,
+ MONITOR_DEFAULT_START, MONITOR_LRU_GET_FREE_LOOPS},
+
+ {"buffer_LRU_get_free_waits", "buffer",
+ "Total sleep waits in LRU get free.",
+ MONITOR_NONE,
+ MONITOR_DEFAULT_START, MONITOR_LRU_GET_FREE_WAITS},
+
+ {"buffer_flush_avg_page_rate", "buffer",
+ "Average number of pages at which flushing is happening",
+ MONITOR_NONE,
+ MONITOR_DEFAULT_START, MONITOR_FLUSH_AVG_PAGE_RATE},
+
+ {"buffer_flush_lsn_avg_rate", "buffer",
+ "Average redo generation rate",
+ MONITOR_NONE,
+ MONITOR_DEFAULT_START, MONITOR_FLUSH_LSN_AVG_RATE},
+
+ {"buffer_flush_pct_for_dirty", "buffer",
+ "Percent of IO capacity used to avoid max dirty page limit",
+ MONITOR_NONE,
+ MONITOR_DEFAULT_START, MONITOR_FLUSH_PCT_FOR_DIRTY},
+
+ {"buffer_flush_pct_for_lsn", "buffer",
+ "Percent of IO capacity used to avoid reusable redo space limit",
+ MONITOR_NONE,
+ MONITOR_DEFAULT_START, MONITOR_FLUSH_PCT_FOR_LSN},
+
+ {"buffer_flush_sync_waits", "buffer",
+ "Number of times a wait happens due to sync flushing",
+ MONITOR_NONE,
+ MONITOR_DEFAULT_START, MONITOR_FLUSH_SYNC_WAITS},
+
+ /* Cumulative counter for flush batches for adaptive flushing */
+ {"buffer_flush_adaptive_total_pages", "buffer",
+ "Total pages flushed as part of adaptive flushing",
+ MONITOR_SET_OWNER, MONITOR_FLUSH_ADAPTIVE_COUNT,
+ MONITOR_FLUSH_ADAPTIVE_TOTAL_PAGE},
+
+ {"buffer_flush_adaptive", "buffer",
+ "Number of adaptive batches",
+ MONITOR_SET_MEMBER, MONITOR_FLUSH_ADAPTIVE_TOTAL_PAGE,
+ MONITOR_FLUSH_ADAPTIVE_COUNT},
+
+ {"buffer_flush_adaptive_pages", "buffer",
+ "Pages queued as an adaptive batch",
+ MONITOR_SET_MEMBER, MONITOR_FLUSH_ADAPTIVE_TOTAL_PAGE,
+ MONITOR_FLUSH_ADAPTIVE_PAGES},
+
+ /* Cumulative counter for flush batches because of sync */
+ {"buffer_flush_sync_total_pages", "buffer",
+ "Total pages flushed as part of sync batches",
+ MONITOR_SET_OWNER, MONITOR_FLUSH_SYNC_COUNT,
+ MONITOR_FLUSH_SYNC_TOTAL_PAGE},
+
+ {"buffer_flush_sync", "buffer",
+ "Number of sync batches",
+ MONITOR_SET_MEMBER, MONITOR_FLUSH_SYNC_TOTAL_PAGE,
+ MONITOR_FLUSH_SYNC_COUNT},
+
+ {"buffer_flush_sync_pages", "buffer",
+ "Pages queued as a sync batch",
+ MONITOR_SET_MEMBER, MONITOR_FLUSH_SYNC_TOTAL_PAGE,
+ MONITOR_FLUSH_SYNC_PAGES},
+
+ /* Cumulative counter for flush batches because of background */
+ {"buffer_flush_background_total_pages", "buffer",
+ "Total pages flushed as part of background batches",
+ MONITOR_SET_OWNER, MONITOR_FLUSH_BACKGROUND_COUNT,
+ MONITOR_FLUSH_BACKGROUND_TOTAL_PAGE},
+
+ {"buffer_flush_background", "buffer",
+ "Number of background batches",
+ MONITOR_SET_MEMBER, MONITOR_FLUSH_BACKGROUND_TOTAL_PAGE,
+ MONITOR_FLUSH_BACKGROUND_COUNT},
+
+ {"buffer_flush_background_pages", "buffer",
+ "Pages queued as a background batch",
+ MONITOR_SET_MEMBER, MONITOR_FLUSH_BACKGROUND_TOTAL_PAGE,
+ MONITOR_FLUSH_BACKGROUND_PAGES},
+
+ /* Cumulative counter for LRU batch scan */
+ {"buffer_LRU_batch_scanned", "buffer",
+ "Total pages scanned as part of LRU batch",
+ MONITOR_SET_OWNER, MONITOR_LRU_BATCH_SCANNED_NUM_CALL,
+ MONITOR_LRU_BATCH_SCANNED},
+
+ {"buffer_LRU_batch_num_scan", "buffer",
+ "Number of times LRU batch is called",
+ MONITOR_SET_MEMBER, MONITOR_LRU_BATCH_SCANNED,
+ MONITOR_LRU_BATCH_SCANNED_NUM_CALL},
+
+ {"buffer_LRU_batch_scanned_per_call", "buffer",
+ "Pages scanned per LRU batch call",
+ MONITOR_SET_MEMBER, MONITOR_LRU_BATCH_SCANNED,
+ MONITOR_LRU_BATCH_SCANNED_PER_CALL},
+
+ /* Cumulative counter for LRU batch pages flushed */
+ {"buffer_LRU_batch_flush_total_pages", "buffer",
+ "Total pages flushed as part of LRU batches",
+ static_cast<monitor_type_t>(
+ MONITOR_EXISTING | MONITOR_DEFAULT_ON),
+ MONITOR_DEFAULT_START, MONITOR_LRU_BATCH_FLUSH_TOTAL_PAGE},
+
+ /* Cumulative counter for LRU batch pages evicted */
+ {"buffer_LRU_batch_evict_total_pages", "buffer",
+ "Total pages evicted as part of LRU batches",
+ static_cast<monitor_type_t>(
+ MONITOR_EXISTING | MONITOR_DEFAULT_ON),
+ MONITOR_DEFAULT_START, MONITOR_LRU_BATCH_EVICT_TOTAL_PAGE},
+
+ {"buffer_LRU_single_flush_failure_count", "Buffer",
+ "Number of times attempt to flush a single page from LRU failed",
+ MONITOR_NONE,
+ MONITOR_DEFAULT_START, MONITOR_LRU_SINGLE_FLUSH_FAILURE_COUNT},
+
+ {"buffer_LRU_get_free_search", "Buffer",
+ "Number of searches performed for a clean page",
+ MONITOR_NONE,
+ MONITOR_DEFAULT_START, MONITOR_LRU_GET_FREE_SEARCH},
+
+ /* Cumulative counter for LRU search scans */
+ {"buffer_LRU_search_scanned", "buffer",
+ "Total pages scanned as part of LRU search",
+ MONITOR_SET_OWNER,
+ MONITOR_LRU_SEARCH_SCANNED_NUM_CALL,
+ MONITOR_LRU_SEARCH_SCANNED},
+
+ {"buffer_LRU_search_num_scan", "buffer",
+ "Number of times LRU search is performed",
+ MONITOR_SET_MEMBER, MONITOR_LRU_SEARCH_SCANNED,
+ MONITOR_LRU_SEARCH_SCANNED_NUM_CALL},
+
+ {"buffer_LRU_search_scanned_per_call", "buffer",
+ "Page scanned per single LRU search",
+ MONITOR_SET_MEMBER, MONITOR_LRU_SEARCH_SCANNED,
+ MONITOR_LRU_SEARCH_SCANNED_PER_CALL},
+
+ /* Cumulative counter for LRU unzip search scans */
+ {"buffer_LRU_unzip_search_scanned", "buffer",
+ "Total pages scanned as part of LRU unzip search",
+ MONITOR_SET_OWNER,
+ MONITOR_LRU_UNZIP_SEARCH_SCANNED_NUM_CALL,
+ MONITOR_LRU_UNZIP_SEARCH_SCANNED},
+
+ {"buffer_LRU_unzip_search_num_scan", "buffer",
+ "Number of times LRU unzip search is performed",
+ MONITOR_SET_MEMBER, MONITOR_LRU_UNZIP_SEARCH_SCANNED,
+ MONITOR_LRU_UNZIP_SEARCH_SCANNED_NUM_CALL},
+
+ {"buffer_LRU_unzip_search_scanned_per_call", "buffer",
+ "Page scanned per single LRU unzip search",
+ MONITOR_SET_MEMBER, MONITOR_LRU_UNZIP_SEARCH_SCANNED,
+ MONITOR_LRU_UNZIP_SEARCH_SCANNED_PER_CALL},
+
+ /* ========== Counters for Buffer Page I/O ========== */
+ {"module_buffer_page", "buffer_page_io", "Buffer Page I/O Module",
+ static_cast<monitor_type_t>(
+ MONITOR_MODULE | MONITOR_GROUP_MODULE),
+ MONITOR_DEFAULT_START, MONITOR_MODULE_BUF_PAGE},
+
+ MONITOR_BUF_PAGE_READ("index_leaf","Index Leaf", INDEX_LEAF),
+
+ MONITOR_BUF_PAGE_READ("index_non_leaf","Index Non-leaf",
+ INDEX_NON_LEAF),
+
+ MONITOR_BUF_PAGE_READ("index_ibuf_leaf", "Insert Buffer Index Leaf",
+ INDEX_IBUF_LEAF),
+
+ MONITOR_BUF_PAGE_READ("index_ibuf_non_leaf",
+ "Insert Buffer Index Non-Leaf",
+ INDEX_IBUF_NON_LEAF),
+
+ MONITOR_BUF_PAGE_READ("undo_log", "Undo Log", UNDO_LOG),
+
+ MONITOR_BUF_PAGE_READ("index_inode", "Index Inode", INODE),
+
+ MONITOR_BUF_PAGE_READ("ibuf_free_list", "Insert Buffer Free List",
+ IBUF_FREELIST),
+
+ MONITOR_BUF_PAGE_READ("ibuf_bitmap", "Insert Buffer Bitmap",
+ IBUF_BITMAP),
+
+ MONITOR_BUF_PAGE_READ("system_page", "System", SYSTEM),
+
+ MONITOR_BUF_PAGE_READ("trx_system", "Transaction System", TRX_SYSTEM),
+
+ MONITOR_BUF_PAGE_READ("fsp_hdr", "File Space Header", FSP_HDR),
+
+ MONITOR_BUF_PAGE_READ("xdes", "Extent Descriptor", XDES),
+
+ MONITOR_BUF_PAGE_READ("blob", "Uncompressed BLOB", BLOB),
+
+ MONITOR_BUF_PAGE_READ("zblob", "First Compressed BLOB", ZBLOB),
+
+ MONITOR_BUF_PAGE_READ("zblob2", "Subsequent Compressed BLOB", ZBLOB2),
+
+ MONITOR_BUF_PAGE_READ("other", "other/unknown (old version of InnoDB)",
+ OTHER),
+
+ MONITOR_BUF_PAGE_WRITTEN("index_leaf","Index Leaf", INDEX_LEAF),
+
+ MONITOR_BUF_PAGE_WRITTEN("index_non_leaf","Index Non-leaf",
+ INDEX_NON_LEAF),
+
+ MONITOR_BUF_PAGE_WRITTEN("index_ibuf_leaf", "Insert Buffer Index Leaf",
+ INDEX_IBUF_LEAF),
+
+ MONITOR_BUF_PAGE_WRITTEN("index_ibuf_non_leaf",
+ "Insert Buffer Index Non-Leaf",
+ INDEX_IBUF_NON_LEAF),
+
+ MONITOR_BUF_PAGE_WRITTEN("undo_log", "Undo Log", UNDO_LOG),
+
+ MONITOR_BUF_PAGE_WRITTEN("index_inode", "Index Inode", INODE),
+
+ MONITOR_BUF_PAGE_WRITTEN("ibuf_free_list", "Insert Buffer Free List",
+ IBUF_FREELIST),
+
+ MONITOR_BUF_PAGE_WRITTEN("ibuf_bitmap", "Insert Buffer Bitmap",
+ IBUF_BITMAP),
+
+ MONITOR_BUF_PAGE_WRITTEN("system_page", "System", SYSTEM),
+
+ MONITOR_BUF_PAGE_WRITTEN("trx_system", "Transaction System",
+ TRX_SYSTEM),
+
+ MONITOR_BUF_PAGE_WRITTEN("fsp_hdr", "File Space Header", FSP_HDR),
+
+ MONITOR_BUF_PAGE_WRITTEN("xdes", "Extent Descriptor", XDES),
+
+ MONITOR_BUF_PAGE_WRITTEN("blob", "Uncompressed BLOB", BLOB),
+
+ MONITOR_BUF_PAGE_WRITTEN("zblob", "First Compressed BLOB", ZBLOB),
+
+ MONITOR_BUF_PAGE_WRITTEN("zblob2", "Subsequent Compressed BLOB",
+ ZBLOB2),
+
+ MONITOR_BUF_PAGE_WRITTEN("other", "other/unknown (old version InnoDB)",
+ OTHER),
+
+ /* ========== Counters for OS level operations ========== */
+ {"module_os", "os", "OS Level Operation",
+ MONITOR_MODULE,
+ MONITOR_DEFAULT_START, MONITOR_MODULE_OS},
+
+ {"os_data_reads", "os",
+ "Number of reads initiated (innodb_data_reads)",
+ static_cast<monitor_type_t>(
+ MONITOR_EXISTING | MONITOR_DEFAULT_ON),
+ MONITOR_DEFAULT_START, MONITOR_OVLD_OS_FILE_READ},
+
+ {"os_data_writes", "os",
+ "Number of writes initiated (innodb_data_writes)",
+ static_cast<monitor_type_t>(
+ MONITOR_EXISTING | MONITOR_DEFAULT_ON),
+ MONITOR_DEFAULT_START, MONITOR_OVLD_OS_FILE_WRITE},
+
+ {"os_data_fsyncs", "os",
+ "Number of fsync() calls (innodb_data_fsyncs)",
+ static_cast<monitor_type_t>(
+ MONITOR_EXISTING | MONITOR_DEFAULT_ON),
+ MONITOR_DEFAULT_START, MONITOR_OVLD_OS_FSYNC},
+
+ {"os_pending_reads", "os", "Number of reads pending",
+ MONITOR_DEFAULT_ON,
+ MONITOR_DEFAULT_START, MONITOR_OS_PENDING_READS},
+
+ {"os_pending_writes", "os", "Number of writes pending",
+ MONITOR_DEFAULT_ON,
+ MONITOR_DEFAULT_START, MONITOR_OS_PENDING_WRITES},
+
+ {"os_log_bytes_written", "os",
+ "Bytes of log written (innodb_os_log_written)",
+ static_cast<monitor_type_t>(
+ MONITOR_EXISTING | MONITOR_DEFAULT_ON),
+ MONITOR_DEFAULT_START, MONITOR_OVLD_OS_LOG_WRITTEN},
+
+ /* ========== Counters for Transaction Module ========== */
+ {"module_trx", "transaction", "Transaction Manager",
+ MONITOR_MODULE,
+ MONITOR_DEFAULT_START, MONITOR_MODULE_TRX},
+
+ {"trx_rw_commits", "transaction",
+ "Number of read-write transactions committed",
+ MONITOR_NONE, MONITOR_DEFAULT_START, MONITOR_TRX_RW_COMMIT},
+
+ {"trx_ro_commits", "transaction",
+ "Number of read-only transactions committed",
+ MONITOR_NONE, MONITOR_DEFAULT_START, MONITOR_TRX_RO_COMMIT},
+
+ {"trx_nl_ro_commits", "transaction",
+ "Number of non-locking auto-commit read-only transactions committed",
+ MONITOR_NONE, MONITOR_DEFAULT_START, MONITOR_TRX_NL_RO_COMMIT},
+
+ {"trx_commits_insert_update", "transaction",
+ "Number of transactions committed with inserts and updates",
+ MONITOR_NONE,
+ MONITOR_DEFAULT_START, MONITOR_TRX_COMMIT_UNDO},
+
+ {"trx_rollbacks", "transaction",
+ "Number of transactions rolled back",
+ MONITOR_NONE,
+ MONITOR_DEFAULT_START, MONITOR_TRX_ROLLBACK},
+
+ {"trx_rollbacks_savepoint", "transaction",
+ "Number of transactions rolled back to savepoint",
+ MONITOR_NONE,
+ MONITOR_DEFAULT_START, MONITOR_TRX_ROLLBACK_SAVEPOINT},
+
+ {"trx_rseg_history_len", "transaction",
+ "Length of the TRX_RSEG_HISTORY list",
+ static_cast<monitor_type_t>(
+ MONITOR_EXISTING | MONITOR_DISPLAY_CURRENT | MONITOR_DEFAULT_ON),
+ MONITOR_DEFAULT_START, MONITOR_RSEG_HISTORY_LEN},
+
+ {"trx_undo_slots_used", "transaction", "Number of undo slots used",
+ static_cast<monitor_type_t>(
+ MONITOR_EXISTING | MONITOR_DISPLAY_CURRENT),
+ MONITOR_DEFAULT_START, MONITOR_NUM_UNDO_SLOT_USED},
+
+ {"trx_undo_slots_cached", "transaction",
+ "Number of undo slots cached",
+ static_cast<monitor_type_t>(
+ MONITOR_EXISTING | MONITOR_DISPLAY_CURRENT | MONITOR_DEFAULT_ON),
+ MONITOR_DEFAULT_START, MONITOR_NUM_UNDO_SLOT_CACHED},
+
+ {"trx_rseg_current_size", "transaction",
+ "Current rollback segment size in pages",
+ static_cast<monitor_type_t>(
+ MONITOR_EXISTING | MONITOR_DISPLAY_CURRENT),
+ MONITOR_DEFAULT_START, MONITOR_RSEG_CUR_SIZE},
+
+ /* ========== Counters for Purge Module ========== */
+ {"module_purge", "purge", "Purge Module",
+ MONITOR_MODULE,
+ MONITOR_DEFAULT_START, MONITOR_MODULE_PURGE},
+
+ {"purge_del_mark_records", "purge",
+ "Number of delete-marked rows purged",
+ MONITOR_NONE,
+ MONITOR_DEFAULT_START, MONITOR_N_DEL_ROW_PURGE},
+
+ {"purge_upd_exist_or_extern_records", "purge",
+ "Number of purges on updates of existing records and"
+ " updates on delete marked record with externally stored field",
+ MONITOR_NONE,
+ MONITOR_DEFAULT_START, MONITOR_N_UPD_EXIST_EXTERN},
+
+ {"purge_invoked", "purge",
+ "Number of times purge was invoked",
+ MONITOR_NONE,
+ MONITOR_DEFAULT_START, MONITOR_PURGE_INVOKED},
+
+ {"purge_undo_log_pages", "purge",
+ "Number of undo log pages handled by the purge",
+ MONITOR_NONE,
+ MONITOR_DEFAULT_START, MONITOR_PURGE_N_PAGE_HANDLED},
+
+ {"purge_dml_delay_usec", "purge",
+ "Microseconds DML to be delayed due to purge lagging",
+ static_cast<monitor_type_t>(
+ MONITOR_EXISTING | MONITOR_DISPLAY_CURRENT),
+ MONITOR_DEFAULT_START, MONITOR_DML_PURGE_DELAY},
+
+ {"purge_stop_count", "purge",
+ "Number of times purge was stopped",
+ MONITOR_DISPLAY_CURRENT,
+ MONITOR_DEFAULT_START, MONITOR_PURGE_STOP_COUNT},
+
+ {"purge_resume_count", "purge",
+ "Number of times purge was resumed",
+ MONITOR_DISPLAY_CURRENT,
+ MONITOR_DEFAULT_START, MONITOR_PURGE_RESUME_COUNT},
+
+ /* ========== Counters for Recovery Module ========== */
+ {"module_log", "recovery", "Recovery Module",
+ MONITOR_MODULE,
+ MONITOR_DEFAULT_START, MONITOR_MODULE_RECOVERY},
+
+ {"log_checkpoints", "recovery", "Number of checkpoints",
+ static_cast<monitor_type_t>(
+ MONITOR_EXISTING | MONITOR_DISPLAY_CURRENT),
+ MONITOR_DEFAULT_START, MONITOR_OVLD_CHECKPOINTS},
+
+ {"log_lsn_last_flush", "recovery", "LSN of Last flush",
+ static_cast<monitor_type_t>(
+ MONITOR_EXISTING | MONITOR_DISPLAY_CURRENT),
+ MONITOR_DEFAULT_START, MONITOR_OVLD_LSN_FLUSHDISK},
+
+ {"log_lsn_last_checkpoint", "recovery", "LSN at last checkpoint",
+ static_cast<monitor_type_t>(
+ MONITOR_EXISTING | MONITOR_DISPLAY_CURRENT),
+ MONITOR_DEFAULT_START, MONITOR_OVLD_LSN_CHECKPOINT},
+
+ {"log_lsn_current", "recovery", "Current LSN value",
+ static_cast<monitor_type_t>(
+ MONITOR_EXISTING | MONITOR_DISPLAY_CURRENT),
+ MONITOR_DEFAULT_START, MONITOR_OVLD_LSN_CURRENT},
+
+ {"log_lsn_checkpoint_age", "recovery",
+ "Current LSN value minus LSN at last checkpoint",
+ static_cast<monitor_type_t>(
+ MONITOR_EXISTING | MONITOR_DISPLAY_CURRENT),
+ MONITOR_DEFAULT_START, MONITOR_LSN_CHECKPOINT_AGE},
+
+ {"log_lsn_buf_pool_oldest", "recovery",
+ "The oldest modified block LSN in the buffer pool",
+ static_cast<monitor_type_t>(
+ MONITOR_EXISTING | MONITOR_DISPLAY_CURRENT),
+ MONITOR_DEFAULT_START, MONITOR_OVLD_BUF_OLDEST_LSN},
+
+ {"log_max_modified_age_async", "recovery",
+ "Maximum LSN difference; when exceeded, start asynchronous preflush",
+ static_cast<monitor_type_t>(
+ MONITOR_EXISTING | MONITOR_DISPLAY_CURRENT),
+ MONITOR_DEFAULT_START, MONITOR_OVLD_MAX_AGE_ASYNC},
+
+ {"log_waits", "recovery",
+ "Number of log waits due to small log buffer (innodb_log_waits)",
+ static_cast<monitor_type_t>(
+ MONITOR_EXISTING | MONITOR_DEFAULT_ON),
+ MONITOR_DEFAULT_START, MONITOR_OVLD_LOG_WAITS},
+
+ {"log_write_requests", "recovery",
+ "Number of log write requests (innodb_log_write_requests)",
+ static_cast<monitor_type_t>(
+ MONITOR_EXISTING | MONITOR_DEFAULT_ON),
+ MONITOR_DEFAULT_START, MONITOR_OVLD_LOG_WRITE_REQUEST},
+
+ {"log_writes", "recovery",
+ "Number of log writes (innodb_log_writes)",
+ static_cast<monitor_type_t>(
+ MONITOR_EXISTING | MONITOR_DEFAULT_ON),
+ MONITOR_DEFAULT_START, MONITOR_OVLD_LOG_WRITES},
+
+ /* ========== Counters for Page Compression ========== */
+ {"module_compress", "compression", "Page Compression Info",
+ MONITOR_MODULE,
+ MONITOR_DEFAULT_START, MONITOR_MODULE_PAGE},
+
+ {"compress_pages_compressed", "compression",
+ "Number of pages compressed", MONITOR_NONE,
+ MONITOR_DEFAULT_START, MONITOR_PAGE_COMPRESS},
+
+ {"compress_pages_decompressed", "compression",
+ "Number of pages decompressed",
+ MONITOR_NONE,
+ MONITOR_DEFAULT_START, MONITOR_PAGE_DECOMPRESS},
+
+ {"compression_pad_increments", "compression",
+ "Number of times padding is incremented to avoid compression failures",
+ MONITOR_NONE,
+ MONITOR_DEFAULT_START, MONITOR_PAD_INCREMENTS},
+
+ {"compression_pad_decrements", "compression",
+ "Number of times padding is decremented due to good compressibility",
+ MONITOR_NONE,
+ MONITOR_DEFAULT_START, MONITOR_PAD_DECREMENTS},
+
+ {"compress_saved", "compression",
+ "Number of bytes saved by page compression",
+ MONITOR_NONE,
+ MONITOR_DEFAULT_START, MONITOR_OVLD_PAGE_COMPRESS_SAVED},
+
+ {"compress_pages_page_compressed", "compression",
+ "Number of pages compressed by page compression",
+ MONITOR_NONE,
+ MONITOR_DEFAULT_START, MONITOR_OVLD_PAGES_PAGE_COMPRESSED},
+
+ {"compress_page_compressed_trim_op", "compression",
+ "Number of TRIM operation performed by page compression",
+ MONITOR_NONE,
+ MONITOR_DEFAULT_START, MONITOR_OVLD_PAGE_COMPRESSED_TRIM_OP},
+
+ {"compress_pages_page_decompressed", "compression",
+ "Number of pages decompressed by page compression",
+ MONITOR_NONE,
+ MONITOR_DEFAULT_START, MONITOR_OVLD_PAGES_PAGE_DECOMPRESSED},
+
+ {"compress_pages_page_compression_error", "compression",
+ "Number of page compression errors",
+ MONITOR_NONE,
+ MONITOR_DEFAULT_START, MONITOR_OVLD_PAGES_PAGE_COMPRESSION_ERROR},
+
+ {"compress_pages_encrypted", "compression",
+ "Number of pages encrypted",
+ MONITOR_NONE,
+ MONITOR_DEFAULT_START, MONITOR_OVLD_PAGES_ENCRYPTED},
+
+ {"compress_pages_decrypted", "compression",
+ "Number of pages decrypted",
+ MONITOR_NONE,
+ MONITOR_DEFAULT_START, MONITOR_OVLD_PAGES_DECRYPTED},
+
+ /* ========== Counters for Index ========== */
+ {"module_index", "index", "Index Manager",
+ MONITOR_MODULE,
+ MONITOR_DEFAULT_START, MONITOR_MODULE_INDEX},
+
+ {"index_page_splits", "index", "Number of index page splits",
+ MONITOR_EXISTING,
+ MONITOR_DEFAULT_START, MONITOR_INDEX_SPLIT},
+
+ {"index_page_merge_attempts", "index",
+ "Number of index page merge attempts",
+ MONITOR_NONE,
+ MONITOR_DEFAULT_START, MONITOR_INDEX_MERGE_ATTEMPTS},
+
+ {"index_page_merge_successful", "index",
+ "Number of successful index page merges",
+ MONITOR_NONE,
+ MONITOR_DEFAULT_START, MONITOR_INDEX_MERGE_SUCCESSFUL},
+
+ {"index_page_reorg_attempts", "index",
+ "Number of index page reorganization attempts",
+ MONITOR_NONE,
+ MONITOR_DEFAULT_START, MONITOR_INDEX_REORG_ATTEMPTS},
+
+ {"index_page_reorg_successful", "index",
+ "Number of successful index page reorganizations",
+ MONITOR_NONE,
+ MONITOR_DEFAULT_START, MONITOR_INDEX_REORG_SUCCESSFUL},
+
+ {"index_page_discards", "index", "Number of index pages discarded",
+ MONITOR_NONE,
+ MONITOR_DEFAULT_START, MONITOR_INDEX_DISCARD},
+
+#ifdef BTR_CUR_HASH_ADAPT
+ /* ========== Counters for Adaptive Hash Index ========== */
+ {"module_adaptive_hash", "adaptive_hash_index", "Adaptive Hash Index",
+ MONITOR_MODULE,
+ MONITOR_DEFAULT_START, MONITOR_MODULE_ADAPTIVE_HASH},
+
+ {"adaptive_hash_searches", "adaptive_hash_index",
+ "Number of successful searches using Adaptive Hash Index",
+ static_cast<monitor_type_t>(
+ MONITOR_EXISTING | MONITOR_DEFAULT_ON),
+ MONITOR_DEFAULT_START, MONITOR_OVLD_ADAPTIVE_HASH_SEARCH},
+
+ {"adaptive_hash_searches_btree", "adaptive_hash_index",
+ "Number of searches using B-tree on an index search",
+ static_cast<monitor_type_t>(
+ MONITOR_EXISTING | MONITOR_DEFAULT_ON),
+ MONITOR_DEFAULT_START, MONITOR_OVLD_ADAPTIVE_HASH_SEARCH_BTREE},
+
+ {"adaptive_hash_pages_added", "adaptive_hash_index",
+ "Number of index pages on which the Adaptive Hash Index is built",
+ MONITOR_NONE,
+ MONITOR_DEFAULT_START, MONITOR_ADAPTIVE_HASH_PAGE_ADDED},
+
+ {"adaptive_hash_pages_removed", "adaptive_hash_index",
+ "Number of index pages whose corresponding Adaptive Hash Index"
+ " entries were removed",
+ MONITOR_NONE,
+ MONITOR_DEFAULT_START, MONITOR_ADAPTIVE_HASH_PAGE_REMOVED},
+
+ {"adaptive_hash_rows_added", "adaptive_hash_index",
+ "Number of Adaptive Hash Index rows added",
+ MONITOR_NONE,
+ MONITOR_DEFAULT_START, MONITOR_ADAPTIVE_HASH_ROW_ADDED},
+
+ {"adaptive_hash_rows_removed", "adaptive_hash_index",
+ "Number of Adaptive Hash Index rows removed",
+ MONITOR_NONE,
+ MONITOR_DEFAULT_START, MONITOR_ADAPTIVE_HASH_ROW_REMOVED},
+
+ {"adaptive_hash_rows_deleted_no_hash_entry", "adaptive_hash_index",
+ "Number of rows deleted that did not have corresponding Adaptive Hash"
+ " Index entries",
+ MONITOR_NONE,
+ MONITOR_DEFAULT_START, MONITOR_ADAPTIVE_HASH_ROW_REMOVE_NOT_FOUND},
+
+ {"adaptive_hash_rows_updated", "adaptive_hash_index",
+ "Number of Adaptive Hash Index rows updated",
+ MONITOR_NONE,
+ MONITOR_DEFAULT_START, MONITOR_ADAPTIVE_HASH_ROW_UPDATED},
+#endif /* BTR_CUR_HASH_ADAPT */
+
+ /* ========== Counters for tablespace ========== */
+ {"module_file", "file_system", "Tablespace and File System Manager",
+ MONITOR_MODULE,
+ MONITOR_DEFAULT_START, MONITOR_MODULE_FIL_SYSTEM},
+
+ {"file_num_open_files", "file_system",
+ "Number of files currently open (innodb_num_open_files)",
+ static_cast<monitor_type_t>(
+ MONITOR_EXISTING | MONITOR_DISPLAY_CURRENT | MONITOR_DEFAULT_ON),
+ MONITOR_DEFAULT_START, MONITOR_OVLD_N_FILE_OPENED},
+
+ /* ========== Counters for Change Buffer ========== */
+ {"module_ibuf_system", "change_buffer", "InnoDB Change Buffer",
+ MONITOR_MODULE,
+ MONITOR_DEFAULT_START, MONITOR_MODULE_IBUF_SYSTEM},
+
+ {"ibuf_merges_insert", "change_buffer",
+ "Number of inserted records merged by change buffering",
+ static_cast<monitor_type_t>(
+ MONITOR_EXISTING | MONITOR_DEFAULT_ON),
+ MONITOR_DEFAULT_START, MONITOR_OVLD_IBUF_MERGE_INSERT},
+
+ {"ibuf_merges_delete_mark", "change_buffer",
+ "Number of deleted records merged by change buffering",
+ static_cast<monitor_type_t>(
+ MONITOR_EXISTING | MONITOR_DEFAULT_ON),
+ MONITOR_DEFAULT_START, MONITOR_OVLD_IBUF_MERGE_DELETE},
+
+ {"ibuf_merges_delete", "change_buffer",
+ "Number of purge records merged by change buffering",
+ static_cast<monitor_type_t>(
+ MONITOR_EXISTING | MONITOR_DEFAULT_ON),
+ MONITOR_DEFAULT_START, MONITOR_OVLD_IBUF_MERGE_PURGE},
+
+ {"ibuf_merges_discard_insert", "change_buffer",
+ "Number of insert merged operations discarded",
+ static_cast<monitor_type_t>(
+ MONITOR_EXISTING | MONITOR_DEFAULT_ON),
+ MONITOR_DEFAULT_START, MONITOR_OVLD_IBUF_MERGE_DISCARD_INSERT},
+
+ {"ibuf_merges_discard_delete_mark", "change_buffer",
+ "Number of deleted merged operations discarded",
+ static_cast<monitor_type_t>(
+ MONITOR_EXISTING | MONITOR_DEFAULT_ON),
+ MONITOR_DEFAULT_START, MONITOR_OVLD_IBUF_MERGE_DISCARD_DELETE},
+
+ {"ibuf_merges_discard_delete", "change_buffer",
+ "Number of purge merged operations discarded",
+ static_cast<monitor_type_t>(
+ MONITOR_EXISTING | MONITOR_DEFAULT_ON),
+ MONITOR_DEFAULT_START, MONITOR_OVLD_IBUF_MERGE_DISCARD_PURGE},
+
+ {"ibuf_merges", "change_buffer", "Number of change buffer merges",
+ static_cast<monitor_type_t>(
+ MONITOR_EXISTING | MONITOR_DEFAULT_ON),
+ MONITOR_DEFAULT_START, MONITOR_OVLD_IBUF_MERGES},
+
+ {"ibuf_size", "change_buffer", "Change buffer size in pages",
+ static_cast<monitor_type_t>(
+ MONITOR_EXISTING | MONITOR_DEFAULT_ON),
+ MONITOR_DEFAULT_START, MONITOR_OVLD_IBUF_SIZE},
+
+ /* ========== Counters for server operations ========== */
+ {"module_innodb", "innodb",
+ "Counter for general InnoDB server wide operations and properties",
+ MONITOR_MODULE,
+ MONITOR_DEFAULT_START, MONITOR_MODULE_SERVER},
+
+ {"innodb_master_thread_sleeps", "server",
+ "Number of times (seconds) master thread sleeps",
+ MONITOR_NONE,
+ MONITOR_DEFAULT_START, MONITOR_MASTER_THREAD_SLEEP},
+
+ {"innodb_activity_count", "server", "Current server activity count",
+ static_cast<monitor_type_t>(
+ MONITOR_EXISTING | MONITOR_DEFAULT_ON),
+ MONITOR_DEFAULT_START, MONITOR_OVLD_SERVER_ACTIVITY},
+
+ {"innodb_master_active_loops", "server",
+ "Number of times master thread performs its tasks when"
+ " server is active",
+ MONITOR_NONE,
+ MONITOR_DEFAULT_START, MONITOR_MASTER_ACTIVE_LOOPS},
+
+ {"innodb_master_idle_loops", "server",
+ "Number of times master thread performs its tasks when server is idle",
+ MONITOR_NONE,
+ MONITOR_DEFAULT_START, MONITOR_MASTER_IDLE_LOOPS},
+
+ {"innodb_log_flush_usec", "server",
+ "Time (in microseconds) spent to flush log records",
+ MONITOR_NONE,
+ MONITOR_DEFAULT_START, MONITOR_SRV_LOG_FLUSH_MICROSECOND},
+
+ {"innodb_dict_lru_usec", "server",
+ "Time (in microseconds) spent to process DICT LRU list",
+ MONITOR_NONE,
+ MONITOR_DEFAULT_START, MONITOR_SRV_DICT_LRU_MICROSECOND},
+
+ {"innodb_dict_lru_count_active", "server",
+ "Number of tables evicted from DICT LRU list in the active loop",
+ MONITOR_NONE,
+ MONITOR_DEFAULT_START, MONITOR_SRV_DICT_LRU_EVICT_COUNT_ACTIVE},
+
+ {"innodb_dict_lru_count_idle", "server",
+ "Number of tables evicted from DICT LRU list in the idle loop",
+ MONITOR_NONE,
+ MONITOR_DEFAULT_START, MONITOR_SRV_DICT_LRU_EVICT_COUNT_IDLE},
+
+ {"innodb_dblwr_writes", "server",
+ "Number of doublewrite operations that have been performed"
+ " (innodb_dblwr_writes)",
+ static_cast<monitor_type_t>(
+ MONITOR_EXISTING | MONITOR_DEFAULT_ON),
+ MONITOR_DEFAULT_START, MONITOR_OVLD_SRV_DBLWR_WRITES},
+
+ {"innodb_dblwr_pages_written", "server",
+ "Number of pages that have been written for doublewrite operations"
+ " (innodb_dblwr_pages_written)",
+ static_cast<monitor_type_t>(
+ MONITOR_EXISTING | MONITOR_DEFAULT_ON),
+ MONITOR_DEFAULT_START, MONITOR_OVLD_SRV_DBLWR_PAGES_WRITTEN},
+
+ {"innodb_page_size", "server",
+ "InnoDB page size in bytes (innodb_page_size)",
+ static_cast<monitor_type_t>(
+ MONITOR_EXISTING | MONITOR_DEFAULT_ON | MONITOR_DISPLAY_CURRENT),
+ MONITOR_DEFAULT_START, MONITOR_OVLD_SRV_PAGE_SIZE},
+
+ /* ========== Counters for DDL operations ========== */
+ {"module_ddl", "ddl", "Statistics for DDLs",
+ MONITOR_MODULE,
+ MONITOR_DEFAULT_START, MONITOR_MODULE_DDL_STATS},
+
+ {"ddl_background_drop_indexes", "ddl",
+ "Number of indexes waiting to be dropped after failed index creation",
+ MONITOR_NONE,
+ MONITOR_DEFAULT_START, MONITOR_BACKGROUND_DROP_INDEX},
+
+ {"ddl_online_create_index", "ddl",
+ "Number of indexes being created online",
+ MONITOR_NONE,
+ MONITOR_DEFAULT_START, MONITOR_ONLINE_CREATE_INDEX},
+
+ {"ddl_pending_alter_table", "ddl",
+ "Number of ALTER TABLE, CREATE INDEX, DROP INDEX in progress",
+ MONITOR_NONE,
+ MONITOR_DEFAULT_START, MONITOR_PENDING_ALTER_TABLE},
+
+ {"ddl_sort_file_alter_table", "ddl",
+ "Number of sort files created during alter table",
+ MONITOR_NONE,
+ MONITOR_DEFAULT_START, MONITOR_ALTER_TABLE_SORT_FILES},
+
+ {"ddl_log_file_alter_table", "ddl",
+ "Number of log files created during alter table",
+ MONITOR_NONE,
+ MONITOR_DEFAULT_START, MONITOR_ALTER_TABLE_LOG_FILES},
+
+ /* ===== Counters for ICP (Index Condition Pushdown) Module ===== */
+ {"module_icp", "icp", "Index Condition Pushdown",
+ MONITOR_MODULE,
+ MONITOR_DEFAULT_START, MONITOR_MODULE_ICP},
+
+ {"icp_attempts", "icp",
+ "Number of attempts for index push-down condition checks",
+ MONITOR_NONE,
+ MONITOR_DEFAULT_START, MONITOR_ICP_ATTEMPTS},
+
+ {"icp_no_match", "icp", "Index push-down condition does not match",
+ MONITOR_NONE,
+ MONITOR_DEFAULT_START, MONITOR_ICP_NO_MATCH},
+
+ {"icp_out_of_range", "icp", "Index push-down condition out of range",
+ MONITOR_NONE,
+ MONITOR_DEFAULT_START, MONITOR_ICP_OUT_OF_RANGE},
+
+ {"icp_match", "icp", "Index push-down condition matches",
+ MONITOR_NONE,
+ MONITOR_DEFAULT_START, MONITOR_ICP_MATCH},
+
+ /* ========== To turn on/off reset all counters ========== */
+ {"all", "All Counters", "Turn on/off and reset all counters",
+ MONITOR_MODULE,
+ MONITOR_DEFAULT_START, MONITOR_ALL_COUNTER}
+};
+
+/* The "innodb_counter_value" array stores the actual counter values,
+one monitor_value_t slot per monitor id (NUM_MONITOR slots in total) */
+monitor_value_t innodb_counter_value[NUM_MONITOR];
+
+/* monitor_set_tbl is used to record and determine whether a monitor
+has been turned on/off. Its length is NUM_MONITOR divided by
+NUM_BITS_ULINT, rounded up; presumably this is one on/off bit per
+monitor packed into ulint words -- confirm against the MONITOR_ON /
+MONITOR_OFF / MONITOR_IS_ON macro definitions. */
+Atomic_relaxed<ulint>
+ monitor_set_tbl[(NUM_MONITOR + NUM_BITS_ULINT - 1) / NUM_BITS_ULINT];
+
+/****************************************************************//**
+Look up the descriptor of a monitor counter by its monitor id, which
+is an index into the innodb_counter_info array.
+@return pointer to the corresponding monitor_info_t, or NULL if no such
+monitor */
+monitor_info_t*
+srv_mon_get_info(
+/*=============*/
+	monitor_id_t	monitor_id)	/*!< id indexing into the
+					innodb_counter_info array */
+{
+	ut_a(monitor_id < NUM_MONITOR);
+
+	/* Out-of-range ids yield NULL instead of an array access */
+	if (!(monitor_id < NUM_MONITOR)) {
+		return(NULL);
+	}
+
+	return(&innodb_counter_info[monitor_id]);
+}
+
+/****************************************************************//**
+Look up the name of a monitor counter by its monitor id, which is an
+index into the innodb_counter_info array.
+@return the monitor_name of the corresponding entry, or NULL if no such
+monitor */
+const char*
+srv_mon_get_name(
+/*=============*/
+	monitor_id_t	monitor_id)	/*!< id index into the
+					innodb_counter_info array */
+{
+	ut_a(monitor_id < NUM_MONITOR);
+
+	/* Out-of-range ids yield NULL instead of an array access */
+	if (!(monitor_id < NUM_MONITOR)) {
+		return(NULL);
+	}
+
+	return(innodb_counter_info[monitor_id].monitor_name);
+}
+
+/****************************************************************//**
+Turn on/off, reset monitor counters in a module. If module_id
+is MONITOR_ALL_COUNTER then the operation is applied to all monitor
+counters. When the request is MONITOR_TURN_ON, a counter that is
+already on is skipped with an informational message instead of being
+re-initialized (which could reset its value). */
+void
+srv_mon_set_module_control(
+/*=======================*/
+	monitor_id_t	module_id,	/*!< in: Module ID as in
+					monitor_counter_id. If it is
+					set to MONITOR_ALL_COUNTER, this means
+					we shall turn on all the counters */
+	mon_option_t	set_option)	/*!< in: Turn on/off reset the
+					counter */
+{
+	lint	ix;
+	lint	start_id;
+	ibool	set_current_module = FALSE;
+
+	/* module_id is used below as an index into innodb_counter_info,
+	which has exactly NUM_MONITOR elements (see the compile-time
+	assertion), so NUM_MONITOR itself is already out of bounds. */
+	ut_a(module_id < NUM_MONITOR);
+	compile_time_assert(array_elements(innodb_counter_info)
+			    == NUM_MONITOR);
+
+	/* The module_id must be an ID of MONITOR_MODULE type */
+	ut_a(innodb_counter_info[module_id].monitor_type & MONITOR_MODULE);
+
+	/* start with the first monitor in the module. If module_id
+	is MONITOR_ALL_COUNTER, this means we need to turn on all
+	monitor counters. */
+	if (module_id == MONITOR_ALL_COUNTER) {
+		start_id = 1;
+	} else if (innodb_counter_info[module_id].monitor_type
+		   & MONITOR_GROUP_MODULE) {
+		/* Counters in this module are set as a group together
+		and cannot be turned on/off individually. Need to set
+		the on/off bit in the module counter */
+		start_id = module_id;
+		set_current_module = TRUE;
+
+	} else {
+		start_id = module_id + 1;
+	}
+
+	for (ix = start_id; ix < NUM_MONITOR; ix++) {
+		/* if we hit the next module counter, we will
+		continue if we want to turn on all monitor counters,
+		and break if just turn on the counters in the
+		current module. */
+		if (innodb_counter_info[ix].monitor_type & MONITOR_MODULE) {
+
+			if (set_current_module) {
+				/* Continue to set on/off bit on current
+				module */
+				set_current_module = FALSE;
+			} else if (module_id == MONITOR_ALL_COUNTER) {
+				/* Plain module headers are skipped; group
+				modules fall through and get processed
+				like a counter */
+				if (!(innodb_counter_info[ix].monitor_type
+				      & MONITOR_GROUP_MODULE)) {
+					continue;
+				}
+			} else {
+				/* Hitting the next module, stop */
+				break;
+			}
+		}
+
+		/* Cannot turn on a monitor already been turned on. User
+		should be aware some counters are already on before
+		turn them on again (which could reset counter value) */
+		if (MONITOR_IS_ON(ix) && (set_option == MONITOR_TURN_ON)) {
+			ib::info() << "Monitor '"
+				<< srv_mon_get_name((monitor_id_t) ix)
+				<< "' is already enabled.";
+			continue;
+		}
+
+		/* For some existing counters (server status variables),
+		we will get its counter value at the start/stop time
+		to calculate the actual value during the time. */
+		if (innodb_counter_info[ix].monitor_type & MONITOR_EXISTING) {
+			srv_mon_process_existing_counter(
+				static_cast<monitor_id_t>(ix), set_option);
+		}
+
+		/* Currently support 4 operations on the monitor counters:
+		turn on, turn off, reset and reset all operations. */
+		switch (set_option) {
+		case MONITOR_TURN_ON:
+			MONITOR_ON(ix);
+			MONITOR_INIT(ix);
+			MONITOR_SET_START(ix);
+			break;
+
+		case MONITOR_TURN_OFF:
+			MONITOR_OFF(ix);
+			MONITOR_SET_OFF(ix);
+			break;
+
+		case MONITOR_RESET_VALUE:
+			srv_mon_reset(static_cast<monitor_id_t>(ix));
+			break;
+
+		case MONITOR_RESET_ALL_VALUE:
+			srv_mon_reset_all(static_cast<monitor_id_t>(ix));
+			break;
+
+		default:
+			ut_error;
+		}
+	}
+}
+
+/****************************************************************//**
+Add up curr_size over every rollback segment in trx_sys.rseg_array.
+@return total rollback segment size in pages */
+TPOOL_SUPPRESS_TSAN static ulint srv_mon_get_rseg_size()
+{
+  ulint n_pages= 0;
+  for (const auto &segment : trx_sys.rseg_array)
+  {
+    n_pages+= segment.curr_size;
+  }
+  return n_pages;
+}
+
+/** Add up the length of undo_list over every rollback segment.
+@return number of used undo log slots */
+TPOOL_SUPPRESS_TSAN static ulint srv_mon_get_rseg_used()
+{
+  ulint n_used= 0;
+  for (const auto &segment : trx_sys.rseg_array)
+  {
+    n_used+= UT_LIST_GET_LEN(segment.undo_list);
+  }
+  return n_used;
+}
+
+/** Add up the length of undo_cached over every rollback segment.
+@return number of cached undo log slots */
+TPOOL_SUPPRESS_TSAN static ulint srv_mon_get_rseg_cached()
+{
+  ulint n_cached= 0;
+  for (const auto &segment : trx_sys.rseg_array)
+  {
+    n_cached+= UT_LIST_GET_LEN(segment.undo_cached);
+  }
+  return n_cached;
+}
+
+/****************************************************************//**
+This function consolidates some existing server counters used
+by "system status variables". These existing system variables do not have
+mechanism to start/stop and reset the counters, so we simulate these
+controls by remembering the corresponding counter values when the
+corresponding monitors are turned on/off/reset, and do appropriate
+mathematics to deduce the actual value. Please also refer to
+srv_export_innodb_status() for related global counters used by
+the existing status variables.
+
+The function works in two steps: the first switch reads the current
+value of the global that backs monitor_id (an id without a case there
+hits ut_error), and the second switch applies set_option to the
+monitor's bookkeeping using that value. The monitor must be of
+MONITOR_EXISTING type. */
+TPOOL_SUPPRESS_TSAN
+void
+srv_mon_process_existing_counter(
+/*=============================*/
+ monitor_id_t monitor_id, /*!< in: the monitor's ID as in
+ monitor_counter_id */
+ mon_option_t set_option) /*!< in: Turn on/off reset the
+ counter */
+{
+ mon_type_t value;
+ monitor_info_t* monitor_info;
+ ibool update_min = FALSE; /* never set to TRUE in this function */
+
+ monitor_info = srv_mon_get_info(monitor_id);
+
+ ut_a(monitor_info->monitor_type & MONITOR_EXISTING);
+ ut_a(monitor_id < NUM_MONITOR);
+
+ /* Get the value from corresponding global variable */
+ switch (monitor_id) {
+ case MONITOR_INDEX_SPLIT:
+ value = buf_pool.pages_split;
+ break;
+
+ case MONITOR_OVLD_BUF_POOL_READS:
+ value = buf_pool.stat.n_pages_read;
+ break;
+
+ /* innodb_buffer_pool_read_requests, the number of logical
+ read requests */
+ case MONITOR_OVLD_BUF_POOL_READ_REQUESTS:
+ value = buf_pool.stat.n_page_gets;
+ break;
+
+ /* innodb_buffer_pool_write_requests, the number of
+ write requests */
+ case MONITOR_OVLD_BUF_POOL_WRITE_REQUEST:
+ value = buf_pool.flush_list_requests;
+ break;
+
+ /* innodb_buffer_pool_wait_free */
+ case MONITOR_OVLD_BUF_POOL_WAIT_FREE:
+ value = buf_pool.stat.LRU_waits;
+ break;
+
+ /* innodb_buffer_pool_read_ahead */
+ case MONITOR_OVLD_BUF_POOL_READ_AHEAD:
+ value = buf_pool.stat.n_ra_pages_read;
+ break;
+
+ /* innodb_buffer_pool_read_ahead_evicted */
+ case MONITOR_OVLD_BUF_POOL_READ_AHEAD_EVICTED:
+ value = buf_pool.stat.n_ra_pages_evicted;
+ break;
+
+ /* innodb_buffer_pool_pages_total */
+ case MONITOR_OVLD_BUF_POOL_PAGE_TOTAL:
+ value = buf_pool.get_n_pages();
+ break;
+
+ /* innodb_buffer_pool_pages_misc: total pages minus the pages
+ on the LRU list and on the free list */
+ case MONITOR_OVLD_BUF_POOL_PAGE_MISC:
+ value = buf_pool.get_n_pages()
+ - UT_LIST_GET_LEN(buf_pool.LRU)
+ - UT_LIST_GET_LEN(buf_pool.free);
+ break;
+
+ /* innodb_buffer_pool_pages_data */
+ case MONITOR_OVLD_BUF_POOL_PAGES_DATA:
+ value = UT_LIST_GET_LEN(buf_pool.LRU);
+ break;
+
+ /* innodb_buffer_pool_bytes_data: LRU_bytes plus the length of
+ unzip_LRU shifted left by srv_page_size_shift */
+ case MONITOR_OVLD_BUF_POOL_BYTES_DATA:
+ value = buf_pool.stat.LRU_bytes
+ + (UT_LIST_GET_LEN(buf_pool.unzip_LRU)
+ << srv_page_size_shift);
+ break;
+
+ /* innodb_buffer_pool_pages_dirty */
+ case MONITOR_OVLD_BUF_POOL_PAGES_DIRTY:
+ value = UT_LIST_GET_LEN(buf_pool.flush_list);
+ break;
+
+ /* innodb_buffer_pool_bytes_dirty */
+ case MONITOR_OVLD_BUF_POOL_BYTES_DIRTY:
+ value = buf_pool.flush_list_bytes;
+ break;
+
+ /* innodb_buffer_pool_pages_free */
+ case MONITOR_OVLD_BUF_POOL_PAGES_FREE:
+ value = UT_LIST_GET_LEN(buf_pool.free);
+ break;
+
+ /* innodb_pages_created, the number of pages created */
+ case MONITOR_OVLD_PAGE_CREATED:
+ value = buf_pool.stat.n_pages_created;
+ break;
+
+ /* innodb_pages_written, the number of pages written */
+ case MONITOR_OVLD_PAGES_WRITTEN:
+ value = buf_pool.stat.n_pages_written;
+ break;
+
+ case MONITOR_LRU_BATCH_FLUSH_TOTAL_PAGE:
+ value = buf_lru_flush_page_count;
+ break;
+
+ case MONITOR_LRU_BATCH_EVICT_TOTAL_PAGE:
+ value = buf_lru_freed_page_count;
+ break;
+
+ /* innodb_pages_read */
+ case MONITOR_OVLD_PAGES_READ:
+ value = buf_pool.stat.n_pages_read;
+ break;
+
+ /* NOTE(review): reads srv_stats.data_read; the BYTE_READ id suggests this is presumably innodb_data_read (bytes), not innodb_data_reads -- confirm */
+ case MONITOR_OVLD_BYTE_READ:
+ value = srv_stats.data_read;
+ break;
+
+ /* NOTE(review): reads srv_stats.data_written; the BYTE_WRITTEN id suggests this is presumably innodb_data_written (bytes), not innodb_data_writes -- confirm */
+ case MONITOR_OVLD_BYTE_WRITTEN:
+ value = srv_stats.data_written;
+ break;
+
+ /* innodb_data_reads, the total number of data reads. */
+ case MONITOR_OVLD_OS_FILE_READ:
+ value = os_n_file_reads;
+ break;
+
+ /* innodb_data_writes, the total number of data writes */
+ case MONITOR_OVLD_OS_FILE_WRITE:
+ value = os_n_file_writes;
+ break;
+
+ /* innodb_data_fsyncs, number of fsync() operations so far. */
+ case MONITOR_OVLD_OS_FSYNC:
+ value = os_n_fsyncs;
+ break;
+
+ /* innodb_os_log_written: current LSN minus recv_sys.lsn */
+ case MONITOR_OVLD_OS_LOG_WRITTEN:
+ value = log_sys.get_lsn() - recv_sys.lsn;
+ break;
+
+ /* innodb_log_waits */
+ case MONITOR_OVLD_LOG_WAITS:
+ value = log_sys.waits;
+ break;
+
+ /* innodb_log_write_requests */
+ case MONITOR_OVLD_LOG_WRITE_REQUEST:
+ value = log_sys.write_to_buf;
+ break;
+
+ /* innodb_log_writes */
+ case MONITOR_OVLD_LOG_WRITES:
+ value = log_sys.write_to_log;
+ break;
+
+ /* innodb_dblwr_writes, read while holding the buf_dblwr lock */
+ case MONITOR_OVLD_SRV_DBLWR_WRITES:
+ buf_dblwr.lock();
+ value = buf_dblwr.batches();
+ buf_dblwr.unlock();
+ break;
+
+ /* innodb_dblwr_pages_written, read while holding the
+ buf_dblwr lock */
+ case MONITOR_OVLD_SRV_DBLWR_PAGES_WRITTEN:
+ buf_dblwr.lock();
+ value = buf_dblwr.written();
+ buf_dblwr.unlock();
+ break;
+
+ /* innodb_page_size */
+ case MONITOR_OVLD_SRV_PAGE_SIZE:
+ value = srv_page_size;
+ break;
+
+ case MONITOR_OVLD_BUFFER_POOL_SIZE:
+ value = srv_buf_pool_size;
+ break;
+
+ /* innodb_row_lock_current_waits */
+ case MONITOR_OVLD_ROW_LOCK_CURRENT_WAIT:
+ // dirty read without lock_sys.wait_mutex
+ value = lock_sys.get_wait_pending();
+ break;
+
+ /* innodb_row_lock_time */
+ case MONITOR_OVLD_LOCK_WAIT_TIME:
+ // dirty read without lock_sys.wait_mutex
+ value = lock_sys.get_wait_time_cumulative();
+ break;
+
+ /* innodb_row_lock_time_max */
+ case MONITOR_OVLD_LOCK_MAX_WAIT_TIME:
+ // dirty read without lock_sys.wait_mutex
+ value = lock_sys.get_wait_time_max();
+ break;
+
+ /* innodb_row_lock_time_avg: cumulative wait time divided by the
+ cumulative wait count (0 when the count is 0), computed while
+ holding lock_sys.wait_mutex */
+ case MONITOR_OVLD_LOCK_AVG_WAIT_TIME:
+ mysql_mutex_lock(&lock_sys.wait_mutex);
+ if (auto count = lock_sys.get_wait_cumulative()) {
+ value = lock_sys.get_wait_time_cumulative() / count;
+ } else {
+ value = 0;
+ }
+ mysql_mutex_unlock(&lock_sys.wait_mutex);
+ break;
+
+ /* innodb_row_lock_waits */
+ case MONITOR_OVLD_ROW_LOCK_WAIT:
+ // dirty read without lock_sys.wait_mutex
+ value = lock_sys.get_wait_cumulative();
+ break;
+
+ case MONITOR_RSEG_HISTORY_LEN:
+ value = trx_sys.history_size_approx();
+ break;
+
+ case MONITOR_RSEG_CUR_SIZE:
+ value = srv_mon_get_rseg_size();
+ break;
+ case MONITOR_DML_PURGE_DELAY:
+ value = srv_max_purge_lag_delay;
+ break;
+ case MONITOR_NUM_UNDO_SLOT_USED:
+ value = srv_mon_get_rseg_used();
+ break;
+ case MONITOR_NUM_UNDO_SLOT_CACHED:
+ value = srv_mon_get_rseg_cached();
+ break;
+ case MONITOR_OVLD_N_FILE_OPENED:
+ value = fil_system.n_open;
+ break;
+
+ case MONITOR_OVLD_IBUF_MERGE_INSERT:
+ value = ibuf.n_merged_ops[IBUF_OP_INSERT];
+ break;
+
+ case MONITOR_OVLD_IBUF_MERGE_DELETE:
+ value = ibuf.n_merged_ops[IBUF_OP_DELETE_MARK];
+ break;
+
+ case MONITOR_OVLD_IBUF_MERGE_PURGE:
+ value = ibuf.n_merged_ops[IBUF_OP_DELETE];
+ break;
+
+ case MONITOR_OVLD_IBUF_MERGE_DISCARD_INSERT:
+ value = ibuf.n_discarded_ops[IBUF_OP_INSERT];
+ break;
+
+ case MONITOR_OVLD_IBUF_MERGE_DISCARD_DELETE:
+ value = ibuf.n_discarded_ops[IBUF_OP_DELETE_MARK];
+ break;
+
+ case MONITOR_OVLD_IBUF_MERGE_DISCARD_PURGE:
+ value = ibuf.n_discarded_ops[IBUF_OP_DELETE];
+ break;
+
+ case MONITOR_OVLD_IBUF_MERGES:
+ value = ibuf.n_merges;
+ break;
+
+ case MONITOR_OVLD_IBUF_SIZE:
+ value = ibuf.size;
+ break;
+
+ case MONITOR_OVLD_SERVER_ACTIVITY:
+ value = srv_get_activity_count();
+ break;
+
+ case MONITOR_OVLD_LSN_FLUSHDISK:
+ value = log_sys.get_flushed_lsn();
+ break;
+
+ case MONITOR_OVLD_LSN_CURRENT:
+ value = log_sys.get_lsn();
+ break;
+
+ case MONITOR_OVLD_CHECKPOINTS:
+ value = log_sys.next_checkpoint_no;
+ break;
+
+ case MONITOR_LSN_CHECKPOINT_AGE:
+ log_sys.latch.rd_lock(SRW_LOCK_CALL);
+ value = static_cast<mon_type_t>(log_sys.get_lsn()
+ - log_sys.last_checkpoint_lsn);
+ log_sys.latch.rd_unlock();
+ break;
+
+ case MONITOR_OVLD_BUF_OLDEST_LSN:
+ mysql_mutex_lock(&buf_pool.flush_list_mutex);
+ value = (mon_type_t) buf_pool.get_oldest_modification(0);
+ mysql_mutex_unlock(&buf_pool.flush_list_mutex);
+ break;
+
+ case MONITOR_OVLD_LSN_CHECKPOINT:
+ value = (mon_type_t) log_sys.last_checkpoint_lsn;
+ break;
+
+ case MONITOR_OVLD_MAX_AGE_ASYNC:
+ value = log_sys.max_modified_age_async;
+ break;
+
+#ifdef BTR_CUR_HASH_ADAPT
+ case MONITOR_OVLD_ADAPTIVE_HASH_SEARCH:
+ value = btr_cur_n_sea;
+ break;
+
+ case MONITOR_OVLD_ADAPTIVE_HASH_SEARCH_BTREE:
+ value = btr_cur_n_non_sea;
+ break;
+#endif /* BTR_CUR_HASH_ADAPT */
+
+ case MONITOR_OVLD_PAGE_COMPRESS_SAVED:
+ value = srv_stats.page_compression_saved;
+ break;
+ case MONITOR_OVLD_PAGES_PAGE_COMPRESSED:
+ value = srv_stats.pages_page_compressed;
+ break;
+ case MONITOR_OVLD_PAGE_COMPRESSED_TRIM_OP:
+ value = srv_stats.page_compressed_trim_op;
+ break;
+ case MONITOR_OVLD_PAGES_PAGE_DECOMPRESSED:
+ value = srv_stats.pages_page_decompressed;
+ break;
+ case MONITOR_OVLD_PAGES_PAGE_COMPRESSION_ERROR:
+ value = srv_stats.pages_page_compression_error;
+ break;
+ case MONITOR_OVLD_PAGES_ENCRYPTED:
+ value = srv_stats.pages_encrypted;
+ break;
+ case MONITOR_OVLD_PAGES_DECRYPTED:
+ value = srv_stats.pages_decrypted;
+ break;
+ case MONITOR_DEADLOCK:
+ value = lock_sys.deadlocks;
+ break;
+ case MONITOR_TIMEOUT:
+ value = lock_sys.timeouts;
+ break;
+ default:
+ ut_error;
+ }
+
+ switch (set_option) {
+ case MONITOR_TURN_ON:
+ /* Save the initial counter value in mon_start_value
+ field */
+ MONITOR_SAVE_START(monitor_id, value);
+ return;
+
+ case MONITOR_TURN_OFF:
+ /* Save the counter value to mon_last_value when we
+ turn off the monitor but not yet reset. Note the
+ counter has not yet been set to off in the bitmap
+ table for normal turn off. We need to check the
+ count status (on/off) to avoid resetting the value
+ for an already off counter. The recursive call with
+ MONITOR_GET_VALUE refreshes the monitor value before
+ it is saved. */
+ if (MONITOR_IS_ON(monitor_id)) {
+ srv_mon_process_existing_counter(monitor_id,
+ MONITOR_GET_VALUE);
+ MONITOR_SAVE_LAST(monitor_id);
+ }
+ return;
+
+ case MONITOR_GET_VALUE:
+ if (MONITOR_IS_ON(monitor_id)) {
+
+ /* If MONITOR_DISPLAY_CURRENT bit is on, we
+ only record the current value, rather than
+ incremental value over a period. Most of
+ this type of counters are resource related
+ counters such as number of buffer pages etc. */
+ if (monitor_info->monitor_type
+ & MONITOR_DISPLAY_CURRENT) {
+ MONITOR_SET(monitor_id, value);
+ } else {
+ /* Most status counters are monotonically
+ increasing, no need to update their
+ minimum values. Only do so
+ if "update_min" set to TRUE */
+ MONITOR_SET_DIFF(monitor_id, value);
+
+ if (update_min
+ && (MONITOR_VALUE(monitor_id)
+ < MONITOR_MIN_VALUE(monitor_id))) {
+ MONITOR_MIN_VALUE(monitor_id) =
+ MONITOR_VALUE(monitor_id);
+ }
+ }
+ }
+ return;
+
+ case MONITOR_RESET_VALUE:
+ if (!MONITOR_IS_ON(monitor_id)) {
+ MONITOR_LAST_VALUE(monitor_id) = 0;
+ }
+ return;
+
+ /* Nothing special for reset all operation for these existing
+ counters */
+ case MONITOR_RESET_ALL_VALUE:
+ return;
+ }
+}
+
+/*************************************************************//**
+Reset a monitor counter and start a new baseline from its current
+value. The baseline is kept in MONITOR_VALUE_RESET(monitor). */
+void
+srv_mon_reset(
+/*==========*/
+	monitor_id_t	monitor)	/*!< in: monitor id */
+{
+	/* Remember the on/off state so that it can be restored at
+	the end */
+	const ibool	was_enabled = MONITOR_IS_ON(monitor);
+
+	if (was_enabled) {
+		/* The counter stays switched off while it is being
+		reset */
+		MONITOR_OFF(monitor);
+	}
+
+	/* The max/min values since monitor start have to be
+	calculated before the current value is cleared below */
+	srv_mon_calc_max_since_start(monitor);
+	srv_mon_calc_min_since_start(monitor);
+
+	if (!(innodb_counter_info[monitor].monitor_type
+	      & MONITOR_DISPLAY_CURRENT)) {
+		/* Incremental counter: fold the current value into
+		the baseline */
+		MONITOR_VALUE_RESET(monitor) = MONITOR_VALUE_RESET(monitor)
+			+ MONITOR_VALUE(monitor);
+	} else {
+		/* Counters with the MONITOR_DISPLAY_CURRENT bit are
+		not incremental, so there is no reset value to
+		remember */
+		MONITOR_VALUE_RESET(monitor) = 0;
+	}
+
+	/* Clear the current value and the recorded extremes */
+	MONITOR_VALUE(monitor) = 0;
+	MONITOR_MAX_VALUE(monitor) = MAX_RESERVED;
+	MONITOR_MIN_VALUE(monitor) = MIN_RESERVED;
+
+	MONITOR_FIELD((monitor), mon_reset_time) = time(NULL);
+
+	if (was_enabled) {
+		MONITOR_ON(monitor);
+	}
+}
+
+/*************************************************************//**
+Enable every monitor counter whose type carries the
+MONITOR_DEFAULT_ON flag. */
+void
+srv_mon_default_on(void)
+/*====================*/
+{
+	for (ulint ix = 0; ix < NUM_MONITOR; ix++) {
+		if (!(innodb_counter_info[ix].monitor_type
+		      & MONITOR_DEFAULT_ON)) {
+			/* Not a default-on counter, leave it alone */
+			continue;
+		}
+
+		MONITOR_ON(ix);
+		MONITOR_INIT(ix);
+		MONITOR_SET_START(ix);
+	}
+}
diff --git a/storage/innobase/srv/srv0srv.cc b/storage/innobase/srv/srv0srv.cc
new file mode 100644
index 00000000..bf9755fb
--- /dev/null
+++ b/storage/innobase/srv/srv0srv.cc
@@ -0,0 +1,1659 @@
+/*****************************************************************************
+
+Copyright (c) 1995, 2017, Oracle and/or its affiliates. All Rights Reserved.
+Copyright (c) 2008, 2009 Google Inc.
+Copyright (c) 2009, Percona Inc.
+Copyright (c) 2013, 2022, MariaDB Corporation.
+
+Portions of this file contain modifications contributed and copyrighted by
+Google, Inc. Those modifications are gratefully acknowledged and are described
+briefly in the InnoDB documentation. The contributions by Google are
+incorporated with their permission, and subject to the conditions contained in
+the file COPYING.Google.
+
+Portions of this file contain modifications contributed and copyrighted
+by Percona Inc.. Those modifications are
+gratefully acknowledged and are described briefly in the InnoDB
+documentation. The contributions by Percona Inc. are incorporated with
+their permission, and subject to the conditions contained in the file
+COPYING.Percona.
+
+This program is free software; you can redistribute it and/or modify it under
+the terms of the GNU General Public License as published by the Free Software
+Foundation; version 2 of the License.
+
+This program is distributed in the hope that it will be useful, but WITHOUT
+ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
+FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details.
+
+You should have received a copy of the GNU General Public License along with
+this program; if not, write to the Free Software Foundation, Inc.,
+51 Franklin Street, Fifth Floor, Boston, MA 02110-1335 USA
+
+*****************************************************************************/
+
+/**************************************************//**
+@file srv/srv0srv.cc
+The database server main program
+
+Created 10/8/1995 Heikki Tuuri
+*******************************************************/
+
+#include "my_global.h"
+#include "mysql/psi/mysql_stage.h"
+#include "mysql/psi/psi.h"
+
+#include "btr0sea.h"
+#include "buf0flu.h"
+#include "buf0lru.h"
+#include "dict0boot.h"
+#include "dict0load.h"
+#include "ibuf0ibuf.h"
+#include "lock0lock.h"
+#include "log0recv.h"
+#include "mem0mem.h"
+#include "pars0pars.h"
+#include "que0que.h"
+#include "row0mysql.h"
+#include "row0log.h"
+#include "srv0mon.h"
+#include "srv0srv.h"
+#include "srv0start.h"
+#include "trx0i_s.h"
+#include "trx0purge.h"
+#include "btr0defragment.h"
+#include "ut0mem.h"
+#include "fil0fil.h"
+#include "fil0crypt.h"
+#include "fil0pagecompress.h"
+#include "trx0types.h"
+#include <list>
+#include "log.h"
+
+#include "transactional_lock_guard.h"
+
+#include <my_service_manager.h>
+/* The maximum allowed duration of a lock wait. */
+ulong srv_fatal_semaphore_wait_threshold = DEFAULT_SRV_FATAL_SEMAPHORE_TIMEOUT;
+
+/* By how much data manipulation language (DML) statements need to be delayed,
+in microseconds, in order to reduce the lagging of the purge thread. */
+ulint srv_dml_needed_delay;
+
+const char* srv_main_thread_op_info = ""; /* printed as "state: ..." by srv_printf_innodb_monitor() */
+
+/** Prefix used by MySQL to indicate pre-5.1 table name encoding */
+const char srv_mysql50_table_name_prefix[10] = "#mysql50#";
+
+/* Server parameters which are read from the initfile */
+
+/* The following three are dir paths which are concatenated before file
+names, where the file name itself may also contain a path */
+
+char* srv_data_home;
+
+/** Rollback files directory, can be absolute. */
+char* srv_undo_dir;
+
+/** The number of tablespaces to use for rollback segments. */
+uint srv_undo_tablespaces;
+
+/** The number of UNDO tablespaces that are open and ready to use. */
+uint32_t srv_undo_tablespaces_open;
+
+/** The number of UNDO tablespaces that are active (hosting some rollback
+segment). Depending on the configuration used, it is quite possible that
+some of the tablespaces do not host any rollback segment. */
+uint32_t srv_undo_tablespaces_active;
+
+/** Rate at which UNDO records should be purged. */
+ulong srv_purge_rseg_truncate_frequency;
+
+/** Enable or disable truncation of UNDO tablespaces.
+Note: If enabled, an UNDO tablespace will be selected for truncation.
+If the user disables this while the server is waiting for an undo
+tablespace to be truncated, that truncation is completed but no new
+tablespace is marked for truncation (the action is never aborted). */
+my_bool srv_undo_log_truncate;
+
+/** Maximum size of undo tablespace. */
+unsigned long long srv_max_undo_log_size;
+
+/** Set if InnoDB must operate in read-only mode. We don't do any
+recovery and open all tables in RO mode instead of RW mode. We don't
+sync the max trx id to disk either. */
+my_bool srv_read_only_mode;
+/** Store each table created by a user in its own file; data
+dictionary tables are in the system tablespace 0 */
+my_bool srv_file_per_table;
+/** Set if InnoDB operates in read-only mode or innodb-force-recovery
+is greater than SRV_FORCE_NO_TRX_UNDO. */
+my_bool high_level_read_only;
+
+/** Sort buffer size in index creation */
+ulong srv_sort_buf_size;
+/** Maximum modification log file size for online index creation */
+unsigned long long srv_online_max_size;
+
+/* If this flag is TRUE, then we will use the native aio of the
+OS (provided we compiled Innobase with it in), otherwise we will
+use simulated aio we build below with threads.
+Currently we support native aio on Windows and Linux. */
+my_bool srv_use_native_aio;
+my_bool srv_numa_interleave;
+/** copy of innodb_use_atomic_writes; @see innodb_init_params() */
+my_bool srv_use_atomic_writes;
+/** innodb_compression_algorithm; used with page compression */
+ulong innodb_compression_algorithm;
+
+/*------------------------- LOG FILES ------------------------ */
+char* srv_log_group_home_dir;
+
+/** The InnoDB redo log file size, or 0 when changing the redo log format
+at startup (while disallowing writes to the redo log). */
+ulonglong srv_log_file_size;
+/** innodb_flush_log_at_trx_commit */
+ulong srv_flush_log_at_trx_commit;
+/** innodb_flush_log_at_timeout */
+uint srv_flush_log_at_timeout;
+/** innodb_page_size */
+ulong srv_page_size;
+/** log2 of innodb_page_size; @see innodb_init_params() */
+uint32_t srv_page_size_shift;
+
+/** innodb_adaptive_flushing; try to flush dirty pages so as to avoid
+IO bursts at the checkpoints. */
+my_bool srv_adaptive_flushing;
+
+/** innodb_flush_sync; whether to ignore io_capacity at log checkpoints */
+my_bool srv_flush_sync;
+
+/** Common thread pool; created by srv_thread_pool_init() and destroyed by srv_thread_pool_end() */
+tpool::thread_pool* srv_thread_pool;
+
+/** Maximum number of times allowed to conditionally acquire
+mutex before switching to blocking wait on the mutex */
+#define MAX_MUTEX_NOWAIT 2
+
+/** Check whether the number of failed nonblocking mutex
+acquisition attempts is still below the maximum allowed value. Once it
+is not, srv_printf_innodb_monitor() will request mutex acquisition
+with mysql_mutex_lock(), which will wait until it gets the mutex. */
+#define MUTEX_NOWAIT(mutex_skipped) ((mutex_skipped) < MAX_MUTEX_NOWAIT)
+
+/** copy of innodb_buffer_pool_size */
+ulint srv_buf_pool_size;
+/** Requested buffer pool chunk size */
+size_t srv_buf_pool_chunk_unit;
+/** innodb_lru_scan_depth; number of blocks scanned in LRU flush batch */
+ulong srv_LRU_scan_depth;
+/** innodb_flush_neighbors; whether or not to flush neighbors of a block */
+ulong srv_flush_neighbors;
+/** Previously requested size */
+ulint srv_buf_pool_old_size;
+/** Current size as scaling factor for the other components */
+ulint srv_buf_pool_base_size;
+/** Current size in bytes */
+ulint srv_buf_pool_curr_size;
+/** Dump this % of each buffer pool during BP dump */
+ulong srv_buf_pool_dump_pct;
+/** Abort load after this number of pages (defined only if UNIV_DEBUG) */
+#ifdef UNIV_DEBUG
+ulong srv_buf_pool_load_pages_abort = LONG_MAX;
+#endif
+/** Lock table size in bytes */
+ulint srv_lock_table_size = ULINT_MAX;
+
+/** the value of innodb_checksum_algorithm */
+ulong srv_checksum_algorithm;
+
+/** innodb_read_io_threads */
+uint srv_n_read_io_threads;
+/** innodb_write_io_threads */
+uint srv_n_write_io_threads;
+
+/** innodb_random_read_ahead */
+my_bool srv_random_read_ahead;
+/** innodb_read_ahead_threshold; the number of pages that must be present
+in the buffer cache and accessed sequentially for InnoDB to trigger a
+readahead request. */
+ulong srv_read_ahead_threshold;
+
+/** innodb_change_buffer_max_size; maximum on-disk size of change
+buffer in terms of percentage of the buffer pool. */
+uint srv_change_buffer_max_size;
+
+ulong srv_file_flush_method;
+
+
+/** copy of innodb_open_files; @see innodb_init_params() */
+ulint srv_max_n_open_files;
+
+/** innodb_io_capacity */
+ulong srv_io_capacity;
+/** innodb_io_capacity_max */
+ulong srv_max_io_capacity;
+
+/* The InnoDB main thread tries to keep the ratio of modified pages
+in the buffer pool to all database pages in the buffer pool smaller than
+the following number. But it is not guaranteed that the value stays below
+that during a time of heavy update/insert activity. */
+
+/** innodb_max_dirty_pages_pct */
+double srv_max_buf_pool_modified_pct;
+/** innodb_max_dirty_pages_pct_lwm */
+double srv_max_dirty_pages_pct_lwm;
+
+/** innodb_adaptive_flushing_lwm; the percentage of log capacity at
+which adaptive flushing, if enabled, will kick in. */
+double srv_adaptive_flushing_lwm;
+
+/** innodb_flushing_avg_loops; number of iterations over which
+adaptive flushing is averaged */
+ulong srv_flushing_avg_loops;
+
+/** innodb_purge_threads; the number of purge tasks to use */
+uint srv_n_purge_threads;
+
+/** innodb_purge_batch_size, in pages */
+ulong srv_purge_batch_size;
+
+/** innodb_stats_method decides how InnoDB treats
+NULL values when collecting statistics. By default, it is set to
+SRV_STATS_NULLS_EQUAL(0), i.e. all NULL values are treated as equal */
+ulong srv_innodb_stats_method;
+
+srv_stats_t srv_stats;
+
+/* structure to pass status variables to MySQL; filled in by
+srv_export_innodb_status() */
+export_var_t export_vars;
+
+/** Normally 0. When nonzero, skip some phases of crash recovery,
+starting from SRV_FORCE_IGNORE_CORRUPT, so that data can be recovered
+by SELECT or mysqldump. When this is nonzero, we do not allow any user
+modifications to the data. */
+ulong srv_force_recovery;
+
+/** innodb_print_all_deadlocks; whether to print all user-level
+transactions deadlocks to the error log */
+my_bool srv_print_all_deadlocks;
+
+/** innodb_cmp_per_index_enabled; enable
+INFORMATION_SCHEMA.innodb_cmp_per_index */
+my_bool srv_cmp_per_index_enabled;
+
+/** innodb_fast_shutdown=1 skips purge and change buffer merge.
+innodb_fast_shutdown=2 effectively crashes the server (no log checkpoint).
+innodb_fast_shutdown=3 is a clean shutdown that skips the rollback
+of active transaction (to be done on restart). */
+uint srv_fast_shutdown;
+
+/** copy of innodb_status_file; generate an innodb_status.<pid> file */
+ibool srv_innodb_status;
+
+/** innodb_stats_transient_sample_pages;
+When estimating the number of different key values in an index, sample
+this many index pages. There are 2 ways to calculate statistics:
+* persistent stats that are calculated by ANALYZE TABLE and saved
+ in the innodb database.
+* quick transient stats, that are used if persistent stats for the given
+ table/index are not found in the innodb database */
+unsigned long long srv_stats_transient_sample_pages;
+/** innodb_stats_persistent */
+my_bool srv_stats_persistent;
+/** innodb_stats_include_delete_marked */
+my_bool srv_stats_include_delete_marked;
+/** innodb_stats_persistent_sample_pages */
+unsigned long long srv_stats_persistent_sample_pages;
+/** innodb_stats_auto_recalc */
+my_bool srv_stats_auto_recalc;
+
+/** innodb_stats_modified_counter; The number of rows modified before
+we calculate new statistics (default 0 = current limits) */
+unsigned long long srv_stats_modified_counter;
+
+/** innodb_stats_traditional; enable traditional statistic calculation
+based on number of configured pages */
+my_bool srv_stats_sample_traditional;
+
+my_bool srv_use_doublewrite_buf;
+
+/** innodb_sync_spin_loops */
+ulong srv_n_spin_wait_rounds;
+/** innodb_spin_wait_delay */
+uint srv_spin_wait_delay;
+
+/** Number of initialized rollback segments for persistent undo log */
+ulong srv_available_undo_logs;
+
+/* Defragmentation */
+my_bool srv_defragment;
+/** innodb_defragment_n_pages */
+uint srv_defragment_n_pages;
+uint srv_defragment_stats_accuracy;
+/** innodb_defragment_fill_factor_n_recs */
+uint srv_defragment_fill_factor_n_recs;
+/** innodb_defragment_fill_factor */
+double srv_defragment_fill_factor;
+/** innodb_defragment_frequency */
+uint srv_defragment_frequency;
+/** derived from innodb_defragment_frequency;
+@see innodb_defragment_frequency_update() */
+ulonglong srv_defragment_interval;
+
+/** Current mode of operation */
+enum srv_operation_mode srv_operation;
+
+/** whether this is the server's first start after mariabackup --prepare */
+bool srv_start_after_restore;
+
+/* Set the following to 0 if you want InnoDB to write messages on
+stderr on startup/shutdown. Not enabled on the embedded server. */
+ibool srv_print_verbose_log;
+my_bool srv_print_innodb_monitor;
+my_bool srv_print_innodb_lock_monitor;
+/** innodb_force_primary_key; whether to disallow CREATE TABLE without
+PRIMARY KEY */
+my_bool srv_force_primary_key;
+
+/** innodb_encrypt_temporary_tables; a boolean flag (not a key version)
+for encryption of the temporary tablespace */
+my_bool innodb_encrypt_temporary_tables;
+
+my_bool srv_immediate_scrub_data_uncompressed;
+
+static time_t srv_last_monitor_time; /* read and written under srv_innodb_monitor_mutex in this file */
+
+static mysql_mutex_t srv_innodb_monitor_mutex;
+
+/** Mutex protecting page_zip_stat_per_index */
+mysql_mutex_t page_zip_stat_per_index_mutex;
+
+/** Mutex for locking srv_monitor_file */
+mysql_mutex_t srv_monitor_file_mutex;
+
+/** Temporary file for innodb monitor output */
+FILE* srv_monitor_file;
+/** Mutex for locking srv_misc_tmpfile */
+mysql_mutex_t srv_misc_tmpfile_mutex;
+/** Temporary file for miscellaneous diagnostic output */
+FILE* srv_misc_tmpfile;
+
+/* The following counts are used by the srv_master_callback. */
+
+/** Iterations of the loop bounded by 'srv_active' label. */
+ulint srv_main_active_loops;
+/** Iterations of the loop bounded by the 'srv_idle' label. */
+ulint srv_main_idle_loops;
+/** Iterations of the loop bounded by the 'srv_shutdown' label. */
+static ulint srv_main_shutdown_loops;
+/** Log writes involving flush. */
+ulint srv_log_writes_and_flush;
+
+/* This is only ever touched by the master thread. It records the
+time when the last flush of log file has happened. The master
+thread ensures that we flush the log files at least once per
+second. */
+static time_t srv_last_log_flush_time;
+
+/** Buffer pool dump status frequency, as a percentage */
+ulong srv_buf_dump_status_frequency;
+
+/*
+ IMPLEMENTATION OF THE SERVER MAIN PROGRAM
+ =========================================
+
+There is the following analogy between this database
+server and an operating system kernel:
+
+DB concept equivalent OS concept
+---------- ---------------------
+transaction -- process;
+
+query thread -- thread;
+
+lock -- semaphore;
+
+kernel -- kernel;
+
+query thread execution:
+(a) without lock_sys.latch
+reserved -- process executing in user mode;
+(b) with lock_sys.latch reserved
+ -- process executing in kernel mode;
+
+The server has several background threads all running at the same
+priority as user threads.
+
+The threads which we call user threads serve the queries of the MySQL
+server. They run at normal priority.
+
+When there is no activity in the system, also the master thread
+suspends itself to wait for an event making the server totally silent.
+
+There is still one complication in our server design. If a
+background utility thread obtains a resource (e.g., mutex) needed by a user
+thread, and there is also some other user activity in the system,
+the user thread may have to wait indefinitely long for the
+resource, as the OS does not schedule a background thread if
+there is some other runnable user thread. This problem is called
+priority inversion in real-time programming.
+
+One solution to the priority inversion problem would be to keep record
+of which thread owns which resource and in the above case boost the
+priority of the background thread so that it will be scheduled and it
+can release the resource. This solution is called priority inheritance
+in real-time programming. A drawback of this solution is that the overhead
+of acquiring a mutex increases slightly, maybe 0.2 microseconds on a 100
+MHz Pentium, because the thread has to call pthread_self. This may
+be compared to 0.5 microsecond overhead for a mutex lock-unlock pair. Note
+that the thread cannot store the information in the resource (say, a mutex)
+itself, because competing threads could wipe out the information if it is
+stored before acquiring the mutex, and if it is stored afterwards, the
+information is outdated for the time of one machine instruction, at least.
+(To be precise, the information could be stored to lock_word in mutex if
+the machine supports atomic swap.)
+
+The above solution with priority inheritance may become relevant in the
+future; currently we do not implement any priority twiddling solution.
+Our general aim is to reduce the contention of all mutexes by making
+them more fine grained.
+
+The thread table contains information of the current status of each
+thread existing in the system, and also the event semaphores used in
+suspending the master thread and utility threads when they have nothing
+to do. The thread table can be seen as an analogue to the process table
+in a traditional Unix implementation. */
+
+/** The server system struct: the task queue with its mutex, plus the
+server activity counter */
+struct srv_sys_t{
+ mysql_mutex_t tasks_mutex; /*!< mutex protecting the
+ tasks queue */
+ UT_LIST_BASE_NODE_T(que_thr_t)
+ tasks; /*!< task queue */
+
+ srv_stats_t::ulint_ctr_1_t
+ activity_count; /*!< For tracking server
+ activity */
+};
+
+static srv_sys_t srv_sys; /* tasks_mutex and tasks are set up in srv_init() */
+
+/*
+ Structure shared by timer and coordinator_callback.
+ No protection is necessary, since the timer and the task never run
+ in parallel (they are in the same task group of size 1).
+*/
+struct purge_coordinator_state
+{
+ /** Snapshot of the last history length before the purge call. */
+ size_t history_size;
+ Atomic_counter<int> m_running;
+public: /* redundant in a struct: members are public by default */
+ inline void do_purge();
+};
+
+static purge_coordinator_state purge_state;
+
+/** Thread pool timer for srv_monitor_task() */
+std::unique_ptr<tpool::timer> srv_monitor_timer;
+
+
+/** The buffer pool dump/load file name */
+char* srv_buf_dump_filename;
+
+/** Boolean config knobs that tell InnoDB to dump the buffer pool at shutdown
+and/or load it during startup. Both are initialized to TRUE. */
+char srv_buffer_pool_dump_at_shutdown = TRUE;
+char srv_buffer_pool_load_at_startup = TRUE;
+
+#ifdef HAVE_PSI_STAGE_INTERFACE
+/** Performance schema stage event for monitoring ALTER TABLE progress
+in ha_innobase::commit_inplace_alter_table(). */
+PSI_stage_info srv_stage_alter_table_end
+ = {0, "alter table (end)", PSI_FLAG_STAGE_PROGRESS};
+
+/** Performance schema stage event for monitoring ALTER TABLE progress
+in row_merge_insert_index_tuples(). */
+PSI_stage_info srv_stage_alter_table_insert
+ = {0, "alter table (insert)", PSI_FLAG_STAGE_PROGRESS};
+
+/** Performance schema stage event for monitoring ALTER TABLE progress
+in row_log_apply(). */
+PSI_stage_info srv_stage_alter_table_log_index
+ = {0, "alter table (log apply index)", PSI_FLAG_STAGE_PROGRESS};
+
+/** Performance schema stage event for monitoring ALTER TABLE progress
+in row_log_table_apply(). */
+PSI_stage_info srv_stage_alter_table_log_table
+ = {0, "alter table (log apply table)", PSI_FLAG_STAGE_PROGRESS};
+
+/** Performance schema stage event for monitoring ALTER TABLE progress
+in row_merge_sort(). */
+PSI_stage_info srv_stage_alter_table_merge_sort
+ = {0, "alter table (merge sort)", PSI_FLAG_STAGE_PROGRESS};
+
+/** Performance schema stage event for monitoring ALTER TABLE progress
+in row_merge_read_clustered_index(). */
+PSI_stage_info srv_stage_alter_table_read_pk_internal_sort
+ = {0, "alter table (read PK and internal sort)", PSI_FLAG_STAGE_PROGRESS};
+
+/** Performance schema stage event for monitoring buffer pool load progress. */
+PSI_stage_info srv_stage_buffer_pool_load
+ = {0, "buffer pool load", PSI_FLAG_STAGE_PROGRESS};
+#endif /* HAVE_PSI_STAGE_INTERFACE */
+
+/*********************************************************************//**
+Prints the srv_master_thread loop counters (active, shutdown, idle) and
+the count of log writes involving a flush, as two lines of text. */
+static
+void
+srv_print_master_thread_info(
+/*=========================*/
+	FILE*	file)	/*!< in: output stream */
+{
+	/* The argument order below must follow the order of the
+	labels in the format: active, shutdown, idle, log writes */
+	fprintf(file,
+		"srv_master_thread loops: "
+		ULINTPF " srv_active, "
+		ULINTPF " srv_shutdown, "
+		ULINTPF " srv_idle\n"
+		"srv_master_thread log flush and writes: "
+		ULINTPF "\n",
+		srv_main_active_loops, srv_main_shutdown_loops,
+		srv_main_idle_loops, srv_log_writes_and_flush);
+}
+
+static void thread_pool_thread_init() /* first callback passed to set_thread_callbacks() in srv_thread_pool_init() */
+{
+ my_thread_init();
+ pfs_register_thread(thread_pool_thread_key);
+}
+static void thread_pool_thread_end() /* second callback passed to set_thread_callbacks() in srv_thread_pool_init() */
+{
+ pfs_delete_thread(); /* mirrors thread_pool_thread_init() in reverse order */
+ my_thread_end();
+}
+
+
+/** Create the common thread pool srv_thread_pool and register
+thread_pool_thread_init() and thread_pool_thread_end() as its thread
+callbacks. The pool must not exist yet (debug assertion). */
+void srv_thread_pool_init()
+{
+	DBUG_ASSERT(!srv_thread_pool);
+
+	/* Pick the pool implementation for the target platform */
+#ifdef _WIN32
+	srv_thread_pool = tpool::create_thread_pool_win();
+#else
+	srv_thread_pool = tpool::create_thread_pool_generic();
+#endif
+
+	srv_thread_pool->set_thread_callbacks(thread_pool_thread_init,
+					      thread_pool_thread_end);
+}
+
+
+void srv_thread_pool_end() /* destroys the pool created by srv_thread_pool_init() */
+{
+ ut_ad(!srv_master_timer); /* debug check: the master timer must already be gone */
+ delete srv_thread_pool;
+ srv_thread_pool= nullptr; /* lets the assertion in srv_thread_pool_init() pass again */
+}
+
+static bool need_srv_free; /* set by srv_init(); srv_free() returns early while this is false */
+
+/** Initialize the server: create srv_innodb_monitor_mutex,
+srv_sys.tasks_mutex and page_zip_stat_per_index_mutex, initialize the
+srv_sys.tasks list and the INFORMATION_SCHEMA cache trx_i_s_cache, and
+set need_srv_free so that srv_free() will release them. */
+static void srv_init()
+{
+ mysql_mutex_init(srv_innodb_monitor_mutex_key,
+ &srv_innodb_monitor_mutex, nullptr);
+ mysql_mutex_init(srv_threads_mutex_key, &srv_sys.tasks_mutex, nullptr);
+ UT_LIST_INIT(srv_sys.tasks, &que_thr_t::queue);
+
+ need_srv_free = true;
+
+ mysql_mutex_init(page_zip_stat_per_index_mutex_key,
+ &page_zip_stat_per_index_mutex, nullptr);
+
+ /* Initialize some INFORMATION SCHEMA internal structures */
+ trx_i_s_cache_init(trx_i_s_cache);
+}
+
+/*********************************************************************//**
+Frees the data structures created in srv_init() and shuts down the
+common thread pool via srv_thread_pool_end().
+Does nothing unless srv_init() has run since the previous call, so
+calling it repeatedly cannot destroy the same mutexes twice. */
+void
+srv_free(void)
+/*==========*/
+{
+	if (!need_srv_free) {
+		return;
+	}
+
+	/* Clear the guard before releasing anything: destroying an
+	already destroyed mutex is undefined behaviour, and only
+	srv_init() may re-arm this function. */
+	need_srv_free = false;
+
+	mysql_mutex_destroy(&srv_innodb_monitor_mutex);
+	mysql_mutex_destroy(&page_zip_stat_per_index_mutex);
+	mysql_mutex_destroy(&srv_sys.tasks_mutex);
+
+	trx_i_s_cache_free(trx_i_s_cache);
+	srv_thread_pool_end();
+}
+
+/*********************************************************************//**
+Boots the InnoDB server: calls buf_dblwr.init(), srv_thread_pool_init(),
+trx_pool_init() and srv_init(), in that order. Unless NO_ELISION is
+defined, first reports when transactional_lock_enabled() returns true. */
+void srv_boot()
+{
+#ifndef NO_ELISION
+ if (transactional_lock_enabled())
+ sql_print_information("InnoDB: Using transactional memory");
+#endif
+ buf_dblwr.init();
+ srv_thread_pool_init();
+ trx_pool_init();
+ srv_init();
+}
+
+/******************************************************************//**
+Refreshes the values used to calculate per-second averages.
+Nothing is refreshed while fewer than 60 seconds have passed since
+srv_last_monitor_time.
+@param current_time	time to compare with, and to store in,
+			srv_last_monitor_time */
+static void srv_refresh_innodb_monitor_stats(time_t current_time)
+{
+	mysql_mutex_lock(&srv_innodb_monitor_mutex);
+
+	if (!(difftime(current_time, srv_last_monitor_time) < 60)) {
+		/* We refresh InnoDB Monitor values so that averages are
+		printed from at most 60 last seconds */
+		srv_last_monitor_time = current_time;
+
+		os_aio_refresh_stats();
+
+#ifdef BTR_CUR_HASH_ADAPT
+		btr_cur_n_sea_old = btr_cur_n_sea;
+		btr_cur_n_non_sea_old = btr_cur_n_non_sea;
+#endif /* BTR_CUR_HASH_ADAPT */
+
+		buf_refresh_io_stats();
+	}
+
+	mysql_mutex_unlock(&srv_innodb_monitor_mutex);
+}
+
+/******************************************************************//**
+Outputs to a file the output of the InnoDB Monitor.
+The report is written while holding srv_innodb_monitor_mutex, and
+srv_last_monitor_time is advanced to the time of this call. The list
+of transactions is printed only if lock_print_info_summary() succeeds;
+all other sections are always printed.
+@return FALSE if not all information printed
+due to failure to obtain necessary mutex */
+ibool
+srv_printf_innodb_monitor(
+/*======================*/
+	FILE*	file,		/*!< in: output stream */
+	ibool	nowait,		/*!< in: passed on to
+				lock_print_info_summary(); whether
+				to give up instead of waiting for
+				lock_sys.latch */
+	ulint*	trx_start_pos,	/*!< out: file position of the start of
+				the list of active transactions;
+				may be NULL */
+	ulint*	trx_end)	/*!< out: file position of the end of
+				the list of active transactions;
+				may be NULL */
+{
+	/* Current position in file, or ULINT_UNDEFINED if ftell() fails */
+	const auto file_pos = [file]() -> ulint {
+		const long	pos = ftell(file);
+		return pos < 0 ? ULINT_UNDEFINED : static_cast<ulint>(pos);
+	};
+
+	mysql_mutex_lock(&srv_innodb_monitor_mutex);
+
+	const time_t	current_time = time(NULL);
+
+	/* We add 0.001 seconds to time_elapsed to prevent division
+	by zero if two users happen to call SHOW ENGINE INNODB STATUS at the
+	same time */
+
+	const double	time_elapsed
+		= difftime(current_time, srv_last_monitor_time) + 0.001;
+
+	/* Store the very timestamp that time_elapsed was computed from,
+	so that the next interval starts exactly where this one ended */
+	srv_last_monitor_time = current_time;
+
+	fputs("\n=====================================\n", file);
+
+	ut_print_timestamp(file);
+	fprintf(file,
+		" INNODB MONITOR OUTPUT\n"
+		"=====================================\n"
+		"Per second averages calculated from the last %lu seconds\n",
+		(ulong) time_elapsed);
+
+	fputs("-----------------\n"
+	      "BACKGROUND THREAD\n"
+	      "-----------------\n", file);
+	srv_print_master_thread_info(file);
+
+	/* This section is intentionally left blank, for tools like "innotop" */
+	fputs("----------\n"
+	      "SEMAPHORES\n"
+	      "----------\n", file);
+	/* End of intentionally blank section */
+
+	/* Conceptually, srv_innodb_monitor_mutex has a very high latching
+	order level, while dict_foreign_err_mutex has a very low level.
+	Therefore we can reserve the latter mutex here without
+	a danger of a deadlock of threads. */
+
+	mysql_mutex_lock(&dict_foreign_err_mutex);
+
+	if (!srv_read_only_mode && ftell(dict_foreign_err_file) != 0L) {
+		fputs("------------------------\n"
+		      "LATEST FOREIGN KEY ERROR\n"
+		      "------------------------\n", file);
+		ut_copy_file(file, dict_foreign_err_file);
+	}
+
+	mysql_mutex_unlock(&dict_foreign_err_mutex);
+
+	/* Only if lock_print_info_summary proceeds correctly,
+	before we call the lock_print_info_all_transactions
+	to print all the lock information. IMPORTANT NOTE: This
+	function acquires exclusive lock_sys.latch on success. */
+	const ibool	ret = lock_print_info_summary(file, nowait);
+
+	if (ret) {
+		if (trx_start_pos) {
+			*trx_start_pos = file_pos();
+		}
+
+		/* NOTE: The following function will release the lock_sys.latch
+		that lock_print_info_summary() acquired. */
+
+		lock_print_info_all_transactions(file);
+
+		if (trx_end) {
+			*trx_end = file_pos();
+		}
+	}
+
+	fputs("--------\n"
+	      "FILE I/O\n"
+	      "--------\n", file);
+	os_aio_print(file);
+
+	ibuf_print(file);
+
+#ifdef BTR_CUR_HASH_ADAPT
+	if (btr_search_enabled) {
+		fputs("-------------------\n"
+		      "ADAPTIVE HASH INDEX\n"
+		      "-------------------\n", file);
+		for (ulint i = 0; i < btr_ahi_parts; ++i) {
+			const auto part= &btr_search_sys.parts[i];
+			part->latch.rd_lock(SRW_LOCK_CALL);
+			/* btr_search_enabled was tested before the latch
+			was acquired. srv_export_innodb_status() treats
+			part->heap as possibly null under this latch, so
+			do the same here instead of dereferencing it
+			unconditionally. */
+			if (part->heap) {
+				ut_ad(part->heap->type
+				      == MEM_HEAP_FOR_BTR_SEARCH);
+				fprintf(file, "Hash table size " ULINTPF
+					", node heap has " ULINTPF
+					" buffer(s)\n",
+					part->table.n_cells,
+					part->heap->base.count
+					- !part->heap->free_block);
+			}
+			part->latch.rd_unlock();
+		}
+
+		const ulint with_ahi = btr_cur_n_sea;
+		const ulint without_ahi = btr_cur_n_non_sea;
+		fprintf(file,
+			"%.2f hash searches/s, %.2f non-hash searches/s\n",
+			static_cast<double>(with_ahi - btr_cur_n_sea_old)
+			/ time_elapsed,
+			static_cast<double>(without_ahi - btr_cur_n_non_sea_old)
+			/ time_elapsed);
+		/* Start the next per-second average from these values */
+		btr_cur_n_sea_old = with_ahi;
+		btr_cur_n_non_sea_old = without_ahi;
+	}
+#endif /* BTR_CUR_HASH_ADAPT */
+
+	fputs("---\n"
+	      "LOG\n"
+	      "---\n", file);
+	log_print(file);
+
+	fputs("----------------------\n"
+	      "BUFFER POOL AND MEMORY\n"
+	      "----------------------\n", file);
+	fprintf(file,
+		"Total large memory allocated " ULINTPF "\n"
+		"Dictionary memory allocated " ULINTPF "\n",
+		ulint{os_total_large_mem_allocated},
+		dict_sys.rough_size());
+
+	buf_print_io(file);
+
+	fputs("--------------\n"
+	      "ROW OPERATIONS\n"
+	      "--------------\n", file);
+	fprintf(file, ULINTPF " read views open inside InnoDB\n",
+		trx_sys.view_count());
+
+	if (ulint n_reserved = fil_system.sys_space->n_reserved_extents) {
+		fprintf(file,
+			ULINTPF " tablespace extents now reserved for"
+			" B-tree split operations\n",
+			n_reserved);
+	}
+
+	fprintf(file, "state: %s\n", srv_main_thread_op_info);
+
+	fputs("----------------------------\n"
+	      "END OF INNODB MONITOR OUTPUT\n"
+	      "============================\n", file);
+	mysql_mutex_unlock(&srv_innodb_monitor_mutex);
+	fflush(file);
+
+	return(ret);
+}
+
+/******************************************************************//**
+Function to pass InnoDB status variables to MySQL: fills in export_vars.
+Encryption rotation statistics are collected and exported only when
+srv_read_only_mode is not set. Most assignments are made while holding
+srv_innodb_monitor_mutex; the log sequence numbers are read under a
+shared log_sys.latch after that mutex has been released. */
+void
+srv_export_innodb_status(void)
+/*==========================*/
+{
+ fil_crypt_stat_t crypt_stat; /* filled in and read only if !srv_read_only_mode */
+
+ if (!srv_read_only_mode) {
+ fil_crypt_total_stat(&crypt_stat);
+ }
+
+#ifdef BTR_CUR_HASH_ADAPT
+ export_vars.innodb_ahi_hit = btr_cur_n_sea;
+ export_vars.innodb_ahi_miss = btr_cur_n_non_sea;
+
+ ulint mem_adaptive_hash = 0;
+ for (ulong i = 0; i < btr_ahi_parts; i++) {
+ const auto part= &btr_search_sys.parts[i];
+ part->latch.rd_lock(SRW_LOCK_CALL);
+ if (part->heap) { /* parts without a heap contribute nothing */
+ ut_ad(part->heap->type == MEM_HEAP_FOR_BTR_SEARCH);
+
+ mem_adaptive_hash += mem_heap_get_size(part->heap)
+ + part->table.n_cells * sizeof(hash_cell_t);
+ }
+ part->latch.rd_unlock();
+ }
+ export_vars.innodb_mem_adaptive_hash = mem_adaptive_hash;
+#endif
+
+ export_vars.innodb_mem_dictionary = dict_sys.rough_size();
+
+ mysql_mutex_lock(&srv_innodb_monitor_mutex);
+
+ export_vars.innodb_data_pending_reads =
+ ulint(MONITOR_VALUE(MONITOR_OS_PENDING_READS));
+
+ export_vars.innodb_data_pending_writes =
+ ulint(MONITOR_VALUE(MONITOR_OS_PENDING_WRITES));
+
+ export_vars.innodb_data_read = srv_stats.data_read;
+
+ export_vars.innodb_data_reads = os_n_file_reads;
+
+ export_vars.innodb_data_writes = os_n_file_writes;
+
+ buf_dblwr.lock();
+ ulint dblwr = buf_dblwr.written();
+ export_vars.innodb_dblwr_pages_written = dblwr;
+ export_vars.innodb_dblwr_writes = buf_dblwr.batches();
+ buf_dblwr.unlock();
+
+ export_vars.innodb_data_written = srv_stats.data_written
+ + (dblwr << srv_page_size_shift); /* doublewrite pages converted to bytes */
+
+ export_vars.innodb_buffer_pool_bytes_data =
+ buf_pool.stat.LRU_bytes
+ + (UT_LIST_GET_LEN(buf_pool.unzip_LRU)
+ << srv_page_size_shift);
+
+#ifdef UNIV_DEBUG
+ export_vars.innodb_buffer_pool_pages_latched =
+ buf_get_latched_pages_number();
+#endif /* UNIV_DEBUG */
+ export_vars.innodb_buffer_pool_pages_total = buf_pool.get_n_pages();
+
+ export_vars.innodb_buffer_pool_pages_misc =
+ buf_pool.get_n_pages()
+ - UT_LIST_GET_LEN(buf_pool.LRU)
+ - UT_LIST_GET_LEN(buf_pool.free);
+
+ export_vars.innodb_max_trx_id = trx_sys.get_max_trx_id();
+ export_vars.innodb_history_list_length = trx_sys.history_size_approx();
+
+ mysql_mutex_lock(&lock_sys.wait_mutex);
+ export_vars.innodb_row_lock_waits = lock_sys.get_wait_cumulative();
+
+ export_vars.innodb_row_lock_current_waits= lock_sys.get_wait_pending();
+
+ export_vars.innodb_row_lock_time = lock_sys.get_wait_time_cumulative();
+ export_vars.innodb_row_lock_time_max = lock_sys.get_wait_time_max();
+
+ mysql_mutex_unlock(&lock_sys.wait_mutex);
+
+ export_vars.innodb_row_lock_time_avg= export_vars.innodb_row_lock_waits
+ ? static_cast<ulint>(export_vars.innodb_row_lock_time
+ / export_vars.innodb_row_lock_waits)
+ : 0; /* avoid dividing by zero when there were no waits */
+
+ export_vars.innodb_page_compression_saved = srv_stats.page_compression_saved;
+ export_vars.innodb_pages_page_compressed = srv_stats.pages_page_compressed;
+ export_vars.innodb_page_compressed_trim_op = srv_stats.page_compressed_trim_op;
+ export_vars.innodb_pages_page_decompressed = srv_stats.pages_page_decompressed;
+ export_vars.innodb_pages_page_compression_error = srv_stats.pages_page_compression_error;
+ export_vars.innodb_pages_decrypted = srv_stats.pages_decrypted;
+ export_vars.innodb_pages_encrypted = srv_stats.pages_encrypted;
+ export_vars.innodb_n_merge_blocks_encrypted = srv_stats.n_merge_blocks_encrypted;
+ export_vars.innodb_n_merge_blocks_decrypted = srv_stats.n_merge_blocks_decrypted;
+ export_vars.innodb_n_rowlog_blocks_encrypted = srv_stats.n_rowlog_blocks_encrypted;
+ export_vars.innodb_n_rowlog_blocks_decrypted = srv_stats.n_rowlog_blocks_decrypted;
+
+ export_vars.innodb_n_temp_blocks_encrypted =
+ srv_stats.n_temp_blocks_encrypted;
+
+ export_vars.innodb_n_temp_blocks_decrypted =
+ srv_stats.n_temp_blocks_decrypted;
+
+ export_vars.innodb_defragment_compression_failures =
+ btr_defragment_compression_failures;
+ export_vars.innodb_defragment_failures = btr_defragment_failures;
+ export_vars.innodb_defragment_count = btr_defragment_count;
+
+ export_vars.innodb_onlineddl_rowlog_rows = onlineddl_rowlog_rows;
+ export_vars.innodb_onlineddl_rowlog_pct_used = onlineddl_rowlog_pct_used;
+ export_vars.innodb_onlineddl_pct_progress = onlineddl_pct_progress;
+
+ if (!srv_read_only_mode) { /* crypt_stat was filled in above under the same condition */
+ export_vars.innodb_encryption_rotation_pages_read_from_cache =
+ crypt_stat.pages_read_from_cache;
+ export_vars.innodb_encryption_rotation_pages_read_from_disk =
+ crypt_stat.pages_read_from_disk;
+ export_vars.innodb_encryption_rotation_pages_modified =
+ crypt_stat.pages_modified;
+ export_vars.innodb_encryption_rotation_pages_flushed =
+ crypt_stat.pages_flushed;
+ export_vars.innodb_encryption_rotation_estimated_iops =
+ crypt_stat.estimated_iops;
+ export_vars.innodb_encryption_key_requests =
+ srv_stats.n_key_requests;
+ }
+
+ mysql_mutex_unlock(&srv_innodb_monitor_mutex);
+
+ log_sys.latch.rd_lock(SRW_LOCK_CALL);
+ export_vars.innodb_lsn_current = log_sys.get_lsn();
+ export_vars.innodb_lsn_flushed = log_sys.get_flushed_lsn();
+ export_vars.innodb_lsn_last_checkpoint = log_sys.last_checkpoint_lsn;
+ export_vars.innodb_checkpoint_max_age = static_cast<ulint>(
+ log_sys.max_checkpoint_age);
+ log_sys.latch.rd_unlock();
+ export_vars.innodb_os_log_written = export_vars.innodb_lsn_current
+ - recv_sys.lsn; /* derived from the values copied above, after the latch was released */
+
+ export_vars.innodb_checkpoint_age = static_cast<ulint>(
+ export_vars.innodb_lsn_current
+ - export_vars.innodb_lsn_last_checkpoint);
+}
+
+/** Bookkeeping that srv_monitor() keeps between its invocations. */
+struct srv_monitor_state_t
+{
+  /** time against which srv_monitor() measures its 15-second interval */
+  time_t last_monitor_time;
+  /** number of consecutive srv_printf_innodb_monitor() calls that
+  returned false */
+  ulint mutex_skipped= 0;
+  /** whether srv_print_innodb_monitor was observed to be set */
+  bool last_srv_print_monitor= false;
+
+  /** Stamp the global srv_last_monitor_time with the current time
+  and start our own interval from the same value. */
+  srv_monitor_state_t()
+  {
+    last_monitor_time= srv_last_monitor_time= time(NULL);
+  }
+};
+
+static srv_monitor_state_t monitor_state;
+
+/** A task which prints the info output by various InnoDB monitors.
+At most once every 15 seconds the monitor report is written to stderr
+(if srv_print_innodb_monitor is set) and to srv_monitor_file (if
+srv_innodb_status is set and the server is not read-only). Every call
+ends with srv_refresh_innodb_monitor_stats(). */
+static void srv_monitor()
+{
+	time_t current_time = time(NULL);
+
+	if (difftime(current_time, monitor_state.last_monitor_time) >= 15) {
+		monitor_state.last_monitor_time = current_time;
+
+		if (srv_print_innodb_monitor) {
+			/* Reset mutex_skipped counter everytime
+			srv_print_innodb_monitor changes. This is to
+			ensure we will not be blocked by lock_sys.latch
+			for short duration information printing */
+			if (!monitor_state.last_srv_print_monitor) {
+				monitor_state.mutex_skipped = 0;
+				monitor_state.last_srv_print_monitor = true;
+			}
+
+			if (!srv_printf_innodb_monitor(stderr,
+				MUTEX_NOWAIT(monitor_state.mutex_skipped),
+				NULL, NULL)) {
+				monitor_state.mutex_skipped++;
+			} else {
+				/* Reset the counter */
+				monitor_state.mutex_skipped = 0;
+			}
+		} else {
+			/* Remember that the monitor is switched off, so
+			that mutex_skipped is reset the next time it is
+			switched on. Leave last_monitor_time alone:
+			zeroing it would defeat the 15-second interval
+			for the status file below. */
+			monitor_state.last_srv_print_monitor = false;
+		}
+
+		/* We don't create the temp files or associated
+		mutexes in read-only-mode */
+
+		if (!srv_read_only_mode && srv_innodb_status) {
+			mysql_mutex_lock(&srv_monitor_file_mutex);
+			/* Overwrite the previous report from the start
+			and cut the file off after the new one. */
+			rewind(srv_monitor_file);
+			if (!srv_printf_innodb_monitor(srv_monitor_file,
+				MUTEX_NOWAIT(monitor_state.mutex_skipped),
+				NULL, NULL)) {
+				monitor_state.mutex_skipped++;
+			} else {
+				monitor_state.mutex_skipped = 0;
+			}
+
+			os_file_set_eof(srv_monitor_file);
+			mysql_mutex_unlock(&srv_monitor_file_mutex);
+		}
+	}
+
+	srv_refresh_innodb_monitor_stats(current_time);
+}
+
+/** Periodic task which prints the info output by various InnoDB monitors.
+Before calling srv_monitor() it asserts that the LSN did not move
+backwards, refreshes the LRU eviction statistics and checks how long
+the oldest waiter has been waiting for dict_sys.latch. */
+void srv_monitor_task(void*)
+{
+	/* LSN observed by the previous invocation */
+	static lsn_t old_lsn = recv_sys.lsn;
+
+	ut_ad(!srv_read_only_mode);
+
+	/* Try to track a strange bug reported by Harald Fuchs and others,
+	where the lsn seems to decrease at times */
+	const lsn_t new_lsn = log_sys.get_lsn();
+	ut_a(new_lsn >= old_lsn);
+	old_lsn = new_lsn;
+
+	/* Update the statistics collected for deciding LRU
+	eviction policy. */
+	buf_LRU_stat_update();
+
+	const ulonglong now = my_hrtime_coarse().val;
+	const ulong threshold = srv_fatal_semaphore_wait_threshold;
+	const ulonglong start = dict_sys.oldest_wait();
+
+	/* A zero start means that nobody is waiting; a start in the
+	future is ignored. */
+	if (start && now >= start) {
+		/* the timestamps are in microseconds */
+		const ulong waited =
+			static_cast<ulong>((now - start) / 1000000);
+
+		if (waited >= threshold) {
+			ib::fatal() << dict_sys.fatal_msg;
+		}
+
+		/* Warn at one, two and three quarters of the threshold. */
+		const bool milestone = waited == threshold / 4
+			|| waited == threshold / 2
+			|| waited == threshold / 4 * 3;
+
+		if (milestone) {
+			ib::warn() << "Long wait (" << waited
+				   << " seconds) for dict_sys.latch";
+		}
+	}
+
+	srv_monitor();
+}
+
+/** Increment the server activity count (srv_sys.activity_count). */
+void srv_inc_activity_count()
+{
+	srv_sys.activity_count.inc();
+}
+
+#ifdef UNIV_DEBUG
+/** @return whether purge or master task is active */
+bool srv_any_background_activity()
+{
+  const bool active= purge_sys.enabled() || srv_master_timer.get();
+  /* Neither of them is expected to exist in read-only mode. */
+  ut_ad(!active || !srv_read_only_mode);
+  return active;
+}
+#endif /* UNIV_DEBUG */
+
+static void purge_worker_callback(void*);
+static void purge_coordinator_callback(void*);
+/** Task callback: call free_history() on a snapshot of purge_sys.head. */
+static void purge_truncation_callback(void*)
+{
+  /* Copy the iterator while holding purge_sys.latch in shared mode;
+  free_history() itself is invoked after the latch was released. */
+  purge_sys.latch.rd_lock(SRW_LOCK_CALL);
+  const purge_sys_t::iterator snapshot= purge_sys.head;
+  purge_sys.latch.rd_unlock();
+
+  snapshot.free_history();
+}
+
+static tpool::task_group purge_task_group; /* task group of purge_worker_task */
+tpool::waitable_task purge_worker_task(purge_worker_callback, nullptr, /* runs purge_worker_callback() */
+ &purge_task_group);
+static tpool::task_group purge_coordinator_task_group(1); /* NOTE(review): the argument 1 presumably caps the group at one concurrent task - confirm against tpool::task_group */
+static tpool::waitable_task purge_coordinator_task /* runs purge_coordinator_callback(); submitted by wake_if_not_active(), resume() and srv_purge_shutdown() */
+ (purge_coordinator_callback, nullptr, &purge_coordinator_task_group);
+static tpool::task_group purge_truncation_task_group(1); /* same construction as purge_coordinator_task_group */
+static tpool::waitable_task purge_truncation_task /* runs purge_truncation_callback(); submitted and awaited by do_purge() */
+ (purge_truncation_callback, nullptr, &purge_truncation_task_group);
+
+/** Wake up the purge threads if there is work to do.
+purge_coordinator_task is submitted when purge is enabled and not paused,
+purge_state.m_running reads as zero, and either srv_undo_log_truncate is
+set or trx_sys.history_exists(). The pre-increment of m_running is the
+last operand of the condition, so it is only executed when all the other
+checks passed, and only the caller that raised the counter to exactly 1
+submits the task. */
+void purge_sys_t::wake_if_not_active()
+{
+ if (enabled() && !paused() && !purge_state.m_running &&
+ (srv_undo_log_truncate || trx_sys.history_exists()) &&
+ ++purge_state.m_running == 1)
+ srv_thread_pool->submit_task(&purge_coordinator_task);
+}
+
+/** @return whether the purge tasks are active,
+as reported by purge_coordinator_task.is_running() */
+bool purge_sys_t::running()
+{
+ return purge_coordinator_task.is_running();
+}
+
+void purge_sys_t::stop_FTS()
+{ /* Count one more FTS pause request in m_FTS_paused while holding
+ latch in shared mode, then poll once per second until m_active
+ reads as zero. */
+ latch.rd_lock(SRW_LOCK_CALL);
+ m_FTS_paused++;
+ latch.rd_unlock();
+ while (m_active)
+ std::this_thread::sleep_for(std::chrono::seconds(1));
+}
+
+/** Stop purge during FLUSH TABLES FOR EXPORT.
+The pause counter m_paused is incremented while holding latch in
+exclusive mode. Only the request that found the counter at zero logs
+"Stopping purge", increments MONITOR_PURGE_STOP_COUNT and disables
+purge_coordinator_task. If purge is not enabled, nothing is changed. */
+void purge_sys_t::stop()
+{
+ latch.wr_lock(SRW_LOCK_CALL);
+
+ if (!enabled())
+ {
+ /* Shutdown must have been initiated during FLUSH TABLES FOR EXPORT. */
+ ut_ad(!srv_undo_sources);
+ latch.wr_unlock();
+ return;
+ }
+
+ ut_ad(srv_n_purge_threads > 0);
+
+ const auto paused= m_paused++; // value before the increment
+
+ latch.wr_unlock();
+
+ if (!paused)
+ {
+ ib::info() << "Stopping purge";
+ MONITOR_ATOMIC_INC(MONITOR_PURGE_STOP_COUNT);
+ purge_coordinator_task.disable();
+ }
+}
+
+/** Resume purge in data dictionary tables.
+Decrements purge_sys.m_SYS_paused; debug builds assert that the counter
+was nonzero before the decrement. The void* argument is not used. */
+void purge_sys_t::resume_SYS(void *)
+{
+ ut_d(auto paused=) purge_sys.m_SYS_paused--;
+ ut_ad(paused);
+}
+
+/** Resume purge at UNLOCK TABLES after FLUSH TABLES FOR EXPORT.
+Re-enables purge_coordinator_task and decrements the pause counter
+m_paused while holding latch in exclusive mode. When the last pause
+request is released (the counter was 1), logs "Resuming purge", sets
+purge_state.m_running to 1, submits the coordinator task and increments
+MONITOR_PURGE_RESUME_COUNT. Does nothing if purge is not enabled. */
+void purge_sys_t::resume()
+{
+ if (!enabled())
+ {
+ /* Shutdown must have been initiated during FLUSH TABLES FOR EXPORT. */
+ ut_ad(!srv_undo_sources);
+ return;
+ }
+ ut_ad(!srv_read_only_mode);
+ ut_ad(srv_force_recovery < SRV_FORCE_NO_BACKGROUND);
+ purge_coordinator_task.enable();
+ latch.wr_lock(SRW_LOCK_CALL);
+ int32_t paused= m_paused--; // value before the decrement
+ ut_a(paused); // resume() without a matching stop() is fatal
+
+ if (paused == 1)
+ {
+ ib::info() << "Resuming purge";
+ purge_state.m_running= 1;
+ srv_thread_pool->submit_task(&purge_coordinator_task);
+ MONITOR_ATOMIC_INC(MONITOR_PURGE_RESUME_COUNT);
+ }
+ latch.wr_unlock();
+}
+
+/** Get current server activity count.
+@return the current value of srv_sys.activity_count */
+ulint srv_get_activity_count()
+{
+	return srv_sys.activity_count;
+}
+
+/** Check if srv_inc_activity_count() has been called.
+@param activity_count the caller's copy of srv_sys.activity_count;
+                      refreshed when it is found to be out of date
+@return whether the activity_count had changed */
+static bool srv_check_activity(ulint *activity_count)
+{
+  const ulint current= srv_sys.activity_count;
+
+  if (current == *activity_count)
+    return false;
+
+  *activity_count= current;
+  return true;
+}
+
+/** Flush the log buffer to disk in the background.
+The flush takes place only if at least srv_flush_log_at_timeout seconds
+have passed since srv_last_log_flush_time. The master task calls this so
+that only a bounded amount of recent transactions can be lost in a crash
+when innodb_flush_log_at_trx_commit != 1. */
+static void srv_sync_log_buffer_in_background()
+{
+	const time_t now = time(NULL);
+
+	srv_main_thread_op_info = "flushing log";
+
+	if (difftime(now, srv_last_log_flush_time)
+	    < srv_flush_log_at_timeout) {
+		/* flushed recently enough */
+		return;
+	}
+
+	log_buffer_flush_to_disk();
+	srv_last_log_flush_time = now;
+	srv_log_writes_and_flush++;
+}
+
+/** Report progress during shutdown, at most once per 15 seconds.
+@param last   time of last output; set to the current time when a
+              report is written
+@param n_read number of page reads initiated for change buffer merge */
+static void srv_shutdown_print(time_t &last, ulint n_read)
+{
+  const time_t current= time(nullptr);
+
+  if (current - last < 15)
+    return;
+
+  last= current;
+
+  const ulint pages_left= ibuf.size;
+  sql_print_information("Completing change buffer merge;"
+                        " %zu page reads initiated;"
+                        " %zu change buffer pages remain",
+                        n_read, pages_left);
+#if defined HAVE_SYSTEMD && !defined EMBEDDED_LIBRARY
+  service_manager_extend_timeout(INNODB_EXTEND_TIMEOUT_INTERVAL,
+                                 "Completing change buffer merge;"
+                                 " %zu page reads initiated;"
+                                 " %zu change buffer pages remain",
+                                 n_read, pages_left);
+#endif
+}
+
+/** Perform periodic tasks whenever the server is active.
+The dictionary cache limit is enforced once 47 seconds have passed
+since the previous enforcement (or since the first call).
+@param counter_time microsecond_interval_timer() */
+static void srv_master_do_active_tasks(ulonglong counter_time)
+{
+	/* minimum time between two eviction passes, in microseconds */
+	constexpr ulonglong dict_lru_interval = 47 * 1000000ULL;
+	/* counter_time of the latest eviction pass; the first call
+	only starts the interval */
+	static ulonglong last_dict_lru_time = counter_time;
+
+	++srv_main_active_loops;
+
+	MONITOR_INC(MONITOR_MASTER_ACTIVE_LOOPS);
+
+	/* counter_time has microsecond resolution, so testing whether
+	it is an exact multiple of 47 seconds would practically never
+	succeed. Compare the elapsed time instead. */
+	if (counter_time - last_dict_lru_time >= dict_lru_interval) {
+		last_dict_lru_time = counter_time;
+		srv_main_thread_op_info = "enforcing dict cache limit";
+		if (ulint n_evicted = dict_sys.evict_table_LRU(true)) {
+			MONITOR_INC_VALUE(
+				MONITOR_SRV_DICT_LRU_EVICT_COUNT_ACTIVE,
+				n_evicted);
+		}
+		MONITOR_INC_TIME_IN_MICRO_SECS(
+			MONITOR_SRV_DICT_LRU_MICROSECOND, counter_time);
+	}
+}
+
+/** Perform periodic tasks whenever the server is idle.
+Unlike the active variant, the dictionary cache limit is enforced on
+every call.
+@param counter_time microsecond_interval_timer() */
+static void srv_master_do_idle_tasks(ulonglong counter_time)
+{
+	++srv_main_idle_loops;
+	MONITOR_INC(MONITOR_MASTER_IDLE_LOOPS);
+
+	srv_main_thread_op_info = "enforcing dict cache limit";
+
+	const ulint n_evicted = dict_sys.evict_table_LRU(false);
+
+	if (n_evicted) {
+		MONITOR_INC_VALUE(MONITOR_SRV_DICT_LRU_EVICT_COUNT_IDLE,
+				  n_evicted);
+	}
+
+	MONITOR_INC_TIME_IN_MICRO_SECS(MONITOR_SRV_DICT_LRU_MICROSECOND,
+				       counter_time);
+}
+
+/** Complete the shutdown tasks such as background DROP TABLE,
+and optionally change buffer merge (on innodb_fast_shutdown=0).
+@param ibuf_merge whether to merge the change buffer until
+                  ibuf_contract() reports no more page reads */
+void srv_shutdown(bool ibuf_merge)
+{
+	time_t last_report = time(NULL);
+
+	for (;;) {
+		ut_ad(!srv_read_only_mode);
+		ut_ad(srv_shutdown_state == SRV_SHUTDOWN_CLEANUP);
+		++srv_main_shutdown_loops;
+
+		if (!ibuf_merge) {
+			/* a single pass is all there is to do */
+			break;
+		}
+
+		srv_main_thread_op_info = "doing insert buffer merge";
+		/* Disallow the use of change buffer to
+		avoid a race condition with
+		ibuf_read_merge_pages() */
+		ibuf_max_size_update(0);
+		log_free_check();
+
+		const ulint n_read = ibuf_contract();
+		srv_shutdown_print(last_report, n_read);
+
+		if (!n_read) {
+			break;
+		}
+	}
+}
+
+/** The periodic master task controlling the server.
+Wakes up purge if needed, flushes the log buffer in the background and
+then runs either the active or the idle tasks, depending on whether
+srv_inc_activity_count() was called since the previous invocation. */
+void srv_master_callback(void*)
+{
+  /* activity count observed by the previous invocation */
+  static ulint old_activity_count;
+
+  ut_a(srv_shutdown_state <= SRV_SHUTDOWN_INITIATED);
+
+  MONITOR_INC(MONITOR_MASTER_THREAD_SLEEP);
+  purge_sys.wake_if_not_active();
+
+  ulonglong counter_time= microsecond_interval_timer();
+  srv_sync_log_buffer_in_background();
+  MONITOR_INC_TIME_IN_MICRO_SECS(MONITOR_SRV_LOG_FLUSH_MICROSECOND,
+                                 counter_time);
+
+  const bool server_was_active= srv_check_activity(&old_activity_count);
+
+  if (!server_was_active)
+    srv_master_do_idle_tasks(counter_time);
+  else
+    srv_master_do_active_tasks(counter_time);
+
+  srv_main_thread_op_info= "sleeping";
+}
+
+/** Determine whether purge should stop because of shutdown.
+While undo sources exist, purge never exits; on fast shutdown it exits
+at once. On slow shutdown it keeps running while there is history left
+to purge, and reports progress at most once per 15 seconds.
+@param old_history_size history size sampled earlier by the caller,
+                        used for detecting a lack of progress
+@return whether purge should exit due to shutdown */
+static bool srv_purge_should_exit(size_t old_history_size)
+{
+  ut_ad(srv_shutdown_state <= SRV_SHUTDOWN_CLEANUP);
+
+  if (srv_undo_sources)
+    return false;
+
+  if (srv_fast_shutdown)
+    return true;
+
+  /* Slow shutdown was requested. */
+  size_t prepared, active= trx_sys.any_active_transactions(&prepared);
+  const size_t history_size= trx_sys.history_size();
+
+  /* Nothing more can be achieved when no history is left, or when no
+  transaction is active, the history size did not change and there are
+  prepared transactions. */
+  if (!history_size ||
+      (!active && history_size == old_history_size && prepared))
+    return !active;
+
+  static time_t progress_time;
+  const time_t now= time(NULL);
+  if (now - progress_time >= 15)
+  {
+    progress_time= now;
+#if defined HAVE_SYSTEMD && !defined EMBEDDED_LIBRARY
+    service_manager_extend_timeout(INNODB_EXTEND_TIMEOUT_INTERVAL,
+                                   "InnoDB: to purge %zu transactions",
+                                   history_size);
+#endif
+    /* Write the progress to the server log on every build, not only
+    where systemd is available, like srv_shutdown_print() does. */
+    sql_print_information("InnoDB: to purge %zu transactions", history_size);
+  }
+
+  return false;
+}
+
+/** Fetch and execute a task from the work queue.
+The first queued query thread, if any, is unlinked from srv_sys.tasks
+while holding srv_sys.tasks_mutex, and run after the mutex was released.
+@return true if a task was executed */
+static bool srv_task_execute()
+{
+	ut_ad(!srv_read_only_mode);
+	ut_ad(srv_force_recovery < SRV_FORCE_NO_BACKGROUND);
+
+	mysql_mutex_lock(&srv_sys.tasks_mutex);
+
+	que_thr_t* thr = UT_LIST_GET_FIRST(srv_sys.tasks);
+
+	if (!thr) {
+		/* the queue is empty */
+		ut_ad(UT_LIST_GET_LEN(srv_sys.tasks) == 0);
+		mysql_mutex_unlock(&srv_sys.tasks_mutex);
+		return false;
+	}
+
+	/* only purge tasks are expected in this queue */
+	ut_a(que_node_get_type(thr->child) == QUE_NODE_PURGE);
+	UT_LIST_REMOVE(srv_sys.tasks, thr);
+	mysql_mutex_unlock(&srv_sys.tasks_mutex);
+
+	que_run_threads(thr);
+	return true;
+}
+
+static void purge_create_background_thds(int );
+
+/** Flag which is set, whenever innodb_purge_threads changes. */
+static Atomic_relaxed<bool> srv_purge_thread_count_changed;
+
+/** Protects srv_n_purge_threads and srv_purge_thread_count_changed */
+static std::mutex purge_thread_count_mtx;
+
+/** Set the number of purge threads and flag the change.
+@param n the new value of srv_n_purge_threads;
+         must be between 1 and innodb_purge_threads_MAX */
+void srv_update_purge_thread_count(uint n)
+{
+  std::lock_guard<std::mutex> guard{purge_thread_count_mtx};
+
+  ut_ad(n > 0);
+  ut_ad(n <= innodb_purge_threads_MAX);
+
+  srv_n_purge_threads= n;
+  srv_purge_thread_count_changed= true;
+}
+
+inline void purge_coordinator_state::do_purge()
+{ /* One run of the purge coordinator.
+ Returns at once (without touching m_running) if purge is disabled or
+ paused. Otherwise srv_n_purge_threads is sampled under
+ purge_thread_count_mtx and the loop below is entered at first_loop.
+ Each iteration refreshes history_size. If no history is left, it clears
+ srv_dml_needed_delay, waits for purge_truncation_task, calls
+ trx_purge_truncate_history() and stops. Otherwise it runs trx_purge()
+ and then truncates the history: synchronously if
+ purge_sys.truncate.current is set or shutdown has started, else by
+ submitting purge_truncation_task. The loop ends when trx_purge()
+ handled no pages, when purge was disabled or paused, or when
+ srv_purge_should_exit() holds. Finally m_running is reset to 0. */
+ ut_ad(!srv_read_only_mode);
+
+ if (!purge_sys.enabled() || purge_sys.paused())
+ return;
+
+ uint n_threads;
+
+ {
+ std::lock_guard<std::mutex> lk(purge_thread_count_mtx);
+ n_threads= srv_n_purge_threads;
+ srv_purge_thread_count_changed= false;
+ goto first_loop; // n_threads is fresh: skip the re-check at the loop head
+ }
+
+ do
+ {
+ if (UNIV_UNLIKELY(srv_purge_thread_count_changed))
+ {
+ /* Read the fresh value of srv_n_purge_threads, reset
+ the changed flag. Both are protected by purge_thread_count_mtx. */
+ {
+ std::lock_guard<std::mutex> lk(purge_thread_count_mtx);
+ n_threads= srv_n_purge_threads;
+ srv_purge_thread_count_changed= false;
+ }
+ }
+ first_loop:
+ ut_ad(n_threads);
+
+ history_size= trx_sys.history_size();
+
+ if (!history_size)
+ {
+ no_history: // also reached by goto after a trx_purge() batch
+ srv_dml_needed_delay= 0;
+ purge_truncation_task.wait();
+ trx_purge_truncate_history();
+ break;
+ }
+
+ ulint n_pages_handled= trx_purge(n_threads, history_size);
+ if (!trx_sys.history_exists())
+ goto no_history;
+ if (purge_sys.truncate.current || srv_shutdown_state != SRV_SHUTDOWN_NONE)
+ {
+ purge_truncation_task.wait(); // let a pending asynchronous truncation finish first
+ trx_purge_truncate_history();
+ }
+ else
+ srv_thread_pool->submit_task(&purge_truncation_task);
+ if (!n_pages_handled)
+ break;
+ }
+ while (purge_sys.enabled() && !purge_sys.paused() &&
+ !srv_purge_should_exit(history_size));
+
+ m_running= 0;
+}
+
+/** Pool of background THD objects that are not attached to a purge task */
+static std::list<THD*> purge_thds;
+/** Protects purge_thds and n_purge_thds */
+static std::mutex purge_thd_mutex;
+extern void* thd_attach_thd(THD*);
+extern void thd_detach_thd(void *);
+/** Number of THD objects created by purge_create_background_thds() */
+static int n_purge_thds;
+
+/** Ensure that we have at least n background THDs for purge.
+The current_thd of the caller is restored before returning.
+@param n the minimum number of THD objects to have */
+static void purge_create_background_thds(int n)
+{
+  THD *const saved_thd= current_thd;
+  std::lock_guard<std::mutex> guard(purge_thd_mutex);
+
+  for (; n_purge_thds < n; n_purge_thds++)
+    purge_thds.push_back(innobase_create_background_thd("InnoDB purge worker"));
+
+  set_current_thd(saved_thd);
+}
+
+/** Take a THD out of the purge_thds pool and attach it.
+The pool must not be empty.
+@param ctx  receives the return value of thd_attach_thd(),
+            to be passed to release_thd()
+@return the acquired THD */
+static THD *acquire_thd(void **ctx)
+{
+  THD *thd;
+
+  {
+    std::lock_guard<std::mutex> guard(purge_thd_mutex);
+    ut_a(!purge_thds.empty());
+    thd= purge_thds.front();
+    purge_thds.pop_front();
+  }
+
+  /* Set current thd, and thd->mysys_var as well,
+  it might be used by something in the server.*/
+  *ctx= thd_attach_thd(thd);
+  return thd;
+}
+
+/** Detach a THD and return it to the purge_thds pool.
+@param thd  THD returned by acquire_thd()
+@param ctx  context that acquire_thd() stored for this THD */
+static void release_thd(THD *thd, void *ctx)
+{
+  thd_detach_thd(ctx);
+
+  {
+    std::lock_guard<std::mutex> guard(purge_thd_mutex);
+    purge_thds.push_back(thd);
+  }
+
+  set_current_thd(0);
+}
+
+/** Task callback of purge_worker_task: with a pooled THD attached,
+execute queued purge tasks until the queue is empty. */
+static void purge_worker_callback(void*)
+{
+  ut_ad(!current_thd);
+  ut_ad(!srv_read_only_mode);
+  ut_ad(srv_force_recovery < SRV_FORCE_NO_BACKGROUND);
+
+  void *attach_ctx;
+  THD *const worker_thd= acquire_thd(&attach_ctx);
+
+  while (srv_task_execute())
+  {
+    ut_ad(purge_sys.running());
+  }
+
+  release_thd(worker_thd, attach_ctx);
+}
+
+/** Task callback of purge_coordinator_task: run
+purge_coordinator_state::do_purge() with a pooled THD attached. */
+static void purge_coordinator_callback(void*)
+{
+  void *attach_ctx;
+  THD *const coordinator_thd= acquire_thd(&attach_ctx);
+
+  purge_state.do_purge();
+
+  release_thd(coordinator_thd, attach_ctx);
+}
+
+void srv_init_purge_tasks()
+{ /* Create innodb_purge_threads_MAX background THD objects up front,
+ then start up the purge coordinator. */
+ purge_create_background_thds(innodb_purge_threads_MAX);
+ purge_sys.coordinator_startup();
+}
+
+static void srv_shutdown_purge_tasks()
+{ /* Disable the coordinator task, wait for the worker task, destroy
+ every pooled THD, reset n_purge_thds and finally wait for the
+ truncation task. NOTE: purge_thd_mutex is held from its acquisition
+ until the function returns, that is, also during the final wait. */
+ purge_coordinator_task.disable();
+ purge_worker_task.wait();
+ std::unique_lock<std::mutex> lk(purge_thd_mutex);
+ while (!purge_thds.empty())
+ {
+ destroy_background_thd(purge_thds.front());
+ purge_thds.pop_front();
+ }
+ n_purge_thds= 0;
+ purge_truncation_task.wait();
+}
+
+/** Append a query thread to the server task queue (srv_sys.tasks)
+while holding srv_sys.tasks_mutex.
+@param thr query thread */
+void srv_que_task_enqueue_low(que_thr_t* thr)
+{
+	ut_ad(!srv_read_only_mode);
+
+	mysql_mutex_lock(&srv_sys.tasks_mutex);
+	UT_LIST_ADD_LAST(srv_sys.tasks, thr);
+	mysql_mutex_unlock(&srv_sys.tasks_mutex);
+}
+
+#ifdef UNIV_DEBUG
+/** @return number of tasks in queue,
+read while holding srv_sys.tasks_mutex */
+ulint srv_get_task_queue_length()
+{
+	ut_ad(!srv_read_only_mode);
+
+	mysql_mutex_lock(&srv_sys.tasks_mutex);
+	const ulint length = UT_LIST_GET_LEN(srv_sys.tasks);
+	mysql_mutex_unlock(&srv_sys.tasks_mutex);
+
+	return length;
+}
+#endif
+
+/** Shut down the purge threads.
+Does nothing if purge is not enabled. Unless a fast shutdown or a
+bootstrap is in progress, purge is first switched to the maximum batch
+size and the maximum number of purge threads. The coordinator task is
+then submitted and awaited repeatedly until srv_purge_should_exit()
+holds, after which the coordinator and the purge tasks are shut down. */
+void srv_purge_shutdown()
+{
+ if (purge_sys.enabled())
+ {
+ if (!srv_fast_shutdown && !opt_bootstrap)
+ {
+ srv_purge_batch_size= innodb_purge_batch_size_MAX;
+ srv_update_purge_thread_count(innodb_purge_threads_MAX);
+ }
+ size_t history_size= trx_sys.history_size();
+ while (!srv_purge_should_exit(history_size))
+ {
+ history_size= trx_sys.history_size(); // sampled before the run, to detect a lack of progress
+ ut_a(!purge_sys.paused());
+ srv_thread_pool->submit_task(&purge_coordinator_task);
+ purge_coordinator_task.wait();
+ }
+ purge_sys.coordinator_shutdown();
+ srv_shutdown_purge_tasks();
+ }
+}
diff --git a/storage/innobase/srv/srv0start.cc b/storage/innobase/srv/srv0start.cc
new file mode 100644
index 00000000..ef5bcb67
--- /dev/null
+++ b/storage/innobase/srv/srv0start.cc
@@ -0,0 +1,2101 @@
+/*****************************************************************************
+
+Copyright (c) 1996, 2017, Oracle and/or its affiliates. All rights reserved.
+Copyright (c) 2009, Percona Inc.
+Copyright (c) 2013, 2022, MariaDB Corporation.
+
+Portions of this file contain modifications contributed and copyrighted
+by Percona Inc.. Those modifications are
+gratefully acknowledged and are described briefly in the InnoDB
+documentation. The contributions by Percona Inc. are incorporated with
+their permission, and subject to the conditions contained in the file
+COPYING.Percona.
+
+This program is free software; you can redistribute it and/or modify it under
+the terms of the GNU General Public License as published by the Free Software
+Foundation; version 2 of the License.
+
+This program is distributed in the hope that it will be useful, but WITHOUT
+ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
+FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details.
+
+You should have received a copy of the GNU General Public License along with
+this program; if not, write to the Free Software Foundation, Inc.,
+51 Franklin Street, Fifth Floor, Boston, MA 02110-1335 USA
+
+*****************************************************************************/
+
+/********************************************************************//**
+@file srv/srv0start.cc
+Starts the InnoDB database server
+
+Created 2/16/1996 Heikki Tuuri
+*************************************************************************/
+
+#include "my_global.h"
+
+#include "mysqld.h"
+#include "mysql/psi/mysql_stage.h"
+#include "mysql/psi/psi.h"
+
+#include "row0ftsort.h"
+#include "ut0mem.h"
+#include "mem0mem.h"
+#include "data0data.h"
+#include "data0type.h"
+#include "dict0dict.h"
+#include "buf0buf.h"
+#include "buf0dblwr.h"
+#include "buf0dump.h"
+#include "os0file.h"
+#include "fil0fil.h"
+#include "fil0crypt.h"
+#include "fsp0fsp.h"
+#include "rem0rec.h"
+#include "mtr0mtr.h"
+#include "log0crypt.h"
+#include "log0recv.h"
+#include "page0page.h"
+#include "page0cur.h"
+#include "trx0trx.h"
+#include "trx0sys.h"
+#include "btr0btr.h"
+#include "btr0cur.h"
+#include "rem0rec.h"
+#include "ibuf0ibuf.h"
+#include "srv0start.h"
+#include "srv0srv.h"
+#include "btr0defragment.h"
+#include "mysql/service_wsrep.h" /* wsrep_recovery */
+#include "trx0rseg.h"
+#include "buf0flu.h"
+#include "buf0rea.h"
+#include "dict0boot.h"
+#include "dict0load.h"
+#include "dict0stats_bg.h"
+#include "que0que.h"
+#include "lock0lock.h"
+#include "trx0roll.h"
+#include "trx0purge.h"
+#include "lock0lock.h"
+#include "pars0pars.h"
+#include "btr0sea.h"
+#include "rem0cmp.h"
+#include "dict0crea.h"
+#include "row0ins.h"
+#include "row0sel.h"
+#include "row0upd.h"
+#include "row0row.h"
+#include "row0mysql.h"
+#include "btr0pcur.h"
+#include "zlib.h"
+#include "log.h"
+
+/** We are prepared for a situation that we have this many threads waiting for
+a transactional lock inside InnoDB. srv_start() sets the value. */
+ulint srv_max_n_threads;
+
+/** Log sequence number at shutdown */
+lsn_t srv_shutdown_lsn;
+
+/** TRUE if a raw partition is in use */
+ibool srv_start_raw_disk_in_use;
+
+/** Space id of the first undo tablespace */
+uint32_t srv_undo_space_id_start;
+
+/** TRUE if the server is being started, before rolling back any
+incomplete transactions */
+bool srv_startup_is_before_trx_rollback_phase;
+/** TRUE if the server is being started */
+bool srv_is_being_started;
+/** TRUE if the server was successfully started */
+bool srv_was_started;
+/** whether srv_start() has been called */
+static bool srv_start_has_been_called;
+
+/** Whether any undo log records can be generated */
+bool srv_undo_sources;
+
+/** innodb_encrypt_log */
+my_bool srv_encrypt_log;
+
+#ifdef UNIV_DEBUG
+/** InnoDB system tablespace to set during recovery */
+uint srv_sys_space_size_debug;
+/** whether redo log file have been created at startup */
+bool srv_log_file_created;
+#endif /* UNIV_DEBUG */
+
+/** whether some background threads that create redo log have been started */
+static bool srv_started_redo;
+
+/** At a shutdown this value climbs from SRV_SHUTDOWN_NONE to
+SRV_SHUTDOWN_CLEANUP and then to SRV_SHUTDOWN_LAST_PHASE, and so on */
+enum srv_shutdown_t srv_shutdown_state = SRV_SHUTDOWN_NONE;
+
+/** Name of srv_monitor_file */
+static char* srv_monitor_file_name;
+std::unique_ptr<tpool::timer> srv_master_timer; /* timer of the master task; srv_any_background_activity() treats a non-null pointer as "master task active" */
+
+/** NOTE(review): undocumented; presumably a cap on the number of pending
+synchronous I/O requests - confirm that it is still referenced */
+#define SRV_MAX_N_PENDING_SYNC_IOS 100
+
+#ifdef UNIV_PFS_THREAD
+/* Keys to register InnoDB threads with performance schema */
+mysql_pfs_key_t thread_pool_thread_key;
+#endif /* UNIV_PFS_THREAD */
+
+#ifdef HAVE_PSI_STAGE_INTERFACE
+/** Array of all InnoDB stage events for monitoring activities via
+performance schema. */
+static PSI_stage_info* srv_stages[] =
+{
+ &srv_stage_alter_table_end,
+ &srv_stage_alter_table_insert,
+ &srv_stage_alter_table_log_index,
+ &srv_stage_alter_table_log_table,
+ &srv_stage_alter_table_merge_sort,
+ &srv_stage_alter_table_read_pk_internal_sort,
+ &srv_stage_buffer_pool_load,
+};
+#endif /* HAVE_PSI_STAGE_INTERFACE */
+
+/** Delete any garbage log files: delete_log_file() is invoked for
+each of the suffixes "1" through "101". */
+static void delete_log_files()
+{
+  size_t suffix= 1;
+  do
+    delete_log_file(std::to_string(suffix).c_str());
+  while (++suffix <= 101);
+}
+
+/** Creates log file.
+The new log is created under the temporary name ib_logfile101, sized to
+srv_log_file_size, attached to log_sys and initialized at the given LSN.
+A checkpoint is made and the log buffer is flushed before returning.
+@param create_new_db whether the database is being initialized
+@param lsn log sequence number
+@return DB_SUCCESS or error code */
+static dberr_t create_log_file(bool create_new_db, lsn_t lsn)
+{
+ ut_ad(!srv_read_only_mode);
+
+ /* We will retain ib_logfile0 until we have written a new logically
+ empty log as ib_logfile101 and atomically renamed it to
+ ib_logfile0 in log_t::resize_rename(). */
+ delete_log_files();
+
+ ut_ad(!os_aio_pending_reads());
+ ut_d(mysql_mutex_lock(&buf_pool.flush_list_mutex));
+ ut_ad(!buf_pool.get_oldest_modification(0));
+ ut_d(mysql_mutex_unlock(&buf_pool.flush_list_mutex));
+ /* os_aio_pending_writes() may hold here if some
+ write_io_callback() did not release the slot yet. However,
+ the page write itself must have completed, because the
+ buf_pool.flush_list is empty. In debug builds, we wait for
+ this to happen, hoping to get a hung process if this
+ assumption does not hold. */
+ ut_d(os_aio_wait_until_no_pending_writes(false));
+
+ log_sys.latch.wr_lock(SRW_LOCK_CALL);
+ log_sys.set_capacity();
+
+ std::string logfile0{get_log_file_path("ib_logfile101")}; // despite its name, this is the temporary file name
+ bool ret;
+ os_file_t file{
+ os_file_create_func(logfile0.c_str(),
+ OS_FILE_CREATE | OS_FILE_ON_ERROR_NO_EXIT,
+ OS_FILE_NORMAL, OS_LOG_FILE, false, &ret)
+ };
+
+ if (!ret) {
+ sql_print_error("InnoDB: Cannot create %.*s",
+ int(logfile0.size()), logfile0.data());
+err_exit: // common failure exit: release log_sys.latch
+ log_sys.latch.wr_unlock();
+ return DB_ERROR;
+ }
+
+ ret = os_file_set_size(logfile0.c_str(), file, srv_log_file_size);
+ if (!ret) {
+ ib::error() << "Cannot set log file " << logfile0
+ << " size to " << ib::bytes_iec{srv_log_file_size};
+close_and_exit: // failure exit that also closes the file handle
+ os_file_close_func(file);
+ goto err_exit;
+ }
+
+ log_sys.set_latest_format(srv_encrypt_log);
+ if (!log_sys.attach(file, srv_log_file_size)) {
+ goto close_and_exit;
+ }
+ if (!fil_system.sys_space->open(create_new_db)) { // from here on, failures do not close the file here
+ goto err_exit;
+ }
+
+ /* Create a log checkpoint. */
+ if (log_sys.is_encrypted() && !log_crypt_init()) {
+ goto err_exit;
+ }
+ ut_d(recv_no_log_write = false);
+ log_sys.create(lsn);
+
+ ut_ad(srv_startup_is_before_trx_rollback_phase);
+ if (create_new_db) {
+ srv_startup_is_before_trx_rollback_phase = false;
+ }
+
+ /* Enable checkpoints in buf_flush_page_cleaner(). */
+ recv_sys.recovery_on = false;
+ log_sys.latch.wr_unlock();
+
+ log_make_checkpoint();
+ log_buffer_flush_to_disk();
+
+ return DB_SUCCESS;
+}
+
+/** Rename the redo log file after resizing: move ib_logfile101 over
+the path returned by get_log_file_path(). A failure is reported in
+the server error log.
+@return whether an error occurred */
+bool log_t::resize_rename() noexcept
+{
+  const std::string from{get_log_file_path("ib_logfile101")};
+  const std::string to{get_log_file_path()};
+
+  const bool renamed=
+    IF_WIN(MoveFileEx(from.c_str(), to.c_str(),
+                      MOVEFILE_REPLACE_EXISTING) != 0,
+           !rename(from.c_str(), to.c_str()));
+
+  if (!renamed)
+    sql_print_error("InnoDB: Failed to rename log from %.*s to %.*s (error %d)",
+                    int(from.size()), from.data(),
+                    int(to.size()), to.data(),
+                    IF_WIN(int(GetLastError()), errno));
+
+  return !renamed;
+}
+
+/** Create an undo tablespace file.
+In read-only mode an existing file is merely opened; otherwise the file
+is created and extended to SRV_UNDO_TABLESPACE_SIZE_IN_PAGES pages.
+The file handle is closed again before returning.
+@param[in]	name	file name
+@return DB_SUCCESS or error code */
+static dberr_t srv_undo_tablespace_create(const char* name)
+{
+	pfs_os_file_t	fh;
+	bool		ret;
+
+	os_file_create_subdirs_if_needed(name);
+
+	fh = os_file_create(
+		innodb_data_file_key,
+		name,
+		srv_read_only_mode ? OS_FILE_OPEN : OS_FILE_CREATE,
+		OS_FILE_NORMAL, OS_DATA_FILE, srv_read_only_mode, &ret);
+
+	if (!ret) {
+		/* An already existing file is not reported here,
+		but it still makes the creation fail. */
+		if (os_file_get_last_error(false) != OS_FILE_ALREADY_EXISTS
+#ifdef _AIX
+		    /* AIX 5.1 after security patch ML7 may have
+		    errno set to 0 here, which causes our function
+		    to return 100; work around that AIX problem */
+		    && os_file_get_last_error(false) != 100
+#endif
+		) {
+			ib::error() << "Can't create UNDO tablespace "
+				<< name;
+		}
+		return DB_ERROR;
+	}
+
+	dberr_t	err = DB_SUCCESS;
+
+	if (srv_read_only_mode) {
+		ib::info() << name << " opened in read-only mode";
+	} else {
+		/* We created the data file and now write it full of zeros */
+
+		ib::info() << "Data file " << name << " did not exist: new to"
+			" be created";
+
+		ib::info() << "Setting file " << name << " size to "
+			   << ib::bytes_iec{SRV_UNDO_TABLESPACE_SIZE_IN_PAGES
+					    << srv_page_size_shift};
+
+		ib::info() << "Database physically writes the file full: "
+			<< "wait...";
+
+		if (!os_file_set_size(name, fh, os_offset_t
+				      {SRV_UNDO_TABLESPACE_SIZE_IN_PAGES}
+				      << srv_page_size_shift)) {
+			ib::error() << "Unable to allocate " << name;
+			err = DB_ERROR;
+		}
+	}
+
+	/* The handle is not handed over to anyone, so close it on every
+	path that opened it, including the read-only one. */
+	os_file_close(fh);
+
+	return err;
+}
+
+inline dberr_t trx_sys_t::reset_page(mtr_t *mtr)
+{ /* Re-initialize the TRX_SYS page within the given mini-transaction.
+ The page is reset with fsp_init_file_page(), its type is set to
+ FIL_PAGE_TYPE_TRX_SYS, the first rollback segment slot is pointed at
+ FSP_FIRST_RSEG_PAGE_NO and the slot area after it is filled with 0xff.
+ If the doublewrite magic number was found on the page beforehand, the
+ saved doublewrite header is written back.
+ Returns DB_SUCCESS, or the error reported by buf_page_get_gen() if the
+ page could not be fetched. */
+ dberr_t err= DB_SUCCESS;
+ buf_block_t *sys_header= buf_page_get_gen(
+ page_id_t(TRX_SYS_SPACE, TRX_SYS_PAGE_NO), 0, RW_X_LATCH, nullptr,
+ BUF_GET, mtr, &err);
+
+ if (!sys_header) return err;
+
+ const bool dblwr_enabled=
+ mach_read_from_4(TRX_SYS_DOUBLEWRITE_MAGIC + TRX_SYS_DOUBLEWRITE +
+ sys_header->page.frame)
+ == TRX_SYS_DOUBLEWRITE_MAGIC_N;
+
+ char doublewrite[TRX_SYS_DOUBLEWRITE_BLOCK2 + 4]; // copy of the doublewrite header, taken before the page is wiped
+ memcpy(doublewrite, TRX_SYS_DOUBLEWRITE + sys_header->page.frame,
+ sizeof doublewrite);
+
+ fsp_init_file_page(fil_system.sys_space, sys_header, mtr);
+
+ mtr->write<2>(*sys_header, FIL_PAGE_TYPE + sys_header->page.frame,
+ FIL_PAGE_TYPE_TRX_SYS);
+
+ mtr->write<4>(*sys_header,
+ TRX_SYS + TRX_SYS_RSEGS + TRX_SYS_RSEG_PAGE_NO +
+ sys_header->page.frame, FSP_FIRST_RSEG_PAGE_NO);
+ mtr->memset(sys_header, // NOTE(review): covers 254 slots after the first one - confirm the intended slot count
+ TRX_SYS + TRX_SYS_RSEGS + TRX_SYS_RSEG_SLOT_SIZE,
+ 254 * TRX_SYS_RSEG_SLOT_SIZE, 0xff);
+
+ static_assert(TRX_SYS_RSEG_SLOT_SIZE == 8, "");
+
+ if (dblwr_enabled)
+ {
+ mtr->memcpy(
+ *sys_header, sys_header->page.frame + TRX_SYS_DOUBLEWRITE,
+ doublewrite, sizeof doublewrite);
+ mtr->memmove(
+ *sys_header,
+ TRX_SYS_DOUBLEWRITE + FSEG_HEADER_SIZE + TRX_SYS_DOUBLEWRITE_REPEAT,
+ TRX_SYS_DOUBLEWRITE + FSEG_HEADER_SIZE, 12);
+ memcpy( // NOTE(review): same 12 bytes as the mtr->memmove() above; presumably that call only logs the copy - confirm
+ sys_header->page.frame + TRX_SYS_DOUBLEWRITE
+ + FSEG_HEADER_SIZE + TRX_SYS_DOUBLEWRITE_REPEAT,
+ sys_header->page.frame + TRX_SYS_DOUBLEWRITE + FSEG_HEADER_SIZE, 12);
+ }
+
+ return DB_SUCCESS;
+}
+
+/** Delete the old undo tablespaces present in the undo log directory.
+Calls fil_close_tablespace() for each of the srv_undo_tablespaces_open
+tablespaces starting at space id srv_undo_space_id_start, and then makes
+a log checkpoint.
+@return DB_SUCCESS, or DB_ERROR if one of the two debug injection
+points is active (note that "after_deleting_old_undo_success" returns
+DB_ERROR as well) */
+static dberr_t srv_undo_delete_old_tablespaces()
+{
+ /* Delete the old undo tablespaces*/
+ for (uint32_t i= 0; i < srv_undo_tablespaces_open; ++i)
+ fil_close_tablespace(srv_undo_space_id_start + i);
+
+ DBUG_EXECUTE_IF("after_deleting_old_undo_abort", return DB_ERROR;);
+
+ /* Do checkpoint to get rid of old undo log tablespaces redo logs */
+ log_make_checkpoint();
+
+ DBUG_EXECUTE_IF("after_deleting_old_undo_success", return DB_ERROR;);
+
+ return DB_SUCCESS;
+}
+
+/** Recreate the undo log tablespaces.
+In a first mini-transaction, this reads the maximum tablespace ID from
+the dictionary header page, writes TRX_RSEG_MAX_TRX_ID to the header
+page of the first rollback segment, invokes trx_sys.reset_page(), and
+re-initializes the in-memory rollback segments 1..TRX_SYS_N_RSEGS-1.
+After that mini-transaction is committed, the old undo tablespaces are
+closed by srv_undo_delete_old_tablespaces(). Unless srv_undo_tablespaces
+is 0, a second mini-transaction assigns a new first undo tablespace ID
+and invokes srv_undo_tablespaces_init(true, ...).
+@return DB_SUCCESS or error code */
+ATTRIBUTE_COLD static dberr_t srv_undo_tablespaces_reinit()
+{
+ mtr_t mtr;
+ dberr_t err;
+ buf_block_t *first_rseg_hdr;
+ uint32_t latest_space_id;
+
+ mtr.start();
+
+ buf_block_t *dict_hdr= buf_page_get_gen(
+ page_id_t(DICT_HDR_SPACE, DICT_HDR_PAGE_NO), 0, RW_X_LATCH,
+ nullptr, BUF_GET, &mtr, &err);
+
+ if (!dict_hdr)
+ goto func_exit;
+
+ /* Assign the new space id for the first undo tablespace */
+ latest_space_id= mach_read_from_4(
+ DICT_HDR + DICT_HDR_MAX_SPACE_ID + dict_hdr->page.frame);
+
+ if (latest_space_id + srv_undo_tablespaces > SRV_SPACE_ID_UPPER_BOUND)
+ {
+ err= DB_ERROR;
+ sql_print_error("InnoDB: Running out of tablespace id");
+ goto func_exit;
+ }
+
+ first_rseg_hdr=
+ buf_page_get_gen(trx_sys.rseg_array[0].page_id(), 0, RW_X_LATCH,
+ nullptr, BUF_GET, &mtr, &err);
+ if (!first_rseg_hdr)
+ goto func_exit;
+
+ if (UNIV_UNLIKELY(mach_read_from_4(TRX_RSEG + TRX_RSEG_FORMAT +
+ first_rseg_hdr->page.frame)))
+ trx_rseg_format_upgrade(first_rseg_hdr, &mtr);
+
+ mtr.write<8,mtr_t::MAYBE_NOP>(*first_rseg_hdr,
+ TRX_RSEG + TRX_RSEG_MAX_TRX_ID +
+ first_rseg_hdr->page.frame,
+ trx_sys.get_max_trx_id() - 1);
+
+ /* Reset TRX_SYS page */
+ err= trx_sys.reset_page(&mtr);
+
+ if (err)
+ goto func_exit;
+
+ if (srv_undo_tablespaces_open == 0)
+ {
+ /* No dedicated undo tablespace was open: free the file segments
+ of the rollback segments 1..TRX_SYS_N_RSEGS-1 that reside in the
+ system tablespace */
+ for (ulint i= 1; i < TRX_SYS_N_RSEGS; i++)
+ {
+ trx_rseg_t *rseg= &trx_sys.rseg_array[i];
+ if (rseg->space != fil_system.sys_space)
+ continue;
+ buf_block_t *block= buf_page_get_gen(
+ rseg->page_id(), 0, RW_X_LATCH, nullptr, BUF_GET, &mtr);
+ if (!block) break;
+ while (!fseg_free_step(TRX_RSEG + TRX_RSEG_FSEG_HEADER +
+ block->page.frame, &mtr));
+ }
+ }
+
+ for (ulint rseg_id= 1; rseg_id < TRX_SYS_N_RSEGS; rseg_id++)
+ {
+ trx_rseg_t *rseg= &trx_sys.rseg_array[rseg_id];
+ rseg->destroy();
+ rseg->init(nullptr, FIL_NULL);
+ }
+
+ if (trx_sys.recovered_binlog_lsn
+#ifdef WITH_WSREP
+ || !trx_sys.recovered_wsrep_xid.is_null()
+#endif /* WITH_WSREP */
+ )
+ {
+ /* Update binlog offset, binlog file name & wsrep xid in
+ system tablespace rollback segment */
+ if (trx_sys.recovered_binlog_lsn)
+ {
+ ut_d(const size_t len = strlen(trx_sys.recovered_binlog_filename) + 1);
+ ut_ad(len > 1);
+ ut_ad(len <= TRX_RSEG_BINLOG_NAME_LEN);
+ trx_rseg_update_binlog_offset(
+ first_rseg_hdr, trx_sys.recovered_binlog_filename,
+ trx_sys.recovered_binlog_offset, &mtr);
+ }
+
+#ifdef WITH_WSREP
+ if (!trx_sys.recovered_wsrep_xid.is_null())
+ trx_rseg_update_wsrep_checkpoint(
+ first_rseg_hdr, &trx_sys.recovered_wsrep_xid, &mtr);
+#endif /* WITH_WSREP */
+ }
+
+ dict_hdr->page.fix(); /* extra fix on the page before mtr.commit(); it
+ is undone by unfix() on the early returns below, or the page is
+ x-latched again and pushed to the memo of the second
+ mini-transaction */
+
+ mtr.commit();
+
+ DBUG_EXECUTE_IF("after_rseg_reset_abort",
+ log_write_up_to(mtr.commit_lsn(), true);
+ dict_hdr->page.unfix();
+ return DB_ERROR;);
+
+ sql_print_information(
+ "InnoDB: Reinitializing innodb_undo_tablespaces= %u from %u",
+ srv_undo_tablespaces, srv_undo_tablespaces_open);
+
+ /* Delete the old undo tablespaces */
+ err= srv_undo_delete_old_tablespaces();
+ if (err)
+ {
+ dict_hdr->page.unfix();
+ return err;
+ }
+
+ mtr.start();
+
+ dict_hdr->page.lock.x_lock();
+ mtr.memo_push(dict_hdr, MTR_MEMO_PAGE_X_FIX);
+
+ if (srv_undo_tablespaces == 0)
+ {
+ srv_undo_space_id_start= 0;
+ srv_undo_tablespaces_open= 0;
+ goto func_exit;
+ }
+
+ srv_undo_space_id_start= latest_space_id;
+ if (fil_assign_new_space_id(&srv_undo_space_id_start))
+ mtr.write<4>(*dict_hdr, DICT_HDR + DICT_HDR_MAX_SPACE_ID
+ + dict_hdr->page.frame, srv_undo_space_id_start);
+
+ /* Re-create the new undo tablespaces */
+ err= srv_undo_tablespaces_init(true, &mtr);
+func_exit:
+ mtr.commit();
+
+ DBUG_EXECUTE_IF("after_reinit_undo_abort",
+ log_write_up_to(mtr.commit_lsn(), true);
+ err= DB_ERROR;);
+
+ if (err == DB_SUCCESS)
+ {
+ /* Usually, recovery must work no matter when
+ log_checkpoints are triggered. This is a special case,
+ because this code is executed as part of InnoDB startup.
+ Backup requires that the server has been started up,
+ backup should never observe the log records that
+ were written in mtr and also srv_undo_tablespaces_init()
+ initializes the undo tablespace start id based on page0
+ content before reading the redo log */
+ log_make_checkpoint();
+
+ DBUG_EXECUTE_IF("after_reinit_undo_success", err= DB_ERROR;);
+ srv_undo_tablespaces_active= srv_undo_tablespaces;
+ }
+ return err;
+}
+
+/** Compare the number of opened undo tablespaces with the configured
+innodb_undo_tablespaces, and re-create the undo tablespaces when the
+two differ and there is no undo log left to purge or roll back.
+@return DB_SUCCESS, or the result of srv_undo_tablespaces_reinit() */
+static dberr_t srv_undo_tablespaces_reinitialize()
+{
+  if (srv_undo_tablespaces == srv_undo_tablespaces_open)
+  {
+    /* The configuration agrees with what was opened. */
+    if (srv_undo_tablespaces_open > 0)
+      sql_print_information("InnoDB: Opened " UINT32PF " undo tablespaces",
+                            srv_undo_tablespaces_open);
+    return DB_SUCCESS;
+  }
+
+  /* The counts differ. The undo tablespaces can only be re-created
+  if they contain no undo logs to purge or roll back. */
+  if (trx_sys.is_undo_empty())
+    return srv_undo_tablespaces_reinit();
+
+  /* Undo logs exist, so the requested change cannot be applied.
+  Keep using the number of undo tablespaces that were opened;
+  some of them could be unused and reserved for future use. */
+  sql_print_warning("InnoDB: Cannot change innodb_undo_tablespaces=%u "
+                    "because previous shutdown was not with "
+                    "innodb_fast_shutdown=0", srv_undo_tablespaces);
+  srv_undo_tablespaces= srv_undo_tablespaces_open;
+
+  return DB_SUCCESS;
+}
+
+/** Count the distinct nonzero tablespace identifiers that the
+rollback segment slots of the TRX_SYS page refer to.
+@return the number of active undo tablespaces (except system tablespace) */
+static uint32_t trx_rseg_get_n_undo_tablespaces()
+{
+  std::set<uint32_t> space_ids;
+  mtr_t mtr;
+  mtr.start();
+
+  const buf_block_t *const sys_header= trx_sysf_get(&mtr, false);
+
+  /* If the TRX_SYS page is not available, no slot is examined. */
+  for (ulint slot= 0; sys_header && slot < TRX_SYS_N_RSEGS; slot++)
+  {
+    if (trx_sysf_rseg_get_page_no(sys_header, slot) == FIL_NULL)
+      continue; /* unused slot */
+
+    const uint32_t space= trx_sysf_rseg_get_space(sys_header, slot);
+
+    /* Tablespace 0 is the system tablespace; it is not counted. */
+    if (space != 0)
+      space_ids.insert(space);
+  }
+
+  mtr.commit();
+  return static_cast<uint32_t>(space_ids.size());
+}
+
+/** Open an undo tablespace.
+When the tablespace is being created, its identifier is
+srv_undo_space_id_start + i and its flags are derived from
+srv_checksum_algorithm. Otherwise, the first page of the file is read,
+the tablespace identifier and flags are taken from it, and the page is
+checked for corruption (with an attempt to restore it from the
+doublewrite buffer). On success, the file is registered in fil_system
+and the file handle is owned by the created fil_node_t.
+@param[in] create whether undo tablespaces are being created
+@param[in] name tablespace file name
+@param[in] i undo tablespace count
+@return undo tablespace identifier
+@retval 0 if file doesn't exist
+@retval ~0U if page0 is corrupted */
+static uint32_t srv_undo_tablespace_open(bool create, const char* name,
+                                         uint32_t i)
+{
+  bool success;
+  uint32_t space_id= 0;
+  uint32_t fsp_flags= 0;
+
+  if (create)
+  {
+    space_id= srv_undo_space_id_start + i;
+    switch (srv_checksum_algorithm) {
+    case SRV_CHECKSUM_ALGORITHM_FULL_CRC32:
+    case SRV_CHECKSUM_ALGORITHM_STRICT_FULL_CRC32:
+      fsp_flags= FSP_FLAGS_FCRC32_MASK_MARKER | FSP_FLAGS_FCRC32_PAGE_SSIZE();
+      break;
+    default:
+      fsp_flags= FSP_FLAGS_PAGE_SSIZE();
+    }
+  }
+
+  pfs_os_file_t fh= os_file_create(innodb_data_file_key, name, OS_FILE_OPEN |
+                                   OS_FILE_ON_ERROR_NO_EXIT |
+                                   OS_FILE_ON_ERROR_SILENT,
+                                   OS_FILE_AIO, OS_DATA_FILE,
+                                   srv_read_only_mode, &success);
+
+  if (!success)
+    return 0;
+
+  os_offset_t size= os_file_get_size(fh);
+  ut_a(size != os_offset_t(-1));
+
+  if (!create)
+  {
+    page_t *page= static_cast<byte*>(aligned_malloc(srv_page_size,
+                                                    srv_page_size));
+    if (os_file_read(IORequestRead, fh, page, 0, srv_page_size, nullptr) !=
+        DB_SUCCESS)
+    {
+err_exit:
+      ib::error() << "Unable to read first page of file " << name;
+      aligned_free(page);
+      /* The handle has not been attached to any fil_node_t yet.
+      Close it here, so that the file descriptor will not be leaked
+      when the caller carries on after this failure. */
+      os_file_close(fh);
+      return ~0U;
+    }
+
+    uint32_t id= mach_read_from_4(FIL_PAGE_SPACE_ID + page);
+    /* The identifier must be in range, and the two copies of it in
+    the page must agree. */
+    if (id == 0 || id >= SRV_SPACE_ID_UPPER_BOUND ||
+        memcmp_aligned<2>(FIL_PAGE_SPACE_ID + page,
+                          FSP_HEADER_OFFSET + FSP_SPACE_ID + page, 4))
+    {
+      ib::error() << "Inconsistent tablespace ID in file " << name;
+      goto err_exit;
+    }
+
+    space_id= id;
+    fsp_flags= mach_read_from_4(FSP_HEADER_OFFSET + FSP_SPACE_FLAGS + page);
+
+    if (buf_page_is_corrupted(false, page, fsp_flags))
+    {
+      sql_print_error("InnoDB: Checksum mismatch in the first page of file %s",
+                      name);
+      /* Give up if the page cannot be restored from the
+      doublewrite buffer. */
+      if (recv_sys.dblwr.restore_first_page(space_id, name, fh))
+        goto err_exit;
+    }
+
+    aligned_free(page);
+  }
+
+  /* Load the tablespace into InnoDB's internal data structures. */
+
+  /* We set the biggest space id to the undo tablespace
+  because InnoDB hasn't opened any other tablespace apart
+  from the system tablespace. */
+
+  fil_set_max_space_id_if_bigger(space_id);
+
+  mysql_mutex_lock(&fil_system.mutex);
+  fil_space_t *space= fil_space_t::create(space_id, fsp_flags,
+                                          FIL_TYPE_TABLESPACE, nullptr,
+                                          FIL_ENCRYPTION_DEFAULT, true);
+  ut_ad(space);
+  fil_node_t *file= space->add(name, fh, 0, false, true);
+
+  if (create)
+  {
+    space->set_sizes(SRV_UNDO_TABLESPACE_SIZE_IN_PAGES);
+    space->size= file->size= uint32_t(size >> srv_page_size_shift);
+  }
+  else if (!file->read_page0())
+  {
+    /* The first page could not be read via the fil_node_t:
+    close the file and adjust the count of open files. */
+    os_file_close(file->handle);
+    file->handle= OS_FILE_CLOSED;
+    ut_a(fil_system.n_open > 0);
+    fil_system.n_open--;
+  }
+
+  mysql_mutex_unlock(&fil_system.mutex);
+  return space_id;
+}
+
+/** Before creating a new system tablespace, ensure that no
+undo tablespace file undo001..undoNNN (NNN=srv_undo_tablespaces)
+and no redo log file can be opened.
+@retval DB_SUCCESS if all undo and redo logs are not found
+@retval DB_ERROR if any undo and redo logs are found */
+static
+dberr_t
+srv_check_undo_redo_logs_exists()
+{
+  bool exists;
+  os_file_t handle;
+  char path[OS_FILE_MAX_PATH];
+
+  /* Probe each undo tablespace file that the configuration
+  would create. */
+  for (ulint n = 1; n <= srv_undo_tablespaces; ++n) {
+    snprintf(path, sizeof path, "%s/undo%03zu", srv_undo_dir, n);
+
+    handle = os_file_create_func(
+      path,
+      OS_FILE_OPEN_RETRY
+      | OS_FILE_ON_ERROR_NO_EXIT
+      | OS_FILE_ON_ERROR_SILENT,
+      OS_FILE_NORMAL,
+      OS_DATA_FILE,
+      srv_read_only_mode,
+      &exists);
+
+    if (!exists) {
+      continue;
+    }
+
+    os_file_close_func(handle);
+    ib::error()
+      << "undo tablespace '" << path << "' exists."
+      " Creating system tablespace with existing undo"
+      " tablespaces is not supported. Please delete"
+      " all undo tablespaces before creating new"
+      " system tablespace.";
+    return DB_ERROR;
+  }
+
+  /* Probe the redo log file. */
+  auto redo_path = get_log_file_path();
+
+  handle = os_file_create_func(redo_path.c_str(),
+                               OS_FILE_OPEN_RETRY
+                               | OS_FILE_ON_ERROR_NO_EXIT
+                               | OS_FILE_ON_ERROR_SILENT,
+                               OS_FILE_NORMAL, OS_LOG_FILE,
+                               srv_read_only_mode, &exists);
+
+  if (!exists) {
+    return DB_SUCCESS;
+  }
+
+  os_file_close_func(handle);
+  ib::error() << "redo log file '" << redo_path
+    << "' exists. Creating system tablespace with"
+    " existing redo log file is not recommended."
+    " Please delete redo log file before"
+    " creating new system tablespace.";
+  return DB_ERROR;
+}
+
+/** Open the undo tablespace files undo001, undo002, ... in srv_undo_dir.
+@param[in] create_new_undo whether the undo tablespaces are being created
+@param[in] n_undo number of undo tablespaces to try in the first pass
+@return DB_SUCCESS or error code
+@retval DB_CORRUPTION if srv_undo_tablespace_open() reported ~0U
+in the first pass
+@retval DB_ERROR if a file could not be opened while creating */
+static dberr_t srv_all_undo_tablespaces_open(bool create_new_undo,
+                                             uint32_t n_undo)
+{
+  /* First pass: the undo tablespaces that are currently in use.
+  Failing to open any of these is fatal, because they are required
+  for recovery and are referenced by the UNDO logs (a.k.a RBS).
+  The tablespace ids should be contiguous. */
+
+  uint32_t prev_id= 0;
+  if (create_new_undo)
+    prev_id= srv_undo_space_id_start - 1;
+
+  for (uint32_t n= 0; n < n_undo; ++n)
+  {
+    char name[OS_FILE_MAX_PATH];
+    snprintf(name, sizeof name, "%s/undo%03u", srv_undo_dir, n + 1);
+    const uint32_t space_id= srv_undo_tablespace_open(create_new_undo,
+                                                      name, n);
+
+    if (space_id == ~0U)
+      return DB_CORRUPTION;
+
+    if (space_id == 0)
+    {
+      if (!create_new_undo)
+        break; /* carry on with the second pass */
+      sql_print_error("InnoDB: Unable to open create tablespace '%s'.", name);
+      return DB_ERROR;
+    }
+
+    /* Should be no gaps in undo tablespace ids. */
+    ut_a(!n || prev_id + 1 == space_id);
+
+    prev_id= space_id;
+
+    /* Remember the first undo tablespace id in case of
+    no active undo tablespace. */
+    if (0 == srv_undo_tablespaces_open++)
+      srv_undo_space_id_start= space_id;
+  }
+
+  /* Second pass: any extra unused undo tablespaces. These must be
+  contiguous, and we stop at the first failure. They are not in use
+  and therefore not required by recovery. The upper bound is
+  re-evaluated on every iteration, because srv_undo_space_id_start
+  may be assigned inside the loop. */
+  for (uint32_t n= prev_id + 1;
+       n < srv_undo_space_id_start + TRX_SYS_N_RSEGS; ++n)
+  {
+    char name[OS_FILE_MAX_PATH];
+    snprintf(name, sizeof name, "%s/undo%03u", srv_undo_dir, n);
+    const uint32_t space_id= srv_undo_tablespace_open(create_new_undo,
+                                                      name, n);
+    if (!space_id || space_id == ~0U)
+      break;
+    if (0 == srv_undo_tablespaces_open++)
+      srv_undo_space_id_start= space_id;
+  }
+
+  return DB_SUCCESS;
+}
+
+/** Open the configured number of dedicated undo tablespaces,
+creating the files and initializing their headers first if requested.
+A setting of srv_undo_tablespaces=1 is replaced with 0.
+@param[in] create_new_undo whether the undo tablespaces has to be created
+@param[in,out] mtr mini-transaction
+@return DB_SUCCESS or error code */
+dberr_t srv_undo_tablespaces_init(bool create_new_undo, mtr_t *mtr)
+{
+  srv_undo_tablespaces_open= 0;
+
+  ut_ad(!create_new_undo || mtr);
+  ut_a(srv_undo_tablespaces <= TRX_SYS_N_RSEGS);
+  ut_a(!create_new_undo || srv_operation <= SRV_OPERATION_EXPORT_RESTORED);
+
+  if (srv_undo_tablespaces == 1)
+    srv_undo_tablespaces= 0;
+
+  /* The undo tablespace files are only created when the caller
+  asks for it. We don't allow creating of new undo tablespaces
+  in an existing instance (yet). */
+  if (create_new_undo)
+  {
+    DBUG_EXECUTE_IF("innodb_undo_upgrade", srv_undo_space_id_start= 3;);
+
+    for (ulint n= 0; n < srv_undo_tablespaces; ++n)
+    {
+      char name[OS_FILE_MAX_PATH];
+      snprintf(name, sizeof name, "%s/undo%03zu", srv_undo_dir, n + 1);
+      const dberr_t err= srv_undo_tablespace_create(name);
+      if (err)
+      {
+        ib::error() << "Could not create undo tablespace '" << name << "'.";
+        return err;
+      }
+    }
+  }
+
+  /* Get the tablespace ids of all the undo segments excluding
+  the system tablespace (0). If we are creating a new instance then
+  we build the undo_tablespace_ids ourselves since they don't
+  already exist. */
+  srv_undo_tablespaces_active= srv_undo_tablespaces;
+
+  /* When creating, or in the BACKUP and RESTORE_DELTA operations,
+  only the configured number of files is tried; otherwise
+  up to TRX_SYS_N_RSEGS. */
+  uint32_t n_undo= TRX_SYS_N_RSEGS;
+  if (create_new_undo || srv_operation == SRV_OPERATION_BACKUP ||
+      srv_operation == SRV_OPERATION_RESTORE_DELTA)
+    n_undo= srv_undo_tablespaces;
+
+  {
+    const dberr_t err= srv_all_undo_tablespaces_open(create_new_undo, n_undo);
+    if (err)
+      return err;
+  }
+
+  /* Initialize srv_undo_space_id_start=0 when there are no
+  dedicated undo tablespaces. */
+  if (srv_undo_tablespaces_open == 0)
+    srv_undo_space_id_start= 0;
+
+  if (!create_new_undo)
+    return DB_SUCCESS;
+
+  /* Write the tablespace header of each created undo tablespace. */
+  for (uint32_t n= 0; n < srv_undo_tablespaces; ++n)
+  {
+    const dberr_t err=
+      fsp_header_init(fil_space_get(srv_undo_space_id_start + n),
+                      SRV_UNDO_TABLESPACE_SIZE_IN_PAGES, mtr);
+    if (err)
+      return err;
+  }
+
+  return DB_SUCCESS;
+}
+
+/** Create the temporary file tablespace: remove any left-over files,
+check the file specification, open or create the files, and
+initialize the tablespace header and the temporary rollback segments.
+@param[in] create_new_db whether we are creating a new database
+@return DB_SUCCESS or error code. */
+static
+dberr_t
+srv_open_tmp_tablespace(bool create_new_db)
+{
+  ulint sum_of_new_sizes;
+  bool create_new_temp_space;
+
+  /* Try to remove any existing file that was left over by the
+  last unclean shutdown. */
+  srv_tmp_space.set_sanity_check_status(true);
+  srv_tmp_space.delete_files();
+  srv_tmp_space.set_ignore_read_only(true);
+
+  srv_tmp_space.set_space_id(SRV_TMP_SPACE_ID);
+
+  dberr_t err = srv_tmp_space.check_file_spec(
+    &create_new_temp_space, 12 * 1024 * 1024);
+
+  if (err == DB_FAIL) {
+    ib::error() << "The innodb_temporary"
+      " data file must be writable!";
+    return DB_ERROR;
+  }
+
+  if (err != DB_SUCCESS) {
+    ib::error() << "Could not create the shared innodb_temporary.";
+    return err;
+  }
+
+  err = srv_tmp_space.open_or_create(
+    true, create_new_db, &sum_of_new_sizes);
+
+  if (err != DB_SUCCESS) {
+    ib::error() << "Unable to create the shared innodb_temporary";
+    return err;
+  }
+
+  if (!fil_system.temp_space->open(true)) {
+    /* This file was just opened in the code above! */
+    ib::error() << "The innodb_temporary"
+      " data file cannot be re-opened"
+      " after check_file_spec() succeeded!";
+    return DB_ERROR;
+  }
+
+  /* Initialize the header page, without writing redo log. */
+  mtr_t mtr;
+  mtr.start();
+  mtr.set_log_mode(MTR_LOG_NO_REDO);
+  err = fsp_header_init(fil_system.temp_space,
+                        srv_tmp_space.get_sum_of_sizes(),
+                        &mtr);
+  mtr.commit();
+
+  if (err == DB_SUCCESS) {
+    err = trx_temp_rseg_create(&mtr);
+  }
+
+  return err;
+}
+
+/** Shut down the background threads, except the page cleaner.
+The master timer is reset and srv_shutdown_state is set to
+SRV_SHUTDOWN_EXIT_THREADS before purge is shut down (if enabled)
+and the fil_crypt thread count is set to 0 (if it was nonzero). */
+static void srv_shutdown_threads()
+{
+  ut_ad(!srv_undo_sources);
+
+  srv_master_timer.reset();
+  srv_shutdown_state= SRV_SHUTDOWN_EXIT_THREADS;
+
+  if (purge_sys.enabled())
+    srv_purge_shutdown();
+
+  if (srv_n_fil_crypt_threads != 0)
+    fil_crypt_set_thread_cnt(0);
+}
+
+
+/** Shut down background threads that can generate undo log.
+srv_shutdown_state is set to SRV_SHUTDOWN_INITIATED in any case;
+the FTS optimize and dict_stats subsystems are shut down only
+if srv_undo_sources was set, and the flag is cleared afterwards. */
+static void srv_shutdown_bg_undo_sources()
+{
+  srv_shutdown_state= SRV_SHUTDOWN_INITIATED;
+
+  if (!srv_undo_sources)
+    return; /* nothing had been started */
+
+  ut_ad(!srv_read_only_mode);
+  fts_optimize_shutdown();
+  dict_stats_shutdown();
+  srv_undo_sources= false;
+}
+
+#ifdef UNIV_DEBUG /* debug builds also pass the call site (__FILE__, __LINE__) */
+# define srv_init_abort(_db_err) \
+ srv_init_abort_low(create_new_db, __FILE__, __LINE__, _db_err)
+#else /* either variant expects a variable create_new_db in scope at the point of use */
+# define srv_init_abort(_db_err) \
+ srv_init_abort_low(create_new_db, _db_err)
+#endif /* UNIV_DEBUG */
+
+/** Innobase start-up aborted. Report the failure and perform cleanup
+by shutting down the background threads.
+@param[in] create_new_db whether a new database is being created
+@param[in] file file name of the call site (debug builds only)
+@param[in] line line number of the call site (debug builds only)
+@param[in] err reason for aborting InnoDB startup
+@return err, as passed by the caller */
+MY_ATTRIBUTE((warn_unused_result, nonnull))
+static
+dberr_t
+srv_init_abort_low(
+  bool create_new_db,
+#ifdef UNIV_DEBUG
+  const char* file,
+  unsigned line,
+#endif /* UNIV_DEBUG */
+  dberr_t err)
+{
+  ut_ad(srv_is_being_started);
+
+  if (!create_new_db) {
+    /* Outside of the normal server operation, nothing is
+    reported here. */
+    if (srv_operation == SRV_OPERATION_NORMAL) {
+      ib::error() << "Plugin initialization aborted"
+#ifdef UNIV_DEBUG
+        " at " << innobase_basename(file) << "[" << line << "]"
+#endif /* UNIV_DEBUG */
+        " with error " << err;
+    }
+  } else {
+    ib::error() << "Database creation was aborted"
+#ifdef UNIV_DEBUG
+      " at " << innobase_basename(file) << "[" << line << "]"
+#endif /* UNIV_DEBUG */
+      " with error " << err << ". You may need"
+      " to delete the ibdata1 file before trying to start"
+      " up again.";
+  }
+
+  srv_shutdown_bg_undo_sources();
+  srv_shutdown_threads();
+
+  return err;
+}
+
+/** Prepare to delete the redo log file: flush the dirty pages from
+the buffer pool, report what is about to happen to the redo log,
+and write out the redo log buffer.
+@return LSN up to which data pages have been flushed
+@retval 0 if the debug injection point innodb_log_abort_1 is set */
+static lsn_t srv_prepare_to_delete_redo_log_file()
+{
+  DBUG_ENTER("srv_prepare_to_delete_redo_log_file");
+
+  ut_ad(recv_sys.recovery_on);
+
+  /* Write out all dirty pages of the buffer pool. */
+  buf_flush_sync();
+
+  DBUG_EXECUTE_IF("innodb_log_abort_1", DBUG_RETURN(0););
+  DBUG_PRINT("ib_log", ("After innodb_log_abort_1"));
+
+  log_sys.latch.wr_lock(SRW_LOCK_CALL);
+  const bool latest_format{log_sys.is_latest()};
+  lsn_t flushed_lsn{log_sys.get_lsn()};
+
+  if (latest_format && !(log_sys.file_size & 4095))
+  {
+    /* The LSN that would follow the checkpoint if nothing but a
+    FILE_CHECKPOINT record had been written after it. */
+    const lsn_t checkpoint_end= log_sys.next_checkpoint_lsn +
+      (log_sys.is_encrypted()
+       ? SIZE_OF_FILE_CHECKPOINT + 8
+       : SIZE_OF_FILE_CHECKPOINT);
+
+    if (flushed_lsn != checkpoint_end)
+    {
+      fil_names_clear(flushed_lsn);
+      flushed_lsn= log_sys.get_lsn();
+    }
+  }
+
+  if (!latest_format || srv_log_file_size == log_sys.file_size)
+  {
+    /* The file size is not reported as changing: either this is a
+    format upgrade, or only the encryption setting is reported. */
+    const char *msg;
+    if (!latest_format)
+      msg= "Upgrading redo log: ";
+    else if (srv_encrypt_log)
+      msg= "Encrypting redo log: ";
+    else
+      msg= "Removing redo log encryption: ";
+
+    ib::info() << msg << ib::bytes_iec(srv_log_file_size)
+               << "; LSN=" << flushed_lsn;
+  }
+  else
+  {
+    /* The file size changes, possibly along with encryption. */
+    const char *msg;
+    if (srv_encrypt_log != (my_bool)log_sys.is_encrypted())
+      msg= srv_encrypt_log
+        ? "Encrypting and resizing"
+        : "Removing encryption and resizing";
+    else
+      msg= srv_encrypt_log ? "Resizing encrypted" : "Resizing";
+
+    ib::info() << msg << " redo log from "
+               << ib::bytes_iec{log_sys.file_size} << " to "
+               << ib::bytes_iec{srv_log_file_size}
+               << "; LSN=" << flushed_lsn;
+  }
+
+  log_sys.latch.wr_unlock();
+
+  log_write_up_to(flushed_lsn, false);
+
+  /* In debug builds, check that nothing is pending or dirty. */
+  ut_ad(flushed_lsn == log_sys.get_lsn());
+  ut_ad(!os_aio_pending_reads());
+  ut_d(mysql_mutex_lock(&buf_pool.flush_list_mutex));
+  ut_ad(!buf_pool.get_oldest_modification(0));
+  ut_d(mysql_mutex_unlock(&buf_pool.flush_list_mutex));
+  ut_d(os_aio_wait_until_no_pending_writes(false));
+
+  DBUG_RETURN(flushed_lsn);
+}
+
+static tpool::task_group rollback_all_recovered_group(1); /* task group that
+ rollback_all_recovered_task below is bound to; NOTE(review): the argument 1
+ presumably limits the group to one concurrently running task -- confirm
+ against the tpool::task_group constructor */
+static tpool::task rollback_all_recovered_task(trx_rollback_all_recovered,
+ nullptr,
+ &rollback_all_recovered_group); /* task bound to the function
+ trx_rollback_all_recovered, with nullptr as its second constructor
+ argument (presumably the callback argument -- confirm) */
+
+/** Start InnoDB.
+@param[in] create_new_db whether to create a new database
+@return DB_SUCCESS or error code */
+dberr_t srv_start(bool create_new_db)
+{
+ dberr_t err = DB_SUCCESS;
+ mtr_t mtr;
+
+ ut_ad(srv_operation <= SRV_OPERATION_RESTORE_EXPORT
+ || srv_operation == SRV_OPERATION_RESTORE
+ || srv_operation == SRV_OPERATION_RESTORE_EXPORT);
+
+ if (srv_force_recovery) {
+ ib::info() << "!!! innodb_force_recovery is set to "
+ << srv_force_recovery << " !!!";
+ }
+
+ if (srv_force_recovery == SRV_FORCE_NO_LOG_REDO) {
+ srv_read_only_mode = true;
+ }
+
+ high_level_read_only = srv_read_only_mode
+ || srv_force_recovery >= SRV_FORCE_NO_UNDO_LOG_SCAN
+ || srv_sys_space.created_new_raw();
+
+ srv_started_redo = false;
+
+ compile_time_assert(sizeof(ulint) == sizeof(void*));
+
+#ifdef UNIV_DEBUG
+ ib::info() << "!!!!!!!! UNIV_DEBUG switched on !!!!!!!!!";
+#endif
+
+#ifdef UNIV_IBUF_DEBUG
+ ib::info() << "!!!!!!!! UNIV_IBUF_DEBUG switched on !!!!!!!!!";
+#endif
+
+ ib::info() << "Compressed tables use zlib " ZLIB_VERSION
+#ifdef UNIV_ZIP_DEBUG
+ " with validation"
+#endif /* UNIV_ZIP_DEBUG */
+ ;
+#ifdef UNIV_ZIP_COPY
+ ib::info() << "and extra copying";
+#endif /* UNIV_ZIP_COPY */
+
+ /* Since InnoDB does not currently clean up all its internal data
+ structures in MySQL Embedded Server Library server_end(), we
+ print an error message if someone tries to start up InnoDB a
+ second time during the process lifetime. */
+
+ if (srv_start_has_been_called) {
+ ib::error() << "Startup called second time"
+ " during the process lifetime."
+ " In the MariaDB Embedded Server Library"
+ " you cannot call server_init() more than"
+ " once during the process lifetime.";
+ }
+
+ srv_start_has_been_called = true;
+
+ srv_is_being_started = true;
+
+ /* Register performance schema stages before any real work has been
+ started which may need to be instrumented. */
+ mysql_stage_register("innodb", srv_stages,
+ static_cast<int>(UT_ARR_SIZE(srv_stages)));
+
+ srv_max_n_threads =
+ 1 /* dict_stats_thread */
+ + 1 /* fts_optimize_thread */
+ + 128 /* safety margin */
+ + max_connections;
+
+ srv_boot();
+
+ ib::info() << my_crc32c_implementation();
+
+ if (!srv_read_only_mode) {
+ mysql_mutex_init(srv_monitor_file_mutex_key,
+ &srv_monitor_file_mutex, nullptr);
+ mysql_mutex_init(srv_misc_tmpfile_mutex_key,
+ &srv_misc_tmpfile_mutex, nullptr);
+ }
+
+ if (!srv_read_only_mode) {
+ if (srv_innodb_status) {
+
+ srv_monitor_file_name = static_cast<char*>(
+ ut_malloc_nokey(
+ strlen(fil_path_to_mysql_datadir)
+ + 20 + sizeof "/innodb_status."));
+
+ sprintf(srv_monitor_file_name,
+ "%s/innodb_status." ULINTPF,
+ fil_path_to_mysql_datadir,
+ static_cast<ulint>
+ (IF_WIN(GetCurrentProcessId(), getpid())));
+
+ srv_monitor_file = my_fopen(srv_monitor_file_name,
+ O_RDWR|O_TRUNC|O_CREAT,
+ MYF(MY_WME));
+
+ if (!srv_monitor_file) {
+ ib::error() << "Unable to create "
+ << srv_monitor_file_name << ": "
+ << strerror(errno);
+ if (err == DB_SUCCESS) {
+ err = DB_ERROR;
+ }
+ }
+ } else {
+
+ srv_monitor_file_name = NULL;
+ srv_monitor_file = os_file_create_tmpfile();
+
+ if (!srv_monitor_file && err == DB_SUCCESS) {
+ err = DB_ERROR;
+ }
+ }
+
+ srv_misc_tmpfile = os_file_create_tmpfile();
+
+ if (!srv_misc_tmpfile && err == DB_SUCCESS) {
+ err = DB_ERROR;
+ }
+ }
+
+ if (err != DB_SUCCESS) {
+ return(srv_init_abort(err));
+ }
+
+ if (srv_read_only_mode) {
+ ib::info() << "Disabling background log and ibuf IO write"
+ << " threads.";
+ }
+
+ if (os_aio_init()) {
+ ib::error() << "Cannot initialize AIO sub-system";
+
+ return(srv_init_abort(DB_ERROR));
+ }
+
+#ifdef LINUX_NATIVE_AIO
+ if (srv_use_native_aio) {
+ ib::info() << "Using Linux native AIO";
+ }
+#endif
+#ifdef HAVE_URING
+ if (srv_use_native_aio) {
+ ib::info() << "Using liburing";
+ }
+#endif
+
+ fil_system.create(srv_file_per_table ? 50000 : 5000);
+
+ ib::info() << "Initializing buffer pool, total size = "
+ << ib::bytes_iec{srv_buf_pool_size}
+ << ", chunk size = " << ib::bytes_iec{srv_buf_pool_chunk_unit};
+
+ if (buf_pool.create()) {
+ ib::error() << "Cannot allocate memory for the buffer pool";
+
+ return(srv_init_abort(DB_ERROR));
+ }
+
+ ib::info() << "Completed initialization of buffer pool";
+
+#ifdef UNIV_DEBUG
+ /* We have observed deadlocks with a 5MB buffer pool but
+ the actual lower limit could very well be a little higher. */
+
+ if (srv_buf_pool_size <= 5 * 1024 * 1024) {
+
+ ib::info() << "Small buffer pool size ("
+ << ib::bytes_iec{srv_buf_pool_size}
+ << "), the flst_validate() debug function can cause a"
+ << " deadlock if the buffer pool fills up.";
+ }
+#endif /* UNIV_DEBUG */
+
+ if (!log_sys.create()) {
+ return srv_init_abort(DB_ERROR);
+ }
+
+ recv_sys.create();
+ lock_sys.create(srv_lock_table_size);
+
+ srv_startup_is_before_trx_rollback_phase = true;
+
+ if (!srv_read_only_mode) {
+ buf_flush_page_cleaner_init();
+ ut_ad(buf_page_cleaner_is_active);
+ }
+
+ /* Check if undo tablespaces and redo log files exist before creating
+ a new system tablespace */
+ if (create_new_db) {
+ err = srv_check_undo_redo_logs_exists();
+ if (err != DB_SUCCESS) {
+ return(srv_init_abort(DB_ERROR));
+ }
+ recv_sys.debug_free();
+ }
+
+ /* Open or create the data files. */
+ ulint sum_of_new_sizes;
+
+ err = srv_sys_space.open_or_create(
+ false, create_new_db, &sum_of_new_sizes);
+
+ switch (err) {
+ case DB_SUCCESS:
+ break;
+ case DB_CANNOT_OPEN_FILE:
+ ib::error()
+ << "Could not open or create the system tablespace. If"
+ " you tried to add new data files to the system"
+ " tablespace, and it failed here, you should now"
+ " edit innodb_data_file_path in my.cnf back to what"
+ " it was, and remove the new ibdata files InnoDB"
+ " created in this failed attempt. InnoDB only wrote"
+ " those files full of zeros, but did not yet use"
+ " them in any way. But be careful: do not remove"
+ " old data files which contain your precious data!";
+ /* fall through */
+ default:
+ /* Other errors might come from Datafile::validate_first_page() */
+ return(srv_init_abort(err));
+ }
+
+ if (innodb_encrypt_temporary_tables && !log_crypt_init()) {
+ return srv_init_abort(DB_ERROR);
+ }
+
+ if (create_new_db) {
+ lsn_t flushed_lsn = log_sys.init_lsn();
+
+ err = create_log_file(true, flushed_lsn);
+
+ if (err != DB_SUCCESS) {
+ for (const Datafile &file: srv_sys_space) {
+ os_file_delete(innodb_data_file_key,
+ file.filepath());
+ }
+ return srv_init_abort(err);
+ }
+
+ srv_undo_space_id_start= 1;
+ }
+
+ /* Open the log file and the data files in the system tablespace: we keep
+ them open until database shutdown */
+ ut_d(fil_system.sys_space->recv_size = srv_sys_space_size_debug);
+
+ if (fil_system.sys_space->open(create_new_db)) {
+ mtr_t mtr;
+ mtr.start();
+ err= srv_undo_tablespaces_init(create_new_db, &mtr);
+ mtr.commit();
+ }
+ else {
+ err= DB_ERROR;
+ }
+
+ /* If the force recovery is set very high then we carry on regardless
+ of all errors. Basically this is fingers crossed mode. */
+
+ if (err != DB_SUCCESS
+ && srv_force_recovery < SRV_FORCE_NO_UNDO_LOG_SCAN) {
+
+ return(srv_init_abort(err));
+ }
+
+ /* Initialize objects used by dict stats gathering thread, which
+ can also be used by recovery if it tries to drop some table */
+ if (!srv_read_only_mode) {
+ dict_stats_init();
+ }
+
+ trx_sys.create();
+
+ if (create_new_db) {
+ ut_ad(!srv_read_only_mode);
+
+ mtr_start(&mtr);
+ ut_ad(fil_system.sys_space->id == 0);
+ compile_time_assert(TRX_SYS_SPACE == 0);
+ compile_time_assert(IBUF_SPACE_ID == 0);
+ ut_a(fsp_header_init(fil_system.sys_space,
+ uint32_t(sum_of_new_sizes), &mtr)
+ == DB_SUCCESS);
+
+ ulint ibuf_root = btr_create(
+ DICT_CLUSTERED | DICT_IBUF, fil_system.sys_space,
+ DICT_IBUF_ID_MIN, nullptr, &mtr, &err);
+
+ mtr_commit(&mtr);
+
+ if (ibuf_root == FIL_NULL) {
+ return srv_init_abort(err);
+ }
+
+ ut_ad(ibuf_root == IBUF_TREE_ROOT_PAGE_NO);
+
+ /* To maintain backward compatibility we create only
+ the first rollback segment before the double write buffer.
+ All the remaining rollback segments will be created later,
+ after the double write buffer has been created. */
+ err = trx_sys_create_sys_pages(&mtr);
+
+ if (err != DB_SUCCESS) {
+ return(srv_init_abort(err));
+ }
+
+ err = dict_create();
+
+ if (err != DB_SUCCESS) {
+ return(srv_init_abort(err));
+ }
+
+ buf_flush_sync();
+
+ ut_ad(!srv_log_file_created);
+ ut_d(srv_log_file_created= true);
+
+ if (log_sys.resize_rename()) {
+ return(srv_init_abort(DB_ERROR));
+ }
+ } else {
+ /* Suppress warnings in fil_space_t::create() for files
+ that are being read before dict_boot() has recovered
+ DICT_HDR_MAX_SPACE_ID. */
+ fil_system.space_id_reuse_warned = true;
+
+ /* We always try to do a recovery, even if the database had
+ been shut down normally: this is the normal startup path */
+
+ err = recv_recovery_from_checkpoint_start();
+ recv_sys.close_files();
+
+ recv_sys.dblwr.pages.clear();
+
+ if (err != DB_SUCCESS) {
+ return(srv_init_abort(err));
+ }
+
+ switch (srv_operation) {
+ case SRV_OPERATION_NORMAL:
+ case SRV_OPERATION_EXPORT_RESTORED:
+ case SRV_OPERATION_RESTORE_EXPORT:
+ /* Initialize the change buffer. */
+ err = dict_boot();
+ if (err != DB_SUCCESS) {
+ return(srv_init_abort(err));
+ }
+ /* fall through */
+ case SRV_OPERATION_RESTORE:
+ /* This must precede recv_sys.apply(true). */
+ srv_undo_tablespaces_active
+ = trx_rseg_get_n_undo_tablespaces();
+
+ if (srv_operation != SRV_OPERATION_RESTORE) {
+ dict_sys.load_sys_tables();
+ }
+ err = trx_lists_init_at_db_start();
+ if (err != DB_SUCCESS) {
+ return srv_init_abort(err);
+ }
+ break;
+ case SRV_OPERATION_RESTORE_DELTA:
+ case SRV_OPERATION_BACKUP:
+ case SRV_OPERATION_BACKUP_NO_DEFER:
+ ut_ad("wrong mariabackup mode" == 0);
+ }
+
+ if (srv_force_recovery < SRV_FORCE_NO_LOG_REDO) {
+ /* Apply the hashed log records to the
+ respective file pages, for the last batch of
+ recv_group_scan_log_recs().
+ Since it may generate huge batch of threadpool tasks,
+ for read io task group, scale down thread creation rate
+ by temporarily restricting tpool concurrency.
+ */
+ srv_thread_pool->set_concurrency(srv_n_read_io_threads);
+
+ mysql_mutex_lock(&recv_sys.mutex);
+ recv_sys.apply(true);
+ mysql_mutex_unlock(&recv_sys.mutex);
+
+ srv_thread_pool->set_concurrency();
+
+ if (recv_sys.is_corrupt_log()
+ || recv_sys.is_corrupt_fs()) {
+ return(srv_init_abort(DB_CORRUPTION));
+ }
+
+ DBUG_PRINT("ib_log", ("apply completed"));
+
+ if (recv_needed_recovery) {
+ trx_sys_print_mysql_binlog_offset();
+ }
+ }
+
+ fil_system.space_id_reuse_warned = false;
+
+ if (!srv_read_only_mode) {
+ const uint32_t flags = FSP_FLAGS_PAGE_SSIZE();
+ for (uint32_t id = srv_undo_space_id_start;
+ id <= srv_undo_tablespaces; id++) {
+ if (fil_space_t* space = fil_space_get(id)) {
+ fsp_flags_try_adjust(space, flags);
+ }
+ }
+
+ if (sum_of_new_sizes > 0) {
+ /* New data file(s) were added */
+ mtr.start();
+ mtr.x_lock_space(fil_system.sys_space);
+ buf_block_t* block = buf_page_get(
+ page_id_t(0, 0), 0,
+ RW_SX_LATCH, &mtr);
+ /* The first page of the system tablespace
+ should already have been successfully
+ accessed earlier during startup. */
+ ut_a(block);
+ ulint size = mach_read_from_4(
+ FSP_HEADER_OFFSET + FSP_SIZE
+ + block->page.frame);
+ ut_ad(size == fil_system.sys_space
+ ->size_in_header);
+ size += sum_of_new_sizes;
+ mtr.write<4>(*block,
+ FSP_HEADER_OFFSET + FSP_SIZE
+ + block->page.frame, size);
+ fil_system.sys_space->size_in_header
+ = uint32_t(size);
+ mtr.commit();
+ log_write_up_to(mtr.commit_lsn(), true);
+ }
+ }
+
+#ifdef UNIV_DEBUG
+ {
+ mtr.start();
+ buf_block_t* block = buf_page_get(page_id_t(0, 0), 0,
+ RW_S_LATCH, &mtr);
+ ut_ad(mach_read_from_4(FSP_SIZE + FSP_HEADER_OFFSET
+ + block->page.frame)
+ == fil_system.sys_space->size_in_header);
+ mtr.commit();
+ }
+#endif
+ const ulint tablespace_size_in_header
+ = fil_system.sys_space->size_in_header;
+ const ulint sum_of_data_file_sizes
+ = srv_sys_space.get_sum_of_sizes();
+ /* Compare the system tablespace file size to what is
+ stored in FSP_SIZE. In srv_sys_space.open_or_create()
+ we already checked that the file sizes match the
+ innodb_data_file_path specification. */
+ if (srv_read_only_mode
+ || sum_of_data_file_sizes == tablespace_size_in_header) {
+ /* Do not complain about the size. */
+ } else if (!srv_sys_space.can_auto_extend_last_file()
+ || sum_of_data_file_sizes
+ < tablespace_size_in_header) {
+ ib::error() << "Tablespace size stored in header is "
+ << tablespace_size_in_header
+ << " pages, but the sum of data file sizes is "
+ << sum_of_data_file_sizes << " pages";
+
+ if (srv_force_recovery == 0
+ && sum_of_data_file_sizes
+ < tablespace_size_in_header) {
+ ib::error() <<
+ "Cannot start InnoDB. The tail of"
+ " the system tablespace is"
+ " missing. Have you edited"
+ " innodb_data_file_path in my.cnf"
+ " in an inappropriate way, removing"
+ " data files from there?"
+ " You can set innodb_force_recovery=1"
+ " in my.cnf to force"
+ " a startup if you are trying to"
+ " recover a badly corrupt database.";
+
+ return(srv_init_abort(DB_ERROR));
+ }
+ }
+
+ if (srv_operation > SRV_OPERATION_EXPORT_RESTORED) {
+ ut_ad(srv_operation == SRV_OPERATION_RESTORE_EXPORT
+ || srv_operation == SRV_OPERATION_RESTORE);
+ return(err);
+ }
+
+ /* Upgrade or resize or rebuild the redo logs before
+ generating any dirty pages, so that the old redo log
+ file will not be written to. */
+
+ if (srv_force_recovery == SRV_FORCE_NO_LOG_REDO) {
+ /* Completely ignore the redo log. */
+ } else if (srv_read_only_mode) {
+ /* Leave the redo log alone. */
+ } else if (log_sys.file_size == srv_log_file_size
+ && log_sys.format
+ == (srv_encrypt_log
+ ? log_t::FORMAT_ENC_10_8
+ : log_t::FORMAT_10_8)) {
+ /* No need to add or remove encryption,
+ upgrade, or resize. */
+ delete_log_files();
+ } else {
+ /* Prepare to delete the old redo log file */
+ const lsn_t lsn{srv_prepare_to_delete_redo_log_file()};
+
+ DBUG_EXECUTE_IF("innodb_log_abort_1",
+ return(srv_init_abort(DB_ERROR)););
+ /* Prohibit redo log writes from any other
+ threads until creating a log checkpoint at the
+ end of create_log_file(). */
+ ut_d(recv_no_log_write = true);
+ ut_ad(!os_aio_pending_reads());
+ ut_d(mysql_mutex_lock(&buf_pool.flush_list_mutex));
+ ut_ad(!buf_pool.get_oldest_modification(0));
+ ut_d(mysql_mutex_unlock(&buf_pool.flush_list_mutex));
+ /* os_aio_pending_writes() may hold here if
+ some write_io_callback() did not release the
+ slot yet. However, the page write itself must
+ have completed, because the buf_pool.flush_list
+ is empty. In debug builds, we wait for this to
+ happen, hoping to get a hung process if this
+ assumption does not hold. */
+ ut_d(os_aio_wait_until_no_pending_writes(false));
+
+ /* Close the redo log file, so that we can replace it */
+ log_sys.close_file();
+
+ DBUG_EXECUTE_IF("innodb_log_abort_5",
+ return(srv_init_abort(DB_ERROR)););
+ DBUG_PRINT("ib_log", ("After innodb_log_abort_5"));
+
+ err = create_log_file(false, lsn);
+
+ if (err == DB_SUCCESS && log_sys.resize_rename()) {
+ err = DB_ERROR;
+ }
+
+ if (err != DB_SUCCESS) {
+ return(srv_init_abort(err));
+ }
+ }
+
+ recv_sys.debug_free();
+ }
+
+ ut_ad(err == DB_SUCCESS);
+ ut_a(sum_of_new_sizes != ULINT_UNDEFINED);
+
+ /* Create the doublewrite buffer to a new tablespace */
+ if (!srv_read_only_mode && srv_force_recovery < SRV_FORCE_NO_TRX_UNDO
+ && !buf_dblwr.create()) {
+ return(srv_init_abort(DB_ERROR));
+ }
+
+ /* Recreate the undo tablespaces */
+ if (!high_level_read_only) {
+ err = srv_undo_tablespaces_reinitialize();
+ if (err) {
+ return srv_init_abort(err);
+ }
+ }
+
+ srv_undo_tablespaces = srv_undo_tablespaces_open;
+
+ /* Here the double write buffer has already been created and so
+ any new rollback segments will be allocated after the double
+ write buffer. The default segment should already exist.
+ We create the new segments only if it's a new database or
+ the database was shutdown cleanly. */
+
+ /* Note: When creating the extra rollback segments during an upgrade
+ we violate the latching order, even if the change buffer is empty.
+ It cannot create a deadlock because we are still
+ running in single threaded mode essentially. Only the IO threads
+ should be running at this stage. */
+
+ if (!trx_sys_create_rsegs()) {
+ return(srv_init_abort(DB_ERROR));
+ }
+
+ if (!create_new_db) {
+ ut_ad(high_level_read_only
+ || srv_force_recovery < SRV_FORCE_NO_UNDO_LOG_SCAN);
+
+ /* Validate a few system page types that were left
+ uninitialized before MySQL or MariaDB 5.5. */
+ if (!high_level_read_only
+ && !fil_system.sys_space->full_crc32()) {
+ buf_block_t* block;
+ mtr.start();
+ /* Bitmap page types will be reset in
+ buf_dblwr_check_block() without redo logging. */
+ block = buf_page_get(
+ page_id_t(IBUF_SPACE_ID,
+ FSP_IBUF_HEADER_PAGE_NO),
+ 0, RW_X_LATCH, &mtr);
+ if (UNIV_UNLIKELY(!block)) {
+ corrupted_old_page:
+ mtr.commit();
+ return srv_init_abort(DB_CORRUPTION);
+ }
+ fil_block_check_type(*block, FIL_PAGE_TYPE_SYS, &mtr);
+ /* Already MySQL 3.23.53 initialized
+ FSP_IBUF_TREE_ROOT_PAGE_NO to
+ FIL_PAGE_INDEX. No need to reset that one. */
+ block = buf_page_get(
+ page_id_t(TRX_SYS_SPACE, TRX_SYS_PAGE_NO),
+ 0, RW_X_LATCH, &mtr);
+ if (UNIV_UNLIKELY(!block)) {
+ goto corrupted_old_page;
+ }
+ fil_block_check_type(*block, FIL_PAGE_TYPE_TRX_SYS,
+ &mtr);
+ block = buf_page_get(
+ page_id_t(TRX_SYS_SPACE,
+ FSP_FIRST_RSEG_PAGE_NO),
+ 0, RW_X_LATCH, &mtr);
+ if (UNIV_UNLIKELY(!block)) {
+ goto corrupted_old_page;
+ }
+ fil_block_check_type(*block, FIL_PAGE_TYPE_SYS, &mtr);
+ block = buf_page_get(
+ page_id_t(TRX_SYS_SPACE, FSP_DICT_HDR_PAGE_NO),
+ 0, RW_X_LATCH, &mtr);
+ if (UNIV_UNLIKELY(!block)) {
+ goto corrupted_old_page;
+ }
+ fil_block_check_type(*block, FIL_PAGE_TYPE_SYS, &mtr);
+ mtr.commit();
+ }
+
+ /* Roll back any recovered data dictionary
+ transactions, so that the data dictionary tables will
+ be free of any locks. The data dictionary latch
+ should guarantee that there is at most one data
+ dictionary transaction active at a time. */
+ if (!high_level_read_only
+ && srv_force_recovery <= SRV_FORCE_NO_TRX_UNDO) {
+ /* If the following call is ever removed, the
+ first-time ha_innobase::open() must hold (or
+ acquire and release) a table lock that
+ conflicts with trx_resurrect_table_locks(), to
+ ensure that any recovered incomplete ALTER
+ TABLE will have been rolled back. Otherwise,
+ dict_table_t::instant could be cleared by
+ rollback invoking
+ dict_index_t::clear_instant_alter() while open
+ table handles exist in client connections. */
+ trx_rollback_recovered(false);
+ }
+
+ if (srv_force_recovery < SRV_FORCE_NO_UNDO_LOG_SCAN) {
+ /* The following call is necessary for the insert
+ buffer to work with multiple tablespaces. We must
+ know the mapping between space id's and .ibd file
+ names.
+
+ In a crash recovery, we check that the info in data
+ dictionary is consistent with what we already know
+ about space id's from the calls to fil_ibd_load().
+
+ In a normal startup, we create the space objects for
+ every table in the InnoDB data dictionary that has
+ an .ibd file.
+
+ We also determine the maximum tablespace id used. */
+ dict_check_tablespaces_and_store_max_id();
+ }
+
+ if (srv_force_recovery < SRV_FORCE_NO_TRX_UNDO
+ && !srv_read_only_mode) {
+ /* Drop partially created indexes. */
+ row_merge_drop_temp_indexes();
+ /* Rollback incomplete non-DDL transactions */
+ trx_rollback_is_active = true;
+ srv_thread_pool->submit_task(&rollback_all_recovered_task);
+ }
+ }
+
+ srv_startup_is_before_trx_rollback_phase = false;
+
+ if (!srv_read_only_mode) {
+ DBUG_EXECUTE_IF("innodb_skip_monitors", goto skip_monitors;);
+ /* Create the task which warns of long semaphore waits */
+ srv_start_periodic_timer(srv_monitor_timer, srv_monitor_task,
+ SRV_MONITOR_INTERVAL);
+
+#ifndef DBUG_OFF
+skip_monitors:
+#endif
+ ut_ad(srv_force_recovery >= SRV_FORCE_NO_UNDO_LOG_SCAN
+ || !purge_sys.enabled());
+
+ if (srv_force_recovery < SRV_FORCE_NO_BACKGROUND) {
+ srv_undo_sources = true;
+ /* Create the dict stats gathering task */
+ dict_stats_start();
+ /* Create the thread that will optimize the
+ FULLTEXT search index subsystem. */
+ fts_optimize_init();
+ }
+ }
+
+ err = dict_sys.create_or_check_sys_tables();
+ switch (err) {
+ case DB_SUCCESS:
+ break;
+ case DB_READ_ONLY:
+ if (srv_force_recovery >= SRV_FORCE_NO_TRX_UNDO) {
+ break;
+ }
+ ib::error() << "Cannot create system tables in read-only mode";
+ /* fall through */
+ default:
+ return(srv_init_abort(err));
+ }
+
+ if (!srv_read_only_mode
+ && srv_operation <= SRV_OPERATION_EXPORT_RESTORED) {
+ /* Initialize the innodb_temporary tablespace and keep
+ it open until shutdown. */
+ err = srv_open_tmp_tablespace(create_new_db);
+
+ if (err != DB_SUCCESS) {
+ return(srv_init_abort(err));
+ }
+
+ if (srv_force_recovery < SRV_FORCE_NO_BACKGROUND) {
+ srv_start_periodic_timer(srv_master_timer, srv_master_callback, 1000);
+ }
+ }
+
+ srv_is_being_started = false;
+
+ if (srv_print_verbose_log) {
+ sql_print_information("InnoDB: "
+ "log sequence number " LSN_PF
+#ifdef HAVE_PMEM
+ "%s"
+#endif
+ "; transaction id " TRX_ID_FMT,
+ recv_sys.lsn,
+#ifdef HAVE_PMEM
+ log_sys.is_pmem()
+ ? " (memory-mapped)" : "",
+#endif
+ trx_sys.get_max_trx_id());
+ }
+
+ if (srv_force_recovery == 0) {
+ /* In the change buffer we may have even bigger tablespace
+ id's, because we may have dropped those tablespaces, but
+ the buffered records have not been cleaned yet. */
+ ibuf_update_max_tablespace_id();
+ }
+
+ if (!srv_read_only_mode) {
+ if (create_new_db) {
+ srv_buffer_pool_load_at_startup = FALSE;
+ }
+
+#ifdef WITH_WSREP
+ /*
+ Create the dump/load thread only when not running with
+ --wsrep-recover.
+ */
+ if (!get_wsrep_recovery()) {
+#endif /* WITH_WSREP */
+
+ /* Start buffer pool dump/load task */
+ buf_load_at_startup();
+
+#ifdef WITH_WSREP
+ } else {
+ ib::warn() <<
+ "Skipping buffer pool dump/restore during "
+ "wsrep recovery.";
+ }
+#endif /* WITH_WSREP */
+
+ /* Create thread(s) that handles key rotation. This is
+ needed already here as log_preflush_pool_modified_pages
+ will flush dirty pages and that might need e.g.
+ fil_crypt_threads_cond. */
+ fil_crypt_threads_init();
+
+ /* Initialize online defragmentation. */
+ btr_defragment_init();
+
+ srv_started_redo = true;
+ }
+
+ return(DB_SUCCESS);
+}
+
+/**
+  Stop purge and the other background sources of undo log ahead of the
+  actual shutdown, so that there is no possibility that we call any
+  plugin code (e.g., audit) inside virtual column computation.
+  Only the first invocation does any work; later calls return at once.
+*/
+void innodb_preshutdown()
+{
+  static bool already_invoked= false;
+  if (already_invoked)
+    return;
+  already_invoked= true;
+
+  if (srv_read_only_mode)
+    return;
+
+  const bool slow_shutdown=
+    !srv_fast_shutdown && srv_operation <= SRV_OPERATION_EXPORT_RESTORED;
+  if (slow_shutdown)
+  {
+    /* A slow shutdown has to empty the change buffer, so stop any
+    further changes from being buffered. */
+    innodb_change_buffering= 0;
+
+    if (trx_sys.is_initialised())
+    {
+      /* Poll once per millisecond until no transaction is active. */
+      const std::chrono::milliseconds poll_interval(1);
+      while (trx_sys.any_active_transactions())
+        std::this_thread::sleep_for(poll_interval);
+    }
+  }
+
+  srv_shutdown_bg_undo_sources();
+  srv_purge_shutdown();
+
+  /* Ask for zero encryption threads if any were configured. */
+  if (srv_n_fil_crypt_threads != 0)
+    fil_crypt_set_thread_cnt(0);
+}
+
+
+/** Shut down InnoDB.
+Runs innodb_preshutdown() first and then does the work that depends on
+srv_operation:
+- SRV_OPERATION_BACKUP, SRV_OPERATION_RESTORE_DELTA and
+SRV_OPERATION_BACKUP_NO_DEFER: nothing extra;
+- SRV_OPERATION_RESTORE and SRV_OPERATION_RESTORE_EXPORT: set
+srv_shutdown_state to SRV_SHUTDOWN_CLEANUP and wait until
+buf_page_cleaner_is_active is no longer set;
+- SRV_OPERATION_NORMAL and SRV_OPERATION_EXPORT_RESTORED: call
+logs_empty_and_mark_files_at_shutdown().
+After that the asynchronous I/O, the files, the remaining threads and
+the individual subsystems are released in the order written below, and
+finally srv_started_redo, srv_was_started and
+srv_start_has_been_called are reset to false. */
+void innodb_shutdown()
+{
+ innodb_preshutdown();
+ ut_ad(!srv_undo_sources);
+ switch (srv_operation) {
+ case SRV_OPERATION_BACKUP:
+ case SRV_OPERATION_RESTORE_DELTA:
+ case SRV_OPERATION_BACKUP_NO_DEFER:
+ break; /* no operation-specific work for these modes */
+ case SRV_OPERATION_RESTORE:
+ case SRV_OPERATION_RESTORE_EXPORT:
+ mysql_mutex_lock(&buf_pool.flush_list_mutex);
+ srv_shutdown_state = SRV_SHUTDOWN_CLEANUP;
+ while (buf_page_cleaner_is_active) { /* signal do_flush_list and
+ then wait on done_flush_list, repeating for as long as the flag
+ remains set; flush_list_mutex is held around the loop */
+ pthread_cond_signal(&buf_pool.do_flush_list);
+ my_cond_wait(&buf_pool.done_flush_list,
+ &buf_pool.flush_list_mutex.m_mutex);
+ }
+ mysql_mutex_unlock(&buf_pool.flush_list_mutex);
+ break;
+ case SRV_OPERATION_NORMAL:
+ case SRV_OPERATION_EXPORT_RESTORED:
+ /* Shut down the persistent files. This is the last case label,
+ so control simply leaves the switch afterwards. */
+ logs_empty_and_mark_files_at_shutdown();
+ }
+
+ os_aio_free();
+ fil_space_t::close_all();
+ /* Exit any remaining threads. The assertion below expects that
+ buf_page_cleaner_is_active has already been cleared by now. */
+ ut_ad(!buf_page_cleaner_is_active);
+ srv_shutdown_threads();
+
+ if (srv_monitor_file) { /* close the monitor file; if it has a
+ name, also unlink that file and free the name string */
+ my_fclose(srv_monitor_file, MYF(MY_WME));
+ srv_monitor_file = 0;
+ if (srv_monitor_file_name) {
+ unlink(srv_monitor_file_name);
+ ut_free(srv_monitor_file_name);
+ }
+ }
+
+ if (srv_misc_tmpfile) {
+ my_fclose(srv_misc_tmpfile, MYF(MY_WME));
+ srv_misc_tmpfile = 0;
+ }
+
+ ut_ad(dict_sys.is_initialised() || !srv_was_started); /* each of
+ the following assertions is vacuous unless srv_was_started is set */
+ ut_ad(trx_sys.is_initialised() || !srv_was_started);
+ ut_ad(buf_dblwr.is_created() || !srv_was_started
+ || srv_read_only_mode
+ || srv_force_recovery >= SRV_FORCE_NO_TRX_UNDO);
+ ut_ad(lock_sys.is_initialised() || !srv_was_started);
+ ut_ad(log_sys.is_initialised() || !srv_was_started);
+ ut_ad(ibuf.index || !innodb_change_buffering || !srv_was_started
+ || srv_force_recovery >= SRV_FORCE_NO_DDL_UNDO);
+
+ dict_stats_deinit();
+
+ if (srv_started_redo) {
+ ut_ad(!srv_read_only_mode);
+ /* srv_shutdown_bg_undo_sources() already invoked
+ fts_optimize_shutdown(); dict_stats_shutdown(); */
+
+ fil_crypt_threads_cleanup();
+ btr_defragment_shutdown();
+ }
+
+ /* This must be disabled before closing the buffer pool
+ and closing the data dictionary. ("This" refers to what
+ btr_search_disable() turns off; the call is only compiled in
+ with BTR_CUR_HASH_ADAPT and only made if dict_sys is initialised.) */
+
+#ifdef BTR_CUR_HASH_ADAPT
+ if (dict_sys.is_initialised()) {
+ btr_search_disable();
+ }
+#endif /* BTR_CUR_HASH_ADAPT */
+ ibuf_close();
+ log_sys.close();
+ purge_sys.close();
+ trx_sys.close();
+ buf_dblwr.close();
+ lock_sys.close();
+ trx_pool_close();
+
+ if (!srv_read_only_mode) { /* NOTE(review): presumably these two
+ mutexes are only created when not in read-only mode; confirm
+ against the startup code */
+ mysql_mutex_destroy(&srv_monitor_file_mutex);
+ mysql_mutex_destroy(&srv_misc_tmpfile_mutex);
+ }
+
+ dict_sys.close();
+ btr_search_sys_free();
+ srv_free();
+ fil_system.close();
+ pars_lexer_close();
+ recv_sys.close();
+
+ ut_ad(buf_pool.is_initialised() || !srv_was_started);
+ buf_pool.close();
+
+ srv_sys_space.shutdown();
+ if (srv_tmp_space.get_sanity_check_status()) { /* close
+ fil_system.temp_space if it exists, then delete the files of
+ srv_tmp_space */
+ if (fil_system.temp_space) {
+ fil_system.temp_space->close();
+ }
+ srv_tmp_space.delete_files();
+ }
+ srv_tmp_space.shutdown();
+
+ if (srv_stats.pages_page_compression_error)
+ ib::warn() << "Page compression errors: "
+ << srv_stats.pages_page_compression_error;
+
+ if (srv_was_started && srv_print_verbose_log) {
+ ib::info() << "Shutdown completed; log sequence number "
+ << srv_shutdown_lsn
+ << "; transaction id " << trx_sys.get_max_trx_id();
+ }
+ srv_thread_pool_end();
+ srv_started_redo = false; /* reset the startup state flags */
+ srv_was_started = false;
+ srv_start_has_been_called = false;
+}
+
+/** Get the meta-data filename from the table name for a
+single-table tablespace.
+The path is built by fil_make_filepath() with the CFG extension; the
+table's data_dir_path is used as the directory only when
+DICT_TF_HAS_DATA_DIR(table->flags) holds.
+The function asserts (ut_a) that a path was produced and that it fits
+into the output buffer together with its terminating NUL.
+@param[in]	table		table object
+@param[out]	filename	buffer receiving the NUL-terminated filename
+@param[in]	max_len		size of the filename buffer in bytes,
+				including room for the terminating NUL */
+void
+srv_get_meta_data_filename(
+	dict_table_t*	table,
+	char*		filename,
+	ulint		max_len)
+{
+	/* Make sure the data_dir_path is set. */
+	dict_get_and_save_data_dir_path(table);
+
+	const char*	data_dir_path = DICT_TF_HAS_DATA_DIR(table->flags)
+		? table->data_dir_path : nullptr;
+	ut_ad(!DICT_TF_HAS_DATA_DIR(table->flags) || data_dir_path);
+
+	char* const	path = fil_make_filepath(data_dir_path, table->name,
+						 CFG,
+						 data_dir_path != nullptr);
+	ut_a(path);
+
+	/* The copy below writes len + 1 bytes (the name plus its
+	terminating NUL), so the buffer has to be strictly larger than
+	len; accepting max_len == len would write one byte past the end
+	of a max_len-byte buffer. */
+	const ulint	len = strlen(path);
+	ut_a(max_len > len);
+
+	/* The length is already known, so copy the name and its NUL
+	in one pass instead of rescanning the string. */
+	memcpy(filename, path, len + 1);
+
+	ut_free(path);
+}