diff options
author | Daniel Baumann <daniel.baumann@progress-linux.org> | 2024-05-04 18:07:14 +0000 |
---|---|---|
committer | Daniel Baumann <daniel.baumann@progress-linux.org> | 2024-05-04 18:07:14 +0000 |
commit | a175314c3e5827eb193872241446f2f8f5c9d33c (patch) | |
tree | cd3d60ca99ae00829c52a6ca79150a5b6e62528b /storage/innobase/srv | |
parent | Initial commit. (diff) | |
download | mariadb-10.5-a175314c3e5827eb193872241446f2f8f5c9d33c.tar.xz mariadb-10.5-a175314c3e5827eb193872241446f2f8f5c9d33c.zip |
Adding upstream version 1:10.5.12. (refs: upstream/1%10.5.12, upstream)
Signed-off-by: Daniel Baumann <daniel.baumann@progress-linux.org>
Diffstat (limited to 'storage/innobase/srv')
-rw-r--r-- | storage/innobase/srv/srv0mon.cc | 2108 | ||||
-rw-r--r-- | storage/innobase/srv/srv0srv.cc | 2135 | ||||
-rw-r--r-- | storage/innobase/srv/srv0start.cc | 2168 |
3 files changed, 6411 insertions, 0 deletions
diff --git a/storage/innobase/srv/srv0mon.cc b/storage/innobase/srv/srv0mon.cc new file mode 100644 index 00000000..f13af13c --- /dev/null +++ b/storage/innobase/srv/srv0mon.cc @@ -0,0 +1,2108 @@ +/***************************************************************************** + +Copyright (c) 2010, 2016, Oracle and/or its affiliates. All Rights Reserved. +Copyright (c) 2012, Facebook Inc. +Copyright (c) 2013, 2021, MariaDB Corporation. + +This program is free software; you can redistribute it and/or modify it under +the terms of the GNU General Public License as published by the Free Software +Foundation; version 2 of the License. + +This program is distributed in the hope that it will be useful, but WITHOUT +ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS +FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details. + +You should have received a copy of the GNU General Public License along with +this program; if not, write to the Free Software Foundation, Inc., +51 Franklin Street, Fifth Floor, Boston, MA 02110-1335 USA + +*****************************************************************************/ + +/**************************************************//** +@file srv/srv0mon.cc +Database monitor counter interfaces + +Created 12/9/2009 Jimmy Yang +*******************************************************/ + +#include "buf0buf.h" +#include "dict0mem.h" +#include "ibuf0ibuf.h" +#include "lock0lock.h" +#include "mach0data.h" +#include "os0file.h" +#include "srv0mon.h" +#include "srv0srv.h" +#include "trx0rseg.h" +#include "trx0sys.h" + +/* Macro to standardize the counter names for counters in the +"monitor_buf_page" module as they have very structured defines */ +#define MONITOR_BUF_PAGE(name, description, code, op, op_code) \ + {"buffer_page_" op "_" name, "buffer_page_io", \ + "Number of " description " Pages " op, \ + MONITOR_GROUP_MODULE, MONITOR_DEFAULT_START, \ + MONITOR_##code##_##op_code} + +#define 
MONITOR_BUF_PAGE_READ(name, description, code) \ + MONITOR_BUF_PAGE(name, description, code, "read", PAGE_READ) + +#define MONITOR_BUF_PAGE_WRITTEN(name, description, code) \ + MONITOR_BUF_PAGE(name, description, code, "written", PAGE_WRITTEN) + +/** This array defines basic static information of monitor counters, +including each monitor's name, module it belongs to, a short +description and its property/type and corresponding monitor_id. +Please note: If you add a monitor here, please add its corresponding +monitor_id to "enum monitor_id_value" structure in srv0mon.h file. */ + +static monitor_info_t innodb_counter_info[] = +{ + /* A dummy item to mark the module start, this is + to accomodate the default value (0) set for the + global variables with the control system. */ + {"module_start", "module_start", "module_start", + MONITOR_MODULE, + MONITOR_DEFAULT_START, MONITOR_DEFAULT_START}, + + /* ========== Counters for Server Metadata ========== */ + {"module_metadata", "metadata", "Server Metadata", + MONITOR_MODULE, + MONITOR_DEFAULT_START, MONITOR_MODULE_METADATA}, + + {"metadata_table_handles_opened", "metadata", + "Number of table handles opened", + MONITOR_NONE, + MONITOR_DEFAULT_START, MONITOR_TABLE_OPEN}, + + {"metadata_table_handles_closed", "metadata", + "Number of table handles closed", + MONITOR_NONE, + MONITOR_DEFAULT_START, MONITOR_TABLE_CLOSE}, + + {"metadata_table_reference_count", "metadata", + "Table reference counter", + MONITOR_NONE, + MONITOR_DEFAULT_START, MONITOR_TABLE_REFERENCE}, + + /* ========== Counters for Lock Module ========== */ + {"module_lock", "lock", "Lock Module", + MONITOR_MODULE, + MONITOR_DEFAULT_START, MONITOR_MODULE_LOCK}, + + {"lock_deadlocks", "lock", "Number of deadlocks", + MONITOR_DEFAULT_ON, + MONITOR_DEFAULT_START, MONITOR_DEADLOCK}, + + {"lock_timeouts", "lock", "Number of lock timeouts", + MONITOR_DEFAULT_ON, + MONITOR_DEFAULT_START, MONITOR_TIMEOUT}, + + {"lock_rec_lock_waits", "lock", + "Number of times enqueued 
into record lock wait queue", + MONITOR_NONE, + MONITOR_DEFAULT_START, MONITOR_LOCKREC_WAIT}, + + {"lock_table_lock_waits", "lock", + "Number of times enqueued into table lock wait queue", + MONITOR_NONE, + MONITOR_DEFAULT_START, MONITOR_TABLELOCK_WAIT}, + + {"lock_rec_lock_requests", "lock", + "Number of record locks requested", + MONITOR_NONE, + MONITOR_DEFAULT_START, MONITOR_NUM_RECLOCK_REQ}, + + {"lock_rec_lock_created", "lock", "Number of record locks created", + MONITOR_NONE, + MONITOR_DEFAULT_START, MONITOR_RECLOCK_CREATED}, + + {"lock_rec_lock_removed", "lock", + "Number of record locks removed from the lock queue", + MONITOR_NONE, + MONITOR_DEFAULT_START, MONITOR_RECLOCK_REMOVED}, + + {"lock_rec_locks", "lock", + "Current number of record locks on tables", + MONITOR_NONE, + MONITOR_DEFAULT_START, MONITOR_NUM_RECLOCK}, + + {"lock_table_lock_created", "lock", "Number of table locks created", + MONITOR_NONE, + MONITOR_DEFAULT_START, MONITOR_TABLELOCK_CREATED}, + + {"lock_table_lock_removed", "lock", + "Number of table locks removed from the lock queue", + MONITOR_NONE, + MONITOR_DEFAULT_START, MONITOR_TABLELOCK_REMOVED}, + + {"lock_table_locks", "lock", + "Current number of table locks on tables", + MONITOR_NONE, + MONITOR_DEFAULT_START, MONITOR_NUM_TABLELOCK}, + + {"lock_row_lock_current_waits", "lock", + "Number of row locks currently being waited for" + " (innodb_row_lock_current_waits)", + static_cast<monitor_type_t>( + MONITOR_EXISTING | MONITOR_DEFAULT_ON), + MONITOR_DEFAULT_START, MONITOR_OVLD_ROW_LOCK_CURRENT_WAIT}, + + {"lock_row_lock_time", "lock", + "Time spent in acquiring row locks, in milliseconds" + " (innodb_row_lock_time)", + static_cast<monitor_type_t>( + MONITOR_EXISTING | MONITOR_DEFAULT_ON), + MONITOR_DEFAULT_START, MONITOR_OVLD_LOCK_WAIT_TIME}, + + {"lock_row_lock_time_max", "lock", + "The maximum time to acquire a row lock, in milliseconds" + " (innodb_row_lock_time_max)", + static_cast<monitor_type_t>( + MONITOR_EXISTING | 
MONITOR_DISPLAY_CURRENT | MONITOR_DEFAULT_ON), + MONITOR_DEFAULT_START, MONITOR_OVLD_LOCK_MAX_WAIT_TIME}, + + {"lock_row_lock_waits", "lock", + "Number of times a row lock had to be waited for" + " (innodb_row_lock_waits)", + static_cast<monitor_type_t>( + MONITOR_EXISTING | MONITOR_DEFAULT_ON), + MONITOR_DEFAULT_START, MONITOR_OVLD_ROW_LOCK_WAIT}, + + {"lock_row_lock_time_avg", "lock", + "The average time to acquire a row lock, in milliseconds" + " (innodb_row_lock_time_avg)", + static_cast<monitor_type_t>( + MONITOR_EXISTING | MONITOR_DISPLAY_CURRENT | MONITOR_DEFAULT_ON), + MONITOR_DEFAULT_START, MONITOR_OVLD_LOCK_AVG_WAIT_TIME}, + + /* ========== Counters for Buffer Manager and I/O ========== */ + {"module_buffer", "buffer", "Buffer Manager Module", + MONITOR_MODULE, + MONITOR_DEFAULT_START, MONITOR_MODULE_BUFFER}, + + {"buffer_pool_size", "server", + "Server buffer pool size (all buffer pools) in bytes", + static_cast<monitor_type_t>( + MONITOR_EXISTING | MONITOR_DEFAULT_ON | MONITOR_DISPLAY_CURRENT), + MONITOR_DEFAULT_START, MONITOR_OVLD_BUFFER_POOL_SIZE}, + + {"buffer_pool_reads", "buffer", + "Number of reads directly from disk (innodb_buffer_pool_reads)", + static_cast<monitor_type_t>( + MONITOR_EXISTING | MONITOR_DEFAULT_ON), + MONITOR_DEFAULT_START, MONITOR_OVLD_BUF_POOL_READS}, + + {"buffer_pool_read_requests", "buffer", + "Number of logical read requests (innodb_buffer_pool_read_requests)", + static_cast<monitor_type_t>( + MONITOR_EXISTING | MONITOR_DEFAULT_ON), + MONITOR_DEFAULT_START, MONITOR_OVLD_BUF_POOL_READ_REQUESTS}, + + {"buffer_pool_write_requests", "buffer", + "Number of write requests (innodb_buffer_pool_write_requests)", + static_cast<monitor_type_t>( + MONITOR_EXISTING | MONITOR_DEFAULT_ON), + MONITOR_DEFAULT_START, MONITOR_OVLD_BUF_POOL_WRITE_REQUEST}, + + {"buffer_pool_wait_free", "buffer", + "Number of times waited for free buffer" + " (innodb_buffer_pool_wait_free)", + static_cast<monitor_type_t>( + MONITOR_EXISTING | 
MONITOR_DEFAULT_ON), + MONITOR_DEFAULT_START, MONITOR_OVLD_BUF_POOL_WAIT_FREE}, + + {"buffer_pool_read_ahead", "buffer", + "Number of pages read as read ahead (innodb_buffer_pool_read_ahead)", + static_cast<monitor_type_t>( + MONITOR_EXISTING | MONITOR_DEFAULT_ON), + MONITOR_DEFAULT_START, MONITOR_OVLD_BUF_POOL_READ_AHEAD}, + + {"buffer_pool_read_ahead_evicted", "buffer", + "Read-ahead pages evicted without being accessed" + " (innodb_buffer_pool_read_ahead_evicted)", + static_cast<monitor_type_t>( + MONITOR_EXISTING | MONITOR_DEFAULT_ON), + MONITOR_DEFAULT_START, MONITOR_OVLD_BUF_POOL_READ_AHEAD_EVICTED}, + + {"buffer_pool_pages_total", "buffer", + "Total buffer pool size in pages (innodb_buffer_pool_pages_total)", + static_cast<monitor_type_t>( + MONITOR_EXISTING | MONITOR_DISPLAY_CURRENT | MONITOR_DEFAULT_ON), + MONITOR_DEFAULT_START, MONITOR_OVLD_BUF_POOL_PAGE_TOTAL}, + + {"buffer_pool_pages_misc", "buffer", + "Buffer pages for misc use such as row locks or the adaptive" + " hash index (innodb_buffer_pool_pages_misc)", + static_cast<monitor_type_t>( + MONITOR_EXISTING | MONITOR_DISPLAY_CURRENT | MONITOR_DEFAULT_ON), + MONITOR_DEFAULT_START, MONITOR_OVLD_BUF_POOL_PAGE_MISC}, + + {"buffer_pool_pages_data", "buffer", + "Buffer pages containing data (innodb_buffer_pool_pages_data)", + static_cast<monitor_type_t>( + MONITOR_EXISTING | MONITOR_DISPLAY_CURRENT | MONITOR_DEFAULT_ON), + MONITOR_DEFAULT_START, MONITOR_OVLD_BUF_POOL_PAGES_DATA}, + + {"buffer_pool_bytes_data", "buffer", + "Buffer bytes containing data (innodb_buffer_pool_bytes_data)", + static_cast<monitor_type_t>( + MONITOR_EXISTING | MONITOR_DISPLAY_CURRENT | MONITOR_DEFAULT_ON), + MONITOR_DEFAULT_START, MONITOR_OVLD_BUF_POOL_BYTES_DATA}, + + {"buffer_pool_pages_dirty", "buffer", + "Buffer pages currently dirty (innodb_buffer_pool_pages_dirty)", + static_cast<monitor_type_t>( + MONITOR_EXISTING | MONITOR_DISPLAY_CURRENT | MONITOR_DEFAULT_ON), + MONITOR_DEFAULT_START, MONITOR_OVLD_BUF_POOL_PAGES_DIRTY}, + 
+ {"buffer_pool_bytes_dirty", "buffer", + "Buffer bytes currently dirty (innodb_buffer_pool_bytes_dirty)", + static_cast<monitor_type_t>( + MONITOR_EXISTING | MONITOR_DISPLAY_CURRENT | MONITOR_DEFAULT_ON), + MONITOR_DEFAULT_START, MONITOR_OVLD_BUF_POOL_BYTES_DIRTY}, + + {"buffer_pool_pages_free", "buffer", + "Buffer pages currently free (innodb_buffer_pool_pages_free)", + static_cast<monitor_type_t>( + MONITOR_EXISTING | MONITOR_DISPLAY_CURRENT | MONITOR_DEFAULT_ON), + MONITOR_DEFAULT_START, MONITOR_OVLD_BUF_POOL_PAGES_FREE}, + + {"buffer_pages_created", "buffer", + "Number of pages created (innodb_pages_created)", + static_cast<monitor_type_t>( + MONITOR_EXISTING | MONITOR_DEFAULT_ON), + MONITOR_DEFAULT_START, MONITOR_OVLD_PAGE_CREATED}, + + {"buffer_pages_written", "buffer", + "Number of pages written (innodb_pages_written)", + static_cast<monitor_type_t>( + MONITOR_EXISTING | MONITOR_DEFAULT_ON), + MONITOR_DEFAULT_START, MONITOR_OVLD_PAGES_WRITTEN}, + + {"buffer_index_pages_written", "buffer", + "Number of index pages written (innodb_index_pages_written)", + static_cast<monitor_type_t>( + MONITOR_EXISTING | MONITOR_DEFAULT_ON), + MONITOR_DEFAULT_START, MONITOR_OVLD_INDEX_PAGES_WRITTEN}, + + {"buffer_non_index_pages_written", "buffer", + "Number of non index pages written (innodb_non_index_pages_written)", + static_cast<monitor_type_t>( + MONITOR_EXISTING | MONITOR_DEFAULT_ON), + MONITOR_DEFAULT_START, MONITOR_OVLD_NON_INDEX_PAGES_WRITTEN}, + + {"buffer_pages_read", "buffer", + "Number of pages read (innodb_pages_read)", + static_cast<monitor_type_t>( + MONITOR_EXISTING | MONITOR_DEFAULT_ON), + MONITOR_DEFAULT_START, MONITOR_OVLD_PAGES_READ}, + + {"buffer_index_sec_rec_cluster_reads", "buffer", + "Number of secondary record reads triggered cluster read", + static_cast<monitor_type_t>( + MONITOR_EXISTING | MONITOR_DEFAULT_ON), + MONITOR_DEFAULT_START, MONITOR_OVLD_INDEX_SEC_REC_CLUSTER_READS}, + + {"buffer_index_sec_rec_cluster_reads_avoided", "buffer", + "Number 
of secondary record reads avoided triggering cluster read", + static_cast<monitor_type_t>( + MONITOR_EXISTING | MONITOR_DEFAULT_ON), + MONITOR_DEFAULT_START, MONITOR_OVLD_INDEX_SEC_REC_CLUSTER_READS_AVOIDED}, + + {"buffer_data_reads", "buffer", + "Amount of data read in bytes (innodb_data_reads)", + static_cast<monitor_type_t>( + MONITOR_EXISTING | MONITOR_DEFAULT_ON), + MONITOR_DEFAULT_START, MONITOR_OVLD_BYTE_READ}, + + {"buffer_data_written", "buffer", + "Amount of data written in bytes (innodb_data_written)", + static_cast<monitor_type_t>( + MONITOR_EXISTING | MONITOR_DEFAULT_ON), + MONITOR_DEFAULT_START, MONITOR_OVLD_BYTE_WRITTEN}, + + /* Cumulative counter for scanning in flush batches */ + {"buffer_flush_batch_scanned", "buffer", + "Total pages scanned as part of flush batch", + MONITOR_SET_OWNER, + MONITOR_FLUSH_BATCH_SCANNED_NUM_CALL, + MONITOR_FLUSH_BATCH_SCANNED}, + + {"buffer_flush_batch_num_scan", "buffer", + "Number of times buffer flush list flush is called", + MONITOR_SET_MEMBER, MONITOR_FLUSH_BATCH_SCANNED, + MONITOR_FLUSH_BATCH_SCANNED_NUM_CALL}, + + {"buffer_flush_batch_scanned_per_call", "buffer", + "Pages scanned per flush batch scan", + MONITOR_SET_MEMBER, MONITOR_FLUSH_BATCH_SCANNED, + MONITOR_FLUSH_BATCH_SCANNED_PER_CALL}, + + /* Cumulative counter for pages flushed in flush batches */ + {"buffer_flush_batch_total_pages", "buffer", + "Total pages flushed as part of flush batch", + MONITOR_SET_OWNER, MONITOR_FLUSH_BATCH_COUNT, + MONITOR_FLUSH_BATCH_TOTAL_PAGE}, + + {"buffer_flush_batches", "buffer", + "Number of flush batches", + MONITOR_SET_MEMBER, MONITOR_FLUSH_BATCH_TOTAL_PAGE, + MONITOR_FLUSH_BATCH_COUNT}, + + {"buffer_flush_batch_pages", "buffer", + "Pages queued as a flush batch", + MONITOR_SET_MEMBER, MONITOR_FLUSH_BATCH_TOTAL_PAGE, + MONITOR_FLUSH_BATCH_PAGES}, + + /* Cumulative counter for flush batches because of neighbor */ + {"buffer_flush_neighbor_total_pages", "buffer", + "Total neighbors flushed as part of neighbor flush", + 
MONITOR_SET_OWNER, MONITOR_FLUSH_NEIGHBOR_COUNT, + MONITOR_FLUSH_NEIGHBOR_TOTAL_PAGE}, + + {"buffer_flush_neighbor", "buffer", + "Number of times neighbors flushing is invoked", + MONITOR_SET_MEMBER, MONITOR_FLUSH_NEIGHBOR_TOTAL_PAGE, + MONITOR_FLUSH_NEIGHBOR_COUNT}, + + {"buffer_flush_neighbor_pages", "buffer", + "Pages queued as a neighbor batch", + MONITOR_SET_MEMBER, MONITOR_FLUSH_NEIGHBOR_TOTAL_PAGE, + MONITOR_FLUSH_NEIGHBOR_PAGES}, + + {"buffer_flush_n_to_flush_requested", "buffer", + "Number of pages requested for flushing.", + MONITOR_NONE, + MONITOR_DEFAULT_START, MONITOR_FLUSH_N_TO_FLUSH_REQUESTED}, + + {"buffer_flush_n_to_flush_by_age", "buffer", + "Number of pages target by LSN Age for flushing.", + MONITOR_NONE, + MONITOR_DEFAULT_START, MONITOR_FLUSH_N_TO_FLUSH_BY_AGE}, + + {"buffer_flush_adaptive_avg_time", "buffer", + "Avg time (ms) spent for adaptive flushing recently.", + MONITOR_NONE, + MONITOR_DEFAULT_START, MONITOR_FLUSH_ADAPTIVE_AVG_TIME}, + + {"buffer_flush_adaptive_avg_pass", "buffer", + "Number of adaptive flushes passed during the recent Avg period.", + MONITOR_NONE, + MONITOR_DEFAULT_START, MONITOR_FLUSH_ADAPTIVE_AVG_PASS}, + + {"buffer_LRU_get_free_loops", "buffer", + "Total loops in LRU get free.", + MONITOR_NONE, + MONITOR_DEFAULT_START, MONITOR_LRU_GET_FREE_LOOPS}, + + {"buffer_LRU_get_free_waits", "buffer", + "Total sleep waits in LRU get free.", + MONITOR_NONE, + MONITOR_DEFAULT_START, MONITOR_LRU_GET_FREE_WAITS}, + + {"buffer_flush_avg_page_rate", "buffer", + "Average number of pages at which flushing is happening", + MONITOR_NONE, + MONITOR_DEFAULT_START, MONITOR_FLUSH_AVG_PAGE_RATE}, + + {"buffer_flush_lsn_avg_rate", "buffer", + "Average redo generation rate", + MONITOR_NONE, + MONITOR_DEFAULT_START, MONITOR_FLUSH_LSN_AVG_RATE}, + + {"buffer_flush_pct_for_dirty", "buffer", + "Percent of IO capacity used to avoid max dirty page limit", + MONITOR_NONE, + MONITOR_DEFAULT_START, MONITOR_FLUSH_PCT_FOR_DIRTY}, + + 
{"buffer_flush_pct_for_lsn", "buffer", + "Percent of IO capacity used to avoid reusable redo space limit", + MONITOR_NONE, + MONITOR_DEFAULT_START, MONITOR_FLUSH_PCT_FOR_LSN}, + + {"buffer_flush_sync_waits", "buffer", + "Number of times a wait happens due to sync flushing", + MONITOR_NONE, + MONITOR_DEFAULT_START, MONITOR_FLUSH_SYNC_WAITS}, + + /* Cumulative counter for flush batches for adaptive flushing */ + {"buffer_flush_adaptive_total_pages", "buffer", + "Total pages flushed as part of adaptive flushing", + MONITOR_SET_OWNER, MONITOR_FLUSH_ADAPTIVE_COUNT, + MONITOR_FLUSH_ADAPTIVE_TOTAL_PAGE}, + + {"buffer_flush_adaptive", "buffer", + "Number of adaptive batches", + MONITOR_SET_MEMBER, MONITOR_FLUSH_ADAPTIVE_TOTAL_PAGE, + MONITOR_FLUSH_ADAPTIVE_COUNT}, + + {"buffer_flush_adaptive_pages", "buffer", + "Pages queued as an adaptive batch", + MONITOR_SET_MEMBER, MONITOR_FLUSH_ADAPTIVE_TOTAL_PAGE, + MONITOR_FLUSH_ADAPTIVE_PAGES}, + + /* Cumulative counter for flush batches because of sync */ + {"buffer_flush_sync_total_pages", "buffer", + "Total pages flushed as part of sync batches", + MONITOR_SET_OWNER, MONITOR_FLUSH_SYNC_COUNT, + MONITOR_FLUSH_SYNC_TOTAL_PAGE}, + + {"buffer_flush_sync", "buffer", + "Number of sync batches", + MONITOR_SET_MEMBER, MONITOR_FLUSH_SYNC_TOTAL_PAGE, + MONITOR_FLUSH_SYNC_COUNT}, + + {"buffer_flush_sync_pages", "buffer", + "Pages queued as a sync batch", + MONITOR_SET_MEMBER, MONITOR_FLUSH_SYNC_TOTAL_PAGE, + MONITOR_FLUSH_SYNC_PAGES}, + + /* Cumulative counter for flush batches because of background */ + {"buffer_flush_background_total_pages", "buffer", + "Total pages flushed as part of background batches", + MONITOR_SET_OWNER, MONITOR_FLUSH_BACKGROUND_COUNT, + MONITOR_FLUSH_BACKGROUND_TOTAL_PAGE}, + + {"buffer_flush_background", "buffer", + "Number of background batches", + MONITOR_SET_MEMBER, MONITOR_FLUSH_BACKGROUND_TOTAL_PAGE, + MONITOR_FLUSH_BACKGROUND_COUNT}, + + {"buffer_flush_background_pages", "buffer", + "Pages queued as a 
background batch", + MONITOR_SET_MEMBER, MONITOR_FLUSH_BACKGROUND_TOTAL_PAGE, + MONITOR_FLUSH_BACKGROUND_PAGES}, + + /* Cumulative counter for LRU batch scan */ + {"buffer_LRU_batch_scanned", "buffer", + "Total pages scanned as part of LRU batch", + MONITOR_SET_OWNER, MONITOR_LRU_BATCH_SCANNED_NUM_CALL, + MONITOR_LRU_BATCH_SCANNED}, + + {"buffer_LRU_batch_num_scan", "buffer", + "Number of times LRU batch is called", + MONITOR_SET_MEMBER, MONITOR_LRU_BATCH_SCANNED, + MONITOR_LRU_BATCH_SCANNED_NUM_CALL}, + + {"buffer_LRU_batch_scanned_per_call", "buffer", + "Pages scanned per LRU batch call", + MONITOR_SET_MEMBER, MONITOR_LRU_BATCH_SCANNED, + MONITOR_LRU_BATCH_SCANNED_PER_CALL}, + + /* Cumulative counter for LRU batch pages flushed */ + {"buffer_LRU_batch_flush_total_pages", "buffer", + "Total pages flushed as part of LRU batches", + MONITOR_SET_OWNER, MONITOR_LRU_BATCH_FLUSH_COUNT, + MONITOR_LRU_BATCH_FLUSH_TOTAL_PAGE}, + + {"buffer_LRU_batches_flush", "buffer", + "Number of LRU batches", + MONITOR_SET_MEMBER, MONITOR_LRU_BATCH_FLUSH_TOTAL_PAGE, + MONITOR_LRU_BATCH_FLUSH_COUNT}, + + {"buffer_LRU_batch_flush_pages", "buffer", + "Pages queued as an LRU batch", + MONITOR_SET_MEMBER, MONITOR_LRU_BATCH_FLUSH_TOTAL_PAGE, + MONITOR_LRU_BATCH_FLUSH_PAGES}, + + /* Cumulative counter for LRU batch pages flushed */ + {"buffer_LRU_batch_evict_total_pages", "buffer", + "Total pages evicted as part of LRU batches", + MONITOR_SET_OWNER, MONITOR_LRU_BATCH_EVICT_COUNT, + MONITOR_LRU_BATCH_EVICT_TOTAL_PAGE}, + + {"buffer_LRU_batches_evict", "buffer", + "Number of LRU batches", + MONITOR_SET_MEMBER, MONITOR_LRU_BATCH_EVICT_TOTAL_PAGE, + MONITOR_LRU_BATCH_EVICT_COUNT}, + + {"buffer_LRU_batch_evict_pages", "buffer", + "Pages queued as an LRU batch", + MONITOR_SET_MEMBER, MONITOR_LRU_BATCH_EVICT_TOTAL_PAGE, + MONITOR_LRU_BATCH_EVICT_PAGES}, + + {"buffer_LRU_single_flush_failure_count", "Buffer", + "Number of times attempt to flush a single page from LRU failed", + MONITOR_NONE, + 
MONITOR_DEFAULT_START, MONITOR_LRU_SINGLE_FLUSH_FAILURE_COUNT}, + + {"buffer_LRU_get_free_search", "Buffer", + "Number of searches performed for a clean page", + MONITOR_NONE, + MONITOR_DEFAULT_START, MONITOR_LRU_GET_FREE_SEARCH}, + + /* Cumulative counter for LRU search scans */ + {"buffer_LRU_search_scanned", "buffer", + "Total pages scanned as part of LRU search", + MONITOR_SET_OWNER, + MONITOR_LRU_SEARCH_SCANNED_NUM_CALL, + MONITOR_LRU_SEARCH_SCANNED}, + + {"buffer_LRU_search_num_scan", "buffer", + "Number of times LRU search is performed", + MONITOR_SET_MEMBER, MONITOR_LRU_SEARCH_SCANNED, + MONITOR_LRU_SEARCH_SCANNED_NUM_CALL}, + + {"buffer_LRU_search_scanned_per_call", "buffer", + "Page scanned per single LRU search", + MONITOR_SET_MEMBER, MONITOR_LRU_SEARCH_SCANNED, + MONITOR_LRU_SEARCH_SCANNED_PER_CALL}, + + /* Cumulative counter for LRU unzip search scans */ + {"buffer_LRU_unzip_search_scanned", "buffer", + "Total pages scanned as part of LRU unzip search", + MONITOR_SET_OWNER, + MONITOR_LRU_UNZIP_SEARCH_SCANNED_NUM_CALL, + MONITOR_LRU_UNZIP_SEARCH_SCANNED}, + + {"buffer_LRU_unzip_search_num_scan", "buffer", + "Number of times LRU unzip search is performed", + MONITOR_SET_MEMBER, MONITOR_LRU_UNZIP_SEARCH_SCANNED, + MONITOR_LRU_UNZIP_SEARCH_SCANNED_NUM_CALL}, + + {"buffer_LRU_unzip_search_scanned_per_call", "buffer", + "Page scanned per single LRU unzip search", + MONITOR_SET_MEMBER, MONITOR_LRU_UNZIP_SEARCH_SCANNED, + MONITOR_LRU_UNZIP_SEARCH_SCANNED_PER_CALL}, + + /* ========== Counters for Buffer Page I/O ========== */ + {"module_buffer_page", "buffer_page_io", "Buffer Page I/O Module", + static_cast<monitor_type_t>( + MONITOR_MODULE | MONITOR_GROUP_MODULE), + MONITOR_DEFAULT_START, MONITOR_MODULE_BUF_PAGE}, + + MONITOR_BUF_PAGE_READ("index_leaf","Index Leaf", INDEX_LEAF), + + MONITOR_BUF_PAGE_READ("index_non_leaf","Index Non-leaf", + INDEX_NON_LEAF), + + MONITOR_BUF_PAGE_READ("index_ibuf_leaf", "Insert Buffer Index Leaf", + INDEX_IBUF_LEAF), + + 
MONITOR_BUF_PAGE_READ("index_ibuf_non_leaf", + "Insert Buffer Index Non-Leaf", + INDEX_IBUF_NON_LEAF), + + MONITOR_BUF_PAGE_READ("undo_log", "Undo Log", UNDO_LOG), + + MONITOR_BUF_PAGE_READ("index_inode", "Index Inode", INODE), + + MONITOR_BUF_PAGE_READ("ibuf_free_list", "Insert Buffer Free List", + IBUF_FREELIST), + + MONITOR_BUF_PAGE_READ("ibuf_bitmap", "Insert Buffer Bitmap", + IBUF_BITMAP), + + MONITOR_BUF_PAGE_READ("system_page", "System", SYSTEM), + + MONITOR_BUF_PAGE_READ("trx_system", "Transaction System", TRX_SYSTEM), + + MONITOR_BUF_PAGE_READ("fsp_hdr", "File Space Header", FSP_HDR), + + MONITOR_BUF_PAGE_READ("xdes", "Extent Descriptor", XDES), + + MONITOR_BUF_PAGE_READ("blob", "Uncompressed BLOB", BLOB), + + MONITOR_BUF_PAGE_READ("zblob", "First Compressed BLOB", ZBLOB), + + MONITOR_BUF_PAGE_READ("zblob2", "Subsequent Compressed BLOB", ZBLOB2), + + MONITOR_BUF_PAGE_READ("other", "other/unknown (old version of InnoDB)", + OTHER), + + MONITOR_BUF_PAGE_WRITTEN("index_leaf","Index Leaf", INDEX_LEAF), + + MONITOR_BUF_PAGE_WRITTEN("index_non_leaf","Index Non-leaf", + INDEX_NON_LEAF), + + MONITOR_BUF_PAGE_WRITTEN("index_ibuf_leaf", "Insert Buffer Index Leaf", + INDEX_IBUF_LEAF), + + MONITOR_BUF_PAGE_WRITTEN("index_ibuf_non_leaf", + "Insert Buffer Index Non-Leaf", + INDEX_IBUF_NON_LEAF), + + MONITOR_BUF_PAGE_WRITTEN("undo_log", "Undo Log", UNDO_LOG), + + MONITOR_BUF_PAGE_WRITTEN("index_inode", "Index Inode", INODE), + + MONITOR_BUF_PAGE_WRITTEN("ibuf_free_list", "Insert Buffer Free List", + IBUF_FREELIST), + + MONITOR_BUF_PAGE_WRITTEN("ibuf_bitmap", "Insert Buffer Bitmap", + IBUF_BITMAP), + + MONITOR_BUF_PAGE_WRITTEN("system_page", "System", SYSTEM), + + MONITOR_BUF_PAGE_WRITTEN("trx_system", "Transaction System", + TRX_SYSTEM), + + MONITOR_BUF_PAGE_WRITTEN("fsp_hdr", "File Space Header", FSP_HDR), + + MONITOR_BUF_PAGE_WRITTEN("xdes", "Extent Descriptor", XDES), + + MONITOR_BUF_PAGE_WRITTEN("blob", "Uncompressed BLOB", BLOB), + + 
MONITOR_BUF_PAGE_WRITTEN("zblob", "First Compressed BLOB", ZBLOB), + + MONITOR_BUF_PAGE_WRITTEN("zblob2", "Subsequent Compressed BLOB", + ZBLOB2), + + MONITOR_BUF_PAGE_WRITTEN("other", "other/unknown (old version InnoDB)", + OTHER), + + /* ========== Counters for OS level operations ========== */ + {"module_os", "os", "OS Level Operation", + MONITOR_MODULE, + MONITOR_DEFAULT_START, MONITOR_MODULE_OS}, + + {"os_data_reads", "os", + "Number of reads initiated (innodb_data_reads)", + static_cast<monitor_type_t>( + MONITOR_EXISTING | MONITOR_DEFAULT_ON), + MONITOR_DEFAULT_START, MONITOR_OVLD_OS_FILE_READ}, + + {"os_data_writes", "os", + "Number of writes initiated (innodb_data_writes)", + static_cast<monitor_type_t>( + MONITOR_EXISTING | MONITOR_DEFAULT_ON), + MONITOR_DEFAULT_START, MONITOR_OVLD_OS_FILE_WRITE}, + + {"os_data_fsyncs", "os", + "Number of fsync() calls (innodb_data_fsyncs)", + static_cast<monitor_type_t>( + MONITOR_EXISTING | MONITOR_DEFAULT_ON), + MONITOR_DEFAULT_START, MONITOR_OVLD_OS_FSYNC}, + + {"os_pending_reads", "os", "Number of reads pending", + MONITOR_DEFAULT_ON, + MONITOR_DEFAULT_START, MONITOR_OS_PENDING_READS}, + + {"os_pending_writes", "os", "Number of writes pending", + MONITOR_DEFAULT_ON, + MONITOR_DEFAULT_START, MONITOR_OS_PENDING_WRITES}, + + {"os_log_bytes_written", "os", + "Bytes of log written (innodb_os_log_written)", + static_cast<monitor_type_t>( + MONITOR_EXISTING | MONITOR_DEFAULT_ON), + MONITOR_DEFAULT_START, MONITOR_OVLD_OS_LOG_WRITTEN}, + + {"os_log_fsyncs", "os", + "Number of fsync log writes (innodb_os_log_fsyncs)", + static_cast<monitor_type_t>( + MONITOR_EXISTING | MONITOR_DEFAULT_ON), + MONITOR_DEFAULT_START, MONITOR_OVLD_OS_LOG_FSYNC}, + + {"os_log_pending_fsyncs", "os", + "Number of pending fsync write (innodb_os_log_pending_fsyncs)", + static_cast<monitor_type_t>( + MONITOR_EXISTING | MONITOR_DEFAULT_ON), + MONITOR_DEFAULT_START, MONITOR_OVLD_OS_LOG_PENDING_FSYNC}, + + {"os_log_pending_writes", "os", + "Number of 
pending log file writes (innodb_os_log_pending_writes)", + static_cast<monitor_type_t>( + MONITOR_EXISTING | MONITOR_DEFAULT_ON), + MONITOR_DEFAULT_START, MONITOR_OVLD_OS_LOG_PENDING_WRITES}, + + /* ========== Counters for Transaction Module ========== */ + {"module_trx", "transaction", "Transaction Manager", + MONITOR_MODULE, + MONITOR_DEFAULT_START, MONITOR_MODULE_TRX}, + + {"trx_rw_commits", "transaction", + "Number of read-write transactions committed", + MONITOR_NONE, MONITOR_DEFAULT_START, MONITOR_TRX_RW_COMMIT}, + + {"trx_ro_commits", "transaction", + "Number of read-only transactions committed", + MONITOR_NONE, MONITOR_DEFAULT_START, MONITOR_TRX_RO_COMMIT}, + + {"trx_nl_ro_commits", "transaction", + "Number of non-locking auto-commit read-only transactions committed", + MONITOR_NONE, MONITOR_DEFAULT_START, MONITOR_TRX_NL_RO_COMMIT}, + + {"trx_commits_insert_update", "transaction", + "Number of transactions committed with inserts and updates", + MONITOR_NONE, + MONITOR_DEFAULT_START, MONITOR_TRX_COMMIT_UNDO}, + + {"trx_rollbacks", "transaction", + "Number of transactions rolled back", + MONITOR_NONE, + MONITOR_DEFAULT_START, MONITOR_TRX_ROLLBACK}, + + {"trx_rollbacks_savepoint", "transaction", + "Number of transactions rolled back to savepoint", + MONITOR_NONE, + MONITOR_DEFAULT_START, MONITOR_TRX_ROLLBACK_SAVEPOINT}, + + {"trx_active_transactions", "transaction", + "Number of active transactions", + MONITOR_NONE, + MONITOR_DEFAULT_START, MONITOR_TRX_ACTIVE}, + + {"trx_rseg_history_len", "transaction", + "Length of the TRX_RSEG_HISTORY list", + static_cast<monitor_type_t>( + MONITOR_EXISTING | MONITOR_DISPLAY_CURRENT | MONITOR_DEFAULT_ON), + MONITOR_DEFAULT_START, MONITOR_RSEG_HISTORY_LEN}, + + {"trx_undo_slots_used", "transaction", "Number of undo slots used", + MONITOR_NONE, + MONITOR_DEFAULT_START, MONITOR_NUM_UNDO_SLOT_USED}, + + {"trx_undo_slots_cached", "transaction", + "Number of undo slots cached", + MONITOR_NONE, + MONITOR_DEFAULT_START, 
MONITOR_NUM_UNDO_SLOT_CACHED}, + + {"trx_rseg_current_size", "transaction", + "Current rollback segment size in pages", + static_cast<monitor_type_t>( + MONITOR_EXISTING | MONITOR_DISPLAY_CURRENT), + MONITOR_DEFAULT_START, MONITOR_RSEG_CUR_SIZE}, + + /* ========== Counters for Purge Module ========== */ + {"module_purge", "purge", "Purge Module", + MONITOR_MODULE, + MONITOR_DEFAULT_START, MONITOR_MODULE_PURGE}, + + {"purge_del_mark_records", "purge", + "Number of delete-marked rows purged", + MONITOR_NONE, + MONITOR_DEFAULT_START, MONITOR_N_DEL_ROW_PURGE}, + + {"purge_upd_exist_or_extern_records", "purge", + "Number of purges on updates of existing records and" + " updates on delete marked record with externally stored field", + MONITOR_NONE, + MONITOR_DEFAULT_START, MONITOR_N_UPD_EXIST_EXTERN}, + + {"purge_invoked", "purge", + "Number of times purge was invoked", + MONITOR_NONE, + MONITOR_DEFAULT_START, MONITOR_PURGE_INVOKED}, + + {"purge_undo_log_pages", "purge", + "Number of undo log pages handled by the purge", + MONITOR_NONE, + MONITOR_DEFAULT_START, MONITOR_PURGE_N_PAGE_HANDLED}, + + {"purge_dml_delay_usec", "purge", + "Microseconds DML to be delayed due to purge lagging", + MONITOR_DISPLAY_CURRENT, + MONITOR_DEFAULT_START, MONITOR_DML_PURGE_DELAY}, + + {"purge_stop_count", "purge", + "Number of times purge was stopped", + MONITOR_DISPLAY_CURRENT, + MONITOR_DEFAULT_START, MONITOR_PURGE_STOP_COUNT}, + + {"purge_resume_count", "purge", + "Number of times purge was resumed", + MONITOR_DISPLAY_CURRENT, + MONITOR_DEFAULT_START, MONITOR_PURGE_RESUME_COUNT}, + + /* ========== Counters for Recovery Module ========== */ + {"module_log", "recovery", "Recovery Module", + MONITOR_MODULE, + MONITOR_DEFAULT_START, MONITOR_MODULE_RECOVERY}, + + {"log_checkpoints", "recovery", "Number of checkpoints", + MONITOR_NONE, + MONITOR_DEFAULT_START, MONITOR_NUM_CHECKPOINT}, + + {"log_lsn_last_flush", "recovery", "LSN of Last flush", + static_cast<monitor_type_t>( + MONITOR_EXISTING 
| MONITOR_DISPLAY_CURRENT), + MONITOR_DEFAULT_START, MONITOR_OVLD_LSN_FLUSHDISK}, + + {"log_lsn_last_checkpoint", "recovery", "LSN at last checkpoint", + static_cast<monitor_type_t>( + MONITOR_EXISTING | MONITOR_DISPLAY_CURRENT), + MONITOR_DEFAULT_START, MONITOR_OVLD_LSN_CHECKPOINT}, + + {"log_lsn_current", "recovery", "Current LSN value", + static_cast<monitor_type_t>( + MONITOR_EXISTING | MONITOR_DISPLAY_CURRENT), + MONITOR_DEFAULT_START, MONITOR_OVLD_LSN_CURRENT}, + + {"log_lsn_checkpoint_age", "recovery", + "Current LSN value minus LSN at last checkpoint", + static_cast<monitor_type_t>( + MONITOR_EXISTING | MONITOR_DISPLAY_CURRENT), + MONITOR_DEFAULT_START, MONITOR_LSN_CHECKPOINT_AGE}, + + {"log_lsn_buf_pool_oldest", "recovery", + "The oldest modified block LSN in the buffer pool", + static_cast<monitor_type_t>( + MONITOR_EXISTING | MONITOR_DISPLAY_CURRENT), + MONITOR_DEFAULT_START, MONITOR_OVLD_BUF_OLDEST_LSN}, + + {"log_max_modified_age_async", "recovery", + "Maximum LSN difference; when exceeded, start asynchronous preflush", + static_cast<monitor_type_t>( + MONITOR_EXISTING | MONITOR_DISPLAY_CURRENT), + MONITOR_DEFAULT_START, MONITOR_OVLD_MAX_AGE_ASYNC}, + + {"log_pending_log_flushes", "recovery", "Pending log flushes", + static_cast<monitor_type_t>( + MONITOR_EXISTING | MONITOR_DISPLAY_CURRENT), + MONITOR_DEFAULT_START, MONITOR_PENDING_LOG_FLUSH}, + + {"log_pending_checkpoint_writes", "recovery", "Pending checkpoints", + static_cast<monitor_type_t>( + MONITOR_EXISTING | MONITOR_DISPLAY_CURRENT), + MONITOR_DEFAULT_START, MONITOR_PENDING_CHECKPOINT_WRITE}, + + {"log_num_log_io", "recovery", "Number of log I/Os", + static_cast<monitor_type_t>( + MONITOR_EXISTING | MONITOR_DISPLAY_CURRENT), + MONITOR_DEFAULT_START, MONITOR_LOG_IO}, + + {"log_waits", "recovery", + "Number of log waits due to small log buffer (innodb_log_waits)", + static_cast<monitor_type_t>( + MONITOR_EXISTING | MONITOR_DEFAULT_ON), + MONITOR_DEFAULT_START, MONITOR_OVLD_LOG_WAITS}, + + 
{"log_write_requests", "recovery", + "Number of log write requests (innodb_log_write_requests)", + static_cast<monitor_type_t>( + MONITOR_EXISTING | MONITOR_DEFAULT_ON), + MONITOR_DEFAULT_START, MONITOR_OVLD_LOG_WRITE_REQUEST}, + + {"log_writes", "recovery", + "Number of log writes (innodb_log_writes)", + static_cast<monitor_type_t>( + MONITOR_EXISTING | MONITOR_DEFAULT_ON), + MONITOR_DEFAULT_START, MONITOR_OVLD_LOG_WRITES}, + + {"log_padded", "recovery", + "Bytes of log padded for log write ahead", + static_cast<monitor_type_t>( + MONITOR_EXISTING | MONITOR_DEFAULT_ON), + MONITOR_DEFAULT_START, MONITOR_OVLD_LOG_PADDED}, + + /* ========== Counters for Page Compression ========== */ + {"module_compress", "compression", "Page Compression Info", + MONITOR_MODULE, + MONITOR_DEFAULT_START, MONITOR_MODULE_PAGE}, + + {"compress_pages_compressed", "compression", + "Number of pages compressed", MONITOR_NONE, + MONITOR_DEFAULT_START, MONITOR_PAGE_COMPRESS}, + + {"compress_pages_decompressed", "compression", + "Number of pages decompressed", + MONITOR_NONE, + MONITOR_DEFAULT_START, MONITOR_PAGE_DECOMPRESS}, + + {"compression_pad_increments", "compression", + "Number of times padding is incremented to avoid compression failures", + MONITOR_NONE, + MONITOR_DEFAULT_START, MONITOR_PAD_INCREMENTS}, + + {"compression_pad_decrements", "compression", + "Number of times padding is decremented due to good compressibility", + MONITOR_NONE, + MONITOR_DEFAULT_START, MONITOR_PAD_DECREMENTS}, + + {"compress_saved", "compression", + "Number of bytes saved by page compression", + MONITOR_NONE, + MONITOR_DEFAULT_START, MONITOR_OVLD_PAGE_COMPRESS_SAVED}, + + {"compress_pages_page_compressed", "compression", + "Number of pages compressed by page compression", + MONITOR_NONE, + MONITOR_DEFAULT_START, MONITOR_OVLD_PAGES_PAGE_COMPRESSED}, + + {"compress_page_compressed_trim_op", "compression", + "Number of TRIM operation performed by page compression", + MONITOR_NONE, + MONITOR_DEFAULT_START, 
MONITOR_OVLD_PAGE_COMPRESSED_TRIM_OP}, + + {"compress_pages_page_decompressed", "compression", + "Number of pages decompressed by page compression", + MONITOR_NONE, + MONITOR_DEFAULT_START, MONITOR_OVLD_PAGES_PAGE_DECOMPRESSED}, + + {"compress_pages_page_compression_error", "compression", + "Number of page compression errors", + MONITOR_NONE, + MONITOR_DEFAULT_START, MONITOR_OVLD_PAGES_PAGE_COMPRESSION_ERROR}, + + {"compress_pages_encrypted", "compression", + "Number of pages encrypted", + MONITOR_NONE, + MONITOR_DEFAULT_START, MONITOR_OVLD_PAGES_ENCRYPTED}, + + {"compress_pages_decrypted", "compression", + "Number of pages decrypted", + MONITOR_NONE, + MONITOR_DEFAULT_START, MONITOR_OVLD_PAGES_DECRYPTED}, + + /* ========== Counters for Index ========== */ + {"module_index", "index", "Index Manager", + MONITOR_MODULE, + MONITOR_DEFAULT_START, MONITOR_MODULE_INDEX}, + + {"index_page_splits", "index", "Number of index page splits", + MONITOR_NONE, + MONITOR_DEFAULT_START, MONITOR_INDEX_SPLIT}, + + {"index_page_merge_attempts", "index", + "Number of index page merge attempts", + MONITOR_NONE, + MONITOR_DEFAULT_START, MONITOR_INDEX_MERGE_ATTEMPTS}, + + {"index_page_merge_successful", "index", + "Number of successful index page merges", + MONITOR_NONE, + MONITOR_DEFAULT_START, MONITOR_INDEX_MERGE_SUCCESSFUL}, + + {"index_page_reorg_attempts", "index", + "Number of index page reorganization attempts", + MONITOR_NONE, + MONITOR_DEFAULT_START, MONITOR_INDEX_REORG_ATTEMPTS}, + + {"index_page_reorg_successful", "index", + "Number of successful index page reorganizations", + MONITOR_NONE, + MONITOR_DEFAULT_START, MONITOR_INDEX_REORG_SUCCESSFUL}, + + {"index_page_discards", "index", "Number of index pages discarded", + MONITOR_NONE, + MONITOR_DEFAULT_START, MONITOR_INDEX_DISCARD}, + +#ifdef BTR_CUR_HASH_ADAPT + /* ========== Counters for Adaptive Hash Index ========== */ + {"module_adaptive_hash", "adaptive_hash_index", "Adaptive Hash Index", + MONITOR_MODULE, + 
MONITOR_DEFAULT_START, MONITOR_MODULE_ADAPTIVE_HASH}, + + {"adaptive_hash_searches", "adaptive_hash_index", + "Number of successful searches using Adaptive Hash Index", + static_cast<monitor_type_t>( + MONITOR_EXISTING | MONITOR_DEFAULT_ON), + MONITOR_DEFAULT_START, MONITOR_OVLD_ADAPTIVE_HASH_SEARCH}, +#endif /* BTR_CUR_HASH_ADAPT */ + + {"adaptive_hash_searches_btree", "adaptive_hash_index", + "Number of searches using B-tree on an index search", + static_cast<monitor_type_t>( + MONITOR_EXISTING | MONITOR_DEFAULT_ON), + MONITOR_DEFAULT_START, MONITOR_OVLD_ADAPTIVE_HASH_SEARCH_BTREE}, + +#ifdef BTR_CUR_HASH_ADAPT + {"adaptive_hash_pages_added", "adaptive_hash_index", + "Number of index pages on which the Adaptive Hash Index is built", + MONITOR_NONE, + MONITOR_DEFAULT_START, MONITOR_ADAPTIVE_HASH_PAGE_ADDED}, + + {"adaptive_hash_pages_removed", "adaptive_hash_index", + "Number of index pages whose corresponding Adaptive Hash Index" + " entries were removed", + MONITOR_NONE, + MONITOR_DEFAULT_START, MONITOR_ADAPTIVE_HASH_PAGE_REMOVED}, + + {"adaptive_hash_rows_added", "adaptive_hash_index", + "Number of Adaptive Hash Index rows added", + MONITOR_NONE, + MONITOR_DEFAULT_START, MONITOR_ADAPTIVE_HASH_ROW_ADDED}, + + {"adaptive_hash_rows_removed", "adaptive_hash_index", + "Number of Adaptive Hash Index rows removed", + MONITOR_NONE, + MONITOR_DEFAULT_START, MONITOR_ADAPTIVE_HASH_ROW_REMOVED}, + + {"adaptive_hash_rows_deleted_no_hash_entry", "adaptive_hash_index", + "Number of rows deleted that did not have corresponding Adaptive Hash" + " Index entries", + MONITOR_NONE, + MONITOR_DEFAULT_START, MONITOR_ADAPTIVE_HASH_ROW_REMOVE_NOT_FOUND}, + + {"adaptive_hash_rows_updated", "adaptive_hash_index", + "Number of Adaptive Hash Index rows updated", + MONITOR_NONE, + MONITOR_DEFAULT_START, MONITOR_ADAPTIVE_HASH_ROW_UPDATED}, +#endif /* BTR_CUR_HASH_ADAPT */ + + /* ========== Counters for tablespace ========== */ + {"module_file", "file_system", "Tablespace and File System 
Manager", + MONITOR_MODULE, + MONITOR_DEFAULT_START, MONITOR_MODULE_FIL_SYSTEM}, + + {"file_num_open_files", "file_system", + "Number of files currently open (innodb_num_open_files)", + static_cast<monitor_type_t>( + MONITOR_EXISTING | MONITOR_DISPLAY_CURRENT | MONITOR_DEFAULT_ON), + MONITOR_DEFAULT_START, MONITOR_OVLD_N_FILE_OPENED}, + + /* ========== Counters for Change Buffer ========== */ + {"module_ibuf_system", "change_buffer", "InnoDB Change Buffer", + MONITOR_MODULE, + MONITOR_DEFAULT_START, MONITOR_MODULE_IBUF_SYSTEM}, + + {"ibuf_merges_insert", "change_buffer", + "Number of inserted records merged by change buffering", + static_cast<monitor_type_t>( + MONITOR_EXISTING | MONITOR_DEFAULT_ON), + MONITOR_DEFAULT_START, MONITOR_OVLD_IBUF_MERGE_INSERT}, + + {"ibuf_merges_delete_mark", "change_buffer", + "Number of deleted records merged by change buffering", + static_cast<monitor_type_t>( + MONITOR_EXISTING | MONITOR_DEFAULT_ON), + MONITOR_DEFAULT_START, MONITOR_OVLD_IBUF_MERGE_DELETE}, + + {"ibuf_merges_delete", "change_buffer", + "Number of purge records merged by change buffering", + static_cast<monitor_type_t>( + MONITOR_EXISTING | MONITOR_DEFAULT_ON), + MONITOR_DEFAULT_START, MONITOR_OVLD_IBUF_MERGE_PURGE}, + + {"ibuf_merges_discard_insert", "change_buffer", + "Number of insert merged operations discarded", + static_cast<monitor_type_t>( + MONITOR_EXISTING | MONITOR_DEFAULT_ON), + MONITOR_DEFAULT_START, MONITOR_OVLD_IBUF_MERGE_DISCARD_INSERT}, + + {"ibuf_merges_discard_delete_mark", "change_buffer", + "Number of deleted merged operations discarded", + static_cast<monitor_type_t>( + MONITOR_EXISTING | MONITOR_DEFAULT_ON), + MONITOR_DEFAULT_START, MONITOR_OVLD_IBUF_MERGE_DISCARD_DELETE}, + + {"ibuf_merges_discard_delete", "change_buffer", + "Number of purge merged operations discarded", + static_cast<monitor_type_t>( + MONITOR_EXISTING | MONITOR_DEFAULT_ON), + MONITOR_DEFAULT_START, MONITOR_OVLD_IBUF_MERGE_DISCARD_PURGE}, + + {"ibuf_merges", "change_buffer", 
"Number of change buffer merges", + static_cast<monitor_type_t>( + MONITOR_EXISTING | MONITOR_DEFAULT_ON), + MONITOR_DEFAULT_START, MONITOR_OVLD_IBUF_MERGES}, + + {"ibuf_size", "change_buffer", "Change buffer size in pages", + static_cast<monitor_type_t>( + MONITOR_EXISTING | MONITOR_DEFAULT_ON), + MONITOR_DEFAULT_START, MONITOR_OVLD_IBUF_SIZE}, + + /* ========== Counters for server operations ========== */ + {"module_innodb", "innodb", + "Counter for general InnoDB server wide operations and properties", + MONITOR_MODULE, + MONITOR_DEFAULT_START, MONITOR_MODULE_SERVER}, + + {"innodb_master_thread_sleeps", "server", + "Number of times (seconds) master thread sleeps", + MONITOR_NONE, + MONITOR_DEFAULT_START, MONITOR_MASTER_THREAD_SLEEP}, + + {"innodb_activity_count", "server", "Current server activity count", + static_cast<monitor_type_t>( + MONITOR_EXISTING | MONITOR_DEFAULT_ON), + MONITOR_DEFAULT_START, MONITOR_OVLD_SERVER_ACTIVITY}, + + {"innodb_master_active_loops", "server", + "Number of times master thread performs its tasks when" + " server is active", + MONITOR_NONE, + MONITOR_DEFAULT_START, MONITOR_MASTER_ACTIVE_LOOPS}, + + {"innodb_master_idle_loops", "server", + "Number of times master thread performs its tasks when server is idle", + MONITOR_NONE, + MONITOR_DEFAULT_START, MONITOR_MASTER_IDLE_LOOPS}, + + {"innodb_background_drop_table_usec", "server", + "Time (in microseconds) spent to process drop table list", + MONITOR_NONE, + MONITOR_DEFAULT_START, MONITOR_SRV_BACKGROUND_DROP_TABLE_MICROSECOND}, + + {"innodb_log_flush_usec", "server", + "Time (in microseconds) spent to flush log records", + MONITOR_NONE, + MONITOR_DEFAULT_START, MONITOR_SRV_LOG_FLUSH_MICROSECOND}, + + {"innodb_dict_lru_usec", "server", + "Time (in microseconds) spent to process DICT LRU list", + MONITOR_NONE, + MONITOR_DEFAULT_START, MONITOR_SRV_DICT_LRU_MICROSECOND}, + + {"innodb_dict_lru_count_active", "server", + "Number of tables evicted from DICT LRU list in the active loop", + 
MONITOR_NONE, + MONITOR_DEFAULT_START, MONITOR_SRV_DICT_LRU_EVICT_COUNT_ACTIVE}, + + {"innodb_dict_lru_count_idle", "server", + "Number of tables evicted from DICT LRU list in the idle loop", + MONITOR_NONE, + MONITOR_DEFAULT_START, MONITOR_SRV_DICT_LRU_EVICT_COUNT_IDLE}, + + {"innodb_dblwr_writes", "server", + "Number of doublewrite operations that have been performed" + " (innodb_dblwr_writes)", + static_cast<monitor_type_t>( + MONITOR_EXISTING | MONITOR_DEFAULT_ON), + MONITOR_DEFAULT_START, MONITOR_OVLD_SRV_DBLWR_WRITES}, + + {"innodb_dblwr_pages_written", "server", + "Number of pages that have been written for doublewrite operations" + " (innodb_dblwr_pages_written)", + static_cast<monitor_type_t>( + MONITOR_EXISTING | MONITOR_DEFAULT_ON), + MONITOR_DEFAULT_START, MONITOR_OVLD_SRV_DBLWR_PAGES_WRITTEN}, + + {"innodb_page_size", "server", + "InnoDB page size in bytes (innodb_page_size)", + static_cast<monitor_type_t>( + MONITOR_EXISTING | MONITOR_DEFAULT_ON | MONITOR_DISPLAY_CURRENT), + MONITOR_DEFAULT_START, MONITOR_OVLD_SRV_PAGE_SIZE}, + + {"innodb_rwlock_s_spin_waits", "server", + "Number of rwlock spin waits due to shared latch request", + static_cast<monitor_type_t>( + MONITOR_EXISTING | MONITOR_DEFAULT_ON), + MONITOR_DEFAULT_START, MONITOR_OVLD_RWLOCK_S_SPIN_WAITS}, + + {"innodb_rwlock_x_spin_waits", "server", + "Number of rwlock spin waits due to exclusive latch request", + static_cast<monitor_type_t>( + MONITOR_EXISTING | MONITOR_DEFAULT_ON), + MONITOR_DEFAULT_START, MONITOR_OVLD_RWLOCK_X_SPIN_WAITS}, + + {"innodb_rwlock_sx_spin_waits", "server", + "Number of rwlock spin waits due to sx latch request", + static_cast<monitor_type_t>( + MONITOR_EXISTING | MONITOR_DEFAULT_ON), + MONITOR_DEFAULT_START, MONITOR_OVLD_RWLOCK_SX_SPIN_WAITS}, + + {"innodb_rwlock_s_spin_rounds", "server", + "Number of rwlock spin loop rounds due to shared latch request", + static_cast<monitor_type_t>( + MONITOR_EXISTING | MONITOR_DEFAULT_ON), + MONITOR_DEFAULT_START, 
MONITOR_OVLD_RWLOCK_S_SPIN_ROUNDS}, + + {"innodb_rwlock_x_spin_rounds", "server", + "Number of rwlock spin loop rounds due to exclusive latch request", + static_cast<monitor_type_t>( + MONITOR_EXISTING | MONITOR_DEFAULT_ON), + MONITOR_DEFAULT_START, MONITOR_OVLD_RWLOCK_X_SPIN_ROUNDS}, + + {"innodb_rwlock_sx_spin_rounds", "server", + "Number of rwlock spin loop rounds due to sx latch request", + static_cast<monitor_type_t>( + MONITOR_EXISTING | MONITOR_DEFAULT_ON), + MONITOR_DEFAULT_START, MONITOR_OVLD_RWLOCK_SX_SPIN_ROUNDS}, + + {"innodb_rwlock_s_os_waits", "server", + "Number of OS waits due to shared latch request", + static_cast<monitor_type_t>( + MONITOR_EXISTING | MONITOR_DEFAULT_ON), + MONITOR_DEFAULT_START, MONITOR_OVLD_RWLOCK_S_OS_WAITS}, + + {"innodb_rwlock_x_os_waits", "server", + "Number of OS waits due to exclusive latch request", + static_cast<monitor_type_t>( + MONITOR_EXISTING | MONITOR_DEFAULT_ON), + MONITOR_DEFAULT_START, MONITOR_OVLD_RWLOCK_X_OS_WAITS}, + + {"innodb_rwlock_sx_os_waits", "server", + "Number of OS waits due to sx latch request", + static_cast<monitor_type_t>( + MONITOR_EXISTING | MONITOR_DEFAULT_ON), + MONITOR_DEFAULT_START, MONITOR_OVLD_RWLOCK_SX_OS_WAITS}, + + /* ========== Counters for DML operations ========== */ + {"module_dml", "dml", "Statistics for DMLs", + MONITOR_MODULE, + MONITOR_DEFAULT_START, MONITOR_MODULE_DML_STATS}, + + {"dml_reads", "dml", "Number of rows read", + static_cast<monitor_type_t>(MONITOR_EXISTING), + MONITOR_DEFAULT_START, MONITOR_OLVD_ROW_READ}, + + {"dml_inserts", "dml", "Number of rows inserted", + static_cast<monitor_type_t>( + MONITOR_EXISTING | MONITOR_DEFAULT_ON), + MONITOR_DEFAULT_START, MONITOR_OLVD_ROW_INSERTED}, + + {"dml_deletes", "dml", "Number of rows deleted", + static_cast<monitor_type_t>( + MONITOR_EXISTING | MONITOR_DEFAULT_ON), + MONITOR_DEFAULT_START, MONITOR_OLVD_ROW_DELETED}, + + {"dml_updates", "dml", "Number of rows updated", + static_cast<monitor_type_t>( + MONITOR_EXISTING | 
MONITOR_DEFAULT_ON), + MONITOR_DEFAULT_START, MONITOR_OLVD_ROW_UPDTATED}, + + {"dml_system_reads", "dml", "Number of system rows read", + static_cast<monitor_type_t>( + MONITOR_EXISTING | MONITOR_DEFAULT_ON), + MONITOR_DEFAULT_START, MONITOR_OLVD_SYSTEM_ROW_READ}, + + {"dml_system_inserts", "dml", "Number of system rows inserted", + static_cast<monitor_type_t>( + MONITOR_EXISTING | MONITOR_DEFAULT_ON), + MONITOR_DEFAULT_START, MONITOR_OLVD_SYSTEM_ROW_INSERTED}, + + {"dml_system_deletes", "dml", "Number of system rows deleted", + static_cast<monitor_type_t>( + MONITOR_EXISTING | MONITOR_DEFAULT_ON), + MONITOR_DEFAULT_START, MONITOR_OLVD_SYSTEM_ROW_DELETED}, + + {"dml_system_updates", "dml", "Number of system rows updated", + static_cast<monitor_type_t>( + MONITOR_EXISTING | MONITOR_DEFAULT_ON), + MONITOR_DEFAULT_START, MONITOR_OLVD_SYSTEM_ROW_UPDATED}, + + /* ========== Counters for DDL operations ========== */ + {"module_ddl", "ddl", "Statistics for DDLs", + MONITOR_MODULE, + MONITOR_DEFAULT_START, MONITOR_MODULE_DDL_STATS}, + + {"ddl_background_drop_indexes", "ddl", + "Number of indexes waiting to be dropped after failed index creation", + MONITOR_NONE, + MONITOR_DEFAULT_START, MONITOR_BACKGROUND_DROP_INDEX}, + + {"ddl_background_drop_tables", "ddl", + "Number of tables in background drop table list", + MONITOR_NONE, + MONITOR_DEFAULT_START, MONITOR_BACKGROUND_DROP_TABLE}, + + {"ddl_online_create_index", "ddl", + "Number of indexes being created online", + MONITOR_NONE, + MONITOR_DEFAULT_START, MONITOR_ONLINE_CREATE_INDEX}, + + {"ddl_pending_alter_table", "ddl", + "Number of ALTER TABLE, CREATE INDEX, DROP INDEX in progress", + MONITOR_NONE, + MONITOR_DEFAULT_START, MONITOR_PENDING_ALTER_TABLE}, + + {"ddl_sort_file_alter_table", "ddl", + "Number of sort files created during alter table", + MONITOR_NONE, + MONITOR_DEFAULT_START, MONITOR_ALTER_TABLE_SORT_FILES}, + + {"ddl_log_file_alter_table", "ddl", + "Number of log files created during alter table", + 
MONITOR_NONE, + MONITOR_DEFAULT_START, MONITOR_ALTER_TABLE_LOG_FILES}, + + /* ===== Counters for ICP (Index Condition Pushdown) Module ===== */ + {"module_icp", "icp", "Index Condition Pushdown", + MONITOR_MODULE, + MONITOR_DEFAULT_START, MONITOR_MODULE_ICP}, + + {"icp_attempts", "icp", + "Number of attempts for index push-down condition checks", + MONITOR_NONE, + MONITOR_DEFAULT_START, MONITOR_ICP_ATTEMPTS}, + + {"icp_no_match", "icp", "Index push-down condition does not match", + MONITOR_NONE, + MONITOR_DEFAULT_START, MONITOR_ICP_NO_MATCH}, + + {"icp_out_of_range", "icp", "Index push-down condition out of range", + MONITOR_NONE, + MONITOR_DEFAULT_START, MONITOR_ICP_OUT_OF_RANGE}, + + {"icp_match", "icp", "Index push-down condition matches", + MONITOR_NONE, + MONITOR_DEFAULT_START, MONITOR_ICP_MATCH}, + + /* ========== Mutex monitoring on/off ========== */ + {"latch_status", "Latch counters", + "Collect latch counters to display via SHOW ENGING INNODB MUTEX", + MONITOR_MODULE, + MONITOR_DEFAULT_START, MONITOR_MODULE_LATCHES}, + + {"latch", "sync", "Latch monitoring control", + MONITOR_HIDDEN, + MONITOR_DEFAULT_START, MONITOR_LATCHES}, + + /* ========== To turn on/off reset all counters ========== */ + {"all", "All Counters", "Turn on/off and reset all counters", + MONITOR_MODULE, + MONITOR_DEFAULT_START, MONITOR_ALL_COUNTER} +}; + +/* The "innodb_counter_value" array stores actual counter values */ +monitor_value_t innodb_counter_value[NUM_MONITOR]; + +/* monitor_set_tbl is used to record and determine whether a monitor +has been turned on/off. */ +Atomic_relaxed<ulint> + monitor_set_tbl[(NUM_MONITOR + NUM_BITS_ULINT - 1) / NUM_BITS_ULINT]; + +/****************************************************************//** +Get a monitor's "monitor_info" by its monitor id (index into the +innodb_counter_info array. 
+@return Point to corresponding monitor_info_t, or NULL if no such +monitor */ +monitor_info_t* +srv_mon_get_info( +/*=============*/ + monitor_id_t monitor_id) /*!< id indexing into the + innodb_counter_info array */ +{ + ut_a(monitor_id < NUM_MONITOR); + + return((monitor_id < NUM_MONITOR) + ? &innodb_counter_info[monitor_id] + : NULL); +} + +/****************************************************************//** +Get monitor's name by its monitor id (indexing into the +innodb_counter_info array. +@return corresponding monitor name, or NULL if no such +monitor */ +const char* +srv_mon_get_name( +/*=============*/ + monitor_id_t monitor_id) /*!< id index into the + innodb_counter_info array */ +{ + ut_a(monitor_id < NUM_MONITOR); + + return((monitor_id < NUM_MONITOR) + ? innodb_counter_info[monitor_id].monitor_name + : NULL); +} + +/****************************************************************//** +Turn on/off, reset monitor counters in a module. If module_id +is MONITOR_ALL_COUNTER then turn on all monitor counters. +turned on because it has already been turned on. */ +void +srv_mon_set_module_control( +/*=======================*/ + monitor_id_t module_id, /*!< in: Module ID as in + monitor_counter_id. If it is + set to MONITOR_ALL_COUNTER, this means + we shall turn on all the counters */ + mon_option_t set_option) /*!< in: Turn on/off reset the + counter */ +{ + lint ix; + lint start_id; + ibool set_current_module = FALSE; + + ut_a(module_id <= NUM_MONITOR); + compile_time_assert(array_elements(innodb_counter_info) + == NUM_MONITOR); + + /* The module_id must be an ID of MONITOR_MODULE type */ + ut_a(innodb_counter_info[module_id].monitor_type & MONITOR_MODULE); + + /* start with the first monitor in the module. If module_id + is MONITOR_ALL_COUNTER, this means we need to turn on all + monitor counters. 
*/ + if (module_id == MONITOR_ALL_COUNTER) { + start_id = 1; + } else if (innodb_counter_info[module_id].monitor_type + & MONITOR_GROUP_MODULE) { + /* Counters in this module are set as a group together + and cannot be turned on/off individually. Need to set + the on/off bit in the module counter */ + start_id = module_id; + set_current_module = TRUE; + + } else { + start_id = module_id + 1; + } + + for (ix = start_id; ix < NUM_MONITOR; ix++) { + /* if we hit the next module counter, we will + continue if we want to turn on all monitor counters, + and break if just turn on the counters in the + current module. */ + if (innodb_counter_info[ix].monitor_type & MONITOR_MODULE) { + + if (set_current_module) { + /* Continue to set on/off bit on current + module */ + set_current_module = FALSE; + } else if (module_id == MONITOR_ALL_COUNTER) { + if (!(innodb_counter_info[ix].monitor_type + & MONITOR_GROUP_MODULE)) { + continue; + } + } else { + /* Hitting the next module, stop */ + break; + } + } + + /* Cannot turn on a monitor already been turned on. User + should be aware some counters are already on before + turn them on again (which could reset counter value) */ + if (MONITOR_IS_ON(ix) && (set_option == MONITOR_TURN_ON)) { + ib::info() << "Monitor '" + << srv_mon_get_name((monitor_id_t) ix) + << "' is already enabled."; + continue; + } + + /* For some existing counters (server status variables), + we will get its counter value at the start/stop time + to calculate the actual value during the time. */ + if (innodb_counter_info[ix].monitor_type & MONITOR_EXISTING) { + srv_mon_process_existing_counter( + static_cast<monitor_id_t>(ix), set_option); + } + + /* Currently support 4 operations on the monitor counters: + turn on, turn off, reset and reset all operations. 
*/ + switch (set_option) { + case MONITOR_TURN_ON: + MONITOR_ON(ix); + MONITOR_INIT(ix); + MONITOR_SET_START(ix); + break; + + case MONITOR_TURN_OFF: + MONITOR_OFF(ix); + MONITOR_SET_OFF(ix); + break; + + case MONITOR_RESET_VALUE: + srv_mon_reset(static_cast<monitor_id_t>(ix)); + break; + + case MONITOR_RESET_ALL_VALUE: + srv_mon_reset_all(static_cast<monitor_id_t>(ix)); + break; + + default: + ut_error; + } + } +} + +/****************************************************************//** +Get transaction system's rollback segment size in pages +@return size in pages */ +static +ulint +srv_mon_get_rseg_size(void) +/*=======================*/ +{ + ulint i; + ulint value = 0; + + /* rseg_array is a static array, so we can go through it without + mutex protection. In addition, we provide an estimate of the + total rollback segment size and to avoid mutex contention we + don't acquire the rseg->mutex" */ + for (i = 0; i < TRX_SYS_N_RSEGS; ++i) { + const trx_rseg_t* rseg = trx_sys.rseg_array[i]; + + if (rseg != NULL) { + value += rseg->curr_size; + } + } + + return(value); +} + +/****************************************************************//** +This function consolidates some existing server counters used +by "system status variables". These existing system variables do not have +mechanism to start/stop and reset the counters, so we simulate these +controls by remembering the corresponding counter values when the +corresponding monitors are turned on/off/reset, and do appropriate +mathematics to deduct the actual value. 
Please also refer to +srv_export_innodb_status() for related global counters used by +the existing status variables.*/ +void +srv_mon_process_existing_counter( +/*=============================*/ + monitor_id_t monitor_id, /*!< in: the monitor's ID as in + monitor_counter_id */ + mon_option_t set_option) /*!< in: Turn on/off reset the + counter */ +{ /* Step 1: read the current value of the server counter behind monitor_id. Step 2: apply set_option to the monitor's saved start/last/current values using that reading. */ + mon_type_t value; + monitor_info_t* monitor_info; + ibool update_min = FALSE; /* set to TRUE by the cases whose minimum value is also maintained under MONITOR_GET_VALUE */ + + monitor_info = srv_mon_get_info(monitor_id); + + ut_a(monitor_info->monitor_type & MONITOR_EXISTING); + ut_a(monitor_id < NUM_MONITOR); + + /* Get the value from corresponding global variable */ + switch (monitor_id) { + /* export_vars.innodb_buffer_pool_reads. Num Reads from + disk (page not in buffer) */ + case MONITOR_OVLD_BUF_POOL_READS: + value = srv_stats.buf_pool_reads; + break; + + /* innodb_buffer_pool_read_requests, the number of logical + read requests */ + case MONITOR_OVLD_BUF_POOL_READ_REQUESTS: + value = buf_pool.stat.n_page_gets; + break; + + /* innodb_buffer_pool_write_requests, the number of + write request */ + case MONITOR_OVLD_BUF_POOL_WRITE_REQUEST: + value = srv_stats.buf_pool_write_requests; + break; + + /* innodb_buffer_pool_wait_free */ + case MONITOR_OVLD_BUF_POOL_WAIT_FREE: + value = buf_pool.stat.LRU_waits; + break; + + /* innodb_buffer_pool_read_ahead */ + case MONITOR_OVLD_BUF_POOL_READ_AHEAD: + value = buf_pool.stat.n_ra_pages_read; + break; + + /* innodb_buffer_pool_read_ahead_evicted */ + case MONITOR_OVLD_BUF_POOL_READ_AHEAD_EVICTED: + value = buf_pool.stat.n_ra_pages_evicted; + break; + + /* innodb_buffer_pool_pages_total */ + case MONITOR_OVLD_BUF_POOL_PAGE_TOTAL: + value = buf_pool.get_n_pages(); + break; + + /* innodb_buffer_pool_pages_misc: total pages minus the LRU and free list lengths */ + case MONITOR_OVLD_BUF_POOL_PAGE_MISC: + value = buf_pool.get_n_pages() + - UT_LIST_GET_LEN(buf_pool.LRU) + - UT_LIST_GET_LEN(buf_pool.free); + break; + + /* innodb_buffer_pool_pages_data */ + case MONITOR_OVLD_BUF_POOL_PAGES_DATA: + value =
UT_LIST_GET_LEN(buf_pool.LRU); + break; + + /* innodb_buffer_pool_bytes_data */ + case MONITOR_OVLD_BUF_POOL_BYTES_DATA: + value = buf_pool.stat.LRU_bytes + + (UT_LIST_GET_LEN(buf_pool.unzip_LRU) + << srv_page_size_shift); + break; + + /* innodb_buffer_pool_pages_dirty */ + case MONITOR_OVLD_BUF_POOL_PAGES_DIRTY: + value = UT_LIST_GET_LEN(buf_pool.flush_list); + break; + + /* innodb_buffer_pool_bytes_dirty */ + case MONITOR_OVLD_BUF_POOL_BYTES_DIRTY: + value = buf_pool.stat.flush_list_bytes; + break; + + /* innodb_buffer_pool_pages_free */ + case MONITOR_OVLD_BUF_POOL_PAGES_FREE: + value = UT_LIST_GET_LEN(buf_pool.free); + break; + + /* innodb_pages_created, the number of pages created */ + case MONITOR_OVLD_PAGE_CREATED: + value = buf_pool.stat.n_pages_created; + break; + + /* innodb_pages_written, the number of pages written */ + case MONITOR_OVLD_PAGES_WRITTEN: + value = buf_pool.stat.n_pages_written; + break; + + /* innodb_index_pages_written, the number of index pages written */ + case MONITOR_OVLD_INDEX_PAGES_WRITTEN: + value = srv_stats.index_pages_written; + break; + + /* innodb_non_index_pages_written, the number of non index pages written */ + case MONITOR_OVLD_NON_INDEX_PAGES_WRITTEN: + value = srv_stats.non_index_pages_written; + break; + + /* innodb_pages_read */ + case MONITOR_OVLD_PAGES_READ: + value = buf_pool.stat.n_pages_read; + break; + + /* Number of times secondary index lookup triggered cluster lookup */ + case MONITOR_OVLD_INDEX_SEC_REC_CLUSTER_READS: + value = srv_stats.n_sec_rec_cluster_reads; + break; + /* Number of times prefix optimization avoided triggering cluster + lookup */ + case MONITOR_OVLD_INDEX_SEC_REC_CLUSTER_READS_AVOIDED: + value = srv_stats.n_sec_rec_cluster_reads_avoided; + break; + + /* srv_stats.data_read. NOTE(review): the earlier note here named innodb_data_reads, which is also the name given for MONITOR_OVLD_OS_FILE_READ further down; presumably this one is the bytes-read counter - confirm */ + case MONITOR_OVLD_BYTE_READ: + value = srv_stats.data_read; + break; + + /* srv_stats.data_written. NOTE(review): the earlier note here named innodb_data_writes, which is also the name given for MONITOR_OVLD_OS_FILE_WRITE further down; presumably this one is the bytes-written counter - confirm.
*/ + case MONITOR_OVLD_BYTE_WRITTEN: + value = srv_stats.data_written; + break; + + /* innodb_data_reads, the total number of data reads. */ + case MONITOR_OVLD_OS_FILE_READ: + value = os_n_file_reads; + break; + + /* innodb_data_writes, the total number of data writes */ + case MONITOR_OVLD_OS_FILE_WRITE: + value = os_n_file_writes; + break; + + /* innodb_data_fsyncs, number of fsync() operations so far. */ + case MONITOR_OVLD_OS_FSYNC: + value = os_n_fsyncs; + break; + + /* innodb_os_log_written */ + case MONITOR_OVLD_OS_LOG_WRITTEN: + value = (mon_type_t) srv_stats.os_log_written; + break; + + /* innodb_os_log_fsyncs */ + case MONITOR_OVLD_OS_LOG_FSYNC: + value = log_sys.get_flushes(); + break; + + /* innodb_os_log_pending_fsyncs; minimum value is tracked as well */ + case MONITOR_OVLD_OS_LOG_PENDING_FSYNC: + value = log_sys.get_pending_flushes(); + update_min = TRUE; + break; + + /* innodb_os_log_pending_writes; minimum value is tracked as well */ + case MONITOR_OVLD_OS_LOG_PENDING_WRITES: + value = srv_stats.os_log_pending_writes; + update_min = TRUE; + break; + + /* innodb_log_waits */ + case MONITOR_OVLD_LOG_WAITS: + value = srv_stats.log_waits; + break; + + /* innodb_log_write_requests */ + case MONITOR_OVLD_LOG_WRITE_REQUEST: + value = srv_stats.log_write_requests; + break; + + /* innodb_log_writes */ + case MONITOR_OVLD_LOG_WRITES: + value = srv_stats.log_writes; + break; + + case MONITOR_OVLD_LOG_PADDED: + value = srv_stats.log_padded; + break; + + /* innodb_dblwr_writes; read while holding the doublewrite buffer lock */ + case MONITOR_OVLD_SRV_DBLWR_WRITES: + buf_dblwr.lock(); + value = buf_dblwr.batches(); + buf_dblwr.unlock(); + break; + + /* innodb_dblwr_pages_written; read while holding the doublewrite buffer lock */ + case MONITOR_OVLD_SRV_DBLWR_PAGES_WRITTEN: + buf_dblwr.lock(); + value = buf_dblwr.written(); + buf_dblwr.unlock(); + break; + + /* innodb_page_size */ + case MONITOR_OVLD_SRV_PAGE_SIZE: + value = srv_page_size; + break; + + case MONITOR_OVLD_RWLOCK_S_SPIN_WAITS: + value = rw_lock_stats.rw_s_spin_wait_count; + break; + + case MONITOR_OVLD_RWLOCK_X_SPIN_WAITS: + value =
rw_lock_stats.rw_x_spin_wait_count; + break; + + case MONITOR_OVLD_RWLOCK_SX_SPIN_WAITS: + value = rw_lock_stats.rw_sx_spin_wait_count; + break; + + case MONITOR_OVLD_RWLOCK_S_SPIN_ROUNDS: + value = rw_lock_stats.rw_s_spin_round_count; + break; + + case MONITOR_OVLD_RWLOCK_X_SPIN_ROUNDS: + value = rw_lock_stats.rw_x_spin_round_count; + break; + + case MONITOR_OVLD_RWLOCK_SX_SPIN_ROUNDS: + value = rw_lock_stats.rw_sx_spin_round_count; + break; + + case MONITOR_OVLD_RWLOCK_S_OS_WAITS: + value = rw_lock_stats.rw_s_os_wait_count; + break; + + case MONITOR_OVLD_RWLOCK_X_OS_WAITS: + value = rw_lock_stats.rw_x_os_wait_count; + break; + + case MONITOR_OVLD_RWLOCK_SX_OS_WAITS: + value = rw_lock_stats.rw_sx_os_wait_count; + break; + + case MONITOR_OVLD_BUFFER_POOL_SIZE: + value = srv_buf_pool_size; + break; + + /* innodb_rows_read */ + case MONITOR_OLVD_ROW_READ: + value = srv_stats.n_rows_read; + break; + + /* innodb_rows_inserted */ + case MONITOR_OLVD_ROW_INSERTED: + value = srv_stats.n_rows_inserted; + break; + + /* innodb_rows_deleted */ + case MONITOR_OLVD_ROW_DELETED: + value = srv_stats.n_rows_deleted; + break; + + /* innodb_rows_updated */ + case MONITOR_OLVD_ROW_UPDTATED: + value = srv_stats.n_rows_updated; + break; + + /* innodb_system_rows_read */ + case MONITOR_OLVD_SYSTEM_ROW_READ: + value = srv_stats.n_system_rows_read; + break; + + /* innodb_system_rows_inserted */ + case MONITOR_OLVD_SYSTEM_ROW_INSERTED: + value = srv_stats.n_system_rows_inserted; + break; + + /* innodb_system_rows_deleted */ + case MONITOR_OLVD_SYSTEM_ROW_DELETED: + value = srv_stats.n_system_rows_deleted; + break; + + /* innodb_system_rows_updated */ + case MONITOR_OLVD_SYSTEM_ROW_UPDATED: + value = srv_stats.n_system_rows_updated; + break; + + /* innodb_row_lock_current_waits */ + case MONITOR_OVLD_ROW_LOCK_CURRENT_WAIT: + value = srv_stats.n_lock_wait_current_count; + break; + + /* innodb_row_lock_time */ + case MONITOR_OVLD_LOCK_WAIT_TIME: + value = srv_stats.n_lock_wait_time / 1000; +
break; + + /* innodb_row_lock_time_max */ + case MONITOR_OVLD_LOCK_MAX_WAIT_TIME: + value = lock_sys.n_lock_max_wait_time / 1000; + break; + + /* innodb_row_lock_time_avg; reported as 0 when no lock wait has been counted, which also avoids dividing by zero */ + case MONITOR_OVLD_LOCK_AVG_WAIT_TIME: + if (srv_stats.n_lock_wait_count > 0) { + value = srv_stats.n_lock_wait_time / 1000 + / srv_stats.n_lock_wait_count; + } else { + value = 0; + } + break; + + /* innodb_row_lock_waits */ + case MONITOR_OVLD_ROW_LOCK_WAIT: + value = srv_stats.n_lock_wait_count; + break; + + case MONITOR_RSEG_HISTORY_LEN: + value = trx_sys.rseg_history_len; + break; + + case MONITOR_RSEG_CUR_SIZE: + value = srv_mon_get_rseg_size(); + break; + + case MONITOR_OVLD_N_FILE_OPENED: + value = fil_system.n_open; + break; + + case MONITOR_OVLD_IBUF_MERGE_INSERT: + value = ibuf.n_merged_ops[IBUF_OP_INSERT]; + break; + + case MONITOR_OVLD_IBUF_MERGE_DELETE: + value = ibuf.n_merged_ops[IBUF_OP_DELETE_MARK]; + break; + + case MONITOR_OVLD_IBUF_MERGE_PURGE: + value = ibuf.n_merged_ops[IBUF_OP_DELETE]; + break; + + case MONITOR_OVLD_IBUF_MERGE_DISCARD_INSERT: + value = ibuf.n_discarded_ops[IBUF_OP_INSERT]; + break; + + case MONITOR_OVLD_IBUF_MERGE_DISCARD_DELETE: + value = ibuf.n_discarded_ops[IBUF_OP_DELETE_MARK]; + break; + + case MONITOR_OVLD_IBUF_MERGE_DISCARD_PURGE: + value = ibuf.n_discarded_ops[IBUF_OP_DELETE]; + break; + + case MONITOR_OVLD_IBUF_MERGES: + value = ibuf.n_merges; + break; + + case MONITOR_OVLD_IBUF_SIZE: + value = ibuf.size; + break; + + case MONITOR_OVLD_SERVER_ACTIVITY: + value = srv_get_activity_count(); + break; + + case MONITOR_OVLD_LSN_FLUSHDISK: + value = log_sys.get_flushed_lsn(); + break; + + case MONITOR_OVLD_LSN_CURRENT: + value = log_sys.get_lsn(); + break; + + case MONITOR_PENDING_LOG_FLUSH: + value = static_cast<mon_type_t>(log_sys.pending_flushes); + + break; + + case MONITOR_PENDING_CHECKPOINT_WRITE: + mysql_mutex_lock(&log_sys.mutex); + value = static_cast<mon_type_t>( + log_sys.n_pending_checkpoint_writes); + mysql_mutex_unlock(&log_sys.mutex); +
break; + + case MONITOR_LOG_IO: + mysql_mutex_lock(&log_sys.mutex); + value = static_cast<mon_type_t>(log_sys.n_log_ios); + mysql_mutex_unlock(&log_sys.mutex); + break; + + case MONITOR_LSN_CHECKPOINT_AGE: + mysql_mutex_lock(&log_sys.mutex); + value = static_cast<mon_type_t>(log_sys.get_lsn() + - log_sys.last_checkpoint_lsn); + mysql_mutex_unlock(&log_sys.mutex); + break; + + case MONITOR_OVLD_BUF_OLDEST_LSN: + mysql_mutex_lock(&buf_pool.flush_list_mutex); + value = (mon_type_t) buf_pool.get_oldest_modification(0); + mysql_mutex_unlock(&buf_pool.flush_list_mutex); + break; + + case MONITOR_OVLD_LSN_CHECKPOINT: + value = (mon_type_t) log_sys.last_checkpoint_lsn; + break; + + case MONITOR_OVLD_MAX_AGE_ASYNC: + value = log_sys.max_modified_age_async; + break; + +#ifdef BTR_CUR_HASH_ADAPT + case MONITOR_OVLD_ADAPTIVE_HASH_SEARCH: + value = btr_cur_n_sea; + break; +#endif /* BTR_CUR_HASH_ADAPT */ + + case MONITOR_OVLD_ADAPTIVE_HASH_SEARCH_BTREE: + value = btr_cur_n_non_sea; + break; + + case MONITOR_OVLD_PAGE_COMPRESS_SAVED: + value = srv_stats.page_compression_saved; + break; + case MONITOR_OVLD_PAGES_PAGE_COMPRESSED: + value = srv_stats.pages_page_compressed; + break; + case MONITOR_OVLD_PAGE_COMPRESSED_TRIM_OP: + value = srv_stats.page_compressed_trim_op; + break; + case MONITOR_OVLD_PAGES_PAGE_DECOMPRESSED: + value = srv_stats.pages_page_decompressed; + break; + case MONITOR_OVLD_PAGES_PAGE_COMPRESSION_ERROR: + value = srv_stats.pages_page_compression_error; + break; + case MONITOR_OVLD_PAGES_ENCRYPTED: + value = srv_stats.pages_encrypted; + break; + case MONITOR_OVLD_PAGES_DECRYPTED: + value = srv_stats.pages_decrypted; + break; + + default: /* monitor_id has no case above: treated as a fatal error */ + ut_error; + } + + switch (set_option) { /* apply the requested operation using the value read above; every case returns */ + case MONITOR_TURN_ON: + /* Save the initial counter value in mon_start_value + field */ + MONITOR_SAVE_START(monitor_id, value); + return; + + case MONITOR_TURN_OFF: + /* Save the counter value to mon_last_value when we + turn off the monitor but not yet reset.
Note the + counter has not yet been set to off in the bitmap + table for normal turn off. We need to check the + count status (on/off) to avoid reset the value + for an already off counter. The recursive call with + MONITOR_GET_VALUE refreshes the monitor value before + it is saved. */ + if (MONITOR_IS_ON(monitor_id)) { + srv_mon_process_existing_counter(monitor_id, + MONITOR_GET_VALUE); + MONITOR_SAVE_LAST(monitor_id); + } + return; + + case MONITOR_GET_VALUE: + if (MONITOR_IS_ON(monitor_id)) { + + /* If MONITOR_DISPLAY_CURRENT bit is on, we + only record the current value, rather than + incremental value over a period. Most of + this type of counters are resource related + counters such as number of buffer pages etc. */ + if (monitor_info->monitor_type + & MONITOR_DISPLAY_CURRENT) { + MONITOR_SET(monitor_id, value); + } else { + /* Most status counters are monotonically + increasing, no need to update their + minimum values. Only do so + if "update_min" set to TRUE */ + MONITOR_SET_DIFF(monitor_id, value); + + if (update_min + && (MONITOR_VALUE(monitor_id) + < MONITOR_MIN_VALUE(monitor_id))) { + MONITOR_MIN_VALUE(monitor_id) = + MONITOR_VALUE(monitor_id); + } + } + } + return; + + case MONITOR_RESET_VALUE: /* only clears the saved last value, and only for a monitor that is off */ + if (!MONITOR_IS_ON(monitor_id)) { + MONITOR_LAST_VALUE(monitor_id) = 0; + } + return; + + /* Nothing special for reset all operation for these existing + counters */ + case MONITOR_RESET_ALL_VALUE: + return; + } +} + +/*************************************************************//** +Reset a monitor, create a new base line with the current monitor +value.
This baseline is recorded by MONITOR_VALUE_RESET(monitor) */ +void +srv_mon_reset( +/*==========*/ + monitor_id_t monitor) /*!< in: monitor id */ +{ + ibool monitor_was_on; + + monitor_was_on = MONITOR_IS_ON(monitor); + + if (monitor_was_on) { + /* Temporarily turn off the counter for the duration of the + reset operation; it is turned back on at the end */ + MONITOR_OFF(monitor); + } + + /* Before resetting the current monitor value, first + calculate and set the max/min value since monitor + start */ + srv_mon_calc_max_since_start(monitor); + srv_mon_calc_min_since_start(monitor); + + /* Monitors with the MONITOR_DISPLAY_CURRENT bit + are not incremental, so there is no need to remember + the reset value. */ + if (innodb_counter_info[monitor].monitor_type + & MONITOR_DISPLAY_CURRENT) { + MONITOR_VALUE_RESET(monitor) = 0; + } else { + /* Remember the new baseline: the old baseline plus the value accumulated since then */ + MONITOR_VALUE_RESET(monitor) = MONITOR_VALUE_RESET(monitor) + + MONITOR_VALUE(monitor); + } + + /* Reset the counter value, and set the max/min values back to MAX_RESERVED/MIN_RESERVED */ + MONITOR_VALUE(monitor) = 0; + MONITOR_MAX_VALUE(monitor) = MAX_RESERVED; + MONITOR_MIN_VALUE(monitor) = MIN_RESERVED; + + /* Record the time of this reset */ MONITOR_FIELD((monitor), mon_reset_time) = time(NULL); + + if (monitor_was_on) { + /* Undo the temporary turn-off done at the start */ MONITOR_ON(monitor); + } +} + +/*************************************************************//** +Turn on monitor counters that are marked as default ON, i.e. whose +monitor_type has the MONITOR_DEFAULT_ON bit set. */ +void +srv_mon_default_on(void) +/*====================*/ +{ + ulint ix; + + for (ix = 0; ix < NUM_MONITOR; ix++) { + if (innodb_counter_info[ix].monitor_type + & MONITOR_DEFAULT_ON) { + /* This counter is default on: turn it on, then apply MONITOR_INIT() and MONITOR_SET_START() to it */ + MONITOR_ON(ix); + MONITOR_INIT(ix); + MONITOR_SET_START(ix); + } + } +} diff --git a/storage/innobase/srv/srv0srv.cc b/storage/innobase/srv/srv0srv.cc new file mode 100644 index 00000000..ad221dc2 --- /dev/null +++ b/storage/innobase/srv/srv0srv.cc @@ -0,0 +1,2135 @@ +/***************************************************************************** + +Copyright (c) 1995, 2017, Oracle and/or its affiliates. All Rights Reserved.
+Copyright (c) 2008, 2009 Google Inc. +Copyright (c) 2009, Percona Inc. +Copyright (c) 2013, 2021, MariaDB Corporation. + +Portions of this file contain modifications contributed and copyrighted by +Google, Inc. Those modifications are gratefully acknowledged and are described +briefly in the InnoDB documentation. The contributions by Google are +incorporated with their permission, and subject to the conditions contained in +the file COPYING.Google. + +Portions of this file contain modifications contributed and copyrighted +by Percona Inc.. Those modifications are +gratefully acknowledged and are described briefly in the InnoDB +documentation. The contributions by Percona Inc. are incorporated with +their permission, and subject to the conditions contained in the file +COPYING.Percona. + +This program is free software; you can redistribute it and/or modify it under +the terms of the GNU General Public License as published by the Free Software +Foundation; version 2 of the License. + +This program is distributed in the hope that it will be useful, but WITHOUT +ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS +FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details. 
+ +You should have received a copy of the GNU General Public License along with +this program; if not, write to the Free Software Foundation, Inc., +51 Franklin Street, Fifth Floor, Boston, MA 02110-1335 USA + +*****************************************************************************/ + +/**************************************************//** +@file srv/srv0srv.cc +The database server main program + +Created 10/8/1995 Heikki Tuuri +*******************************************************/ + +#include "my_global.h" +// JAN: TODO: MySQL 5.7 missing header +//#include "my_thread.h" +// +#include "mysql/psi/mysql_stage.h" +#include "mysql/psi/psi.h" + +#include "btr0sea.h" +#include "buf0flu.h" +#include "buf0lru.h" +#include "dict0boot.h" +#include "dict0load.h" +#include "ibuf0ibuf.h" +#include "lock0lock.h" +#include "log0recv.h" +#include "mem0mem.h" +#include "pars0pars.h" +#include "que0que.h" +#include "row0mysql.h" +#include "row0log.h" +#include "srv0mon.h" +#include "srv0srv.h" +#include "srv0start.h" +#include "sync0sync.h" +#include "trx0i_s.h" +#include "trx0purge.h" +#include "ut0crc32.h" +#include "btr0defragment.h" +#include "ut0mem.h" +#include "fil0fil.h" +#include "fil0crypt.h" +#include "fil0pagecompress.h" +#include "trx0types.h" +#include <list> + +#include <my_service_manager.h> +/* The following is the maximum allowed duration of a lock wait. */ +UNIV_INTERN ulong srv_fatal_semaphore_wait_threshold = DEFAULT_SRV_FATAL_SEMAPHORE_TIMEOUT; + +/* How much data manipulation language (DML) statements need to be delayed, +in microseconds, in order to reduce the lagging of the purge thread. 
*/ +ulint srv_dml_needed_delay; + +const char* srv_main_thread_op_info = ""; + +/** Prefix used by MySQL to indicate pre-5.1 table name encoding */ +const char srv_mysql50_table_name_prefix[10] = "#mysql50#"; + +/* Server parameters which are read from the initfile */ + +/* The following three are directory paths which are concatenated before file +names, where the file name itself may also contain a path */ + +char* srv_data_home; + +/** Rollback files directory, can be absolute. */ +char* srv_undo_dir; + +/** The number of tablespaces to use for rollback segments. */ +ulong srv_undo_tablespaces; + +/** The number of UNDO tablespaces that are open and ready to use. */ +ulint srv_undo_tablespaces_open; + +/** The number of UNDO tablespaces that are active (hosting some rollback +segment). It is quite possible that some of the tablespaces do not host +any rollback segment, depending on the configuration used. */ +ulint srv_undo_tablespaces_active; + +/** Rate at which UNDO records should be purged. */ +ulong srv_purge_rseg_truncate_frequency; + +/** Enable or disable truncation of UNDO tablespaces. +Note: If enabled, an UNDO tablespace will be selected for truncation. +If the user disables this while the server is waiting for an undo +tablespace to be truncated, that truncation is completed but no new tablespace is marked +for truncation (the action is never aborted). */ +my_bool srv_undo_log_truncate; + +/** Maximum size of undo tablespace. */ +unsigned long long srv_max_undo_log_size; + +/** Set if InnoDB must operate in read-only mode. We don't do any +recovery and open all tables in RO mode instead of RW mode. We don't +sync the max trx id to disk either. */ +my_bool srv_read_only_mode; +/** Store each table created by a user in its own file; data +dictionary tables are in the system tablespace 0 */ +my_bool srv_file_per_table; +/** Set if InnoDB operates in read-only mode or innodb-force-recovery +is greater than SRV_FORCE_NO_TRX_UNDO.
*/ +my_bool high_level_read_only; + +/** Sort buffer size used in index creation */ +ulong srv_sort_buf_size; +/** Maximum modification log file size for online index creation */ +unsigned long long srv_online_max_size; + +/* If this flag is TRUE, then we will use the native aio of the +OS (provided we compiled Innobase with it in); otherwise we will +use the simulated aio that we build below with threads. +Currently we support native aio on Windows and Linux */ +my_bool srv_use_native_aio; +/** NOTE(review): presumably the copy of innodb_numa_interleave - confirm */ my_bool srv_numa_interleave; +/** copy of innodb_use_atomic_writes; @see innodb_init_params() */ +my_bool srv_use_atomic_writes; +/** innodb_compression_algorithm; used with page compression */ +ulong innodb_compression_algorithm; + +#ifdef UNIV_DEBUG +/** Used by SET GLOBAL innodb_master_thread_disabled_debug = X. */ +my_bool srv_master_thread_disabled_debug; +/** Event used to signal that the master thread is disabled. */ +static os_event_t srv_master_thread_disabled_event; +#endif /* UNIV_DEBUG */ + +/*------------------------- LOG FILES ------------------------ */ +char* srv_log_group_home_dir; + +/** The InnoDB redo log file size, or 0 when changing the redo log format +at startup (while disallowing writes to the redo log). */ +ulonglong srv_log_file_size; +/** innodb_log_buffer_size, in bytes */ +ulong srv_log_buffer_size; +/** innodb_flush_log_at_trx_commit */ +ulong srv_flush_log_at_trx_commit; +/** innodb_flush_log_at_timeout */ +uint srv_flush_log_at_timeout; +/** innodb_page_size */ +ulong srv_page_size; +/** log2 of innodb_page_size; @see innodb_init_params() */ +ulong srv_page_size_shift; +/** innodb_log_write_ahead_size */ +ulong srv_log_write_ahead_size; + +/** innodb_adaptive_flushing; try to flush dirty pages so as to avoid +IO bursts at the checkpoints.
*/ +my_bool srv_adaptive_flushing; + +/** innodb_flush_sync; whether to ignore io_capacity at log checkpoints */ +my_bool srv_flush_sync; + +/** Common thread pool */ +tpool::thread_pool* srv_thread_pool; + +/** Maximum number of times allowed to conditionally acquire +mutex before switching to blocking wait on the mutex */ +#define MAX_MUTEX_NOWAIT 2 + +/** Evaluates to true while the number of failed nonblocking mutex +acquisition attempts is still below MAX_MUTEX_NOWAIT. Once it is not, +srv_printf_innodb_monitor() will request mutex acquisition +with mutex_enter(), which will wait until it gets the mutex. */ +#define MUTEX_NOWAIT(mutex_skipped) ((mutex_skipped) < MAX_MUTEX_NOWAIT) + +#ifdef WITH_INNODB_DISALLOW_WRITES +UNIV_INTERN os_event_t srv_allow_writes_event; +#endif /* WITH_INNODB_DISALLOW_WRITES */ + +/** copy of innodb_buffer_pool_size */ +ulint srv_buf_pool_size; +/** Minimum pool size (5 * 1024 * 1024; presumably in bytes, like the default below) */ const ulint srv_buf_pool_min_size = 5 * 1024 * 1024; +/** Default pool size in bytes */ +const ulint srv_buf_pool_def_size = 128 * 1024 * 1024; +/** Requested buffer pool chunk size */ +ulong srv_buf_pool_chunk_unit; +/** innodb_lru_scan_depth; number of blocks scanned in LRU flush batch */ +ulong srv_LRU_scan_depth; +/** innodb_flush_neighbors; whether or not to flush neighbors of a block */ +ulong srv_flush_neighbors; +/** Previously requested size */ +ulint srv_buf_pool_old_size; +/** Current size as scaling factor for the other components */ +ulint srv_buf_pool_base_size; +/** Current size in bytes */ +ulint srv_buf_pool_curr_size; +/** Dump this % of each buffer pool during BP dump */ +ulong srv_buf_pool_dump_pct; +/** Abort load after this number of pages (only defined in UNIV_DEBUG builds) */ +#ifdef UNIV_DEBUG +ulong srv_buf_pool_load_pages_abort = LONG_MAX; +#endif +/** Lock table size in bytes */ +ulint srv_lock_table_size = ULINT_MAX; + +/** innodb_read_io_threads */ +uint srv_n_read_io_threads; +/** innodb_write_io_threads */ +uint srv_n_write_io_threads; + +/** innodb_random_read_ahead */ +my_bool srv_random_read_ahead; +/**
innodb_read_ahead_threshold; the number of pages that must be present +in the buffer cache and accessed sequentially for InnoDB to trigger a +readahead request. */ +ulong srv_read_ahead_threshold; + +/** innodb_change_buffer_max_size; maximum on-disk size of the change +buffer as a percentage of the buffer pool. */ +uint srv_change_buffer_max_size; + +ulong srv_file_flush_method; + + +/** copy of innodb_open_files; @see innodb_init_params() */ +ulint srv_max_n_open_files; + +/** innodb_io_capacity */ +ulong srv_io_capacity; +/** innodb_io_capacity_max */ +ulong srv_max_io_capacity; + +/* The InnoDB main thread tries to keep the ratio of modified pages +in the buffer pool to all database pages in the buffer pool smaller than +the following number. But it is not guaranteed that the value stays below +that during a time of heavy update/insert activity. */ + +/** innodb_max_dirty_pages_pct */ +double srv_max_buf_pool_modified_pct; +/** innodb_max_dirty_pages_pct_lwm */ +double srv_max_dirty_pages_pct_lwm; + +/** innodb_adaptive_flushing_lwm; the percentage of log capacity at +which adaptive flushing, if enabled, will kick in. */ +double srv_adaptive_flushing_lwm; + +/** innodb_flushing_avg_loops; number of iterations over which +adaptive flushing is averaged */ +ulong srv_flushing_avg_loops; + +/** innodb_purge_threads; the number of purge tasks to use */ +uint srv_n_purge_threads; + +/** innodb_purge_batch_size, in pages */ +ulong srv_purge_batch_size; + +/** innodb_stats_method decides how InnoDB treats +NULL values when collecting statistics. By default, it is set to +SRV_STATS_NULLS_EQUAL(0), i.e. all NULL values are treated as equal */ +ulong srv_innodb_stats_method; + +srv_stats_t srv_stats; + +/* Structure to pass status variables to MySQL */ +export_var_t export_vars; + +/** Normally 0. When nonzero, skip some phases of crash recovery, +starting from SRV_FORCE_IGNORE_CORRUPT, so that data can be recovered +by SELECT or mysqldump.
When this is nonzero, we do not allow any user +modifications to the data. */ +ulong srv_force_recovery; + +/** innodb_print_all_deadlocks; whether to print all user-level +transaction deadlocks to the error log */ +my_bool srv_print_all_deadlocks; + +/** innodb_cmp_per_index_enabled; enable +INFORMATION_SCHEMA.innodb_cmp_per_index */ +my_bool srv_cmp_per_index_enabled; + +/** innodb_fast_shutdown=1 skips purge and change buffer merge. +innodb_fast_shutdown=2 effectively crashes the server (no log checkpoint). +innodb_fast_shutdown=3 is a clean shutdown that skips the rollback +of active transactions (to be done on restart). */ +uint srv_fast_shutdown; + +/** copy of innodb_status_file; generate an innodb_status.<pid> file */ +ibool srv_innodb_status; + +/** innodb_prefix_index_cluster_optimization; whether to optimize +prefix index queries to skip cluster index lookup when possible */ +my_bool srv_prefix_index_cluster_optimization; + +/** innodb_stats_transient_sample_pages; +When estimating number of different key values in an index, sample +this many index pages, there are 2 ways to calculate statistics: +* persistent stats that are calculated by ANALYZE TABLE and saved + in the innodb database.
+* quick transient stats, that are used if persistent stats for the given + table/index are not found in the innodb database */ +unsigned long long srv_stats_transient_sample_pages; +/** innodb_stats_persistent */ +my_bool srv_stats_persistent; +/** innodb_stats_include_delete_marked */ +my_bool srv_stats_include_delete_marked; +/** innodb_stats_persistent_sample_pages */ +unsigned long long srv_stats_persistent_sample_pages; +/** innodb_stats_auto_recalc */ +my_bool srv_stats_auto_recalc; + +/** innodb_stats_modified_counter; the number of rows modified before +we calculate new statistics (default 0 = current limits) */ +unsigned long long srv_stats_modified_counter; + +/** innodb_stats_traditional; enable traditional statistics calculation +based on the number of configured pages */ +my_bool srv_stats_sample_traditional; + +my_bool srv_use_doublewrite_buf; + +/** innodb_sync_spin_loops */ +ulong srv_n_spin_wait_rounds; +/** innodb_spin_wait_delay */ +uint srv_spin_wait_delay; + +/** Copies of the srv_stats row counters (user and system rows), saved by srv_refresh_innodb_monitor_stats() */ static ulint srv_n_rows_inserted_old; +static ulint srv_n_rows_updated_old; +static ulint srv_n_rows_deleted_old; +static ulint srv_n_rows_read_old; +static ulint srv_n_system_rows_inserted_old; +static ulint srv_n_system_rows_updated_old; +static ulint srv_n_system_rows_deleted_old; +static ulint srv_n_system_rows_read_old; + +ulint srv_truncated_status_writes; +/** Number of initialized rollback segments for persistent undo log */ +ulong srv_available_undo_logs; + +/* Defragmentation */ +UNIV_INTERN my_bool srv_defragment; +/** innodb_defragment_n_pages */ +UNIV_INTERN uint srv_defragment_n_pages; +UNIV_INTERN uint srv_defragment_stats_accuracy; +/** innodb_defragment_fill_factor_n_recs */ +UNIV_INTERN uint srv_defragment_fill_factor_n_recs; +/** innodb_defragment_fill_factor */ +UNIV_INTERN double srv_defragment_fill_factor; +/** innodb_defragment_frequency */ +UNIV_INTERN uint srv_defragment_frequency; +/** derived from innodb_defragment_frequency; +@see
innodb_defragment_frequency_update() */ +UNIV_INTERN ulonglong srv_defragment_interval; + +/** Current mode of operation */ +UNIV_INTERN enum srv_operation_mode srv_operation; + +/* Set the following to 0 if you want InnoDB to write messages on +stderr on startup/shutdown. Not enabled on the embedded server. */ +ibool srv_print_verbose_log; +my_bool srv_print_innodb_monitor; +my_bool srv_print_innodb_lock_monitor; +/** innodb_force_primary_key; whether to disallow CREATE TABLE without +PRIMARY KEY */ +my_bool srv_force_primary_key; + +/** Whether to encrypt the temporary tablespace. NOTE(review): this was +documented as a "key version", but the variable is a my_bool - confirm */ +my_bool innodb_encrypt_temporary_tables; + +my_bool srv_immediate_scrub_data_uncompressed; + +/** Time of the last InnoDB Monitor refresh or printout; updated in srv_refresh_innodb_monitor_stats() and srv_printf_innodb_monitor() */ static time_t srv_last_monitor_time; + +/** Mutex held by srv_refresh_innodb_monitor_stats() and srv_printf_innodb_monitor() */ static ib_mutex_t srv_innodb_monitor_mutex; + +/** Mutex protecting page_zip_stat_per_index */ +ib_mutex_t page_zip_stat_per_index_mutex; + +/* Mutex for locking srv_monitor_file. Not created if srv_read_only_mode */ +ib_mutex_t srv_monitor_file_mutex; + +/** Temporary file for innodb monitor output */ +FILE* srv_monitor_file; +/** Mutex for locking srv_misc_tmpfile. Not created if srv_read_only_mode. +This mutex has a very low rank; threads reserving it should not +acquire any further latches or sleep before releasing this one. */ +ib_mutex_t srv_misc_tmpfile_mutex; +/** Temporary file for miscellaneous diagnostic output */ +FILE* srv_misc_tmpfile; + +static ulint srv_main_thread_process_no; +static ulint srv_main_thread_id; + +/* The following counts are used by the srv_master_callback. */ + +/** Iterations of the loop bounded by the 'srv_active' label. */ +ulint srv_main_active_loops; +/** Iterations of the loop bounded by the 'srv_idle' label. */ +ulint srv_main_idle_loops; +/** Iterations of the loop bounded by the 'srv_shutdown' label. */ +static ulint srv_main_shutdown_loops; +/** Log writes involving flush. */ +ulint srv_log_writes_and_flush; + +/* This is only ever touched by the master thread.
It records the +time when the last flush of log file has happened. The master +thread ensures that we flush the log files at least once per +second. */ +static time_t srv_last_log_flush_time; + +/* Interval in seconds at which various tasks are performed by the +master thread when server is active. In order to balance the workload, +we should try to keep intervals such that they are not multiples of +each other. For example, if we have intervals for various tasks +defined as 5, 10, 15, 60 then all tasks will be performed when +current_time % 60 == 0 and no tasks will be performed when +current_time % 5 != 0. */ + +# define SRV_MASTER_CHECKPOINT_INTERVAL (7) +# define SRV_MASTER_DICT_LRU_INTERVAL (47) + +/** Buffer pool dump status frequency in percentages */ +UNIV_INTERN ulong srv_buf_dump_status_frequency; + +/* + IMPLEMENTATION OF THE SERVER MAIN PROGRAM + ========================================= + +There is the following analogue between this database +server and an operating system kernel: + +DB concept equivalent OS concept +---------- --------------------- +transaction -- process; + +query thread -- thread; + +lock -- semaphore; + +kernel -- kernel; + +query thread execution: +(a) without lock mutex +reserved -- process executing in user mode; +(b) with lock mutex reserved + -- process executing in kernel mode; + +The server has several background threads all running at the same +priority as user threads. It periodically checks if there is anything +happening in the server which requires intervention of the master +thread. Such situations may be, for example, when flushing of dirty +blocks is needed in the buffer pool or old versions of database rows +have to be cleaned away (purged). The user can configure separate +dedicated purge thread(s) too, in which case the master thread does not +do any purging. + +The threads which we call user threads serve the queries of the MySQL +server. They run at normal priority.
+ +When there is no activity in the system, the master thread also +suspends itself to wait for an event, making the server totally silent. + +There is still one complication in our server design. If a +background utility thread obtains a resource (e.g., mutex) needed by a user +thread, and there is also some other user activity in the system, +the user thread may have to wait indefinitely long for the +resource, as the OS does not schedule a background thread if +there is some other runnable user thread. This problem is called +priority inversion in real-time programming. + +One solution to the priority inversion problem would be to keep a record +of which thread owns which resource and in the above case boost the +priority of the background thread so that it will be scheduled and it +can release the resource. This solution is called priority inheritance +in real-time programming. A drawback of this solution is that the overhead +of acquiring a mutex increases slightly, maybe 0.2 microseconds on a 100 +MHz Pentium, because the thread has to call os_thread_get_curr_id. This may +be compared to 0.5 microsecond overhead for a mutex lock-unlock pair. Note +that the thread cannot store the information in the resource, say a mutex, +itself, because competing threads could wipe out the information if it is +stored before acquiring the mutex, and if it is stored afterwards, the +information is outdated for the time of one machine instruction, at least. +(To be precise, the information could be stored to lock_word in mutex if +the machine supports atomic swap.) + +The above solution with priority inheritance may become relevant in the +future; currently we do not implement any priority twiddling solution. +Our general aim is to reduce the contention of all mutexes by making +them more fine-grained.
+ +The thread table contains information on the current status of each +thread existing in the system, and also the event semaphores used in +suspending the master thread and utility threads when they have nothing +to do. The thread table can be seen as an analogue to the process table +in a traditional Unix implementation. */ + +/** The server system struct */ +struct srv_sys_t{ + ib_mutex_t tasks_mutex; /*!< mutex protecting the + tasks queue */ + UT_LIST_BASE_NODE_T(que_thr_t) + tasks; /*!< task queue */ + + srv_stats_t::ulint_ctr_1_t + activity_count; /*!< For tracking server + activity */ +}; + +static srv_sys_t srv_sys; + +/* + Structure shared by timer and coordinator_callback. + No protection necessary since timer and task never run + in parallel (being in the same task group of size 1). +*/ +struct purge_coordinator_state +{ + /** Snapshot of the last history length before the purge call. */ + uint32 m_history_length; + Atomic_counter<int> m_running; + purge_coordinator_state() : m_history_length(), m_running(0) {} +}; + +static purge_coordinator_state purge_state; + +/** Thread pool timer for srv_monitor_task() */ +std::unique_ptr<tpool::timer> srv_monitor_timer; + + +/** The buffer pool dump/load file name */ +char* srv_buf_dump_filename; + +/** Boolean config knobs that tell InnoDB to dump the buffer pool at shutdown +and/or load it during startup. */ +char srv_buffer_pool_dump_at_shutdown = TRUE; +char srv_buffer_pool_load_at_startup = TRUE; + +#ifdef HAVE_PSI_STAGE_INTERFACE +/** Performance schema stage event for monitoring ALTER TABLE progress: +everything after flush log_make_checkpoint(). */ +PSI_stage_info srv_stage_alter_table_end + = {0, "alter table (end)", PSI_FLAG_STAGE_PROGRESS}; + +/** Performance schema stage event for monitoring ALTER TABLE progress: +row_merge_insert_index_tuples().
*/ +PSI_stage_info srv_stage_alter_table_insert + = {0, "alter table (insert)", PSI_FLAG_STAGE_PROGRESS}; + +/** Performance schema stage event for monitoring ALTER TABLE progress: +row_log_apply(). */ +PSI_stage_info srv_stage_alter_table_log_index + = {0, "alter table (log apply index)", PSI_FLAG_STAGE_PROGRESS}; + +/** Performance schema stage event for monitoring ALTER TABLE progress: +row_log_table_apply(). */ +PSI_stage_info srv_stage_alter_table_log_table + = {0, "alter table (log apply table)", PSI_FLAG_STAGE_PROGRESS}; + +/** Performance schema stage event for monitoring ALTER TABLE progress: +row_merge_sort(). */ +PSI_stage_info srv_stage_alter_table_merge_sort + = {0, "alter table (merge sort)", PSI_FLAG_STAGE_PROGRESS}; + +/** Performance schema stage event for monitoring ALTER TABLE progress: +row_merge_read_clustered_index(). */ +PSI_stage_info srv_stage_alter_table_read_pk_internal_sort + = {0, "alter table (read PK and internal sort)", PSI_FLAG_STAGE_PROGRESS}; + +/** Performance schema stage event for monitoring buffer pool load progress. */ +PSI_stage_info srv_stage_buffer_pool_load + = {0, "buffer pool load", PSI_FLAG_STAGE_PROGRESS}; +#endif /* HAVE_PSI_STAGE_INTERFACE */ + +/*********************************************************************//** +Prints counters for work done by srv_master_thread.
*/ +static +void +srv_print_master_thread_info( +/*=========================*/ + FILE *file) /* in: output stream */ +{ + /* The arguments are listed in the order active, shutdown, idle to match the labels in the format string */ fprintf(file, "srv_master_thread loops: " ULINTPF " srv_active, " + ULINTPF " srv_shutdown, " ULINTPF " srv_idle\n" + "srv_master_thread log flush and writes: " ULINTPF "\n", + srv_main_active_loops, + srv_main_shutdown_loops, + srv_main_idle_loops, + srv_log_writes_and_flush); +} + +/** Thread-start callback passed to set_thread_callbacks() in srv_thread_pool_init(): calls my_thread_init() and pfs_register_thread() */ static void thread_pool_thread_init() +{ + my_thread_init(); + pfs_register_thread(thread_pool_thread_key); +} +/** Thread-end callback passed to set_thread_callbacks(): undoes thread_pool_thread_init() in reverse order */ static void thread_pool_thread_end() +{ + pfs_delete_thread(); + my_thread_end(); +} + + +#ifndef DBUG_OFF +/** Callback installed with tpool::set_after_task_callback() when DBUG_OFF is not defined: asserts that sync_check_iterate(sync_check()) returns false (presumably: no latches are still held after a task - confirm) */ static void dbug_after_task_callback() +{ + ut_ad(!sync_check_iterate(sync_check())); +} +#endif + +/** Create the common thread pool (Windows or generic implementation) and install the per-thread init/end callbacks. The pool must not exist yet. */ void srv_thread_pool_init() +{ + DBUG_ASSERT(!srv_thread_pool); + +#if defined (_WIN32) + srv_thread_pool= tpool::create_thread_pool_win(); +#else + srv_thread_pool= tpool::create_thread_pool_generic(); +#endif + srv_thread_pool->set_thread_callbacks(thread_pool_thread_init, + thread_pool_thread_end); +#ifndef DBUG_OFF + tpool::set_after_task_callback(dbug_after_task_callback); +#endif +} + + +/** Delete the common thread pool and reset the pointer; srv_master_timer is asserted to be unset already. */ void srv_thread_pool_end() +{ + ut_ad(!srv_master_timer); + delete srv_thread_pool; + srv_thread_pool= nullptr; +} + +/** Set by srv_init(); srv_free() does nothing while this is false */ static bool need_srv_free; + +/** Initialize the server. */ +static void srv_init() +{ + mutex_create(LATCH_ID_SRV_INNODB_MONITOR, &srv_innodb_monitor_mutex); + + if (!srv_read_only_mode) { + mutex_create(LATCH_ID_SRV_SYS_TASKS, &srv_sys.tasks_mutex); + + UT_LIST_INIT(srv_sys.tasks, &que_thr_t::queue); + } + + need_srv_free = true; + ut_d(srv_master_thread_disabled_event = os_event_create(0)); + + /* page_zip_stat_per_index_mutex is acquired from: + 1. page_zip_compress() (after SYNC_FSP) + 2. page_zip_decompress() + 3. i_s_cmp_per_index_fill_low() (where SYNC_DICT is acquired) + 4. innodb_cmp_per_index_update(), no other latches + Since we do not acquire any other latches while holding this mutex, + it can have a very low level.
We pick SYNC_ANY_LATCH for it. */ + mutex_create(LATCH_ID_PAGE_ZIP_STAT_PER_INDEX, + &page_zip_stat_per_index_mutex); + +#ifdef WITH_INNODB_DISALLOW_WRITES + /* Writes have to be enabled on init or else we hang. Thus, we + always set the event here regardless of innobase_disallow_writes. + That flag will always be 0 at this point because it isn't settable + via my.cnf or command line arg. */ + srv_allow_writes_event = os_event_create(0); + os_event_set(srv_allow_writes_event); +#endif /* WITH_INNODB_DISALLOW_WRITES */ + + /* Initialize some INFORMATION SCHEMA internal structures */ + trx_i_s_cache_init(trx_i_s_cache); + +} + +/*********************************************************************//** +Frees the data structures created in srv_init(), and ends the thread pool. +Does nothing if srv_init() has not run (need_srv_free is false). */ +void +srv_free(void) +/*==========*/ +{ + if (!need_srv_free) { + return; + } + + mutex_free(&srv_innodb_monitor_mutex); + mutex_free(&page_zip_stat_per_index_mutex); + + if (!srv_read_only_mode) { + /* tasks_mutex is only created by srv_init() when not in read-only mode */ mutex_free(&srv_sys.tasks_mutex); + } + + ut_d(os_event_destroy(srv_master_thread_disabled_event)); + + /* NOTE(review): srv_allow_writes_event, created in srv_init() under WITH_INNODB_DISALLOW_WRITES, is not destroyed here - confirm it is released elsewhere */ trx_i_s_cache_free(trx_i_s_cache); + srv_thread_pool_end(); +} + +/*********************************************************************//** +Boots the InnoDB server: creates the thread pool, then calls +sync_check_init(), trx_pool_init(), row_mysql_init() and srv_init(). */ +void +srv_boot(void) +/*==========*/ +{ + srv_thread_pool_init(); + sync_check_init(); + trx_pool_init(); + row_mysql_init(); + srv_init(); +} + +/******************************************************************//** +Refreshes the values used to calculate per-second averages.
*/ +static void srv_refresh_innodb_monitor_stats(time_t current_time) +{ + mutex_enter(&srv_innodb_monitor_mutex); + + if (difftime(current_time, srv_last_monitor_time) < 60) { + /* We refresh InnoDB Monitor values so that averages are + printed from at most the last 60 seconds; less than that has passed since the last refresh, so do nothing */ + mutex_exit(&srv_innodb_monitor_mutex); + return; + } + + srv_last_monitor_time = current_time; + + os_aio_refresh_stats(); + +#ifdef BTR_CUR_HASH_ADAPT + btr_cur_n_sea_old = btr_cur_n_sea; +#endif /* BTR_CUR_HASH_ADAPT */ + btr_cur_n_non_sea_old = btr_cur_n_non_sea; + + log_refresh_stats(); + + buf_refresh_io_stats(); + + /* Save the current user-row counters in the *_old variables */ srv_n_rows_inserted_old = srv_stats.n_rows_inserted; + srv_n_rows_updated_old = srv_stats.n_rows_updated; + srv_n_rows_deleted_old = srv_stats.n_rows_deleted; + srv_n_rows_read_old = srv_stats.n_rows_read; + + /* Likewise for the system-row counters */ srv_n_system_rows_inserted_old = srv_stats.n_system_rows_inserted; + srv_n_system_rows_updated_old = srv_stats.n_system_rows_updated; + srv_n_system_rows_deleted_old = srv_stats.n_system_rows_deleted; + srv_n_system_rows_read_old = srv_stats.n_system_rows_read; + + mutex_exit(&srv_innodb_monitor_mutex); +} + +/******************************************************************//** +Outputs to a file the output of the InnoDB Monitor.
+@return FALSE if not all information printed +due to failure to obtain necessary mutex */ +ibool +srv_printf_innodb_monitor( +/*======================*/ + FILE* file, /*!< in: output stream */ + ibool nowait, /*!< in: whether to wait for the + lock_sys_t:: mutex */ + ulint* trx_start_pos, /*!< out: file position of the start of + the list of active transactions */ + ulint* trx_end) /*!< out: file position of the end of + the list of active transactions */ +{ + double time_elapsed; + time_t current_time; + ibool ret; + + mutex_enter(&srv_innodb_monitor_mutex); + + current_time = time(NULL); + + /* We add 0.001 seconds to time_elapsed to prevent division + by zero if two users happen to call SHOW ENGINE INNODB STATUS at the + same time */ + + time_elapsed = difftime(current_time, srv_last_monitor_time) + + 0.001; + + srv_last_monitor_time = time(NULL); + + fputs("\n=====================================\n", file); + + ut_print_timestamp(file); + fprintf(file, + " INNODB MONITOR OUTPUT\n" + "=====================================\n" + "Per second averages calculated from the last %lu seconds\n", + (ulong) time_elapsed); + + fputs("-----------------\n" + "BACKGROUND THREAD\n" + "-----------------\n", file); + srv_print_master_thread_info(file); + + fputs("----------\n" + "SEMAPHORES\n" + "----------\n", file); + + sync_print(file); + + /* Conceptually, srv_innodb_monitor_mutex has a very high latching + order level in sync0sync.h, while dict_foreign_err_mutex has a very + low level 135. Therefore we can reserve the latter mutex here without + a danger of a deadlock of threads. 
*/ + + mutex_enter(&dict_foreign_err_mutex); + + if (!srv_read_only_mode && ftell(dict_foreign_err_file) != 0L) { + fputs("------------------------\n" + "LATEST FOREIGN KEY ERROR\n" + "------------------------\n", file); + ut_copy_file(file, dict_foreign_err_file); + } + + mutex_exit(&dict_foreign_err_mutex); + + /* Only if lock_print_info_summary proceeds correctly, + before we call the lock_print_info_all_transactions + to print all the lock information. IMPORTANT NOTE: This + function acquires the lock mutex on success. */ + ret = lock_print_info_summary(file, nowait); + + if (ret) { + if (trx_start_pos) { + long t = ftell(file); + if (t < 0) { + *trx_start_pos = ULINT_UNDEFINED; + } else { + *trx_start_pos = (ulint) t; + } + } + + /* NOTE: If we get here then we have the lock mutex. This + function will release the lock mutex that we acquired when + we called the lock_print_info_summary() function earlier. */ + + lock_print_info_all_transactions(file); + + if (trx_end) { + long t = ftell(file); + if (t < 0) { + *trx_end = ULINT_UNDEFINED; + } else { + *trx_end = (ulint) t; + } + } + } + + fputs("--------\n" + "FILE I/O\n" + "--------\n", file); + os_aio_print(file); + + fputs("-------------------------------------\n" + "INSERT BUFFER AND ADAPTIVE HASH INDEX\n" + "-------------------------------------\n", file); + ibuf_print(file); + +#ifdef BTR_CUR_HASH_ADAPT + for (ulint i = 0; i < btr_ahi_parts && btr_search_enabled; ++i) { + const auto part= &btr_search_sys.parts[i]; + rw_lock_s_lock(&part->latch); + ut_ad(part->heap->type == MEM_HEAP_FOR_BTR_SEARCH); + fprintf(file, "Hash table size " ULINTPF + ", node heap has " ULINTPF " buffer(s)\n", + part->table.n_cells, + part->heap->base.count - !part->heap->free_block); + rw_lock_s_unlock(&part->latch); + } + + fprintf(file, + "%.2f hash searches/s, %.2f non-hash searches/s\n", + static_cast<double>(btr_cur_n_sea - btr_cur_n_sea_old) + / time_elapsed, + static_cast<double>(btr_cur_n_non_sea - btr_cur_n_non_sea_old) 
+ / time_elapsed); + btr_cur_n_sea_old = btr_cur_n_sea; +#else /* BTR_CUR_HASH_ADAPT */ + fprintf(file, + "%.2f non-hash searches/s\n", + static_cast<double>(btr_cur_n_non_sea - btr_cur_n_non_sea_old) + / time_elapsed); +#endif /* BTR_CUR_HASH_ADAPT */ + btr_cur_n_non_sea_old = btr_cur_n_non_sea; + + fputs("---\n" + "LOG\n" + "---\n", file); + log_print(file); + + fputs("----------------------\n" + "BUFFER POOL AND MEMORY\n" + "----------------------\n", file); + fprintf(file, + "Total large memory allocated " ULINTPF "\n" + "Dictionary memory allocated " ULINTPF "\n", + ulint{os_total_large_mem_allocated}, + dict_sys.rough_size()); + + buf_print_io(file); + + fputs("--------------\n" + "ROW OPERATIONS\n" + "--------------\n", file); + fprintf(file, ULINTPF " read views open inside InnoDB\n", + trx_sys.view_count()); + + if (ulint n_reserved = fil_system.sys_space->n_reserved_extents) { + fprintf(file, + ULINTPF " tablespace extents now reserved for" + " B-tree split operations\n", + n_reserved); + } + + fprintf(file, + "Process ID=" ULINTPF + ", Main thread ID=" ULINTPF + ", state: %s\n", + srv_main_thread_process_no, + srv_main_thread_id, + srv_main_thread_op_info); + fprintf(file, + "Number of rows inserted " ULINTPF + ", updated " ULINTPF + ", deleted " ULINTPF + ", read " ULINTPF "\n", + (ulint) srv_stats.n_rows_inserted, + (ulint) srv_stats.n_rows_updated, + (ulint) srv_stats.n_rows_deleted, + (ulint) srv_stats.n_rows_read); + fprintf(file, + "%.2f inserts/s, %.2f updates/s," + " %.2f deletes/s, %.2f reads/s\n", + static_cast<double>(srv_stats.n_rows_inserted + - srv_n_rows_inserted_old) + / time_elapsed, + static_cast<double>(srv_stats.n_rows_updated + - srv_n_rows_updated_old) + / time_elapsed, + static_cast<double>(srv_stats.n_rows_deleted + - srv_n_rows_deleted_old) + / time_elapsed, + static_cast<double>(srv_stats.n_rows_read + - srv_n_rows_read_old) + / time_elapsed); + fprintf(file, + "Number of system rows inserted " ULINTPF + ", updated " ULINTPF ", 
deleted " ULINTPF + ", read " ULINTPF "\n", + (ulint) srv_stats.n_system_rows_inserted, + (ulint) srv_stats.n_system_rows_updated, + (ulint) srv_stats.n_system_rows_deleted, + (ulint) srv_stats.n_system_rows_read); + fprintf(file, + "%.2f inserts/s, %.2f updates/s," + " %.2f deletes/s, %.2f reads/s\n", + static_cast<double>(srv_stats.n_system_rows_inserted + - srv_n_system_rows_inserted_old) + / time_elapsed, + static_cast<double>(srv_stats.n_system_rows_updated + - srv_n_system_rows_updated_old) + / time_elapsed, + static_cast<double>(srv_stats.n_system_rows_deleted + - srv_n_system_rows_deleted_old) + / time_elapsed, + static_cast<double>(srv_stats.n_system_rows_read + - srv_n_system_rows_read_old) + / time_elapsed); + srv_n_rows_inserted_old = srv_stats.n_rows_inserted; + srv_n_rows_updated_old = srv_stats.n_rows_updated; + srv_n_rows_deleted_old = srv_stats.n_rows_deleted; + srv_n_rows_read_old = srv_stats.n_rows_read; + srv_n_system_rows_inserted_old = srv_stats.n_system_rows_inserted; + srv_n_system_rows_updated_old = srv_stats.n_system_rows_updated; + srv_n_system_rows_deleted_old = srv_stats.n_system_rows_deleted; + srv_n_system_rows_read_old = srv_stats.n_system_rows_read; + + fputs("----------------------------\n" + "END OF INNODB MONITOR OUTPUT\n" + "============================\n", file); + mutex_exit(&srv_innodb_monitor_mutex); + fflush(file); + + return(ret); +} + +/******************************************************************//** +Function to pass InnoDB status variables to MySQL */ +void +srv_export_innodb_status(void) +/*==========================*/ +{ + fil_crypt_stat_t crypt_stat; + + if (!srv_read_only_mode) { + fil_crypt_total_stat(&crypt_stat); + } + +#ifdef BTR_CUR_HASH_ADAPT + ulint mem_adaptive_hash = 0; + for (ulong i = 0; i < btr_ahi_parts; i++) { + const auto part= &btr_search_sys.parts[i]; + rw_lock_s_lock(&part->latch); + if (part->heap) { + ut_ad(part->heap->type == MEM_HEAP_FOR_BTR_SEARCH); + + mem_adaptive_hash += 
mem_heap_get_size(part->heap) + + part->table.n_cells * sizeof(hash_cell_t); + } + rw_lock_s_unlock(&part->latch); + } + export_vars.innodb_mem_adaptive_hash = mem_adaptive_hash; +#endif + + export_vars.innodb_mem_dictionary = dict_sys.rough_size(); + + mutex_enter(&srv_innodb_monitor_mutex); + + export_vars.innodb_data_pending_reads = + ulint(MONITOR_VALUE(MONITOR_OS_PENDING_READS)); + + export_vars.innodb_data_pending_writes = + ulint(MONITOR_VALUE(MONITOR_OS_PENDING_WRITES)); + + export_vars.innodb_data_pending_fsyncs = + log_sys.get_pending_flushes() + + fil_n_pending_tablespace_flushes; + + export_vars.innodb_data_fsyncs = os_n_fsyncs; + + export_vars.innodb_data_read = srv_stats.data_read; + + export_vars.innodb_data_reads = os_n_file_reads; + + export_vars.innodb_data_writes = os_n_file_writes; + + ulint dblwr = 0; + + if (buf_dblwr.is_initialised()) { + buf_dblwr.lock(); + dblwr = buf_dblwr.submitted(); + export_vars.innodb_dblwr_pages_written = buf_dblwr.written(); + export_vars.innodb_dblwr_writes = buf_dblwr.batches(); + buf_dblwr.unlock(); + } + + export_vars.innodb_data_written = srv_stats.data_written + dblwr; + + export_vars.innodb_buffer_pool_read_requests + = buf_pool.stat.n_page_gets; + + export_vars.innodb_buffer_pool_write_requests = + srv_stats.buf_pool_write_requests; + + export_vars.innodb_buffer_pool_reads = srv_stats.buf_pool_reads; + + export_vars.innodb_buffer_pool_read_ahead_rnd = + buf_pool.stat.n_ra_pages_read_rnd; + + export_vars.innodb_buffer_pool_read_ahead = + buf_pool.stat.n_ra_pages_read; + + export_vars.innodb_buffer_pool_read_ahead_evicted = + buf_pool.stat.n_ra_pages_evicted; + + export_vars.innodb_buffer_pool_pages_data = + UT_LIST_GET_LEN(buf_pool.LRU); + + export_vars.innodb_buffer_pool_bytes_data = + buf_pool.stat.LRU_bytes + + (UT_LIST_GET_LEN(buf_pool.unzip_LRU) + << srv_page_size_shift); + + export_vars.innodb_buffer_pool_pages_dirty = + UT_LIST_GET_LEN(buf_pool.flush_list); + + 
export_vars.innodb_buffer_pool_pages_made_young + = buf_pool.stat.n_pages_made_young; + export_vars.innodb_buffer_pool_pages_made_not_young + = buf_pool.stat.n_pages_not_made_young; + + export_vars.innodb_buffer_pool_pages_old = buf_pool.LRU_old_len; + + export_vars.innodb_buffer_pool_bytes_dirty = + buf_pool.stat.flush_list_bytes; + + export_vars.innodb_buffer_pool_pages_free = + UT_LIST_GET_LEN(buf_pool.free); + +#ifdef UNIV_DEBUG + export_vars.innodb_buffer_pool_pages_latched = + buf_get_latched_pages_number(); +#endif /* UNIV_DEBUG */ + export_vars.innodb_buffer_pool_pages_total = buf_pool.get_n_pages(); + + export_vars.innodb_buffer_pool_pages_misc = + buf_pool.get_n_pages() + - UT_LIST_GET_LEN(buf_pool.LRU) + - UT_LIST_GET_LEN(buf_pool.free); + + export_vars.innodb_max_trx_id = trx_sys.get_max_trx_id(); + export_vars.innodb_history_list_length = trx_sys.rseg_history_len; + + export_vars.innodb_log_waits = srv_stats.log_waits; + + export_vars.innodb_os_log_written = srv_stats.os_log_written; + + export_vars.innodb_os_log_fsyncs = log_sys.get_flushes(); + + export_vars.innodb_os_log_pending_fsyncs + = log_sys.get_pending_flushes(); + + export_vars.innodb_os_log_pending_writes = + srv_stats.os_log_pending_writes; + + export_vars.innodb_log_write_requests = srv_stats.log_write_requests; + + export_vars.innodb_log_writes = srv_stats.log_writes; + + export_vars.innodb_row_lock_waits = srv_stats.n_lock_wait_count; + + export_vars.innodb_row_lock_current_waits = + srv_stats.n_lock_wait_current_count; + + export_vars.innodb_row_lock_time = srv_stats.n_lock_wait_time / 1000; + + if (srv_stats.n_lock_wait_count > 0) { + + export_vars.innodb_row_lock_time_avg = (ulint) + (srv_stats.n_lock_wait_time + / 1000 / srv_stats.n_lock_wait_count); + + } else { + export_vars.innodb_row_lock_time_avg = 0; + } + + export_vars.innodb_row_lock_time_max = + lock_sys.n_lock_max_wait_time / 1000; + + export_vars.innodb_rows_read = srv_stats.n_rows_read; + + 
export_vars.innodb_rows_inserted = srv_stats.n_rows_inserted; + + export_vars.innodb_rows_updated = srv_stats.n_rows_updated; + + export_vars.innodb_rows_deleted = srv_stats.n_rows_deleted; + + export_vars.innodb_system_rows_read = srv_stats.n_system_rows_read; + + export_vars.innodb_system_rows_inserted = + srv_stats.n_system_rows_inserted; + + export_vars.innodb_system_rows_updated = + srv_stats.n_system_rows_updated; + + export_vars.innodb_system_rows_deleted = + srv_stats.n_system_rows_deleted; + + export_vars.innodb_truncated_status_writes = + srv_truncated_status_writes; + + export_vars.innodb_page_compression_saved = srv_stats.page_compression_saved; + export_vars.innodb_index_pages_written = srv_stats.index_pages_written; + export_vars.innodb_non_index_pages_written = srv_stats.non_index_pages_written; + export_vars.innodb_pages_page_compressed = srv_stats.pages_page_compressed; + export_vars.innodb_page_compressed_trim_op = srv_stats.page_compressed_trim_op; + export_vars.innodb_pages_page_decompressed = srv_stats.pages_page_decompressed; + export_vars.innodb_pages_page_compression_error = srv_stats.pages_page_compression_error; + export_vars.innodb_pages_decrypted = srv_stats.pages_decrypted; + export_vars.innodb_pages_encrypted = srv_stats.pages_encrypted; + export_vars.innodb_n_merge_blocks_encrypted = srv_stats.n_merge_blocks_encrypted; + export_vars.innodb_n_merge_blocks_decrypted = srv_stats.n_merge_blocks_decrypted; + export_vars.innodb_n_rowlog_blocks_encrypted = srv_stats.n_rowlog_blocks_encrypted; + export_vars.innodb_n_rowlog_blocks_decrypted = srv_stats.n_rowlog_blocks_decrypted; + + export_vars.innodb_n_temp_blocks_encrypted = + srv_stats.n_temp_blocks_encrypted; + + export_vars.innodb_n_temp_blocks_decrypted = + srv_stats.n_temp_blocks_decrypted; + + export_vars.innodb_defragment_compression_failures = + btr_defragment_compression_failures; + export_vars.innodb_defragment_failures = btr_defragment_failures; + 
export_vars.innodb_defragment_count = btr_defragment_count; + + export_vars.innodb_onlineddl_rowlog_rows = onlineddl_rowlog_rows; + export_vars.innodb_onlineddl_rowlog_pct_used = onlineddl_rowlog_pct_used; + export_vars.innodb_onlineddl_pct_progress = onlineddl_pct_progress; + + export_vars.innodb_sec_rec_cluster_reads = + srv_stats.n_sec_rec_cluster_reads; + export_vars.innodb_sec_rec_cluster_reads_avoided = + srv_stats.n_sec_rec_cluster_reads_avoided; + + if (!srv_read_only_mode) { + export_vars.innodb_encryption_rotation_pages_read_from_cache = + crypt_stat.pages_read_from_cache; + export_vars.innodb_encryption_rotation_pages_read_from_disk = + crypt_stat.pages_read_from_disk; + export_vars.innodb_encryption_rotation_pages_modified = + crypt_stat.pages_modified; + export_vars.innodb_encryption_rotation_pages_flushed = + crypt_stat.pages_flushed; + export_vars.innodb_encryption_rotation_estimated_iops = + crypt_stat.estimated_iops; + export_vars.innodb_encryption_key_requests = + srv_stats.n_key_requests; + export_vars.innodb_key_rotation_list_length = + srv_stats.key_rotation_list_length; + } + + mutex_exit(&srv_innodb_monitor_mutex); + + mysql_mutex_lock(&log_sys.mutex); + export_vars.innodb_lsn_current = log_sys.get_lsn(); + export_vars.innodb_lsn_flushed = log_sys.get_flushed_lsn(); + export_vars.innodb_lsn_last_checkpoint = log_sys.last_checkpoint_lsn; + export_vars.innodb_checkpoint_max_age = static_cast<ulint>( + log_sys.max_checkpoint_age); + mysql_mutex_unlock(&log_sys.mutex); + + export_vars.innodb_checkpoint_age = static_cast<ulint>( + export_vars.innodb_lsn_current + - export_vars.innodb_lsn_last_checkpoint); +} + +struct srv_monitor_state_t +{ + time_t last_monitor_time; + ulint mutex_skipped; + bool last_srv_print_monitor; + srv_monitor_state_t() : mutex_skipped(0), last_srv_print_monitor(false) + { + srv_last_monitor_time = time(NULL); + last_monitor_time= srv_last_monitor_time; + } +}; + +static srv_monitor_state_t monitor_state; + +/** A task 
which prints the info output by various InnoDB monitors.
Called from srv_monitor_task(). Once at least 15 seconds have passed
since monitor_state.last_monitor_time it prints the monitor output to
stderr (if srv_print_innodb_monitor is set) and to srv_monitor_file
(if srv_innodb_status is set and the server is not read-only). It
always finishes by calling srv_refresh_innodb_monitor_stats(). */
static void srv_monitor()
{
	time_t current_time = time(NULL);

	if (difftime(current_time, monitor_state.last_monitor_time) >= 15) {
		monitor_state.last_monitor_time = current_time;

		if (srv_print_innodb_monitor) {
			/* Reset mutex_skipped counter everytime
			srv_print_innodb_monitor changes. This is to
			ensure we will not be blocked by lock_sys.mutex
			for short duration information printing */
			if (!monitor_state.last_srv_print_monitor) {
				monitor_state.mutex_skipped = 0;
				monitor_state.last_srv_print_monitor = true;
			}

			if (!srv_printf_innodb_monitor(stderr,
						MUTEX_NOWAIT(monitor_state.mutex_skipped),
						NULL, NULL)) {
				monitor_state.mutex_skipped++;
			} else {
				/* Reset the counter */
				monitor_state.mutex_skipped = 0;
			}
		} else {
			/* Remember that printing is currently off, so that
			the next off->on transition is recognised above and
			mutex_skipped is reset again; otherwise the reset
			would only ever happen the first time the monitor
			is enabled. */
			monitor_state.last_srv_print_monitor = false;

			/* NOTE(review): zeroing the timestamp makes the
			15-second check pass on the very next call -
			presumably so that output starts promptly once the
			monitor is switched on; confirm this is intended. */
			monitor_state.last_monitor_time = 0;
		}


		/* We don't create the temp files or associated
		mutexes in read-only-mode */

		if (!srv_read_only_mode && srv_innodb_status) {
			mutex_enter(&srv_monitor_file_mutex);
			/* Overwrite the status file from the start ... */
			rewind(srv_monitor_file);
			if (!srv_printf_innodb_monitor(srv_monitor_file,
						MUTEX_NOWAIT(monitor_state.mutex_skipped),
						NULL, NULL)) {
				monitor_state.mutex_skipped++;
			} else {
				monitor_state.mutex_skipped = 0;
			}

			/* ... and cut it off after what was just written. */
			os_file_set_eof(srv_monitor_file);
			mutex_exit(&srv_monitor_file_mutex);
		}
	}

	srv_refresh_innodb_monitor_stats(current_time);
}

/*********************************************************************//**
A task which prints warnings about semaphore waits which have lasted
too long. These can be used to track bugs which cause hangs.
+*/ +void srv_monitor_task(void*) +{ + /* number of successive fatal timeouts observed */ + static ulint fatal_cnt; + static lsn_t old_lsn = recv_sys.recovered_lsn; + /* longest waiting thread for a semaphore */ + os_thread_id_t waiter; + static os_thread_id_t old_waiter = os_thread_get_curr_id(); + /* the semaphore that is being waited for */ + const void* sema = NULL; + static const void* old_sema = NULL; + + ut_ad(!srv_read_only_mode); + + /* Try to track a strange bug reported by Harald Fuchs and others, + where the lsn seems to decrease at times */ + + lsn_t new_lsn = log_sys.get_lsn(); + ut_a(new_lsn >= old_lsn); + old_lsn = new_lsn; + + /* Update the statistics collected for deciding LRU + eviction policy. */ + buf_LRU_stat_update(); + + if (sync_array_print_long_waits(&waiter, &sema) + && sema == old_sema && os_thread_eq(waiter, old_waiter)) { +#if defined(WITH_WSREP) && defined(WITH_INNODB_DISALLOW_WRITES) + if (!os_event_is_set(srv_allow_writes_event)) { + fprintf(stderr, + "WSREP: avoiding InnoDB self crash due to " + "long semaphore wait of > %lu seconds\n" + "Server is processing SST donor operation, " + "fatal_cnt now: " ULINTPF, + srv_fatal_semaphore_wait_threshold, fatal_cnt); + return; + } +#endif /* WITH_WSREP */ + if (fatal_cnt++) { + ib::fatal() << "Semaphore wait has lasted > " + << srv_fatal_semaphore_wait_threshold + << " seconds. We intentionally crash the" + " server because it appears to be hung."; + } + } else { + fatal_cnt = 0; + old_waiter = waiter; + old_sema = sema; + } + + srv_monitor(); +} + +/******************************************************************//** +Increment the server activity count. 
*/ +void +srv_inc_activity_count(void) +/*========================*/ +{ + srv_sys.activity_count.inc(); +} + +#ifdef UNIV_DEBUG +/** @return whether purge or master task is active */ +bool srv_any_background_activity() +{ + if (purge_sys.enabled() || srv_master_timer.get()) + { + ut_ad(!srv_read_only_mode); + return true; + } + return false; +} +#endif /* UNIV_DEBUG */ + +static void purge_worker_callback(void*); +static void purge_coordinator_callback(void*); +static void purge_coordinator_timer_callback(void*); + +static tpool::task_group purge_task_group; +tpool::waitable_task purge_worker_task(purge_worker_callback, nullptr, + &purge_task_group); +static tpool::task_group purge_coordinator_task_group(1); +static tpool::waitable_task purge_coordinator_task + (purge_coordinator_callback, nullptr, &purge_coordinator_task_group); + +static tpool::timer *purge_coordinator_timer; + +/** Wake up the purge threads if there is work to do. */ +void +srv_wake_purge_thread_if_not_active() +{ + ut_ad(!srv_read_only_mode); + + if (purge_sys.enabled() && !purge_sys.paused() + && trx_sys.rseg_history_len) { + if(++purge_state.m_running == 1) { + srv_thread_pool->submit_task(&purge_coordinator_task); + } + } +} + +/** @return whether the purge tasks are active */ +bool purge_sys_t::running() const +{ + return purge_coordinator_task.is_running(); +} + +/** Stop purge during FLUSH TABLES FOR EXPORT */ +void purge_sys_t::stop() +{ + rw_lock_x_lock(&latch); + + if (!enabled()) + { + /* Shutdown must have been initiated during FLUSH TABLES FOR EXPORT. 
*/ + ut_ad(!srv_undo_sources); + rw_lock_x_unlock(&latch); + return; + } + + ut_ad(srv_n_purge_threads > 0); + + const auto paused= m_paused++; + + rw_lock_x_unlock(&latch); + + if (!paused) + { + ib::info() << "Stopping purge"; + MONITOR_ATOMIC_INC(MONITOR_PURGE_STOP_COUNT); + purge_coordinator_task.disable(); + } +} + +/** Resume purge at UNLOCK TABLES after FLUSH TABLES FOR EXPORT */ +void purge_sys_t::resume() +{ + if (!enabled()) + { + /* Shutdown must have been initiated during FLUSH TABLES FOR EXPORT. */ + ut_ad(!srv_undo_sources); + return; + } + ut_ad(!srv_read_only_mode); + ut_ad(srv_force_recovery < SRV_FORCE_NO_BACKGROUND); + ut_ad(!sync_check_iterate(sync_check())); + purge_coordinator_task.enable(); + rw_lock_x_lock(&latch); + int32_t paused= m_paused--; + ut_a(paused); + + if (paused == 1) + { + ib::info() << "Resuming purge"; + purge_state.m_running = 0; + srv_wake_purge_thread_if_not_active(); + MONITOR_ATOMIC_INC(MONITOR_PURGE_RESUME_COUNT); + } + rw_lock_x_unlock(&latch); +} + +/*******************************************************************//** +Get current server activity count. +@return activity count. */ +ulint +srv_get_activity_count(void) +/*========================*/ +{ + return(srv_sys.activity_count); +} + +/** Check if srv_inc_activity_count() has been called. +@param activity_count copy of srv_sys.activity_count +@return whether the activity_count had changed */ +static bool srv_check_activity(ulint *activity_count) +{ + ulint new_activity_count= srv_sys.activity_count; + if (new_activity_count != *activity_count) + { + *activity_count= new_activity_count; + return true; + } + + return false; +} + +/********************************************************************//** +The master thread is tasked to ensure that flush of log file happens +once every second in the background. 
This is to ensure that not more +than one second of trxs are lost in case of crash when +innodb_flush_logs_at_trx_commit != 1 */ +static +void +srv_sync_log_buffer_in_background(void) +/*===================================*/ +{ + time_t current_time = time(NULL); + + srv_main_thread_op_info = "flushing log"; + if (difftime(current_time, srv_last_log_flush_time) + >= srv_flush_log_at_timeout) { + log_buffer_flush_to_disk(); + srv_last_log_flush_time = current_time; + srv_log_writes_and_flush++; + } +} + +/********************************************************************//** +Make room in the table cache by evicting an unused table. +@return number of tables evicted. */ +static +ulint +srv_master_evict_from_table_cache( +/*==============================*/ + ulint pct_check) /*!< in: max percent to check */ +{ + ulint n_tables_evicted = 0; + + dict_sys_lock(); + + n_tables_evicted = dict_make_room_in_cache( + innobase_get_table_cache_size(), pct_check); + + dict_sys_unlock(); + + return(n_tables_evicted); +} + +/*********************************************************************//** +This function prints progress message every 60 seconds during server +shutdown, for any activities that master thread is pending on. 
*/ +static +void +srv_shutdown_print_master_pending( +/*==============================*/ + time_t* last_print_time, /*!< last time the function + print the message */ + ulint n_tables_to_drop, /*!< number of tables to + be dropped */ + ulint n_bytes_merged) /*!< number of change buffer + just merged */ +{ + time_t current_time = time(NULL); + + if (difftime(current_time, *last_print_time) > 60) { + *last_print_time = current_time; + + if (n_tables_to_drop) { + ib::info() << "Waiting for " << n_tables_to_drop + << " table(s) to be dropped"; + } + + /* Check change buffer merge, we only wait for change buffer + merge if it is a slow shutdown */ + if (!srv_fast_shutdown && n_bytes_merged) { + ib::info() << "Waiting for change buffer merge to" + " complete number of bytes of change buffer" + " just merged: " << n_bytes_merged; + } + } +} + +#ifdef UNIV_DEBUG +/** Waits in loop as long as master thread is disabled (debug) */ +static +void +srv_master_do_disabled_loop(void) +{ + if (!srv_master_thread_disabled_debug) { + /* We return here to avoid changing op_info. */ + return; + } + + srv_main_thread_op_info = "disabled"; + + while (srv_master_thread_disabled_debug) { + os_event_set(srv_master_thread_disabled_event); + if (srv_shutdown_state != SRV_SHUTDOWN_NONE) { + break; + } + os_thread_sleep(100000); + } + + srv_main_thread_op_info = ""; +} + +/** Disables master thread. It's used by: + SET GLOBAL innodb_master_thread_disabled_debug = 1 (0). +@param[in] save immediate result from check function */ +void +srv_master_thread_disabled_debug_update(THD*, st_mysql_sys_var*, void*, + const void* save) +{ + /* This method is protected by mutex, as every SET GLOBAL .. 
*/ + ut_ad(srv_master_thread_disabled_event != NULL); + + const bool disable = *static_cast<const my_bool*>(save); + + const int64_t sig_count = os_event_reset( + srv_master_thread_disabled_event); + + srv_master_thread_disabled_debug = disable; + + if (disable) { + os_event_wait_low( + srv_master_thread_disabled_event, sig_count); + } +} +#endif /* UNIV_DEBUG */ + +/*********************************************************************//** +Perform the tasks that the master thread is supposed to do when the +server is active. There are two types of tasks. The first category is +of such tasks which are performed at each inovcation of this function. +We assume that this function is called roughly every second when the +server is active. The second category is of such tasks which are +performed at some interval e.g.: purge, dict_LRU cleanup etc. */ +static +void +srv_master_do_active_tasks(void) +/*============================*/ +{ + time_t cur_time = time(NULL); + ulonglong counter_time = microsecond_interval_timer(); + + /* First do the tasks that we are suppose to do at each + invocation of this function. */ + + ++srv_main_active_loops; + + MONITOR_INC(MONITOR_MASTER_ACTIVE_LOOPS); + + /* ALTER TABLE in MySQL requires on Unix that the table handler + can drop tables lazily after there no longer are SELECT + queries to them. 
*/ + srv_main_thread_op_info = "doing background drop tables"; + row_drop_tables_for_mysql_in_background(); + MONITOR_INC_TIME_IN_MICRO_SECS( + MONITOR_SRV_BACKGROUND_DROP_TABLE_MICROSECOND, counter_time); + + ut_d(srv_master_do_disabled_loop()); + + if (srv_shutdown_state > SRV_SHUTDOWN_INITIATED) { + return; + } + + /* make sure that there is enough reusable space in the redo + log files */ + srv_main_thread_op_info = "checking free log space"; + log_free_check(); + + /* Flush logs if needed */ + srv_main_thread_op_info = "flushing log"; + srv_sync_log_buffer_in_background(); + MONITOR_INC_TIME_IN_MICRO_SECS( + MONITOR_SRV_LOG_FLUSH_MICROSECOND, counter_time); + + /* Now see if various tasks that are performed at defined + intervals need to be performed. */ + + if (srv_shutdown_state > SRV_SHUTDOWN_INITIATED) { + return; + } + + if (cur_time % SRV_MASTER_DICT_LRU_INTERVAL == 0) { + srv_main_thread_op_info = "enforcing dict cache limit"; + ulint n_evicted = srv_master_evict_from_table_cache(50); + if (n_evicted != 0) { + MONITOR_INC_VALUE( + MONITOR_SRV_DICT_LRU_EVICT_COUNT_ACTIVE, n_evicted); + } + MONITOR_INC_TIME_IN_MICRO_SECS( + MONITOR_SRV_DICT_LRU_MICROSECOND, counter_time); + } +} + +/*********************************************************************//** +Perform the tasks that the master thread is supposed to do whenever the +server is idle. We do check for the server state during this function +and if the server has entered the shutdown phase we may return from +the function without completing the required tasks. +Note that the server can move to active state when we are executing this +function but we don't check for that as we are suppose to perform more +or less same tasks when server is active. 
*/ +static +void +srv_master_do_idle_tasks(void) +/*==========================*/ +{ + ++srv_main_idle_loops; + + MONITOR_INC(MONITOR_MASTER_IDLE_LOOPS); + + + /* ALTER TABLE in MySQL requires on Unix that the table handler + can drop tables lazily after there no longer are SELECT + queries to them. */ + ulonglong counter_time = microsecond_interval_timer(); + srv_main_thread_op_info = "doing background drop tables"; + row_drop_tables_for_mysql_in_background(); + MONITOR_INC_TIME_IN_MICRO_SECS( + MONITOR_SRV_BACKGROUND_DROP_TABLE_MICROSECOND, + counter_time); + + ut_d(srv_master_do_disabled_loop()); + + if (srv_shutdown_state > SRV_SHUTDOWN_INITIATED) { + return; + } + + /* make sure that there is enough reusable space in the redo + log files */ + srv_main_thread_op_info = "checking free log space"; + log_free_check(); + + if (srv_shutdown_state > SRV_SHUTDOWN_INITIATED) { + return; + } + + srv_main_thread_op_info = "enforcing dict cache limit"; + ulint n_evicted = srv_master_evict_from_table_cache(100); + if (n_evicted != 0) { + MONITOR_INC_VALUE( + MONITOR_SRV_DICT_LRU_EVICT_COUNT_IDLE, n_evicted); + } + MONITOR_INC_TIME_IN_MICRO_SECS( + MONITOR_SRV_DICT_LRU_MICROSECOND, counter_time); + + /* Flush logs if needed */ + srv_sync_log_buffer_in_background(); + MONITOR_INC_TIME_IN_MICRO_SECS( + MONITOR_SRV_LOG_FLUSH_MICROSECOND, counter_time); +} + +/** +Complete the shutdown tasks such as background DROP TABLE, +and optionally change buffer merge (on innodb_fast_shutdown=0). */ +void srv_shutdown(bool ibuf_merge) +{ + ulint n_bytes_merged = 0; + ulint n_tables_to_drop; + time_t now = time(NULL); + + do { + ut_ad(!srv_read_only_mode); + ut_ad(srv_shutdown_state == SRV_SHUTDOWN_CLEANUP); + ++srv_main_shutdown_loops; + + /* FIXME: Remove the background DROP TABLE queue; it is not + crash-safe and breaks ACID. 
*/ + srv_main_thread_op_info = "doing background drop tables"; + n_tables_to_drop = row_drop_tables_for_mysql_in_background(); + + if (ibuf_merge) { + srv_main_thread_op_info = "checking free log space"; + log_free_check(); + srv_main_thread_op_info = "doing insert buffer merge"; + n_bytes_merged = ibuf_merge_all(); + + /* Flush logs if needed */ + srv_sync_log_buffer_in_background(); + } + + /* Print progress message every 60 seconds during shutdown */ + if (srv_print_verbose_log) { + srv_shutdown_print_master_pending( + &now, n_tables_to_drop, n_bytes_merged); + } + } while (n_bytes_merged || n_tables_to_drop); +} + +/** The periodic master task controlling the server. */ +void srv_master_callback(void*) +{ + static ulint old_activity_count; + + ut_a(srv_shutdown_state <= SRV_SHUTDOWN_INITIATED); + + srv_main_thread_op_info = ""; + MONITOR_INC(MONITOR_MASTER_THREAD_SLEEP); + if (srv_check_activity(&old_activity_count)) { + srv_master_do_active_tasks(); + } else { + srv_master_do_idle_tasks(); + } + srv_main_thread_op_info = "sleeping"; +} + +/** @return whether purge should exit due to shutdown */ +static bool srv_purge_should_exit() +{ + ut_ad(srv_shutdown_state <= SRV_SHUTDOWN_CLEANUP); + + if (srv_undo_sources) + return false; + + if (srv_fast_shutdown) + return true; + + /* Slow shutdown was requested. */ + if (const uint32_t history_size= trx_sys.rseg_history_len) + { + static time_t progress_time; + time_t now= time(NULL); + if (now - progress_time >= 15) + { + progress_time= now; +#if defined HAVE_SYSTEMD && !defined EMBEDDED_LIBRARY + service_manager_extend_timeout(INNODB_EXTEND_TIMEOUT_INTERVAL, + "InnoDB: to purge %u transactions", + history_size); + ib::info() << "to purge " << history_size << " transactions"; +#endif + } + return false; + } + + return !trx_sys.any_active_transactions(); +} + +/*********************************************************************//** +Fetch and execute a task from the work queue. 
+@param [in,out] slot purge worker thread slot +@return true if a task was executed */ +static bool srv_task_execute() +{ + ut_ad(!srv_read_only_mode); + ut_ad(srv_force_recovery < SRV_FORCE_NO_BACKGROUND); + + mutex_enter(&srv_sys.tasks_mutex); + + if (que_thr_t* thr = UT_LIST_GET_FIRST(srv_sys.tasks)) { + ut_a(que_node_get_type(thr->child) == QUE_NODE_PURGE); + UT_LIST_REMOVE(srv_sys.tasks, thr); + mutex_exit(&srv_sys.tasks_mutex); + que_run_threads(thr); + return true; + } + + ut_ad(UT_LIST_GET_LEN(srv_sys.tasks) == 0); + mutex_exit(&srv_sys.tasks_mutex); + return false; +} + +std::mutex purge_thread_count_mtx; +void srv_update_purge_thread_count(uint n) +{ + std::lock_guard<std::mutex> lk(purge_thread_count_mtx); + srv_n_purge_threads = n; + srv_purge_thread_count_changed = 1; +} + +Atomic_counter<int> srv_purge_thread_count_changed; + +/** Do the actual purge operation. +@param[in,out] n_total_purged total number of purged pages +@return length of history list before the last purge batch. */ +static uint32_t srv_do_purge(ulint* n_total_purged) +{ + ulint n_pages_purged; + + static ulint count = 0; + static ulint n_use_threads = 0; + static uint32_t rseg_history_len = 0; + ulint old_activity_count = srv_get_activity_count(); + static ulint n_threads = srv_n_purge_threads; + + ut_a(n_threads > 0); + ut_ad(!srv_read_only_mode); + + /* Purge until there are no more records to purge and there is + no change in configuration or server state. If the user has + configured more than one purge thread then we treat that as a + pool of threads and only use the extra threads if purge can't + keep up with updates. */ + + if (n_use_threads == 0) { + n_use_threads = n_threads; + } + + do { + if (UNIV_UNLIKELY(srv_purge_thread_count_changed)) { + /* Read the fresh value of srv_n_purge_threads, reset + the changed flag. Both variables are protected by + purge_thread_count_mtx. 
+ + This code does not run concurrently, it is executed + by a single purge_coordinator thread, and no races + involving srv_purge_thread_count_changed are possible. + */ + + std::lock_guard<std::mutex> lk(purge_thread_count_mtx); + n_threads = n_use_threads = srv_n_purge_threads; + srv_purge_thread_count_changed = 0; + } else if (trx_sys.rseg_history_len > rseg_history_len + || (srv_max_purge_lag > 0 + && rseg_history_len > srv_max_purge_lag)) { + + /* History length is now longer than what it was + when we took the last snapshot. Use more threads. */ + + if (n_use_threads < n_threads) { + ++n_use_threads; + } + + } else if (srv_check_activity(&old_activity_count) + && n_use_threads > 1) { + + /* History length same or smaller since last snapshot, + use fewer threads. */ + + --n_use_threads; + } + + /* Ensure that the purge threads are less than what + was configured. */ + + ut_a(n_use_threads > 0); + ut_a(n_use_threads <= n_threads); + + /* Take a snapshot of the history list before purge. 
*/ + if (!(rseg_history_len = trx_sys.rseg_history_len)) { + break; + } + + n_pages_purged = trx_purge( + n_use_threads, + !(++count % srv_purge_rseg_truncate_frequency) + || purge_sys.truncate.current); + + *n_total_purged += n_pages_purged; + } while (n_pages_purged > 0 && !purge_sys.paused() + && !srv_purge_should_exit()); + + return(rseg_history_len); +} + + +static std::list<THD*> purge_thds; +static std::mutex purge_thd_mutex; +extern void* thd_attach_thd(THD*); +extern void thd_detach_thd(void *); + +THD* acquire_thd(void **ctx) +{ + std::unique_lock<std::mutex> lk(purge_thd_mutex); + if (purge_thds.empty()) { + THD* thd = current_thd; + purge_thds.push_back(innobase_create_background_thd("InnoDB purge worker")); + set_current_thd(thd); + } + THD* thd = purge_thds.front(); + purge_thds.pop_front(); + lk.unlock(); + + /* Set current thd, and thd->mysys_var as well, + it might be used by something in the server.*/ + *ctx = thd_attach_thd(thd); + return thd; +} + +void release_thd(THD *thd, void *ctx) +{ + thd_detach_thd(ctx); + std::unique_lock<std::mutex> lk(purge_thd_mutex); + purge_thds.push_back(thd); + lk.unlock(); + set_current_thd(0); +} + + +/* + Called by timer when purge coordinator decides + to delay processing of purge records. +*/ +static void purge_coordinator_timer_callback(void *) +{ + if (!purge_sys.enabled() || purge_sys.paused() || + purge_state.m_running || !trx_sys.rseg_history_len) + return; + + if (purge_state.m_history_length < 5000 && + purge_state.m_history_length == trx_sys.rseg_history_len) + /* No new records were added since wait started. + Simply wait for new records. 
The magic number 5000 is an + approximation for the case where we have cached UNDO + log records which prevent truncate of the UNDO segments.*/ + return; + srv_wake_purge_thread_if_not_active(); +} + +static void purge_worker_callback(void*) +{ + ut_ad(!current_thd); + ut_ad(!srv_read_only_mode); + ut_ad(srv_force_recovery < SRV_FORCE_NO_BACKGROUND); + void *ctx; + THD *thd= acquire_thd(&ctx); + while (srv_task_execute()) + ut_ad(purge_sys.running()); + release_thd(thd,ctx); +} + +static void purge_coordinator_callback_low() +{ + ulint n_total_purged= ULINT_UNDEFINED; + purge_state.m_history_length= 0; + + if (!purge_sys.enabled() || purge_sys.paused()) + return; + do + { + n_total_purged = 0; + int sigcount= purge_state.m_running; + + purge_state.m_history_length= srv_do_purge(&n_total_purged); + + /* Check if purge was woken by srv_wake_purge_thread_if_not_active() */ + + bool woken_during_purge= purge_state.m_running > sigcount; + + /* If last purge batch processed less than 1 page and there is + still work to do, delay the next batch by 10ms. Unless + someone added work and woke us up. 
*/ + if (n_total_purged == 0) + { + if (trx_sys.rseg_history_len == 0) + return; + if (!woken_during_purge) + { + /* Nothing was purged and nobody woke us up: + re-arm the timer to delay the next purge round. */ + purge_coordinator_timer->set_time(10, 0); + return; + } + } + } + while ((purge_sys.enabled() && !purge_sys.paused()) || + !srv_purge_should_exit()); +} + +static void purge_coordinator_callback(void*) +{ + void *ctx; + THD *thd= acquire_thd(&ctx); + purge_coordinator_callback_low(); + release_thd(thd,ctx); + purge_state.m_running= 0; +} + +void srv_init_purge_tasks() +{ + purge_coordinator_timer= srv_thread_pool->create_timer + (purge_coordinator_timer_callback, nullptr); +} + +static void srv_shutdown_purge_tasks() +{ + purge_coordinator_task.wait(); + delete purge_coordinator_timer; + purge_coordinator_timer= nullptr; + purge_worker_task.wait(); + while (!purge_thds.empty()) + { + innobase_destroy_background_thd(purge_thds.front()); + purge_thds.pop_front(); + } +} + +/**********************************************************************//** +Appends a task to the tail of the server task queue (srv_sys.tasks), +holding srv_sys.tasks_mutex while the list is modified. */ +void +srv_que_task_enqueue_low( +/*=====================*/ + que_thr_t* thr) /*!< in: query thread */ +{ + ut_ad(!srv_read_only_mode); + mutex_enter(&srv_sys.tasks_mutex); + + UT_LIST_ADD_LAST(srv_sys.tasks, thr); + + mutex_exit(&srv_sys.tasks_mutex); +} + +#ifdef UNIV_DEBUG +/** @return number of tasks in queue */ +ulint srv_get_task_queue_length() +{ + ulint n_tasks; + + ut_ad(!srv_read_only_mode); + + mutex_enter(&srv_sys.tasks_mutex); + + n_tasks = UT_LIST_GET_LEN(srv_sys.tasks); + + mutex_exit(&srv_sys.tasks_mutex); + + return(n_tasks); +} +#endif + +/** Shut down the purge threads. 
*/ +void srv_purge_shutdown() +{ + if (purge_sys.enabled()) { + srv_update_purge_thread_count(innodb_purge_threads_MAX); + while(!srv_purge_should_exit()) { + ut_a(!purge_sys.paused()); + srv_wake_purge_thread_if_not_active(); + os_thread_sleep(1000); + } + purge_sys.coordinator_shutdown(); + srv_shutdown_purge_tasks(); + } +} diff --git a/storage/innobase/srv/srv0start.cc b/storage/innobase/srv/srv0start.cc new file mode 100644 index 00000000..aa6e7ce1 --- /dev/null +++ b/storage/innobase/srv/srv0start.cc @@ -0,0 +1,2168 @@ +/***************************************************************************** + +Copyright (c) 1996, 2017, Oracle and/or its affiliates. All rights reserved. +Copyright (c) 2008, Google Inc. +Copyright (c) 2009, Percona Inc. +Copyright (c) 2013, 2021, MariaDB Corporation. + +Portions of this file contain modifications contributed and copyrighted by +Google, Inc. Those modifications are gratefully acknowledged and are described +briefly in the InnoDB documentation. The contributions by Google are +incorporated with their permission, and subject to the conditions contained in +the file COPYING.Google. + +Portions of this file contain modifications contributed and copyrighted +by Percona Inc.. Those modifications are +gratefully acknowledged and are described briefly in the InnoDB +documentation. The contributions by Percona Inc. are incorporated with +their permission, and subject to the conditions contained in the file +COPYING.Percona. + +This program is free software; you can redistribute it and/or modify it under +the terms of the GNU General Public License as published by the Free Software +Foundation; version 2 of the License. + +This program is distributed in the hope that it will be useful, but WITHOUT +ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS +FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details. 
+ +You should have received a copy of the GNU General Public License along with +this program; if not, write to the Free Software Foundation, Inc., +51 Franklin Street, Fifth Floor, Boston, MA 02110-1335 USA + +*****************************************************************************/ + +/********************************************************************//** +@file srv/srv0start.cc +Starts the InnoDB database server + +Created 2/16/1996 Heikki Tuuri +*************************************************************************/ + +#include "my_global.h" + +#include "mysqld.h" +#include "mysql/psi/mysql_stage.h" +#include "mysql/psi/psi.h" + +#include "row0ftsort.h" +#include "ut0mem.h" +#include "mem0mem.h" +#include "data0data.h" +#include "data0type.h" +#include "dict0dict.h" +#include "buf0buf.h" +#include "buf0dblwr.h" +#include "buf0dump.h" +#include "os0file.h" +#include "os0thread.h" +#include "fil0fil.h" +#include "fil0crypt.h" +#include "fsp0fsp.h" +#include "rem0rec.h" +#include "mtr0mtr.h" +#include "log0crypt.h" +#include "log0recv.h" +#include "page0page.h" +#include "page0cur.h" +#include "trx0trx.h" +#include "trx0sys.h" +#include "btr0btr.h" +#include "btr0cur.h" +#include "rem0rec.h" +#include "ibuf0ibuf.h" +#include "srv0start.h" +#include "srv0srv.h" +#include "btr0defragment.h" +#include "mysql/service_wsrep.h" /* wsrep_recovery */ +#include "trx0rseg.h" +#include "buf0flu.h" +#include "buf0rea.h" +#include "dict0boot.h" +#include "dict0load.h" +#include "dict0stats_bg.h" +#include "que0que.h" +#include "lock0lock.h" +#include "trx0roll.h" +#include "trx0purge.h" +#include "lock0lock.h" +#include "pars0pars.h" +#include "btr0sea.h" +#include "rem0cmp.h" +#include "dict0crea.h" +#include "row0ins.h" +#include "row0sel.h" +#include "row0upd.h" +#include "row0row.h" +#include "row0mysql.h" +#include "btr0pcur.h" +#include "os0event.h" +#include "zlib.h" +#include "ut0crc32.h" + +/** We are prepared for a situation that we have this many threads 
waiting for +a semaphore inside InnoDB. srv_start() sets the value. */ +ulint srv_max_n_threads; + +/** Log sequence number at shutdown */ +lsn_t srv_shutdown_lsn; + +/** TRUE if a raw partition is in use */ +ibool srv_start_raw_disk_in_use; + +/** Number of IO threads to use */ +uint srv_n_file_io_threads; + +/** Tablespace identifier of the first undo tablespace. */ +ulint srv_undo_space_id_start; + +/** TRUE if the server is being started, before rolling back any +incomplete transactions */ +bool srv_startup_is_before_trx_rollback_phase; +/** TRUE if the server is being started */ +bool srv_is_being_started; +/** TRUE if SYS_TABLESPACES is available for lookups */ +bool srv_sys_tablespaces_open; +/** TRUE if the server was successfully started */ +bool srv_was_started; +/** The original value of srv_log_file_size (innodb_log_file_size) */ +static ulonglong srv_log_file_size_requested; +/** whether srv_start() has been called */ +static bool srv_start_has_been_called; + +/** Whether any undo log records can be generated */ +UNIV_INTERN bool srv_undo_sources; + +#ifdef UNIV_DEBUG +/** InnoDB system tablespace to set during recovery */ +UNIV_INTERN uint srv_sys_space_size_debug; +/** whether the redo log file has been created at startup */ +UNIV_INTERN bool srv_log_file_created; +#endif /* UNIV_DEBUG */ + +/** whether some background threads that create redo log have been started */ +static bool srv_started_redo; + +/** At a shutdown this value climbs from SRV_SHUTDOWN_NONE to +SRV_SHUTDOWN_CLEANUP and then to SRV_SHUTDOWN_LAST_PHASE, and so on */ +enum srv_shutdown_t srv_shutdown_state = SRV_SHUTDOWN_NONE; + +/** Name of srv_monitor_file */ +static char* srv_monitor_file_name; +std::unique_ptr<tpool::timer> srv_master_timer; + +/** NOTE(review): presumably the maximum number of pending synchronous +I/O requests; no use of this macro is visible here - confirm or remove */ +#define SRV_MAX_N_PENDING_SYNC_IOS 100 + +#ifdef UNIV_PFS_THREAD +/* Keys to register InnoDB threads with performance schema */ +mysql_pfs_key_t thread_pool_thread_key; +#endif /* UNIV_PFS_THREAD */ + +#ifdef HAVE_PSI_STAGE_INTERFACE +/** Array of 
all InnoDB stage events for monitoring activities via +performance schema. */ +static PSI_stage_info* srv_stages[] = +{ + &srv_stage_alter_table_end, + &srv_stage_alter_table_insert, + &srv_stage_alter_table_log_index, + &srv_stage_alter_table_log_table, + &srv_stage_alter_table_merge_sort, + &srv_stage_alter_table_read_pk_internal_sort, + &srv_stage_buffer_pool_load, +}; +#endif /* HAVE_PSI_STAGE_INTERFACE */ + +/*********************************************************************//** +Check if a file can be opened in the required mode: "read" when +srv_read_only_mode is set, "read-write" otherwise. +@return true if the file does not exist, or is a regular file that can be +opened in the required mode; false if its status cannot be determined, if it +is not a regular file, or if the permission is missing. */ +static +bool +srv_file_check_mode( +/*================*/ + const char* name) /*!< in: filename to check */ +{ + os_file_stat_t stat; + + memset(&stat, 0x0, sizeof(stat)); + + dberr_t err = os_file_get_status( + name, &stat, true, srv_read_only_mode); + + if (err == DB_FAIL) { + ib::error() << "os_file_get_status() failed on '" << name + << "'. Can't determine file permissions."; + return(false); + + } else if (err == DB_SUCCESS) { + + /* Note: stat.rw_perm is only valid for regular files */ + + if (stat.type == OS_FILE_TYPE_FILE) { + + if (!stat.rw_perm) { + const char* mode = srv_read_only_mode + ? "read" : "read-write"; + ib::error() << name << " can't be opened in " + << mode << " mode."; + return(false); + } + } else { + /* Not a regular file, bail out. */ + ib::error() << "'" << name << "' not a regular file."; + + return(false); + } + } else { + + /* This is OK. If the file create fails on RO media, there + is nothing we can do. */ + + ut_a(err == DB_NOT_FOUND); + } + + return(true); +} + +/** Suffix of the redo log file name under which the log file is +initially created (before it is renamed) */ +static const char INIT_LOG_FILE0[]= "101"; + +/** Creates log file. 
+@param[in] create_new_db whether the database is being initialized +@param[in] lsn FIL_PAGE_FILE_FLUSH_LSN value +@param[out] logfile0 name of the log file +@return DB_SUCCESS or error code */ +static dberr_t create_log_file(bool create_new_db, lsn_t lsn, + std::string& logfile0) +{ + if (srv_read_only_mode) { + ib::error() << "Cannot create log file in read-only mode"; + return DB_READ_ONLY; + } + + /* Crashing after deleting the first file should be + recoverable. The buffer pool was clean, and we can simply + create log file from the scratch. */ + DBUG_EXECUTE_IF("innodb_log_abort_6", delete_log_file("0"); + return DB_ERROR;); + + for (size_t i = 0; i < 102; i++) { + delete_log_file(std::to_string(i).c_str()); + } + + DBUG_PRINT("ib_log", ("After innodb_log_abort_6")); + DBUG_ASSERT(!buf_pool.any_io_pending()); + + DBUG_EXECUTE_IF("innodb_log_abort_7", return DB_ERROR;); + DBUG_PRINT("ib_log", ("After innodb_log_abort_7")); + + logfile0 = get_log_file_path(LOG_FILE_NAME_PREFIX) + .append(INIT_LOG_FILE0); + + bool ret; + pfs_os_file_t file = os_file_create( + innodb_log_file_key, logfile0.c_str(), + OS_FILE_CREATE|OS_FILE_ON_ERROR_NO_EXIT, OS_FILE_NORMAL, + OS_LOG_FILE, srv_read_only_mode, &ret); + + if (!ret) { + ib::error() << "Cannot create " << logfile0; + return DB_ERROR; + } + + ib::info() << "Setting log file " << logfile0 << " size to " + << srv_log_file_size << " bytes"; + + ret = os_file_set_size(logfile0.c_str(), file, srv_log_file_size); + if (!ret) { + os_file_close(file); + ib::error() << "Cannot set log file " << logfile0 + << " size to " << srv_log_file_size << " bytes"; + return DB_ERROR; + } + + ret = os_file_close(file); + ut_a(ret); + + DBUG_EXECUTE_IF("innodb_log_abort_8", return(DB_ERROR);); + DBUG_PRINT("ib_log", ("After innodb_log_abort_8")); + + /* We did not create the first log file initially as LOG_FILE_NAME, so + that crash recovery cannot find it until it has been completed and + renamed. 
*/ + + log_sys.log.create(); + if (!log_set_capacity(srv_log_file_size_requested)) { + return DB_ERROR; + } + + log_sys.log.open_file(logfile0); + if (!fil_system.sys_space->open(create_new_db)) { + return DB_ERROR; + } + + /* Create a log checkpoint. */ + mysql_mutex_lock(&log_sys.mutex); + if (log_sys.is_encrypted() && !log_crypt_init()) { + return DB_ERROR; + } + ut_d(recv_no_log_write = false); + lsn = ut_uint64_align_up(lsn, OS_FILE_LOG_BLOCK_SIZE); + log_sys.set_lsn(lsn + LOG_BLOCK_HDR_SIZE); + log_sys.log.set_lsn(lsn); + log_sys.log.set_lsn_offset(LOG_FILE_HDR_SIZE); + + log_sys.buf_next_to_write = 0; + log_sys.write_lsn = lsn; + + log_sys.next_checkpoint_no = 0; + log_sys.last_checkpoint_lsn = 0; + + memset(log_sys.buf, 0, srv_log_buffer_size); + log_block_init(log_sys.buf, lsn); + log_block_set_first_rec_group(log_sys.buf, LOG_BLOCK_HDR_SIZE); + memset(log_sys.flush_buf, 0, srv_log_buffer_size); + + log_sys.buf_free = LOG_BLOCK_HDR_SIZE; + + log_sys.log.write_header_durable(lsn); + + mysql_mutex_unlock(&log_sys.mutex); + + log_make_checkpoint(); + log_buffer_flush_to_disk(); + + return DB_SUCCESS; +} + +/** Rename the first redo log file. +@param[in] lsn FIL_PAGE_FILE_FLUSH_LSN value +@param[in,out] logfile0 name of the first log file +@return error code +@retval DB_SUCCESS on successful operation */ +MY_ATTRIBUTE((warn_unused_result)) +static dberr_t create_log_file_rename(lsn_t lsn, std::string &logfile0) +{ + ut_ad(!srv_log_file_created); + ut_d(srv_log_file_created= true); + + DBUG_EXECUTE_IF("innodb_log_abort_9", return (DB_ERROR);); + DBUG_PRINT("ib_log", ("After innodb_log_abort_9")); + + /* Rename the first log file, now that a log checkpoint has been created. 
*/ + auto new_name = get_log_file_path(); + + ib::info() << "Renaming log file " << logfile0 << " to " << new_name; + + mysql_mutex_lock(&log_sys.mutex); + ut_ad(logfile0.size() == 2 + new_name.size()); + logfile0= new_name; + dberr_t err= log_sys.log.rename(std::move(new_name)); + + mysql_mutex_unlock(&log_sys.mutex); + + DBUG_EXECUTE_IF("innodb_log_abort_10", err= DB_ERROR;); + + if (err == DB_SUCCESS) + ib::info() << "New log file created, LSN=" << lsn; + + return err; +} + +/** Create an undo tablespace file +@param[in] name file name +@return DB_SUCCESS or error code */ +static dberr_t srv_undo_tablespace_create(const char* name) +{ + pfs_os_file_t fh; + bool ret; + dberr_t err = DB_SUCCESS; + + os_file_create_subdirs_if_needed(name); + + fh = os_file_create( + innodb_data_file_key, + name, + srv_read_only_mode ? OS_FILE_OPEN : OS_FILE_CREATE, + OS_FILE_NORMAL, OS_DATA_FILE, srv_read_only_mode, &ret); + + if (!ret) { + if (os_file_get_last_error(false) != OS_FILE_ALREADY_EXISTS +#ifdef UNIV_AIX + /* AIX 5.1 after security patch ML7 may have + errno set to 0 here, which causes our function + to return 100; work around that AIX problem */ + && os_file_get_last_error(false) != 100 +#endif /* UNIV_AIX */ + ) { + ib::error() << "Can't create UNDO tablespace " + << name; + } + err = DB_ERROR; + } else if (srv_read_only_mode) { + ib::info() << name << " opened in read-only mode"; + } else { + /* We created the data file and now write it full of zeros */ + + ib::info() << "Data file " << name << " did not exist: new to" + " be created"; + + ib::info() << "Setting file " << name << " size to " + << (SRV_UNDO_TABLESPACE_SIZE_IN_PAGES >> (20 - srv_page_size_shift)) << " MB"; + + ib::info() << "Database physically writes the file full: " + << "wait..."; + + if (!os_file_set_size(name, fh, os_offset_t + {SRV_UNDO_TABLESPACE_SIZE_IN_PAGES} + << srv_page_size_shift)) { + ib::error() << "Unable to allocate " << name; + err = DB_ERROR; + } + + os_file_close(fh); + } + + 
return(err); +} + +/* Validate the number of opened undo tablespaces +(srv_undo_tablespaces_open) against the configured number +(srv_undo_tablespaces, innodb_undo_tablespaces). +@return DB_SUCCESS if it is valid +@retval DB_ERROR if fewer undo tablespaces were opened than configured */ +static dberr_t srv_validate_undo_tablespaces() +{ + /* If the user says that there are fewer than what we find we + tolerate that discrepancy but not the inverse. Because there could + be unused undo tablespaces for future use. */ + + if (srv_undo_tablespaces > srv_undo_tablespaces_open) + { + ib::error() << "Expected to open innodb_undo_tablespaces=" + << srv_undo_tablespaces + << " but was able to find only " + << srv_undo_tablespaces_open; + + return DB_ERROR; + } + else if (srv_undo_tablespaces_open > 0) + { + ib::info() << "Opened " << srv_undo_tablespaces_open + << " undo tablespaces"; + + if (srv_undo_tablespaces == 0) + ib::warn() << "innodb_undo_tablespaces=0 disables" + " dedicated undo log tablespaces"; + } + return DB_SUCCESS; +} + +/** Count the distinct nonzero tablespace identifiers that are referenced +by the rollback segment slots (those with a page number other than +FIL_NULL) of the block returned by trx_sysf_get(). +@return the number of active undo tablespaces (except system tablespace) */ +static ulint trx_rseg_get_n_undo_tablespaces() +{ + std::set<uint32_t> space_ids; + mtr_t mtr; + mtr.start(); + + if (const buf_block_t *sys_header= trx_sysf_get(&mtr, false)) + for (ulint rseg_id= 0; rseg_id < TRX_SYS_N_RSEGS; rseg_id++) + if (trx_sysf_rseg_get_page_no(sys_header, rseg_id) != FIL_NULL) + if (uint32_t space= trx_sysf_rseg_get_space(sys_header, rseg_id)) + space_ids.insert(space); + mtr.commit(); + return space_ids.size(); +} + +/** Open an undo tablespace. 
+@param[in] create whether undo tablespaces are being created +@param[in] name tablespace file name +@param[in] i undo tablespace count +@return undo tablespace identifier +@retval 0 on failure */ +static ulint srv_undo_tablespace_open(bool create, const char* name, ulint i) +{ + bool success; + char undo_name[sizeof "innodb_undo000"]; + ulint space_id= 0; + ulint fsp_flags= 0; + + if (create) + { + space_id= srv_undo_space_id_start + i; + snprintf(undo_name, sizeof(undo_name), + "innodb_undo%03u", static_cast<unsigned>(space_id)); + switch (srv_checksum_algorithm) { + case SRV_CHECKSUM_ALGORITHM_FULL_CRC32: + case SRV_CHECKSUM_ALGORITHM_STRICT_FULL_CRC32: + fsp_flags= FSP_FLAGS_FCRC32_MASK_MARKER | FSP_FLAGS_FCRC32_PAGE_SSIZE(); + break; + default: + fsp_flags= FSP_FLAGS_PAGE_SSIZE(); + } + } + + pfs_os_file_t fh= os_file_create(innodb_data_file_key, name, OS_FILE_OPEN | + OS_FILE_ON_ERROR_NO_EXIT | + OS_FILE_ON_ERROR_SILENT, + OS_FILE_AIO, OS_DATA_FILE, + srv_read_only_mode, &success); + + if (!success) + return 0; + + os_offset_t size= os_file_get_size(fh); + ut_a(size != os_offset_t(-1)); + + if (!create) + { + page_t *page= static_cast<byte*>(aligned_malloc(srv_page_size, + srv_page_size)); + dberr_t err= os_file_read(IORequestRead, fh, page, 0, srv_page_size); + if (err != DB_SUCCESS) + { +err_exit: + ib::error() << "Unable to read first page of file " << name; + aligned_free(page); + return err; + } + + uint32_t id= mach_read_from_4(FIL_PAGE_SPACE_ID + page); + if (id == 0 || id >= SRV_SPACE_ID_UPPER_BOUND || + memcmp_aligned<2>(FIL_PAGE_SPACE_ID + page, + FSP_HEADER_OFFSET + FSP_SPACE_ID + page, 4)) + { + ib::error() << "Inconsistent tablespace ID in file " << name; + err= DB_CORRUPTION; + goto err_exit; + } + + fsp_flags= mach_read_from_4(FSP_HEADER_OFFSET + FSP_SPACE_FLAGS + page); + if (buf_page_is_corrupted(false, page, fsp_flags)) + { + ib::error() << "Checksum mismatch in the first page of file " << name; + err= DB_CORRUPTION; + goto err_exit; + } + + 
space_id= id; + snprintf(undo_name, sizeof undo_name, "innodb_undo%03u", id); + aligned_free(page); + } + + /* Load the tablespace into InnoDB's internal data structures. */ + + /* We set the biggest space id to the undo tablespace + because InnoDB hasn't opened any other tablespace apart + from the system tablespace. */ + + fil_set_max_space_id_if_bigger(space_id); + + fil_space_t *space= fil_space_t::create(undo_name, space_id, fsp_flags, + FIL_TYPE_TABLESPACE, NULL); + ut_a(fil_validate()); + ut_a(space); + + fil_node_t *file= space->add(name, fh, 0, false, true); + mutex_enter(&fil_system.mutex); + + if (create) + { + space->set_sizes(SRV_UNDO_TABLESPACE_SIZE_IN_PAGES); + space->size= file->size= uint32_t(size >> srv_page_size_shift); + } + else if (!file->read_page0()) + { + os_file_close(file->handle); + file->handle= OS_FILE_CLOSED; + ut_a(fil_system.n_open > 0); + fil_system.n_open--; + } + + mutex_exit(&fil_system.mutex); + return space_id; +} + +/** Check if undo tablespaces and redo log files exist before creating a +new system tablespace +@retval DB_SUCCESS if all undo and redo logs are not found +@retval DB_ERROR if any undo and redo logs are found */ +static +dberr_t +srv_check_undo_redo_logs_exists() +{ + bool ret; + pfs_os_file_t fh; + char name[OS_FILE_MAX_PATH]; + + /* Check if any undo tablespaces exist */ + for (ulint i = 1; i <= srv_undo_tablespaces; ++i) { + + snprintf( + name, sizeof(name), + "%s%cundo%03zu", + srv_undo_dir, OS_PATH_SEPARATOR, + i); + + fh = os_file_create( + innodb_data_file_key, name, + OS_FILE_OPEN_RETRY + | OS_FILE_ON_ERROR_NO_EXIT + | OS_FILE_ON_ERROR_SILENT, + OS_FILE_NORMAL, + OS_DATA_FILE, + srv_read_only_mode, + &ret); + + if (ret) { + os_file_close(fh); + ib::error() + << "undo tablespace '" << name << "' exists." + " Creating system tablespace with existing undo" + " tablespaces is not supported. 
Please delete" + " all undo tablespaces before creating new" + " system tablespace."; + return(DB_ERROR); + } + } + + /* Check if redo log file exists */ + auto logfilename = get_log_file_path(); + + fh = os_file_create(innodb_log_file_key, logfilename.c_str(), + OS_FILE_OPEN_RETRY | OS_FILE_ON_ERROR_NO_EXIT + | OS_FILE_ON_ERROR_SILENT, + OS_FILE_NORMAL, OS_LOG_FILE, srv_read_only_mode, + &ret); + + if (ret) { + os_file_close(fh); + ib::error() << "redo log file '" << logfilename + << "' exists. Creating system tablespace with" + " existing redo log file is not recommended." + " Please delete redo log file before" + " creating new system tablespace."; + return DB_ERROR; + } + + return(DB_SUCCESS); +} + +static dberr_t srv_all_undo_tablespaces_open(bool create_new_db, ulint n_undo) +{ + /* Open all the undo tablespaces that are currently in use. If we + fail to open any of these it is a fatal error. The tablespace ids + should be contiguous. It is a fatal error because they are required + for recovery and are referenced by the UNDO logs (a.k.a RBS). */ + + ulint prev_id= create_new_db ? srv_undo_space_id_start - 1 : 0; + + for (ulint i= 0; i < n_undo; ++i) + { + char name[OS_FILE_MAX_PATH]; + snprintf(name, sizeof name, "%s%cundo%03zu", srv_undo_dir, + OS_PATH_SEPARATOR, i + 1); + ulint space_id= srv_undo_tablespace_open(create_new_db, name, i); + if (!space_id) + { + if (!create_new_db) + break; + ib::error() << "Unable to open create tablespace '" << name << "'."; + return DB_ERROR; + } + + /* Should be no gaps in undo tablespace ids. */ + ut_a(!i || prev_id + 1 == space_id); + + prev_id= space_id; + + /* Note the first undo tablespace id in case of + no active undo tablespace. */ + if (0 == srv_undo_tablespaces_open++) + srv_undo_space_id_start= space_id; + } + + /* Open any extra unused undo tablespaces. These must be contiguous. + We stop at the first failure. These are undo tablespaces that are + not in use and therefore not required by recovery. 
We only check + that there are no gaps. */ + + for (ulint i= prev_id + 1; i < srv_undo_space_id_start + TRX_SYS_N_RSEGS; + ++i) + { + char name[OS_FILE_MAX_PATH]; + snprintf(name, sizeof(name), + "%s%cundo%03zu", srv_undo_dir, OS_PATH_SEPARATOR, i); + if (!srv_undo_tablespace_open(create_new_db, name, i)) + break; + ++srv_undo_tablespaces_open; + } + + return srv_validate_undo_tablespaces(); +} + +/** Open the configured number of dedicated undo tablespaces. +@param[in] create_new_db whether the database is being initialized +@return DB_SUCCESS or error code */ +dberr_t +srv_undo_tablespaces_init(bool create_new_db) +{ + srv_undo_tablespaces_open= 0; + + ut_a(srv_undo_tablespaces <= TRX_SYS_N_RSEGS); + ut_a(!create_new_db || srv_operation == SRV_OPERATION_NORMAL); + + if (srv_undo_tablespaces == 1) + srv_undo_tablespaces= 0; + + /* Create the undo spaces only if we are creating a new + instance. We don't allow creating of new undo tablespaces + in an existing instance (yet). */ + if (create_new_db) + { + srv_undo_space_id_start= 1; + DBUG_EXECUTE_IF("innodb_undo_upgrade", srv_undo_space_id_start= 3;); + + for (ulint i= 0; i < srv_undo_tablespaces; ++i) + { + char name[OS_FILE_MAX_PATH]; + snprintf(name, sizeof name, "%s%cundo%03zu", + srv_undo_dir, OS_PATH_SEPARATOR, i + 1); + if (dberr_t err= srv_undo_tablespace_create(name)) + { + ib::error() << "Could not create undo tablespace '" << name << "'."; + return err; + } + } + } + + /* Get the tablespace ids of all the undo segments excluding + the system tablespace (0). If we are creating a new instance then + we build the undo_tablespace_ids ourselves since they don't + already exist. */ + srv_undo_tablespaces_active= srv_undo_tablespaces; + + ulint n_undo= (create_new_db || srv_operation == SRV_OPERATION_BACKUP || + srv_operation == SRV_OPERATION_RESTORE_DELTA) + ? 
srv_undo_tablespaces : TRX_SYS_N_RSEGS; + + if (dberr_t err= srv_all_undo_tablespaces_open(create_new_db, n_undo)) + return err; + + /* Initialize srv_undo_space_id_start=0 when there are no + dedicated undo tablespaces. */ + if (srv_undo_tablespaces_open == 0) + srv_undo_space_id_start= 0; + + if (create_new_db) + { + mtr_t mtr; + for (ulint i= 0; i < srv_undo_tablespaces; ++i) + { + mtr.start(); + fsp_header_init(fil_space_get(srv_undo_space_id_start + i), + SRV_UNDO_TABLESPACE_SIZE_IN_PAGES, &mtr); + mtr.commit(); + } + } + + return DB_SUCCESS; +} + +/** Create the temporary file tablespace. +@param[in] create_new_db whether we are creating a new database +@return DB_SUCCESS or error code. */ +static +dberr_t +srv_open_tmp_tablespace(bool create_new_db) +{ + ulint sum_of_new_sizes; + + /* Will try to remove if there is existing file left-over by last + unclean shutdown */ + srv_tmp_space.set_sanity_check_status(true); + srv_tmp_space.delete_files(); + srv_tmp_space.set_ignore_read_only(true); + + ib::info() << "Creating shared tablespace for temporary tables"; + + bool create_new_temp_space; + + srv_tmp_space.set_space_id(SRV_TMP_SPACE_ID); + + dberr_t err = srv_tmp_space.check_file_spec( + &create_new_temp_space, 12 * 1024 * 1024); + + if (err == DB_FAIL) { + ib::error() << "The innodb_temporary" + " data file must be writable!"; + err = DB_ERROR; + } else if (err != DB_SUCCESS) { + ib::error() << "Could not create the shared innodb_temporary."; + } else if ((err = srv_tmp_space.open_or_create( + true, create_new_db, &sum_of_new_sizes, NULL)) + != DB_SUCCESS) { + ib::error() << "Unable to create the shared innodb_temporary"; + } else if (fil_system.temp_space->open(true)) { + /* Initialize the header page */ + mtr_t mtr; + mtr.start(); + mtr.set_log_mode(MTR_LOG_NO_REDO); + fsp_header_init(fil_system.temp_space, + srv_tmp_space.get_sum_of_sizes(), + &mtr); + mtr.commit(); + } else { + /* This file was just opened in the code above! 
*/ + ib::error() << "The innodb_temporary" + " data file cannot be re-opened" + " after check_file_spec() succeeded!"; + err = DB_ERROR; + } + + return(err); +} + +/** Shutdown background threads, except the page cleaner. */ +static void srv_shutdown_threads() +{ + ut_ad(!srv_undo_sources); + srv_shutdown_state = SRV_SHUTDOWN_EXIT_THREADS; + + lock_sys.timeout_timer.reset(); + srv_master_timer.reset(); + + if (purge_sys.enabled()) { + srv_purge_shutdown(); + } + + if (srv_n_fil_crypt_threads) { + fil_crypt_set_thread_cnt(0); + } +} + +#ifdef UNIV_DEBUG +# define srv_init_abort(_db_err) \ + srv_init_abort_low(create_new_db, __FILE__, __LINE__, _db_err) +#else +# define srv_init_abort(_db_err) \ + srv_init_abort_low(create_new_db, _db_err) +#endif /* UNIV_DEBUG */ + +/** Innobase start-up aborted. Perform cleanup actions. +@param[in] create_new_db TRUE if new db is being created +@param[in] file File name +@param[in] line Line number +@param[in] err Reason for aborting InnoDB startup +@return DB_SUCCESS or error code. */ +MY_ATTRIBUTE((warn_unused_result, nonnull)) +static +dberr_t +srv_init_abort_low( + bool create_new_db, +#ifdef UNIV_DEBUG + const char* file, + unsigned line, +#endif /* UNIV_DEBUG */ + dberr_t err) +{ + ut_ad(srv_is_being_started); + + if (create_new_db) { + ib::error() << "Database creation was aborted" +#ifdef UNIV_DEBUG + " at " << innobase_basename(file) << "[" << line << "]" +#endif /* UNIV_DEBUG */ + " with error " << err << ". You may need" + " to delete the ibdata1 file before trying to start" + " up again."; + } else { + ib::error() << "Plugin initialization aborted" +#ifdef UNIV_DEBUG + " at " << innobase_basename(file) << "[" << line << "]" +#endif /* UNIV_DEBUG */ + " with error " << err; + } + + srv_shutdown_bg_undo_sources(); + srv_shutdown_threads(); + return(err); +} + +/** Prepare to delete the redo log file. Flush the dirty pages from all the +buffer pools. Flush the redo log buffer to the redo log file. 
+@param[in] old_exists old redo log file exists +@return lsn upto which data pages have been flushed. */ +static lsn_t srv_prepare_to_delete_redo_log_file(bool old_exists) +{ + DBUG_ENTER("srv_prepare_to_delete_redo_log_file"); + + lsn_t flushed_lsn; + ulint count = 0; + + if (log_sys.log.subformat != 2) { + srv_log_file_size = 0; + } + + for (;;) { + /* Clean the buffer pool. */ + buf_flush_sync(); + + DBUG_EXECUTE_IF("innodb_log_abort_1", DBUG_RETURN(0);); + DBUG_PRINT("ib_log", ("After innodb_log_abort_1")); + + mysql_mutex_lock(&log_sys.mutex); + + fil_names_clear(log_sys.get_lsn(), false); + + flushed_lsn = log_sys.get_lsn(); + + { + ib::info info; + if (srv_log_file_size == 0 + || (log_sys.log.format & ~log_t::FORMAT_ENCRYPTED) + != log_t::FORMAT_10_5) { + info << "Upgrading redo log: "; + } else if (!old_exists + || srv_log_file_size + != srv_log_file_size_requested) { + if (srv_encrypt_log + == (my_bool)log_sys.is_encrypted()) { + info << (srv_encrypt_log + ? "Resizing encrypted" + : "Resizing"); + } else if (srv_encrypt_log) { + info << "Encrypting and resizing"; + } else { + info << "Removing encryption" + " and resizing"; + } + + info << " redo log from " << srv_log_file_size + << " to "; + } else if (srv_encrypt_log) { + info << "Encrypting redo log: "; + } else { + info << "Removing redo log encryption: "; + } + + info << srv_log_file_size_requested + << " bytes; LSN=" << flushed_lsn; + } + + mysql_mutex_unlock(&log_sys.mutex); + + if (flushed_lsn != log_sys.get_flushed_lsn()) { + log_write_up_to(flushed_lsn, false); + log_sys.log.flush(); + } + + ut_ad(flushed_lsn == log_sys.get_lsn()); + + /* Check if the buffer pools are clean. If not + retry till it is clean. 
*/ + if (ulint pending_io = buf_pool.io_pending()) { + count++; + /* Print a message every 60 seconds if we + are waiting to clean the buffer pools */ + if (srv_print_verbose_log && count > 600) { + ib::info() << "Waiting for " + << pending_io << " buffer " + << "page I/Os to complete"; + count = 0; + } + + os_thread_sleep(100000); + continue; + } + + break; + } + + DBUG_RETURN(flushed_lsn); +} + +/** Tries to locate LOG_FILE_NAME and check it's size, etc +@param[out] log_file_found returns true here if correct file was found +@return dberr_t with DB_SUCCESS or some error */ +static dberr_t find_and_check_log_file(bool &log_file_found) +{ + log_file_found= false; + + auto logfile0= get_log_file_path(); + os_file_stat_t stat_info; + const dberr_t err= os_file_get_status(logfile0.c_str(), &stat_info, false, + srv_read_only_mode); + + auto is_operation_restore= []() -> bool { + return srv_operation == SRV_OPERATION_RESTORE || + srv_operation == SRV_OPERATION_RESTORE_EXPORT; + }; + + if (err == DB_NOT_FOUND) + { + if (is_operation_restore()) + return DB_NOT_FOUND; + + return DB_SUCCESS; + } + + if (stat_info.type != OS_FILE_TYPE_FILE) + return DB_SUCCESS; + + if (!srv_file_check_mode(logfile0.c_str())) + return DB_ERROR; + + const os_offset_t size= stat_info.size; + ut_a(size != (os_offset_t) -1); + + if (size % OS_FILE_LOG_BLOCK_SIZE) + { + ib::error() << "Log file " << logfile0 << " size " << size + << " is not a multiple of " << OS_FILE_LOG_BLOCK_SIZE + << " bytes"; + return DB_ERROR; + } + + if (size == 0 && is_operation_restore()) + { + /* Tolerate an empty LOG_FILE_NAME from a previous run of + mariabackup --prepare. */ + return DB_NOT_FOUND; + } + /* The first log file must consist of at least the following 512-byte pages: + header, checkpoint page 1, empty, checkpoint page 2, redo log page(s). + + Mariabackup --prepare would create an empty LOG_FILE_NAME. Tolerate it. 
*/ + if (size != 0 && size <= OS_FILE_LOG_BLOCK_SIZE * 4) + { + ib::error() << "Log file " << logfile0 << " size " << size + << " is too small"; + return DB_ERROR; + } + srv_log_file_size= size; + + log_file_found= true; + return DB_SUCCESS; +} + +/** Start InnoDB. +@param[in] create_new_db whether to create a new database +@return DB_SUCCESS or error code */ +dberr_t srv_start(bool create_new_db) +{ + lsn_t flushed_lsn; + dberr_t err = DB_SUCCESS; + bool srv_log_file_found = true; + mtr_t mtr; + + ut_ad(srv_operation == SRV_OPERATION_NORMAL + || srv_operation == SRV_OPERATION_RESTORE + || srv_operation == SRV_OPERATION_RESTORE_EXPORT); + + if (srv_force_recovery) { + ib::info() << "!!! innodb_force_recovery is set to " + << srv_force_recovery << " !!!"; + } + + if (srv_force_recovery == SRV_FORCE_NO_LOG_REDO) { + srv_read_only_mode = true; + } + + high_level_read_only = srv_read_only_mode + || srv_force_recovery > SRV_FORCE_NO_IBUF_MERGE + || srv_sys_space.created_new_raw(); + + srv_started_redo = false; + + compile_time_assert(sizeof(ulint) == sizeof(void*)); + +#ifdef UNIV_DEBUG + ib::info() << "!!!!!!!! UNIV_DEBUG switched on !!!!!!!!!"; +#endif + +#ifdef UNIV_IBUF_DEBUG + ib::info() << "!!!!!!!! UNIV_IBUF_DEBUG switched on !!!!!!!!!"; +#endif + + ib::info() << MUTEX_TYPE; + + ib::info() << "Compressed tables use zlib " ZLIB_VERSION +#ifdef UNIV_ZIP_DEBUG + " with validation" +#endif /* UNIV_ZIP_DEBUG */ + ; +#ifdef UNIV_ZIP_COPY + ib::info() << "and extra copying"; +#endif /* UNIV_ZIP_COPY */ + + /* Since InnoDB does not currently clean up all its internal data + structures in MySQL Embedded Server Library server_end(), we + print an error message if someone tries to start up InnoDB a + second time during the process lifetime. */ + + if (srv_start_has_been_called) { + ib::error() << "Startup called second time" + " during the process lifetime." 
+ " In the MySQL Embedded Server Library" + " you cannot call server_init() more than" + " once during the process lifetime."; + } + + srv_start_has_been_called = true; + + srv_is_being_started = true; + + /* Register performance schema stages before any real work has been + started which may need to be instrumented. */ + mysql_stage_register("innodb", srv_stages, + static_cast<int>(UT_ARR_SIZE(srv_stages))); + + /* Set the maximum number of threads which can wait for a semaphore + inside InnoDB: this is the 'sync wait array' size */ + + srv_max_n_threads = 1 /* io_ibuf_thread */ + + 1 /* io_log_thread */ + + 1 /* srv_print_monitor_task */ + + 1 /* srv_purge_coordinator_thread */ + + 1 /* buf_dump_thread */ + + 1 /* dict_stats_thread */ + + 1 /* fts_optimize_thread */ + + 1 /* trx_rollback_all_recovered */ + + 128 /* added as margin, for use of + InnoDB Memcached etc. */ + + 1/* buf_flush_page_cleaner */ + + max_connections + + srv_n_read_io_threads + + srv_n_write_io_threads + + srv_n_purge_threads + /* FTS Parallel Sort */ + + fts_sort_pll_degree * FTS_NUM_AUX_INDEX + * max_connections; + + srv_boot(); + + ib::info() << my_crc32c_implementation(); + + if (!srv_read_only_mode) { + + mutex_create(LATCH_ID_SRV_MONITOR_FILE, + &srv_monitor_file_mutex); + + if (srv_innodb_status) { + + srv_monitor_file_name = static_cast<char*>( + ut_malloc_nokey( + strlen(fil_path_to_mysql_datadir) + + 20 + sizeof "/innodb_status.")); + + sprintf(srv_monitor_file_name, + "%s/innodb_status." 
ULINTPF, + fil_path_to_mysql_datadir, + static_cast<ulint> + (IF_WIN(GetCurrentProcessId(), getpid()))); + + srv_monitor_file = my_fopen(srv_monitor_file_name, + O_RDWR|O_TRUNC|O_CREAT, + MYF(MY_WME)); + + if (!srv_monitor_file) { + ib::error() << "Unable to create " + << srv_monitor_file_name << ": " + << strerror(errno); + if (err == DB_SUCCESS) { + err = DB_ERROR; + } + } + } else { + + srv_monitor_file_name = NULL; + srv_monitor_file = os_file_create_tmpfile(); + + if (!srv_monitor_file && err == DB_SUCCESS) { + err = DB_ERROR; + } + } + + mutex_create(LATCH_ID_SRV_MISC_TMPFILE, + &srv_misc_tmpfile_mutex); + + srv_misc_tmpfile = os_file_create_tmpfile(); + + if (!srv_misc_tmpfile && err == DB_SUCCESS) { + err = DB_ERROR; + } + } + + if (err != DB_SUCCESS) { + return(srv_init_abort(err)); + } + + srv_n_file_io_threads = srv_n_read_io_threads + srv_n_write_io_threads; + + if (!srv_read_only_mode) { + /* Add the log and ibuf IO threads. */ + srv_n_file_io_threads += 2; + } else { + ib::info() << "Disabling background log and ibuf IO write" + << " threads."; + } + + ut_a(srv_n_file_io_threads <= SRV_MAX_N_IO_THREADS); + + if (os_aio_init()) { + ib::error() << "Cannot initialize AIO sub-system"; + + return(srv_init_abort(DB_ERROR)); + } + +#ifdef LINUX_NATIVE_AIO + if (srv_use_native_aio) { + ib::info() << "Using Linux native AIO"; + } +#endif + + fil_system.create(srv_file_per_table ? 50000 : 5000); + + ib::info() << "Initializing buffer pool, total size = " + << srv_buf_pool_size + << ", chunk size = " << srv_buf_pool_chunk_unit; + + if (buf_pool.create()) { + ib::error() << "Cannot allocate memory for the buffer pool"; + + return(srv_init_abort(DB_ERROR)); + } + + ib::info() << "Completed initialization of buffer pool"; + +#ifdef UNIV_DEBUG + /* We have observed deadlocks with a 5MB buffer pool but + the actual lower limit could very well be a little higher. 
*/ + + if (srv_buf_pool_size <= 5 * 1024 * 1024) { + + ib::info() << "Small buffer pool size (" + << srv_buf_pool_size / 1024 / 1024 + << "M), the flst_validate() debug function can cause a" + << " deadlock if the buffer pool fills up."; + } +#endif /* UNIV_DEBUG */ + + log_sys.create(); + recv_sys.create(); + lock_sys.create(srv_lock_table_size); + + + if (!srv_read_only_mode) { + buf_flush_page_cleaner_init(); + ut_ad(buf_page_cleaner_is_active); + } + + srv_startup_is_before_trx_rollback_phase = !create_new_db; + + /* Check if undo tablespaces and redo log files exist before creating + a new system tablespace */ + if (create_new_db) { + err = srv_check_undo_redo_logs_exists(); + if (err != DB_SUCCESS) { + return(srv_init_abort(DB_ERROR)); + } + recv_sys.debug_free(); + } + + /* Open or create the data files. */ + ulint sum_of_new_sizes; + + err = srv_sys_space.open_or_create( + false, create_new_db, &sum_of_new_sizes, &flushed_lsn); + + switch (err) { + case DB_SUCCESS: + break; + case DB_CANNOT_OPEN_FILE: + ib::error() + << "Could not open or create the system tablespace. If" + " you tried to add new data files to the system" + " tablespace, and it failed here, you should now" + " edit innodb_data_file_path in my.cnf back to what" + " it was, and remove the new ibdata files InnoDB" + " created in this failed attempt. InnoDB only wrote" + " those files full of zeros, but did not yet use" + " them in any way. 
But be careful: do not remove" + " old data files which contain your precious data!"; + /* fall through */ + default: + /* Other errors might come from Datafile::validate_first_page() */ + return(srv_init_abort(err)); + } + + srv_log_file_size_requested = srv_log_file_size; + + if (innodb_encrypt_temporary_tables && !log_crypt_init()) { + return srv_init_abort(DB_ERROR); + } + + std::string logfile0; + bool create_new_log = create_new_db; + if (create_new_db) { + flushed_lsn = log_sys.get_lsn(); + log_sys.set_flushed_lsn(flushed_lsn); + buf_flush_sync(); + + err = create_log_file(true, flushed_lsn, logfile0); + + if (err != DB_SUCCESS) { + return(srv_init_abort(err)); + } + } else { + srv_log_file_size = 0; + + bool log_file_found; + if (dberr_t err = find_and_check_log_file(log_file_found)) { + if (err == DB_NOT_FOUND) { + return DB_SUCCESS; + } + return srv_init_abort(err); + } + + create_new_log = srv_log_file_size == 0; + if (create_new_log) { + if (flushed_lsn < lsn_t(1000)) { + ib::error() + << "Cannot create log file because" + " data files are corrupt or the" + " database was not shut down cleanly" + " after creating the data files."; + return srv_init_abort(DB_ERROR); + } + + srv_log_file_size = srv_log_file_size_requested; + + err = create_log_file(false, flushed_lsn, logfile0); + + if (err == DB_SUCCESS) { + err = create_log_file_rename(flushed_lsn, + logfile0); + } + + if (err != DB_SUCCESS) { + return(srv_init_abort(err)); + } + + /* Suppress the message about + crash recovery. 
*/ + flushed_lsn = log_sys.get_lsn(); + goto file_checked; + } + + srv_log_file_found = log_file_found; + + log_sys.log.open_file(get_log_file_path()); + + log_sys.log.create(); + + if (!log_set_capacity(srv_log_file_size_requested)) { + return(srv_init_abort(DB_ERROR)); + } + } + +file_checked: + /* Open log file and data files in the systemtablespace: we keep + them open until database shutdown */ + ut_d(fil_system.sys_space->recv_size = srv_sys_space_size_debug); + + err = fil_system.sys_space->open(create_new_db) + ? srv_undo_tablespaces_init(create_new_db) + : DB_ERROR; + + /* If the force recovery is set very high then we carry on regardless + of all errors. Basically this is fingers crossed mode. */ + + if (err != DB_SUCCESS + && srv_force_recovery < SRV_FORCE_NO_UNDO_LOG_SCAN) { + + return(srv_init_abort(err)); + } + + /* Initialize objects used by dict stats gathering thread, which + can also be used by recovery if it tries to drop some table */ + if (!srv_read_only_mode) { + dict_stats_init(); + } + + trx_sys.create(); + + if (create_new_db) { + ut_ad(!srv_read_only_mode); + + mtr_start(&mtr); + ut_ad(fil_system.sys_space->id == 0); + compile_time_assert(TRX_SYS_SPACE == 0); + compile_time_assert(IBUF_SPACE_ID == 0); + fsp_header_init(fil_system.sys_space, + uint32_t(sum_of_new_sizes), &mtr); + + ulint ibuf_root = btr_create( + DICT_CLUSTERED | DICT_IBUF, fil_system.sys_space, + DICT_IBUF_ID_MIN, nullptr, &mtr); + + mtr_commit(&mtr); + + if (ibuf_root == FIL_NULL) { + return(srv_init_abort(DB_ERROR)); + } + + ut_ad(ibuf_root == IBUF_TREE_ROOT_PAGE_NO); + + /* To maintain backward compatibility we create only + the first rollback segment before the double write buffer. + All the remaining rollback segments will be created later, + after the double write buffer has been created. 
*/ + trx_sys_create_sys_pages(); + err = trx_lists_init_at_db_start(); + + if (err != DB_SUCCESS) { + return(srv_init_abort(err)); + } + + err = dict_create(); + + if (err != DB_SUCCESS) { + return(srv_init_abort(err)); + } + + buf_flush_sync(); + + flushed_lsn = log_sys.get_lsn(); + + err = fil_write_flushed_lsn(flushed_lsn); + + if (err == DB_SUCCESS) { + err = create_log_file_rename(flushed_lsn, logfile0); + } + + if (err != DB_SUCCESS) { + return(srv_init_abort(err)); + } + } else { + /* Suppress warnings in fil_space_t::create() for files + that are being read before dict_boot() has recovered + DICT_HDR_MAX_SPACE_ID. */ + fil_system.space_id_reuse_warned = true; + + /* We always try to do a recovery, even if the database had + been shut down normally: this is the normal startup path */ + + err = create_new_log + ? DB_SUCCESS + : recv_recovery_from_checkpoint_start(flushed_lsn); + recv_sys.close_files(); + + recv_sys.dblwr.pages.clear(); + + if (err != DB_SUCCESS) { + return(srv_init_abort(err)); + } + + switch (srv_operation) { + case SRV_OPERATION_NORMAL: + case SRV_OPERATION_RESTORE_EXPORT: + /* Initialize the change buffer. */ + err = dict_boot(); + if (err != DB_SUCCESS) { + return(srv_init_abort(err)); + } + /* fall through */ + case SRV_OPERATION_RESTORE: + /* This must precede + recv_apply_hashed_log_recs(true). */ + srv_undo_tablespaces_active + = trx_rseg_get_n_undo_tablespaces(); + err = srv_validate_undo_tablespaces(); + if (err != DB_SUCCESS) { + return srv_init_abort(err); + } + if (srv_operation == SRV_OPERATION_RESTORE) { + break; + } + err = trx_lists_init_at_db_start(); + if (err != DB_SUCCESS) { + return srv_init_abort(err); + } + break; + case SRV_OPERATION_RESTORE_DELTA: + case SRV_OPERATION_BACKUP: + ut_ad("wrong mariabackup mode" == 0); + } + + if (srv_force_recovery < SRV_FORCE_NO_LOG_REDO) { + /* Apply the hashed log records to the + respective file pages, for the last batch of + recv_group_scan_log_recs(). 
*/ + + recv_sys.apply(true); + + if (recv_sys.found_corrupt_log + || recv_sys.found_corrupt_fs) { + return(srv_init_abort(DB_CORRUPTION)); + } + + DBUG_PRINT("ib_log", ("apply completed")); + + if (recv_needed_recovery) { + trx_sys_print_mysql_binlog_offset(); + } + } + + fil_system.space_id_reuse_warned = false; + + if (!srv_read_only_mode) { + const ulint flags = FSP_FLAGS_PAGE_SSIZE(); + for (ulint id = 0; id <= srv_undo_tablespaces; id++) { + if (fil_space_t* space = fil_space_get(id)) { + fsp_flags_try_adjust(space, flags); + } + } + + if (sum_of_new_sizes > 0) { + /* New data file(s) were added */ + mtr.start(); + mtr.x_lock_space(fil_system.sys_space, + __FILE__, __LINE__); + buf_block_t* block = buf_page_get( + page_id_t(0, 0), 0, + RW_SX_LATCH, &mtr); + ulint size = mach_read_from_4( + FSP_HEADER_OFFSET + FSP_SIZE + + block->frame); + ut_ad(size == fil_system.sys_space + ->size_in_header); + size += sum_of_new_sizes; + mtr.write<4>(*block, + FSP_HEADER_OFFSET + FSP_SIZE + + block->frame, size); + fil_system.sys_space->size_in_header + = uint32_t(size); + mtr.commit(); + /* Immediately write the log record about + increased tablespace size to disk, so that it + is durable even if mysqld would crash + quickly */ + log_buffer_flush_to_disk(); + } + } + +#ifdef UNIV_DEBUG + { + mtr.start(); + buf_block_t* block = buf_page_get(page_id_t(0, 0), 0, + RW_S_LATCH, &mtr); + ut_ad(mach_read_from_4(FSP_SIZE + FSP_HEADER_OFFSET + + block->frame) + == fil_system.sys_space->size_in_header); + mtr.commit(); + } +#endif + const ulint tablespace_size_in_header + = fil_system.sys_space->size_in_header; + const ulint sum_of_data_file_sizes + = srv_sys_space.get_sum_of_sizes(); + /* Compare the system tablespace file size to what is + stored in FSP_SIZE. In srv_sys_space.open_or_create() + we already checked that the file sizes match the + innodb_data_file_path specification. 
*/ + if (srv_read_only_mode + || sum_of_data_file_sizes == tablespace_size_in_header) { + /* Do not complain about the size. */ + } else if (!srv_sys_space.can_auto_extend_last_file() + || sum_of_data_file_sizes + < tablespace_size_in_header) { + ib::error() << "Tablespace size stored in header is " + << tablespace_size_in_header + << " pages, but the sum of data file sizes is " + << sum_of_data_file_sizes << " pages"; + + if (srv_force_recovery == 0 + && sum_of_data_file_sizes + < tablespace_size_in_header) { + ib::error() << + "Cannot start InnoDB. The tail of" + " the system tablespace is" + " missing. Have you edited" + " innodb_data_file_path in my.cnf" + " in an inappropriate way, removing" + " data files from there?" + " You can set innodb_force_recovery=1" + " in my.cnf to force" + " a startup if you are trying to" + " recover a badly corrupt database."; + + return(srv_init_abort(DB_ERROR)); + } + } + + recv_sys.debug_free(); + + if (srv_operation == SRV_OPERATION_RESTORE + || srv_operation == SRV_OPERATION_RESTORE_EXPORT) { + /* After applying the redo log from + SRV_OPERATION_BACKUP, flush the changes + to the data files and truncate or delete the log. + Unless --export is specified, no further change to + InnoDB files is needed. */ + ut_ad(srv_force_recovery <= SRV_FORCE_IGNORE_CORRUPT); + ut_ad(recv_no_log_write); + err = fil_write_flushed_lsn(log_sys.get_lsn()); + DBUG_ASSERT(!buf_pool.any_io_pending()); + log_sys.log.close_file(); + if (err == DB_SUCCESS) { + bool trunc = srv_operation + == SRV_OPERATION_RESTORE; + if (!trunc) { + delete_log_file("0"); + } else { + auto logfile0 = get_log_file_path(); + /* Truncate the first log file. */ + fclose(fopen(logfile0.c_str(), "w")); + } + } + return(err); + } + + /* Upgrade or resize or rebuild the redo logs before + generating any dirty pages, so that the old redo log + file will not be written to. */ + + if (srv_force_recovery == SRV_FORCE_NO_LOG_REDO) { + /* Completely ignore the redo log. 
*/ + } else if (srv_read_only_mode) { + /* Leave the redo log alone. */ + } else if (srv_log_file_size_requested == srv_log_file_size + && srv_log_file_found + && log_sys.log.format + == (srv_encrypt_log + ? log_t::FORMAT_ENC_10_5 + : log_t::FORMAT_10_5) + && log_sys.log.subformat == 2) { + /* No need to add or remove encryption, + upgrade, downgrade, or resize. */ + } else { + /* Prepare to delete the old redo log file */ + flushed_lsn = srv_prepare_to_delete_redo_log_file( + srv_log_file_found); + + DBUG_EXECUTE_IF("innodb_log_abort_1", + return(srv_init_abort(DB_ERROR));); + /* Prohibit redo log writes from any other + threads until creating a log checkpoint at the + end of create_log_file(). */ + ut_d(recv_no_log_write = true); + DBUG_ASSERT(!buf_pool.any_io_pending()); + + DBUG_EXECUTE_IF("innodb_log_abort_3", + return(srv_init_abort(DB_ERROR));); + DBUG_PRINT("ib_log", ("After innodb_log_abort_3")); + + /* Stamp the LSN to the data files. */ + err = fil_write_flushed_lsn(flushed_lsn); + + DBUG_EXECUTE_IF("innodb_log_abort_4", err = DB_ERROR;); + DBUG_PRINT("ib_log", ("After innodb_log_abort_4")); + + if (err != DB_SUCCESS) { + return(srv_init_abort(err)); + } + + /* Close the redo log file, so that we can replace it */ + log_sys.log.close_file(); + + DBUG_EXECUTE_IF("innodb_log_abort_5", + return(srv_init_abort(DB_ERROR));); + DBUG_PRINT("ib_log", ("After innodb_log_abort_5")); + + ib::info() + << "Starting to delete and rewrite log file."; + + srv_log_file_size = srv_log_file_size_requested; + + err = create_log_file(false, flushed_lsn, logfile0); + + if (err == DB_SUCCESS) { + err = create_log_file_rename(flushed_lsn, + logfile0); + } + + if (err != DB_SUCCESS) { + return(srv_init_abort(err)); + } + } + } + + ut_ad(err == DB_SUCCESS); + ut_a(sum_of_new_sizes != ULINT_UNDEFINED); + + /* Create the doublewrite buffer to a new tablespace */ + if (!srv_read_only_mode && srv_force_recovery < SRV_FORCE_NO_TRX_UNDO + && !buf_dblwr.create()) { + 
return(srv_init_abort(DB_ERROR)); + } + + /* Here the double write buffer has already been created and so + any new rollback segments will be allocated after the double + write buffer. The default segment should already exist. + We create the new segments only if it's a new database or + the database was shutdown cleanly. */ + + /* Note: When creating the extra rollback segments during an upgrade + we violate the latching order, even if the change buffer is empty. + We make an exception in sync0sync.cc and check srv_is_being_started + for that violation. It cannot create a deadlock because we are still + running in single threaded mode essentially. Only the IO threads + should be running at this stage. */ + + if (!trx_sys_create_rsegs()) { + return(srv_init_abort(DB_ERROR)); + } + + if (!create_new_db) { + ut_ad(high_level_read_only + || srv_force_recovery <= SRV_FORCE_NO_IBUF_MERGE); + + /* Validate a few system page types that were left + uninitialized before MySQL or MariaDB 5.5. */ + if (!high_level_read_only + && !fil_system.sys_space->full_crc32()) { + buf_block_t* block; + mtr.start(); + /* Bitmap page types will be reset in + buf_dblwr_check_block() without redo logging. */ + block = buf_page_get( + page_id_t(IBUF_SPACE_ID, + FSP_IBUF_HEADER_PAGE_NO), + 0, RW_X_LATCH, &mtr); + fil_block_check_type(*block, FIL_PAGE_TYPE_SYS, &mtr); + /* Already MySQL 3.23.53 initialized + FSP_IBUF_TREE_ROOT_PAGE_NO to + FIL_PAGE_INDEX. No need to reset that one. 
*/ + block = buf_page_get( + page_id_t(TRX_SYS_SPACE, TRX_SYS_PAGE_NO), + 0, RW_X_LATCH, &mtr); + fil_block_check_type(*block, FIL_PAGE_TYPE_TRX_SYS, + &mtr); + block = buf_page_get( + page_id_t(TRX_SYS_SPACE, + FSP_FIRST_RSEG_PAGE_NO), + 0, RW_X_LATCH, &mtr); + fil_block_check_type(*block, FIL_PAGE_TYPE_SYS, &mtr); + block = buf_page_get( + page_id_t(TRX_SYS_SPACE, FSP_DICT_HDR_PAGE_NO), + 0, RW_X_LATCH, &mtr); + fil_block_check_type(*block, FIL_PAGE_TYPE_SYS, &mtr); + mtr.commit(); + } + + /* Roll back any recovered data dictionary + transactions, so that the data dictionary tables will + be free of any locks. The data dictionary latch + should guarantee that there is at most one data + dictionary transaction active at a time. */ + if (!high_level_read_only + && srv_force_recovery < SRV_FORCE_NO_TRX_UNDO) { + /* If the following call is ever removed, the + first-time ha_innobase::open() must hold (or + acquire and release) a table lock that + conflicts with trx_resurrect_table_locks(), to + ensure that any recovered incomplete ALTER + TABLE will have been rolled back. Otherwise, + dict_table_t::instant could be cleared by + rollback invoking + dict_index_t::clear_instant_alter() while open + table handles exist in client connections. */ + trx_rollback_recovered(false); + } + + /* FIXME: Skip the following if srv_read_only_mode, + while avoiding "Allocated tablespace ID" warnings. */ + if (srv_force_recovery <= SRV_FORCE_NO_IBUF_MERGE) { + /* Open or Create SYS_TABLESPACES and SYS_DATAFILES + so that tablespace names and other metadata can be + found. */ + err = dict_create_or_check_sys_tablespace(); + if (err != DB_SUCCESS) { + return(srv_init_abort(err)); + } + + /* The following call is necessary for the insert + buffer to work with multiple tablespaces. We must + know the mapping between space id's and .ibd file + names. 
+ + In a crash recovery, we check that the info in data + dictionary is consistent with what we already know + about space id's from the calls to fil_ibd_load(). + + In a normal startup, we create the space objects for + every table in the InnoDB data dictionary that has + an .ibd file. + + We also determine the maximum tablespace id used. */ + dict_check_tablespaces_and_store_max_id(); + } + + if (srv_force_recovery < SRV_FORCE_NO_TRX_UNDO + && !srv_read_only_mode) { + /* Drop partially created indexes. */ + row_merge_drop_temp_indexes(); + /* Drop garbage tables. */ + row_mysql_drop_garbage_tables(); + + /* Drop any auxiliary tables that were not + dropped when the parent table was + dropped. This can happen if the parent table + was dropped but the server crashed before the + auxiliary tables were dropped. */ + fts_drop_orphaned_tables(); + + /* Rollback incomplete non-DDL transactions */ + trx_rollback_is_active = true; + os_thread_create(trx_rollback_all_recovered); + } + } + + srv_startup_is_before_trx_rollback_phase = false; + + if (!srv_read_only_mode) { + /* timer task which watches the timeouts + for lock waits */ + lock_sys.timeout_timer.reset(srv_thread_pool->create_timer( + lock_wait_timeout_task)); + + DBUG_EXECUTE_IF("innodb_skip_monitors", goto skip_monitors;); + /* Create the task which warns of long semaphore waits */ + srv_start_periodic_timer(srv_monitor_timer, srv_monitor_task, + SRV_MONITOR_INTERVAL); + +#ifndef DBUG_OFF +skip_monitors: +#endif + ut_ad(srv_force_recovery >= SRV_FORCE_NO_UNDO_LOG_SCAN + || !purge_sys.enabled()); + + if (srv_force_recovery < SRV_FORCE_NO_BACKGROUND) { + srv_undo_sources = true; + /* Create the dict stats gathering task */ + dict_stats_start(); + /* Create the thread that will optimize the + FULLTEXT search index subsystem. 
*/ + fts_optimize_init(); + } + } + + /* Create the SYS_FOREIGN and SYS_FOREIGN_COLS system tables */ + err = dict_create_or_check_foreign_constraint_tables(); + if (err == DB_SUCCESS) { + err = dict_create_or_check_sys_tablespace(); + if (err == DB_SUCCESS) { + err = dict_create_or_check_sys_virtual(); + } + } + switch (err) { + case DB_SUCCESS: + break; + case DB_READ_ONLY: + if (srv_force_recovery >= SRV_FORCE_NO_TRX_UNDO) { + break; + } + ib::error() << "Cannot create system tables in read-only mode"; + /* fall through */ + default: + return(srv_init_abort(err)); + } + + if (!srv_read_only_mode && srv_operation == SRV_OPERATION_NORMAL) { + /* Initialize the innodb_temporary tablespace and keep + it open until shutdown. */ + err = srv_open_tmp_tablespace(create_new_db); + + if (err != DB_SUCCESS) { + return(srv_init_abort(err)); + } + + trx_temp_rseg_create(); + + if (srv_force_recovery < SRV_FORCE_NO_BACKGROUND) { + srv_start_periodic_timer(srv_master_timer, srv_master_callback, 1000); + } + } + + if (!srv_read_only_mode && srv_operation == SRV_OPERATION_NORMAL + && srv_force_recovery < SRV_FORCE_NO_BACKGROUND) { + srv_init_purge_tasks(); + purge_sys.coordinator_startup(); + srv_wake_purge_thread_if_not_active(); + } + + srv_is_being_started = false; + + if (srv_print_verbose_log) { + ib::info() << INNODB_VERSION_STR + << " started; log sequence number " + << recv_sys.recovered_lsn + << "; transaction id " << trx_sys.get_max_trx_id(); + } + + if (srv_force_recovery == 0) { + /* In the change buffer we may have even bigger tablespace + id's, because we may have dropped those tablespaces, but + the buffered records have not been cleaned yet. */ + ibuf_update_max_tablespace_id(); + } + + if (!srv_read_only_mode) { + if (create_new_db) { + srv_buffer_pool_load_at_startup = FALSE; + } + +#ifdef WITH_WSREP + /* + Create the dump/load thread only when not running with + --wsrep-recover. 
+ */ + if (!get_wsrep_recovery()) { +#endif /* WITH_WSREP */ + + /* Start buffer pool dump/load task */ + buf_load_at_startup(); + +#ifdef WITH_WSREP + } else { + ib::warn() << + "Skipping buffer pool dump/restore during " + "wsrep recovery."; + } +#endif /* WITH_WSREP */ + + /* Create thread(s) that handles key rotation. This is + needed already here as log_preflush_pool_modified_pages + will flush dirty pages and that might need e.g. + fil_crypt_threads_event. */ + fil_system_enter(); + fil_crypt_threads_init(); + fil_system_exit(); + + /* Initialize online defragmentation. */ + btr_defragment_init(); + + srv_started_redo = true; + } + + return(DB_SUCCESS); +} + +/** Shut down background threads that can generate undo log. */ +void srv_shutdown_bg_undo_sources() +{ + srv_shutdown_state = SRV_SHUTDOWN_INITIATED; + + if (srv_undo_sources) { + ut_ad(!srv_read_only_mode); + fts_optimize_shutdown(); + dict_stats_shutdown(); + while (row_get_background_drop_list_len_low()) { + srv_inc_activity_count(); + os_thread_yield(); + } + srv_undo_sources = false; + } +} + +/** + Shutdown purge to make sure that there is no possibility that we call any + plugin code (e.g., audit) inside virtual column computation. +*/ +void innodb_preshutdown() +{ + static bool first_time= true; + if (!first_time) + return; + first_time= false; + + if (srv_read_only_mode) + return; + if (!srv_fast_shutdown && srv_operation == SRV_OPERATION_NORMAL) + { + /* Because a slow shutdown must empty the change buffer, we had + better prevent any further changes from being buffered. */ + innodb_change_buffering= 0; + + if (trx_sys.is_initialised()) + while (trx_sys.any_active_transactions()) + os_thread_sleep(1000); + } + srv_shutdown_bg_undo_sources(); + srv_purge_shutdown(); + + if (srv_n_fil_crypt_threads) + fil_crypt_set_thread_cnt(0); +} + + +/** Shut down InnoDB. 
*/ +void innodb_shutdown() +{ + innodb_preshutdown(); + ut_ad(!srv_undo_sources); + switch (srv_operation) { + case SRV_OPERATION_BACKUP: + case SRV_OPERATION_RESTORE_DELTA: + break; + case SRV_OPERATION_RESTORE: + case SRV_OPERATION_RESTORE_EXPORT: + srv_shutdown_state = SRV_SHUTDOWN_CLEANUP; + if (!buf_page_cleaner_is_active) { + break; + } + mysql_mutex_lock(&buf_pool.flush_list_mutex); + while (buf_page_cleaner_is_active) { + pthread_cond_signal(&buf_pool.do_flush_list); + my_cond_wait(&buf_pool.done_flush_list, + &buf_pool.flush_list_mutex.m_mutex); + } + mysql_mutex_unlock(&buf_pool.flush_list_mutex); + break; + case SRV_OPERATION_NORMAL: + /* Shut down the persistent files. */ + logs_empty_and_mark_files_at_shutdown(); + } + + os_aio_free(); + fil_space_t::close_all(); + /* Exit any remaining threads. */ + ut_ad(!buf_page_cleaner_is_active); + srv_shutdown_threads(); + + if (srv_monitor_file) { + my_fclose(srv_monitor_file, MYF(MY_WME)); + srv_monitor_file = 0; + if (srv_monitor_file_name) { + unlink(srv_monitor_file_name); + ut_free(srv_monitor_file_name); + } + } + + if (srv_misc_tmpfile) { + my_fclose(srv_misc_tmpfile, MYF(MY_WME)); + srv_misc_tmpfile = 0; + } + + ut_ad(dict_sys.is_initialised() || !srv_was_started); + ut_ad(trx_sys.is_initialised() || !srv_was_started); + ut_ad(buf_dblwr.is_initialised() || !srv_was_started + || srv_read_only_mode + || srv_force_recovery >= SRV_FORCE_NO_TRX_UNDO); + ut_ad(lock_sys.is_initialised() || !srv_was_started); + ut_ad(log_sys.is_initialised() || !srv_was_started); + ut_ad(ibuf.index || !srv_was_started); + + dict_stats_deinit(); + + if (srv_started_redo) { + ut_ad(!srv_read_only_mode); + /* srv_shutdown_bg_undo_sources() already invoked + fts_optimize_shutdown(); dict_stats_shutdown(); */ + + fil_crypt_threads_cleanup(); + btr_defragment_shutdown(); + } + + /* This must be disabled before closing the buffer pool + and closing the data dictionary. 
*/ + +#ifdef BTR_CUR_HASH_ADAPT + if (dict_sys.is_initialised()) { + btr_search_disable(); + } +#endif /* BTR_CUR_HASH_ADAPT */ + ibuf_close(); + log_sys.close(); + purge_sys.close(); + trx_sys.close(); + buf_dblwr.close(); + lock_sys.close(); + trx_pool_close(); + + if (!srv_read_only_mode) { + mutex_free(&srv_monitor_file_mutex); + mutex_free(&srv_misc_tmpfile_mutex); + } + + dict_sys.close(); + btr_search_sys_free(); + row_mysql_close(); + srv_free(); + fil_system.close(); + pars_lexer_close(); + recv_sys.close(); + + ut_ad(buf_pool.is_initialised() || !srv_was_started); + buf_pool.close(); + sync_check_close(); + + srv_sys_space.shutdown(); + if (srv_tmp_space.get_sanity_check_status()) { + if (fil_system.temp_space) { + fil_system.temp_space->close(); + } + srv_tmp_space.delete_files(); + } + srv_tmp_space.shutdown(); + +#ifdef WITH_INNODB_DISALLOW_WRITES + os_event_destroy(srv_allow_writes_event); +#endif /* WITH_INNODB_DISALLOW_WRITES */ + + if (srv_was_started && srv_print_verbose_log) { + ib::info() << "Shutdown completed; log sequence number " + << srv_shutdown_lsn + << "; transaction id " << trx_sys.get_max_trx_id(); + } + srv_thread_pool_end(); + srv_started_redo = false; + srv_was_started = false; + srv_start_has_been_called = false; +} + +/** Get the meta-data filename from the table name for a +single-table tablespace. +@param[in] table table object +@param[out] filename filename +@param[in] max_len filename max length */ +void +srv_get_meta_data_filename( + dict_table_t* table, + char* filename, + ulint max_len) +{ + ulint len; + char* path; + + /* Make sure the data_dir_path is set. 
*/ + dict_get_and_save_data_dir_path(table, false); + + if (DICT_TF_HAS_DATA_DIR(table->flags)) { + ut_a(table->data_dir_path); + + path = fil_make_filepath( + table->data_dir_path, table->name.m_name, CFG, true); + } else { + path = fil_make_filepath(NULL, table->name.m_name, CFG, false); + } + + ut_a(path); + len = strlen(path); + ut_a(max_len >= len); + + strcpy(filename, path); + + ut_free(path); +} |