summaryrefslogtreecommitdiffstats
path: root/storage/innobase/buf/buf0lru.cc
diff options
context:
space:
mode:
Diffstat (limited to 'storage/innobase/buf/buf0lru.cc')
-rw-r--r--storage/innobase/buf/buf0lru.cc1477
1 files changed, 1477 insertions, 0 deletions
diff --git a/storage/innobase/buf/buf0lru.cc b/storage/innobase/buf/buf0lru.cc
new file mode 100644
index 00000000..b282eb17
--- /dev/null
+++ b/storage/innobase/buf/buf0lru.cc
@@ -0,0 +1,1477 @@
+/*****************************************************************************
+
+Copyright (c) 1995, 2016, Oracle and/or its affiliates. All Rights Reserved.
+Copyright (c) 2017, 2021, MariaDB Corporation.
+
+This program is free software; you can redistribute it and/or modify it under
+the terms of the GNU General Public License as published by the Free Software
+Foundation; version 2 of the License.
+
+This program is distributed in the hope that it will be useful, but WITHOUT
+ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
+FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details.
+
+You should have received a copy of the GNU General Public License along with
+this program; if not, write to the Free Software Foundation, Inc.,
+51 Franklin Street, Fifth Floor, Boston, MA 02110-1335 USA
+
+*****************************************************************************/
+
+/**************************************************//**
+@file buf/buf0lru.cc
+The database buffer replacement algorithm
+
+Created 11/5/1995 Heikki Tuuri
+*******************************************************/
+
+#include "buf0lru.h"
+#include "sync0rw.h"
+#include "fil0fil.h"
+#include "btr0btr.h"
+#include "buf0buddy.h"
+#include "buf0buf.h"
+#include "buf0flu.h"
+#include "buf0rea.h"
+#include "btr0sea.h"
+#include "os0file.h"
+#include "page0zip.h"
+#include "log0recv.h"
+#include "srv0srv.h"
+#include "srv0mon.h"
+
+/** Flush this many pages in buf_LRU_get_free_block() */
+size_t innodb_lru_flush_size;
+
+/** The number of blocks from the LRU_old pointer onward, including
+the block pointed to, must be buf_pool.LRU_old_ratio/BUF_LRU_OLD_RATIO_DIV
+of the whole LRU list length, except that the tolerance defined below
+is allowed. Note that the tolerance must be small enough such that for
+even the BUF_LRU_OLD_MIN_LEN long LRU list, the LRU_old pointer is not
+allowed to point to either end of the LRU list. */
+
+static constexpr ulint BUF_LRU_OLD_TOLERANCE = 20;
+
+/** The minimum amount of non-old blocks when the LRU_old list exists
+(that is, when there are more than BUF_LRU_OLD_MIN_LEN blocks).
+@see buf_LRU_old_adjust_len */
+#define BUF_LRU_NON_OLD_MIN_LEN 5
+
+/** If we switch on the InnoDB monitor because there are too few available
+frames in the buffer pool, we set this to TRUE */
+static bool buf_lru_switched_on_innodb_mon = false;
+
+/** True if diagnostic message about difficult to find free blocks
+in the buffer bool has already printed. */
+static bool buf_lru_free_blocks_error_printed;
+
+/******************************************************************//**
+These statistics are not 'of' LRU but 'for' LRU. We keep count of I/O
+and page_zip_decompress() operations. Based on the statistics,
+buf_LRU_evict_from_unzip_LRU() decides if we want to evict from
+unzip_LRU or the regular LRU. From unzip_LRU, we will only evict the
+uncompressed frame (meaning we can evict dirty blocks as well). From
+the regular LRU, we will evict the entire block (i.e.: both the
+uncompressed and compressed data), which must be clean. */
+
+/* @{ */
+
+/** Number of intervals for which we keep the history of these stats.
+Updated at SRV_MONITOR_INTERVAL (the buf_LRU_stat_update() call rate). */
+static constexpr ulint BUF_LRU_STAT_N_INTERVAL= 4;
+
+/** Co-efficient with which we multiply I/O operations to equate them
+with page_zip_decompress() operations. */
+static constexpr ulint BUF_LRU_IO_TO_UNZIP_FACTOR= 50;
+
+/** Sampled values buf_LRU_stat_cur.
+Not protected by any mutex. Updated by buf_LRU_stat_update(). */
+static buf_LRU_stat_t buf_LRU_stat_arr[BUF_LRU_STAT_N_INTERVAL];
+
+/** Cursor to buf_LRU_stat_arr[] that is updated in a round-robin fashion. */
+static ulint buf_LRU_stat_arr_ind;
+
+/** Current operation counters. Not protected by any mutex. Cleared
+by buf_LRU_stat_update(). */
+buf_LRU_stat_t buf_LRU_stat_cur;
+
+/** Running sum of past values of buf_LRU_stat_cur.
+Updated by buf_LRU_stat_update(). Not Protected by any mutex. */
+buf_LRU_stat_t buf_LRU_stat_sum;
+
+/* @} */
+
+/** @name Heuristics for detecting index scan @{ */
+/** Move blocks to "new" LRU list only if the first access was at
+least this many milliseconds ago. Not protected by any mutex or latch. */
+uint buf_LRU_old_threshold_ms;
+/* @} */
+
+/** Remove bpage from buf_pool.LRU and buf_pool.page_hash.
+
+If bpage->state() == BUF_BLOCK_ZIP_PAGE && bpage->oldest_modification() <= 1,
+the object will be freed.
+
+@param bpage buffer block
+@param id page identifier
+@param hash_lock buf_pool.page_hash latch (will be released here)
+@param zip whether bpage->zip of BUF_BLOCK_FILE_PAGE should be freed
+
+If a compressed page is freed other compressed pages may be relocated.
+@retval true if BUF_BLOCK_FILE_PAGE was removed from page_hash. The
+caller needs to free the page to the free list
+@retval false if BUF_BLOCK_ZIP_PAGE was removed from page_hash. In
+this case the block is already returned to the buddy allocator. */
+static bool buf_LRU_block_remove_hashed(buf_page_t *bpage, const page_id_t id,
+ page_hash_latch *hash_lock, bool zip);
+
/** Free a block that held a file page back to buf_pool.
The block must already have been detached from buf_pool.page_hash and
buf_pool.LRU (see the callers in buf_LRU_free_page()).
@param block block to be returned to the free list */
static void buf_LRU_block_free_hashed_page(buf_block_t *block)
{
  /* Leave the "file page" state before releasing the frame;
  buf_LRU_block_free_non_file_page() asserts BUF_BLOCK_MEMORY. */
  block->page.free_file_page();
  buf_LRU_block_free_non_file_page(block);
}
+
/** Increase LRU size in bytes by the page size.
@param[in]	bpage	control block */
static inline void incr_LRU_size_in_bytes(const buf_page_t* bpage)
{
	/* FIXME: use atomics, not mutex */
	mysql_mutex_assert_owner(&buf_pool.mutex);

	/* physical_size() is used (not srv_page_size) so that
	ROW_FORMAT=COMPRESSED pages are accounted at their real size. */
	buf_pool.stat.LRU_bytes += bpage->physical_size();

	/* The LRU list can never occupy more memory than the pool. */
	ut_ad(buf_pool.stat.LRU_bytes <= buf_pool.curr_pool_size);
}
+
+/** @return whether the unzip_LRU list should be used for evicting a victim
+instead of the general LRU list */
+bool buf_LRU_evict_from_unzip_LRU()
+{
+ mysql_mutex_assert_owner(&buf_pool.mutex);
+
+ /* If the unzip_LRU list is empty, we can only use the LRU. */
+ if (UT_LIST_GET_LEN(buf_pool.unzip_LRU) == 0) {
+ return false;
+ }
+
+ /* If unzip_LRU is at most 10% of the size of the LRU list,
+ then use the LRU. This slack allows us to keep hot
+ decompressed pages in the buffer pool. */
+ if (UT_LIST_GET_LEN(buf_pool.unzip_LRU)
+ <= UT_LIST_GET_LEN(buf_pool.LRU) / 10) {
+ return false;
+ }
+
+ /* If eviction hasn't started yet, we assume by default
+ that a workload is disk bound. */
+ if (buf_pool.freed_page_clock == 0) {
+ return true;
+ }
+
+ /* Calculate the average over past intervals, and add the values
+ of the current interval. */
+ ulint io_avg = buf_LRU_stat_sum.io / BUF_LRU_STAT_N_INTERVAL
+ + buf_LRU_stat_cur.io;
+
+ ulint unzip_avg = buf_LRU_stat_sum.unzip / BUF_LRU_STAT_N_INTERVAL
+ + buf_LRU_stat_cur.unzip;
+
+ /* Decide based on our formula. If the load is I/O bound
+ (unzip_avg is smaller than the weighted io_avg), evict an
+ uncompressed frame from unzip_LRU. Otherwise we assume that
+ the load is CPU bound and evict from the regular LRU. */
+ return(unzip_avg <= io_avg * BUF_LRU_IO_TO_UNZIP_FACTOR);
+}
+
+/** Try to free an uncompressed page of a compressed block from the unzip
+LRU list. The compressed page is preserved, and it need not be clean.
+@param limit maximum number of blocks to scan
+@return true if freed */
+static bool buf_LRU_free_from_unzip_LRU_list(ulint limit)
+{
+ mysql_mutex_assert_owner(&buf_pool.mutex);
+
+ if (!buf_LRU_evict_from_unzip_LRU()) {
+ return(false);
+ }
+
+ ulint scanned = 0;
+ bool freed = false;
+
+ for (buf_block_t* block = UT_LIST_GET_LAST(buf_pool.unzip_LRU);
+ block && scanned < limit; ++scanned) {
+ buf_block_t* prev_block = UT_LIST_GET_PREV(unzip_LRU, block);
+
+ ut_ad(block->page.state() == BUF_BLOCK_FILE_PAGE);
+ ut_ad(block->in_unzip_LRU_list);
+ ut_ad(block->page.in_LRU_list);
+
+ freed = buf_LRU_free_page(&block->page, false);
+ if (freed) {
+ break;
+ }
+
+ block = prev_block;
+ }
+
+ if (scanned) {
+ MONITOR_INC_VALUE_CUMULATIVE(
+ MONITOR_LRU_UNZIP_SEARCH_SCANNED,
+ MONITOR_LRU_UNZIP_SEARCH_SCANNED_NUM_CALL,
+ MONITOR_LRU_UNZIP_SEARCH_SCANNED_PER_CALL,
+ scanned);
+ }
+
+ return(freed);
+}
+
/** Try to free a clean page from the common LRU list.
@param limit maximum number of blocks to scan
@return whether a page was freed */
static bool buf_LRU_free_from_common_LRU_list(ulint limit)
{
	mysql_mutex_assert_owner(&buf_pool.mutex);

	ulint scanned = 0;
	bool freed = false;

	/* Resume from where a previous scan stopped;
	buf_pool.lru_scan_itr is an iterator (hazard pointer, see
	buf_LRU_remove_block()) that stays valid across frees. */
	for (buf_page_t* bpage = buf_pool.lru_scan_itr.start();
	     bpage && scanned < limit;
	     ++scanned, bpage = buf_pool.lru_scan_itr.get()) {
		/* Advance the iterator to the predecessor BEFORE
		attempting to free bpage, so it never points to a
		block that buf_LRU_free_page() may unlink. */
		buf_page_t* prev = UT_LIST_GET_PREV(LRU, bpage);
		buf_pool.lru_scan_itr.set(prev);

		/* Sample the access flag before the block may be freed. */
		const auto accessed = bpage->is_accessed();

		if (buf_LRU_free_page(bpage, true)) {
			if (!accessed) {
				/* Keep track of pages that are evicted without
				ever being accessed. This gives us a measure of
				the effectiveness of readahead */
				++buf_pool.stat.n_ra_pages_evicted;
			}

			freed = true;
			break;
		}
	}

	if (scanned) {
		/* Export the scan effort to the monitor counters. */
		MONITOR_INC_VALUE_CUMULATIVE(
			MONITOR_LRU_SEARCH_SCANNED,
			MONITOR_LRU_SEARCH_SCANNED_NUM_CALL,
			MONITOR_LRU_SEARCH_SCANNED_PER_CALL,
			scanned);
	}

	return(freed);
}
+
+/** Try to free a replaceable block.
+@param limit maximum number of blocks to scan
+@return true if found and freed */
+bool buf_LRU_scan_and_free_block(ulint limit)
+{
+ mysql_mutex_assert_owner(&buf_pool.mutex);
+
+ return buf_LRU_free_from_unzip_LRU_list(limit) ||
+ buf_LRU_free_from_common_LRU_list(limit);
+}
+
/** @return a buffer block from the buf_pool.free list
@retval NULL if the free list is empty */
buf_block_t* buf_LRU_get_free_only()
{
	buf_block_t*	block;

	mysql_mutex_assert_owner(&buf_pool.mutex);

	block = reinterpret_cast<buf_block_t*>(
		UT_LIST_GET_FIRST(buf_pool.free));

	while (block != NULL) {
		/* A block on the free list must be clean, unlinked
		from the LRU and not mapped to any file page. */
		ut_ad(block->page.in_free_list);
		ut_d(block->page.in_free_list = FALSE);
		ut_ad(!block->page.oldest_modification());
		ut_ad(!block->page.in_LRU_list);
		ut_a(!block->page.in_file());
		UT_LIST_REMOVE(buf_pool.free, &block->page);

		/* While the buffer pool is being shrunk
		(curr_size < old_size), blocks in the region being
		withdrawn must not be handed out; they are diverted
		to buf_pool.withdraw below. */
		if (buf_pool.curr_size >= buf_pool.old_size
		    || UT_LIST_GET_LEN(buf_pool.withdraw)
		    >= buf_pool.withdraw_target
		    || !buf_pool.will_be_withdrawn(block->page)) {
			/* No adaptive hash index entries may point to
			a free block. */
			assert_block_ahi_empty(block);

			block->page.set_state(BUF_BLOCK_MEMORY);
			MEM_MAKE_ADDRESSABLE(block->frame, srv_page_size);
			break;
		}

		/* This should be withdrawn */
		UT_LIST_ADD_LAST(
			buf_pool.withdraw,
			&block->page);
		ut_d(block->in_withdraw_list = true);

		/* Try the next block on the free list. */
		block = reinterpret_cast<buf_block_t*>(
			UT_LIST_GET_FIRST(buf_pool.free));
	}

	return(block);
}
+
/******************************************************************//**
Checks how much of buf_pool is occupied by non-data objects like
AHI, lock heaps etc. Depending on the size of non-data objects this
function will either assert or issue a warning and switch on the
status monitor. */
static void buf_LRU_check_size_of_non_data_objects()
{
  mysql_mutex_assert_owner(&buf_pool.mutex);

  /* Skip the check during crash recovery and while the buffer pool
  is being resized. */
  if (recv_recovery_is_on() || buf_pool.curr_size != buf_pool.old_size)
    return;

  /* Blocks that carry data or are free; the rest of the pool is
  occupied by non-data objects. */
  const auto s= UT_LIST_GET_LEN(buf_pool.free) + UT_LIST_GET_LEN(buf_pool.LRU);

  /* Less than 5% data blocks left: unrecoverable, abort. */
  if (s < buf_pool.curr_size / 20)
    ib::fatal() << "Over 95 percent of the buffer pool is"
            " occupied by lock heaps"
#ifdef BTR_CUR_HASH_ADAPT
            " or the adaptive hash index"
#endif /* BTR_CUR_HASH_ADAPT */
            "! Check that your transactions do not set too many"
            " row locks, or review if innodb_buffer_pool_size="
            << (buf_pool.curr_size >> (20U - srv_page_size_shift))
            << "M could be bigger.";

  /* Less than a third data blocks left: warn once and enable the
  InnoDB Monitor for diagnostics. */
  if (s < buf_pool.curr_size / 3)
  {
    if (!buf_lru_switched_on_innodb_mon && srv_monitor_timer)
    {
      /* Over 67 % of the buffer pool is occupied by lock heaps or
      the adaptive hash index. This may be a memory leak! */
      ib::warn() << "Over 67 percent of the buffer pool is"
              " occupied by lock heaps"
#ifdef BTR_CUR_HASH_ADAPT
              " or the adaptive hash index"
#endif /* BTR_CUR_HASH_ADAPT */
              "! Check that your transactions do not set too many row locks."
              " innodb_buffer_pool_size="
              << (buf_pool.curr_size >> (20U - srv_page_size_shift))
              << "M. Starting the InnoDB Monitor to print diagnostics.";
      buf_lru_switched_on_innodb_mon= true;
      srv_print_innodb_monitor= TRUE;
      srv_monitor_timer_schedule_now();
    }
  }
  else if (buf_lru_switched_on_innodb_mon)
  {
    /* Switch off the InnoDB Monitor; this is a simple way to stop the
    monitor if the situation becomes less urgent, but may also
    surprise users who did SET GLOBAL innodb_status_output=ON earlier! */
    buf_lru_switched_on_innodb_mon= false;
    srv_print_innodb_monitor= FALSE;
  }
}
+
/** Get a block from the buf_pool.free list.
If the list is empty, blocks will be moved from the end of buf_pool.LRU
to buf_pool.free.

This function is called from a user thread when it needs a clean
block to read in a page. Note that we only ever get a block from
the free list. Even when we flush a page or find a page in LRU scan
we put it to free list to be used.
* iteration 0:
  * get a block from the buf_pool.free list, success:done
  * if buf_pool.try_LRU_scan is set
    * scan LRU up to 100 pages to free a clean block
    * success:retry the free list
  * flush up to innodb_lru_flush_size LRU blocks to data files
    (until UT_LIST_GET_LEN(buf_pool.free) < innodb_lru_scan_depth)
    * on buf_page_write_complete() the blocks will be put on the
      buf_pool.free list
    * success: retry the free list
* subsequent iterations: same as iteration 0 except:
  * scan whole LRU list
  * scan LRU list even if buf_pool.try_LRU_scan is not set

@param have_mutex whether buf_pool.mutex is already being held
@return the free control block, in state BUF_BLOCK_MEMORY */
buf_block_t *buf_LRU_get_free_block(bool have_mutex)
{
	ulint n_iterations = 0;
	ulint flush_failures = 0;
	MONITOR_INC(MONITOR_LRU_GET_FREE_SEARCH);
	if (have_mutex) {
		mysql_mutex_assert_owner(&buf_pool.mutex);
		goto got_mutex;
	}
	mysql_mutex_lock(&buf_pool.mutex);
got_mutex:
	buf_LRU_check_size_of_non_data_objects();
	buf_block_t* block;

	/* Debug hook: pretend that no free block can be found, to
	exercise the "not_found" diagnostics below. */
	DBUG_EXECUTE_IF("ib_lru_force_no_free_page",
		if (!buf_lru_free_blocks_error_printed) {
			n_iterations = 21;
			goto not_found;});

retry:
	/* If there is a block in the free list, take it */
	if ((block = buf_LRU_get_free_only()) != nullptr) {
got_block:
		if (!have_mutex) {
			mysql_mutex_unlock(&buf_pool.mutex);
		}
		/* Wipe the compressed-page descriptor of the block. */
		memset(&block->page.zip, 0, sizeof block->page.zip);
		return block;
	}

	MONITOR_INC( MONITOR_LRU_GET_FREE_LOOPS );
	if (n_iterations || buf_pool.try_LRU_scan) {
		/* If no block was in the free list, search from the
		end of the LRU list and try to free a block there.
		If we are doing for the first time we'll scan only
		tail of the LRU list otherwise we scan the whole LRU
		list. */
		if (buf_LRU_scan_and_free_block(n_iterations
						? ULINT_UNDEFINED : 100)) {
			goto retry;
		}

		/* Tell other threads that there is no point
		in scanning the LRU list. */
		buf_pool.try_LRU_scan = false;
	}

	/* While an LRU flush batch is in progress, wait for
	buf_page_write_complete() to release blocks to the free list
	(signalled via buf_pool.done_free). */
	for (;;) {
		if ((block = buf_LRU_get_free_only()) != nullptr) {
			goto got_block;
		}
		if (!buf_pool.n_flush_LRU_) {
			break;
		}
		my_cond_wait(&buf_pool.done_free, &buf_pool.mutex.m_mutex);
	}

#ifndef DBUG_OFF
not_found:
#endif
	mysql_mutex_unlock(&buf_pool.mutex);

	/* After many fruitless iterations, warn once — but not while
	the buffer pool is being resized. */
	if (n_iterations > 20 && !buf_lru_free_blocks_error_printed
	    && srv_buf_pool_old_size == srv_buf_pool_size) {

		ib::warn() << "Difficult to find free blocks in the buffer pool"
			" (" << n_iterations << " search iterations)! "
			<< flush_failures << " failed attempts to"
			" flush a page!"
			" Consider increasing innodb_buffer_pool_size."
			" Pending flushes (fsync) log: "
			<< log_sys.get_pending_flushes()
			<< "; buffer pool: "
			<< fil_n_pending_tablespace_flushes
			<< ". " << os_n_file_reads << " OS file reads, "
			<< os_n_file_writes << " OS file writes, "
			<< os_n_fsyncs
			<< " OS fsyncs.";

		buf_lru_free_blocks_error_printed = true;
	}

	if (n_iterations > 1) {
		MONITOR_INC( MONITOR_LRU_GET_FREE_WAITS );
	}

	/* No free block was found: try to flush the LRU list.
	The freed blocks will be up for grabs for all threads.

	TODO: A more elegant way would have been to return one freed
	up block to the caller here but the code that deals with
	removing the block from buf_pool.page_hash and buf_pool.LRU is fairly
	involved (particularly in case of ROW_FORMAT=COMPRESSED pages). We
	can do that in a separate patch sometime in future. */

	if (!buf_flush_LRU(innodb_lru_flush_size)) {
		MONITOR_INC(MONITOR_LRU_SINGLE_FLUSH_FAILURE_COUNT);
		++flush_failures;
	}

	n_iterations++;
	mysql_mutex_lock(&buf_pool.mutex);
	buf_pool.stat.LRU_waits++;
	goto got_mutex;
}
+
/** Move the LRU_old pointer so that the length of the old blocks list
is inside the allowed limits. */
static void buf_LRU_old_adjust_len()
{
	ulint	old_len;
	ulint	new_len;

	ut_a(buf_pool.LRU_old);
	mysql_mutex_assert_owner(&buf_pool.mutex);
	ut_ad(buf_pool.LRU_old_ratio >= BUF_LRU_OLD_RATIO_MIN);
	ut_ad(buf_pool.LRU_old_ratio <= BUF_LRU_OLD_RATIO_MAX);
	compile_time_assert(BUF_LRU_OLD_RATIO_MIN * BUF_LRU_OLD_MIN_LEN
			    > BUF_LRU_OLD_RATIO_DIV
			    * (BUF_LRU_OLD_TOLERANCE + 5));
	compile_time_assert(BUF_LRU_NON_OLD_MIN_LEN < BUF_LRU_OLD_MIN_LEN);

#ifdef UNIV_LRU_DEBUG
	/* buf_pool.LRU_old must be the first item in the LRU list
	whose "old" flag is set. */
	ut_a(buf_pool.LRU_old->old);
	ut_a(!UT_LIST_GET_PREV(LRU, buf_pool.LRU_old)
	     || !UT_LIST_GET_PREV(LRU, buf_pool.LRU_old)->old);
	ut_a(!UT_LIST_GET_NEXT(LRU, buf_pool.LRU_old)
	     || UT_LIST_GET_NEXT(LRU, buf_pool.LRU_old)->old);
#endif /* UNIV_LRU_DEBUG */

	old_len = buf_pool.LRU_old_len;
	/* Target length: LRU_old_ratio of the whole list, capped so
	that at least BUF_LRU_OLD_TOLERANCE + BUF_LRU_NON_OLD_MIN_LEN
	blocks remain in the "new" sublist. */
	new_len = ut_min(UT_LIST_GET_LEN(buf_pool.LRU)
			 * buf_pool.LRU_old_ratio / BUF_LRU_OLD_RATIO_DIV,
			 UT_LIST_GET_LEN(buf_pool.LRU)
			 - (BUF_LRU_OLD_TOLERANCE
			    + BUF_LRU_NON_OLD_MIN_LEN));

	for (;;) {
		buf_page_t*	LRU_old = buf_pool.LRU_old;

		ut_a(LRU_old);
		ut_ad(LRU_old->in_LRU_list);
#ifdef UNIV_LRU_DEBUG
		ut_a(LRU_old->old);
#endif /* UNIV_LRU_DEBUG */

		/* Update the LRU_old pointer if necessary */

		if (old_len + BUF_LRU_OLD_TOLERANCE < new_len) {
			/* Old sublist too short: step LRU_old one
			block towards the head of the list. */
			buf_pool.LRU_old = LRU_old = UT_LIST_GET_PREV(
				LRU, LRU_old);
#ifdef UNIV_LRU_DEBUG
			ut_a(!LRU_old->old);
#endif /* UNIV_LRU_DEBUG */
			old_len = ++buf_pool.LRU_old_len;
			LRU_old->set_old(true);

		} else if (old_len > new_len + BUF_LRU_OLD_TOLERANCE) {
			/* Old sublist too long: step LRU_old one
			block towards the tail of the list. */
			buf_pool.LRU_old = UT_LIST_GET_NEXT(LRU, LRU_old);
			old_len = --buf_pool.LRU_old_len;
			LRU_old->set_old(false);
		} else {
			/* Within tolerance: nothing to adjust. */
			return;
		}
	}
}
+
+/** Initialize the old blocks pointer in the LRU list. This function should be
+called when the LRU list grows to BUF_LRU_OLD_MIN_LEN length. */
+static void buf_LRU_old_init()
+{
+ mysql_mutex_assert_owner(&buf_pool.mutex);
+ ut_a(UT_LIST_GET_LEN(buf_pool.LRU) == BUF_LRU_OLD_MIN_LEN);
+
+ /* We first initialize all blocks in the LRU list as old and then use
+ the adjust function to move the LRU_old pointer to the right
+ position */
+
+ for (buf_page_t* bpage = UT_LIST_GET_LAST(buf_pool.LRU);
+ bpage != NULL;
+ bpage = UT_LIST_GET_PREV(LRU, bpage)) {
+
+ ut_ad(bpage->in_LRU_list);
+
+ /* This loop temporarily violates the
+ assertions of buf_page_t::set_old(). */
+ bpage->old = true;
+ }
+
+ buf_pool.LRU_old = UT_LIST_GET_FIRST(buf_pool.LRU);
+ buf_pool.LRU_old_len = UT_LIST_GET_LEN(buf_pool.LRU);
+
+ buf_LRU_old_adjust_len();
+}
+
+/** Remove a block from the unzip_LRU list if it belonged to the list.
+@param[in] bpage control block */
+static void buf_unzip_LRU_remove_block_if_needed(buf_page_t* bpage)
+{
+ ut_ad(bpage->in_file());
+ mysql_mutex_assert_owner(&buf_pool.mutex);
+
+ if (bpage->belongs_to_unzip_LRU()) {
+ buf_block_t* block = reinterpret_cast<buf_block_t*>(bpage);
+
+ ut_ad(block->in_unzip_LRU_list);
+ ut_d(block->in_unzip_LRU_list = false);
+
+ UT_LIST_REMOVE(buf_pool.unzip_LRU, block);
+ }
+}
+
/** Removes a block from the LRU list.
@param[in]	bpage	control block */
static inline void buf_LRU_remove_block(buf_page_t* bpage)
{
	/* Important that we adjust the hazard pointers before removing
	bpage from the LRU list. */
	buf_page_t* prev_bpage = buf_pool.LRU_remove(bpage);

	/* If the LRU_old pointer is defined and points to just this block,
	move it backward one step */

	if (bpage == buf_pool.LRU_old) {

		/* Below: the previous block is guaranteed to exist,
		because the LRU_old pointer is only allowed to differ
		by BUF_LRU_OLD_TOLERANCE from strict
		buf_pool.LRU_old_ratio/BUF_LRU_OLD_RATIO_DIV of the LRU
		list length. */
		ut_a(prev_bpage);
#ifdef UNIV_LRU_DEBUG
		ut_a(!prev_bpage->old);
#endif /* UNIV_LRU_DEBUG */
		buf_pool.LRU_old = prev_bpage;
		prev_bpage->set_old(true);

		buf_pool.LRU_old_len++;
	}

	/* Account for the removed block in the LRU byte counter. */
	buf_pool.stat.LRU_bytes -= bpage->physical_size();

	buf_unzip_LRU_remove_block_if_needed(bpage);

	/* If the LRU list is so short that LRU_old is not defined,
	clear the "old" flags and return */
	if (UT_LIST_GET_LEN(buf_pool.LRU) < BUF_LRU_OLD_MIN_LEN) {

		for (buf_page_t* bpage = UT_LIST_GET_FIRST(buf_pool.LRU);
		     bpage != NULL;
		     bpage = UT_LIST_GET_NEXT(LRU, bpage)) {

			/* This loop temporarily violates the
			assertions of buf_page_t::set_old(). */
			bpage->old = false;
		}

		buf_pool.LRU_old = NULL;
		buf_pool.LRU_old_len = 0;

		return;
	}

	ut_ad(buf_pool.LRU_old);

	/* Update the LRU_old_len field if necessary */
	if (bpage->old) {
		buf_pool.LRU_old_len--;
	}

	/* Adjust the length of the old block list if necessary */
	buf_LRU_old_adjust_len();
}
+
+/******************************************************************//**
+Adds a block to the LRU list of decompressed zip pages. */
+void
+buf_unzip_LRU_add_block(
+/*====================*/
+ buf_block_t* block, /*!< in: control block */
+ ibool old) /*!< in: TRUE if should be put to the end
+ of the list, else put to the start */
+{
+ mysql_mutex_assert_owner(&buf_pool.mutex);
+ ut_a(block->page.belongs_to_unzip_LRU());
+ ut_ad(!block->in_unzip_LRU_list);
+ ut_d(block->in_unzip_LRU_list = true);
+
+ if (old) {
+ UT_LIST_ADD_LAST(buf_pool.unzip_LRU, block);
+ } else {
+ UT_LIST_ADD_FIRST(buf_pool.unzip_LRU, block);
+ }
+}
+
/******************************************************************//**
Adds a block to the LRU list. Please make sure that the page_size is
already set when invoking the function, so that we can get correct
page_size from the buffer page when adding a block into LRU */
void
buf_LRU_add_block(
	buf_page_t*	bpage,	/*!< in: control block */
	bool		old)	/*!< in: true if should be put to the old blocks
				in the LRU list, else put to the start; if the
				LRU list is very short, the block is added to
				the start, regardless of this parameter */
{
	mysql_mutex_assert_owner(&buf_pool.mutex);
	ut_ad(!bpage->in_LRU_list);

	if (!old || (UT_LIST_GET_LEN(buf_pool.LRU) < BUF_LRU_OLD_MIN_LEN)) {

		UT_LIST_ADD_FIRST(buf_pool.LRU, bpage);

		/* Record the freed-page counter at insertion time,
		truncated to 31 bits. */
		bpage->freed_page_clock = buf_pool.freed_page_clock
			& ((1U << 31) - 1);
	} else {
#ifdef UNIV_LRU_DEBUG
		/* buf_pool.LRU_old must be the first item in the LRU list
		whose "old" flag is set. */
		ut_a(buf_pool.LRU_old->old);
		ut_a(!UT_LIST_GET_PREV(LRU, buf_pool.LRU_old)
		     || !UT_LIST_GET_PREV(LRU, buf_pool.LRU_old)->old);
		ut_a(!UT_LIST_GET_NEXT(LRU, buf_pool.LRU_old)
		     || UT_LIST_GET_NEXT(LRU, buf_pool.LRU_old)->old);
#endif /* UNIV_LRU_DEBUG */
		/* Insert at the boundary of the old sublist, i.e.
		directly after the LRU_old pointer. */
		UT_LIST_INSERT_AFTER(buf_pool.LRU, buf_pool.LRU_old,
				     bpage);

		buf_pool.LRU_old_len++;
	}

	ut_d(bpage->in_LRU_list = TRUE);

	incr_LRU_size_in_bytes(bpage);

	if (UT_LIST_GET_LEN(buf_pool.LRU) > BUF_LRU_OLD_MIN_LEN) {

		ut_ad(buf_pool.LRU_old);

		/* Adjust the length of the old block list if necessary */

		bpage->set_old(old);
		buf_LRU_old_adjust_len();

	} else if (UT_LIST_GET_LEN(buf_pool.LRU) == BUF_LRU_OLD_MIN_LEN) {

		/* The LRU list is now long enough for LRU_old to become
		defined: init it */

		buf_LRU_old_init();
	} else {
		bpage->set_old(buf_pool.LRU_old != NULL);
	}

	/* If this is a zipped block with decompressed frame as well
	then put it on the unzip_LRU list */
	if (bpage->belongs_to_unzip_LRU()) {
		buf_unzip_LRU_add_block((buf_block_t*) bpage, old);
	}
}
+
/** Move a block to the start of the LRU list. */
void buf_page_make_young(buf_page_t *bpage)
{
  ut_ad(bpage->in_file());

  mysql_mutex_lock(&buf_pool.mutex);

  /* Count promotions of "old" blocks for the buffer pool statistics. */
  if (UNIV_UNLIKELY(bpage->old))
    buf_pool.stat.n_pages_made_young++;

  /* Moving to the head is implemented as remove followed by re-add
  with old=false, which inserts at the start of the list. */
  buf_LRU_remove_block(bpage);
  buf_LRU_add_block(bpage, false);

  mysql_mutex_unlock(&buf_pool.mutex);
}
+
/** Try to free a block. If bpage is a descriptor of a compressed-only
ROW_FORMAT=COMPRESSED page, the buf_page_t object will be freed as well.
The caller must hold buf_pool.mutex.
@param bpage block to be freed
@param zip whether to remove both copies of a ROW_FORMAT=COMPRESSED page
@retval true if freed and buf_pool.mutex may have been temporarily released
@retval false if the page was not freed */
bool buf_LRU_free_page(buf_page_t *bpage, bool zip)
{
	const page_id_t id(bpage->id());
	buf_page_t*	b = nullptr;

	mysql_mutex_assert_owner(&buf_pool.mutex);
	ut_ad(bpage->in_file());
	ut_ad(bpage->in_LRU_list);

	/* First, perform a quick check before we acquire hash_lock. */
	if (!bpage->can_relocate()) {
		return false;
	}

	/* We must hold an exclusive hash_lock to prevent
	bpage->can_relocate() from changing due to a concurrent
	execution of buf_page_get_low(). */
	const ulint fold = id.fold();
	page_hash_latch* hash_lock = buf_pool.page_hash.lock_get(fold);
	hash_lock->write_lock();
	lsn_t oldest_modification = bpage->oldest_modification_acquire();

	if (UNIV_UNLIKELY(!bpage->can_relocate())) {
		/* Do not free buffer fixed and I/O-fixed blocks. */
		goto func_exit;
	}

	if (oldest_modification == 1) {
		/* NOTE(review): oldest_modification()==1 appears to
		mark a block that can be detached from the flush list
		without writing anything; confirm against buf0flu.cc. */
		mysql_mutex_lock(&buf_pool.flush_list_mutex);
		oldest_modification = bpage->oldest_modification();
		if (oldest_modification) {
			ut_ad(oldest_modification == 1);
			buf_pool.delete_from_flush_list(bpage);
		}
		mysql_mutex_unlock(&buf_pool.flush_list_mutex);
		ut_ad(!bpage->oldest_modification());
		oldest_modification = 0;
	}

	if (zip || !bpage->zip.data) {
		/* This would completely free the block. */
		/* Do not completely free dirty blocks. */

		if (oldest_modification) {
			goto func_exit;
		}
	} else if (oldest_modification
		   && bpage->state() != BUF_BLOCK_FILE_PAGE) {
func_exit:
		hash_lock->write_unlock();
		return(false);

	} else if (bpage->state() == BUF_BLOCK_FILE_PAGE) {
		/* Allocate a stand-alone descriptor that will keep
		the compressed copy after the uncompressed frame has
		been freed below. */
		b = buf_page_alloc_descriptor();
		ut_a(b);
		mysql_mutex_lock(&buf_pool.flush_list_mutex);
		new (b) buf_page_t(*bpage);
		b->set_state(BUF_BLOCK_ZIP_PAGE);
	}

	mysql_mutex_assert_owner(&buf_pool.mutex);
	ut_ad(bpage->in_file());
	ut_ad(bpage->in_LRU_list);

	DBUG_PRINT("ib_buf", ("free page %u:%u",
			      id.space(), id.page_no()));

	ut_ad(bpage->can_relocate());

	if (!buf_LRU_block_remove_hashed(bpage, id, hash_lock, zip)) {
		/* A BUF_BLOCK_ZIP_PAGE was freed entirely; the hash
		latch has already been released. */
		ut_ad(!b);
		mysql_mutex_assert_not_owner(&buf_pool.flush_list_mutex);
		return(true);
	}

	/* We have just freed a BUF_BLOCK_FILE_PAGE. If b != nullptr
	then it was a compressed page with an uncompressed frame and
	we are interested in freeing only the uncompressed frame.
	Therefore we have to reinsert the compressed page descriptor
	into the LRU and page_hash (and possibly flush_list).
	if !b then it was a regular page that has been freed */

	if (UNIV_LIKELY_NULL(b)) {
		buf_page_t* prev_b = UT_LIST_GET_PREV(LRU, b);

		ut_ad(!buf_pool.page_hash_get_low(id, fold));
		ut_ad(b->zip_size());

		/* The field in_LRU_list of
		the to-be-freed block descriptor should have
		been cleared in
		buf_LRU_block_remove_hashed(), which
		invokes buf_LRU_remove_block(). */
		ut_ad(!bpage->in_LRU_list);

		/* bpage->state was BUF_BLOCK_FILE_PAGE because
		b != nullptr. The type cast below is thus valid. */
		ut_ad(!((buf_block_t*) bpage)->in_unzip_LRU_list);

		/* The fields of bpage were copied to b before
		buf_LRU_block_remove_hashed() was invoked. */
		ut_ad(!b->in_zip_hash);
		ut_ad(b->in_LRU_list);
		ut_ad(b->in_page_hash);

		HASH_INSERT(buf_page_t, hash, &buf_pool.page_hash, fold, b);

		/* Insert b where bpage was in the LRU list. */
		if (prev_b) {
			ulint	lru_len;

			ut_ad(prev_b->in_LRU_list);
			ut_ad(prev_b->in_file());

			UT_LIST_INSERT_AFTER(buf_pool.LRU, prev_b, b);

			incr_LRU_size_in_bytes(b);

			if (b->is_old()) {
				buf_pool.LRU_old_len++;
				if (buf_pool.LRU_old
				    == UT_LIST_GET_NEXT(LRU, b)) {

					buf_pool.LRU_old = b;
				}
			}

			lru_len = UT_LIST_GET_LEN(buf_pool.LRU);

			if (lru_len > BUF_LRU_OLD_MIN_LEN) {
				ut_ad(buf_pool.LRU_old);
				/* Adjust the length of the
				old block list if necessary */
				buf_LRU_old_adjust_len();
			} else if (lru_len == BUF_LRU_OLD_MIN_LEN) {
				/* The LRU list is now long
				enough for LRU_old to become
				defined: init it */
				buf_LRU_old_init();
			}
#ifdef UNIV_LRU_DEBUG
			/* Check that the "old" flag is consistent
			in the block and its neighbours. */
			b->set_old(b->is_old());
#endif /* UNIV_LRU_DEBUG */
		} else {
			/* bpage was at the head of the LRU list. */
			ut_d(b->in_LRU_list = FALSE);
			buf_LRU_add_block(b, b->old);
		}

		buf_flush_relocate_on_flush_list(bpage, b);
		mysql_mutex_unlock(&buf_pool.flush_list_mutex);

		/* Ownership of the compressed frame has moved to b. */
		bpage->zip.data = nullptr;

		page_zip_set_size(&bpage->zip, 0);

		/* Prevent buf_page_get_gen() from
		decompressing the block while we release
		hash_lock. */
		b->set_io_fix(BUF_IO_PIN);
		hash_lock->write_unlock();
	} else if (!zip) {
		hash_lock->write_unlock();
	}

	buf_block_t* block = reinterpret_cast<buf_block_t*>(bpage);

#ifdef BTR_CUR_HASH_ADAPT
	if (block->index) {
		mysql_mutex_unlock(&buf_pool.mutex);

		/* Remove the adaptive hash index on the page.
		The page was declared uninitialized by
		buf_LRU_block_remove_hashed(). We need to flag
		the contents of the page valid (which it still is) in
		order to avoid bogus Valgrind or MSAN warnings.*/

		MEM_MAKE_DEFINED(block->frame, srv_page_size);
		btr_search_drop_page_hash_index(block);
		MEM_UNDEFINED(block->frame, srv_page_size);

		if (UNIV_LIKELY_NULL(b)) {
			ut_ad(b->zip_size());
			b->io_unfix();
		}

		mysql_mutex_lock(&buf_pool.mutex);
	} else
#endif
	if (UNIV_LIKELY_NULL(b)) {
		/* Release the BUF_IO_PIN taken above. */
		ut_ad(b->zip_size());
		b->io_unfix();
	}

	buf_LRU_block_free_hashed_page(block);

	return(true);
}
+
/******************************************************************//**
Puts a block back to the free list. */
void
buf_LRU_block_free_non_file_page(
/*=============================*/
	buf_block_t*	block)	/*!< in: block, must not contain a file page */
{
	void*		data;

	ut_ad(block->page.state() == BUF_BLOCK_MEMORY);
	assert_block_ahi_empty(block);
	ut_ad(!block->page.in_free_list);
	ut_ad(!block->page.oldest_modification());
	ut_ad(!block->page.in_LRU_list);

	block->page.set_state(BUF_BLOCK_NOT_USED);

	MEM_UNDEFINED(block->frame, srv_page_size);
	/* Wipe page_no and space_id */
	static_assert(FIL_PAGE_OFFSET % 4 == 0, "alignment");
	memset_aligned<4>(block->frame + FIL_PAGE_OFFSET, 0xfe, 4);
	static_assert(FIL_PAGE_ARCH_LOG_NO_OR_SPACE_ID % 4 == 2,
		      "not perfect alignment");
	memset_aligned<2>(block->frame + FIL_PAGE_ARCH_LOG_NO_OR_SPACE_ID,
			  0xfe, 4);
	data = block->page.zip.data;

	if (data != NULL) {
		/* Return the compressed frame to the buddy allocator,
		forbidding a release of buf_pool.mutex meanwhile. */
		block->page.zip.data = NULL;
		buf_pool_mutex_exit_forbid();

		ut_ad(block->zip_size());

		buf_buddy_free(data, block->zip_size());

		buf_pool_mutex_exit_allow();
		page_zip_set_size(&block->page.zip, 0);
	}

	/* During a buffer pool shrink, divert blocks from the region
	being withdrawn to buf_pool.withdraw instead of the free list. */
	if (buf_pool.curr_size < buf_pool.old_size
	    && UT_LIST_GET_LEN(buf_pool.withdraw) < buf_pool.withdraw_target
	    && buf_pool.will_be_withdrawn(block->page)) {
		/* This should be withdrawn */
		UT_LIST_ADD_LAST(
			buf_pool.withdraw,
			&block->page);
		ut_d(block->in_withdraw_list = true);
	} else {
		UT_LIST_ADD_FIRST(buf_pool.free, &block->page);
		ut_d(block->page.in_free_list = true);
		/* Wake up a buf_LRU_get_free_block() that may be
		waiting on buf_pool.done_free. */
		pthread_cond_signal(&buf_pool.done_free);
	}

	MEM_NOACCESS(block->frame, srv_page_size);
}
+
/** Release a memory block to the buffer pool.
@param block  block in BUF_BLOCK_MEMORY state (asserted by
              buf_LRU_block_free_non_file_page()) */
ATTRIBUTE_COLD void buf_pool_t::free_block(buf_block_t *block)
{
  ut_ad(this == &buf_pool);
  /* buf_LRU_block_free_non_file_page() expects buf_pool.mutex. */
  mysql_mutex_lock(&mutex);
  buf_LRU_block_free_non_file_page(block);
  mysql_mutex_unlock(&mutex);
}
+
+
/** Remove bpage from buf_pool.LRU and buf_pool.page_hash.

If bpage->state() == BUF_BLOCK_ZIP_PAGE && !bpage->oldest_modification(),
the object will be freed.

@param bpage buffer block
@param id page identifier
@param hash_lock buf_pool.page_hash latch (will be released here)
@param zip whether bpage->zip of BUF_BLOCK_FILE_PAGE should be freed

If a compressed page is freed other compressed pages may be relocated.
@retval true if BUF_BLOCK_FILE_PAGE was removed from page_hash. The
caller needs to free the page to the free list
@retval false if BUF_BLOCK_ZIP_PAGE was removed from page_hash. In
this case the block is already returned to the buddy allocator. */
static bool buf_LRU_block_remove_hashed(buf_page_t *bpage, const page_id_t id,
                                        page_hash_latch *hash_lock, bool zip)
{
	mysql_mutex_assert_owner(&buf_pool.mutex);
	ut_ad(hash_lock->is_write_locked());

	/* No thread may be buffer-fixing the page or have I/O pending
	on it while it is being evicted. */
	ut_a(bpage->io_fix() == BUF_IO_NONE);
	ut_a(!bpage->buf_fix_count());

	buf_LRU_remove_block(bpage);

	buf_pool.freed_page_clock += 1;

	switch (bpage->state()) {
	case BUF_BLOCK_FILE_PAGE:
		MEM_CHECK_ADDRESSABLE(bpage, sizeof(buf_block_t));
		MEM_CHECK_ADDRESSABLE(((buf_block_t*) bpage)->frame,
				      srv_page_size);
		/* Invalidate any optimistic access to the block. */
		buf_block_modify_clock_inc((buf_block_t*) bpage);
		if (bpage->zip.data) {
			const page_t* page = ((buf_block_t*) bpage)->frame;

			ut_a(!zip || !bpage->oldest_modification());
			ut_ad(bpage->zip_size());

			switch (fil_page_get_type(page)) {
			case FIL_PAGE_TYPE_ALLOCATED:
			case FIL_PAGE_INODE:
			case FIL_PAGE_IBUF_BITMAP:
			case FIL_PAGE_TYPE_FSP_HDR:
			case FIL_PAGE_TYPE_XDES:
				/* These are essentially uncompressed pages. */
				if (!zip) {
					/* InnoDB writes the data to the
					uncompressed page frame. Copy it
					to the compressed page, which will
					be preserved. */
					memcpy(bpage->zip.data, page,
					       bpage->zip_size());
				}
				break;
			case FIL_PAGE_TYPE_ZBLOB:
			case FIL_PAGE_TYPE_ZBLOB2:
				break;
			case FIL_PAGE_INDEX:
			case FIL_PAGE_RTREE:
#if defined UNIV_ZIP_DEBUG && defined BTR_CUR_HASH_ADAPT
				/* During recovery, we only update the
				compressed page, not the uncompressed one. */
				ut_a(recv_recovery_is_on()
				     || page_zip_validate(
					     &bpage->zip, page,
					     ((buf_block_t*) bpage)->index));
#endif /* UNIV_ZIP_DEBUG && BTR_CUR_HASH_ADAPT */
				break;
			default:
				/* Unknown page type: dump both frames
				and crash rather than evict corruption. */
				ib::error() << "The compressed page to be"
					" evicted seems corrupt:";
				ut_print_buf(stderr, page, srv_page_size);

				ib::error() << "Possibly older version of"
					" the page:";

				ut_print_buf(stderr, bpage->zip.data,
					     bpage->zip_size());
				putc('\n', stderr);
				ut_error;
			}

			break;
		}
		/* fall through */
	case BUF_BLOCK_ZIP_PAGE:
		ut_a(!bpage->oldest_modification());
		MEM_CHECK_ADDRESSABLE(bpage->zip.data, bpage->zip_size());
		break;
	case BUF_BLOCK_NOT_USED:
	case BUF_BLOCK_MEMORY:
	case BUF_BLOCK_REMOVE_HASH:
		ut_error;
		break;
	}

	ut_ad(!bpage->in_zip_hash);
	/* After this, page lookups will no longer find the page. */
	HASH_DELETE(buf_page_t, hash, &buf_pool.page_hash, id.fold(), bpage);

	switch (bpage->state()) {
	case BUF_BLOCK_ZIP_PAGE:
		ut_ad(!bpage->in_free_list);
		ut_ad(!bpage->in_LRU_list);
		ut_a(bpage->zip.data);
		ut_a(bpage->zip.ssize);
		ut_ad(!bpage->oldest_modification());

		hash_lock->write_unlock();
		buf_pool_mutex_exit_forbid();

		buf_buddy_free(bpage->zip.data, bpage->zip_size());

		buf_pool_mutex_exit_allow();
		buf_page_free_descriptor(bpage);
		return(false);

	case BUF_BLOCK_FILE_PAGE:
		/* Poison page_no and space_id with the FIL_NULL fill
		pattern so stale reads of the freed frame stand out. */
		static_assert(FIL_NULL == 0xffffffffU, "fill pattern");
		static_assert(FIL_PAGE_OFFSET % 4 == 0, "alignment");
		memset_aligned<4>(reinterpret_cast<buf_block_t*>(bpage)->frame
				  + FIL_PAGE_OFFSET, 0xff, 4);
		static_assert(FIL_PAGE_ARCH_LOG_NO_OR_SPACE_ID % 4 == 2,
			      "not perfect alignment");
		memset_aligned<2>(reinterpret_cast<buf_block_t*>(bpage)->frame
				  + FIL_PAGE_ARCH_LOG_NO_OR_SPACE_ID, 0xff, 4);
		MEM_UNDEFINED(((buf_block_t*) bpage)->frame, srv_page_size);
		bpage->set_state(BUF_BLOCK_REMOVE_HASH);

		if (!zip) {
			/* bpage->zip is kept: the compressed copy is
			preserved (see above); the caller keeps holding
			hash_lock. */
			return true;
		}

		/* Question: If we release hash_lock here
		then what protects us against:
		1) Some other thread buffer fixing this page
		2) Some other thread trying to read this page and
		not finding it in buffer pool attempting to read it
		from the disk.
		Answer:
		1) Cannot happen because the page is no longer in the
		page_hash. Only possibility is when while invalidating
		a tablespace we buffer fix the prev_page in LRU to
		avoid relocation during the scan. But that is not
		possible because we are holding buf_pool mutex.

		2) Not possible because in buf_page_init_for_read()
		we do a look up of page_hash while holding buf_pool
		mutex and since we are holding buf_pool mutex here
		and by the time we'll release it in the caller we'd
		have inserted the compressed only descriptor in the
		page_hash. */
		hash_lock->write_unlock();

		if (bpage->zip.data) {
			/* Free the compressed page. */
			void*	data = bpage->zip.data;
			bpage->zip.data = NULL;

			ut_ad(!bpage->in_free_list);
			ut_ad(!bpage->oldest_modification());
			ut_ad(!bpage->in_LRU_list);
			buf_pool_mutex_exit_forbid();

			buf_buddy_free(data, bpage->zip_size());

			buf_pool_mutex_exit_allow();

			page_zip_set_size(&bpage->zip, 0);
		}

		return(true);

	case BUF_BLOCK_NOT_USED:
	case BUF_BLOCK_MEMORY:
	case BUF_BLOCK_REMOVE_HASH:
		break;
	}

	/* The state was validated by the first switch; reaching here
	is impossible. */
	ut_error;
	return(false);
}
+
+/** Remove one page from LRU list and put it to free list.
+@param bpage file page to be freed
+@param id page identifier
+@param hash_lock buf_pool.page_hash latch (will be released here) */
+void buf_LRU_free_one_page(buf_page_t *bpage, const page_id_t id,
+ page_hash_latch *hash_lock)
+{
+ while (bpage->buf_fix_count())
+ /* Wait for other threads to release the fix count
+ before releasing the bpage from LRU list. */
+ (void) LF_BACKOFF();
+
+ if (buf_LRU_block_remove_hashed(bpage, id, hash_lock, true))
+ buf_LRU_block_free_hashed_page(reinterpret_cast<buf_block_t*>(bpage));
+}
+
+/** Update buf_pool.LRU_old_ratio.
+@param[in] old_pct Reserve this percentage of
+ the buffer pool for "old" blocks
+@param[in] adjust true=adjust the LRU list;
+ false=just assign buf_pool.LRU_old_ratio
+ during the initialization of InnoDB
+@return updated old_pct */
+uint buf_LRU_old_ratio_update(uint old_pct, bool adjust)
+{
+ uint ratio = old_pct * BUF_LRU_OLD_RATIO_DIV / 100;
+ if (ratio < BUF_LRU_OLD_RATIO_MIN) {
+ ratio = BUF_LRU_OLD_RATIO_MIN;
+ } else if (ratio > BUF_LRU_OLD_RATIO_MAX) {
+ ratio = BUF_LRU_OLD_RATIO_MAX;
+ }
+
+ if (adjust) {
+ mysql_mutex_lock(&buf_pool.mutex);
+
+ if (ratio != buf_pool.LRU_old_ratio) {
+ buf_pool.LRU_old_ratio = ratio;
+
+ if (UT_LIST_GET_LEN(buf_pool.LRU)
+ >= BUF_LRU_OLD_MIN_LEN) {
+ buf_LRU_old_adjust_len();
+ }
+ }
+
+ mysql_mutex_unlock(&buf_pool.mutex);
+ } else {
+ buf_pool.LRU_old_ratio = ratio;
+ }
+ /* the reverse of
+ ratio = old_pct * BUF_LRU_OLD_RATIO_DIV / 100 */
+ return((uint) (ratio * 100 / (double) BUF_LRU_OLD_RATIO_DIV + 0.5));
+}
+
+/********************************************************************//**
+Update the historical stats that we are collecting for LRU eviction
+policy at the end of each interval. */
+void
+buf_LRU_stat_update()
+{
+ buf_LRU_stat_t* item;
+ buf_LRU_stat_t cur_stat;
+
+ if (!buf_pool.freed_page_clock) {
+ goto func_exit;
+ }
+
+ /* Update the index. */
+ item = &buf_LRU_stat_arr[buf_LRU_stat_arr_ind];
+ buf_LRU_stat_arr_ind++;
+ buf_LRU_stat_arr_ind %= BUF_LRU_STAT_N_INTERVAL;
+
+ /* Add the current value and subtract the obsolete entry.
+ Since buf_LRU_stat_cur is not protected by any mutex,
+ it can be changing between adding to buf_LRU_stat_sum
+ and copying to item. Assign it to local variables to make
+ sure the same value assign to the buf_LRU_stat_sum
+ and item */
+ cur_stat = buf_LRU_stat_cur;
+
+ buf_LRU_stat_sum.io += cur_stat.io - item->io;
+ buf_LRU_stat_sum.unzip += cur_stat.unzip - item->unzip;
+
+ /* Put current entry in the array. */
+ memcpy(item, &cur_stat, sizeof *item);
+
+func_exit:
+ /* Clear the current entry. */
+ memset(&buf_LRU_stat_cur, 0, sizeof buf_LRU_stat_cur);
+}
+
#ifdef UNIV_DEBUG
/** Validate the LRU list. */
void buf_LRU_validate()
{
	ulint	old_len;
	ulint	new_len;

	mysql_mutex_lock(&buf_pool.mutex);

	if (UT_LIST_GET_LEN(buf_pool.LRU) >= BUF_LRU_OLD_MIN_LEN) {

		ut_a(buf_pool.LRU_old);
		old_len = buf_pool.LRU_old_len;

		/* Expected length of the "old" sublist, bounded so the
		"new" sublist keeps at least its minimum length. */
		new_len = ut_min(UT_LIST_GET_LEN(buf_pool.LRU)
				 * buf_pool.LRU_old_ratio
				 / BUF_LRU_OLD_RATIO_DIV,
				 UT_LIST_GET_LEN(buf_pool.LRU)
				 - (BUF_LRU_OLD_TOLERANCE
				    + BUF_LRU_NON_OLD_MIN_LEN));

		/* The actual length may lag by up to the tolerance. */
		ut_a(old_len >= new_len - BUF_LRU_OLD_TOLERANCE);
		ut_a(old_len <= new_len + BUF_LRU_OLD_TOLERANCE);
	}

	CheckInLRUList::validate();

	old_len = 0;

	/* Walk the LRU list: check each block's state, and that the
	"old" blocks form one contiguous run starting exactly at
	buf_pool.LRU_old. */
	for (buf_page_t* bpage = UT_LIST_GET_FIRST(buf_pool.LRU);
	     bpage != NULL;
	     bpage = UT_LIST_GET_NEXT(LRU, bpage)) {

		switch (bpage->state()) {
		case BUF_BLOCK_NOT_USED:
		case BUF_BLOCK_MEMORY:
		case BUF_BLOCK_REMOVE_HASH:
			ut_error;
			break;
		case BUF_BLOCK_FILE_PAGE:
			ut_ad(reinterpret_cast<buf_block_t*>(bpage)
			      ->in_unzip_LRU_list
			      == bpage->belongs_to_unzip_LRU());
			/* fall through */
		case BUF_BLOCK_ZIP_PAGE:
			break;
		}

		if (bpage->is_old()) {
			const buf_page_t*	prev
				= UT_LIST_GET_PREV(LRU, bpage);
			const buf_page_t*	next
				= UT_LIST_GET_NEXT(LRU, bpage);

			if (!old_len++) {
				/* The first "old" block must be the one
				buf_pool.LRU_old points to. */
				ut_a(buf_pool.LRU_old == bpage);
			} else {
				ut_a(!prev || prev->is_old());
			}

			ut_a(!next || next->is_old());
		}
	}

	ut_a(buf_pool.LRU_old_len == old_len);

	CheckInFreeList::validate();

	/* Every block in the free list must be unused. */
	for (buf_page_t* bpage = UT_LIST_GET_FIRST(buf_pool.free);
	     bpage != NULL;
	     bpage = UT_LIST_GET_NEXT(list, bpage)) {

		ut_a(bpage->state() == BUF_BLOCK_NOT_USED);
	}

	CheckUnzipLRUAndLRUList::validate();

	/* The unzip_LRU list must be a subset of the LRU list. */
	for (buf_block_t* block = UT_LIST_GET_FIRST(buf_pool.unzip_LRU);
	     block != NULL;
	     block = UT_LIST_GET_NEXT(unzip_LRU, block)) {

		ut_ad(block->in_unzip_LRU_list);
		ut_ad(block->page.in_LRU_list);
		ut_a(block->page.belongs_to_unzip_LRU());
	}

	mysql_mutex_unlock(&buf_pool.mutex);
}
#endif /* UNIV_DEBUG */
+
#if defined UNIV_DEBUG_PRINT || defined UNIV_DEBUG
/** Dump the LRU list to stderr. */
void buf_LRU_print()
{
	mysql_mutex_lock(&buf_pool.mutex);

	buf_page_t*	pg = UT_LIST_GET_FIRST(buf_pool.LRU);

	while (pg != NULL) {
		const page_id_t	id(pg->id());

		fprintf(stderr, "BLOCK space %u page %u ",
			id.space(), id.page_no());

		if (pg->is_old()) {
			fputs("old ", stderr);
		}

		const uint32_t	fix_count = pg->buf_fix_count();

		if (fix_count) {
			fprintf(stderr, "buffix count %u ", fix_count);
		}

		const auto	io_fix = pg->io_fix();

		if (io_fix) {
			fprintf(stderr, "io_fix %d ", io_fix);
		}

		if (pg->oldest_modification()) {
			fputs("modif. ", stderr);
		}

		const auto	state = pg->state();
		const byte*	frame;

		switch (state) {
		case BUF_BLOCK_FILE_PAGE:
			frame = buf_block_get_frame((buf_block_t*) pg);
			fprintf(stderr, "\ntype %u index id " IB_ID_FMT "\n",
				fil_page_get_type(frame),
				btr_page_get_index_id(frame));
			break;
		case BUF_BLOCK_ZIP_PAGE:
			frame = pg->zip.data;
			fprintf(stderr, "\ntype %u size " ULINTPF
				" index id " IB_ID_FMT "\n",
				fil_page_get_type(frame),
				pg->zip_size(),
				btr_page_get_index_id(frame));
			break;

		default:
			fprintf(stderr, "\n!state %d!\n", state);
			break;
		}

		pg = UT_LIST_GET_NEXT(LRU, pg);
	}

	mysql_mutex_unlock(&buf_pool.mutex);
}
#endif /* UNIV_DEBUG_PRINT || UNIV_DEBUG */