Diffstat (limited to 'storage/innobase/include/buf0buf.h')
-rw-r--r-- | storage/innobase/include/buf0buf.h | 2456
1 files changed, 2456 insertions, 0 deletions
diff --git a/storage/innobase/include/buf0buf.h b/storage/innobase/include/buf0buf.h new file mode 100644 index 00000000..5a118df4 --- /dev/null +++ b/storage/innobase/include/buf0buf.h @@ -0,0 +1,2456 @@ +/***************************************************************************** + +Copyright (c) 1995, 2016, Oracle and/or its affiliates. All Rights Reserved. +Copyright (c) 2013, 2021, MariaDB Corporation. + +This program is free software; you can redistribute it and/or modify it under +the terms of the GNU General Public License as published by the Free Software +Foundation; version 2 of the License. + +This program is distributed in the hope that it will be useful, but WITHOUT +ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS +FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details. + +You should have received a copy of the GNU General Public License along with +this program; if not, write to the Free Software Foundation, Inc., +51 Franklin Street, Fifth Floor, Boston, MA 02110-1335 USA + +*****************************************************************************/ + +/**************************************************//** +@file include/buf0buf.h +The database buffer pool high-level routines + +Created 11/5/1995 Heikki Tuuri +*******************************************************/ + +#ifndef buf0buf_h +#define buf0buf_h + +/** Magic value to use instead of checksums when they are disabled */ +#define BUF_NO_CHECKSUM_MAGIC 0xDEADBEEFUL + +#include "fil0fil.h" +#include "mtr0types.h" +#include "buf0types.h" +#include "span.h" +#include "assume_aligned.h" +#ifndef UNIV_INNOCHECKSUM +#include "hash0hash.h" +#include "ut0byte.h" +#include "page0types.h" +#include "log0log.h" +#include "srv0srv.h" +#include <ostream> + +// Forward declaration +struct fil_addr_t; + +/** @name Modes for buf_page_get_gen */ +/* @{ */ +#define BUF_GET 10 /*!< get always */ +#define BUF_GET_IF_IN_POOL 11 /*!< get if in pool */ +#define BUF_PEEK_IF_IN_POOL 12 /*!< get if in pool, do not make + the block young in the LRU list */ +#define BUF_GET_NO_LATCH 14 /*!< get and bufferfix, but + set no latch; we have + separated this case, because + it is error-prone programming + not to set a latch, and it + should be used with care */ +#define BUF_GET_IF_IN_POOL_OR_WATCH 15 + /*!< Get the page only if it's in the + buffer pool, if not then set a watch + on the page. */ +#define BUF_GET_POSSIBLY_FREED 16 + /*!< Like BUF_GET, but do not mind + if the file page has been freed. */ +#define BUF_EVICT_IF_IN_POOL 20 /*!< evict a clean block if found */ +/* @} */ + +# ifdef UNIV_DEBUG +extern my_bool buf_disable_resize_buffer_pool_debug; /*!< if TRUE, resizing + buffer pool is not allowed. */ +# endif /* UNIV_DEBUG */ + +/** buf_page_t::state() values, distinguishing buf_page_t and buf_block_t */ +enum buf_page_state +{ + /** available in buf_pool.free or buf_pool.watch */ + BUF_BLOCK_NOT_USED, + /** allocated for something else than a file page */ + BUF_BLOCK_MEMORY, + /** a previously allocated file page, in transit to NOT_USED */ + BUF_BLOCK_REMOVE_HASH, + /** a buf_block_t that is also in buf_pool.LRU */ + BUF_BLOCK_FILE_PAGE, + /** the buf_page_t of a ROW_FORMAT=COMPRESSED page + whose uncompressed page frame has been evicted */ + BUF_BLOCK_ZIP_PAGE +}; + +/** This structure defines information we will fetch from each buffer pool. 
It +will be used to print table IO stats */ +struct buf_pool_info_t +{ + /* General buffer pool info */ + ulint pool_size; /*!< Buffer Pool size in pages */ + ulint lru_len; /*!< Length of buf_pool.LRU */ + ulint old_lru_len; /*!< buf_pool.LRU_old_len */ + ulint free_list_len; /*!< Length of buf_pool.free list */ + ulint flush_list_len; /*!< Length of buf_pool.flush_list */ + ulint n_pend_unzip; /*!< buf_pool.n_pend_unzip, pages + pending decompress */ + ulint n_pend_reads; /*!< buf_pool.n_pend_reads, pages + pending read */ + ulint n_pending_flush_lru; /*!< Pages pending flush in LRU */ + ulint n_pending_flush_list; /*!< Pages pending flush in FLUSH + LIST */ + ulint n_pages_made_young; /*!< number of pages made young */ + ulint n_pages_not_made_young; /*!< number of pages not made young */ + ulint n_pages_read; /*!< buf_pool.n_pages_read */ + ulint n_pages_created; /*!< buf_pool.n_pages_created */ + ulint n_pages_written; /*!< buf_pool.n_pages_written */ + ulint n_page_gets; /*!< buf_pool.n_page_gets */ + ulint n_ra_pages_read_rnd; /*!< buf_pool.n_ra_pages_read_rnd, + number of pages readahead */ + ulint n_ra_pages_read; /*!< buf_pool.n_ra_pages_read, number + of pages readahead */ + ulint n_ra_pages_evicted; /*!< buf_pool.n_ra_pages_evicted, + number of readahead pages evicted + without access */ + ulint n_page_get_delta; /*!< num of buffer pool page gets since + last printout */ + + /* Buffer pool access stats */ + double page_made_young_rate; /*!< page made young rate in pages + per second */ + double page_not_made_young_rate;/*!< page not made young rate + in pages per second */ + double pages_read_rate; /*!< num of pages read per second */ + double pages_created_rate; /*!< num of pages create per second */ + double pages_written_rate; /*!< num of pages written per second */ + ulint page_read_delta; /*!< num of pages read since last + printout */ + ulint young_making_delta; /*!< num of pages made young since + last printout */ + ulint not_young_making_delta; /*!< num of pages not make young since + last printout */ + + /* Statistics about read ahead algorithm. */ + double pages_readahead_rnd_rate;/*!< random readahead rate in pages per + second */ + double pages_readahead_rate; /*!< readahead rate in pages per + second */ + double pages_evicted_rate; /*!< rate of readahead page evicted + without access, in pages per second */ + + /* Stats about LRU eviction */ + ulint unzip_lru_len; /*!< length of buf_pool.unzip_LRU + list */ + /* Counters for LRU policy */ + ulint io_sum; /*!< buf_LRU_stat_sum.io */ + ulint io_cur; /*!< buf_LRU_stat_cur.io, num of IO + for current interval */ + ulint unzip_sum; /*!< buf_LRU_stat_sum.unzip */ + ulint unzip_cur; /*!< buf_LRU_stat_cur.unzip, num + pages decompressed in current + interval */ +}; +#endif /* !UNIV_INNOCHECKSUM */ + +/** Print the given page_id_t object. +@param[in,out] out the output stream +@param[in] page_id the page_id_t object to be printed +@return the output stream */ +std::ostream& +operator<<( + std::ostream& out, + const page_id_t page_id); + +#ifndef UNIV_INNOCHECKSUM +/*********************************************************************//** +Gets the current size of buffer buf_pool in bytes. +@return size in bytes */ +UNIV_INLINE +ulint +buf_pool_get_curr_size(void); +/*========================*/ + +/********************************************************************//** +Allocates a buf_page_t descriptor. This function must succeed. In case +of failure we assert in this function. 
*/ +UNIV_INLINE +buf_page_t* +buf_page_alloc_descriptor(void) +/*===========================*/ + MY_ATTRIBUTE((malloc)); +/********************************************************************//** +Free a buf_page_t descriptor. */ +UNIV_INLINE +void +buf_page_free_descriptor( +/*=====================*/ + buf_page_t* bpage) /*!< in: bpage descriptor to free. */ + MY_ATTRIBUTE((nonnull)); + +/** Allocate a buffer block. +@return own: the allocated block, in state BUF_BLOCK_MEMORY */ +inline buf_block_t *buf_block_alloc(); +/********************************************************************//** +Frees a buffer block which does not contain a file page. */ +UNIV_INLINE +void +buf_block_free( +/*===========*/ + buf_block_t* block); /*!< in, own: block to be freed */ + +/**************************************************************//** +NOTE! The following macros should be used instead of buf_page_get_gen, +to improve debugging. Only values RW_S_LATCH and RW_X_LATCH are allowed +in LA! */ +#define buf_page_get(ID, SIZE, LA, MTR) \ + buf_page_get_gen(ID, SIZE, LA, NULL, BUF_GET, __FILE__, __LINE__, MTR) + +/**************************************************************//** +Use these macros to bufferfix a page with no latching. Remember not to +read the contents of the page unless you know it is safe. Do not modify +the contents of the page! We have separated this case, because it is +error-prone programming not to set a latch, and it should be used +with care. */ +#define buf_page_get_with_no_latch(ID, SIZE, MTR) \ + buf_page_get_gen(ID, SIZE, RW_NO_LATCH, NULL, BUF_GET_NO_LATCH, \ + __FILE__, __LINE__, MTR) +/********************************************************************//** +This is the general function used to get optimistic access to a database +page. +@return TRUE if success */ +ibool +buf_page_optimistic_get( +/*====================*/ + ulint rw_latch,/*!< in: RW_S_LATCH, RW_X_LATCH */ + buf_block_t* block, /*!< in: guessed block */ + ib_uint64_t modify_clock,/*!< in: modify clock value */ + const char* file, /*!< in: file name */ + unsigned line, /*!< in: line where called */ + mtr_t* mtr); /*!< in: mini-transaction */ + +/** Given a tablespace id and page number tries to get that page. If the +page is not in the buffer pool it is not loaded and NULL is returned. +Suitable for using when holding the lock_sys_t::mutex. +@param[in] page_id page id +@param[in] file file name +@param[in] line line where called +@param[in] mtr mini-transaction +@return pointer to a page or NULL */ +buf_block_t* +buf_page_try_get_func( + const page_id_t page_id, + const char* file, + unsigned line, + mtr_t* mtr); + +/** Tries to get a page. +If the page is not in the buffer pool it is not loaded. Suitable for using +when holding the lock_sys_t::mutex. +@param[in] page_id page identifier +@param[in] mtr mini-transaction +@return the page if in buffer pool, NULL if not */ +#define buf_page_try_get(page_id, mtr) \ + buf_page_try_get_func((page_id), __FILE__, __LINE__, mtr); + +/** Get read access to a compressed page (usually of type +FIL_PAGE_TYPE_ZBLOB or FIL_PAGE_TYPE_ZBLOB2). +The page must be released with buf_page_release_zip(). +NOTE: the page is not protected by any latch. Mutual exclusion has to +be implemented at a higher level. In other words, all possible +accesses to a given page through this function must be protected by +the same set of mutexes or latches. 
+@param[in] page_id page id +@param[in] zip_size ROW_FORMAT=COMPRESSED page size +@return pointer to the block */ +buf_page_t* buf_page_get_zip(const page_id_t page_id, ulint zip_size); + +/** Get access to a database page. Buffered redo log may be applied. +@param[in] page_id page id +@param[in] zip_size ROW_FORMAT=COMPRESSED page size, or 0 +@param[in] rw_latch RW_S_LATCH, RW_X_LATCH, RW_NO_LATCH +@param[in] guess guessed block or NULL +@param[in] mode BUF_GET, BUF_GET_IF_IN_POOL, +BUF_PEEK_IF_IN_POOL, BUF_GET_NO_LATCH, or BUF_GET_IF_IN_POOL_OR_WATCH +@param[in] file file name +@param[in] line line where called +@param[in] mtr mini-transaction +@param[out] err DB_SUCCESS or error code +@param[in] allow_ibuf_merge Allow change buffer merge while +reading the pages from file. +@return pointer to the block or NULL */ +buf_block_t* +buf_page_get_gen( + const page_id_t page_id, + ulint zip_size, + ulint rw_latch, + buf_block_t* guess, + ulint mode, + const char* file, + unsigned line, + mtr_t* mtr, + dberr_t* err = NULL, + bool allow_ibuf_merge = false); + +/** This is the low level function used to get access to a database page. +@param[in] page_id page id +@param[in] zip_size ROW_FORMAT=COMPRESSED page size, or 0 +@param[in] rw_latch RW_S_LATCH, RW_X_LATCH, RW_NO_LATCH +@param[in] guess guessed block or NULL +@param[in] mode BUF_GET, BUF_GET_IF_IN_POOL, +BUF_PEEK_IF_IN_POOL, BUF_GET_NO_LATCH, or BUF_GET_IF_IN_POOL_OR_WATCH +@param[in] file file name +@param[in] line line where called +@param[in] mtr mini-transaction +@param[out] err DB_SUCCESS or error code +@param[in] allow_ibuf_merge Allow change buffer merge to happen +while reading the page from file +then it makes sure that it does merging of change buffer changes while +reading the page from file. +@return pointer to the block or NULL */ +buf_block_t* +buf_page_get_low( + const page_id_t page_id, + ulint zip_size, + ulint rw_latch, + buf_block_t* guess, + ulint mode, + const char* file, + unsigned line, + mtr_t* mtr, + dberr_t* err, + bool allow_ibuf_merge); + +/** Initialize a page in the buffer pool. The page is usually not read +from a file even if it cannot be found in the buffer buf_pool. This is one +of the functions which perform to a block a state transition NOT_USED => +FILE_PAGE (the other is buf_page_get_gen). +@param[in,out] space space object +@param[in] offset offset of the tablespace +@param[in] zip_size ROW_FORMAT=COMPRESSED page size, or 0 +@param[in,out] mtr mini-transaction +@param[in,out] free_block pre-allocated buffer block +@return pointer to the block, page bufferfixed */ +buf_block_t* +buf_page_create(fil_space_t *space, uint32_t offset, + ulint zip_size, mtr_t *mtr, buf_block_t *free_block); + +/********************************************************************//** +Releases a compressed-only page acquired with buf_page_get_zip(). */ +UNIV_INLINE +void +buf_page_release_zip( +/*=================*/ + buf_page_t* bpage); /*!< in: buffer block */ +/********************************************************************//** +Releases a latch, if specified. */ +UNIV_INLINE +void +buf_page_release_latch( +/*=====================*/ + buf_block_t* block, /*!< in: buffer block */ + ulint rw_latch); /*!< in: RW_S_LATCH, RW_X_LATCH, + RW_NO_LATCH */ +/** Move a block to the start of the LRU list. */ +void buf_page_make_young(buf_page_t *bpage); +/** Mark the page status as FREED for the given tablespace id and +page number. If the page is not in buffer pool then ignore it. 
+@param[in,out] space tablespace +@param[in] page page number +@param[in,out] mtr mini-transaction +@param[in] file file name +@param[in] line line where called */ +void buf_page_free(fil_space_t *space, uint32_t page, mtr_t *mtr, + const char *file, unsigned line); + +/********************************************************************//** +Reads the freed_page_clock of a buffer block. +@return freed_page_clock */ +UNIV_INLINE +unsigned +buf_page_get_freed_page_clock( +/*==========================*/ + const buf_page_t* bpage) /*!< in: block */ + MY_ATTRIBUTE((warn_unused_result)); +/********************************************************************//** +Reads the freed_page_clock of a buffer block. +@return freed_page_clock */ +UNIV_INLINE +unsigned +buf_block_get_freed_page_clock( +/*===========================*/ + const buf_block_t* block) /*!< in: block */ + MY_ATTRIBUTE((warn_unused_result)); + +/** Determine if a block is still close enough to the MRU end of the LRU list +meaning that it is not in danger of getting evicted and also implying +that it has been accessed recently. +Note that this is for heuristics only and does not reserve buffer pool +mutex. +@param[in] bpage buffer pool page +@return whether bpage is close to MRU end of LRU */ +inline bool buf_page_peek_if_young(const buf_page_t *bpage); + +/** Determine if a block should be moved to the start of the LRU list if +there is danger of dropping from the buffer pool. +@param[in] bpage buffer pool page +@return true if bpage should be made younger */ +inline bool buf_page_peek_if_too_old(const buf_page_t *bpage); + +/** Move a page to the start of the buffer pool LRU list if it is too old. +@param[in,out] bpage buffer pool page */ +inline void buf_page_make_young_if_needed(buf_page_t *bpage) +{ + if (UNIV_UNLIKELY(buf_page_peek_if_too_old(bpage))) { + buf_page_make_young(bpage); + } +} + +/********************************************************************//** +Increments the modify clock of a frame by 1. The caller must (1) own the +buf_pool.mutex and block bufferfix count has to be zero, (2) or own an x-lock +on the block. */ +UNIV_INLINE +void +buf_block_modify_clock_inc( +/*=======================*/ + buf_block_t* block); /*!< in: block */ +/********************************************************************//** +Returns the value of the modify clock. The caller must have an s-lock +or x-lock on the block. +@return value */ +UNIV_INLINE +ib_uint64_t +buf_block_get_modify_clock( +/*=======================*/ + buf_block_t* block); /*!< in: block */ +/*******************************************************************//** +Increments the bufferfix count. */ +UNIV_INLINE +void +buf_block_buf_fix_inc_func( +/*=======================*/ +# ifdef UNIV_DEBUG + const char* file, /*!< in: file name */ + unsigned line, /*!< in: line */ +# endif /* UNIV_DEBUG */ + buf_block_t* block) /*!< in/out: block to bufferfix */ + MY_ATTRIBUTE((nonnull)); + +# ifdef UNIV_DEBUG +/** Increments the bufferfix count. +@param[in,out] b block to bufferfix +@param[in] f file name where requested +@param[in] l line number where requested */ +# define buf_block_buf_fix_inc(b,f,l) buf_block_buf_fix_inc_func(f,l,b) +# else /* UNIV_DEBUG */ +/** Increments the bufferfix count. 
+@param[in,out] b block to bufferfix +@param[in] f file name where requested +@param[in] l line number where requested */ +# define buf_block_buf_fix_inc(b,f,l) buf_block_buf_fix_inc_func(b) +# endif /* UNIV_DEBUG */ +#endif /* !UNIV_INNOCHECKSUM */ + +/** Check if a buffer is all zeroes. +@param[in] buf data to check +@return whether the buffer is all zeroes */ +bool buf_is_zeroes(st_::span<const byte> buf); + +/** Checks if the page is in crc32 checksum format. +@param[in] read_buf database page +@param[in] checksum_field1 new checksum field +@param[in] checksum_field2 old checksum field +@return true if the page is in crc32 checksum format. */ +bool +buf_page_is_checksum_valid_crc32( + const byte* read_buf, + ulint checksum_field1, + ulint checksum_field2) + MY_ATTRIBUTE((nonnull(1), warn_unused_result)); + +/** Checks if the page is in innodb checksum format. +@param[in] read_buf database page +@param[in] checksum_field1 new checksum field +@param[in] checksum_field2 old checksum field +@return true if the page is in innodb checksum format. */ +bool +buf_page_is_checksum_valid_innodb( + const byte* read_buf, + ulint checksum_field1, + ulint checksum_field2) + MY_ATTRIBUTE((nonnull(1), warn_unused_result)); + +/** Checks if the page is in none checksum format. +@param[in] read_buf database page +@param[in] checksum_field1 new checksum field +@param[in] checksum_field2 old checksum field +@return true if the page is in none checksum format. */ +bool +buf_page_is_checksum_valid_none( + const byte* read_buf, + ulint checksum_field1, + ulint checksum_field2) + MY_ATTRIBUTE((nonnull(1), warn_unused_result)); + +/** Check if a page is corrupt. +@param[in] check_lsn whether the LSN should be checked +@param[in] read_buf database page +@param[in] fsp_flags tablespace flags +@return whether the page is corrupted */ +bool +buf_page_is_corrupted( + bool check_lsn, + const byte* read_buf, + ulint fsp_flags) + MY_ATTRIBUTE((warn_unused_result)); + +inline void *aligned_malloc(size_t size, size_t align) +{ +#ifdef _MSC_VER + return _aligned_malloc(size, align); +#else + void *result; + if (posix_memalign(&result, align, size)) + result= NULL; + return result; +#endif +} + +inline void aligned_free(void *ptr) +{ +#ifdef _MSC_VER + _aligned_free(ptr); +#else + free(ptr); +#endif +} + +/** Read the key version from the page. In full crc32 format, +key version is stored at {0-3th} bytes. In other format, it is +stored in 26th position. +@param[in] read_buf database page +@param[in] fsp_flags tablespace flags +@return key version of the page. */ +inline uint32_t buf_page_get_key_version(const byte* read_buf, ulint fsp_flags) +{ + static_assert(FIL_PAGE_FCRC32_KEY_VERSION == 0, "compatibility"); + return fil_space_t::full_crc32(fsp_flags) + ? mach_read_from_4(my_assume_aligned<4>(read_buf)) + : mach_read_from_4(my_assume_aligned<2> + (read_buf + FIL_PAGE_FILE_FLUSH_LSN_OR_KEY_VERSION)); +} + +/** Read the compression info from the page. In full crc32 format, +compression info is at MSB of page type. In other format, it is +stored in page type. +@param[in] read_buf database page +@param[in] fsp_flags tablespace flags +@return true if page is compressed. */ +inline bool buf_page_is_compressed(const byte* read_buf, ulint fsp_flags) +{ + uint16_t page_type= fil_page_get_type(read_buf); + return fil_space_t::full_crc32(fsp_flags) + ? !!(page_type & 1U << FIL_PAGE_COMPRESS_FCRC32_MARKER) + : page_type == FIL_PAGE_PAGE_COMPRESSED; +} + +/** Get the compressed or uncompressed size of a full_crc32 page. 
+@param[in] buf page_compressed or uncompressed page +@param[out] comp whether the page could be compressed +@param[out] cr whether the page could be corrupted +@return the payload size in the file page */ +inline uint buf_page_full_crc32_size(const byte* buf, bool* comp, bool* cr) +{ + uint t = fil_page_get_type(buf); + uint page_size = uint(srv_page_size); + + if (!(t & 1U << FIL_PAGE_COMPRESS_FCRC32_MARKER)) { + return page_size; + } + + t &= ~(1U << FIL_PAGE_COMPRESS_FCRC32_MARKER); + t <<= 8; + + if (t < page_size) { + page_size = t; + if (comp) { + *comp = true; + } + } else if (cr) { + *cr = true; + } + + return page_size; +} + +#ifndef UNIV_INNOCHECKSUM +/** Dump a page to stderr. +@param[in] read_buf database page +@param[in] zip_size compressed page size, or 0 */ +void buf_page_print(const byte* read_buf, ulint zip_size = 0) + ATTRIBUTE_COLD __attribute__((nonnull)); +/********************************************************************//** +Decompress a block. +@return TRUE if successful */ +ibool +buf_zip_decompress( +/*===============*/ + buf_block_t* block, /*!< in/out: block */ + ibool check); /*!< in: TRUE=verify the page checksum */ + +#ifdef UNIV_DEBUG +/** @return the number of latched pages in the buffer pool */ +ulint buf_get_latched_pages_number(); +#endif /* UNIV_DEBUG */ +/*********************************************************************//** +Prints info of the buffer i/o. */ +void +buf_print_io( +/*=========*/ + FILE* file); /*!< in: file where to print */ +/** Collect buffer pool metadata. +@param[out] pool_info buffer pool metadata */ +void buf_stats_get_pool_info(buf_pool_info_t *pool_info); + +/** Refresh the statistics used to print per-second averages. */ +void buf_refresh_io_stats(); + +/** Invalidate all pages in the buffer pool. +All pages must be in a replaceable state (not modified or latched). */ +void buf_pool_invalidate(); + +/*======================================================================== +--------------------------- LOWER LEVEL ROUTINES ------------------------- +=========================================================================*/ + +#ifdef UNIV_DEBUG +/*********************************************************************//** +Adds latch level info for the rw-lock protecting the buffer frame. This +should be called in the debug version after a successful latching of a +page if we know the latching order level of the acquired latch. */ +UNIV_INLINE +void +buf_block_dbg_add_level( +/*====================*/ + buf_block_t* block, /*!< in: buffer page + where we have acquired latch */ + latch_level_t level); /*!< in: latching order level */ +#else /* UNIV_DEBUG */ +# define buf_block_dbg_add_level(block, level) /* nothing */ +#endif /* UNIV_DEBUG */ + +#ifdef UNIV_DEBUG +/*********************************************************************//** +Gets a pointer to the memory frame of a block. +@return pointer to the frame */ +UNIV_INLINE +buf_frame_t* +buf_block_get_frame( +/*================*/ + const buf_block_t* block) /*!< in: pointer to the control block */ + MY_ATTRIBUTE((warn_unused_result)); +#else /* UNIV_DEBUG */ +# define buf_block_get_frame(block) (block)->frame +#endif /* UNIV_DEBUG */ + +/*********************************************************************//** +Gets the compressed page descriptor corresponding to an uncompressed page +if applicable. */ +#define buf_block_get_page_zip(block) \ + (UNIV_LIKELY_NULL((block)->page.zip.data) ? 
&(block)->page.zip : NULL) +#define is_buf_block_get_page_zip(block) \ + UNIV_LIKELY_NULL((block)->page.zip.data) + +/** Monitor the buffer page read/write activity, and increment corresponding +counter value in MONITOR_MODULE_BUF_PAGE. +@param bpage buffer page whose read or write was completed +@param io_type BUF_IO_READ or BUF_IO_WRITE */ +ATTRIBUTE_COLD __attribute__((nonnull)) +void buf_page_monitor(const buf_page_t *bpage, buf_io_fix io_type); + +/** Complete a read request of a file page to buf_pool. +@param bpage recently read page +@param node data file +@return whether the operation succeeded +@retval DB_SUCCESS always when writing, or if a read page was OK +@retval DB_PAGE_CORRUPTED if the checksum fails on a page read +@retval DB_DECRYPTION_FAILED if the page cannot be decrypted */ +dberr_t buf_page_read_complete(buf_page_t *bpage, const fil_node_t &node); + +/** Calculate aligned buffer pool size based on srv_buf_pool_chunk_unit, +if needed. +@param[in] size size in bytes +@return aligned size */ +UNIV_INLINE +ulint +buf_pool_size_align( + ulint size); + +/** Verify that post encryption checksum match with the calculated checksum. +This function should be called only if tablespace contains crypt data metadata. +@param[in] page page frame +@param[in] fsp_flags tablespace flags +@return true if page is encrypted and OK, false otherwise */ +bool buf_page_verify_crypt_checksum( + const byte* page, + ulint fsp_flags); + +/** Calculate a ROW_FORMAT=COMPRESSED page checksum and update the page. +@param[in,out] page page to update +@param[in] size compressed page size */ +void buf_flush_update_zip_checksum(buf_frame_t* page, ulint size); + +/** @brief The temporary memory structure. + +NOTE! The definition appears here only for other modules of this +directory (buf) to see it. Do not use from outside! */ + +class buf_tmp_buffer_t +{ + /** whether this slot is reserved */ + std::atomic<bool> reserved; +public: + /** For encryption, the data needs to be copied to a separate buffer + before it's encrypted&written. The buffer block itself can be replaced + while a write of crypt_buf to file is in progress. */ + byte *crypt_buf; + /** buffer for fil_page_compress(), for flushing page_compressed pages */ + byte *comp_buf; + /** pointer to resulting buffer after encryption or compression; + not separately allocated memory */ + byte *out_buf; + + /** Release the slot */ + void release() { reserved.store(false, std::memory_order_relaxed); } + + /** Acquire the slot + @return whether the slot was acquired */ + bool acquire() { return !reserved.exchange(true, std::memory_order_relaxed);} + + /** Allocate a buffer for encryption, decryption or decompression. */ + void allocate() + { + if (!crypt_buf) + crypt_buf= static_cast<byte*> + (aligned_malloc(srv_page_size, srv_page_size)); + } +}; + +/** The common buffer control block structure +for compressed and uncompressed frames */ + +class buf_pool_t; + +class buf_page_t +{ + friend buf_pool_t; + friend buf_block_t; + /** @name General fields */ + /* @{ */ + +public: // FIXME: fix fil_iterate() + /** Page id. Protected by buf_pool.hash_lock_get(id) when + the page is in buf_pool.page_hash. */ + page_id_t id_; +private: + /** Count of how manyfold this block is currently bufferfixed. 
*/ + Atomic_counter<uint32_t> buf_fix_count_; + + /** log sequence number of the START of the log entry written of the + oldest modification to this block which has not yet been written + to the data file; + + 0 if no modifications are pending; + 1 if no modifications are pending, but the block is in buf_pool.flush_list; + 2 if modifications are pending, but the block is not in buf_pool.flush_list + (because id().space() is the temporary tablespace). */ + Atomic_relaxed<lsn_t> oldest_modification_; + + /** type of pending I/O operation; protected by buf_pool.mutex + if in_LRU_list */ + Atomic_relaxed<buf_io_fix> io_fix_; + /** Block state. @see in_file(). + State transitions between in_file() states and to + BUF_BLOCK_REMOVE_HASH are protected by buf_pool.hash_lock_get(id) + when the block is in buf_pool.page_hash. + Other transitions when in_LRU_list are protected by buf_pool.mutex. */ + buf_page_state state_; + +public: + /** buf_pool.page_hash link; protected by buf_pool.hash_lock_get(id) */ + buf_page_t *hash; + /* @} */ + page_zip_des_t zip; /*!< compressed page; zip.data + (but not the data it points to) is + also protected by buf_pool.mutex; + state == BUF_BLOCK_ZIP_PAGE and + zip.data == NULL means an active + buf_pool.watch */ + + buf_tmp_buffer_t* slot; /*!< Slot for temporary memory + used for encryption/compression + or NULL */ +#ifdef UNIV_DEBUG + /** whether this->list is in buf_pool.zip_hash; protected by buf_pool.mutex */ + bool in_zip_hash; + /** whether this->LRU is in buf_pool.LRU (in_file() holds); + protected by buf_pool.mutex */ + bool in_LRU_list; + /** whether this is in buf_pool.page_hash (in_file() holds); + protected by buf_pool.mutex */ + bool in_page_hash; + /** whether this->list is in buf_pool.free (state() == BUF_BLOCK_NOT_USED); + protected by buf_pool.flush_list_mutex */ + bool in_free_list; +#endif /* UNIV_DEBUG */ + /** list member in one of the lists of buf_pool; protected by + buf_pool.mutex or buf_pool.flush_list_mutex + + state() == BUF_BLOCK_NOT_USED: buf_pool.free or buf_pool.withdraw + + in_file() && oldest_modification(): + buf_pool.flush_list (protected by buf_pool.flush_list_mutex) + + The contents is undefined if in_file() && !oldest_modification(), + or if state() is BUF_BLOCK_MEMORY or BUF_BLOCK_REMOVE_HASH. */ + UT_LIST_NODE_T(buf_page_t) list; + + /** @name LRU replacement algorithm fields. + Protected by buf_pool.mutex. */ + /* @{ */ + + UT_LIST_NODE_T(buf_page_t) LRU; + /*!< node of the LRU list */ + unsigned old:1; /*!< TRUE if the block is in the old + blocks in buf_pool.LRU_old */ + unsigned freed_page_clock:31;/*!< the value of + buf_pool.freed_page_clock + when this block was the last + time put to the head of the + LRU list; a thread is allowed + to read this for heuristic + purposes without holding any + mutex or latch */ + /* @} */ + Atomic_counter<unsigned> access_time; /*!< time of first access, or + 0 if the block was never accessed + in the buffer pool. + + For state==BUF_BLOCK_MEMORY + blocks, this field can be repurposed + for something else. + + When this field counts log records + and bytes allocated for recv_sys.pages, + the field is protected by + recv_sys_t::mutex. */ + /** Change buffer entries for the page exist. + Protected by io_fix()==BUF_IO_READ or by buf_block_t::lock. */ + bool ibuf_exist; + + /** Block initialization status. 
Can be modified while holding io_fix() + or buf_block_t::lock X-latch */ + enum { + /** the page was read normally and should be flushed normally */ + NORMAL = 0, + /** the page was (re)initialized, and the doublewrite buffer can be + skipped on the next flush */ + INIT_ON_FLUSH, + /** the page was freed and need to be flushed. + For page_compressed, page flush will punch a hole to free space. + Else if innodb_immediate_scrub_data_uncompressed, the page will + be overwritten with zeroes. */ + FREED + } status; + + buf_page_t() : id_(0) + { + static_assert(BUF_BLOCK_NOT_USED == 0, "compatibility"); + memset((void*) this, 0, sizeof *this); + } + + /** Initialize some fields */ + void init() + { + io_fix_= BUF_IO_NONE; + buf_fix_count_= 0; + old= 0; + freed_page_clock= 0; + access_time= 0; + oldest_modification_= 0; + slot= nullptr; + ibuf_exist= false; + status= NORMAL; + ut_d(in_zip_hash= false); + ut_d(in_free_list= false); + ut_d(in_LRU_list= false); + ut_d(in_page_hash= false); + HASH_INVALIDATE(this, hash); + } + + /** Initialize some more fields */ + void init(buf_page_state state, page_id_t id, uint32_t buf_fix_count= 0) + { + init(); + state_= state; + id_= id; + buf_fix_count_= buf_fix_count; + } + + /** Initialize some more fields */ + void init(page_id_t id, uint32_t buf_fix_count= 0) + { + init(); + id_= id; + buf_fix_count_= buf_fix_count; + } + +public: + const page_id_t &id() const { return id_; } + buf_page_state state() const { return state_; } + uint32_t buf_fix_count() const { return buf_fix_count_; } + buf_io_fix io_fix() const { return io_fix_; } + void io_unfix() + { + ut_d(const auto old_io_fix= io_fix()); + ut_ad(old_io_fix == BUF_IO_READ || old_io_fix == BUF_IO_PIN); + io_fix_= BUF_IO_NONE; + } + + /** @return if this belongs to buf_pool.unzip_LRU */ + bool belongs_to_unzip_LRU() const + { + return zip.data && state() != BUF_BLOCK_ZIP_PAGE; + } + + inline void add_buf_fix_count(uint32_t count); + inline void set_buf_fix_count(uint32_t count); + inline void set_state(buf_page_state state); + inline void set_io_fix(buf_io_fix io_fix); + inline void set_corrupt_id(); + + /** @return the log sequence number of the oldest pending modification + @retval 0 if the block is being removed from (or not in) buf_pool.flush_list + @retval 1 if the block is in buf_pool.flush_list but not modified + @retval 2 if the block belongs to the temporary tablespace and + has unwritten changes */ + lsn_t oldest_modification() const { return oldest_modification_; } + /** @return the log sequence number of the oldest pending modification, + @retval 0 if the block is definitely not in buf_pool.flush_list + @retval 1 if the block is in buf_pool.flush_list but not modified + @retval 2 if the block belongs to the temporary tablespace and + has unwritten changes */ + lsn_t oldest_modification_acquire() const + { return oldest_modification_.load(std::memory_order_acquire); } + /** Set oldest_modification when adding to buf_pool.flush_list */ + inline void set_oldest_modification(lsn_t lsn); + /** Clear oldest_modification after removing from buf_pool.flush_list */ + inline void clear_oldest_modification(); + /** Note that a block is no longer dirty, while not removing + it from buf_pool.flush_list */ + inline void clear_oldest_modification(bool temporary); + + /** Notify that a page in a temporary tablespace has been modified. 
*/ + void set_temp_modified() + { + ut_ad(fsp_is_system_temporary(id().space())); + ut_ad(state() == BUF_BLOCK_FILE_PAGE); + ut_ad(!oldest_modification()); + oldest_modification_= 2; + } + + /** Prepare to release a file page to buf_pool.free. */ + void free_file_page() + { + ut_ad(state() == BUF_BLOCK_REMOVE_HASH); + /* buf_LRU_block_free_non_file_page() asserts !oldest_modification() */ + ut_d(oldest_modification_= 0;) + set_corrupt_id(); + ut_d(set_state(BUF_BLOCK_MEMORY)); + } + + void fix() { buf_fix_count_++; } + uint32_t unfix() + { + uint32_t count= buf_fix_count_--; + ut_ad(count != 0); + return count - 1; + } + + /** @return the physical size, in bytes */ + ulint physical_size() const + { + return zip.ssize ? (UNIV_ZIP_SIZE_MIN >> 1) << zip.ssize : srv_page_size; + } + + /** @return the ROW_FORMAT=COMPRESSED physical size, in bytes + @retval 0 if not compressed */ + ulint zip_size() const + { + return zip.ssize ? (UNIV_ZIP_SIZE_MIN >> 1) << zip.ssize : 0; + } + + /** @return the byte offset of the page within a file */ + os_offset_t physical_offset() const + { + os_offset_t o= id().page_no(); + return zip.ssize + ? o << (zip.ssize + (UNIV_ZIP_SIZE_SHIFT_MIN - 1)) + : o << srv_page_size_shift; + } + + /** @return whether the block is mapped to a data file */ + bool in_file() const + { + switch (state_) { + case BUF_BLOCK_ZIP_PAGE: + case BUF_BLOCK_FILE_PAGE: + return true; + case BUF_BLOCK_NOT_USED: + case BUF_BLOCK_MEMORY: + case BUF_BLOCK_REMOVE_HASH: + return false; + } + + ut_error; + return false; + } + + /** @return whether the block is modified and ready for flushing */ + inline bool ready_for_flush() const; + /** @return whether the state can be changed to BUF_BLOCK_NOT_USED */ + bool ready_for_replace() const + { return !oldest_modification() && can_relocate(); } + /** @return whether the block can be relocated in memory. + The block can be dirty, but it must not be I/O-fixed or bufferfixed. 
*/ + inline bool can_relocate() const; + /** @return whether the block has been flagged old in buf_pool.LRU */ + inline bool is_old() const; + /** Set whether a block is old in buf_pool.LRU */ + inline void set_old(bool old); + /** Flag a page accessed in buf_pool + @return whether this is not the first access */ + bool set_accessed() + { + if (is_accessed()) return true; + access_time= static_cast<uint32_t>(ut_time_ms()); + return false; + } + /** @return ut_time_ms() at the time of first access of a block in buf_pool + @retval 0 if not accessed */ + unsigned is_accessed() const { ut_ad(in_file()); return access_time; } +}; + +/** The buffer control block structure */ + +struct buf_block_t{ + + /** @name General fields */ + /* @{ */ + + buf_page_t page; /*!< page information; this must + be the first field, so that + buf_pool.page_hash can point + to buf_page_t or buf_block_t */ + byte* frame; /*!< pointer to buffer frame which + is of size srv_page_size, and + aligned to an address divisible by + srv_page_size */ + rw_lock_t lock; /*!< read-write lock of the buffer + frame */ +#ifdef UNIV_DEBUG + /** whether page.list is in buf_pool.withdraw + ((state() == BUF_BLOCK_NOT_USED)) and the buffer pool is being shrunk; + protected by buf_pool.mutex */ + bool in_withdraw_list; + /** whether unzip_LRU is in buf_pool.unzip_LRU + (state() == BUF_BLOCK_FILE_PAGE and zip.data != nullptr); + protected by buf_pool.mutex */ + bool in_unzip_LRU_list; +#endif + UT_LIST_NODE_T(buf_block_t) unzip_LRU; + /*!< node of the decompressed LRU list; + a block is in the unzip_LRU list + if page.state() == BUF_BLOCK_FILE_PAGE + and page.zip.data != NULL */ + /* @} */ + /** @name Optimistic search field */ + /* @{ */ + + ib_uint64_t modify_clock; /*!< this clock is incremented every + time a pointer to a record on the + page may become obsolete; this is + used in the optimistic cursor + positioning: if the modify clock has + not changed, we know that the pointer + is still valid; this field may be + changed if the thread (1) owns the + pool mutex and the page is not + bufferfixed, or (2) the thread has an + x-latch on the block */ + /* @} */ +#ifdef BTR_CUR_HASH_ADAPT + /** @name Hash search fields (unprotected) + NOTE that these fields are NOT protected by any semaphore! */ + /* @{ */ + + volatile uint16_t n_bytes; /*!< recommended prefix length for hash + search: number of bytes in + an incomplete last field */ + volatile uint16_t n_fields; /*!< recommended prefix length for hash + search: number of full fields */ + uint16_t n_hash_helps; /*!< counter which controls building + of a new hash index for the page */ + volatile bool left_side; /*!< true or false, depending on + whether the leftmost record of several + records with the same prefix should be + indexed in the hash index */ + /* @} */ + + /** @name Hash search fields + These 5 fields may only be modified when: + we are holding the appropriate x-latch in btr_search_latches[], and + one of the following holds: + (1) the block state is BUF_BLOCK_FILE_PAGE, and + we are holding an s-latch or x-latch on buf_block_t::lock, or + (2) buf_block_t::buf_fix_count == 0, or + (3) the block state is BUF_BLOCK_REMOVE_HASH. + + An exception to this is when we init or create a page + in the buffer pool in buf0buf.cc. + + Another exception for buf_pool_t::clear_hash_index() is that + assigning block->index = NULL (and block->n_pointers = 0) + is allowed whenever btr_search_own_all(RW_LOCK_X). 
+ + Another exception is that ha_insert_for_fold() may + decrement n_pointers without holding the appropriate latch + in btr_search_latches[]. Thus, n_pointers must be + protected by atomic memory access. + + This implies that the fields may be read without race + condition whenever any of the following hold: + - the btr_search_latches[] s-latch or x-latch is being held, or + - the block state is not BUF_BLOCK_FILE_PAGE or BUF_BLOCK_REMOVE_HASH, + and holding some latch prevents the state from changing to that. + + Some use of assert_block_ahi_empty() or assert_block_ahi_valid() + is prone to race conditions while buf_pool_t::clear_hash_index() is + executing (the adaptive hash index is being disabled). Such use + is explicitly commented. */ + + /* @{ */ + +# if defined UNIV_AHI_DEBUG || defined UNIV_DEBUG + Atomic_counter<ulint> + n_pointers; /*!< used in debugging: the number of + pointers in the adaptive hash index + pointing to this frame; + protected by atomic memory access + or btr_search_own_all(). */ +# define assert_block_ahi_empty(block) \ + ut_a((block)->n_pointers == 0) +# define assert_block_ahi_empty_on_init(block) do { \ + MEM_MAKE_DEFINED(&(block)->n_pointers, sizeof (block)->n_pointers); \ + assert_block_ahi_empty(block); \ +} while (0) +# define assert_block_ahi_valid(block) \ + ut_a((block)->index || (block)->n_pointers == 0) +# else /* UNIV_AHI_DEBUG || UNIV_DEBUG */ +# define assert_block_ahi_empty(block) /* nothing */ +# define assert_block_ahi_empty_on_init(block) /* nothing */ +# define assert_block_ahi_valid(block) /* nothing */ +# endif /* UNIV_AHI_DEBUG || UNIV_DEBUG */ + unsigned curr_n_fields:10;/*!< prefix length for hash indexing: + number of full fields */ + unsigned curr_n_bytes:15;/*!< number of bytes in hash + indexing */ + unsigned curr_left_side:1;/*!< TRUE or FALSE in hash indexing */ + dict_index_t* index; /*!< Index for which the + adaptive hash index has been + created, or NULL if the page + does not exist in the + index. Note that it does not + guarantee that the index is + complete, though: there may + have been hash collisions, + record deletions, etc. */ + /* @} */ +#else /* BTR_CUR_HASH_ADAPT */ +# define assert_block_ahi_empty(block) /* nothing */ +# define assert_block_ahi_empty_on_init(block) /* nothing */ +# define assert_block_ahi_valid(block) /* nothing */ +#endif /* BTR_CUR_HASH_ADAPT */ +# ifdef UNIV_DEBUG + /** @name Debug fields */ + /* @{ */ + rw_lock_t* debug_latch; /*!< in the debug version, each thread + which bufferfixes the block acquires + an s-latch here; so we can use the + debug utilities in sync0rw */ + /* @} */ +# endif + void fix() { page.fix(); } + uint32_t unfix() + { + ut_ad(page.buf_fix_count() || page.io_fix() != BUF_IO_NONE || + page.state() == BUF_BLOCK_ZIP_PAGE || + !rw_lock_own_flagged(&lock, RW_LOCK_FLAG_X | RW_LOCK_FLAG_S | + RW_LOCK_FLAG_SX)); + return page.unfix(); + } + + /** @return the physical size, in bytes */ + ulint physical_size() const { return page.physical_size(); } + + /** @return the ROW_FORMAT=COMPRESSED physical size, in bytes + @retval 0 if not compressed */ + ulint zip_size() const { return page.zip_size(); } + + /** Initialize the block. + @param page_id page identifier + @param zip_size ROW_FORMAT=COMPRESSED page size, or 0 + @param fix initial buf_fix_count() */ + void initialise(const page_id_t page_id, ulint zip_size, uint32_t fix= 0); +}; + +/**********************************************************************//** +Compute the hash fold value for blocks in buf_pool.zip_hash. 
*/ +/* @{ */ +#define BUF_POOL_ZIP_FOLD_PTR(ptr) (ulint(ptr) >> srv_page_size_shift) +#define BUF_POOL_ZIP_FOLD(b) BUF_POOL_ZIP_FOLD_PTR((b)->frame) +#define BUF_POOL_ZIP_FOLD_BPAGE(b) BUF_POOL_ZIP_FOLD((buf_block_t*) (b)) +/* @} */ + +/** A "Hazard Pointer" class used to iterate over page lists +inside the buffer pool. A hazard pointer is a buf_page_t pointer +which we intend to iterate over next and we want it remain valid +even after we release the buffer pool mutex. */ +class HazardPointer +{ +public: + virtual ~HazardPointer() {} + + /** @return current value */ + buf_page_t *get() const { mysql_mutex_assert_owner(m_mutex); return m_hp; } + + /** Set current value + @param bpage buffer block to be set as hp */ + void set(buf_page_t *bpage) + { + mysql_mutex_assert_owner(m_mutex); + ut_ad(!bpage || bpage->in_file()); + m_hp= bpage; + } + + /** Checks if a bpage is the hp + @param bpage buffer block to be compared + @return true if it is hp */ + bool is_hp(const buf_page_t *bpage) const + { mysql_mutex_assert_owner(m_mutex); return bpage == m_hp; } + + /** Adjust the value of hp. This happens when some + other thread working on the same list attempts to + remove the hp from the list. */ + virtual void adjust(const buf_page_t*) = 0; + +#ifdef UNIV_DEBUG + /** mutex that protects access to the m_hp. */ + const mysql_mutex_t *m_mutex= nullptr; +#endif /* UNIV_DEBUG */ + +protected: + /** hazard pointer */ + buf_page_t *m_hp= nullptr; +}; + +/** Class implementing buf_pool.flush_list hazard pointer */ +class FlushHp : public HazardPointer +{ +public: + ~FlushHp() override {} + + /** Adjust the value of hp. This happens when some + other thread working on the same list attempts to + remove the hp from the list. + @param bpage buffer block to be compared */ + void adjust(const buf_page_t *bpage) override + { + ut_ad(bpage != NULL); + + /* We only support reverse traversal for now. */ + if (is_hp(bpage)) + m_hp= UT_LIST_GET_PREV(list, m_hp); + + ut_ad(!m_hp || m_hp->oldest_modification()); + } +}; + +/** Class implementing buf_pool.LRU hazard pointer */ +class LRUHp : public HazardPointer { +public: + ~LRUHp() override {} + + /** Adjust the value of hp. This happens when some + other thread working on the same list attempts to + remove the hp from the list. + @param bpage buffer block to be compared */ + void adjust(const buf_page_t *bpage) override + { + ut_ad(bpage); + /** We only support reverse traversal for now. */ + if (is_hp(bpage)) + m_hp= UT_LIST_GET_PREV(LRU, m_hp); + + ut_ad(!m_hp || m_hp->in_LRU_list); + } +}; + +/** Special purpose iterators to be used when scanning the LRU list. +The idea is that when one thread finishes the scan it leaves the +itr in that position and the other thread can start scan from +there */ +class LRUItr : public LRUHp { +public: + LRUItr() : LRUHp() {} + ~LRUItr() override {} + + /** Select from where to start a scan. If we have scanned + too deep into the LRU list it resets the value to the tail + of the LRU list. + @return buf_page_t from where to start scan. */ + inline buf_page_t *start(); +}; + +/** Struct that is embedded in the free zip blocks */ +struct buf_buddy_free_t { + union { + ulint size; /*!< size of the block */ + byte bytes[FIL_PAGE_DATA]; + /*!< stamp[FIL_PAGE_ARCH_LOG_NO_OR_SPACE_ID] + == BUF_BUDDY_FREE_STAMP denotes a free + block. If the space_id field of buddy + block != BUF_BUDDY_FREE_STAMP, the block + is not in any zip_free list. If the + space_id is BUF_BUDDY_FREE_STAMP then + stamp[0] will contain the + buddy block size. 
*/ + } stamp; + + buf_page_t bpage; /*!< Embedded bpage descriptor */ + UT_LIST_NODE_T(buf_buddy_free_t) list; + /*!< Node of zip_free list */ +}; + +/** @brief The buffer pool statistics structure. */ +struct buf_pool_stat_t{ + ulint n_page_gets; /*!< number of page gets performed; + also successful searches through + the adaptive hash index are + counted as page gets; this field + is NOT protected by the buffer + pool mutex */ + ulint n_pages_read; /*!< number read operations */ + ulint n_pages_written;/*!< number write operations */ + ulint n_pages_created;/*!< number of pages created + in the pool with no read */ + ulint n_ra_pages_read_rnd;/*!< number of pages read in + as part of random read ahead */ + ulint n_ra_pages_read;/*!< number of pages read in + as part of read ahead */ + ulint n_ra_pages_evicted;/*!< number of read ahead + pages that are evicted without + being accessed */ + ulint n_pages_made_young; /*!< number of pages made young, in + buf_page_make_young() */ + ulint n_pages_not_made_young; /*!< number of pages not made + young because the first access + was not long enough ago, in + buf_page_peek_if_too_old() */ + /** number of waits for eviction; writes protected by buf_pool.mutex */ + ulint LRU_waits; + ulint LRU_bytes; /*!< LRU size in bytes */ + ulint flush_list_bytes;/*!< flush_list size in bytes */ +}; + +/** Statistics of buddy blocks of a given size. */ +struct buf_buddy_stat_t { + /** Number of blocks allocated from the buddy system. */ + ulint used; + /** Number of blocks relocated by the buddy system. */ + ib_uint64_t relocated; + /** Total duration of block relocations, in microseconds. */ + ib_uint64_t relocated_usec; +}; + +/** The buffer pool */ +class buf_pool_t +{ + /** A chunk of buffers */ + struct chunk_t + { + /** number of elements in blocks[] */ + size_t size; + /** memory allocated for the page frames */ + unsigned char *mem; + /** descriptor of mem */ + ut_new_pfx_t mem_pfx; + /** array of buffer control blocks */ + buf_block_t *blocks; + + /** Map of first page frame address to chunks[] */ + using map= std::map<const void*, chunk_t*, std::less<const void*>, + ut_allocator<std::pair<const void* const,chunk_t*>>>; + /** Chunk map that may be under construction by buf_resize_thread() */ + static map *map_reg; + /** Current chunk map for lookup only */ + static map *map_ref; + + /** @return the memory size bytes. */ + size_t mem_size() const { return mem_pfx.m_size; } + + /** Register the chunk */ + void reg() { map_reg->emplace(map::value_type(blocks->frame, this)); } + + /** Allocate a chunk of buffer frames. + @param bytes requested size + @return whether the allocation succeeded */ + inline bool create(size_t bytes); + +#ifdef UNIV_DEBUG + /** Find a block that points to a ROW_FORMAT=COMPRESSED page + @param data pointer to the start of a ROW_FORMAT=COMPRESSED page frame + @return the block + @retval nullptr if not found */ + const buf_block_t *contains_zip(const void *data) const + { + const buf_block_t *block= blocks; + for (auto i= size; i--; block++) + if (block->page.zip.data == data) + return block; + return nullptr; + } + + /** Check that all blocks are in a replaceable state. + @return address of a non-free block + @retval nullptr if all freed */ + inline const buf_block_t *not_freed() const; +#endif /* UNIV_DEBUG */ + }; + + /** Withdraw blocks from the buffer pool until meeting withdraw_target. + @return whether retry is needed */ + inline bool withdraw_blocks(); + + /** Determine if a pointer belongs to a buf_block_t. 
It can be a pointer to + the buf_block_t itself or a member of it. + @param ptr a pointer that will not be dereferenced + @return whether the ptr belongs to a buf_block_t struct */ + bool is_block_field(const void *ptr) const + { + const chunk_t *chunk= chunks; + const chunk_t *const echunk= chunk + ut_min(n_chunks, n_chunks_new); + + /* TODO: protect chunks with a mutex (the older pointer will + currently remain during resize()) */ + for (; chunk < echunk; chunk++) + if (ptr >= reinterpret_cast<const void*>(chunk->blocks) && + ptr < reinterpret_cast<const void*>(chunk->blocks + chunk->size)) + return true; + return false; + } + + /** Try to reallocate a control block. + @param block control block to reallocate + @return whether the reallocation succeeded */ + inline bool realloc(buf_block_t *block); + +public: + bool is_initialised() const { return chunks != nullptr; } + + /** Create the buffer pool. + @return whether the creation failed */ + bool create(); + + /** Clean up after successful create() */ + void close(); + + /** Resize from srv_buf_pool_old_size to srv_buf_pool_size. */ + inline void resize(); + + /** @return whether resize() is in progress */ + bool resize_in_progress() const + { + return UNIV_UNLIKELY(resizing.load(std::memory_order_relaxed)); + } + + /** @return the current size in blocks */ + size_t get_n_pages() const + { + ut_ad(is_initialised()); + size_t size= 0; + for (auto j= n_chunks; j--; ) + size+= chunks[j].size; + return size; + } + + /** Determine whether a frame is intended to be withdrawn during resize(). + @param ptr pointer within a buf_block_t::frame + @return whether the frame will be withdrawn */ + bool will_be_withdrawn(const byte *ptr) const + { + ut_ad(curr_size < old_size); +#ifdef SAFE_MUTEX + if (resizing.load(std::memory_order_relaxed)) + mysql_mutex_assert_owner(&mutex); +#endif /* SAFE_MUTEX */ + + for (const chunk_t *chunk= chunks + n_chunks_new, + * const echunk= chunks + n_chunks; + chunk != echunk; chunk++) + if (ptr >= chunk->blocks->frame && + ptr < (chunk->blocks + chunk->size - 1)->frame + srv_page_size) + return true; + return false; + } + + /** Determine whether a block is intended to be withdrawn during resize(). + @param bpage buffer pool block + @return whether the frame will be withdrawn */ + bool will_be_withdrawn(const buf_page_t &bpage) const + { + ut_ad(curr_size < old_size); +#ifdef SAFE_MUTEX + if (resizing.load(std::memory_order_relaxed)) + mysql_mutex_assert_owner(&mutex); +#endif /* SAFE_MUTEX */ + + for (const chunk_t *chunk= chunks + n_chunks_new, + * const echunk= chunks + n_chunks; + chunk != echunk; chunk++) + if (&bpage >= &chunk->blocks->page && + &bpage < &chunk->blocks[chunk->size].page) + return true; + return false; + } + + /** Release and evict a corrupted page. + @param bpage page that was being read */ + ATTRIBUTE_COLD void corrupted_evict(buf_page_t *bpage); + + /** Release a memory block to the buffer pool. 
*/ + ATTRIBUTE_COLD void free_block(buf_block_t *block); + +#ifdef UNIV_DEBUG + /** Find a block that points to a ROW_FORMAT=COMPRESSED page + @param data pointer to the start of a ROW_FORMAT=COMPRESSED page frame + @return the block + @retval nullptr if not found */ + const buf_block_t *contains_zip(const void *data) const + { + mysql_mutex_assert_owner(&mutex); + for (const chunk_t *chunk= chunks, * const end= chunks + n_chunks; + chunk != end; chunk++) + if (const buf_block_t *block= chunk->contains_zip(data)) + return block; + return nullptr; + } + + /** Assert that all buffer pool pages are in a replaceable state */ + void assert_all_freed(); +#endif /* UNIV_DEBUG */ + +#ifdef BTR_CUR_HASH_ADAPT + /** Clear the adaptive hash index on all pages in the buffer pool. */ + inline void clear_hash_index(); + + /** Get a buffer block from an adaptive hash index pointer. + This function does not return if the block is not identified. + @param ptr pointer to within a page frame + @return pointer to block, never NULL */ + inline buf_block_t *block_from_ahi(const byte *ptr) const; +#endif /* BTR_CUR_HASH_ADAPT */ + + bool is_block_lock(const rw_lock_t *l) const + { return is_block_field(static_cast<const void*>(l)); } + + /** + @return the smallest oldest_modification lsn for any page + @retval empty_lsn if all modified persistent pages have been flushed */ + lsn_t get_oldest_modification(lsn_t empty_lsn) + { + mysql_mutex_assert_owner(&flush_list_mutex); + while (buf_page_t *bpage= UT_LIST_GET_LAST(flush_list)) + { + ut_ad(!fsp_is_system_temporary(bpage->id().space())); + lsn_t lsn= bpage->oldest_modification(); + if (lsn != 1) + { + ut_ad(lsn > 2); + return lsn; + } + delete_from_flush_list(bpage); + } + return empty_lsn; + } + + /** Determine if a buffer block was created by chunk_t::create(). + @param block block descriptor (not dereferenced) + @return whether block has been created by chunk_t::create() */ + bool is_uncompressed(const buf_block_t *block) const + { + return is_block_field(reinterpret_cast<const void*>(block)); + } + + /** Get the page_hash latch for a page */ + page_hash_latch *hash_lock_get(const page_id_t id) const + { + return page_hash.lock_get(id.fold()); + } + + /** Look up a block descriptor. + @param id page identifier + @param fold id.fold() + @return block descriptor, possibly in watch[] + @retval nullptr if not found*/ + buf_page_t *page_hash_get_low(const page_id_t id, const ulint fold) + { + ut_ad(id.fold() == fold); +#ifdef SAFE_MUTEX + DBUG_ASSERT(mysql_mutex_is_owner(&mutex) || + page_hash.lock_get(fold)->is_locked()); +#endif /* SAFE_MUTEX */ + buf_page_t *bpage; + /* Look for the page in the hash table */ + HASH_SEARCH(hash, &page_hash, fold, buf_page_t*, bpage, + ut_ad(bpage->in_page_hash), id == bpage->id()); + return bpage; + } +private: + /** Look up a block descriptor. 
+ @tparam exclusive whether the latch is to be acquired exclusively + @tparam watch whether to allow watch_is_sentinel() + @param page_id page identifier + @param fold page_id.fold() + @param hash_lock pointer to the acquired latch (to be released by caller) + @return pointer to the block + @retval nullptr if no block was found; !lock || !*lock will also hold */ + template<bool exclusive,bool watch> + buf_page_t *page_hash_get_locked(const page_id_t page_id, ulint fold, + page_hash_latch **hash_lock) + { + ut_ad(hash_lock || !exclusive); + page_hash_latch *latch= page_hash.lock<exclusive>(fold); + buf_page_t *bpage= page_hash_get_low(page_id, fold); + if (!bpage || watch_is_sentinel(*bpage)) + { + latch->release<exclusive>(); + if (hash_lock) + *hash_lock= nullptr; + return watch ? bpage : nullptr; + } + + ut_ad(bpage->in_file()); + ut_ad(page_id == bpage->id()); + + if (hash_lock) + *hash_lock= latch; /* to be released by the caller */ + else + latch->release<exclusive>(); + return bpage; + } +public: + /** Look up a block descriptor. + @tparam exclusive whether the latch is to be acquired exclusively + @param page_id page identifier + @param fold page_id.fold() + @param hash_lock pointer to the acquired latch (to be released by caller) + @return pointer to the block + @retval nullptr if no block was found; !lock || !*lock will also hold */ + template<bool exclusive> + buf_page_t *page_hash_get_locked(const page_id_t page_id, ulint fold, + page_hash_latch **hash_lock) + { return page_hash_get_locked<exclusive,false>(page_id, fold, hash_lock); } + + /** @return whether the buffer pool contains a page + @tparam watch whether to allow watch_is_sentinel() + @param page_id page identifier */ + template<bool watch= false> + bool page_hash_contains(const page_id_t page_id) + { + return page_hash_get_locked<false,watch>(page_id, page_id.fold(), nullptr); + } + + /** Determine if a block is a sentinel for a buffer pool watch. + @param bpage page descriptor + @return whether bpage a sentinel for a buffer pool watch */ + bool watch_is_sentinel(const buf_page_t &bpage) + { +#ifdef SAFE_MUTEX + DBUG_ASSERT(mysql_mutex_is_owner(&mutex) || + hash_lock_get(bpage.id())->is_locked()); +#endif /* SAFE_MUTEX */ + ut_ad(bpage.in_file()); + + if (&bpage < &watch[0] || &bpage >= &watch[UT_ARR_SIZE(watch)]) + { + ut_ad(bpage.state() != BUF_BLOCK_ZIP_PAGE || bpage.zip.data); + return false; + } + + ut_ad(bpage.state() == BUF_BLOCK_ZIP_PAGE); + ut_ad(!bpage.in_zip_hash); + ut_ad(!bpage.zip.data); + return true; + } + + /** Check if a watched page has been read. + This may only be called after !watch_set() and before invoking watch_unset(). + @param id page identifier + @return whether the page was read to the buffer pool */ + bool watch_occurred(const page_id_t id) + { + const ulint fold= id.fold(); + page_hash_latch *hash_lock= page_hash.lock<false>(fold); + /* The page must exist because watch_set() increments buf_fix_count. */ + buf_page_t *bpage= page_hash_get_low(id, fold); + const bool is_sentinel= watch_is_sentinel(*bpage); + hash_lock->read_unlock(); + return !is_sentinel; + } + + /** Register a watch for a page identifier. The caller must hold an + exclusive page hash latch. The *hash_lock may be released, + relocated, and reacquired. 
+ @param id page identifier + @param hash_lock exclusively held page_hash latch + @return a buffer pool block corresponding to id + @retval nullptr if the block was not present, and a watch was installed */ + inline buf_page_t *watch_set(const page_id_t id, + page_hash_latch **hash_lock); + + /** Stop watching whether a page has been read in. + watch_set(id) must have returned nullptr before. + @param id page identifier */ + void watch_unset(const page_id_t id) + { + const ulint fold= id.fold(); + page_hash_latch *hash_lock= page_hash.lock<true>(fold); + /* The page must exist because watch_set() increments buf_fix_count. */ + buf_page_t *watch= page_hash_get_low(id, fold); + if (watch->unfix() == 0 && watch_is_sentinel(*watch)) + { + /* The following is based on watch_remove(). */ + ut_ad(watch->in_page_hash); + ut_d(watch->in_page_hash= false); + HASH_DELETE(buf_page_t, hash, &page_hash, fold, watch); + hash_lock->write_unlock(); + // Now that the watch is detached from page_hash, release it to watch[]. + mysql_mutex_lock(&mutex); + /* It is possible that watch_remove() already removed the watch. */ + if (watch->id_ == id) + { + ut_ad(!watch->buf_fix_count()); + ut_ad(watch->state() == BUF_BLOCK_ZIP_PAGE); + watch->set_state(BUF_BLOCK_NOT_USED); + } + mysql_mutex_unlock(&mutex); + } + else + hash_lock->write_unlock(); + } + + /** Remove the sentinel block for the watch before replacing it with a + real block. watch_unset() or watch_occurred() will notice + that the block has been replaced with the real block. + @param watch sentinel */ + inline void watch_remove(buf_page_t *watch); + + /** @return whether less than 1/4 of the buffer pool is available */ + bool running_out() const + { + return !recv_recovery_is_on() && + UNIV_UNLIKELY(UT_LIST_GET_LEN(free) + UT_LIST_GET_LEN(LRU) < + std::min(curr_size, old_size) / 4); + } + +#ifdef UNIV_DEBUG + /** Validate the buffer pool. */ + void validate(); +#endif /* UNIV_DEBUG */ +#if defined UNIV_DEBUG_PRINT || defined UNIV_DEBUG + /** Write information of the buf_pool to the error log. */ + void print(); +#endif /* UNIV_DEBUG_PRINT || UNIV_DEBUG */ + + /** Remove a block from the LRU list. + @return the predecessor in the LRU list */ + buf_page_t *LRU_remove(buf_page_t *bpage) + { + mysql_mutex_assert_owner(&mutex); + ut_ad(bpage->in_LRU_list); + ut_ad(bpage->in_page_hash); + ut_ad(!bpage->in_zip_hash); + ut_ad(bpage->in_file()); + lru_hp.adjust(bpage); + lru_scan_itr.adjust(bpage); + ut_d(bpage->in_LRU_list= false); + buf_page_t *prev= UT_LIST_GET_PREV(LRU, bpage); + UT_LIST_REMOVE(LRU, bpage); + return prev; + } + + /** Number of pages to read ahead */ + static constexpr uint32_t READ_AHEAD_PAGES= 64; + + /** Buffer pool mutex */ + MY_ALIGNED(CPU_LEVEL1_DCACHE_LINESIZE) mysql_mutex_t mutex; + /** Number of pending LRU flush; protected by mutex. 
*/
+ ulint n_flush_LRU_;
+ /** broadcast when n_flush_LRU reaches 0; protected by mutex */
+ pthread_cond_t done_flush_LRU;
+ /** Number of pending flush_list flush; protected by mutex */
+ ulint n_flush_list_;
+ /** broadcast when n_flush_list reaches 0; protected by mutex */
+ pthread_cond_t done_flush_list;
+
+ TPOOL_SUPPRESS_TSAN ulint n_flush_LRU() const { return n_flush_LRU_; }
+ TPOOL_SUPPRESS_TSAN ulint n_flush_list() const { return n_flush_list_; }
+
+ /** @name General fields */
+ /* @{ */
+ ulint curr_pool_size; /*!< Current pool size in bytes */
+ ulint LRU_old_ratio; /*!< Reserve this much of the buffer
+ pool for "old" blocks */
+#ifdef UNIV_DEBUG
+ ulint buddy_n_frames; /*!< Number of frames allocated from
+ the buffer pool to the buddy system */
+ ulint mutex_exit_forbidden; /*!< Forbid releasing the mutex */
+#endif
+ ut_allocator<unsigned char> allocator; /*!< Allocator used for
+ allocating memory for the "chunks"
+ member. */
+ volatile ulint n_chunks; /*!< number of buffer pool chunks */
+ volatile ulint n_chunks_new; /*!< new number of buffer pool chunks */
+ chunk_t* chunks; /*!< buffer pool chunks */
+ chunk_t* chunks_old; /*!< old buffer pool chunks to be freed
+ after resizing buffer pool */
+ /** current pool size in pages */
+ Atomic_counter<ulint> curr_size;
+ /** previous pool size in pages */
+ Atomic_counter<ulint> old_size;
+ /** read-ahead request size in pages */
+ Atomic_counter<uint32_t> read_ahead_area;
+
+ /** Hash table with singly-linked overflow lists. @see hash_table_t */
+ struct page_hash_table
+ {
+ /** Number of array[] elements per page_hash_latch.
+ Must be one less than a power of 2. */
+ static constexpr size_t ELEMENTS_PER_LATCH= CPU_LEVEL1_DCACHE_LINESIZE /
+ sizeof(void*) - 1;
+
+ /** number of payload elements in array[] */
+ Atomic_relaxed<ulint> n_cells;
+ /** the hash table, with pad(n_cells) elements, aligned to L1 cache size */
+ hash_cell_t *array;
+
+ /** Create the hash table.
+ @param n the lower bound of n_cells */
+ void create(ulint n);
+
+ /** Free the hash table. */
+ void free() { aligned_free(array); array= nullptr; }
+
+ /** @return the index of an array element */
+ ulint calc_hash(ulint fold) const { return calc_hash(fold, n_cells); }
+ /** @return raw array index converted to padded index */
+ static ulint pad(ulint h) { return 1 + (h / ELEMENTS_PER_LATCH) + h; }
+ private:
+ /** @return the hash value before any ELEMENTS_PER_LATCH padding */
+ static ulint hash(ulint fold, ulint n) { return ut_hash_ulint(fold, n); }
+
+ /** @return the index of an array element */
+ static ulint calc_hash(ulint fold, ulint n_cells)
+ {
+ return pad(hash(fold, n_cells));
+ }
+ /** Get a page_hash latch. */
+ page_hash_latch *lock_get(ulint fold, ulint n) const
+ {
+ static_assert(!((ELEMENTS_PER_LATCH + 1) & ELEMENTS_PER_LATCH),
+ "must be one less than a power of 2");
+ return reinterpret_cast<page_hash_latch*>
+ (&array[calc_hash(fold, n) & ~ELEMENTS_PER_LATCH]);
+ }
+ public:
+ /** Get a page_hash latch. */
+ page_hash_latch *lock_get(ulint fold) const
+ { return lock_get(fold, n_cells); }
+
+ /** Acquire an array latch.
+ @tparam exclusive whether the latch is to be acquired exclusively
+ @param fold hash bucket key */
+ template<bool exclusive> page_hash_latch *lock(ulint fold)
+ {
+ page_hash_latch *latch= lock_get(fold, n_cells);
+ latch->acquire<exclusive>();
+ return latch;
+ }
+
+ /** Exclusively acquire all latches */
+ inline void write_lock_all();
+
+ /** Release all latches */
+ inline void write_unlock_all();
+ };
+
+ /** Hash table of file pages (buf_page_t::in_file() holds),
+ indexed by page_id_t. Protected by both mutex and page_hash.lock_get(). */
+ page_hash_table page_hash;
+
+ /** map of block->frame to buf_block_t blocks that belong
+ to buf_buddy_alloc(); protected by buf_pool.mutex */
+ hash_table_t zip_hash;
+ /** number of pending read operations */
+ Atomic_counter<ulint> n_pend_reads;
+ Atomic_counter<ulint>
+ n_pend_unzip; /*!< number of pending decompressions */
+
+ time_t last_printout_time;
+ /*!< when buf_print_io was last
+ called */
+ buf_buddy_stat_t buddy_stat[BUF_BUDDY_SIZES_MAX + 1];
+ /*!< Statistics of buddy system,
+ indexed by block size */
+ buf_pool_stat_t stat; /*!< current statistics */
+ buf_pool_stat_t old_stat; /*!< old statistics */
+
+ /* @} */
+
+ /** @name Page flushing algorithm fields */
+ /* @{ */
+
+ /** mutex protecting flush_list, buf_page_t::set_oldest_modification()
+ and buf_page_t::list pointers when !oldest_modification() */
+ MY_ALIGNED(CPU_LEVEL1_DCACHE_LINESIZE) mysql_mutex_t flush_list_mutex;
+ /** "hazard pointer" for flush_list scans; protected by flush_list_mutex */
+ FlushHp flush_hp;
+ /** modified blocks (a subset of LRU) */
+ UT_LIST_BASE_NODE_T(buf_page_t) flush_list;
+private:
+ /** whether the page cleaner needs wakeup from indefinite sleep */
+ bool page_cleaner_is_idle;
+ /** track server activity count for signaling idle flushing */
+ ulint last_activity_count;
+public:
+ /** signalled to wake up the page_cleaner; protected by flush_list_mutex */
+ pthread_cond_t do_flush_list;
+
+ /** @return whether the page cleaner must sleep due to being idle */
+ bool page_cleaner_idle() const
+ {
+ mysql_mutex_assert_owner(&flush_list_mutex);
+ return page_cleaner_is_idle;
+ }
+ /** Wake up the page cleaner if needed */
+ inline void page_cleaner_wakeup();
+
+ /** Register whether an explicit wakeup of the page cleaner is needed */
+ void page_cleaner_set_idle(bool deep_sleep)
+ {
+ mysql_mutex_assert_owner(&flush_list_mutex);
+ page_cleaner_is_idle= deep_sleep;
+ }
+
+ /** Update server last activity count */
+ void update_last_activity_count(ulint activity_count)
+ {
+ mysql_mutex_assert_owner(&flush_list_mutex);
+ last_activity_count= activity_count;
+ }
+
+ // n_flush_LRU() + n_flush_list()
+ // is approximately COUNT(io_fix()==BUF_IO_WRITE) in flush_list
+
+ unsigned freed_page_clock; /*!< a sequence number used
+ to count the number of buffer
+ blocks removed from the end of
+ the LRU list; NOTE that this
+ counter may wrap around at 4
+ billion! A thread is allowed
+ to read this for heuristic
+ purposes without holding any
+ mutex or latch */
+ bool try_LRU_scan; /*!< Cleared when an LRU
+ scan for a free block fails. This
+ flag is used to avoid repeated
+ scans of the LRU list when we know
+ that there is no free block
+ available in the scan depth for
+ eviction. Set whenever
+ we flush a batch from the
+ buffer pool.
Protected by the
+ buf_pool.mutex */
+ /* @} */
+
+ /** @name LRU replacement algorithm fields */
+ /* @{ */
+
+ UT_LIST_BASE_NODE_T(buf_page_t) free;
+ /*!< base node of the free
+ block list */
+ /** signaled each time the free list grows; protected by mutex */
+ pthread_cond_t done_free;
+
+ UT_LIST_BASE_NODE_T(buf_page_t) withdraw;
+ /*!< base node of the withdraw
+ block list. It is only used while
+ shrinking the buffer pool; the
+ blocks in this list are not reused,
+ as they will be removed */
+
+ ulint withdraw_target; /*!< target length of withdraw
+ block list, when withdrawing */
+
+ /** "hazard pointer" used during scan of LRU while doing
+ LRU list batch. Protected by buf_pool_t::mutex. */
+ LRUHp lru_hp;
+
+ /** Iterator used to scan the LRU list when searching for
+ a replaceable victim. Protected by buf_pool_t::mutex. */
+ LRUItr lru_scan_itr;
+
+ UT_LIST_BASE_NODE_T(buf_page_t) LRU;
+ /*!< base node of the LRU list */
+
+ buf_page_t* LRU_old; /*!< pointer to the first of the
+ approximately
+ LRU_old_ratio/BUF_LRU_OLD_RATIO_DIV
+ oldest blocks in the LRU list;
+ NULL if LRU length less than
+ BUF_LRU_OLD_MIN_LEN;
+ NOTE: when LRU_old != NULL, the
+ length of the "old" sublist must
+ always equal LRU_old_len */
+ ulint LRU_old_len; /*!< length of the LRU list from
+ the block to which LRU_old points
+ onward, including that block;
+ see buf0lru.cc for the restrictions
+ on this value; 0 if LRU_old == NULL;
+ NOTE: LRU_old_len must be adjusted
+ whenever LRU_old shrinks or grows! */
+
+ UT_LIST_BASE_NODE_T(buf_block_t) unzip_LRU;
+ /*!< base node of the
+ unzip_LRU list */
+
+ /* @} */
+ /** free ROW_FORMAT=COMPRESSED page frames */
+ UT_LIST_BASE_NODE_T(buf_buddy_free_t) zip_free[BUF_BUDDY_SIZES_MAX];
+#if BUF_BUDDY_LOW > UNIV_ZIP_SIZE_MIN
+# error "BUF_BUDDY_LOW > UNIV_ZIP_SIZE_MIN"
+#endif
+
+ /** Sentinels to detect if pages are read into the buffer pool while
+ a delete-buffering operation is pending. Protected by mutex. */
+ buf_page_t watch[innodb_purge_threads_MAX + 1];
+ /** Reserve a buffer. */
+ buf_tmp_buffer_t *io_buf_reserve() { return io_buf.reserve(); }
+
+ /** @return whether any I/O is pending */
+ bool any_io_pending() const
+ {
+ return n_pend_reads || n_flush_LRU() || n_flush_list();
+ }
+ /** @return total amount of pending I/O */
+ ulint io_pending() const
+ {
+ return n_pend_reads + n_flush_LRU() + n_flush_list();
+ }
+
+private:
+ /** Remove a block from the flush list. */
+ inline void delete_from_flush_list_low(buf_page_t *bpage);
+ /** Remove a block from flush_list.
+ @param bpage buffer pool page
+ @param clear whether to invoke buf_page_t::clear_oldest_modification() */
+ void delete_from_flush_list(buf_page_t *bpage, bool clear);
+public:
+ /** Remove a block from flush_list.
+ @param bpage buffer pool page */
+ void delete_from_flush_list(buf_page_t *bpage)
+ { delete_from_flush_list(bpage, true); }
+
+ /** Insert a modified block into the flush list.
+ @param block modified block
+ @param lsn start LSN of the mini-transaction that modified the block */
+ void insert_into_flush_list(buf_block_t *block, lsn_t lsn);
+
+ /** Free a page whose underlying file page has been freed.
*/ + inline void release_freed_page(buf_page_t *bpage); + +private: + /** Temporary memory for page_compressed and encrypted I/O */ + struct io_buf_t + { + /** number of elements in slots[] */ + ulint n_slots; + /** array of slots */ + buf_tmp_buffer_t *slots; + + void create(ulint n_slots) + { + this->n_slots= n_slots; + slots= static_cast<buf_tmp_buffer_t*> + (ut_malloc_nokey(n_slots * sizeof *slots)); + memset((void*) slots, 0, n_slots * sizeof *slots); + } + + void close() + { + for (buf_tmp_buffer_t *s= slots, *e= slots + n_slots; s != e; s++) + { + aligned_free(s->crypt_buf); + aligned_free(s->comp_buf); + } + ut_free(slots); + slots= nullptr; + n_slots= 0; + } + + /** Reserve a buffer */ + buf_tmp_buffer_t *reserve() + { + for (buf_tmp_buffer_t *s= slots, *e= slots + n_slots; s != e; s++) + if (s->acquire()) + return s; + return nullptr; + } + } io_buf; + + /** whether resize() is in the critical path */ + std::atomic<bool> resizing; +}; + +/** The InnoDB buffer pool */ +extern buf_pool_t buf_pool; + +inline void page_hash_latch::read_lock() +{ + mysql_mutex_assert_not_owner(&buf_pool.mutex); + if (!read_trylock()) + read_lock_wait(); +} + +inline void page_hash_latch::write_lock() +{ + if (!write_trylock()) + write_lock_wait(); +} + +inline void buf_page_t::add_buf_fix_count(uint32_t count) +{ + mysql_mutex_assert_owner(&buf_pool.mutex); + buf_fix_count_+= count; +} + +inline void buf_page_t::set_buf_fix_count(uint32_t count) +{ + mysql_mutex_assert_owner(&buf_pool.mutex); + buf_fix_count_= count; +} + +inline void buf_page_t::set_state(buf_page_state state) +{ + mysql_mutex_assert_owner(&buf_pool.mutex); +#ifdef UNIV_DEBUG + switch (state) { + case BUF_BLOCK_REMOVE_HASH: + /* buf_pool_t::corrupted_evict() invokes set_corrupt_id() + before buf_LRU_free_one_page(), so we cannot assert that + we are holding the hash_lock. 
*/ + break; + case BUF_BLOCK_MEMORY: + if (!in_file()) break; + /* fall through */ + case BUF_BLOCK_FILE_PAGE: + ut_ad(buf_pool.hash_lock_get(id_)->is_write_locked()); + break; + case BUF_BLOCK_NOT_USED: + if (!in_file()) break; + /* fall through */ + case BUF_BLOCK_ZIP_PAGE: + ut_ad(buf_pool.hash_lock_get(id_)->is_write_locked() || + (this >= &buf_pool.watch[0] && + this <= &buf_pool.watch[UT_ARR_SIZE(buf_pool.watch)])); + break; + } +#endif + state_= state; +} + +inline void buf_page_t::set_io_fix(buf_io_fix io_fix) +{ + mysql_mutex_assert_owner(&buf_pool.mutex); + io_fix_= io_fix; +} + +inline void buf_page_t::set_corrupt_id() +{ +#ifdef UNIV_DEBUG + switch (oldest_modification()) { + case 0: + break; + case 2: + ut_ad(fsp_is_system_temporary(id().space())); + /* buf_LRU_block_free_non_file_page() asserts !oldest_modification() */ + ut_d(oldest_modification_= 0;) + break; + default: + ut_ad("block is dirty" == 0); + } + switch (state()) { + case BUF_BLOCK_REMOVE_HASH: + break; + case BUF_BLOCK_ZIP_PAGE: + case BUF_BLOCK_FILE_PAGE: + ut_ad(buf_pool.hash_lock_get(id_)->is_write_locked()); + break; + case BUF_BLOCK_NOT_USED: + case BUF_BLOCK_MEMORY: + ut_ad("invalid state" == 0); + } +#endif + id_= page_id_t(~0ULL); +} + +/** Set oldest_modification when adding to buf_pool.flush_list */ +inline void buf_page_t::set_oldest_modification(lsn_t lsn) +{ + mysql_mutex_assert_owner(&buf_pool.flush_list_mutex); + ut_ad(oldest_modification() <= 1); + ut_ad(lsn > 2); + oldest_modification_= lsn; +} + +/** Clear oldest_modification after removing from buf_pool.flush_list */ +inline void buf_page_t::clear_oldest_modification() +{ + mysql_mutex_assert_owner(&buf_pool.flush_list_mutex); + ut_d(const auto state= state_); + ut_ad(state == BUF_BLOCK_FILE_PAGE || state == BUF_BLOCK_ZIP_PAGE || + state == BUF_BLOCK_REMOVE_HASH); + ut_ad(oldest_modification()); + ut_ad(!list.prev); + ut_ad(!list.next); + /* We must use release memory order to guarantee that callers of + oldest_modification_acquire() will observe the block as + being detached from buf_pool.flush_list, after reading the value 0. */ + oldest_modification_.store(0, std::memory_order_release); +} + +/** Note that a block is no longer dirty, while not removing +it from buf_pool.flush_list */ +inline void buf_page_t::clear_oldest_modification(bool temporary) +{ + mysql_mutex_assert_not_owner(&buf_pool.flush_list_mutex); + ut_ad(temporary == fsp_is_system_temporary(id().space())); + ut_ad(io_fix_ == BUF_IO_WRITE); + if (temporary) + { + ut_ad(oldest_modification() == 2); + oldest_modification_= 0; + } + else + { + /* We use release memory order to guarantee that callers of + oldest_modification_acquire() will observe the block as + being detached from buf_pool.flush_list, after reading the value 0. */ + ut_ad(oldest_modification() > 2); + oldest_modification_.store(1, std::memory_order_release); + } +} + +/** @return whether the block is modified and ready for flushing */ +inline bool buf_page_t::ready_for_flush() const +{ + mysql_mutex_assert_owner(&buf_pool.mutex); + ut_ad(in_LRU_list); + ut_a(in_file()); + ut_ad(fsp_is_system_temporary(id().space()) + ? oldest_modification() == 2 + : oldest_modification() > 2); + return io_fix_ == BUF_IO_NONE; +} + +/** @return whether the block can be relocated in memory. +The block can be dirty, but it must not be I/O-fixed or bufferfixed. 
*/ +inline bool buf_page_t::can_relocate() const +{ + mysql_mutex_assert_owner(&buf_pool.mutex); + ut_ad(in_file()); + ut_ad(in_LRU_list); + return io_fix_ == BUF_IO_NONE && !buf_fix_count_; +} + +/** @return whether the block has been flagged old in buf_pool.LRU */ +inline bool buf_page_t::is_old() const +{ + mysql_mutex_assert_owner(&buf_pool.mutex); + ut_ad(in_file()); + ut_ad(in_LRU_list); + return old; +} + +/** Set whether a block is old in buf_pool.LRU */ +inline void buf_page_t::set_old(bool old) +{ + mysql_mutex_assert_owner(&buf_pool.mutex); + ut_ad(in_LRU_list); + +#ifdef UNIV_LRU_DEBUG + ut_a((buf_pool.LRU_old_len == 0) == (buf_pool.LRU_old == nullptr)); + /* If a block is flagged "old", the LRU_old list must exist. */ + ut_a(!old || buf_pool.LRU_old); + + if (UT_LIST_GET_PREV(LRU, this) && UT_LIST_GET_NEXT(LRU, this)) + { + const buf_page_t *prev= UT_LIST_GET_PREV(LRU, this); + const buf_page_t *next = UT_LIST_GET_NEXT(LRU, this); + if (prev->old == next->old) + ut_a(prev->old == old); + else + { + ut_a(!prev->old); + ut_a(buf_pool.LRU_old == (old ? this : next)); + } + } +#endif /* UNIV_LRU_DEBUG */ + + this->old= old; +} + +#ifdef UNIV_DEBUG +/** Forbid the release of the buffer pool mutex. */ +# define buf_pool_mutex_exit_forbid() do { \ + mysql_mutex_assert_owner(&buf_pool.mutex); \ + buf_pool.mutex_exit_forbidden++; \ +} while (0) +/** Allow the release of the buffer pool mutex. */ +# define buf_pool_mutex_exit_allow() do { \ + mysql_mutex_assert_owner(&buf_pool.mutex); \ + ut_ad(buf_pool.mutex_exit_forbidden--); \ +} while (0) +#else +/** Forbid the release of the buffer pool mutex. */ +# define buf_pool_mutex_exit_forbid() ((void) 0) +/** Allow the release of the buffer pool mutex. */ +# define buf_pool_mutex_exit_allow() ((void) 0) +#endif + +/********************************************************************** +Let us list the consistency conditions for different control block states. + +NOT_USED: is in free list, not in LRU list, not in flush list, nor + page hash table +MEMORY: is not in free list, LRU list, or flush list, nor page + hash table +FILE_PAGE: space and offset are defined, is in page hash table + if io_fix == BUF_IO_WRITE, + buf_pool.n_flush_LRU() || buf_pool.n_flush_list() + + (1) if buf_fix_count == 0, then + is in LRU list, not in free list + is in flush list, + if and only if oldest_modification > 0 + is x-locked, + if and only if io_fix == BUF_IO_READ + is s-locked, + if and only if io_fix == BUF_IO_WRITE + + (2) if buf_fix_count > 0, then + is not in LRU list, not in free list + is in flush list, + if and only if oldest_modification > 0 + if io_fix == BUF_IO_READ, + is x-locked + if io_fix == BUF_IO_WRITE, + is s-locked + +State transitions: + +NOT_USED => MEMORY +MEMORY => FILE_PAGE +MEMORY => NOT_USED +FILE_PAGE => NOT_USED NOTE: This transition is allowed if and only if + (1) buf_fix_count == 0, + (2) oldest_modification == 0, and + (3) io_fix == 0. +*/ + +/** Select from where to start a scan. If we have scanned +too deep into the LRU list it resets the value to the tail +of the LRU list. +@return buf_page_t from where to start scan. */ +inline buf_page_t *LRUItr::start() +{ + mysql_mutex_assert_owner(m_mutex); + + if (!m_hp || m_hp->old) + m_hp= UT_LIST_GET_LAST(buf_pool.LRU); + + return m_hp; +} + +#ifdef UNIV_DEBUG +/** Functor to validate the LRU list. 
*/
+struct CheckInLRUList {
+ void operator()(const buf_page_t* elem) const
+ {
+ ut_a(elem->in_LRU_list);
+ }
+
+ static void validate()
+ {
+ ut_list_validate(buf_pool.LRU, CheckInLRUList());
+ }
+};
+
+/** Functor to validate the free list. */
+struct CheckInFreeList {
+ void operator()(const buf_page_t* elem) const
+ {
+ ut_a(elem->in_free_list);
+ }
+
+ static void validate()
+ {
+ ut_list_validate(buf_pool.free, CheckInFreeList());
+ }
+};
+
+/** Functor to validate the unzip_LRU list. */
+struct CheckUnzipLRUAndLRUList {
+ void operator()(const buf_block_t* elem) const
+ {
+ ut_a(elem->page.in_LRU_list);
+ ut_a(elem->in_unzip_LRU_list);
+ }
+
+ static void validate()
+ {
+ ut_list_validate(buf_pool.unzip_LRU,
+ CheckUnzipLRUAndLRUList());
+ }
+};
+#endif /* UNIV_DEBUG */
+
+#include "buf0buf.ic"
+
+#endif /* !UNIV_INNOCHECKSUM */
+
+#endif
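
Editor's note: the page_hash_table in this header keeps one page_hash_latch in-line with every ELEMENTS_PER_LATCH payload cells: pad() stretches a raw bucket index so that the first slot of each group is reserved for the latch, and lock_get() masks the padded index back down to that slot. The standalone sketch below mirrors only that arithmetic; CACHE_LINE, pad() and latch_slot() are simplified stand-ins for illustration, not the InnoDB definitions.

#include <cstdio>
#include <cstddef>

// Simplified stand-ins; the real values come from the InnoDB build.
static constexpr std::size_t CACHE_LINE= 64;   // assumed L1 data cache line size
static constexpr std::size_t ELEMENTS_PER_LATCH= CACHE_LINE / sizeof(void*) - 1;

// Mirror of page_hash_table::pad(): insert one latch cell in front of
// every group of ELEMENTS_PER_LATCH payload cells.
static std::size_t pad(std::size_t h) { return 1 + (h / ELEMENTS_PER_LATCH) + h; }

// Index of the latch cell guarding a padded slot (mirror of lock_get()).
static std::size_t latch_slot(std::size_t padded)
{ return padded & ~ELEMENTS_PER_LATCH; }

int main()
{
  static_assert(!((ELEMENTS_PER_LATCH + 1) & ELEMENTS_PER_LATCH),
                "ELEMENTS_PER_LATCH must be one less than a power of 2");
  for (std::size_t h= 0; h < 16; h++)
    std::printf("bucket %2zu -> padded slot %2zu, latch at slot %2zu\n",
                h, pad(h), latch_slot(pad(h)));
  return 0;
}

With a 64-byte cache line and 8-byte pointers, ELEMENTS_PER_LATCH is 7, so every eighth array cell holds a latch and the seven cells following it are the hash buckets it guards, keeping each latch and its buckets on one cache line.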
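
Editor's note: get_oldest_modification(), set_oldest_modification() and the clear_oldest_modification() overloads in this header agree on a small encoding of oldest_modification_: 0 = clean and detached from flush_list, 1 = already written out but still linked in flush_list (unlinked lazily), 2 = dirty page of the temporary tablespace (never in flush_list), and any value greater than 2 is the start LSN of the mini-transaction that first dirtied the page. The following standalone sketch of the lazy skip-and-unlink loop uses a std::list of plain LSN values instead of buf_page_t and is purely illustrative.

#include <cstdint>
#include <cstdio>
#include <list>

using lsn_t= uint64_t;

// Illustrative encoding of oldest_modification_ (see the member functions above):
//   0  : clean, not in flush_list
//   1  : write completed, still linked in flush_list (unlinked lazily)
//   2  : dirty page of the temporary tablespace (never in flush_list)
//   >2 : start LSN of the mini-transaction that first dirtied the page

// Sketch of buf_pool_t::get_oldest_modification(): the flush_list is ordered
// so that the oldest modification is at the tail; already-written entries
// (value 1) are dropped on the way to the first genuinely dirty page.
static lsn_t get_oldest_modification(std::list<lsn_t> &flush_list, lsn_t empty_lsn)
{
  while (!flush_list.empty())
  {
    const lsn_t lsn= flush_list.back();
    if (lsn != 1)
      return lsn;              // a genuinely dirty persistent page (lsn > 2)
    flush_list.pop_back();     // lazily unlink the already-written entry
  }
  return empty_lsn;            // nothing dirty remains
}

int main()
{
  // newest ... oldest; the two trailing 1s are already-written pages
  std::list<lsn_t> flush_list{500, 400, 300, 1, 1};
  std::printf("oldest dirty LSN: %llu\n",
              (unsigned long long) get_oldest_modification(flush_list, 0));
  return 0;  // prints 300
}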
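
Editor's note: io_buf_t::reserve() hands out a temporary I/O buffer by scanning its slot array and returning the first slot whose acquire() succeeds. Below is a minimal sketch of that first-free-slot pattern; tmp_slot and its atomic flag are hypothetical stand-ins for buf_tmp_buffer_t (whose acquire()/release() are defined elsewhere), not the real type.

#include <atomic>
#include <cstdio>
#include <cstddef>

// Hypothetical stand-in for buf_tmp_buffer_t: only the reservation flag.
struct tmp_slot
{
  std::atomic<bool> reserved{false};
  // acquire() succeeds only for the caller that flips reserved false -> true.
  bool acquire() { return !reserved.exchange(true, std::memory_order_acquire); }
  void release() { reserved.store(false, std::memory_order_release); }
};

// Mirror of io_buf_t::reserve(): return the first free slot, or nullptr.
static tmp_slot *reserve(tmp_slot *slots, std::size_t n_slots)
{
  for (tmp_slot *s= slots, *e= slots + n_slots; s != e; s++)
    if (s->acquire())
      return s;
  return nullptr;
}

int main()
{
  tmp_slot slots[4];
  tmp_slot *a= reserve(slots, 4);   // takes slots[0]
  tmp_slot *b= reserve(slots, 4);   // takes slots[1]
  std::printf("a=slot %td, b=slot %td\n", a - slots, b - slots);
  a->release();                     // slot 0 becomes available again
  std::printf("next=slot %td\n", reserve(slots, 4) - slots);  // slot 0
  return 0;
}

A released slot is immediately available to the next caller, which is why a small fixed number of slots suffices for the temporary page_compressed and encryption buffers.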