path: root/storage/innobase/page/page0page.cc
author     Daniel Baumann <daniel.baumann@progress-linux.org>  2024-05-04 18:00:34 +0000
committer  Daniel Baumann <daniel.baumann@progress-linux.org>  2024-05-04 18:00:34 +0000
commit     3f619478f796eddbba6e39502fe941b285dd97b1 (patch)
tree       e2c7b5777f728320e5b5542b6213fd3591ba51e2 /storage/innobase/page/page0page.cc
parent     Initial commit. (diff)
Adding upstream version 1:10.11.6.
Signed-off-by: Daniel Baumann <daniel.baumann@progress-linux.org>
Diffstat (limited to 'storage/innobase/page/page0page.cc')
-rw-r--r--  storage/innobase/page/page0page.cc  2523
1 file changed, 2523 insertions, 0 deletions
diff --git a/storage/innobase/page/page0page.cc b/storage/innobase/page/page0page.cc
new file mode 100644
index 00000000..258d47a5
--- /dev/null
+++ b/storage/innobase/page/page0page.cc
@@ -0,0 +1,2523 @@
+/*****************************************************************************
+
+Copyright (c) 1994, 2016, Oracle and/or its affiliates. All Rights Reserved.
+Copyright (c) 2012, Facebook Inc.
+Copyright (c) 2017, 2022, MariaDB Corporation.
+
+This program is free software; you can redistribute it and/or modify it under
+the terms of the GNU General Public License as published by the Free Software
+Foundation; version 2 of the License.
+
+This program is distributed in the hope that it will be useful, but WITHOUT
+ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
+FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details.
+
+You should have received a copy of the GNU General Public License along with
+this program; if not, write to the Free Software Foundation, Inc.,
+51 Franklin Street, Fifth Floor, Boston, MA 02110-1335 USA
+
+*****************************************************************************/
+
+/**************************************************//**
+@file page/page0page.cc
+Index page routines
+
+Created 2/2/1994 Heikki Tuuri
+*******************************************************/
+
+#include "page0page.h"
+#include "page0cur.h"
+#include "page0zip.h"
+#include "buf0buf.h"
+#include "buf0checksum.h"
+#include "btr0btr.h"
+#include "srv0srv.h"
+#include "lock0lock.h"
+#include "fut0lst.h"
+#include "btr0sea.h"
+#include "trx0sys.h"
+#include <algorithm>
+
+/* THE INDEX PAGE
+ ==============
+
+The index page consists of a page header, which contains the page's
+id and other information. On top of it are the index records, stored
+in a heap and linked into a one-way linear list in alphabetical order.
+
+Just below the page end is an array of pointers, which we call the page
+directory, pointing to about every sixth record in the list. The
+pointers are placed in the directory in the alphabetical order of the
+records pointed to, enabling us to do a binary search over the array.
+Each slot number I in the directory points to a record in which a 4-bit
+field contains a count of those records which are in the linear list
+between pointer I and pointer I - 1 in the directory, including the
+record pointed to by pointer I and not including the record pointed to
+by I - 1. We say that the record pointed to by slot I, or that slot I,
+owns these records. The count is always kept in the range 4 to 8, with
+the exception that it is 1 for the first slot, and 1--8 for the last
+slot.
+
+An essentially binary search can be performed in the list of index
+records, as if we had a pointer to every record in the page directory.
+The data structure is, however, more efficient for inserts, because
+most inserts are just pushed on a heap. Only every 8th insert requires
+a block move in the directory pointer table, which itself is quite
+small. A record is deleted from the page by just taking it off the
+linear list, updating the number-of-owned-records field of the record
+which owns it, and updating the page directory if necessary. A special
+case is the one where the record owns itself. Because the overhead of
+inserts is so small, we may also increase the page size from the
+projected default of 8 kB to 64 kB without too much loss of insert
+efficiency. A bigger page becomes practical as the disk transfer rate
+rises relative to the seek and latency time. On the present system,
+the page size is set so that the page transfer time (3 ms) is 20% of
+the disk random access time (15 ms).
+
+When the page is split, merged, or becomes full but contains deleted
+records, we have to reorganize the page.
+
+Assuming a page size of 8 kB, a typical index page of a secondary
+index contains 300 index entries, and the size of the page directory
+is 50 x 2 bytes = 100 bytes. */
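+
+/* For example (illustrative arithmetic only): with the 50-slot
+directory above, a lookup binary-searches the slot array in about
+log2(50) ~ 6 probes and then walks at most PAGE_DIR_SLOT_MAX_N_OWNED
+= 8 records along the linear list within the owning group. */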
+
+/***************************************************************//**
+Looks for the directory slot which owns the given record.
+@return the directory slot number
+@retval ULINT_UNDEFINED on corruption */
+ulint
+page_dir_find_owner_slot(
+/*=====================*/
+ const rec_t* rec) /*!< in: the physical record */
+{
+ ut_ad(page_rec_check(rec));
+
+ const page_t* page = page_align(rec);
+ const page_dir_slot_t* first_slot = page_dir_get_nth_slot(page, 0);
+ const page_dir_slot_t* slot = page_dir_get_nth_slot(
+ page, ulint(page_dir_get_n_slots(page)) - 1);
+ const rec_t* r = rec;
+
+ if (page_is_comp(page)) {
+ while (rec_get_n_owned_new(r) == 0) {
+ r = page_rec_get_next_low(r, true);
+ if (UNIV_UNLIKELY(r < page + PAGE_NEW_SUPREMUM
+ || r >= slot)) {
+ return ULINT_UNDEFINED;
+ }
+ }
+ } else {
+ while (rec_get_n_owned_old(r) == 0) {
+ r = page_rec_get_next_low(r, false);
+ if (UNIV_UNLIKELY(r < page + PAGE_OLD_SUPREMUM
+ || r >= slot)) {
+ return ULINT_UNDEFINED;
+ }
+ }
+ }
+
+ while (UNIV_LIKELY(*(uint16*) slot
+ != mach_encode_2(ulint(r - page)))) {
+ if (UNIV_UNLIKELY(slot == first_slot)) {
+ return ULINT_UNDEFINED;
+ }
+
+ slot += PAGE_DIR_SLOT_SIZE;
+ }
+
+ return(((ulint) (first_slot - slot)) / PAGE_DIR_SLOT_SIZE);
+}
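+
+/* A sketch of the contract of page_dir_find_owner_slot(), for
+illustration; "rec" is any record on a valid page:
+
+	ulint i = page_dir_find_owner_slot(rec);
+	if (i != ULINT_UNDEFINED) {
+		// Slot i points to the record that owns "rec":
+		// "rec" itself, or a successor of "rec" in the
+		// list, with a nonzero n_owned field.
+		const rec_t* owner = page_dir_slot_get_rec(
+			page_dir_get_nth_slot(page_align(rec), i));
+	}
+*/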
+
+/**************************************************************//**
+Used to check the consistency of a directory slot.
+@return TRUE on success */
+static
+ibool
+page_dir_slot_check(
+/*================*/
+ const page_dir_slot_t* slot) /*!< in: slot */
+{
+ const page_t* page;
+ ulint n_slots;
+ ulint n_owned;
+
+ ut_a(slot);
+
+ page = page_align(slot);
+
+ n_slots = page_dir_get_n_slots(page);
+
+ ut_a(slot <= page_dir_get_nth_slot(page, 0));
+ ut_a(slot >= page_dir_get_nth_slot(page, n_slots - 1));
+
+ ut_a(page_rec_check(page_dir_slot_get_rec(slot)));
+
+ if (page_is_comp(page)) {
+ n_owned = rec_get_n_owned_new(page_dir_slot_get_rec(slot));
+ } else {
+ n_owned = rec_get_n_owned_old(page_dir_slot_get_rec(slot));
+ }
+
+ if (slot == page_dir_get_nth_slot(page, 0)) {
+ ut_a(n_owned == 1);
+ } else if (slot == page_dir_get_nth_slot(page, n_slots - 1)) {
+ ut_a(n_owned >= 1);
+ ut_a(n_owned <= PAGE_DIR_SLOT_MAX_N_OWNED);
+ } else {
+ ut_a(n_owned >= PAGE_DIR_SLOT_MIN_N_OWNED);
+ ut_a(n_owned <= PAGE_DIR_SLOT_MAX_N_OWNED);
+ }
+
+ return(TRUE);
+}
+
+/*************************************************************//**
+Sets the max trx id field value. */
+void
+page_set_max_trx_id(
+/*================*/
+ buf_block_t* block, /*!< in/out: page */
+ page_zip_des_t* page_zip,/*!< in/out: compressed page, or NULL */
+ trx_id_t trx_id, /*!< in: transaction id */
+ mtr_t* mtr) /*!< in/out: mini-transaction, or NULL */
+{
+ ut_ad(!mtr || mtr->memo_contains_flagged(block, MTR_MEMO_PAGE_X_FIX));
+ ut_ad(!page_zip || page_zip == &block->page.zip);
+ static_assert((PAGE_HEADER + PAGE_MAX_TRX_ID) % 8 == 0, "alignment");
+ byte *max_trx_id= my_assume_aligned<8>(PAGE_MAX_TRX_ID +
+ PAGE_HEADER + block->page.frame);
+
+ mtr->write<8>(*block, max_trx_id, trx_id);
+ if (UNIV_LIKELY_NULL(page_zip))
+ memcpy_aligned<8>(&page_zip->data[PAGE_MAX_TRX_ID + PAGE_HEADER],
+ max_trx_id, 8);
+}
+
+/** Persist the AUTO_INCREMENT value on a clustered index root page.
+@param[in,out] block clustered index root page
+@param[in] autoinc next available AUTO_INCREMENT value
+@param[in,out] mtr mini-transaction
+@param[in] reset whether to reset the AUTO_INCREMENT
+ to a possibly smaller value than currently
+ exists in the page */
+void
+page_set_autoinc(
+ buf_block_t* block,
+ ib_uint64_t autoinc,
+ mtr_t* mtr,
+ bool reset)
+{
+ ut_ad(mtr->memo_contains_flagged(block, MTR_MEMO_PAGE_X_FIX |
+ MTR_MEMO_PAGE_SX_FIX));
+
+ byte *field= my_assume_aligned<8>(PAGE_HEADER + PAGE_ROOT_AUTO_INC +
+ block->page.frame);
+ ib_uint64_t old= mach_read_from_8(field);
+ if (old == autoinc || (old > autoinc && !reset))
+ return; /* nothing to update */
+
+ mtr->write<8>(*block, field, autoinc);
+ if (UNIV_LIKELY_NULL(block->page.zip.data))
+ memcpy_aligned<8>(PAGE_HEADER + PAGE_ROOT_AUTO_INC + block->page.zip.data,
+ field, 8);
+}
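+
+/* A usage sketch (illustrative): if the root page currently stores
+PAGE_ROOT_AUTO_INC = 10, then
+
+	page_set_autoinc(block, 5, mtr, false);	// no-op: 5 < 10
+	page_set_autoinc(block, 5, mtr, true);	// writes 5 (reset)
+	page_set_autoinc(block, 20, mtr, false);	// writes 20
+*/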
+
+/** The page infimum and supremum of an empty page in ROW_FORMAT=REDUNDANT */
+static const byte infimum_supremum_redundant[] = {
+ /* the infimum record */
+ 0x08/*end offset*/,
+ 0x01/*n_owned*/,
+ 0x00, 0x00/*heap_no=0*/,
+ 0x03/*n_fields=1, 1-byte offsets*/,
+ 0x00, 0x74/* pointer to supremum */,
+ 'i', 'n', 'f', 'i', 'm', 'u', 'm', 0,
+ /* the supremum record */
+ 0x09/*end offset*/,
+ 0x01/*n_owned*/,
+ 0x00, 0x08/*heap_no=1*/,
+ 0x03/*n_fields=1, 1-byte offsets*/,
+ 0x00, 0x00/* end of record list */,
+ 's', 'u', 'p', 'r', 'e', 'm', 'u', 'm', 0
+};
+
+/** The page infimum and supremum of an empty page in ROW_FORMAT=COMPACT */
+static const byte infimum_supremum_compact[] = {
+ /* the infimum record */
+ 0x01/*n_owned=1*/,
+ 0x00, 0x02/* heap_no=0, REC_STATUS_INFIMUM */,
+ 0x00, 0x0d/* pointer to supremum */,
+ 'i', 'n', 'f', 'i', 'm', 'u', 'm', 0,
+ /* the supremum record */
+ 0x01/*n_owned=1*/,
+ 0x00, 0x0b/* heap_no=1, REC_STATUS_SUPREMUM */,
+ 0x00, 0x00/* end of record list */,
+ 's', 'u', 'p', 'r', 'e', 'm', 'u', 'm'
+};
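+
+/* A sketch of the arithmetic behind the two arrays above, assuming
+the usual constants PAGE_DATA = 94, REC_N_OLD_EXTRA_BYTES = 6 and
+REC_N_NEW_EXTRA_BYTES = 5:
+
+	ROW_FORMAT=REDUNDANT (next pointers are absolute offsets):
+	PAGE_OLD_INFIMUM  = 94 + 1 + 6         = 101
+	PAGE_OLD_SUPREMUM = 94 + 2 + 2 * 6 + 8 = 116 = 0x74
+	hence the infimum's next pointer 0x00, 0x74.
+
+	ROW_FORMAT=COMPACT (next pointers are relative to the record):
+	PAGE_NEW_INFIMUM  = 94 + 5             = 99
+	PAGE_NEW_SUPREMUM = 94 + 2 * 5 + 8     = 112
+	hence the infimum's next pointer 112 - 99 = 13 = 0x00, 0x0d.
+*/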
+
+/** Create an index page.
+@param[in,out] block buffer block
+@param[in] comp nonzero=compact page format */
+void page_create_low(const buf_block_t* block, bool comp)
+{
+ page_t* page;
+
+ compile_time_assert(PAGE_BTR_IBUF_FREE_LIST + FLST_BASE_NODE_SIZE
+ <= PAGE_DATA);
+ compile_time_assert(PAGE_BTR_IBUF_FREE_LIST_NODE + FLST_NODE_SIZE
+ <= PAGE_DATA);
+
+ page = block->page.frame;
+
+ fil_page_set_type(page, FIL_PAGE_INDEX);
+
+ memset(page + PAGE_HEADER, 0, PAGE_HEADER_PRIV_END);
+ page[PAGE_HEADER + PAGE_N_DIR_SLOTS + 1] = 2;
+ page[PAGE_HEADER + PAGE_INSTANT] = 0;
+ page[PAGE_HEADER + PAGE_DIRECTION_B] = PAGE_NO_DIRECTION;
+
+ if (comp) {
+ page[PAGE_HEADER + PAGE_N_HEAP] = 0x80;/*page_is_comp()*/
+ page[PAGE_HEADER + PAGE_N_HEAP + 1] = PAGE_HEAP_NO_USER_LOW;
+ page[PAGE_HEADER + PAGE_HEAP_TOP + 1] = PAGE_NEW_SUPREMUM_END;
+ memcpy(page + PAGE_DATA, infimum_supremum_compact,
+ sizeof infimum_supremum_compact);
+ memset(page
+ + PAGE_NEW_SUPREMUM_END, 0,
+ srv_page_size - PAGE_DIR - PAGE_NEW_SUPREMUM_END);
+ page[srv_page_size - PAGE_DIR - PAGE_DIR_SLOT_SIZE * 2 + 1]
+ = PAGE_NEW_SUPREMUM;
+ page[srv_page_size - PAGE_DIR - PAGE_DIR_SLOT_SIZE + 1]
+ = PAGE_NEW_INFIMUM;
+ } else {
+ page[PAGE_HEADER + PAGE_N_HEAP + 1] = PAGE_HEAP_NO_USER_LOW;
+ page[PAGE_HEADER + PAGE_HEAP_TOP + 1] = PAGE_OLD_SUPREMUM_END;
+ memcpy(page + PAGE_DATA, infimum_supremum_redundant,
+ sizeof infimum_supremum_redundant);
+ memset(page
+ + PAGE_OLD_SUPREMUM_END, 0,
+ srv_page_size - PAGE_DIR - PAGE_OLD_SUPREMUM_END);
+ page[srv_page_size - PAGE_DIR - PAGE_DIR_SLOT_SIZE * 2 + 1]
+ = PAGE_OLD_SUPREMUM;
+ page[srv_page_size - PAGE_DIR - PAGE_DIR_SLOT_SIZE + 1]
+ = PAGE_OLD_INFIMUM;
+ }
+}
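+
+/* The resulting empty page, sketched for srv_page_size = 16384
+(illustrative; PAGE_DIR = 8, PAGE_DIR_SLOT_SIZE = 2):
+
+	0     .. 37	FIL page header
+	38    .. 93	index page header (PAGE_HEADER .. PAGE_DATA)
+	94    .. 119	infimum and supremum (.. 124 for REDUNDANT)
+	...		free space, zeroed above
+	16372 .. 16373	dir slot 1, pointing to the supremum
+	16374 .. 16375	dir slot 0, pointing to the infimum
+	16376 .. 16383	FIL page trailer (PAGE_DIR bytes)
+*/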
+
+/** Create an uncompressed index page.
+@param[in,out] block buffer block
+@param[in,out] mtr mini-transaction
+@param[in] comp set unless ROW_FORMAT=REDUNDANT */
+void page_create(buf_block_t *block, mtr_t *mtr, bool comp)
+{
+ mtr->page_create(*block, comp);
+ buf_block_modify_clock_inc(block);
+ page_create_low(block, comp);
+}
+
+/**********************************************************//**
+Create a compressed B-tree index page. */
+void
+page_create_zip(
+/*============*/
+ buf_block_t* block, /*!< in/out: a buffer frame
+ where the page is created */
+ dict_index_t* index, /*!< in: the index of the
+ page */
+ ulint level, /*!< in: the B-tree level
+ of the page */
+ trx_id_t max_trx_id, /*!< in: PAGE_MAX_TRX_ID */
+ mtr_t* mtr) /*!< in/out: mini-transaction
+ handle */
+{
+ ut_ad(block);
+ ut_ad(buf_block_get_page_zip(block));
+ ut_ad(dict_table_is_comp(index->table));
+
+ /* PAGE_MAX_TRX_ID or PAGE_ROOT_AUTO_INC are always 0 for
+ temporary tables. */
+ ut_ad(max_trx_id == 0 || !index->table->is_temporary());
+ /* In secondary indexes and the change buffer, PAGE_MAX_TRX_ID
+ must be zero on non-leaf pages. max_trx_id can be 0 when the
+ index consists of an empty root (leaf) page. */
+ ut_ad(max_trx_id == 0
+ || level == 0
+ || !dict_index_is_sec_or_ibuf(index)
+ || index->table->is_temporary());
+ /* In the clustered index, PAGE_ROOT_AUTOINC or
+ PAGE_MAX_TRX_ID must be 0 on other pages than the root. */
+ ut_ad(level == 0 || max_trx_id == 0
+ || !dict_index_is_sec_or_ibuf(index)
+ || index->table->is_temporary());
+
+ buf_block_modify_clock_inc(block);
+ page_create_low(block, true);
+
+ if (index->is_spatial()) {
+ mach_write_to_2(FIL_PAGE_TYPE + block->page.frame,
+ FIL_PAGE_RTREE);
+ memset(block->page.frame + FIL_RTREE_SPLIT_SEQ_NUM, 0, 8);
+ memset(block->page.zip.data + FIL_RTREE_SPLIT_SEQ_NUM, 0, 8);
+ }
+
+ mach_write_to_2(PAGE_HEADER + PAGE_LEVEL + block->page.frame, level);
+ mach_write_to_8(PAGE_HEADER + PAGE_MAX_TRX_ID + block->page.frame,
+ max_trx_id);
+
+ if (!page_zip_compress(block, index, page_zip_level, mtr)) {
+ /* The compression of a newly created
+ page should always succeed. */
+ ut_error;
+ }
+}
+
+/**********************************************************//**
+Empty a previously created B-tree index page. */
+void
+page_create_empty(
+/*==============*/
+ buf_block_t* block, /*!< in/out: B-tree block */
+ dict_index_t* index, /*!< in: the index of the page */
+ mtr_t* mtr) /*!< in/out: mini-transaction */
+{
+ trx_id_t max_trx_id;
+ page_zip_des_t* page_zip= buf_block_get_page_zip(block);
+
+ ut_ad(fil_page_index_page_check(block->page.frame));
+ ut_ad(!index->is_dummy);
+ ut_ad(block->page.id().space() == index->table->space->id);
+
+	/* Multiple transactions cannot operate on the same temp-table
+	in parallel. max_trx_id is ignored for temp tables because it
+	is not required for MVCC. */
+ if (dict_index_is_sec_or_ibuf(index)
+ && !index->table->is_temporary()
+ && page_is_leaf(block->page.frame)) {
+ max_trx_id = page_get_max_trx_id(block->page.frame);
+ ut_ad(max_trx_id);
+ } else if (block->page.id().page_no() == index->page) {
+ /* Preserve PAGE_ROOT_AUTO_INC. */
+ max_trx_id = page_get_max_trx_id(block->page.frame);
+ } else {
+ max_trx_id = 0;
+ }
+
+ if (page_zip) {
+ ut_ad(!index->table->is_temporary());
+ page_create_zip(block, index,
+ page_header_get_field(block->page.frame,
+ PAGE_LEVEL),
+ max_trx_id, mtr);
+ } else {
+ page_create(block, mtr, index->table->not_redundant());
+ if (index->is_spatial()) {
+ static_assert(((FIL_PAGE_INDEX & 0xff00)
+ | byte(FIL_PAGE_RTREE))
+ == FIL_PAGE_RTREE, "compatibility");
+ mtr->write<1>(*block,
+ FIL_PAGE_TYPE + 1 + block->page.frame,
+ byte(FIL_PAGE_RTREE));
+ if (mach_read_from_8(block->page.frame
+ + FIL_RTREE_SPLIT_SEQ_NUM)) {
+ mtr->memset(block, FIL_RTREE_SPLIT_SEQ_NUM,
+ 8, 0);
+ }
+ }
+
+ if (max_trx_id) {
+ mtr->write<8>(*block, PAGE_HEADER + PAGE_MAX_TRX_ID
+ + block->page.frame, max_trx_id);
+ }
+ }
+}
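+
+/* How max_trx_id is chosen above, as a decision table (derived
+directly from the code):
+
+	secondary index or ibuf leaf page, persistent table
+			-> preserve PAGE_MAX_TRX_ID
+	root page	-> preserve PAGE_ROOT_AUTO_INC
+	any other page	-> 0
+*/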
+
+/*************************************************************//**
+Differs from page_copy_rec_list_end() in that this function touches
+neither the lock table nor the max trx id on the page, and does not
+compress the page.
+
+IMPORTANT: The caller will have to update IBUF_BITMAP_FREE
+if new_block is a compressed leaf page in a secondary index.
+This has to be done either within the same mini-transaction,
+or by invoking ibuf_reset_free_bits() before mtr_commit().
+
+@return error code */
+dberr_t
+page_copy_rec_list_end_no_locks(
+/*============================*/
+ buf_block_t* new_block, /*!< in: index page to copy to */
+ buf_block_t* block, /*!< in: index page of rec */
+ rec_t* rec, /*!< in: record on page */
+ dict_index_t* index, /*!< in: record descriptor */
+ mtr_t* mtr) /*!< in: mtr */
+{
+ page_t* new_page = buf_block_get_frame(new_block);
+ page_cur_t cur1;
+ page_cur_t cur2;
+ mem_heap_t* heap = NULL;
+ rec_offs offsets_[REC_OFFS_NORMAL_SIZE];
+ rec_offs* offsets = offsets_;
+ rec_offs_init(offsets_);
+
+ cur1.index = cur2.index = index;
+ page_cur_position(rec, block, &cur1);
+
+ if (page_cur_is_before_first(&cur1) && !page_cur_move_to_next(&cur1)) {
+ return DB_CORRUPTION;
+ }
+
+ if (UNIV_UNLIKELY(page_is_comp(new_page) != page_rec_is_comp(rec)
+ || mach_read_from_2(new_page + srv_page_size - 10)
+ != ulint(page_is_comp(new_page)
+ ? PAGE_NEW_INFIMUM : PAGE_OLD_INFIMUM))) {
+ return DB_CORRUPTION;
+ }
+
+ const ulint n_core = page_is_leaf(block->page.frame)
+ ? index->n_core_fields : 0;
+
+ dberr_t err = DB_SUCCESS;
+ page_cur_set_before_first(new_block, &cur2);
+
+ /* Copy records from the original page to the new page */
+
+ while (!page_cur_is_after_last(&cur1)) {
+ rec_t* ins_rec;
+ offsets = rec_get_offsets(cur1.rec, index, offsets, n_core,
+ ULINT_UNDEFINED, &heap);
+ ins_rec = page_cur_insert_rec_low(&cur2, cur1.rec, offsets,
+ mtr);
+ if (UNIV_UNLIKELY(!ins_rec || !page_cur_move_to_next(&cur1))) {
+ err = DB_CORRUPTION;
+ break;
+ }
+ ut_ad(!(rec_get_info_bits(cur1.rec, page_is_comp(new_page))
+ & REC_INFO_MIN_REC_FLAG));
+ cur2.rec = ins_rec;
+ }
+
+ if (UNIV_LIKELY_NULL(heap)) {
+ mem_heap_free(heap);
+ }
+
+ return err;
+}
+
+/*************************************************************//**
+Copies records from page to new_page, from a given record onward,
+including that record. Infimum and supremum records are not copied.
+The records are copied to the start of the record list on new_page.
+
+IMPORTANT: The caller will have to update IBUF_BITMAP_FREE
+if new_block is a compressed leaf page in a secondary index.
+This has to be done either within the same mini-transaction,
+or by invoking ibuf_reset_free_bits() before mtr_t::commit().
+
+@return pointer to the original successor of the infimum record on new_block
+@retval nullptr on ROW_FORMAT=COMPRESSED page overflow */
+rec_t*
+page_copy_rec_list_end(
+/*===================*/
+ buf_block_t* new_block, /*!< in/out: index page to copy to */
+ buf_block_t* block, /*!< in: index page containing rec */
+ rec_t* rec, /*!< in: record on page */
+ dict_index_t* index, /*!< in: record descriptor */
+ mtr_t* mtr, /*!< in/out: mini-transaction */
+ dberr_t* err) /*!< out: error code */
+{
+ page_t* new_page = new_block->page.frame;
+ page_zip_des_t* new_page_zip = buf_block_get_page_zip(new_block);
+ page_t* page = block->page.frame;
+ rec_t* ret = page_rec_get_next(
+ page_get_infimum_rec(new_page));
+ ulint num_moved = 0;
+ ut_ad(page_align(rec) == page);
+
+ if (UNIV_UNLIKELY(!ret)) {
+ *err = DB_CORRUPTION;
+ return nullptr;
+ }
+
+#ifdef UNIV_ZIP_DEBUG
+ if (new_page_zip) {
+ page_zip_des_t* page_zip = buf_block_get_page_zip(block);
+ ut_a(page_zip);
+
+ /* Strict page_zip_validate() may fail here.
+ Furthermore, btr_compress() may set FIL_PAGE_PREV to
+ FIL_NULL on new_page while leaving it intact on
+ new_page_zip. So, we cannot validate new_page_zip. */
+ ut_a(page_zip_validate_low(page_zip, page, index, TRUE));
+ }
+#endif /* UNIV_ZIP_DEBUG */
+ ut_ad(buf_block_get_frame(block) == page);
+ ut_ad(page_is_leaf(page) == page_is_leaf(new_page));
+ ut_ad(page_is_comp(page) == page_is_comp(new_page));
+ /* Here, "ret" may be pointing to a user record or the
+ predefined supremum record. */
+
+ const mtr_log_t log_mode = new_page_zip
+ ? mtr->set_log_mode(MTR_LOG_NONE) : MTR_LOG_NONE;
+ const bool was_empty = page_dir_get_n_heap(new_page)
+ == PAGE_HEAP_NO_USER_LOW;
+ alignas(2) byte h[PAGE_N_DIRECTION + 2 - PAGE_LAST_INSERT];
+ memcpy_aligned<2>(h, PAGE_HEADER + PAGE_LAST_INSERT + new_page,
+ sizeof h);
+ mem_heap_t* heap = nullptr;
+ rtr_rec_move_t* rec_move = nullptr;
+
+ if (index->is_spatial()) {
+ ulint max_to_move = page_get_n_recs(
+ buf_block_get_frame(block));
+ heap = mem_heap_create(256);
+ rec_move= static_cast<rtr_rec_move_t*>(
+ mem_heap_alloc(heap, max_to_move * sizeof *rec_move));
+ /* For spatial index, we need to insert recs one by one
+ to keep recs ordered. */
+ *err = rtr_page_copy_rec_list_end_no_locks(new_block,
+ block, rec, index,
+ heap, rec_move,
+ max_to_move,
+ &num_moved,
+ mtr);
+ } else {
+ *err = page_copy_rec_list_end_no_locks(new_block, block, rec,
+ index, mtr);
+ if (UNIV_UNLIKELY(*err != DB_SUCCESS)) {
+err_exit:
+ if (UNIV_LIKELY_NULL(heap)) {
+ mem_heap_free(heap);
+ }
+ return nullptr;
+ }
+ if (was_empty) {
+ mtr->memcpy<mtr_t::MAYBE_NOP>(*new_block, PAGE_HEADER
+ + PAGE_LAST_INSERT
+ + new_page, h, sizeof h);
+ }
+ }
+
+ /* Update PAGE_MAX_TRX_ID on the uncompressed page.
+ Modifications will be redo logged and copied to the compressed
+ page in page_zip_compress() or page_zip_reorganize() below.
+	Multiple transactions cannot operate on the same temp-table
+	in parallel. max_trx_id is ignored for temp tables because it
+	is not required for MVCC. */
+ if (dict_index_is_sec_or_ibuf(index)
+ && page_is_leaf(page)
+ && !index->table->is_temporary()) {
+ ut_ad(!was_empty || page_dir_get_n_heap(new_page)
+ == PAGE_HEAP_NO_USER_LOW
+ + page_header_get_field(new_page, PAGE_N_RECS));
+ page_update_max_trx_id(new_block, NULL,
+ page_get_max_trx_id(page), mtr);
+ }
+
+ if (new_page_zip) {
+ mtr_set_log_mode(mtr, log_mode);
+
+ if (!page_zip_compress(new_block, index,
+ page_zip_level, mtr)) {
+ /* Before trying to reorganize the page,
+ store the number of preceding records on the page. */
+ ulint ret_pos
+ = page_rec_get_n_recs_before(ret);
+ /* Before copying, "ret" was the successor of
+ the predefined infimum record. It must still
+ have at least one predecessor (the predefined
+ infimum record, or a freshly copied record
+ that is smaller than "ret"). */
+ if (UNIV_UNLIKELY(!ret_pos
+ || ret_pos == ULINT_UNDEFINED)) {
+ *err = DB_CORRUPTION;
+ goto err_exit;
+ }
+
+ *err = page_zip_reorganize(new_block, index,
+ page_zip_level, mtr);
+ switch (*err) {
+ case DB_FAIL:
+ if (!page_zip_decompress(new_page_zip,
+ new_page, FALSE)) {
+ ut_error;
+ }
+ ut_ad(page_validate(new_page, index));
+ /* fall through */
+ default:
+ goto err_exit;
+ case DB_SUCCESS:
+ /* The page was reorganized:
+ Seek to ret_pos. */
+ ret = page_rec_get_nth(new_page, ret_pos);
+ ut_ad(ret);
+ }
+ }
+ }
+
+ /* Update the lock table and possible hash index */
+
+ if (!index->has_locking()) {
+ } else if (UNIV_LIKELY_NULL(rec_move)) {
+ lock_rtr_move_rec_list(new_block, block, rec_move, num_moved);
+ } else {
+ lock_move_rec_list_end(new_block, block, rec);
+ }
+
+ if (UNIV_LIKELY_NULL(heap)) {
+ mem_heap_free(heap);
+ }
+
+ btr_search_move_or_delete_hash_entries(new_block, block);
+
+ return(ret);
+}
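+
+/* A sketch of the position-restore idiom used above when
+page_zip_reorganize() physically moves records (illustrative):
+
+	ulint pos = page_rec_get_n_recs_before(ret);	// logical position
+	// ... the page is reorganized ...
+	ret = page_rec_get_nth(new_page, pos);	// same logical record
+*/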
+
+/*************************************************************//**
+Copies records from page to new_page, up to the given record,
+NOT including that record. Infimum and supremum records are not copied.
+The records are copied to the end of the record list on new_page.
+
+IMPORTANT: The caller will have to update IBUF_BITMAP_FREE
+if new_block is a compressed leaf page in a secondary index.
+This has to be done either within the same mini-transaction,
+or by invoking ibuf_reset_free_bits() before mtr_commit().
+
+@return pointer to the original predecessor of the supremum record on new_block
+@retval nullptr on ROW_FORMAT=COMPRESSED page overflow */
+rec_t*
+page_copy_rec_list_start(
+/*=====================*/
+ buf_block_t* new_block, /*!< in/out: index page to copy to */
+ buf_block_t* block, /*!< in: index page containing rec */
+ rec_t* rec, /*!< in: record on page */
+ dict_index_t* index, /*!< in: record descriptor */
+ mtr_t* mtr, /*!< in/out: mini-transaction */
+ dberr_t* err) /*!< out: error code */
+{
+ ut_ad(page_align(rec) == block->page.frame);
+
+ page_t* new_page = buf_block_get_frame(new_block);
+ page_zip_des_t* new_page_zip = buf_block_get_page_zip(new_block);
+ page_cur_t cur1;
+ page_cur_t cur2;
+ mem_heap_t* heap = NULL;
+ ulint num_moved = 0;
+ rtr_rec_move_t* rec_move = NULL;
+ rec_t* ret
+ = page_rec_get_prev(page_get_supremum_rec(new_page));
+ rec_offs offsets_[REC_OFFS_NORMAL_SIZE];
+ rec_offs* offsets = offsets_;
+ rec_offs_init(offsets_);
+
+ if (UNIV_UNLIKELY(!ret)) {
+corrupted:
+ *err = DB_CORRUPTION;
+ return nullptr;
+ }
+
+ /* Here, "ret" may be pointing to a user record or the
+ predefined infimum record. */
+
+ if (page_rec_is_infimum(rec)) {
+ *err = DB_SUCCESS;
+ return(ret);
+ }
+
+ page_cur_set_before_first(block, &cur1);
+ if (UNIV_UNLIKELY(!page_cur_move_to_next(&cur1))) {
+ goto corrupted;
+ }
+
+ mtr_log_t log_mode = MTR_LOG_NONE;
+
+ if (new_page_zip) {
+ log_mode = mtr_set_log_mode(mtr, MTR_LOG_NONE);
+ }
+
+ cur2.index = index;
+ page_cur_position(ret, new_block, &cur2);
+
+ const ulint n_core = page_rec_is_leaf(rec) ? index->n_core_fields : 0;
+
+ /* Copy records from the original page to the new page */
+ if (index->is_spatial()) {
+ ut_ad(!index->is_instant());
+ ulint max_to_move = page_get_n_recs(
+ buf_block_get_frame(block));
+ heap = mem_heap_create(256);
+
+ rec_move = static_cast<rtr_rec_move_t*>(mem_heap_alloc(
+ heap,
+ sizeof (*rec_move) * max_to_move));
+
+ /* For spatial index, we need to insert recs one by one
+ to keep recs ordered. */
+ *err = rtr_page_copy_rec_list_start_no_locks(new_block,
+ block, rec, index,
+ heap, rec_move,
+ max_to_move,
+ &num_moved, mtr);
+ if (*err != DB_SUCCESS) {
+ return nullptr;
+ }
+ } else {
+ while (page_cur_get_rec(&cur1) != rec) {
+ offsets = rec_get_offsets(cur1.rec, index, offsets,
+ n_core,
+ ULINT_UNDEFINED, &heap);
+ cur2.rec = page_cur_insert_rec_low(&cur2, cur1.rec,
+ offsets, mtr);
+ if (UNIV_UNLIKELY(!cur2.rec
+ || !page_cur_move_to_next(&cur1))) {
+ *err = DB_CORRUPTION;
+ return nullptr;
+ }
+
+ ut_ad(!(rec_get_info_bits(cur1.rec,
+ page_is_comp(new_page))
+ & REC_INFO_MIN_REC_FLAG));
+ }
+ }
+
+ /* Update PAGE_MAX_TRX_ID on the uncompressed page.
+ Modifications will be redo logged and copied to the compressed
+ page in page_zip_compress() or page_zip_reorganize() below.
+	Multiple transactions cannot operate on the same temp-table
+	in parallel. max_trx_id is ignored for temp tables because it
+	is not required for MVCC. */
+ if (n_core && !index->is_primary() && !index->table->is_temporary()) {
+ page_update_max_trx_id(new_block, nullptr,
+ page_get_max_trx_id(block->page.frame),
+ mtr);
+ }
+
+ if (new_page_zip) {
+ mtr_set_log_mode(mtr, log_mode);
+
+ DBUG_EXECUTE_IF("page_copy_rec_list_start_compress_fail",
+ goto zip_reorganize;);
+
+ if (!page_zip_compress(new_block, index,
+ page_zip_level, mtr)) {
+#ifndef DBUG_OFF
+zip_reorganize:
+#endif /* DBUG_OFF */
+ /* Before trying to reorganize the page,
+ store the number of preceding records on the page. */
+ ulint ret_pos = page_rec_get_n_recs_before(ret);
+ /* Before copying, "ret" was the predecessor
+ of the predefined supremum record. If it was
+ the predefined infimum record, then it would
+ still be the infimum, and we would have
+ ret_pos == 0. */
+ if (UNIV_UNLIKELY(!ret_pos
+ || ret_pos == ULINT_UNDEFINED)) {
+ *err = DB_CORRUPTION;
+ return nullptr;
+ }
+ *err = page_zip_reorganize(new_block, index,
+ page_zip_level, mtr);
+ switch (*err) {
+ case DB_SUCCESS:
+ ret = page_rec_get_nth(new_page, ret_pos);
+ ut_ad(ret);
+ break;
+ case DB_FAIL:
+ if (UNIV_UNLIKELY
+ (!page_zip_decompress(new_page_zip,
+ new_page, FALSE))) {
+ ut_error;
+ }
+ ut_ad(page_validate(new_page, index));
+ /* fall through */
+ default:
+ if (UNIV_LIKELY_NULL(heap)) {
+ mem_heap_free(heap);
+ }
+
+ return nullptr;
+ }
+ }
+ }
+
+ /* Update the lock table and possible hash index */
+
+ if (!index->has_locking()) {
+ } else if (dict_index_is_spatial(index)) {
+ lock_rtr_move_rec_list(new_block, block, rec_move, num_moved);
+ } else {
+ lock_move_rec_list_start(new_block, block, rec, ret);
+ }
+
+ if (heap) {
+ mem_heap_free(heap);
+ }
+
+ btr_search_move_or_delete_hash_entries(new_block, block);
+
+ *err = DB_SUCCESS;
+ return(ret);
+}
+
+/*************************************************************//**
+Deletes records from a page from a given record onward, including that record.
+The infimum and supremum records are not deleted. */
+dberr_t
+page_delete_rec_list_end(
+/*=====================*/
+ rec_t* rec, /*!< in: pointer to record on page */
+ buf_block_t* block, /*!< in: buffer block of the page */
+ dict_index_t* index, /*!< in: record descriptor */
+ ulint n_recs, /*!< in: number of records to delete,
+ or ULINT_UNDEFINED if not known */
+ ulint size, /*!< in: the sum of the sizes of the
+ records in the end of the chain to
+ delete, or ULINT_UNDEFINED if not known */
+ mtr_t* mtr) /*!< in: mtr */
+{
+ page_t * const page= block->page.frame;
+
+ ut_ad(size == ULINT_UNDEFINED || size < srv_page_size);
+ ut_ad(page_align(rec) == page);
+ ut_ad(index->table->not_redundant() == !!page_is_comp(page));
+#ifdef UNIV_ZIP_DEBUG
+ ut_a(!block->page.zip.data ||
+ page_zip_validate(&block->page.zip, page, index));
+#endif /* UNIV_ZIP_DEBUG */
+
+ if (page_rec_is_supremum(rec))
+ {
+ ut_ad(n_recs == 0 || n_recs == ULINT_UNDEFINED);
+ /* Nothing to do, there are no records bigger than the page supremum. */
+ return DB_SUCCESS;
+ }
+
+ if (page_rec_is_infimum(rec) ||
+ n_recs == page_get_n_recs(page) ||
+ rec == (page_is_comp(page)
+ ? page_rec_get_next_low(page + PAGE_NEW_INFIMUM, 1)
+ : page_rec_get_next_low(page + PAGE_OLD_INFIMUM, 0)))
+ {
+ /* We are deleting all records. */
+ page_create_empty(block, index, mtr);
+ return DB_SUCCESS;
+ }
+
+#if 0 // FIXME: consider deleting the last record as a special case
+ if (page_rec_is_last(rec))
+ {
+ page_cur_t cursor= { index, rec, offsets, block };
+ page_cur_delete_rec(&cursor, index, offsets, mtr);
+ return DB_SUCCESS;
+ }
+#endif
+
+ /* The page becomes invalid for optimistic searches */
+ buf_block_modify_clock_inc(block);
+
+ const ulint n_core= page_is_leaf(page) ? index->n_core_fields : 0;
+ mem_heap_t *heap= nullptr;
+ rec_offs offsets_[REC_OFFS_NORMAL_SIZE];
+ rec_offs *offsets= offsets_;
+ rec_offs_init(offsets_);
+
+#if 1 // FIXME: remove this, and write a minimal amount of log!
+ if (UNIV_LIKELY_NULL(block->page.zip.data))
+ {
+ ut_ad(page_is_comp(page));
+ do
+ {
+ page_cur_t cur;
+ page_cur_position(rec, block, &cur);
+ cur.index= index;
+ offsets= rec_get_offsets(rec, index, offsets, n_core,
+ ULINT_UNDEFINED, &heap);
+ rec= const_cast<rec_t*>(page_rec_get_next_low(rec, true));
+#ifdef UNIV_ZIP_DEBUG
+ ut_a(page_zip_validate(&block->page.zip, page, index));
+#endif /* UNIV_ZIP_DEBUG */
+ page_cur_delete_rec(&cur, offsets, mtr);
+ }
+ while (page_offset(rec) != PAGE_NEW_SUPREMUM);
+
+ if (UNIV_LIKELY_NULL(heap))
+ mem_heap_free(heap);
+ return DB_SUCCESS;
+ }
+#endif
+
+ byte *prev_rec= page_rec_get_prev(rec);
+ if (UNIV_UNLIKELY(!prev_rec))
+ return DB_CORRUPTION;
+ byte *last_rec= page_rec_get_prev(page_get_supremum_rec(page));
+ if (UNIV_UNLIKELY(!last_rec))
+ return DB_CORRUPTION;
+
+ // FIXME: consider a special case of shrinking PAGE_HEAP_TOP
+
+ const bool scrub= srv_immediate_scrub_data_uncompressed;
+ if (scrub || size == ULINT_UNDEFINED || n_recs == ULINT_UNDEFINED)
+ {
+ rec_t *rec2= rec;
+ /* Calculate the sum of sizes and the number of records */
+ size= 0;
+ n_recs= 0;
+
+ do
+ {
+ offsets = rec_get_offsets(rec2, index, offsets, n_core,
+ ULINT_UNDEFINED, &heap);
+ ulint s= rec_offs_size(offsets);
+ ut_ad(ulint(rec2 - page) + s - rec_offs_extra_size(offsets) <
+ srv_page_size);
+ ut_ad(size + s < srv_page_size);
+ size+= s;
+ n_recs++;
+
+ if (scrub)
+ mtr->memset(block, page_offset(rec2), rec_offs_data_size(offsets), 0);
+
+ rec2= page_rec_get_next(rec2);
+ }
+ while (rec2 && !page_rec_is_supremum(rec2));
+
+ if (UNIV_LIKELY_NULL(heap))
+ mem_heap_free(heap);
+
+    if (UNIV_UNLIKELY(!rec2))
+ return DB_CORRUPTION;
+ }
+
+ ut_ad(size < srv_page_size);
+
+ ulint slot_index, n_owned;
+ {
+ const rec_t *owner_rec= rec;
+ ulint count= 0;
+
+ if (page_is_comp(page))
+ while (!(n_owned= rec_get_n_owned_new(owner_rec)))
+ {
+ count++;
+ if (!(owner_rec= page_rec_get_next_low(owner_rec, true)))
+ return DB_CORRUPTION;
+ }
+ else
+ while (!(n_owned= rec_get_n_owned_old(owner_rec)))
+ {
+ count++;
+ if (!(owner_rec= page_rec_get_next_low(owner_rec, false)))
+ return DB_CORRUPTION;
+ }
+
+ ut_ad(n_owned > count);
+ n_owned-= count;
+ slot_index= page_dir_find_owner_slot(owner_rec);
+ }
+
+ if (UNIV_UNLIKELY(!slot_index || slot_index == ULINT_UNDEFINED))
+ return DB_CORRUPTION;
+
+ mtr->write<2,mtr_t::MAYBE_NOP>(*block, my_assume_aligned<2>
+ (PAGE_N_DIR_SLOTS + PAGE_HEADER + page),
+ slot_index + 1);
+ mtr->write<2,mtr_t::MAYBE_NOP>(*block, my_assume_aligned<2>
+ (PAGE_LAST_INSERT + PAGE_HEADER + page), 0U);
+ /* Catenate the deleted chain segment to the page free list */
+ alignas(4) byte page_header[4];
+ byte *page_free= my_assume_aligned<4>(PAGE_HEADER + PAGE_FREE + page);
+ const uint16_t free= page_header_get_field(page, PAGE_FREE);
+ static_assert(PAGE_FREE + 2 == PAGE_GARBAGE, "compatibility");
+
+ mach_write_to_2(page_header, page_offset(rec));
+ mach_write_to_2(my_assume_aligned<2>(page_header + 2),
+ mach_read_from_2(my_assume_aligned<2>(page_free + 2)) +
+ size);
+ mtr->memcpy(*block, page_free, page_header, 4);
+
+ byte *page_n_recs= my_assume_aligned<2>(PAGE_N_RECS + PAGE_HEADER + page);
+ mtr->write<2>(*block, page_n_recs,
+ ulint{mach_read_from_2(page_n_recs)} - n_recs);
+
+  /* Update the page directory; there is no need to balance the number
+  of records owned by the supremum record, as it is allowed to be
+  less than PAGE_DIR_SLOT_MIN_N_OWNED */
+ page_dir_slot_t *slot= page_dir_get_nth_slot(page, slot_index);
+
+ if (page_is_comp(page))
+ {
+ mtr->write<2,mtr_t::MAYBE_NOP>(*block, slot, PAGE_NEW_SUPREMUM);
+ byte *owned= PAGE_NEW_SUPREMUM - REC_NEW_N_OWNED + page;
+ byte new_owned= static_cast<byte>((*owned & ~REC_N_OWNED_MASK) |
+ n_owned << REC_N_OWNED_SHIFT);
+#if 0 // FIXME: implement minimal logging for ROW_FORMAT=COMPRESSED
+ if (UNIV_LIKELY_NULL(block->page.zip.data))
+ {
+ *owned= new_owned;
+ memcpy_aligned<2>(PAGE_N_DIR_SLOTS + PAGE_HEADER + block->page.zip.data,
+ PAGE_N_DIR_SLOTS + PAGE_HEADER + page,
+ PAGE_N_RECS + 2 - PAGE_N_DIR_SLOTS);
+ // TODO: the equivalent of page_zip_dir_delete() for all records
+ mach_write_to_2(prev_rec - REC_NEXT, static_cast<uint16_t>
+ (PAGE_NEW_SUPREMUM - page_offset(prev_rec)));
+ mach_write_to_2(last_rec - REC_NEXT, free
+ ? static_cast<uint16_t>(free - page_offset(last_rec))
+ : 0U);
+ return DB_SUCCESS;
+ }
+#endif
+ mtr->write<1,mtr_t::MAYBE_NOP>(*block, owned, new_owned);
+ mtr->write<2>(*block, prev_rec - REC_NEXT, static_cast<uint16_t>
+ (PAGE_NEW_SUPREMUM - page_offset(prev_rec)));
+ mtr->write<2>(*block, last_rec - REC_NEXT, free
+ ? static_cast<uint16_t>(free - page_offset(last_rec))
+ : 0U);
+ }
+ else
+ {
+ mtr->write<2,mtr_t::MAYBE_NOP>(*block, slot, PAGE_OLD_SUPREMUM);
+ byte *owned= PAGE_OLD_SUPREMUM - REC_OLD_N_OWNED + page;
+ byte new_owned= static_cast<byte>((*owned & ~REC_N_OWNED_MASK) |
+ n_owned << REC_N_OWNED_SHIFT);
+ mtr->write<1,mtr_t::MAYBE_NOP>(*block, owned, new_owned);
+ mtr->write<2>(*block, prev_rec - REC_NEXT, PAGE_OLD_SUPREMUM);
+ mtr->write<2>(*block, last_rec - REC_NEXT, free);
+ }
+
+ return DB_SUCCESS;
+}
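+
+/* The free-list catenation above, sketched (illustrative):
+
+	before:	PAGE_FREE -> f1 -> f2 -> ...
+	after:	PAGE_FREE -> rec -> ... -> last_rec -> f1 -> f2 -> ...
+
+Because PAGE_FREE + 2 == PAGE_GARBAGE (see the static_assert above),
+the new list head and the increased garbage byte count are logged
+with a single 4-byte write. */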
+
+/*************************************************************//**
+Deletes records from page, up to the given record, NOT including
+that record. Infimum and supremum records are not deleted. */
+void
+page_delete_rec_list_start(
+/*=======================*/
+ rec_t* rec, /*!< in: record on page */
+ buf_block_t* block, /*!< in: buffer block of the page */
+ dict_index_t* index, /*!< in: record descriptor */
+ mtr_t* mtr) /*!< in: mtr */
+{
+ page_cur_t cur1;
+ rec_offs offsets_[REC_OFFS_NORMAL_SIZE];
+ rec_offs* offsets = offsets_;
+ mem_heap_t* heap = NULL;
+
+ rec_offs_init(offsets_);
+
+ ut_ad(page_align(rec) == block->page.frame);
+ ut_ad((ibool) !!page_rec_is_comp(rec)
+ == dict_table_is_comp(index->table));
+#ifdef UNIV_ZIP_DEBUG
+ {
+ page_zip_des_t* page_zip= buf_block_get_page_zip(block);
+ page_t* page = buf_block_get_frame(block);
+
+ /* page_zip_validate() would detect a min_rec_mark mismatch
+ in btr_page_split_and_insert()
+ between btr_attach_half_pages() and insert_page = ...
+ when btr_page_get_split_rec_to_left() holds
+ (direction == FSP_DOWN). */
+ ut_a(!page_zip
+ || page_zip_validate_low(page_zip, page, index, TRUE));
+ }
+#endif /* UNIV_ZIP_DEBUG */
+
+ if (page_rec_is_infimum(rec)) {
+ return;
+ }
+
+ if (page_rec_is_supremum(rec)) {
+ /* We are deleting all records. */
+ page_create_empty(block, index, mtr);
+ return;
+ }
+
+ cur1.index = index;
+ page_cur_set_before_first(block, &cur1);
+ if (UNIV_UNLIKELY(!page_cur_move_to_next(&cur1))) {
+ ut_ad("corrupted page" == 0);
+ return;
+ }
+
+ const ulint n_core = page_rec_is_leaf(rec)
+ ? index->n_core_fields : 0;
+
+ while (page_cur_get_rec(&cur1) != rec) {
+ offsets = rec_get_offsets(page_cur_get_rec(&cur1), index,
+ offsets, n_core,
+ ULINT_UNDEFINED, &heap);
+ page_cur_delete_rec(&cur1, offsets, mtr);
+ }
+
+ if (UNIV_LIKELY_NULL(heap)) {
+ mem_heap_free(heap);
+ }
+}
+
+/************************************************************//**
+Returns the nth record of the record list.
+This is the inverse function of page_rec_get_n_recs_before().
+@return nth record
+@retval nullptr on corrupted page */
+const rec_t*
+page_rec_get_nth_const(
+/*===================*/
+ const page_t* page, /*!< in: page */
+ ulint nth) /*!< in: nth record */
+{
+ const page_dir_slot_t* slot;
+ ulint i;
+ ulint n_owned;
+ const rec_t* rec;
+
+ if (nth == 0) {
+ return(page_get_infimum_rec(page));
+ }
+
+ ut_ad(nth < srv_page_size / (REC_N_NEW_EXTRA_BYTES + 1));
+
+ for (i = 0;; i++) {
+ slot = page_dir_get_nth_slot(page, i);
+ n_owned = page_dir_slot_get_n_owned(slot);
+
+ if (n_owned > nth) {
+ break;
+ } else {
+ nth -= n_owned;
+ }
+ }
+
+ if (UNIV_UNLIKELY(!i)) {
+ return nullptr;
+ }
+ rec = page_dir_slot_get_rec(slot + 2);
+
+ if (page_is_comp(page)) {
+ do {
+ rec = page_rec_get_next_low(rec, TRUE);
+ } while (rec && nth--);
+ } else {
+ do {
+ rec = page_rec_get_next_low(rec, FALSE);
+ } while (rec && nth--);
+ }
+
+ return(rec);
+}
+
+
+/************************************************************//**
+Gets the pointer to the previous record.
+@return pointer to previous record
+@retval nullptr on error */
+const rec_t*
+page_rec_get_prev_const(
+/*====================*/
+ const rec_t* rec) /*!< in: pointer to record, must not be page
+ infimum */
+{
+ const rec_t* rec2;
+ const rec_t* prev_rec = NULL;
+
+ ut_ad(page_rec_check(rec));
+
+ const page_t* const page = page_align(rec);
+
+ ut_ad(!page_rec_is_infimum(rec));
+
+ ulint slot_no = page_dir_find_owner_slot(rec);
+
+ if (UNIV_UNLIKELY(!slot_no || slot_no == ULINT_UNDEFINED)) {
+ return nullptr;
+ }
+
+ const page_dir_slot_t* slot = page_dir_get_nth_slot(page, slot_no - 1);
+
+ if (UNIV_UNLIKELY(!(rec2 = page_dir_slot_get_rec_validate(slot)))) {
+ return nullptr;
+ }
+
+ if (page_is_comp(page)) {
+ while (rec2 && rec != rec2) {
+ prev_rec = rec2;
+ ulint offs = rec_get_next_offs(rec2, TRUE);
+ if (offs < PAGE_NEW_INFIMUM
+ || offs > page_header_get_field(page,
+ PAGE_HEAP_TOP)) {
+ return nullptr;
+ }
+ rec2 = page + offs;
+ }
+ switch (rec_get_status(prev_rec)) {
+ case REC_STATUS_INSTANT:
+ case REC_STATUS_ORDINARY:
+ if (!page_is_leaf(page)) {
+ return nullptr;
+ }
+ break;
+ case REC_STATUS_INFIMUM:
+ break;
+ case REC_STATUS_NODE_PTR:
+ if (!page_is_leaf(page)) {
+ break;
+ }
+ /* fall through */
+ default:
+ return nullptr;
+ }
+ } else {
+ while (rec2 && rec != rec2) {
+ prev_rec = rec2;
+ ulint offs = rec_get_next_offs(rec2, FALSE);
+ if (offs < PAGE_OLD_INFIMUM
+ || offs > page_header_get_field(page,
+ PAGE_HEAP_TOP)) {
+ return nullptr;
+ }
+ rec2 = page + offs;
+ }
+ }
+
+ return(prev_rec);
+}
+
+/** Return the number of preceding records in an index page.
+@param rec index record
+@return number of preceding records, including the infimum pseudo-record
+@retval ULINT_UNDEFINED on corrupted page */
+ulint page_rec_get_n_recs_before(const rec_t *rec)
+{
+ const page_t *const page= page_align(rec);
+ const page_dir_slot_t *slot = page_dir_get_nth_slot(page, 0);
+ const page_dir_slot_t *const end_slot= slot - 2 * page_dir_get_n_slots(page);
+
+ lint n= 0;
+
+ ut_ad(page_rec_check(rec));
+
+ if (page_is_comp(page))
+ {
+ for (; rec_get_n_owned_new(rec) == 0; n--)
+ if (UNIV_UNLIKELY(!(rec= page_rec_get_next_low(rec, true))))
+ return ULINT_UNDEFINED;
+
+ do
+ {
+ const rec_t *slot_rec= page_dir_slot_get_rec_validate(slot);
+ if (UNIV_UNLIKELY(!slot_rec))
+ break;
+ n+= lint(rec_get_n_owned_new(slot_rec));
+
+ if (rec == slot_rec)
+ goto found;
+ }
+ while ((slot-= 2) > end_slot);
+ }
+ else
+ {
+ for (; rec_get_n_owned_old(rec) == 0; n--)
+ if (UNIV_UNLIKELY(!(rec= page_rec_get_next_low(rec, false))))
+ return ULINT_UNDEFINED;
+
+ do
+ {
+ const rec_t *slot_rec= page_dir_slot_get_rec_validate(slot);
+ if (UNIV_UNLIKELY(!slot_rec))
+ break;
+ n+= lint(rec_get_n_owned_old(slot_rec));
+
+ if (rec == slot_rec)
+ goto found;
+ }
+ while ((slot-= 2) > end_slot);
+ }
+
+ return ULINT_UNDEFINED;
+found:
+ return --n < 0 ? ULINT_UNDEFINED : ulint(n);
+}
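+
+/* The inverse relationship with page_rec_get_nth(), sketched for
+illustration (it holds for any record on a valid page):
+
+	ulint n = page_rec_get_n_recs_before(rec);
+	ut_ad(n == ULINT_UNDEFINED
+	      || page_rec_get_nth(page_align(rec), n) == rec);
+
+In particular, n == 0 exactly for the page infimum. */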
+
+/************************************************************//**
+Prints record contents including the data relevant only in
+the index page context. */
+void
+page_rec_print(
+/*===========*/
+ const rec_t* rec, /*!< in: physical record */
+ const rec_offs* offsets)/*!< in: record descriptor */
+{
+ ut_a(!page_rec_is_comp(rec) == !rec_offs_comp(offsets));
+ rec_print_new(stderr, rec, offsets);
+ if (page_rec_is_comp(rec)) {
+ ib::info() << "n_owned: " << rec_get_n_owned_new(rec)
+ << "; heap_no: " << rec_get_heap_no_new(rec)
+ << "; next rec: " << rec_get_next_offs(rec, TRUE);
+ } else {
+ ib::info() << "n_owned: " << rec_get_n_owned_old(rec)
+ << "; heap_no: " << rec_get_heap_no_old(rec)
+ << "; next rec: " << rec_get_next_offs(rec, FALSE);
+ }
+
+ page_rec_check(rec);
+ rec_validate(rec, offsets);
+}
+
+#ifdef UNIV_BTR_PRINT
+/***************************************************************//**
+This is used to print the contents of the directory for
+debugging purposes. */
+void
+page_dir_print(
+/*===========*/
+ page_t* page, /*!< in: index page */
+ ulint pr_n) /*!< in: print n first and n last entries */
+{
+ ulint n;
+ ulint i;
+ page_dir_slot_t* slot;
+
+ n = page_dir_get_n_slots(page);
+
+ fprintf(stderr, "--------------------------------\n"
+ "PAGE DIRECTORY\n"
+ "Page address %p\n"
+ "Directory stack top at offs: %lu; number of slots: %lu\n",
+ page, (ulong) page_offset(page_dir_get_nth_slot(page, n - 1)),
+ (ulong) n);
+ for (i = 0; i < n; i++) {
+ slot = page_dir_get_nth_slot(page, i);
+ if ((i == pr_n) && (i < n - pr_n)) {
+ fputs(" ... \n", stderr);
+ }
+ if ((i < pr_n) || (i >= n - pr_n)) {
+ fprintf(stderr,
+ "Contents of slot: %lu: n_owned: %lu,"
+ " rec offs: %lu\n",
+ (ulong) i,
+ (ulong) page_dir_slot_get_n_owned(slot),
+ (ulong)
+ page_offset(page_dir_slot_get_rec(slot)));
+ }
+ }
+ fprintf(stderr, "Total of %lu records\n"
+ "--------------------------------\n",
+ (ulong) (PAGE_HEAP_NO_USER_LOW + page_get_n_recs(page)));
+}
+
+/***************************************************************//**
+This is used to print the contents of the page record list for
+debugging purposes. */
+void
+page_print_list(
+/*============*/
+ buf_block_t* block, /*!< in: index page */
+ dict_index_t* index, /*!< in: dictionary index of the page */
+ ulint pr_n) /*!< in: print n first and n last entries */
+{
+ page_t* page = block->page.frame;
+ page_cur_t cur;
+ ulint count;
+ ulint n_recs;
+ mem_heap_t* heap = NULL;
+ rec_offs offsets_[REC_OFFS_NORMAL_SIZE];
+ rec_offs* offsets = offsets_;
+ rec_offs_init(offsets_);
+
+ ut_a((ibool)!!page_is_comp(page) == dict_table_is_comp(index->table));
+
+	fprintf(stderr,
+ "--------------------------------\n"
+ "PAGE RECORD LIST\n"
+ "Page address %p\n", page);
+
+ n_recs = page_get_n_recs(page);
+
+ page_cur_set_before_first(block, &cur);
+ count = 0;
+ for (;;) {
+		offsets = rec_get_offsets(cur.rec, index, offsets,
+					  page_rec_is_leaf(cur.rec)
+					  ? index->n_core_fields : 0,
+					  ULINT_UNDEFINED, &heap);
+ page_rec_print(cur.rec, offsets);
+
+ if (count == pr_n) {
+ break;
+ }
+ if (page_cur_is_after_last(&cur)) {
+ break;
+ }
+ page_cur_move_to_next(&cur);
+ count++;
+ }
+
+ if (n_recs > 2 * pr_n) {
+ fputs(" ... \n", stderr);
+ }
+
+ while (!page_cur_is_after_last(&cur)) {
+ page_cur_move_to_next(&cur);
+
+ if (count + pr_n >= n_recs) {
+			offsets = rec_get_offsets(cur.rec, index, offsets,
+						  page_rec_is_leaf(cur.rec)
+						  ? index->n_core_fields : 0,
+						  ULINT_UNDEFINED, &heap);
+ page_rec_print(cur.rec, offsets);
+ }
+ count++;
+ }
+
+ fprintf(stderr,
+ "Total of %lu records \n"
+ "--------------------------------\n",
+ (ulong) (count + 1));
+
+ if (UNIV_LIKELY_NULL(heap)) {
+ mem_heap_free(heap);
+ }
+}
+
+/***************************************************************//**
+Prints the info in a page header. */
+void
+page_header_print(
+/*==============*/
+ const page_t* page)
+{
+ fprintf(stderr,
+ "--------------------------------\n"
+ "PAGE HEADER INFO\n"
+ "Page address %p, n records %u (%s)\n"
+ "n dir slots %u, heap top %u\n"
+ "Page n heap %u, free %u, garbage %u\n"
+ "Page last insert %u, direction %u, n direction %u\n",
+ page, page_header_get_field(page, PAGE_N_RECS),
+ page_is_comp(page) ? "compact format" : "original format",
+ page_header_get_field(page, PAGE_N_DIR_SLOTS),
+ page_header_get_field(page, PAGE_HEAP_TOP),
+ page_dir_get_n_heap(page),
+ page_header_get_field(page, PAGE_FREE),
+ page_header_get_field(page, PAGE_GARBAGE),
+ page_header_get_field(page, PAGE_LAST_INSERT),
+ page_get_direction(page),
+ page_header_get_field(page, PAGE_N_DIRECTION));
+}
+
+/***************************************************************//**
+This is used to print the contents of the page for
+debugging purposes. */
+void
+page_print(
+/*=======*/
+ buf_block_t* block, /*!< in: index page */
+ dict_index_t* index, /*!< in: dictionary index of the page */
+ ulint dn, /*!< in: print dn first and last entries
+ in directory */
+ ulint rn) /*!< in: print rn first and last records
+ in directory */
+{
+ page_t* page = block->page.frame;
+
+ page_header_print(page);
+ page_dir_print(page, dn);
+ page_print_list(block, index, rn);
+}
+#endif /* UNIV_BTR_PRINT */
+
+/***************************************************************//**
+The following is used to validate a record on a page. This function
+differs from rec_validate as it can also check the n_owned field and
+the heap_no field.
+@return TRUE if ok */
+ibool
+page_rec_validate(
+/*==============*/
+ const rec_t* rec, /*!< in: physical record */
+ const rec_offs* offsets)/*!< in: array returned by rec_get_offsets() */
+{
+ ulint n_owned;
+ ulint heap_no;
+ const page_t* page;
+
+ page = page_align(rec);
+ ut_a(!page_is_comp(page) == !rec_offs_comp(offsets));
+
+ page_rec_check(rec);
+ rec_validate(rec, offsets);
+
+ if (page_rec_is_comp(rec)) {
+ n_owned = rec_get_n_owned_new(rec);
+ heap_no = rec_get_heap_no_new(rec);
+ } else {
+ n_owned = rec_get_n_owned_old(rec);
+ heap_no = rec_get_heap_no_old(rec);
+ }
+
+ if (UNIV_UNLIKELY(!(n_owned <= PAGE_DIR_SLOT_MAX_N_OWNED))) {
+ ib::warn() << "Dir slot of rec " << page_offset(rec)
+ << ", n owned too big " << n_owned;
+ return(FALSE);
+ }
+
+ if (UNIV_UNLIKELY(!(heap_no < page_dir_get_n_heap(page)))) {
+ ib::warn() << "Heap no of rec " << page_offset(rec)
+ << " too big " << heap_no << " "
+ << page_dir_get_n_heap(page);
+ return(FALSE);
+ }
+
+ return(TRUE);
+}
+
+#ifdef UNIV_DEBUG
+/***************************************************************//**
+Checks that the first directory slot points to the infimum record and
+the last to the supremum. This function is intended to track if the
+bug fixed in 4.0.14 has caused corruption to users' databases. */
+void
+page_check_dir(
+/*===========*/
+ const page_t* page) /*!< in: index page */
+{
+ ulint n_slots;
+ ulint infimum_offs;
+ ulint supremum_offs;
+
+ n_slots = page_dir_get_n_slots(page);
+ infimum_offs = mach_read_from_2(page_dir_get_nth_slot(page, 0));
+ supremum_offs = mach_read_from_2(page_dir_get_nth_slot(page,
+ n_slots - 1));
+
+ if (UNIV_UNLIKELY(!page_rec_is_infimum_low(infimum_offs))) {
+
+ ib::fatal() << "Page directory corruption: infimum not"
+ " pointed to";
+ }
+
+ if (UNIV_UNLIKELY(!page_rec_is_supremum_low(supremum_offs))) {
+
+ ib::fatal() << "Page directory corruption: supremum not"
+ " pointed to";
+ }
+}
+#endif /* UNIV_DEBUG */
+
+/***************************************************************//**
+This function checks the consistency of an index page when we do not
+know the index. It is written defensively, so that it should never
+crash even if the page is total garbage.
+@return TRUE if ok */
+ibool
+page_simple_validate_old(
+/*=====================*/
+ const page_t* page) /*!< in: index page in ROW_FORMAT=REDUNDANT */
+{
+ const page_dir_slot_t* slot;
+ ulint slot_no;
+ ulint n_slots;
+ const rec_t* rec;
+ const byte* rec_heap_top;
+ ulint count;
+ ulint own_count;
+ ibool ret = FALSE;
+
+ ut_a(!page_is_comp(page));
+
+ /* Check first that the record heap and the directory do not
+ overlap. */
+
+ n_slots = page_dir_get_n_slots(page);
+
+ if (UNIV_UNLIKELY(n_slots < 2 || n_slots > srv_page_size / 4)) {
+ ib::error() << "Nonsensical number of page dir slots: "
+ << n_slots;
+ goto func_exit;
+ }
+
+ rec_heap_top = page_header_get_ptr(page, PAGE_HEAP_TOP);
+
+ if (UNIV_UNLIKELY(rec_heap_top
+ > page_dir_get_nth_slot(page, n_slots - 1))) {
+ ib::error()
+ << "Record heap and dir overlap on a page, heap top "
+ << page_header_get_field(page, PAGE_HEAP_TOP)
+ << ", dir "
+ << page_offset(page_dir_get_nth_slot(page,
+ n_slots - 1));
+
+ goto func_exit;
+ }
+
+ /* Validate the record list in a loop checking also that it is
+ consistent with the page record directory. */
+
+ count = 0;
+ own_count = 1;
+ slot_no = 0;
+ slot = page_dir_get_nth_slot(page, slot_no);
+
+ rec = page_get_infimum_rec(page);
+
+ for (;;) {
+ if (UNIV_UNLIKELY(rec > rec_heap_top)) {
+ ib::error() << "Record " << (rec - page)
+ << " is above rec heap top "
+ << (rec_heap_top - page);
+
+ goto func_exit;
+ }
+
+ if (UNIV_UNLIKELY(rec_get_n_owned_old(rec) != 0)) {
+ /* This is a record pointed to by a dir slot */
+ if (UNIV_UNLIKELY(rec_get_n_owned_old(rec)
+ != own_count)) {
+
+ ib::error() << "Wrong owned count "
+ << rec_get_n_owned_old(rec)
+ << ", " << own_count << ", rec "
+ << (rec - page);
+
+ goto func_exit;
+ }
+
+ if (UNIV_UNLIKELY
+ (page_dir_slot_get_rec(slot) != rec)) {
+ ib::error() << "Dir slot does not point"
+ " to right rec " << (rec - page);
+
+ goto func_exit;
+ }
+
+ own_count = 0;
+
+ if (!page_rec_is_supremum(rec)) {
+ slot_no++;
+ slot = page_dir_get_nth_slot(page, slot_no);
+ }
+ }
+
+ if (page_rec_is_supremum(rec)) {
+
+ break;
+ }
+
+ if (UNIV_UNLIKELY
+ (rec_get_next_offs(rec, FALSE) < FIL_PAGE_DATA
+ || rec_get_next_offs(rec, FALSE) >= srv_page_size)) {
+
+ ib::error() << "Next record offset nonsensical "
+ << rec_get_next_offs(rec, FALSE) << " for rec "
+ << (rec - page);
+
+ goto func_exit;
+ }
+
+ count++;
+
+ if (UNIV_UNLIKELY(count > srv_page_size)) {
+ ib::error() << "Page record list appears"
+ " to be circular " << count;
+ goto func_exit;
+ }
+
+ rec = page_rec_get_next_const(rec);
+ own_count++;
+ }
+
+ if (UNIV_UNLIKELY(rec_get_n_owned_old(rec) == 0)) {
+ ib::error() << "n owned is zero in a supremum rec";
+
+ goto func_exit;
+ }
+
+ if (UNIV_UNLIKELY(slot_no != n_slots - 1)) {
+ ib::error() << "n slots wrong "
+ << slot_no << ", " << (n_slots - 1);
+ goto func_exit;
+ }
+
+ if (UNIV_UNLIKELY(ulint(page_header_get_field(page, PAGE_N_RECS))
+ + PAGE_HEAP_NO_USER_LOW
+ != count + 1)) {
+ ib::error() << "n recs wrong "
+ << page_header_get_field(page, PAGE_N_RECS)
+ + PAGE_HEAP_NO_USER_LOW << " " << (count + 1);
+
+ goto func_exit;
+ }
+
+	/* Then check the free list */
+ rec = page_header_get_ptr(page, PAGE_FREE);
+
+ while (rec != NULL) {
+ if (UNIV_UNLIKELY(rec < page + FIL_PAGE_DATA
+ || rec >= page + srv_page_size)) {
+ ib::error() << "Free list record has"
+ " a nonsensical offset " << (rec - page);
+
+ goto func_exit;
+ }
+
+ if (UNIV_UNLIKELY(rec > rec_heap_top)) {
+ ib::error() << "Free list record " << (rec - page)
+ << " is above rec heap top "
+ << (rec_heap_top - page);
+
+ goto func_exit;
+ }
+
+ count++;
+
+ if (UNIV_UNLIKELY(count > srv_page_size)) {
+ ib::error() << "Page free list appears"
+ " to be circular " << count;
+ goto func_exit;
+ }
+
+ ulint offs = rec_get_next_offs(rec, FALSE);
+ if (!offs) {
+ break;
+ }
+ if (UNIV_UNLIKELY(offs < PAGE_OLD_INFIMUM
+ || offs >= srv_page_size)) {
+ ib::error() << "Page free list is corrupted " << count;
+ goto func_exit;
+ }
+
+ rec = page + offs;
+ }
+
+ if (UNIV_UNLIKELY(page_dir_get_n_heap(page) != count + 1)) {
+
+ ib::error() << "N heap is wrong "
+ << page_dir_get_n_heap(page) << ", " << (count + 1);
+
+ goto func_exit;
+ }
+
+ ret = TRUE;
+
+func_exit:
+ return(ret);
+}
+
+/***************************************************************//**
+This function checks the consistency of an index page when we do not
+know the index. It is written defensively, so that it should never
+crash even if the page is total garbage.
+@return TRUE if ok */
+ibool
+page_simple_validate_new(
+/*=====================*/
+ const page_t* page) /*!< in: index page in ROW_FORMAT!=REDUNDANT */
+{
+ const page_dir_slot_t* slot;
+ ulint slot_no;
+ ulint n_slots;
+ const rec_t* rec;
+ const byte* rec_heap_top;
+ ulint count;
+ ulint own_count;
+ ibool ret = FALSE;
+
+ ut_a(page_is_comp(page));
+
+ /* Check first that the record heap and the directory do not
+ overlap. */
+
+ n_slots = page_dir_get_n_slots(page);
+
+ if (UNIV_UNLIKELY(n_slots < 2 || n_slots > srv_page_size / 4)) {
+ ib::error() << "Nonsensical number of page dir slots: "
+ << n_slots;
+ goto func_exit;
+ }
+
+ rec_heap_top = page_header_get_ptr(page, PAGE_HEAP_TOP);
+
+ if (UNIV_UNLIKELY(rec_heap_top
+ > page_dir_get_nth_slot(page, n_slots - 1))) {
+
+ ib::error() << "Record heap and dir overlap on a page,"
+ " heap top "
+ << page_header_get_field(page, PAGE_HEAP_TOP)
+ << ", dir " << page_offset(
+ page_dir_get_nth_slot(page, n_slots - 1));
+
+ goto func_exit;
+ }
+
+ /* Validate the record list in a loop checking also that it is
+ consistent with the page record directory. */
+
+ count = 0;
+ own_count = 1;
+ slot_no = 0;
+ slot = page_dir_get_nth_slot(page, slot_no);
+
+ rec = page + PAGE_NEW_INFIMUM;
+
+ for (;;) {
+ if (UNIV_UNLIKELY(rec < page + PAGE_NEW_INFIMUM
+ || rec > rec_heap_top)) {
+ ib::error() << "Record " << page_offset(rec)
+ << " is out of bounds: "
+ << page_offset(rec_heap_top);
+ goto func_exit;
+ }
+
+ if (UNIV_UNLIKELY(rec_get_n_owned_new(rec) != 0)) {
+ /* This is a record pointed to by a dir slot */
+ if (UNIV_UNLIKELY(rec_get_n_owned_new(rec)
+ != own_count)) {
+
+ ib::error() << "Wrong owned count "
+ << rec_get_n_owned_new(rec) << ", "
+ << own_count << ", rec "
+ << page_offset(rec);
+
+ goto func_exit;
+ }
+
+ if (UNIV_UNLIKELY
+ (page_dir_slot_get_rec(slot) != rec)) {
+ ib::error() << "Dir slot does not point"
+ " to right rec " << page_offset(rec);
+
+ goto func_exit;
+ }
+
+ own_count = 0;
+
+ if (!page_rec_is_supremum(rec)) {
+ slot_no++;
+ slot = page_dir_get_nth_slot(page, slot_no);
+ }
+ }
+
+ if (page_rec_is_supremum(rec)) {
+
+ break;
+ }
+
+ if (UNIV_UNLIKELY
+ (rec_get_next_offs(rec, TRUE) < FIL_PAGE_DATA
+ || rec_get_next_offs(rec, TRUE) >= srv_page_size)) {
+
+ ib::error() << "Next record offset nonsensical "
+ << rec_get_next_offs(rec, TRUE)
+ << " for rec " << page_offset(rec);
+
+ goto func_exit;
+ }
+
+ count++;
+
+ if (UNIV_UNLIKELY(count > srv_page_size)) {
+ ib::error() << "Page record list appears to be"
+ " circular " << count;
+ goto func_exit;
+ }
+
+ rec = page_rec_get_next_const(rec);
+ own_count++;
+ }
+
+ if (UNIV_UNLIKELY(rec_get_n_owned_new(rec) == 0)) {
+ ib::error() << "n owned is zero in a supremum rec";
+
+ goto func_exit;
+ }
+
+ if (UNIV_UNLIKELY(slot_no != n_slots - 1)) {
+ ib::error() << "n slots wrong " << slot_no << ", "
+ << (n_slots - 1);
+ goto func_exit;
+ }
+
+ if (UNIV_UNLIKELY(ulint(page_header_get_field(page, PAGE_N_RECS))
+ + PAGE_HEAP_NO_USER_LOW
+ != count + 1)) {
+ ib::error() << "n recs wrong "
+ << page_header_get_field(page, PAGE_N_RECS)
+ + PAGE_HEAP_NO_USER_LOW << " " << (count + 1);
+
+ goto func_exit;
+ }
+
+	/* Then check the free list. */
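+	/* The free list is a singly linked list of deleted records,
+	anchored at the PAGE_FREE header field and terminated by a
+	next-record offset of 0. */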
+ rec = page_header_get_ptr(page, PAGE_FREE);
+
+ while (rec != NULL) {
+ if (UNIV_UNLIKELY(rec < page + FIL_PAGE_DATA
+ || rec >= page + srv_page_size)) {
+
+ ib::error() << "Free list record has"
+ " a nonsensical offset " << page_offset(rec);
+
+ goto func_exit;
+ }
+
+ if (UNIV_UNLIKELY(rec > rec_heap_top)) {
+ ib::error() << "Free list record " << page_offset(rec)
+ << " is above rec heap top "
+ << page_offset(rec_heap_top);
+
+ goto func_exit;
+ }
+
+ count++;
+
+ if (UNIV_UNLIKELY(count > srv_page_size)) {
+ ib::error() << "Page free list appears to be"
+ " circular " << count;
+ goto func_exit;
+ }
+
+ const ulint offs = rec_get_next_offs(rec, TRUE);
+ if (!offs) {
+ break;
+ }
+ if (UNIV_UNLIKELY(offs < PAGE_OLD_INFIMUM
+ || offs >= srv_page_size)) {
+ ib::error() << "Page free list is corrupted " << count;
+ goto func_exit;
+ }
+
+ rec = page + offs;
+ }
+
+ if (UNIV_UNLIKELY(page_dir_get_n_heap(page) != count + 1)) {
+
+ ib::error() << "N heap is wrong "
+ << page_dir_get_n_heap(page) << ", " << (count + 1);
+
+ goto func_exit;
+ }
+
+ ret = TRUE;
+
+func_exit:
+ return(ret);
+}
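+
+/* Illustrative sketch, not part of this file: since
+page_simple_validate_new() needs no dict_index_t, a caller that only
+has an untrusted page frame could check it with
+
+	if (page_is_comp(frame) && !page_simple_validate_new(frame)) {
+		... treat the page as corrupted ...
+	}
+
+assuming `frame` points to srv_page_size bytes of a possibly garbage
+page. */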
+
+/** Check the consistency of an index page.
+@param[in] page index page
+@param[in] index B-tree or R-tree index
+@return whether the page is valid */
+bool page_validate(const page_t* page, const dict_index_t* index)
+{
+ const page_dir_slot_t* slot;
+ const rec_t* rec;
+ const rec_t* old_rec = NULL;
+ const rec_t* first_rec = NULL;
+ ulint offs = 0;
+ ulint n_slots;
+ ibool ret = TRUE;
+ ulint i;
+ rec_offs offsets_1[REC_OFFS_NORMAL_SIZE];
+ rec_offs offsets_2[REC_OFFS_NORMAL_SIZE];
+ rec_offs* offsets = offsets_1;
+ rec_offs* old_offsets = offsets_2;
+
+ rec_offs_init(offsets_1);
+ rec_offs_init(offsets_2);
+
+#ifdef UNIV_GIS_DEBUG
+ if (dict_index_is_spatial(index)) {
+		fprintf(stderr, "Page no: %lu\n",
+			(ulong) page_get_page_no(page));
+ }
+#endif /* UNIV_GIS_DEBUG */
+
+ if (UNIV_UNLIKELY((ibool) !!page_is_comp(page)
+ != dict_table_is_comp(index->table))) {
+ ib::error() << "'compact format' flag mismatch";
+func_exit2:
+ ib::error() << "Apparent corruption in space "
+ << page_get_space_id(page) << " page "
+ << page_get_page_no(page)
+ << " of index " << index->name
+ << " of table " << index->table->name;
+ return FALSE;
+ }
+
+ if (page_is_comp(page)) {
+ if (UNIV_UNLIKELY(!page_simple_validate_new(page))) {
+ goto func_exit2;
+ }
+ } else {
+ if (UNIV_UNLIKELY(!page_simple_validate_old(page))) {
+ goto func_exit2;
+ }
+ }
+
+	/* Multiple transactions cannot operate on the same temp-table
+	in parallel. max_trx_id is ignored for temp tables because it
+	is not required for MVCC. */
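+	/* Hence the bounds check below applies only to nonempty leaf
+	pages of secondary (or change buffer) indexes on persistent
+	tables. */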
+ if (!page_is_leaf(page) || page_is_empty(page)
+ || !dict_index_is_sec_or_ibuf(index)
+ || index->table->is_temporary()) {
+ } else if (trx_id_t sys_max_trx_id = trx_sys.get_max_trx_id()) {
+ trx_id_t max_trx_id = page_get_max_trx_id(page);
+
+ if (max_trx_id == 0 || max_trx_id > sys_max_trx_id) {
+ ib::error() << "PAGE_MAX_TRX_ID out of bounds: "
+ << max_trx_id << ", " << sys_max_trx_id;
+ ret = FALSE;
+ }
+ } else {
+ ut_ad(srv_force_recovery >= SRV_FORCE_NO_UNDO_LOG_SCAN);
+ }
+
+ /* Check first that the record heap and the directory do not
+ overlap. */
+
+ n_slots = page_dir_get_n_slots(page);
+
+	if (UNIV_UNLIKELY(page_header_get_ptr(page, PAGE_HEAP_TOP)
+			  > page_dir_get_nth_slot(page, n_slots - 1))) {
+
+ ib::warn() << "Record heap and directory overlap";
+ goto func_exit2;
+ }
+
+ switch (uint16_t type = fil_page_get_type(page)) {
+ case FIL_PAGE_RTREE:
+ if (!index->is_spatial()) {
+wrong_page_type:
+ ib::warn() << "Wrong page type " << type;
+ ret = FALSE;
+ }
+ break;
+ case FIL_PAGE_TYPE_INSTANT:
+ if (index->is_instant()
+ && page_get_page_no(page) == index->page) {
+ break;
+ }
+ goto wrong_page_type;
+ case FIL_PAGE_INDEX:
+ if (index->is_spatial()) {
+ goto wrong_page_type;
+ }
+ if (index->is_instant()
+ && page_get_page_no(page) == index->page) {
+ goto wrong_page_type;
+ }
+ break;
+ default:
+ goto wrong_page_type;
+ }
+
+ /* The following buffer is used to check that the
+ records in the page record heap do not overlap */
+	mem_heap_t* heap = mem_heap_create(srv_page_size + 200);
+ byte* buf = static_cast<byte*>(mem_heap_zalloc(heap, srv_page_size));
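+	/* Each byte of the page that is covered by some record will be
+	marked in buf[]; finding a byte that is already marked means
+	that two records claim the same bytes, i.e. the page is
+	corrupted. */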
+
+	/* Validate the record list in a loop, also checking that it is
+	consistent with the directory. */
+	ulint count = 0, data_size = 0, own_count = 1, slot_no = 0;
+	ulint info_bits;
+ slot = page_dir_get_nth_slot(page, slot_no);
+
+ rec = page_get_infimum_rec(page);
+
+ const ulint n_core = page_is_leaf(page) ? index->n_core_fields : 0;
+
+ for (;;) {
+ offsets = rec_get_offsets(rec, index, offsets, n_core,
+ ULINT_UNDEFINED, &heap);
+
+ if (page_is_comp(page) && page_rec_is_user_rec(rec)
+ && UNIV_UNLIKELY(rec_get_node_ptr_flag(rec)
+ == page_is_leaf(page))) {
+ ib::error() << "'node_ptr' flag mismatch";
+ ret = FALSE;
+ goto next_rec;
+ }
+
+ if (UNIV_UNLIKELY(!page_rec_validate(rec, offsets))) {
+ ret = FALSE;
+ goto next_rec;
+ }
+
+ info_bits = rec_get_info_bits(rec, page_is_comp(page));
+ if (info_bits
+ & ~(REC_INFO_MIN_REC_FLAG | REC_INFO_DELETED_FLAG)) {
+ ib::error() << "info_bits has an incorrect value "
+ << info_bits;
+ ret = false;
+ }
+
+ if (rec == first_rec) {
+ if (info_bits & REC_INFO_MIN_REC_FLAG) {
+ if (page_has_prev(page)) {
+ ib::error() << "REC_INFO_MIN_REC_FLAG "
+ "is set on non-left page";
+ ret = false;
+ } else if (!page_is_leaf(page)) {
+ /* leftmost node pointer page */
+ } else if (!index->is_instant()) {
+ ib::error() << "REC_INFO_MIN_REC_FLAG "
+ "is set in a leaf-page record";
+ ret = false;
+ } else if (!(info_bits & REC_INFO_DELETED_FLAG)
+ != !index->table->instant) {
+ ib::error() << (index->table->instant
+ ? "Metadata record "
+ "is not delete-marked"
+ : "Metadata record "
+ "is delete-marked");
+ ret = false;
+ }
+ } else if (!page_has_prev(page)
+ && index->is_instant()) {
+ ib::error() << "Metadata record is missing";
+ ret = false;
+ }
+ } else if (info_bits & REC_INFO_MIN_REC_FLAG) {
+ ib::error() << "REC_INFO_MIN_REC_FLAG record is not "
+ "first in page";
+ ret = false;
+ }
+
+ if (page_is_comp(page)) {
+ const rec_comp_status_t status = rec_get_status(rec);
+ if (status != REC_STATUS_ORDINARY
+ && status != REC_STATUS_NODE_PTR
+ && status != REC_STATUS_INFIMUM
+ && status != REC_STATUS_SUPREMUM
+ && status != REC_STATUS_INSTANT) {
+ ib::error() << "impossible record status "
+ << status;
+ ret = false;
+ } else if (page_rec_is_infimum(rec)) {
+ if (status != REC_STATUS_INFIMUM) {
+ ib::error()
+ << "infimum record has status "
+ << status;
+ ret = false;
+ }
+ } else if (page_rec_is_supremum(rec)) {
+ if (status != REC_STATUS_SUPREMUM) {
+ ib::error() << "supremum record has "
+ "status "
+ << status;
+ ret = false;
+ }
+ } else if (!page_is_leaf(page)) {
+ if (status != REC_STATUS_NODE_PTR) {
+ ib::error() << "node ptr record has "
+ "status "
+ << status;
+ ret = false;
+ }
+ } else if (!index->is_instant()
+ && status == REC_STATUS_INSTANT) {
+ ib::error() << "instantly added record in a "
+ "non-instant index";
+ ret = false;
+ }
+ }
+
+	/* Check that the records are in ascending order */
+ if (count >= PAGE_HEAP_NO_USER_LOW
+ && !page_rec_is_supremum(rec)) {
+
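+			/* cmp_rec_rec() returns a negative value,
+			zero, or a positive value depending on whether
+			rec sorts before, equal to, or after old_rec. */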
+			int cmp = cmp_rec_rec(
+				rec, old_rec, offsets, old_offsets, index);
+
+			/* For spatial indexes, we allow records to be
+			equal on non-leaf levels. */
+			if (cmp <= 0 && !(cmp == 0 && index->is_spatial()
+					  && !page_is_leaf(page))) {
+
+ ib::error() << "Records in wrong order";
+
+ fputs("\nInnoDB: previous record ", stderr);
+ /* For spatial index, print the mbr info.*/
+ if (index->type & DICT_SPATIAL) {
+ putc('\n', stderr);
+ rec_print_mbr_rec(stderr,
+ old_rec, old_offsets);
+ fputs("\nInnoDB: record ", stderr);
+ putc('\n', stderr);
+ rec_print_mbr_rec(stderr, rec, offsets);
+ putc('\n', stderr);
+ putc('\n', stderr);
+
+ } else {
+ rec_print_new(stderr, old_rec, old_offsets);
+ fputs("\nInnoDB: record ", stderr);
+ rec_print_new(stderr, rec, offsets);
+ putc('\n', stderr);
+ }
+
+ ret = FALSE;
+ }
+ }
+
+ if (page_rec_is_user_rec(rec)) {
+
+ data_size += rec_offs_size(offsets);
+
+#if defined(UNIV_GIS_DEBUG)
+ /* For spatial index, print the mbr info.*/
+ if (index->type & DICT_SPATIAL) {
+ rec_print_mbr_rec(stderr, rec, offsets);
+ putc('\n', stderr);
+ }
+#endif /* UNIV_GIS_DEBUG */
+ }
+
+ offs = page_offset(rec_get_start(rec, offsets));
+ i = rec_offs_size(offsets);
+ if (UNIV_UNLIKELY(offs + i >= srv_page_size)) {
+ ib::error() << "Record offset out of bounds: "
+ << offs << '+' << i;
+ ret = FALSE;
+ goto next_rec;
+ }
+ while (i--) {
+ if (UNIV_UNLIKELY(buf[offs + i])) {
+ ib::error() << "Record overlaps another: "
+ << offs << '+' << i;
+ ret = FALSE;
+ break;
+ }
+ buf[offs + i] = 1;
+ }
+
+ if (ulint rec_own_count = page_is_comp(page)
+ ? rec_get_n_owned_new(rec)
+ : rec_get_n_owned_old(rec)) {
+ /* This is a record pointed to by a dir slot */
+ if (UNIV_UNLIKELY(rec_own_count != own_count)) {
+ ib::error() << "Wrong owned count at " << offs
+ << ": " << rec_own_count
+ << ", " << own_count;
+ ret = FALSE;
+ }
+
+ if (page_dir_slot_get_rec(slot) != rec) {
+ ib::error() << "Dir slot does not"
+ " point to right rec at " << offs;
+ ret = FALSE;
+ }
+
+ if (ret) {
+ page_dir_slot_check(slot);
+ }
+
+ own_count = 0;
+ if (!page_rec_is_supremum(rec)) {
+ slot_no++;
+ slot = page_dir_get_nth_slot(page, slot_no);
+ }
+ }
+
+next_rec:
+ old_rec = rec;
+ rec = page_rec_get_next_const(rec);
+
+ if (UNIV_UNLIKELY(!rec != page_rec_is_supremum(old_rec))) {
+ ib::error() << "supremum is not last record: " << offs;
+ ret = FALSE;
+ }
+
+ if (!rec) {
+ rec = old_rec; /* supremum */
+ break;
+ }
+
+ count++;
+ own_count++;
+
+ if (page_rec_is_infimum(old_rec)
+ && page_rec_is_user_rec(rec)) {
+ first_rec = rec;
+ }
+
+		/* Swap offsets and old_offsets so that the current
+		record's offsets are kept for the next iteration. */
+ std::swap(old_offsets, offsets);
+ }
+
+ if (page_is_comp(page)) {
+ if (UNIV_UNLIKELY(rec_get_n_owned_new(rec) == 0)) {
+
+ goto n_owned_zero;
+ }
+ } else if (UNIV_UNLIKELY(rec_get_n_owned_old(rec) == 0)) {
+n_owned_zero:
+ ib::error() << "n owned is zero at " << offs;
+ ret = FALSE;
+ }
+
+ if (UNIV_UNLIKELY(slot_no != n_slots - 1)) {
+ ib::error() << "n slots wrong " << slot_no << " "
+ << (n_slots - 1);
+ ret = FALSE;
+ }
+
+ if (UNIV_UNLIKELY(ulint(page_header_get_field(page, PAGE_N_RECS))
+ + PAGE_HEAP_NO_USER_LOW
+ != count + 1)) {
+ ib::error() << "n recs wrong "
+ << page_header_get_field(page, PAGE_N_RECS)
+ + PAGE_HEAP_NO_USER_LOW << " " << (count + 1);
+ ret = FALSE;
+ }
+
+ if (UNIV_UNLIKELY(data_size != page_get_data_size(page))) {
+ ib::error() << "Summed data size " << data_size
+ << ", returned by func " << page_get_data_size(page);
+ ret = FALSE;
+ }
+
+	/* Then check the free list. */
+ rec = page_header_get_ptr(page, PAGE_FREE);
+
+ while (rec != NULL) {
+ offsets = rec_get_offsets(rec, index, offsets, n_core,
+ ULINT_UNDEFINED, &heap);
+ if (UNIV_UNLIKELY(!page_rec_validate(rec, offsets))) {
+ ret = FALSE;
+next_free:
+ const ulint offs = rec_get_next_offs(
+ rec, page_is_comp(page));
+ if (!offs) {
+ break;
+ }
+ if (UNIV_UNLIKELY(offs < PAGE_OLD_INFIMUM
+ || offs >= srv_page_size)) {
+ ib::error() << "Page free list is corrupted";
+ ret = FALSE;
+ break;
+ }
+
+ rec = page + offs;
+ continue;
+ }
+
+ count++;
+ offs = page_offset(rec_get_start(rec, offsets));
+ i = rec_offs_size(offsets);
+ if (UNIV_UNLIKELY(offs + i >= srv_page_size)) {
+ ib::error() << "Free record offset out of bounds: "
+ << offs << '+' << i;
+ ret = FALSE;
+ goto next_free;
+ }
+ while (i--) {
+ if (UNIV_UNLIKELY(buf[offs + i])) {
+ ib::error() << "Free record overlaps another: "
+ << offs << '+' << i;
+ ret = FALSE;
+ break;
+ }
+ buf[offs + i] = 1;
+ }
+
+ goto next_free;
+ }
+
+ if (UNIV_UNLIKELY(page_dir_get_n_heap(page) != count + 1)) {
+ ib::error() << "N heap is wrong "
+ << page_dir_get_n_heap(page) << " " << count + 1;
+ ret = FALSE;
+ }
+
+ mem_heap_free(heap);
+
+ if (UNIV_UNLIKELY(!ret)) {
+ goto func_exit2;
+ }
+
+ return(ret);
+}
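+
+/* Illustrative sketch, not part of this file: a typical debug caller
+would assert the page together with its index, e.g.
+
+	ut_a(page_validate(frame, index));
+
+where `frame` is assumed to point to a latched index page that belongs
+to `index`. */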
+
+/***************************************************************//**
+Looks in the page record list for a record with the given heap number.
+@return record, NULL if not found */
+const rec_t*
+page_find_rec_with_heap_no(
+/*=======================*/
+ const page_t* page, /*!< in: index page */
+ ulint heap_no)/*!< in: heap number */
+{
+ const rec_t* rec;
+
+ if (page_is_comp(page)) {
+ rec = page + PAGE_NEW_INFIMUM;
+
+ for (;;) {
+ ulint rec_heap_no = rec_get_heap_no_new(rec);
+
+ if (rec_heap_no == heap_no) {
+
+ return(rec);
+ } else if (rec_heap_no == PAGE_HEAP_NO_SUPREMUM) {
+
+ return(NULL);
+ }
+
+ rec = page + rec_get_next_offs(rec, TRUE);
+ }
+ } else {
+ rec = page + PAGE_OLD_INFIMUM;
+
+ for (;;) {
+ ulint rec_heap_no = rec_get_heap_no_old(rec);
+
+ if (rec_heap_no == heap_no) {
+
+ return(rec);
+ } else if (rec_heap_no == PAGE_HEAP_NO_SUPREMUM) {
+
+ return(NULL);
+ }
+
+ rec = page + rec_get_next_offs(rec, FALSE);
+ }
+ }
+}
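+
+/* Illustrative sketch: heap numbers 0 and 1 denote the infimum and
+supremum, so the first user record allocated on a fresh page would
+typically be found with
+
+	const rec_t* rec = page_find_rec_with_heap_no(
+		page, PAGE_HEAP_NO_USER_LOW);
+
+which returns NULL when no record in the list has that heap number. */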
+
+/** Get the last non-delete-marked record on a page.
+@param[in] page index tree leaf page
+@return the last record, not delete-marked
+@retval infimum record if all records are delete-marked */
+const rec_t *page_find_rec_last_not_deleted(const page_t *page)
+{
+ ut_ad(page_is_leaf(page));
+
+ if (page_is_comp(page))
+ {
+ const rec_t *rec= page + PAGE_NEW_INFIMUM;
+ const rec_t *prev_rec= rec;
+ do
+ {
+ if (!(rec[-REC_NEW_INFO_BITS] &
+ (REC_INFO_DELETED_FLAG | REC_INFO_MIN_REC_FLAG)))
+ prev_rec= rec;
+ if (!(rec= page_rec_get_next_low(rec, true)))
+ return page + PAGE_NEW_INFIMUM;
+ } while (rec != page + PAGE_NEW_SUPREMUM);
+ return prev_rec;
+ }
+ else
+ {
+ const rec_t *rec= page + PAGE_OLD_INFIMUM;
+ const rec_t *prev_rec= rec;
+ do
+ {
+ if (!(rec[-REC_OLD_INFO_BITS] &
+ (REC_INFO_DELETED_FLAG | REC_INFO_MIN_REC_FLAG)))
+ prev_rec= rec;
+ if (!(rec= page_rec_get_next_low(rec, false)))
+ return page + PAGE_OLD_INFIMUM;
+ } while (rec != page + PAGE_OLD_SUPREMUM);
+ return prev_rec;
+ }
+}
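+
+/* Illustrative sketch: a caller must test the returned record before
+treating it as a user record, e.g.
+
+	const rec_t* rec = page_find_rec_last_not_deleted(page);
+	if (page_rec_is_infimum(rec)) {
+		... every user record on the page is delete-marked ...
+	}
+*/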