summaryrefslogtreecommitdiffstats
path: root/storage/innobase/include/btr0btr.h
diff options
context:
space:
mode:
authorDaniel Baumann <daniel.baumann@progress-linux.org>2024-04-13 12:24:36 +0000
committerDaniel Baumann <daniel.baumann@progress-linux.org>2024-04-13 12:24:36 +0000
commit06eaf7232e9a920468c0f8d74dcf2fe8b555501c (patch)
treee2c7b5777f728320e5b5542b6213fd3591ba51e2 /storage/innobase/include/btr0btr.h
parentInitial commit. (diff)
downloadmariadb-06eaf7232e9a920468c0f8d74dcf2fe8b555501c.tar.xz
mariadb-06eaf7232e9a920468c0f8d74dcf2fe8b555501c.zip
Adding upstream version 1:10.11.6.upstream/1%10.11.6
Signed-off-by: Daniel Baumann <daniel.baumann@progress-linux.org>
Diffstat (limited to 'storage/innobase/include/btr0btr.h')
-rw-r--r--storage/innobase/include/btr0btr.h543
1 files changed, 543 insertions, 0 deletions
diff --git a/storage/innobase/include/btr0btr.h b/storage/innobase/include/btr0btr.h
new file mode 100644
index 00000000..5a0401fa
--- /dev/null
+++ b/storage/innobase/include/btr0btr.h
@@ -0,0 +1,543 @@
+/*****************************************************************************
+
+Copyright (c) 1994, 2016, Oracle and/or its affiliates. All Rights Reserved.
+Copyright (c) 2012, Facebook Inc.
+Copyright (c) 2014, 2023, MariaDB Corporation.
+
+This program is free software; you can redistribute it and/or modify it under
+the terms of the GNU General Public License as published by the Free Software
+Foundation; version 2 of the License.
+
+This program is distributed in the hope that it will be useful, but WITHOUT
+ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
+FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details.
+
+You should have received a copy of the GNU General Public License along with
+this program; if not, write to the Free Software Foundation, Inc.,
+51 Franklin Street, Fifth Floor, Boston, MA 02110-1335 USA
+
+*****************************************************************************/
+
+/**************************************************//**
+@file include/btr0btr.h
+The B-tree
+
+Created 6/2/1994 Heikki Tuuri
+*******************************************************/
+
+#pragma once
+
+#include "dict0dict.h"
+#include "data0data.h"
+#include "rem0types.h"
+#include "page0cur.h"
+#include "btr0types.h"
+#include "gis0type.h"
+
+#define BTR_MAX_NODE_LEVEL 50 /*!< Maximum B-tree page level
+ (not really a hard limit).
+ Used in debug assertions
+ in btr_page_set_level and
+ btr_page_get_level */
+
+/** Maximum record size which can be stored on a page, without using the
+special big record storage structure */
+#define BTR_PAGE_MAX_REC_SIZE (srv_page_size / 2 - 200)
+
+/** @brief Maximum depth of a B-tree in InnoDB.
+
+Note that this isn't a maximum as such; none of the tree operations
+avoid producing trees bigger than this. It is instead a "max depth
+that other code must work with", useful for e.g. fixed-size arrays
+that must store some information about each level in a tree. In other
+words: if a B-tree with bigger depth than this is encountered, it is
+not acceptable for it to lead to mysterious memory corruption, but it
+is acceptable for the program to die with a clear assert failure. */
+#define BTR_MAX_LEVELS 100
+
+#define BTR_LATCH_MODE_WITHOUT_FLAGS(latch_mode) \
+ btr_latch_mode((latch_mode) & ~(BTR_INSERT \
+ | BTR_DELETE_MARK \
+ | BTR_RTREE_UNDO_INS \
+ | BTR_RTREE_DELETE_MARK \
+ | BTR_DELETE \
+ | BTR_IGNORE_SEC_UNIQUE \
+ | BTR_ALREADY_S_LATCHED \
+ | BTR_LATCH_FOR_INSERT \
+ | BTR_LATCH_FOR_DELETE))
+
+#define BTR_LATCH_MODE_WITHOUT_INTENTION(latch_mode) \
+ btr_latch_mode((latch_mode) \
+ & ~(BTR_LATCH_FOR_INSERT | BTR_LATCH_FOR_DELETE))
+
+/**************************************************************//**
+Checks and adjusts the root node of a tree during IMPORT TABLESPACE.
+@return error code, or DB_SUCCESS */
+dberr_t
+btr_root_adjust_on_import(
+/*======================*/
+ const dict_index_t* index) /*!< in: index tree */
+ MY_ATTRIBUTE((warn_unused_result));
+
+/** Report a decryption failure. */
+ATTRIBUTE_COLD void btr_decryption_failed(const dict_index_t &index);
+
+/** Get an index page and declare its latching order level.
+@param[in] index index tree
+@param[in] page page number
+@param[in] mode latch mode
+@param[in] merge whether change buffer merge should be attempted
+@param[in,out] mtr mini-transaction
+@param[out] err error code
+@return block */
+buf_block_t *btr_block_get(const dict_index_t &index,
+ uint32_t page, rw_lock_type_t mode, bool merge,
+ mtr_t *mtr, dberr_t *err= nullptr);
+
+/**************************************************************//**
+Gets the index id field of a page.
+@return index id */
+UNIV_INLINE
+index_id_t
+btr_page_get_index_id(
+/*==================*/
+ const page_t* page) /*!< in: index page */
+ MY_ATTRIBUTE((warn_unused_result));
+/** Read the B-tree or R-tree PAGE_LEVEL.
+@param page B-tree or R-tree page
+@return number of child page links to reach the leaf level
+@retval 0 for leaf pages */
+inline uint16_t btr_page_get_level(const page_t *page)
+{
+ uint16_t level= mach_read_from_2(my_assume_aligned<2>
+ (PAGE_HEADER + PAGE_LEVEL + page));
+ ut_ad(level <= BTR_MAX_NODE_LEVEL);
+ return level;
+} MY_ATTRIBUTE((warn_unused_result))
+
+/** Read FIL_PAGE_NEXT.
+@param page buffer pool page
+@return previous page number */
+inline uint32_t btr_page_get_next(const page_t* page)
+{
+ return mach_read_from_4(my_assume_aligned<4>(page + FIL_PAGE_NEXT));
+}
+
+/** Read FIL_PAGE_PREV.
+@param page buffer pool page
+@return previous page number */
+inline uint32_t btr_page_get_prev(const page_t* page)
+{
+ return mach_read_from_4(my_assume_aligned<4>(page + FIL_PAGE_PREV));
+}
+
+/**************************************************************//**
+Gets the child node file address in a node pointer.
+NOTE: the offsets array must contain all offsets for the record since
+we read the last field according to offsets and assume that it contains
+the child page number. In other words offsets must have been retrieved
+with rec_get_offsets(n_fields=ULINT_UNDEFINED).
+@return child node address */
+UNIV_INLINE
+uint32_t
+btr_node_ptr_get_child_page_no(
+/*===========================*/
+ const rec_t* rec, /*!< in: node pointer record */
+ const rec_offs* offsets)/*!< in: array returned by rec_get_offsets() */
+ MY_ATTRIBUTE((warn_unused_result));
+
+/** Create the root node for a new index tree.
+@param[in] type type of the index
+@param[in,out] space tablespace where created
+@param[in] index_id index id
+@param[in] index index, or NULL to create a system table
+@param[in,out] mtr mini-transaction
+@param[out] err error code
+@return page number of the created root
+@retval FIL_NULL if did not succeed */
+uint32_t
+btr_create(
+ ulint type,
+ fil_space_t* space,
+ index_id_t index_id,
+ dict_index_t* index,
+ mtr_t* mtr,
+ dberr_t* err)
+ MY_ATTRIBUTE((nonnull(2,5,6), warn_unused_result));
+
+/** Free a persistent index tree if it exists.
+@param[in,out] space tablespce
+@param[in] page root page number
+@param[in] index_id PAGE_INDEX_ID contents
+@param[in,out] mtr mini-transaction */
+void btr_free_if_exists(fil_space_t *space, uint32_t page,
+ index_id_t index_id, mtr_t *mtr);
+
+/** Drop a temporary table
+@param table temporary table */
+void btr_drop_temporary_table(const dict_table_t &table);
+
+/** Read the last used AUTO_INCREMENT value from PAGE_ROOT_AUTO_INC.
+@param[in,out] index clustered index
+@return the last used AUTO_INCREMENT value
+@retval 0 on error or if no AUTO_INCREMENT value was used yet */
+ib_uint64_t
+btr_read_autoinc(dict_index_t* index)
+ MY_ATTRIBUTE((nonnull, warn_unused_result));
+
+/** Read the last used AUTO_INCREMENT value from PAGE_ROOT_AUTO_INC,
+or fall back to MAX(auto_increment_column).
+@param[in] table table containing an AUTO_INCREMENT column
+@param[in] col_no index of the AUTO_INCREMENT column
+@return the AUTO_INCREMENT value
+@retval 0 on error or if no AUTO_INCREMENT value was used yet */
+ib_uint64_t
+btr_read_autoinc_with_fallback(const dict_table_t* table, unsigned col_no)
+ MY_ATTRIBUTE((nonnull, warn_unused_result));
+
+/** Write the next available AUTO_INCREMENT value to PAGE_ROOT_AUTO_INC.
+@param[in,out] index clustered index
+@param[in] autoinc the AUTO_INCREMENT value
+@param[in] reset whether to reset the AUTO_INCREMENT
+ to a possibly smaller value than currently
+ exists in the page */
+void
+btr_write_autoinc(dict_index_t* index, ib_uint64_t autoinc, bool reset = false)
+ MY_ATTRIBUTE((nonnull));
+
+/** Write instant ALTER TABLE metadata to a root page.
+@param[in,out] root clustered index root page
+@param[in] index clustered index with instant ALTER TABLE
+@param[in,out] mtr mini-transaction */
+void btr_set_instant(buf_block_t* root, const dict_index_t& index, mtr_t* mtr);
+
+ATTRIBUTE_COLD __attribute__((nonnull))
+/** Reset the table to the canonical format on ROLLBACK of instant ALTER TABLE.
+@param[in] index clustered index with instant ALTER TABLE
+@param[in] all whether to reset FIL_PAGE_TYPE as well
+@param[in,out] mtr mini-transaction */
+void btr_reset_instant(const dict_index_t &index, bool all, mtr_t *mtr);
+
+/*************************************************************//**
+Makes tree one level higher by splitting the root, and inserts
+the tuple. It is assumed that mtr contains an x-latch on the tree.
+NOTE that the operation of this function must always succeed,
+we cannot reverse it: therefore enough free disk space must be
+guaranteed to be available before this function is called.
+@return inserted record */
+rec_t*
+btr_root_raise_and_insert(
+/*======================*/
+ ulint flags, /*!< in: undo logging and locking flags */
+ btr_cur_t* cursor, /*!< in: cursor at which to insert: must be
+ on the root page; when the function returns,
+ the cursor is positioned on the predecessor
+ of the inserted record */
+ rec_offs** offsets,/*!< out: offsets on inserted record */
+ mem_heap_t** heap, /*!< in/out: pointer to memory heap
+ that can be emptied, or NULL */
+ const dtuple_t* tuple, /*!< in: tuple to insert */
+ ulint n_ext, /*!< in: number of externally stored columns */
+ mtr_t* mtr, /*!< in: mtr */
+ dberr_t* err) /*!< out: error code */
+ MY_ATTRIBUTE((nonnull, warn_unused_result));
+/*************************************************************//**
+Reorganizes an index page.
+
+IMPORTANT: On success, the caller will have to update IBUF_BITMAP_FREE
+if this is a compressed leaf page in a secondary index. This has to
+be done either within the same mini-transaction, or by invoking
+ibuf_reset_free_bits() before mtr_commit(). On uncompressed pages,
+IBUF_BITMAP_FREE is unaffected by reorganization.
+
+@param cursor page cursor
+@param mtr mini-transaction
+@return error code
+@retval DB_FAIL if reorganizing a ROW_FORMAT=COMPRESSED page failed */
+dberr_t btr_page_reorganize(page_cur_t *cursor, mtr_t *mtr)
+ MY_ATTRIBUTE((nonnull, warn_unused_result));
+/** Decide if the page should be split at the convergence point of inserts
+converging to the left.
+@param[in] cursor insert position
+@return the first record to be moved to the right half page
+@retval NULL if no split is recommended */
+rec_t* btr_page_get_split_rec_to_left(const btr_cur_t* cursor);
+/** Decide if the page should be split at the convergence point of inserts
+converging to the right.
+@param[in] cursor insert position
+@param[out] split_rec if split recommended, the first record
+ on the right half page, or
+ NULL if the to-be-inserted record
+ should be first
+@return whether split is recommended */
+bool
+btr_page_get_split_rec_to_right(const btr_cur_t* cursor, rec_t** split_rec);
+
+/*************************************************************//**
+Splits an index page to halves and inserts the tuple. It is assumed
+that mtr holds an x-latch to the index tree. NOTE: the tree x-latch is
+released within this function! NOTE that the operation of this
+function must always succeed, we cannot reverse it: therefore enough
+free disk space (2 pages) must be guaranteed to be available before
+this function is called.
+
+@return inserted record */
+rec_t*
+btr_page_split_and_insert(
+/*======================*/
+ ulint flags, /*!< in: undo logging and locking flags */
+ btr_cur_t* cursor, /*!< in: cursor at which to insert; when the
+ function returns, the cursor is positioned
+ on the predecessor of the inserted record */
+ rec_offs** offsets,/*!< out: offsets on inserted record */
+ mem_heap_t** heap, /*!< in/out: pointer to memory heap
+ that can be emptied, or NULL */
+ const dtuple_t* tuple, /*!< in: tuple to insert */
+ ulint n_ext, /*!< in: number of externally stored columns */
+ mtr_t* mtr, /*!< in: mtr */
+ dberr_t* err) /*!< out: error code */
+ MY_ATTRIBUTE((nonnull, warn_unused_result));
+/*******************************************************//**
+Inserts a data tuple to a tree on a non-leaf level. It is assumed
+that mtr holds an x-latch on the tree. */
+dberr_t
+btr_insert_on_non_leaf_level(
+ ulint flags, /*!< in: undo logging and locking flags */
+ dict_index_t* index, /*!< in: index */
+ ulint level, /*!< in: level, must be > 0 */
+ dtuple_t* tuple, /*!< in: the record to be inserted */
+ mtr_t* mtr) /*!< in: mtr */
+ MY_ATTRIBUTE((nonnull, warn_unused_result));
+
+/** Set a child page pointer record as the predefined minimum record.
+@tparam has_prev whether the page is supposed to have a left sibling
+@param[in,out] rec leftmost record on a leftmost non-leaf page
+@param[in,out] block buffer pool block
+@param[in,out] mtr mini-transaction */
+template<bool has_prev= false>
+inline void btr_set_min_rec_mark(rec_t *rec, const buf_block_t &block,
+ mtr_t *mtr)
+{
+ ut_ad(block.page.frame == page_align(rec));
+ ut_ad(!page_is_leaf(block.page.frame));
+ ut_ad(has_prev == page_has_prev(block.page.frame));
+
+ rec-= page_rec_is_comp(rec) ? REC_NEW_INFO_BITS : REC_OLD_INFO_BITS;
+
+ if (block.page.zip.data)
+ /* This flag is computed from other contents on a ROW_FORMAT=COMPRESSED
+ page. We are not modifying the compressed page frame at all. */
+ *rec|= REC_INFO_MIN_REC_FLAG;
+ else
+ mtr->write<1>(block, rec, *rec | REC_INFO_MIN_REC_FLAG);
+}
+
+/** Seek to the parent page of a B-tree page.
+@param[in,out] mtr mini-transaction
+@param[in,out] cursor cursor pointing to the x-latched parent page
+@return whether the cursor was successfully positioned */
+bool btr_page_get_father(mtr_t* mtr, btr_cur_t* cursor)
+ MY_ATTRIBUTE((nonnull,warn_unused_result));
+#ifdef UNIV_DEBUG
+/************************************************************//**
+Checks that the node pointer to a page is appropriate.
+@return TRUE */
+ibool
+btr_check_node_ptr(
+/*===============*/
+ dict_index_t* index, /*!< in: index tree */
+ buf_block_t* block, /*!< in: index page */
+ mtr_t* mtr) /*!< in: mtr */
+ MY_ATTRIBUTE((warn_unused_result));
+#endif /* UNIV_DEBUG */
+/*************************************************************//**
+Tries to merge the page first to the left immediate brother if such a
+brother exists, and the node pointers to the current page and to the
+brother reside on the same page. If the left brother does not satisfy these
+conditions, looks at the right brother. If the page is the only one on that
+level lifts the records of the page to the father page, thus reducing the
+tree height. It is assumed that mtr holds an x-latch on the tree and on the
+page. If cursor is on the leaf level, mtr must also hold x-latches to
+the brothers, if they exist.
+@return error code
+@retval DB_FAIL if the tree could not be merged */
+dberr_t
+btr_compress(
+/*=========*/
+ btr_cur_t* cursor, /*!< in/out: cursor on the page to merge
+ or lift; the page must not be empty:
+ when deleting records, use btr_discard_page()
+ if the page would become empty */
+ bool adjust, /*!< in: whether the cursor position should be
+ adjusted even when compression occurs */
+ mtr_t* mtr) /*!< in/out: mini-transaction */
+ MY_ATTRIBUTE((nonnull, warn_unused_result));
+/*************************************************************//**
+Discards a page from a B-tree. This is used to remove the last record from
+a B-tree page: the whole page must be removed at the same time. This cannot
+be used for the root page, which is allowed to be empty. */
+dberr_t
+btr_discard_page(
+/*=============*/
+ btr_cur_t* cursor, /*!< in: cursor on the page to discard: not on
+ the root page */
+ mtr_t* mtr); /*!< in: mtr */
+
+/**************************************************************//**
+Allocates a new file page to be used in an index tree. NOTE: we assume
+that the caller has made the reservation for free extents!
+@retval NULL if no page could be allocated */
+buf_block_t*
+btr_page_alloc(
+/*===========*/
+ dict_index_t* index, /*!< in: index tree */
+ uint32_t hint_page_no, /*!< in: hint of a good page */
+ byte file_direction, /*!< in: direction where a possible
+ page split is made */
+ ulint level, /*!< in: level where the page is placed
+ in the tree */
+ mtr_t* mtr, /*!< in/out: mini-transaction
+ for the allocation */
+ mtr_t* init_mtr, /*!< in/out: mini-transaction
+ for x-latching and initializing
+ the page */
+ dberr_t* err) /*!< out: error code */
+ MY_ATTRIBUTE((warn_unused_result));
+/** Empty an index page (possibly the root page). @see btr_page_create().
+@param[in,out] block page to be emptied
+@param[in,out] page_zip compressed page frame, or NULL
+@param[in] index index of the page
+@param[in] level B-tree level of the page (0=leaf)
+@param[in,out] mtr mini-transaction */
+void
+btr_page_empty(
+ buf_block_t* block,
+ page_zip_des_t* page_zip,
+ dict_index_t* index,
+ ulint level,
+ mtr_t* mtr)
+ MY_ATTRIBUTE((nonnull(1, 3, 5)));
+/**************************************************************//**
+Creates a new index page (not the root, and also not
+used in page reorganization). @see btr_page_empty(). */
+void
+btr_page_create(
+/*============*/
+ buf_block_t* block, /*!< in/out: page to be created */
+ page_zip_des_t* page_zip,/*!< in/out: compressed page, or NULL */
+ dict_index_t* index, /*!< in: index */
+ ulint level, /*!< in: the B-tree level of the page */
+ mtr_t* mtr); /*!< in: mtr */
+
+/** Free an index page.
+@param[in,out] index index tree
+@param[in,out] block block to be freed
+@param[in,out] mtr mini-transaction
+@param[in] blob whether this is freeing a BLOB page
+@param[in] latched whether index->table->space->x_lock() was called */
+MY_ATTRIBUTE((nonnull))
+dberr_t btr_page_free(dict_index_t *index, buf_block_t *block, mtr_t *mtr,
+ bool blob= false, bool space_latched= false);
+
+/**************************************************************//**
+Gets the root node of a tree and x- or s-latches it.
+@return root page, x- or s-latched */
+buf_block_t*
+btr_root_block_get(
+/*===============*/
+ dict_index_t* index, /*!< in: index tree */
+ rw_lock_type_t mode, /*!< in: either RW_S_LATCH
+ or RW_X_LATCH */
+ mtr_t* mtr, /*!< in: mtr */
+ dberr_t* err); /*!< out: error code */
+/*************************************************************//**
+Reorganizes an index page.
+
+IMPORTANT: On success, the caller will have to update IBUF_BITMAP_FREE
+if this is a compressed leaf page in a secondary index. This has to
+be done either within the same mini-transaction, or by invoking
+ibuf_reset_free_bits() before mtr_commit(). On uncompressed pages,
+IBUF_BITMAP_FREE is unaffected by reorganization.
+
+@return error code
+@retval DB_FAIL if reorganizing a ROW_FORMAT=COMPRESSED page failed */
+dberr_t btr_page_reorganize_block(
+ ulint z_level,/*!< in: compression level to be used
+ if dealing with compressed page */
+ buf_block_t* block, /*!< in/out: B-tree page */
+ dict_index_t* index, /*!< in: the index tree of the page */
+ mtr_t* mtr) /*!< in/out: mini-transaction */
+ __attribute__((nonnull, warn_unused_result));
+
+#ifdef UNIV_BTR_PRINT
+/*************************************************************//**
+Prints size info of a B-tree. */
+void
+btr_print_size(
+/*===========*/
+ dict_index_t* index) /*!< in: index tree */
+ MY_ATTRIBUTE((nonnull));
+/**************************************************************//**
+Prints directories and other info of all nodes in the index. */
+void
+btr_print_index(
+/*============*/
+ dict_index_t* index, /*!< in: index */
+ ulint width) /*!< in: print this many entries from start
+ and end */
+ MY_ATTRIBUTE((nonnull));
+#endif /* UNIV_BTR_PRINT */
+/************************************************************//**
+Checks the size and number of fields in a record based on the definition of
+the index.
+@return TRUE if ok */
+ibool
+btr_index_rec_validate(
+/*===================*/
+ const rec_t* rec, /*!< in: index record */
+ const dict_index_t* index, /*!< in: index */
+ ibool dump_on_error) /*!< in: TRUE if the function
+ should print hex dump of record
+ and page on error */
+ MY_ATTRIBUTE((warn_unused_result));
+/**************************************************************//**
+Checks the consistency of an index tree.
+@return DB_SUCCESS if ok, error code if not */
+dberr_t
+btr_validate_index(
+/*===============*/
+ dict_index_t* index, /*!< in: index */
+ const trx_t* trx) /*!< in: transaction or 0 */
+ MY_ATTRIBUTE((warn_unused_result));
+
+/** Remove a page from the level list of pages.
+@param[in] block page to remove
+@param[in] index index tree
+@param[in,out] mtr mini-transaction */
+dberr_t btr_level_list_remove(const buf_block_t& block,
+ const dict_index_t& index, mtr_t* mtr)
+ MY_ATTRIBUTE((warn_unused_result));
+
+/*************************************************************//**
+If page is the only on its level, this function moves its records to the
+father page, thus reducing the tree height.
+@return father block */
+buf_block_t*
+btr_lift_page_up(
+ dict_index_t* index, /*!< in: index tree */
+ buf_block_t* block, /*!< in: page which is the only on its level;
+ must not be empty: use
+ btr_discard_only_page_on_level if the last
+ record from the page should be removed */
+ mtr_t* mtr, /*!< in/out: mini-transaction */
+ dberr_t* err) /*!< out: error code */
+ __attribute__((nonnull));
+
+#define BTR_N_LEAF_PAGES 1
+#define BTR_TOTAL_SIZE 2
+
+#include "btr0btr.inl"
+
+/****************************************************************
+Global variable controlling if scrubbing should be performed */
+extern my_bool srv_immediate_scrub_data_uncompressed;